diff options
author | Daniel Vetter <daniel.vetter@ffwll.ch> | 2023-08-04 12:10:18 +0300 |
---|---|---|
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2023-08-04 12:10:18 +0300 |
commit | 3d00c59d147724e536b415e389445ece6fcda42f (patch) | |
tree | b7abf11faad68372dfd889eb644a825f4cdb19a2 /drivers/gpu/drm/amd/amdkfd | |
parent | 52920704df878050123dfeb469aa6ab8022547c1 (diff) | |
parent | 7ea1db28119e237d634c6f74ba52056939c009ad (diff) | |
download | linux-3d00c59d147724e536b415e389445ece6fcda42f.tar.xz |
Merge tag 'amd-drm-next-6.6-2023-07-28' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.6-2023-07-28:
amdgpu:
- Lots of checkpatch cleanups
- GFX 9.4.3 updates
- Add USB PD and IFWI flashing documentation
- GPUVM updates
- RAS fixes
- DRR fixes
- FAMS fixes
- Virtual display fixes
- Soft IH fixes
- SMU13 fixes
- Rework PSP firmware loading for other IPs
- Kernel doc fixes
- DCN 3.0.1 fixes
- LTTPR fixes
- DP MST fixes
- DCN 3.1.6 fixes
- SubVP fixes
- Display bandwidth calculation fixes
- VCN4 secure submission fixes
- Allow building DC on RISC-V
- Add visible FB info to bo_print_info
- HBR3 fixes
- Add PSP 14.0 support
- GFX9 MCBP fix
- GMC10 vmhub index fix
- GMC11 vmhub index fix
- Create a new doorbell manager
- SR-IOV fixes
amdkfd:
- Cleanup CRIU dma-buf handling
- Use KIQ to unmap HIQ
- GFX 9.4.3 debugger updates
- GFX 9.4.2 debugger fixes
- Enable cooperative groups fof gfx11
- SVM fixes
radeon:
- Lots of checkpatch cleanups
Merge conflicts:
- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
The switch to drm eu helpers in 8a206685d36f ("drm/amdgpu: use
drm_exec for GEM and CSA handling v2") clashed with the
cosmetic cleanups from 30953c4d000b ("drm/amdgpu: Fix style
issues in amdgpu_gem.c"). I
kept the former since the cleanup up code is gone.
- drivers/gpu/drm/amd/amdgpu/atom.c.
adf64e214280 ("drm/amd: Avoid reading the VBIOS part number
twice") removed code that 992b8fe106ab ("drm/radeon: Replace
all non-returning strlcpy with strscpy") polished.
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230728214228.8102-1-alexander.deucher@amd.com
[sima: some merge conflict wrangling as noted]
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 35 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 34 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 22 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device.c | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 38 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 22 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 22 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 36 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 12 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 12 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 21 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 3 |
13 files changed, 200 insertions, 66 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 6a27b000a246..aef8e12df61f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1487,7 +1487,8 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep, goto out_unlock; } - if (!kfd_dbg_has_gws_support(dev) && p->debug_trap_enabled) { + if (p->debug_trap_enabled && (!kfd_dbg_has_gws_support(dev) || + kfd_dbg_has_cwsr_workaround(dev))) { retval = -EBUSY; goto out_unlock; } @@ -1845,22 +1846,21 @@ static uint32_t get_process_num_bos(struct kfd_process *p) idr_for_each_entry(&pdd->alloc_idr, mem, id) { struct kgd_mem *kgd_mem = (struct kgd_mem *)mem; - if ((uint64_t)kgd_mem->va > pdd->gpuvm_base) + if (!kgd_mem->va || kgd_mem->va > pdd->gpuvm_base) num_of_bos++; } } return num_of_bos; } -static int criu_get_prime_handle(struct drm_gem_object *gobj, int flags, +static int criu_get_prime_handle(struct kgd_mem *mem, int flags, u32 *shared_fd) { struct dma_buf *dmabuf; int ret; - dmabuf = amdgpu_gem_prime_export(gobj, flags); - if (IS_ERR(dmabuf)) { - ret = PTR_ERR(dmabuf); + ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf); + if (ret) { pr_err("dmabuf export failed for the BO\n"); return ret; } @@ -1918,7 +1918,11 @@ static int criu_checkpoint_bos(struct kfd_process *p, kgd_mem = (struct kgd_mem *)mem; dumper_bo = kgd_mem->bo; - if ((uint64_t)kgd_mem->va <= pdd->gpuvm_base) + /* Skip checkpointing BOs that are used for Trap handler + * code and state. Currently, these BOs have a VA that + * is less GPUVM Base + */ + if (kgd_mem->va && kgd_mem->va <= pdd->gpuvm_base) continue; bo_bucket = &bo_buckets[bo_index]; @@ -1940,7 +1944,7 @@ static int criu_checkpoint_bos(struct kfd_process *p, } if (bo_bucket->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) { - ret = criu_get_prime_handle(&dumper_bo->tbo.base, + ret = criu_get_prime_handle(kgd_mem, bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0, &bo_bucket->dmabuf_fd); @@ -2402,7 +2406,7 @@ static int criu_restore_bo(struct kfd_process *p, /* create the dmabuf object and export the bo */ if (bo_bucket->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) { - ret = criu_get_prime_handle(&kgd_mem->bo->tbo.base, DRM_RDWR, + ret = criu_get_prime_handle(kgd_mem, DRM_RDWR, &bo_bucket->dmabuf_fd); if (ret) return ret; @@ -2755,6 +2759,16 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug, if (pdd->qpd.queue_count) return -EEXIST; + + /* + * Setup TTMPs by default. + * Note that this call must remain here for MES ADD QUEUE to + * skip_process_ctx_clear unconditionally as the first call to + * SET_SHADER_DEBUGGER clears any stale process context data + * saved in MES. + */ + if (pdd->dev->kfd->shared_resources.enable_mes) + kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev)); } p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED; @@ -2848,7 +2862,8 @@ static int runtime_disable(struct kfd_process *p) if (!pdd->dev->kfd->shared_resources.enable_mes) debug_refresh_runlist(pdd->dev->dqm); else - kfd_dbg_set_mes_debug_mode(pdd); + kfd_dbg_set_mes_debug_mode(pdd, + !kfd_dbg_has_cwsr_workaround(pdd->dev)); } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index fff3ccc04fa9..9ec750666382 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -302,8 +302,7 @@ static int kfd_dbg_set_queue_workaround(struct queue *q, bool enable) if (!q) return 0; - if (KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) || - KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0, 0)) + if (!kfd_dbg_has_cwsr_workaround(q->device)) return 0; if (enable && q->properties.is_user_cu_masked) @@ -345,11 +344,10 @@ unwind: return r; } -int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd) +int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en) { uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode; uint32_t flags = pdd->process->dbg_flags; - bool sq_trap_en = !!spi_dbg_cntl; if (!kfd_dbg_is_per_vmid_supported(pdd->dev)) return 0; @@ -433,7 +431,7 @@ int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd, if (!pdd->dev->kfd->shared_resources.enable_mes) r = debug_map_and_unlock(pdd->dev->dqm); else - r = kfd_dbg_set_mes_debug_mode(pdd); + r = kfd_dbg_set_mes_debug_mode(pdd, true); kfd_dbg_clear_dev_watch_id(pdd, watch_id); @@ -446,7 +444,8 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd, uint32_t *watch_id, uint32_t watch_mode) { - int r = kfd_dbg_get_dev_watch_id(pdd, watch_id); + int xcc_id, r = kfd_dbg_get_dev_watch_id(pdd, watch_id); + uint32_t xcc_mask = pdd->dev->xcc_mask; if (r) return r; @@ -460,19 +459,21 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd, } amdgpu_gfx_off_ctrl(pdd->dev->adev, false); - pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch( + for_each_inst(xcc_id, xcc_mask) + pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch( pdd->dev->adev, watch_address, watch_address_mask, *watch_id, watch_mode, - pdd->dev->vm_info.last_vmid_kfd); + pdd->dev->vm_info.last_vmid_kfd, + xcc_id); amdgpu_gfx_off_ctrl(pdd->dev->adev, true); if (!pdd->dev->kfd->shared_resources.enable_mes) r = debug_map_and_unlock(pdd->dev->dqm); else - r = kfd_dbg_set_mes_debug_mode(pdd); + r = kfd_dbg_set_mes_debug_mode(pdd, true); /* HWS is broken so no point in HW rollback but release the watchpoint anyways */ if (r) @@ -514,7 +515,7 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags) if (!pdd->dev->kfd->shared_resources.enable_mes) r = debug_refresh_runlist(pdd->dev->dqm); else - r = kfd_dbg_set_mes_debug_mode(pdd); + r = kfd_dbg_set_mes_debug_mode(pdd, true); if (r) { target->dbg_flags = prev_flags; @@ -537,7 +538,7 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags) if (!pdd->dev->kfd->shared_resources.enable_mes) debug_refresh_runlist(pdd->dev->dqm); else - kfd_dbg_set_mes_debug_mode(pdd); + kfd_dbg_set_mes_debug_mode(pdd, true); } } @@ -599,7 +600,7 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind if (!pdd->dev->kfd->shared_resources.enable_mes) debug_refresh_runlist(pdd->dev->dqm); else - kfd_dbg_set_mes_debug_mode(pdd); + kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev)); } kfd_dbg_set_workaround(target, false); @@ -715,7 +716,7 @@ int kfd_dbg_trap_activate(struct kfd_process *target) if (!pdd->dev->kfd->shared_resources.enable_mes) r = debug_refresh_runlist(pdd->dev->dqm); else - r = kfd_dbg_set_mes_debug_mode(pdd); + r = kfd_dbg_set_mes_debug_mode(pdd, true); if (r) { target->runtime_info.runtime_state = @@ -751,7 +752,8 @@ int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd, if (!KFD_IS_SOC15(pdd->dev)) return -ENODEV; - if (!kfd_dbg_has_gws_support(pdd->dev) && pdd->qpd.num_gws) + if (pdd->qpd.num_gws && (!kfd_dbg_has_gws_support(pdd->dev) || + kfd_dbg_has_cwsr_workaround(pdd->dev))) return -EBUSY; } @@ -848,7 +850,7 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target, if (!pdd->dev->kfd->shared_resources.enable_mes) r = debug_refresh_runlist(pdd->dev->dqm); else - r = kfd_dbg_set_mes_debug_mode(pdd); + r = kfd_dbg_set_mes_debug_mode(pdd, true); if (r) break; @@ -880,7 +882,7 @@ int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target, if (!pdd->dev->kfd->shared_resources.enable_mes) r = debug_refresh_runlist(pdd->dev->dqm); else - r = kfd_dbg_set_mes_debug_mode(pdd); + r = kfd_dbg_set_mes_debug_mode(pdd, true); if (r) break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h index a289e59ceb79..fd0ff64d4184 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h @@ -76,8 +76,9 @@ int kfd_dbg_send_exception_to_runtime(struct kfd_process *p, static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev) { - return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) || - KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0); + return (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) || + KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) || + KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0)); } void debug_event_write_work_handler(struct work_struct *work); @@ -100,6 +101,12 @@ static inline bool kfd_dbg_is_rlc_restore_supported(struct kfd_node *dev) KFD_GC_VERSION(dev) == IP_VERSION(10, 1, 1)); } +static inline bool kfd_dbg_has_cwsr_workaround(struct kfd_node *dev) +{ + return KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) && + KFD_GC_VERSION(dev) <= IP_VERSION(11, 0, 3); +} + static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev) { if ((KFD_GC_VERSION(dev) == IP_VERSION(9, 0, 1) @@ -119,5 +126,14 @@ static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev) return true; } -int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd); +int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en); + +static inline bool kfd_dbg_has_ttmps_always_setup(struct kfd_node *dev) +{ + return (KFD_GC_VERSION(dev) < IP_VERSION(11, 0, 0) && + KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 2)) || + (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) && + KFD_GC_VERSION(dev) < IP_VERSION(12, 0, 0) && + (dev->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 70); +} #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 0b3dc754e06b..ebc9674d3ce1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -508,6 +508,7 @@ static int kfd_gws_init(struct kfd_node *node) { int ret = 0; struct kfd_dev *kfd = node->kfd; + uint32_t mes_rev = node->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; if (node->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) return 0; @@ -524,7 +525,10 @@ static int kfd_gws_init(struct kfd_node *node) (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3)) || (KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0) && KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0) - && kfd->mec2_fw_version >= 0x6b)))) + && kfd->mec2_fw_version >= 0x6b) || + (KFD_GC_VERSION(node) >= IP_VERSION(11, 0, 0) + && KFD_GC_VERSION(node) < IP_VERSION(12, 0, 0) + && mes_rev >= 68)))) ret = amdgpu_amdkfd_alloc_gws(node->adev, node->adev->gds.gws_size, &node->gws); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index f515cb8f30ca..ccaf85fc12c2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -226,9 +226,10 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, queue_input.paging = false; queue_input.tba_addr = qpd->tba_addr; queue_input.tma_addr = qpd->tma_addr; - queue_input.trap_en = KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) || - KFD_GC_VERSION(q->device) > IP_VERSION(11, 0, 3); + queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device); queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled; + queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled || + kfd_dbg_has_ttmps_always_setup(q->device); queue_type = convert_to_mes_queue_type(q->properties.type); if (queue_type < 0) { @@ -238,10 +239,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, } queue_input.queue_type = (uint32_t)queue_type; - if (q->gws) { - queue_input.gws_base = 0; - queue_input.gws_size = qpd->num_gws; - } + queue_input.exclusively_scheduled = q->properties.is_gws; amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); @@ -251,7 +249,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, q->properties.doorbell_off); pr_err("MES might be in unrecoverable state, issue a GPU reset\n"); kfd_hws_hang(dqm); -} + } return r; } @@ -1621,7 +1619,8 @@ static int initialize_cpsch(struct device_queue_manager *dqm) if (dqm->dev->kfd2kgd->get_iq_wait_times) dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev, - &dqm->wait_times); + &dqm->wait_times, + ffs(dqm->dev->xcc_mask) - 1); return 0; } @@ -1663,6 +1662,26 @@ static int start_cpsch(struct device_queue_manager *dqm) if (!dqm->dev->kfd->shared_resources.enable_mes) execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); + + /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */ + if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu && + (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) { + uint32_t reg_offset = 0; + uint32_t grace_period = 1; + + retval = pm_update_grace_period(&dqm->packet_mgr, + grace_period); + if (retval) + pr_err("Setting grace timeout failed\n"); + else if (dqm->dev->kfd2kgd->build_grace_period_packet_info) + /* Update dqm->wait_times maintained in software */ + dqm->dev->kfd2kgd->build_grace_period_packet_info( + dqm->dev->adev, dqm->wait_times, + grace_period, ®_offset, + &dqm->wait_times, + ffs(dqm->dev->xcc_mask) - 1); + } + dqm_unlock(dqm); return 0; @@ -1806,8 +1825,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, */ q->properties.is_evicted = !!qpd->evicted; q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled && - KFD_GC_VERSION(q->device) >= IP_VERSION(11, 0, 0) && - KFD_GC_VERSION(q->device) <= IP_VERSION(11, 0, 3); + kfd_dbg_has_cwsr_workaround(q->device); if (qd) mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 94c0fc2e57b7..83699392c808 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -318,6 +318,26 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd, 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT; } +static int destroy_hiq_mqd(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, unsigned int timeout, + uint32_t pipe_id, uint32_t queue_id) +{ + int err; + struct v10_compute_mqd *m; + u32 doorbell_off; + + m = get_mqd(mqd); + + doorbell_off = m->cp_hqd_pq_doorbell_control >> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + + err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, 0); + if (err) + pr_debug("Destroy HIQ MQD failed: %d\n", err); + + return err; +} + static void init_mqd_sdma(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -460,7 +480,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, mqd->free_mqd = free_mqd_hiq_sdma; mqd->load_mqd = kfd_hiq_load_mqd_kiq; mqd->update_mqd = update_mqd; - mqd->destroy_mqd = kfd_destroy_mqd_cp; + mqd->destroy_mqd = destroy_hiq_mqd; mqd->is_occupied = kfd_is_occupied_cp; mqd->mqd_size = sizeof(struct v10_compute_mqd); mqd->mqd_stride = kfd_mqd_stride; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 31fec5e70d13..2319467d2d95 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -335,6 +335,26 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd, 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT; } +static int destroy_hiq_mqd(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, unsigned int timeout, + uint32_t pipe_id, uint32_t queue_id) +{ + int err; + struct v11_compute_mqd *m; + u32 doorbell_off; + + m = get_mqd(mqd); + + doorbell_off = m->cp_hqd_pq_doorbell_control >> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + + err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, 0); + if (err) + pr_debug("Destroy HIQ MQD failed: %d\n", err); + + return err; +} + static void init_mqd_sdma(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -449,7 +469,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type, mqd->free_mqd = free_mqd_hiq_sdma; mqd->load_mqd = kfd_hiq_load_mqd_kiq; mqd->update_mqd = update_mqd; - mqd->destroy_mqd = kfd_destroy_mqd_cp; + mqd->destroy_mqd = destroy_hiq_mqd; mqd->is_occupied = kfd_is_occupied_cp; mqd->mqd_size = sizeof(struct v11_compute_mqd); #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 601bb9f68048..e23d32f35607 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -405,6 +405,25 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd, 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT; } +static int destroy_hiq_mqd(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, unsigned int timeout, + uint32_t pipe_id, uint32_t queue_id) +{ + int err; + struct v9_mqd *m; + u32 doorbell_off; + + m = get_mqd(mqd); + + doorbell_off = m->cp_hqd_pq_doorbell_control >> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, 0); + if (err) + pr_debug("Destroy HIQ MQD failed: %d\n", err); + + return err; +} + static void init_mqd_sdma(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -548,16 +567,19 @@ static int destroy_hiq_mqd_v9_4_3(struct mqd_manager *mm, void *mqd, { uint32_t xcc_mask = mm->dev->xcc_mask; int xcc_id, err, inst = 0; - void *xcc_mqd; uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev); + struct v9_mqd *m; + u32 doorbell_off; for_each_inst(xcc_id, xcc_mask) { - xcc_mqd = mqd + hiq_mqd_size * inst; - err = mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, xcc_mqd, - type, timeout, pipe_id, - queue_id, xcc_id); + m = get_mqd(mqd + hiq_mqd_size * inst); + + doorbell_off = m->cp_hqd_pq_doorbell_control >> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + + err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, xcc_id); if (err) { - pr_debug("Destroy MQD failed for xcc: %d\n", inst); + pr_debug("Destroy HIQ MQD failed for xcc: %d\n", inst); break; } ++inst; @@ -846,7 +868,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, } else { mqd->init_mqd = init_mqd_hiq; mqd->load_mqd = kfd_hiq_load_mqd_kiq; - mqd->destroy_mqd = kfd_destroy_mqd_cp; + mqd->destroy_mqd = destroy_hiq_mqd; } break; case KFD_MQD_TYPE_DIQ: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index 29a2d0499b67..8fda16e6fee6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -298,7 +298,8 @@ static int pm_set_grace_period_v9(struct packet_manager *pm, pm->dqm->wait_times, grace_period, ®_offset, - ®_data); + ®_data, + 0); if (grace_period == USE_DEFAULT_GRACE_PERIOD) reg_data = pm->dqm->wait_times; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index ba9d69054119..60e6b37b43ba 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -123,7 +123,7 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, if (!gws && pdd->qpd.num_gws == 0) return -EINVAL; - if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3)) { + if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && !dev->kfd->shared_resources.enable_mes) { if (gws) ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, gws, &mem); @@ -136,7 +136,9 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, } else { /* * Intentionally set GWS to a non-NULL value - * for GFX 9.4.3. + * for devices that do not use GWS for global wave + * synchronization but require the formality + * of setting GWS for cooperative groups. */ pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL; } @@ -173,7 +175,8 @@ void pqm_uninit(struct process_queue_manager *pqm) list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { if (pqn->q && pqn->q->gws && - KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3)) + KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) && + !pqn->q->device->kfd->shared_resources.enable_mes) amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, pqn->q->gws); kfd_procfs_del_queue(pqn->q); @@ -455,7 +458,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) } if (pqn->q->gws) { - if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3)) + if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) && + !dev->kfd->shared_resources.enable_mes) amdgpu_amdkfd_remove_gws_from_process( pqm->process->kgd_process_info, pqn->q->gws); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index aa32b06eea77..01c7de2d6e19 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -48,6 +48,8 @@ * page table is updated. */ #define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING (2UL * NSEC_PER_MSEC) +#define dynamic_svm_range_dump(svms) \ + _dynamic_func_call_no_desc("svm_range_dump", svm_range_debug_dump, svms) /* Giant svm range split into smaller ranges based on this, it is decided using * minimum of all dGPU/APU 1/32 VRAM size, between 2MB to 1GB and alignment to @@ -1521,6 +1523,8 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx) struct kfd_process_device *pdd; pdd = kfd_process_device_from_gpuidx(p, gpuidx); + if (!pdd) + return NULL; return SVM_ADEV_PGMAP_OWNER(pdd->dev->adev); } @@ -1595,12 +1599,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm, } if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) { - if (!prange->mapped_to_gpu) { + bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE); + if (!prange->mapped_to_gpu || + bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) { r = 0; goto free_ctx; } - - bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE); } if (prange->actual_loc && !prange->ttm_res) { @@ -3560,7 +3564,7 @@ out_unlock_range: break; } - svm_range_debug_dump(svms); + dynamic_svm_range_dump(svms); mutex_unlock(&svms->lock); mmap_read_unlock(mm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 61fc62f3e003..3b0749390388 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -38,6 +38,7 @@ #include "kfd_device_queue_manager.h" #include "kfd_iommu.h" #include "kfd_svm.h" +#include "kfd_debug.h" #include "amdgpu_amdkfd.h" #include "amdgpu_ras.h" #include "amdgpu.h" @@ -1931,23 +1932,27 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED | HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED; - if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) { - dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 | - HSA_DBG_WATCH_ADDR_MASK_HI_BIT; + if (kfd_dbg_has_ttmps_always_setup(dev->gpu)) + dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID; - if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 4, 2)) + if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) { + if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3)) dev->node_props.debug_prop |= - HSA_DBG_DISPATCH_INFO_ALWAYS_VALID; + HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9_4_3 | + HSA_DBG_WATCH_ADDR_MASK_HI_BIT_GFX9_4_3; else + dev->node_props.debug_prop |= + HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 | + HSA_DBG_WATCH_ADDR_MASK_HI_BIT; + + if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2)) dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; } else { dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | HSA_DBG_WATCH_ADDR_MASK_HI_BIT; - if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(11, 0, 0)) - dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID; - else + if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0)) dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index cba2cd5ed9d1..dea32a9e5506 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -32,9 +32,12 @@ #define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 32 #define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 6 +#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9_4_3 7 #define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 7 #define HSA_DBG_WATCH_ADDR_MASK_HI_BIT \ (29 << HSA_DBG_WATCH_ADDR_MASK_HI_BIT_SHIFT) +#define HSA_DBG_WATCH_ADDR_MASK_HI_BIT_GFX9_4_3 \ + (30 << HSA_DBG_WATCH_ADDR_MASK_HI_BIT_SHIFT) struct kfd_node_properties { uint64_t hive_id; |