diff options
author | Lijo Lazar <lijo.lazar@amd.com> | 2024-02-22 12:24:50 +0300 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2024-02-26 19:14:31 +0300 |
commit | e1f6746f339c9514c0564bab02c3f8847f4aa44b (patch) | |
tree | 4cff9b7b5b0f8045ee0fd439a1032fcaa2770c8c /drivers/gpu/drm/amd/amdkfd | |
parent | 1b6ef74b2b03b54776778476f8adf87dd4f8beb1 (diff) | |
download | linux-e1f6746f339c9514c0564bab02c3f8847f4aa44b.tar.xz |
drm/amdkfd: Skip packet submission on fatal error
If fatal error is detected, packet submission won't go through. Return
error in such cases. Also, avoid waiting for fence when fatal error is
detected.
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Asad Kamal <asad.kamal@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 10 |
4 files changed, 17 insertions, 7 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index c0e71543389a..f4d395e38683 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1903,6 +1903,10 @@ int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm, uint64_t *fence_addr = dqm->fence_addr; while (*fence_addr != fence_value) { + /* Fatal err detected, this response won't come */ + if (amdgpu_amdkfd_is_fed(dqm->dev->adev)) + return -EIO; + if (time_after(jiffies, end_jiffies)) { dev_err(dev, "qcm fence wait loop timeout expired\n"); /* In HWS case, this is used to halt the driver thread diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 1bea629c49ca..32c926986dbb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -286,7 +286,7 @@ err_no_space: return -ENOMEM; } -void kq_submit_packet(struct kernel_queue *kq) +int kq_submit_packet(struct kernel_queue *kq) { #ifdef DEBUG int i; @@ -298,6 +298,10 @@ void kq_submit_packet(struct kernel_queue *kq) } pr_debug("\n"); #endif + /* Fatal err detected, packet submission won't go through */ + if (amdgpu_amdkfd_is_fed(kq->dev->adev)) + return -EIO; + if (kq->dev->kfd->device_info.doorbell_size == 8) { *kq->wptr64_kernel = kq->pending_wptr64; write_kernel_doorbell64(kq->queue->properties.doorbell_ptr, @@ -307,6 +311,8 @@ void kq_submit_packet(struct kernel_queue *kq) write_kernel_doorbell(kq->queue->properties.doorbell_ptr, kq->pending_wptr); } + + return 0; } void kq_rollback_packet(struct kernel_queue *kq) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h index 9a6244430845..e24ee50acdf0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h @@ -47,7 +47,7 @@ int kq_acquire_packet_buffer(struct kernel_queue *kq, size_t packet_size_in_dwords, unsigned int **buffer_ptr); -void kq_submit_packet(struct kernel_queue *kq); +int kq_submit_packet(struct kernel_queue *kq); void kq_rollback_packet(struct kernel_queue *kq); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 401096c103b2..d6f65f39072b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -288,7 +288,7 @@ int pm_send_set_resources(struct packet_manager *pm, retval = pm->pmf->set_resources(pm, buffer, res); if (!retval) - kq_submit_packet(pm->priv_queue); + retval = kq_submit_packet(pm->priv_queue); else kq_rollback_packet(pm->priv_queue); @@ -325,7 +325,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) if (retval) goto fail_create_runlist; - kq_submit_packet(pm->priv_queue); + retval = kq_submit_packet(pm->priv_queue); mutex_unlock(&pm->lock); @@ -361,7 +361,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value); if (!retval) - kq_submit_packet(pm->priv_queue); + retval = kq_submit_packet(pm->priv_queue); else kq_rollback_packet(pm->priv_queue); @@ -392,7 +392,7 @@ int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period) retval = pm->pmf->set_grace_period(pm, buffer, grace_period); if (!retval) - kq_submit_packet(pm->priv_queue); + retval = kq_submit_packet(pm->priv_queue); else kq_rollback_packet(pm->priv_queue); } @@ -421,7 +421,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, retval = pm->pmf->unmap_queues(pm, buffer, filter, filter_param, reset); if (!retval) - kq_submit_packet(pm->priv_queue); + retval = kq_submit_packet(pm->priv_queue); else kq_rollback_packet(pm->priv_queue); |