summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/lima
diff options
context:
space:
mode:
authorErico Nunes <nunes.erico@gmail.com>2024-01-24 05:59:44 +0300
committerQiang Yu <yuq825@gmail.com>2024-02-12 11:27:17 +0300
commit53cb55b20208eaa79de63c82795c8d51caa9cfcc (patch)
treed97c2afc81bd2f8f5fb47a613c79dde702d231d0 /drivers/gpu/drm/lima
parent27aa58ec85f973d98d336df7b7941149308db80f (diff)
downloadlinux-53cb55b20208eaa79de63c82795c8d51caa9cfcc.tar.xz
drm/lima: handle spurious timeouts due to high irq latency
There are several unexplained and unreproduced cases of rendering timeouts with lima, for which one theory is high IRQ latency coming from somewhere else in the system. This kind of occurrence may cause applications to trigger unnecessary resets of the GPU or even applications to hang if it hits an issue in the recovery path. Panfrost already does some special handling to account for such "spurious timeouts", it makes sense to have this in lima too to reduce the chance that it hit users. Signed-off-by: Erico Nunes <nunes.erico@gmail.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240124025947.2110659-6-nunes.erico@gmail.com
Diffstat (limited to 'drivers/gpu/drm/lima')
-rw-r--r--drivers/gpu/drm/lima/lima_sched.c31
1 files changed, 28 insertions, 3 deletions
diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
index c3bf8cda8498..814428564637 100644
--- a/drivers/gpu/drm/lima/lima_sched.c
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */
+#include <linux/hardirq.h>
#include <linux/iosys-map.h>
#include <linux/kthread.h>
#include <linux/slab.h>
@@ -401,9 +402,35 @@ static enum drm_gpu_sched_stat lima_sched_timedout_job(struct drm_sched_job *job
struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
struct lima_sched_task *task = to_lima_task(job);
struct lima_device *ldev = pipe->ldev;
+ struct lima_ip *ip = pipe->processor[0];
+ int i;
+
+ /*
+ * If the GPU managed to complete this jobs fence, the timeout is
+ * spurious. Bail out.
+ */
+ if (dma_fence_is_signaled(task->fence)) {
+ DRM_WARN("%s spurious timeout\n", lima_ip_name(ip));
+ return DRM_GPU_SCHED_STAT_NOMINAL;
+ }
+
+ /*
+ * Lima IRQ handler may take a long time to process an interrupt
+ * if there is another IRQ handler hogging the processing.
+ * In order to catch such cases and not report spurious Lima job
+ * timeouts, synchronize the IRQ handler and re-check the fence
+ * status.
+ */
+ for (i = 0; i < pipe->num_processor; i++)
+ synchronize_irq(pipe->processor[i]->irq);
+
+ if (dma_fence_is_signaled(task->fence)) {
+ DRM_WARN("%s unexpectedly high interrupt latency\n", lima_ip_name(ip));
+ return DRM_GPU_SCHED_STAT_NOMINAL;
+ }
if (!pipe->error)
- DRM_ERROR("lima job timeout\n");
+ DRM_ERROR("%s job timeout\n", lima_ip_name(ip));
drm_sched_stop(&pipe->base, &task->base);
@@ -417,8 +444,6 @@ static enum drm_gpu_sched_stat lima_sched_timedout_job(struct drm_sched_job *job
if (pipe->bcast_mmu)
lima_mmu_page_fault_resume(pipe->bcast_mmu);
else {
- int i;
-
for (i = 0; i < pipe->num_mmu; i++)
lima_mmu_page_fault_resume(pipe->mmu[i]);
}