summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
diff options
context:
space:
mode:
authorRob Clark <robdclark@chromium.org>2022-11-14 22:30:41 +0300
committerRob Clark <robdclark@chromium.org>2022-11-17 21:39:12 +0300
commitd73b1d02de0858b96f743e1e8b767fb092ae4c1b (patch)
treea5fedb0e54c0f5e721fa11619796c818a9963e01 /drivers/gpu/drm/msm/adreno/a6xx_gpu.c
parentcade05b2a88558847984287dd389fae0c7de31d6 (diff)
downloadlinux-d73b1d02de0858b96f743e1e8b767fb092ae4c1b.tar.xz
drm/msm: Hangcheck progress detection
If the hangcheck timer expires, check if the fw's position in the cmdstream has advanced (changed) since last timer expiration, and allow it up to three additional "extensions" to it's alotted time. The intention is to continue to catch "shader stuck in a loop" type hangs quickly, but allow more time for things that are actually making forward progress. Because we need to sample the CP state twice to detect if there has not been progress, this also cuts the the timer's duration in half. v2: Fix typo (REG_A6XX_CP_CSQ_IB2_STAT), add comment v3: Only halve hangcheck timer duration for generations which support progress detection (hdanton); removed unused a5xx progress (without knowing how to adjust for data buffered in ROQ it is too likely to report a false negative) v4: Comment updates to better describe the total hangcheck duration when progress detection is applied Reviewed-by: Chia-I Wu <olvaffe@gmail.com> Tested-by: Chia-I Wu <olvaffe@gmail.com> # dEQP-GLES2.functional.flush_finish.wait Signed-off-by: Rob Clark <robdclark@chromium.org> Reviewed-by: Akhil P Oommen <quic_akhilpo@quicinc.com> Patchwork: https://patchwork.freedesktop.org/patch/511584/ Link: https://lore.kernel.org/r/20221114193049.1533391-3-robdclark@gmail.com
Diffstat (limited to 'drivers/gpu/drm/msm/adreno/a6xx_gpu.c')
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.c34
1 files changed, 34 insertions, 0 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 9b462c6fcac8..e495d8e192db 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1843,6 +1843,39 @@ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
}
+static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+ struct msm_cp_state cp_state = {
+ .ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
+ .ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
+ .ib1_rem = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
+ .ib2_rem = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
+ };
+ bool progress;
+
+ /*
+ * Adjust the remaining data to account for what has already been
+ * fetched from memory, but not yet consumed by the SQE.
+ *
+ * This is not *technically* correct, the amount buffered could
+ * exceed the IB size due to hw prefetching ahead, but:
+ *
+ * (1) We aren't trying to find the exact position, just whether
+ * progress has been made
+ * (2) The CP_REG_TO_MEM at the end of a submit should be enough
+ * to prevent prefetching into an unrelated submit. (And
+ * either way, at some point the ROQ will be full.)
+ */
+ cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB1_STAT) >> 16;
+ cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB2_STAT) >> 16;
+
+ progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));
+
+ ring->last_cp_state = cp_state;
+
+ return progress;
+}
+
static u32 a618_get_speed_bin(u32 fuse)
{
if (fuse == 0)
@@ -1959,6 +1992,7 @@ static const struct adreno_gpu_funcs funcs = {
.create_address_space = a6xx_create_address_space,
.create_private_address_space = a6xx_create_private_address_space,
.get_rptr = a6xx_get_rptr,
+ .progress = a6xx_progress,
},
.get_timestamp = a6xx_get_timestamp,
};