summaryrefslogtreecommitdiff
path: root/drivers/gpu
diff options
context:
space:
mode:
authorYiPeng Chai <YiPeng.Chai@amd.com>2023-06-25 05:18:32 +0300
committerAlex Deucher <alexander.deucher@amd.com>2023-06-30 20:12:15 +0300
commit2c7cd280e5c4a626690315a6fbb70b49124d8354 (patch)
treebed47f53ab77c0325d05aeda96570ada614a263b /drivers/gpu
parent50a7c8765ca69543ffdbf855de0fd69aea769ccf (diff)
downloadlinux-2c7cd280e5c4a626690315a6fbb70b49124d8354.tar.xz
drm/amdgpu: gpu recovers from fatal error in poison mode
Fatal error occurs in ras poison mode, mode1 reset is used to recover gpu. Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h1
2 files changed, 12 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 4769a18304d7..8aaa427f8c0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2065,6 +2065,14 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE2_RESET;
reset_context.method = AMD_RESET_METHOD_MODE2;
}
+
+ /* Fatal error occurs in poison mode, mode1 reset is used to
+ * recover gpu.
+ */
+ if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) {
+ ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ }
}
amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
@@ -2955,9 +2963,12 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
return;
if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
dev_info(adev->dev, "uncorrectable hardware error"
"(ERREVENT_ATHUB_INTERRUPT) detected!\n");
+ ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
amdgpu_ras_reset_gpu(adev);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 46bf1889a9d7..ffb49b2d533a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -340,6 +340,7 @@ enum amdgpu_ras_ret {
#define AMDGPU_RAS_ERR_ADDRESS_VALID (1 << 2)
#define AMDGPU_RAS_GPU_RESET_MODE2_RESET (0x1 << 0)
+#define AMDGPU_RAS_GPU_RESET_MODE1_RESET (0x1 << 1)
struct amdgpu_ras_err_status_reg_entry {
uint32_t hwip;