summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
diff options
context:
space:
mode:
authorTao Zhou <tao.zhou1@amd.com>2023-10-12 09:33:37 +0300
committerAlex Deucher <alexander.deucher@amd.com>2023-10-27 01:41:22 +0300
commit73582be11ac8f6d6765e185bf48f22efb9d28c3b (patch)
treee506f510dba54e20e0b055df9a79f4ca1651914a /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parentf3a3bbf1566c7b6b0f9ac36e8e597c73dc0afdf8 (diff)
downloadlinux-73582be11ac8f6d6765e185bf48f22efb9d28c3b.tar.xz
drm/amdgpu: bypass RAS error reset in some conditions
PMFW is responsible for RAS error reset in some conditions, driver can skip the operation. v2: add check for ras->in_recovery, it's set earlier than amdgpu_in_reset. v3: fix error in gpu reset check. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c10
1 files changed, 9 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index f4c2c737b12f..303fbb6a48b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1220,6 +1220,8 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
enum amdgpu_ras_block block)
{
struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
if (!block_obj || !block_obj->hw_ops) {
dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
@@ -1227,7 +1229,13 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
return -EOPNOTSUPP;
}
- if (!amdgpu_ras_is_supported(adev, block))
+ /* skip ras error reset in gpu reset */
+ if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery)) &&
+ mca_funcs && mca_funcs->mca_set_debug_mode)
+ return -EOPNOTSUPP;
+
+ if (!amdgpu_ras_is_supported(adev, block) ||
+ !amdgpu_ras_get_mca_debug_mode(adev))
return -EOPNOTSUPP;
if (block_obj->hw_ops->reset_ras_error_count)