summary | refs | log | tree | commit | diff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
diff options
context:
space:
mode:
author: Tao Zhou <tao.zhou1@amd.com> 2023-10-30 15:44:37 +0300
committer: Alex Deucher <alexander.deucher@amd.com> 2023-11-03 19:18:32 +0300
commit: 18eae367cb74d05b5e37ce77ef4025b735df012e (patch)
tree: 31ff4684a598ec1f957979ae717141bd3c39e8db /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parent: 88e5c8f8745b389b8e088a743a70840ead1dad37 (diff)
download: linux-18eae367cb74d05b5e37ce77ef4025b735df012e.tar.xz
drm/amdgpu: check recovery status of xgmi hive in ras_reset_error_count
Handle xgmi hive case.

Suggested-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Stanley.Yang <Stanley.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 11
1 file changed, 10 insertions, 1 deletion
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 3af50754800d..b7fe5951b166 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1222,6 +1222,8 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+ struct amdgpu_hive_info *hive;
+ int hive_ras_recovery = 0;
if (!block_obj || !block_obj->hw_ops) {
dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
@@ -1233,8 +1235,15 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
!amdgpu_ras_get_mca_debug_mode(adev))
return -EOPNOTSUPP;
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ hive_ras_recovery = atomic_read(&hive->ras_recovery);
+ amdgpu_put_xgmi_hive(hive);
+ }
+
/* skip ras error reset in gpu reset */
- if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery)) &&
+ if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery) ||
+ hive_ras_recovery) &&
mca_funcs && mca_funcs->mca_set_debug_mode)
return -EOPNOTSUPP;