summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
diff options
context:
space:
mode:
authorGuchun Chen <guchun.chen@amd.com>2020-08-04 10:00:53 +0300
committerAlex Deucher <alexander.deucher@amd.com>2020-08-14 23:12:22 +0300
commitf75e94d86829e92a758a26fc5bbdb4c9eba86260 (patch)
treeaef308d7dcb3e9d3ac29814a597f916f7257a9a8 /drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
parent0cf0ee983b14af5e2160fa677b2eeb4c5dbb03ad (diff)
downloadlinux-f75e94d86829e92a758a26fc5bbdb4c9eba86260.tar.xz
drm/amdgpu: bypass querying ras error count registers
Once RAS recovery has been triggered by a RAS sync flood interrupt or a RAS controller interrupt, use this guard to decide whether to bypass or execute the RAS error count register harvest for all IPs.

Signed-off-by: Guchun Chen <guchun.chen@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <Dennis.Li@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c48
1 files changed, 26 insertions, 22 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index e629156173d3..eadc9526d33f 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -302,6 +302,7 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
uint32_t bif_doorbell_intr_cntl;
struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if);
struct ras_err_data err_data = {0, 0, 0, NULL};
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL);
if (REG_GET_FIELD(bif_doorbell_intr_cntl,
@@ -312,28 +313,31 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
RAS_CNTLR_INTERRUPT_CLEAR, 1);
WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
- /*
- * clear error status after ras_controller_intr according to
- * hw team and count ue number for query
- */
- nbio_v7_4_query_ras_error_count(adev, &err_data);
-
- /* logging on error counter and printing for awareness */
- obj->err_data.ue_count += err_data.ue_count;
- obj->err_data.ce_count += err_data.ce_count;
-
- if (err_data.ce_count)
- dev_info(adev->dev, "%ld correctable hardware "
- "errors detected in %s block, "
- "no user action is needed.\n",
- obj->err_data.ce_count,
- adev->nbio.ras_if->name);
-
- if (err_data.ue_count)
- dev_info(adev->dev, "%ld uncorrectable hardware "
- "errors detected in %s block\n",
- obj->err_data.ue_count,
- adev->nbio.ras_if->name);
+ if (!ras->disable_ras_err_cnt_harvest) {
+ /*
+ * clear error status after ras_controller_intr
+ * according to hw team and count ue number
+ * for query
+ */
+ nbio_v7_4_query_ras_error_count(adev, &err_data);
+
+ /* logging on error cnt and printing for awareness */
+ obj->err_data.ue_count += err_data.ue_count;
+ obj->err_data.ce_count += err_data.ce_count;
+
+ if (err_data.ce_count)
+ dev_info(adev->dev, "%ld correctable hardware "
+ "errors detected in %s block, "
+ "no user action is needed.\n",
+ obj->err_data.ce_count,
+ adev->nbio.ras_if->name);
+
+ if (err_data.ue_count)
+ dev_info(adev->dev, "%ld uncorrectable hardware "
+ "errors detected in %s block\n",
+ obj->err_data.ue_count,
+ adev->nbio.ras_if->name);
+ }
dev_info(adev->dev, "RAS controller interrupt triggered "
"by NBIF error\n");