summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
diff options
context:
space:
mode:
authorYang Wang <kevinyang.wang@amd.com>2023-11-24 10:43:20 +0300
committerAlex Deucher <alexander.deucher@amd.com>2024-01-16 02:35:35 +0300
commit0599849c3276f3a24abc34d80ec4d1ca564d9971 (patch)
tree57f4386fc4e3898620a3a11f119837cad4b6e3de /drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
parentc8cb7e09db90109cf4a34b0ad994e335f88e5bdb (diff)
downloadlinux-0599849c3276f3a24abc34d80ec4d1ca564d9971.tar.xz
drm/amdgpu: add ACA kernel hardware error log support
add ACA kernel hardware error log support. Signed-off-by: Yang Wang <kevinyang.wang@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c29
1 files changed, 29 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
index ba383df5f63a..e091e9ac5edd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -100,6 +100,33 @@ static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, enum aca_erro
return smu_funcs->get_valid_aca_count(adev, type, count);
}
+static struct aca_regs_dump {
+ const char *name;
+ int reg_idx;
+} aca_regs[] = {
+ {"CONTROL", ACA_REG_IDX_CTL},
+ {"STATUS", ACA_REG_IDX_STATUS},
+ {"ADDR", ACA_REG_IDX_ADDR},
+ {"MISC", ACA_REG_IDX_MISC0},
+ {"CONFIG", ACA_REG_IDX_CONFG},
+ {"IPID", ACA_REG_IDX_IPID},
+ {"SYND", ACA_REG_IDX_SYND},
+ {"DESTAT", ACA_REG_IDX_DESTAT},
+ {"DEADDR", ACA_REG_IDX_DEADDR},
+ {"CONTROL_MASK", ACA_REG_IDX_CTL_MASK},
+};
+
+static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank)
+{
+ int i;
+
+ dev_info(adev->dev, "[Hardware error] Accelerator Check Architecture events logged\n");
+ /* plus 1 for output format, e.g: ACA[08/08]: xxxx */
+ for (i = 0; i < ARRAY_SIZE(aca_regs); i++)
+ dev_info(adev->dev, "[Hardware error] ACA[%02d/%02d].%s=0x%016llx\n",
+ idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]);
+}
+
static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_error_type type,
int start, int count,
struct aca_banks *banks)
@@ -137,6 +164,8 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_erro
if (ret)
return ret;
+ aca_smu_bank_dump(adev, i, count, &bank);
+
ret = aca_banks_add_bank(banks, &bank);
if (ret)
return ret;