summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c')
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c327
1 files changed, 147 insertions, 180 deletions
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index f42b48b31927..0e5a77c3c2e2 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -48,6 +48,7 @@
#include "smu_cmn.h"
#include "mp/mp_13_0_6_offset.h"
#include "mp/mp_13_0_6_sh_mask.h"
+#include "umc_v12_0.h"
#undef MP1_Public
#undef smnMP1_FIRMWARE_FLAGS
@@ -94,22 +95,11 @@ MODULE_FIRMWARE("amdgpu/smu_13_0_6.bin");
#define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0x5
#define LINK_SPEED_MAX 4
-#define SMU_13_0_6_DSCLK_THRESHOLD 100
+#define SMU_13_0_6_DSCLK_THRESHOLD 140
#define MCA_BANK_IPID(_ip, _hwid, _type) \
[AMDGPU_MCA_IP_##_ip] = { .hwid = _hwid, .mcatype = _type, }
-enum mca_reg_idx {
- MCA_REG_IDX_CONTROL = 0,
- MCA_REG_IDX_STATUS = 1,
- MCA_REG_IDX_ADDR = 2,
- MCA_REG_IDX_MISC0 = 3,
- MCA_REG_IDX_CONFIG = 4,
- MCA_REG_IDX_IPID = 5,
- MCA_REG_IDX_SYND = 6,
- MCA_REG_IDX_COUNT = 16,
-};
-
struct mca_bank_ipid {
enum amdgpu_mca_ip ip;
uint16_t hwid;
@@ -122,7 +112,9 @@ struct mca_ras_info {
int *err_code_array;
int err_code_count;
int (*get_err_count)(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
- enum amdgpu_mca_error_type type, int idx, uint32_t *count);
+ enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count);
+ bool (*bank_is_valid)(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
+ enum amdgpu_mca_error_type type, struct mca_bank_entry *entry);
};
#define P2S_TABLE_ID_A 0x50325341
@@ -270,7 +262,7 @@ static int smu_v13_0_6_init_microcode(struct smu_context *smu)
struct amdgpu_device *adev = smu->adev;
uint32_t p2s_table_id = P2S_TABLE_ID_A;
int ret = 0, i, p2stable_count;
- char ucode_prefix[30];
+ char ucode_prefix[15];
char fw_name[30];
/* No need to load P2S tables in IOV mode */
@@ -1462,7 +1454,7 @@ static int smu_v13_0_6_register_irq_handler(struct smu_context *smu)
static int smu_v13_0_6_notify_unload(struct smu_context *smu)
{
- if (smu->smc_fw_version <= 0x553500)
+ if (amdgpu_in_reset(smu->adev))
return 0;
dev_dbg(smu->adev->dev, "Notify PMFW about driver unload");
@@ -2103,6 +2095,14 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
smu_v13_0_6_get_current_pcie_link_speed(smu);
gpu_metrics->pcie_bandwidth_acc =
SMUQ10_ROUND(metrics->PcieBandwidthAcc[0]);
+ gpu_metrics->pcie_bandwidth_inst =
+ SMUQ10_ROUND(metrics->PcieBandwidth[0]);
+ gpu_metrics->pcie_l0_to_recov_count_acc =
+ metrics->PCIeL0ToRecoveryCountAcc;
+ gpu_metrics->pcie_replay_count_acc =
+ metrics->PCIenReplayAAcc;
+ gpu_metrics->pcie_replay_rover_count_acc =
+ metrics->PCIenReplayARolloverCountAcc;
}
gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
@@ -2305,7 +2305,7 @@ static int smu_v13_0_6_post_init(struct smu_context *smu)
struct amdgpu_device *adev = smu->adev;
if (!amdgpu_sriov_vf(adev) && adev->ras_enabled)
- return smu_v13_0_6_mca_set_debug_mode(smu, true);
+ return smu_v13_0_6_mca_set_debug_mode(smu, false);
return 0;
}
@@ -2387,6 +2387,7 @@ static const struct mca_bank_ipid smu_v13_0_6_mca_ipid_table[AMDGPU_MCA_IP_COUNT
MCA_BANK_IPID(UMC, 0x96, 0x0),
MCA_BANK_IPID(SMU, 0x01, 0x1),
MCA_BANK_IPID(MP5, 0x01, 0x2),
+ MCA_BANK_IPID(PCS_XGMI, 0x50, 0x0),
};
static void mca_bank_entry_info_decode(struct mca_bank_entry *entry, struct mca_bank_info *info)
@@ -2448,53 +2449,60 @@ static int mca_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_t
return 0;
}
-static int mca_decode_mca_ipid(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, int idx, int *ip)
+static int mca_decode_ipid_to_hwip(uint64_t val)
{
const struct mca_bank_ipid *ipid;
- uint64_t val;
uint16_t hwid, mcatype;
- int i, ret;
-
- ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_IPID, &val);
- if (ret)
- return ret;
+ int i;
hwid = REG_GET_FIELD(val, MCMP1_IPIDT0, HardwareID);
mcatype = REG_GET_FIELD(val, MCMP1_IPIDT0, McaType);
- if (hwid) {
- for (i = 0; i < ARRAY_SIZE(smu_v13_0_6_mca_ipid_table); i++) {
- ipid = &smu_v13_0_6_mca_ipid_table[i];
+ for (i = 0; i < ARRAY_SIZE(smu_v13_0_6_mca_ipid_table); i++) {
+ ipid = &smu_v13_0_6_mca_ipid_table[i];
- if (!ipid->hwid)
- continue;
+ if (!ipid->hwid)
+ continue;
- if (ipid->hwid == hwid && ipid->mcatype == mcatype) {
- *ip = i;
- return 0;
- }
- }
+ if (ipid->hwid == hwid && ipid->mcatype == mcatype)
+ return i;
}
- *ip = AMDGPU_MCA_IP_UNKNOW;
+ return AMDGPU_MCA_IP_UNKNOW;
+}
+
+static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
+ enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count)
+{
+ uint64_t status0;
+
+ status0 = entry->regs[MCA_REG_IDX_STATUS];
+
+ if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) {
+ *count = 0;
+ return 0;
+ }
+
+ if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(status0))
+ *count = 1;
+ else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(status0))
+ *count = 1;
return 0;
}
-static int mca_normal_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
- enum amdgpu_mca_error_type type, int idx, uint32_t *count)
+static int mca_pcs_xgmi_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
+ enum amdgpu_mca_error_type type, struct mca_bank_entry *entry,
+ uint32_t *count)
{
- uint64_t status0;
- int ret;
+ u32 ext_error_code;
- ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_STATUS, &status0);
- if (ret)
- return ret;
+ ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(entry->regs[MCA_REG_IDX_STATUS]);
- if (REG_GET_FIELD(status0, MCMP1_STATUST0, Val))
+ if (type == AMDGPU_MCA_ERROR_TYPE_UE && ext_error_code == 0)
+ *count = 1;
+ else if (type == AMDGPU_MCA_ERROR_TYPE_CE && ext_error_code == 6)
*count = 1;
- else
- *count = 0;
return 0;
}
@@ -2515,70 +2523,41 @@ static bool mca_smu_check_error_code(struct amdgpu_device *adev, const struct mc
return false;
}
-static int mca_mp5_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
- enum amdgpu_mca_error_type type, int idx, uint32_t *count)
+static int mca_gfx_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
+ enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count)
{
- uint64_t status0 = 0, misc0 = 0;
- uint32_t errcode;
- int ret;
-
- if (mca_ras->ip != AMDGPU_MCA_IP_MP5)
- return -EINVAL;
-
- ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_STATUS, &status0);
- if (ret)
- return ret;
+ uint64_t status0, misc0;
+ status0 = entry->regs[MCA_REG_IDX_STATUS];
if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) {
*count = 0;
return 0;
}
- errcode = REG_GET_FIELD(status0, MCMP1_STATUST0, ErrorCode);
- if (!mca_smu_check_error_code(adev, mca_ras, errcode))
- return 0;
-
if (type == AMDGPU_MCA_ERROR_TYPE_UE &&
REG_GET_FIELD(status0, MCMP1_STATUST0, UC) == 1 &&
REG_GET_FIELD(status0, MCMP1_STATUST0, PCC) == 1) {
- if (count)
- *count = 1;
+ *count = 1;
return 0;
- }
-
- ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_MISC0, &misc0);
- if (ret)
- return ret;
-
- if (count)
+ } else {
+ misc0 = entry->regs[MCA_REG_IDX_MISC0];
*count = REG_GET_FIELD(misc0, MCMP1_MISC0T0, ErrCnt);
+ }
return 0;
}
static int mca_smu_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
- enum amdgpu_mca_error_type type, int idx, uint32_t *count)
+ enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count)
{
- uint64_t status0 = 0, misc0 = 0;
- uint32_t errcode;
- int ret;
-
- if (mca_ras->ip != AMDGPU_MCA_IP_SMU)
- return -EINVAL;
-
- ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_STATUS, &status0);
- if (ret)
- return ret;
+ uint64_t status0, misc0;
+ status0 = entry->regs[MCA_REG_IDX_STATUS];
if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) {
*count = 0;
return 0;
}
- errcode = REG_GET_FIELD(status0, MCMP1_STATUST0, ErrorCode);
- if (!mca_smu_check_error_code(adev, mca_ras, errcode))
- return 0;
-
if (type == AMDGPU_MCA_ERROR_TYPE_UE &&
REG_GET_FIELD(status0, MCMP1_STATUST0, UC) == 1 &&
REG_GET_FIELD(status0, MCMP1_STATUST0, PCC) == 1) {
@@ -2587,16 +2566,43 @@ static int mca_smu_mca_get_err_count(const struct mca_ras_info *mca_ras, struct
return 0;
}
- ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_MISC0, &misc0);
- if (ret)
- return ret;
-
- if (count)
- *count = REG_GET_FIELD(misc0, MCMP1_MISC0T0, ErrCnt);
+ misc0 = entry->regs[MCA_REG_IDX_MISC0];
+ *count = REG_GET_FIELD(misc0, MCMP1_MISC0T0, ErrCnt);
return 0;
}
+static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
+ enum amdgpu_mca_error_type type, struct mca_bank_entry *entry)
+{
+ uint32_t instlo;
+
+ instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo);
+ switch (instlo) {
+ case 0x36430400: /* SMNAID XCD 0 */
+ case 0x38430400: /* SMNAID XCD 1 */
+ case 0x40430400: /* SMNXCD XCD 0, NOTE: FIXME: fix this error later */
+ return true;
+ default:
+ return false;
+ }
+
+ return false;
+};
+
+static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
+ enum amdgpu_mca_error_type type, struct mca_bank_entry *entry)
+{
+ uint32_t errcode, instlo;
+
+ instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo);
+ if (instlo != 0x03b30400)
+ return false;
+
+ errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode);
+ return mca_smu_check_error_code(adev, mca_ras, errcode);
+}
+
static int sdma_err_codes[] = { CODE_SDMA0, CODE_SDMA1, CODE_SDMA2, CODE_SDMA3 };
static int mmhub_err_codes[] = {
CODE_DAGB0, CODE_DAGB0 + 1, CODE_DAGB0 + 2, CODE_DAGB0 + 3, CODE_DAGB0 + 4, /* DAGB0-4 */
@@ -2608,23 +2614,30 @@ static const struct mca_ras_info mca_ras_table[] = {
{
.blkid = AMDGPU_RAS_BLOCK__UMC,
.ip = AMDGPU_MCA_IP_UMC,
- .get_err_count = mca_normal_mca_get_err_count,
+ .get_err_count = mca_umc_mca_get_err_count,
}, {
.blkid = AMDGPU_RAS_BLOCK__GFX,
- .ip = AMDGPU_MCA_IP_MP5,
- .get_err_count = mca_mp5_mca_get_err_count,
+ .ip = AMDGPU_MCA_IP_SMU,
+ .get_err_count = mca_gfx_mca_get_err_count,
+ .bank_is_valid = mca_gfx_smu_bank_is_valid,
}, {
.blkid = AMDGPU_RAS_BLOCK__SDMA,
.ip = AMDGPU_MCA_IP_SMU,
.err_code_array = sdma_err_codes,
.err_code_count = ARRAY_SIZE(sdma_err_codes),
.get_err_count = mca_smu_mca_get_err_count,
+ .bank_is_valid = mca_smu_bank_is_valid,
}, {
.blkid = AMDGPU_RAS_BLOCK__MMHUB,
.ip = AMDGPU_MCA_IP_SMU,
.err_code_array = mmhub_err_codes,
.err_code_count = ARRAY_SIZE(mmhub_err_codes),
.get_err_count = mca_smu_mca_get_err_count,
+ .bank_is_valid = mca_smu_bank_is_valid,
+ }, {
+ .blkid = AMDGPU_RAS_BLOCK__XGMI_WAFL,
+ .ip = AMDGPU_MCA_IP_PCS_XGMI,
+ .get_err_count = mca_pcs_xgmi_mca_get_err_count,
},
};
@@ -2659,130 +2672,84 @@ static int mca_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_e
}
static bool mca_bank_is_valid(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras,
- enum amdgpu_mca_error_type type, int idx)
+ enum amdgpu_mca_error_type type, struct mca_bank_entry *entry)
{
- int ret, ip = AMDGPU_MCA_IP_UNKNOW;
-
- ret = mca_decode_mca_ipid(adev, type, idx, &ip);
- if (ret)
- return false;
-
- if (ip == AMDGPU_MCA_IP_UNKNOW)
+ if (mca_decode_ipid_to_hwip(entry->regs[MCA_REG_IDX_IPID]) != mca_ras->ip)
return false;
- return ip == mca_ras->ip;
-}
+ if (mca_ras->bank_is_valid)
+ return mca_ras->bank_is_valid(mca_ras, adev, type, entry);
-static int mca_get_valid_mca_idx(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras,
- enum amdgpu_mca_error_type type,
- uint32_t mca_cnt, int *idx_array, int idx_array_size)
-{
- int i, idx_cnt = 0;
-
- for (i = 0; i < mca_cnt; i++) {
- if (!mca_bank_is_valid(adev, mca_ras, type, i))
- continue;
-
- if (idx_array) {
- if (idx_cnt < idx_array_size)
- idx_array[idx_cnt] = i;
- else
- return -EINVAL;
- }
-
- idx_cnt++;
- }
-
- return idx_cnt;
+ return true;
}
-static int __mca_smu_get_error_count(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras, enum amdgpu_mca_error_type type, uint32_t *count)
+static int __mca_smu_get_ras_mca_set(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras,
+ enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set)
{
- uint32_t result, mca_cnt, total = 0;
- int idx_array[16];
- int i, ret, idx_cnt = 0;
+ struct mca_bank_entry entry;
+ uint32_t mca_cnt;
+ int i, ret;
ret = mca_get_valid_mca_count(adev, type, &mca_cnt);
if (ret)
return ret;
/* if valid mca bank count is 0, the driver can return 0 directly */
- if (!mca_cnt) {
- *count = 0;
+ if (!mca_cnt)
return 0;
- }
- if (!mca_ras->get_err_count)
- return -EINVAL;
+ for (i = 0; i < mca_cnt; i++) {
+ memset(&entry, 0, sizeof(entry));
+ ret = mca_get_mca_entry(adev, type, i, &entry);
+ if (ret)
+ return ret;
- idx_cnt = mca_get_valid_mca_idx(adev, mca_ras, type, mca_cnt, idx_array, ARRAY_SIZE(idx_array));
- if (idx_cnt < 0)
- return -EINVAL;
+ if (mca_ras && !mca_bank_is_valid(adev, mca_ras, type, &entry))
+ continue;
- for (i = 0; i < idx_cnt; i++) {
- result = 0;
- ret = mca_ras->get_err_count(mca_ras, adev, type, idx_array[i], &result);
+ ret = amdgpu_mca_bank_set_add_entry(mca_set, &entry);
if (ret)
return ret;
-
- total += result;
}
- *count = total;
-
return 0;
}
-static int mca_smu_get_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
- enum amdgpu_mca_error_type type, uint32_t *count)
+static int mca_smu_get_ras_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set)
{
- const struct mca_ras_info *mca_ras;
+ const struct mca_ras_info *mca_ras = NULL;
- if (!count)
+ if (!mca_set)
return -EINVAL;
- mca_ras = mca_get_mca_ras_info(adev, blk);
- if (!mca_ras)
- return -EOPNOTSUPP;
-
- return __mca_smu_get_error_count(adev, mca_ras, type, count);
-}
-
-static int __mca_smu_get_ras_mca_idx_array(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras,
- enum amdgpu_mca_error_type type, int *idx_array, int *idx_array_size)
-{
- uint32_t mca_cnt = 0;
- int ret, idx_cnt = 0;
-
- ret = mca_get_valid_mca_count(adev, type, &mca_cnt);
- if (ret)
- return ret;
-
- /* if valid mca bank count is 0, the driver can return 0 directly */
- if (!mca_cnt) {
- *idx_array_size = 0;
- return 0;
+ if (blk != AMDGPU_RAS_BLOCK_COUNT) {
+ mca_ras = mca_get_mca_ras_info(adev, blk);
+ if (!mca_ras)
+ return -EOPNOTSUPP;
}
- idx_cnt = mca_get_valid_mca_idx(adev, mca_ras, type, mca_cnt, idx_array, *idx_array_size);
- if (idx_cnt < 0)
- return -EINVAL;
-
- *idx_array_size = idx_cnt;
-
- return 0;
+ return __mca_smu_get_ras_mca_set(adev, mca_ras, type, mca_set);
}
-static int mca_smu_get_ras_mca_idx_array(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
- enum amdgpu_mca_error_type type, int *idx_array, int *idx_array_size)
+static int mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
+ struct mca_bank_entry *entry, uint32_t *count)
{
const struct mca_ras_info *mca_ras;
+ if (!entry || !count)
+ return -EINVAL;
+
mca_ras = mca_get_mca_ras_info(adev, blk);
if (!mca_ras)
return -EOPNOTSUPP;
- return __mca_smu_get_ras_mca_idx_array(adev, mca_ras, type, idx_array, idx_array_size);
+ if (!mca_bank_is_valid(adev, mca_ras, type, entry)) {
+ *count = 0;
+ return 0;
+ }
+
+ return mca_ras->get_err_count(mca_ras, adev, type, entry, count);
}
static int mca_smu_get_mca_entry(struct amdgpu_device *adev,
@@ -2801,10 +2768,10 @@ static const struct amdgpu_mca_smu_funcs smu_v13_0_6_mca_smu_funcs = {
.max_ue_count = 12,
.max_ce_count = 12,
.mca_set_debug_mode = mca_smu_set_debug_mode,
- .mca_get_error_count = mca_smu_get_error_count,
+ .mca_get_ras_mca_set = mca_smu_get_ras_mca_set,
+ .mca_parse_mca_error_count = mca_smu_parse_mca_error_count,
.mca_get_mca_entry = mca_smu_get_mca_entry,
.mca_get_valid_mca_count = mca_smu_get_valid_mca_count,
- .mca_get_ras_mca_idx_array = mca_smu_get_ras_mca_idx_array,
};
static int smu_v13_0_6_select_xgmi_plpd_policy(struct smu_context *smu,