diff options
Diffstat (limited to 'drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c')
-rw-r--r-- | drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 257 |
1 files changed, 173 insertions, 84 deletions
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 0e5a77c3c2e2..4ebc6b421c2c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -120,6 +120,7 @@ struct mca_ras_info { #define P2S_TABLE_ID_A 0x50325341 #define P2S_TABLE_ID_X 0x50325358 +// clang-format off static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1), @@ -128,6 +129,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(DisableAllSmuFeatures, PPSMC_MSG_DisableAllSmuFeatures, 0), MSG_MAP(RequestI2cTransaction, PPSMC_MSG_RequestI2cTransaction, 0), MSG_MAP(GetMetricsTable, PPSMC_MSG_GetMetricsTable, 1), + MSG_MAP(GetMetricsVersion, PPSMC_MSG_GetMetricsVersion, 1), MSG_MAP(GetEnabledSmuFeaturesHigh, PPSMC_MSG_GetEnabledSmuFeaturesHigh, 1), MSG_MAP(GetEnabledSmuFeaturesLow, PPSMC_MSG_GetEnabledSmuFeaturesLow, 1), MSG_MAP(SetDriverDramAddrHigh, PPSMC_MSG_SetDriverDramAddrHigh, 1), @@ -171,6 +173,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(SelectPLPDMode, PPSMC_MSG_SelectPLPDMode, 0), }; +// clang-format on static const struct cmn2asic_mapping smu_v13_0_6_clk_map[SMU_CLK_COUNT] = { CLK_MAP(SOCCLK, PPCLK_SOCCLK), CLK_MAP(FCLK, PPCLK_FCLK), @@ -245,6 +248,8 @@ struct PPTable_t { #define SMUQ10_TO_UINT(x) ((x) >> 10) #define SMUQ10_FRAC(x) ((x) & 0x3ff) #define SMUQ10_ROUND(x) ((SMUQ10_TO_UINT(x)) + ((SMUQ10_FRAC(x)) >= 0x200)) +#define GET_METRIC_FIELD(field) ((adev->flags & AMD_IS_APU) ?\ + (metrics_a->field) : (metrics_x->field)) struct smu_v13_0_6_dpm_map { enum smu_clk_type clk_type; @@ -327,7 +332,8 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) SMU_TABLE_INIT(tables, SMU_TABLE_PMSTATUSLOG, SMU13_TOOL_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); - SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(MetricsTable_t), + SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, + max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); @@ -335,12 +341,13 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); - smu_table->metrics_table = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL); + smu_table->metrics_table = kzalloc(max(sizeof(MetricsTableX_t), + sizeof(MetricsTableA_t)), GFP_KERNEL); if (!smu_table->metrics_table) return -ENOMEM; smu_table->metrics_time = 0; - smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_4); + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_5); smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); if (!smu_table->gpu_metrics_table) { @@ -428,13 +435,51 @@ static int smu_v13_0_6_get_metrics_table(struct smu_context *smu, return 0; } +static ssize_t smu_v13_0_6_get_pm_metrics(struct smu_context *smu, + void *metrics, size_t max_size) +{ + struct smu_table_context *smu_tbl_ctxt = &smu->smu_table; + uint32_t table_version = smu_tbl_ctxt->tables[SMU_TABLE_SMU_METRICS].version; + uint32_t table_size = smu_tbl_ctxt->tables[SMU_TABLE_SMU_METRICS].size; + struct amdgpu_pm_metrics *pm_metrics = metrics; + uint32_t pmfw_version; + int ret; + + if (!pm_metrics || !max_size) + return -EINVAL; + + if (max_size < (table_size + sizeof(pm_metrics->common_header))) + return -EOVERFLOW; + + /* Don't use cached metrics data */ + ret = smu_v13_0_6_get_metrics_table(smu, pm_metrics->data, true); + if (ret) + return ret; + + smu_cmn_get_smc_version(smu, NULL, &pmfw_version); + + memset(&pm_metrics->common_header, 0, + sizeof(pm_metrics->common_header)); + pm_metrics->common_header.mp1_ip_discovery_version = + IP_VERSION(13, 0, 6); + pm_metrics->common_header.pmfw_version = pmfw_version; + pm_metrics->common_header.pmmetrics_version = table_version; + pm_metrics->common_header.structure_size = + sizeof(pm_metrics->common_header) + table_size; + + return pm_metrics->common_header.structure_size; +} + static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; - MetricsTable_t *metrics = (MetricsTable_t *)smu_table->metrics_table; + MetricsTableX_t *metrics_x = (MetricsTableX_t *)smu_table->metrics_table; + MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table; struct PPTable_t *pptable = (struct PPTable_t *)smu_table->driver_pptable; + struct amdgpu_device *adev = smu->adev; int ret, i, retry = 100; + uint32_t table_version; /* Store one-time values in driver PPTable */ if (!pptable->Init) { @@ -444,7 +489,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) return ret; /* Ensure that metrics have been updated */ - if (metrics->AccumulationCounter) + if (GET_METRIC_FIELD(AccumulationCounter)) break; usleep_range(1000, 1100); @@ -453,30 +498,37 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) if (!retry) return -ETIME; + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetMetricsVersion, + &table_version); + if (ret) + return ret; + smu_table->tables[SMU_TABLE_SMU_METRICS].version = + table_version; + pptable->MaxSocketPowerLimit = - SMUQ10_ROUND(metrics->MaxSocketPowerLimit); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketPowerLimit)); pptable->MaxGfxclkFrequency = - SMUQ10_ROUND(metrics->MaxGfxclkFrequency); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxGfxclkFrequency)); pptable->MinGfxclkFrequency = - SMUQ10_ROUND(metrics->MinGfxclkFrequency); + SMUQ10_ROUND(GET_METRIC_FIELD(MinGfxclkFrequency)); for (i = 0; i < 4; ++i) { pptable->FclkFrequencyTable[i] = - SMUQ10_ROUND(metrics->FclkFrequencyTable[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequencyTable)[i]); pptable->UclkFrequencyTable[i] = - SMUQ10_ROUND(metrics->UclkFrequencyTable[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequencyTable)[i]); pptable->SocclkFrequencyTable[i] = SMUQ10_ROUND( - metrics->SocclkFrequencyTable[i]); + GET_METRIC_FIELD(SocclkFrequencyTable)[i]); pptable->VclkFrequencyTable[i] = - SMUQ10_ROUND(metrics->VclkFrequencyTable[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequencyTable)[i]); pptable->DclkFrequencyTable[i] = - SMUQ10_ROUND(metrics->DclkFrequencyTable[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequencyTable)[i]); pptable->LclkFrequencyTable[i] = - SMUQ10_ROUND(metrics->LclkFrequencyTable[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(LclkFrequencyTable)[i]); } /* use AID0 serial number by default */ - pptable->PublicSerialNumber_AID = metrics->PublicSerialNumber_AID[0]; + pptable->PublicSerialNumber_AID = GET_METRIC_FIELD(PublicSerialNumber_AID)[0]; pptable->Init = true; } @@ -778,7 +830,8 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, uint32_t *value) { struct smu_table_context *smu_table = &smu->smu_table; - MetricsTable_t *metrics = (MetricsTable_t *)smu_table->metrics_table; + MetricsTableX_t *metrics_x = (MetricsTableX_t *)smu_table->metrics_table; + MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table; struct amdgpu_device *adev = smu->adev; int ret = 0; int xcc_id; @@ -793,50 +846,50 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, case METRICS_AVERAGE_GFXCLK: if (smu->smc_fw_version >= 0x552F00) { xcc_id = GET_INST(GC, 0); - *value = SMUQ10_ROUND(metrics->GfxclkFrequency[xcc_id]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency)[xcc_id]); } else { *value = 0; } break; case METRICS_CURR_SOCCLK: case METRICS_AVERAGE_SOCCLK: - *value = SMUQ10_ROUND(metrics->SocclkFrequency[0]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency)[0]); break; case METRICS_CURR_UCLK: case METRICS_AVERAGE_UCLK: - *value = SMUQ10_ROUND(metrics->UclkFrequency); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency)); break; case METRICS_CURR_VCLK: - *value = SMUQ10_ROUND(metrics->VclkFrequency[0]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency)[0]); break; case METRICS_CURR_DCLK: - *value = SMUQ10_ROUND(metrics->DclkFrequency[0]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency)[0]); break; case METRICS_CURR_FCLK: - *value = SMUQ10_ROUND(metrics->FclkFrequency); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequency)); break; case METRICS_AVERAGE_GFXACTIVITY: - *value = SMUQ10_ROUND(metrics->SocketGfxBusy); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy)); break; case METRICS_AVERAGE_MEMACTIVITY: - *value = SMUQ10_ROUND(metrics->DramBandwidthUtilization); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization)); break; case METRICS_CURR_SOCKETPOWER: - *value = SMUQ10_ROUND(metrics->SocketPower) << 8; + *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower)) << 8; break; case METRICS_TEMPERATURE_HOTSPOT: - *value = SMUQ10_ROUND(metrics->MaxSocketTemperature) * + *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature)) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; case METRICS_TEMPERATURE_MEM: - *value = SMUQ10_ROUND(metrics->MaxHbmTemperature) * + *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature)) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; /* This is the max of all VRs and not just SOC VR. * No need to define another data type for the same. */ case METRICS_TEMPERATURE_VRSOC: - *value = SMUQ10_ROUND(metrics->MaxVrTemperature) * + *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature)) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; default: @@ -1470,7 +1523,6 @@ static int smu_v13_0_6_mca_set_debug_mode(struct smu_context *smu, bool enable) if (smu->smc_fw_version < 0x554800) return 0; - amdgpu_ras_set_mca_debug_mode(smu->adev, enable); return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ClearMcaOnRead, enable ? 0 : ClearMcaOnRead_UE_FLAG_MASK | ClearMcaOnRead_CE_POLL_MASK, NULL); @@ -2022,67 +2074,70 @@ static int smu_v13_0_6_get_current_pcie_link_speed(struct smu_context *smu) static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table) { struct smu_table_context *smu_table = &smu->smu_table; - struct gpu_metrics_v1_4 *gpu_metrics = - (struct gpu_metrics_v1_4 *)smu_table->gpu_metrics_table; + struct gpu_metrics_v1_5 *gpu_metrics = + (struct gpu_metrics_v1_5 *)smu_table->gpu_metrics_table; struct amdgpu_device *adev = smu->adev; - int ret = 0, xcc_id, inst, i; - MetricsTable_t *metrics; + int ret = 0, xcc_id, inst, i, j; + MetricsTableX_t *metrics_x; + MetricsTableA_t *metrics_a; u16 link_width_level; - metrics = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL); - ret = smu_v13_0_6_get_metrics_table(smu, metrics, true); + metrics_x = kzalloc(max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)), GFP_KERNEL); + ret = smu_v13_0_6_get_metrics_table(smu, metrics_x, true); if (ret) { - kfree(metrics); + kfree(metrics_x); return ret; } - smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 4); + metrics_a = (MetricsTableA_t *)metrics_x; + + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 5); gpu_metrics->temperature_hotspot = - SMUQ10_ROUND(metrics->MaxSocketTemperature); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature)); /* Individual HBM stack temperature is not reported */ gpu_metrics->temperature_mem = - SMUQ10_ROUND(metrics->MaxHbmTemperature); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature)); /* Reports max temperature of all voltage rails */ gpu_metrics->temperature_vrsoc = - SMUQ10_ROUND(metrics->MaxVrTemperature); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature)); gpu_metrics->average_gfx_activity = - SMUQ10_ROUND(metrics->SocketGfxBusy); + SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy)); gpu_metrics->average_umc_activity = - SMUQ10_ROUND(metrics->DramBandwidthUtilization); + SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization)); gpu_metrics->curr_socket_power = - SMUQ10_ROUND(metrics->SocketPower); + SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower)); /* Energy counter reported in 15.259uJ (2^-16) units */ - gpu_metrics->energy_accumulator = metrics->SocketEnergyAcc; + gpu_metrics->energy_accumulator = GET_METRIC_FIELD(SocketEnergyAcc); for (i = 0; i < MAX_GFX_CLKS; i++) { xcc_id = GET_INST(GC, i); if (xcc_id >= 0) gpu_metrics->current_gfxclk[i] = - SMUQ10_ROUND(metrics->GfxclkFrequency[xcc_id]); + SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency)[xcc_id]); if (i < MAX_CLKS) { gpu_metrics->current_socclk[i] = - SMUQ10_ROUND(metrics->SocclkFrequency[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency)[i]); inst = GET_INST(VCN, i); if (inst >= 0) { gpu_metrics->current_vclk0[i] = - SMUQ10_ROUND(metrics->VclkFrequency[inst]); + SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency)[inst]); gpu_metrics->current_dclk0[i] = - SMUQ10_ROUND(metrics->DclkFrequency[inst]); + SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency)[inst]); } } } - gpu_metrics->current_uclk = SMUQ10_ROUND(metrics->UclkFrequency); + gpu_metrics->current_uclk = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency)); /* Throttle status is not reported through metrics now */ gpu_metrics->throttle_status = 0; /* Clock Lock Status. Each bit corresponds to each GFXCLK instance */ - gpu_metrics->gfxclk_lock_status = metrics->GfxLockXCDMak >> GET_INST(GC, 0); + gpu_metrics->gfxclk_lock_status = GET_METRIC_FIELD(GfxLockXCDMak) >> GET_INST(GC, 0); if (!(adev->flags & AMD_IS_APU)) { link_width_level = smu_v13_0_6_get_current_pcie_link_width_level(smu); @@ -2094,38 +2149,57 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table gpu_metrics->pcie_link_speed = smu_v13_0_6_get_current_pcie_link_speed(smu); gpu_metrics->pcie_bandwidth_acc = - SMUQ10_ROUND(metrics->PcieBandwidthAcc[0]); + SMUQ10_ROUND(metrics_x->PcieBandwidthAcc[0]); gpu_metrics->pcie_bandwidth_inst = - SMUQ10_ROUND(metrics->PcieBandwidth[0]); + SMUQ10_ROUND(metrics_x->PcieBandwidth[0]); gpu_metrics->pcie_l0_to_recov_count_acc = - metrics->PCIeL0ToRecoveryCountAcc; + metrics_x->PCIeL0ToRecoveryCountAcc; gpu_metrics->pcie_replay_count_acc = - metrics->PCIenReplayAAcc; + metrics_x->PCIenReplayAAcc; gpu_metrics->pcie_replay_rover_count_acc = - metrics->PCIenReplayARolloverCountAcc; + metrics_x->PCIenReplayARolloverCountAcc; + gpu_metrics->pcie_nak_sent_count_acc = + metrics_x->PCIeNAKSentCountAcc; + gpu_metrics->pcie_nak_rcvd_count_acc = + metrics_x->PCIeNAKReceivedCountAcc; } gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); gpu_metrics->gfx_activity_acc = - SMUQ10_ROUND(metrics->SocketGfxBusyAcc); + SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusyAcc)); gpu_metrics->mem_activity_acc = - SMUQ10_ROUND(metrics->DramBandwidthUtilizationAcc); + SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilizationAcc)); for (i = 0; i < NUM_XGMI_LINKS; i++) { gpu_metrics->xgmi_read_data_acc[i] = - SMUQ10_ROUND(metrics->XgmiReadDataSizeAcc[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(XgmiReadDataSizeAcc)[i]); gpu_metrics->xgmi_write_data_acc[i] = - SMUQ10_ROUND(metrics->XgmiWriteDataSizeAcc[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWriteDataSizeAcc)[i]); } - gpu_metrics->xgmi_link_width = SMUQ10_ROUND(metrics->XgmiWidth); - gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(metrics->XgmiBitrate); + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + inst = GET_INST(JPEG, i); + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + gpu_metrics->jpeg_activity[(i * adev->jpeg.num_jpeg_rings) + j] = + SMUQ10_ROUND(GET_METRIC_FIELD(JpegBusy) + [(inst * adev->jpeg.num_jpeg_rings) + j]); + } + } - gpu_metrics->firmware_timestamp = metrics->Timestamp; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + inst = GET_INST(VCN, i); + gpu_metrics->vcn_activity[i] = + SMUQ10_ROUND(GET_METRIC_FIELD(VcnBusy)[inst]); + } + + gpu_metrics->xgmi_link_width = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWidth)); + gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiBitrate)); + + gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp); *table = (void *)gpu_metrics; - kfree(metrics); + kfree(metrics_x); return sizeof(*gpu_metrics); } @@ -2300,16 +2374,6 @@ static int smu_v13_0_6_smu_send_hbm_bad_page_num(struct smu_context *smu, return ret; } -static int smu_v13_0_6_post_init(struct smu_context *smu) -{ - struct amdgpu_device *adev = smu->adev; - - if (!amdgpu_sriov_vf(adev) && adev->ras_enabled) - return smu_v13_0_6_mca_set_debug_mode(smu, false); - - return 0; -} - static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) { struct smu_context *smu = adev->powerplay.pp_handle; @@ -2392,8 +2456,8 @@ static const struct mca_bank_ipid smu_v13_0_6_mca_ipid_table[AMDGPU_MCA_IP_COUNT static void mca_bank_entry_info_decode(struct mca_bank_entry *entry, struct mca_bank_info *info) { - uint64_t ipid = entry->regs[MCA_REG_IDX_IPID]; - uint32_t insthi; + u64 ipid = entry->regs[MCA_REG_IDX_IPID]; + u32 instidhi, instid; /* NOTE: All MCA IPID register share the same format, * so the driver can share the MCMP1 register header file. @@ -2402,9 +2466,15 @@ static void mca_bank_entry_info_decode(struct mca_bank_entry *entry, struct mca_ info->hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); info->mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType); - insthi = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi); - info->aid = ((insthi >> 2) & 0x03); - info->socket_id = insthi & 0x03; + /* + * Unfied DieID Format: SAASS. A:AID, S:Socket. + * Unfied DieID[4] = InstanceId[0] + * Unfied DieID[0:3] = InstanceIdHi[0:3] + */ + instidhi = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi); + instid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo); + info->aid = ((instidhi >> 2) & 0x03); + info->socket_id = ((instid & 0x1) << 2) | (instidhi & 0x03); } static int mca_bank_read_reg(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, @@ -2483,9 +2553,9 @@ static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct return 0; } - if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(status0)) + if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(adev, status0)) *count = 1; - else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(status0)) + else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(adev, status0)) *count = 1; return 0; @@ -2496,13 +2566,15 @@ static int mca_pcs_xgmi_mca_get_err_count(const struct mca_ras_info *mca_ras, st uint32_t *count) { u32 ext_error_code; + u32 err_cnt; ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(entry->regs[MCA_REG_IDX_STATUS]); + err_cnt = MCA_REG__MISC0__ERRCNT(entry->regs[MCA_REG_IDX_MISC0]); if (type == AMDGPU_MCA_ERROR_TYPE_UE && ext_error_code == 0) - *count = 1; + *count = err_cnt; else if (type == AMDGPU_MCA_ERROR_TYPE_CE && ext_error_code == 6) - *count = 1; + *count = err_cnt; return 0; } @@ -2578,6 +2650,7 @@ static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct uint32_t instlo; instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo); + instlo &= GENMASK(31, 1); switch (instlo) { case 0x36430400: /* SMNAID XCD 0 */ case 0x38430400: /* SMNAID XCD 1 */ @@ -2593,13 +2666,21 @@ static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_entry *entry) { + struct smu_context *smu = adev->powerplay.pp_handle; uint32_t errcode, instlo; instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo); + instlo &= GENMASK(31, 1); if (instlo != 0x03b30400) return false; - errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode); + if (!(adev->flags & AMD_IS_APU) && smu->smc_fw_version >= 0x00555600) { + errcode = MCA_REG__SYND__ERRORINFORMATION(entry->regs[MCA_REG_IDX_SYND]); + errcode &= 0xff; + } else { + errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode); + } + return mca_smu_check_error_code(adev, mca_ras, errcode); } @@ -2812,6 +2893,13 @@ static int smu_v13_0_6_select_xgmi_plpd_policy(struct smu_context *smu, return ret; } +static ssize_t smu_v13_0_6_get_ecc_info(struct smu_context *smu, + void *table) +{ + /* Support ecc info by default */ + return 0; +} + static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { /* init dpm */ .get_allowed_feature_mask = smu_v13_0_6_get_allowed_feature_mask, @@ -2856,6 +2944,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .log_thermal_throttling_event = smu_v13_0_6_log_thermal_throttling_event, .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, .get_gpu_metrics = smu_v13_0_6_get_gpu_metrics, + .get_pm_metrics = smu_v13_0_6_get_pm_metrics, .get_thermal_temperature_range = smu_v13_0_6_get_thermal_temperature_range, .mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported, .mode2_reset_is_support = smu_v13_0_6_is_mode2_reset_supported, @@ -2865,7 +2954,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .i2c_init = smu_v13_0_6_i2c_control_init, .i2c_fini = smu_v13_0_6_i2c_control_fini, .send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num, - .post_init = smu_v13_0_6_post_init, + .get_ecc_info = smu_v13_0_6_get_ecc_info, }; void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) |