diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 344 |
1 files changed, 248 insertions, 96 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 9d39fd5b1822..d7530fdfaad5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -27,6 +27,7 @@ #include "amdgpu_gfx.h" #include "soc15.h" #include "soc15d.h" +#include "amdgpu_atomfirmware.h" #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" @@ -41,7 +42,6 @@ #define GFX9_MEC_HPD_SIZE 2048 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L -#define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34 #define mmPWR_MISC_CNTL_STATUS 0x0183 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0 @@ -64,6 +64,13 @@ MODULE_FIRMWARE("amdgpu/vega12_mec.bin"); MODULE_FIRMWARE("amdgpu/vega12_mec2.bin"); MODULE_FIRMWARE("amdgpu/vega12_rlc.bin"); +MODULE_FIRMWARE("amdgpu/vega20_ce.bin"); +MODULE_FIRMWARE("amdgpu/vega20_pfp.bin"); +MODULE_FIRMWARE("amdgpu/vega20_me.bin"); +MODULE_FIRMWARE("amdgpu/vega20_mec.bin"); +MODULE_FIRMWARE("amdgpu/vega20_mec2.bin"); +MODULE_FIRMWARE("amdgpu/vega20_rlc.bin"); + MODULE_FIRMWARE("amdgpu/raven_ce.bin"); MODULE_FIRMWARE("amdgpu/raven_pfp.bin"); MODULE_FIRMWARE("amdgpu/raven_me.bin"); @@ -73,29 +80,22 @@ MODULE_FIRMWARE("amdgpu/raven_rlc.bin"); static const struct soc15_reg_golden golden_settings_gc_9_0[] = { - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) }; static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = @@ -109,6 +109,20 @@ static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800) }; +static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000) +}; + static const struct soc15_reg_golden golden_settings_gc_9_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), @@ -185,6 +199,30 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000) }; +static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = +{ + mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0, +}; + +static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] = +{ + mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0, +}; + #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 @@ -218,6 +256,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_9_2_1_vg12, ARRAY_SIZE(golden_settings_gc_9_2_1_vg12)); break; + case CHIP_VEGA20: + soc15_program_register_sequence(adev, + golden_settings_gc_9_0, + ARRAY_SIZE(golden_settings_gc_9_0)); + soc15_program_register_sequence(adev, + golden_settings_gc_9_0_vg20, + ARRAY_SIZE(golden_settings_gc_9_0_vg20)); + break; case CHIP_RAVEN: soc15_program_register_sequence(adev, golden_settings_gc_9_1, @@ -401,6 +447,27 @@ static void gfx_v9_0_free_microcode(struct amdgpu_device *adev) kfree(adev->gfx.rlc.register_list_format); } +static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_1 *rlc_hdr; + + rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); + adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); + adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); + adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); + adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); + adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); + adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); + adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); + adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); + adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); + adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); + adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); + adev->gfx.rlc.reg_list_format_direct_reg_list_length = + le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); +} + static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; @@ -412,6 +479,8 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) const struct rlc_firmware_header_v2_0 *rlc_hdr; unsigned int *tmp = NULL; unsigned int i = 0; + uint16_t version_major; + uint16_t version_minor; DRM_DEBUG("\n"); @@ -422,6 +491,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) case CHIP_VEGA12: chip_name = "vega12"; break; + case CHIP_VEGA20: + chip_name = "vega20"; + break; case CHIP_RAVEN: chip_name = "raven"; break; @@ -468,6 +540,12 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) goto out; err = amdgpu_ucode_validate(adev->gfx.rlc_fw); rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + + version_major = le16_to_cpu(rlc_hdr->header.header_version_major); + version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); + if (version_major == 2 && version_minor == 1) + adev->gfx.rlc.is_rlc_v2_1 = true; + adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); adev->gfx.rlc.save_and_restore_offset = @@ -508,6 +586,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); + if (adev->gfx.rlc.is_rlc_v2_1) + gfx_v9_0_init_rlc_ext_microcode(adev); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); if (err) @@ -566,6 +647,26 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) adev->firmware.fw_size += ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + if (adev->gfx.rlc.is_rlc_v2_1) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); + } + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; info->fw = adev->gfx.mec_fw; @@ -1013,9 +1114,10 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q }; -static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) +static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) { u32 gb_addr_config; + int err; adev->gfx.funcs = &gfx_v9_0_gfx_funcs; @@ -1037,6 +1139,20 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; DRM_INFO("fix gfx.config for vega12\n"); break; + case CHIP_VEGA20: + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); + gb_addr_config &= ~0xf3e777ff; + gb_addr_config |= 0x22014042; + /* check vbios table if gpu info is not available */ + err = amdgpu_atomfirmware_get_gfx_info(adev); + if (err) + return err; + break; case CHIP_RAVEN: adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; @@ -1086,6 +1202,8 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.gb_addr_config, GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE)); + + return 0; } static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, @@ -1319,6 +1437,7 @@ static int gfx_v9_0_sw_init(void *handle) switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: case CHIP_RAVEN: adev->gfx.mec.num_mec = 2; break; @@ -1446,7 +1565,9 @@ static int gfx_v9_0_sw_init(void *handle) adev->gfx.ce_ram_size = 0x8000; - gfx_v9_0_gpu_early_init(adev); + r = gfx_v9_0_gpu_early_init(adev); + if (r) + return r; r = gfx_v9_0_ngg_init(adev); if (r) @@ -1600,6 +1721,7 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) gfx_v9_0_setup_rb(adev); gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); + adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ @@ -1616,7 +1738,10 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); - tmp = adev->gmc.shared_aperture_start >> 48; + tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, + (adev->gmc.private_aperture_start >> 48)); + tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, + (adev->gmc.shared_aperture_start >> 48)); WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp); } } @@ -1708,55 +1833,42 @@ static void gfx_v9_0_init_csb(struct amdgpu_device *adev) adev->gfx.rlc.clear_state_size); } -static void gfx_v9_0_parse_ind_reg_list(int *register_list_format, +static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, int indirect_offset, int list_size, int *unique_indirect_regs, int *unique_indirect_reg_count, - int max_indirect_reg_count, int *indirect_start_offsets, - int *indirect_start_offsets_count, - int max_indirect_start_offsets_count) + int *indirect_start_offsets_count) { int idx; - bool new_entry = true; for (; indirect_offset < list_size; indirect_offset++) { + indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; + *indirect_start_offsets_count = *indirect_start_offsets_count + 1; - if (new_entry) { - new_entry = false; - indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; - *indirect_start_offsets_count = *indirect_start_offsets_count + 1; - BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count); - } + while (register_list_format[indirect_offset] != 0xFFFFFFFF) { + indirect_offset += 2; - if (register_list_format[indirect_offset] == 0xFFFFFFFF) { - new_entry = true; - continue; - } + /* look for the matching indice */ + for (idx = 0; idx < *unique_indirect_reg_count; idx++) { + if (unique_indirect_regs[idx] == + register_list_format[indirect_offset] || + !unique_indirect_regs[idx]) + break; + } - indirect_offset += 2; + BUG_ON(idx >= *unique_indirect_reg_count); - /* look for the matching indice */ - for (idx = 0; idx < *unique_indirect_reg_count; idx++) { - if (unique_indirect_regs[idx] == - register_list_format[indirect_offset]) - break; - } + if (!unique_indirect_regs[idx]) + unique_indirect_regs[idx] = register_list_format[indirect_offset]; - if (idx >= *unique_indirect_reg_count) { - unique_indirect_regs[*unique_indirect_reg_count] = - register_list_format[indirect_offset]; - idx = *unique_indirect_reg_count; - *unique_indirect_reg_count = *unique_indirect_reg_count + 1; - BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count); + indirect_offset++; } - - register_list_format[indirect_offset] = idx; } } -static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) +static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) { int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; int unique_indirect_reg_count = 0; @@ -1765,7 +1877,7 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) int indirect_start_offsets_count = 0; int list_size = 0; - int i = 0; + int i = 0, j = 0; u32 tmp = 0; u32 *register_list_format = @@ -1776,15 +1888,14 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) adev->gfx.rlc.reg_list_format_size_bytes); /* setup unique_indirect_regs array and indirect_start_offsets array */ - gfx_v9_0_parse_ind_reg_list(register_list_format, - GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH, - adev->gfx.rlc.reg_list_format_size_bytes >> 2, - unique_indirect_regs, - &unique_indirect_reg_count, - ARRAY_SIZE(unique_indirect_regs), - indirect_start_offsets, - &indirect_start_offsets_count, - ARRAY_SIZE(indirect_start_offsets)); + unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); + gfx_v9_1_parse_ind_reg_list(register_list_format, + adev->gfx.rlc.reg_list_format_direct_reg_list_length, + adev->gfx.rlc.reg_list_format_size_bytes >> 2, + unique_indirect_regs, + &unique_indirect_reg_count, + indirect_start_offsets, + &indirect_start_offsets_count); /* enable auto inc in case it is disabled */ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); @@ -1798,19 +1909,37 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), adev->gfx.rlc.register_restore[i]); - /* load direct register */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 0); - for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), - adev->gfx.rlc.register_restore[i]); - /* load indirect register */ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), adev->gfx.rlc.reg_list_format_start); - for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) + + /* direct register portion */ + for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), register_list_format[i]); + /* indirect register portion */ + while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { + if (register_list_format[i] == 0xFFFFFFFF) { + WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); + continue; + } + + WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); + WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); + + for (j = 0; j < unique_indirect_reg_count; j++) { + if (register_list_format[i] == unique_indirect_regs[j]) { + WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); + break; + } + } + + BUG_ON(j >= unique_indirect_reg_count); + + i++; + } + /* set save/restore list size */ list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; list_size = list_size >> 1; @@ -1823,14 +1952,19 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) adev->gfx.rlc.starting_offsets_start); for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), - indirect_start_offsets[i]); + indirect_start_offsets[i]); /* load unique indirect regs*/ for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i, - unique_indirect_regs[i] & 0x3FFFF); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i, - unique_indirect_regs[i] >> 20); + if (unique_indirect_regs[i] != 0) { + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], + unique_indirect_regs[i] & 0x3FFFF); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], + unique_indirect_regs[i] >> 20); + } } kfree(register_list_format); @@ -2010,6 +2144,9 @@ static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *ad static void gfx_v9_0_init_pg(struct amdgpu_device *adev) { + if (!adev->gfx.rlc.is_rlc_v2_1) + return; + if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | AMD_PG_SUPPORT_GFX_DMG | @@ -2017,27 +2154,12 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev) AMD_PG_SUPPORT_GDS | AMD_PG_SUPPORT_RLC_SMU_HS)) { gfx_v9_0_init_csb(adev); - gfx_v9_0_init_rlc_save_restore_list(adev); + gfx_v9_1_init_rlc_save_restore_list(adev); gfx_v9_0_enable_save_restore_machine(adev); - if (adev->asic_type == CHIP_RAVEN) { - WREG32(mmRLC_JUMP_TABLE_RESTORE, - adev->gfx.rlc.cp_table_gpu_addr >> 8); - gfx_v9_0_init_gfx_power_gating(adev); - - if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { - gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); - gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); - } else { - gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); - gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); - } - - if (adev->pg_flags & AMD_PG_SUPPORT_CP) - gfx_v9_0_enable_cp_power_gating(adev, true); - else - gfx_v9_0_enable_cp_power_gating(adev, false); - } + WREG32(mmRLC_JUMP_TABLE_RESTORE, + adev->gfx.rlc.cp_table_gpu_addr >> 8); + gfx_v9_0_init_gfx_power_gating(adev); } } @@ -3061,6 +3183,9 @@ static int gfx_v9_0_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX, + AMD_PG_STATE_UNGATE); + amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); @@ -3279,6 +3404,11 @@ static int gfx_v9_0_late_init(void *handle) if (r) return r; + r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX, + AMD_PG_STATE_GATE); + if (r) + return r; + return 0; } @@ -3339,8 +3469,7 @@ static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev) static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, bool enable) { - /* TODO: double check if we need to perform under safe mdoe */ - /* gfx_v9_0_enter_rlc_safe_mode(adev); */ + gfx_v9_0_enter_rlc_safe_mode(adev); if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { gfx_v9_0_enable_gfx_cg_power_gating(adev, true); @@ -3351,7 +3480,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); } - /* gfx_v9_0_exit_rlc_safe_mode(adev); */ + gfx_v9_0_exit_rlc_safe_mode(adev); } static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, @@ -3605,6 +3734,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: case CHIP_RAVEN: gfx_v9_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); @@ -3742,7 +3872,7 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } amdgpu_ring_write(ring, header); -BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 0) | @@ -3774,13 +3904,16 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, { bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; /* RELEASE_MEM - flush caches, send int */ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); - amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | - EOP_TC_ACTION_EN | - EOP_TC_WB_ACTION_EN | - EOP_TC_MD_ACTION_EN | + amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | + EOP_TC_NC_ACTION_EN) : + (EOP_TCL1_ACTION_EN | + EOP_TC_ACTION_EN | + EOP_TC_WB_ACTION_EN | + EOP_TC_MD_ACTION_EN)) | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5))); amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); @@ -4137,6 +4270,20 @@ static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); } +static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + + if (amdgpu_sriov_vf(ring->adev)) + gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, + ref, mask, 0x20); + else + amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, + ref, mask); +} + static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { @@ -4458,6 +4605,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_tmz = gfx_v9_0_ring_emit_tmz, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { @@ -4492,6 +4640,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .set_priority = gfx_v9_0_ring_set_priority_compute, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { @@ -4522,6 +4671,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { .emit_rreg = gfx_v9_0_ring_emit_rreg, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, }; static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) @@ -4577,6 +4727,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: case CHIP_RAVEN: adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; break; @@ -4686,6 +4837,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, cu_info->number = active_cu_number; cu_info->ao_cu_mask = ao_cu_mask; + cu_info->simd_per_cu = NUM_SIMD_PER_CU; return 0; } |