diff options
Diffstat (limited to 'drivers/gpu/drm')
64 files changed, 550 insertions, 274 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index dc2d53081e80..a79d53bdbe13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1293,7 +1293,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, void amdgpu_device_pci_config_reset(struct amdgpu_device *adev); int amdgpu_device_pci_reset(struct amdgpu_device *adev); bool amdgpu_device_need_post(struct amdgpu_device *adev); -bool amdgpu_sg_display_supported(struct amdgpu_device *adev); bool amdgpu_device_pcie_dynamic_switching_supported(void); bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev); bool amdgpu_device_aspm_support_quirk(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index cdf6087706aa..25d5fda5b243 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -478,7 +478,7 @@ void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *c cu_info->cu_active_number = acu_info.number; cu_info->cu_ao_mask = acu_info.ao_cu_mask; memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0], - sizeof(acu_info.bitmap)); + sizeof(cu_info->cu_bitmap)); cu_info->num_shader_engines = adev->gfx.config.max_shader_engines; cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se; cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index f1f2c24de081..69810b3f1c63 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -980,8 +980,7 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, uint32_t wait_times, uint32_t grace_period, uint32_t *reg_offset, - uint32_t *reg_data, - uint32_t inst) + uint32_t *reg_data) { *reg_data = wait_times; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h index ecaead24e8c9..67bcaa3d4226 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h @@ -55,5 +55,4 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, uint32_t wait_times, uint32_t grace_period, uint32_t *reg_offset, - uint32_t *reg_data, - uint32_t inst); + uint32_t *reg_data); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index fa5ee96f8845..3c45a188b701 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -1103,8 +1103,7 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, uint32_t wait_times, uint32_t grace_period, uint32_t *reg_offset, - uint32_t *reg_data, - uint32_t inst) + uint32_t *reg_data) { *reg_data = wait_times; @@ -1120,8 +1119,7 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, SCH_WAVE, grace_period); - *reg_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), - mmCP_IQ_WAIT_TIME2); + *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2); } void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index 936e501908ce..ce424615f59b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -100,5 +100,4 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, uint32_t wait_times, uint32_t grace_period, uint32_t *reg_offset, - uint32_t *reg_data, - uint32_t inst); + uint32_t *reg_data); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 73ee14f7a9a4..dce9e7d5e4ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1776,7 +1776,7 @@ static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev, struct amdgpu_device *adev = drm_to_adev(ddev); struct atom_context *ctx = adev->mode_info.atom_context; - return sysfs_emit(buf, "%s\n", ctx->vbios_ver_str); + return sysfs_emit(buf, "%s\n", ctx->vbios_pn); } static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3f001a50b34a..2b8356699f23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1245,32 +1245,6 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) } /* - * On APUs with >= 64GB white flickering has been observed w/ SG enabled. - * Disable S/G on such systems until we have a proper fix. - * https://gitlab.freedesktop.org/drm/amd/-/issues/2354 - * https://gitlab.freedesktop.org/drm/amd/-/issues/2735 - */ -bool amdgpu_sg_display_supported(struct amdgpu_device *adev) -{ - switch (amdgpu_sg_display) { - case -1: - break; - case 0: - return false; - case 1: - return true; - default: - return false; - } - if ((totalram_pages() << (PAGE_SHIFT - 10)) + - (adev->gmc.real_vram_size / 1024) >= 64000000) { - DRM_WARN("Disabling S/G due to >=64GB RAM\n"); - return false; - } - return true; -} - -/* * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic * speed switching. Until we have confirmation from Intel that a specific host * supports it, it's safer that we keep it disabled for all. @@ -2119,7 +2093,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) adev->flags |= AMD_IS_PX; if (!(adev->flags & AMD_IS_APU)) { - parent = pci_upstream_bridge(adev->pdev); + parent = pcie_find_root_port(adev->pdev); adev->has_pr3 = parent ? pci_pr3_present(parent) : false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index 9c66d98af6d8..7cd0dfaeee20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -170,6 +170,7 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) csum += pia[size - 1]; if (csum) { DRM_ERROR("Bad Product Info Area checksum: 0x%02x", csum); + kfree(pia); return -EIO; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 395c1768b9fc..0ca95c4d4bfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -43,6 +43,7 @@ #define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L #define AMDGPU_MAX_GC_INSTANCES 8 +#define KGD_MAX_QUEUES 128 #define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES @@ -257,7 +258,7 @@ struct amdgpu_cu_info { uint32_t number; uint32_t ao_cu_mask; uint32_t ao_cu_bitmap[4][4]; - uint32_t bitmap[4][4]; + uint32_t bitmap[AMDGPU_MAX_GC_INSTANCES][4][4]; }; struct amdgpu_gfx_ras { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 99f4df133ed3..d30dc0b718c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -839,7 +839,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) memcpy(&dev_info->cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0], sizeof(adev->gfx.cu_info.ao_cu_bitmap)); memcpy(&dev_info->cu_bitmap[0], &adev->gfx.cu_info.bitmap[0], - sizeof(adev->gfx.cu_info.bitmap)); + sizeof(dev_info->cu_bitmap)); dev_info->vram_type = adev->gmc.vram_type; dev_info->vram_bit_width = adev->gmc.vram_width; dev_info->vce_harvest_config = adev->vce.harvest_config; @@ -940,12 +940,17 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) struct atom_context *atom_context; atom_context = adev->mode_info.atom_context; - memcpy(vbios_info.name, atom_context->name, sizeof(atom_context->name)); - memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, sizeof(atom_context->vbios_pn)); - vbios_info.version = atom_context->version; - memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str, - sizeof(atom_context->vbios_ver_str)); - memcpy(vbios_info.date, atom_context->date, sizeof(atom_context->date)); + if (atom_context) { + memcpy(vbios_info.name, atom_context->name, + sizeof(atom_context->name)); + memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, + sizeof(atom_context->vbios_pn)); + vbios_info.version = atom_context->version; + memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str, + sizeof(atom_context->vbios_ver_str)); + memcpy(vbios_info.date, atom_context->date, + sizeof(atom_context->date)); + } return copy_to_user(out, &vbios_info, min((size_t)size, sizeof(vbios_info))) ? -EFAULT : 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 3c4600e15b86..163445baa4fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -801,6 +801,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, enable ? "enable":"disable", get_ras_block_str(head), amdgpu_ras_is_poison_mode_supported(adev), ret); + kfree(info); return ret; } @@ -1052,7 +1053,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, info->ce_count = obj->err_data.ce_count; if (err_data.ce_count) { - if (adev->smuio.funcs && + if (!adev->aid_mask && + adev->smuio.funcs && adev->smuio.funcs->get_socket_id && adev->smuio.funcs->get_die_id) { dev_info(adev->dev, "socket: %d, die: %d " @@ -1072,7 +1074,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, } } if (err_data.ue_count) { - if (adev->smuio.funcs && + if (!adev->aid_mask && + adev->smuio.funcs && adev->smuio.funcs->get_socket_id && adev->smuio.funcs->get_die_id) { dev_info(adev->dev, "socket: %d, die: %d " diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 0aee9c8288a2..9032d7a24d7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9449,7 +9449,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh( adev, disable_masks[i * 2 + j]); bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev); - cu_info->bitmap[i][j] = bitmap; + cu_info->bitmap[0][i][j] = bitmap; for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { if (bitmap & mask) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 5c3db694afa8..762d7a19f1be 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6368,7 +6368,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]} * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]} */ - cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap; + cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap; for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { if (bitmap & mask) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index da6caff78c22..34f9211b2679 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3577,7 +3577,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev) gfx_v6_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * 2 + j]); bitmap = gfx_v6_0_get_cu_enabled(adev); - cu_info->bitmap[i][j] = bitmap; + cu_info->bitmap[0][i][j] = bitmap; for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { if (bitmap & mask) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 90b034b173c1..c2faf6b4c2fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -5119,7 +5119,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) gfx_v7_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * 2 + j]); bitmap = gfx_v7_0_get_cu_active_bitmap(adev); - cu_info->bitmap[i][j] = bitmap; + cu_info->bitmap[0][i][j] = bitmap; for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { if (bitmap & mask) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 51c1745c8369..885ebd703260 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -7121,7 +7121,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) gfx_v8_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * 2 + j]); bitmap = gfx_v8_0_get_cu_active_bitmap(adev); - cu_info->bitmap[i][j] = bitmap; + cu_info->bitmap[0][i][j] = bitmap; for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (bitmap & mask) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 458faf657042..fd61574a737c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1499,7 +1499,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { - if (cu_info->bitmap[i][j] & mask) { + if (cu_info->bitmap[0][i][j] & mask) { if (counter == pg_always_on_cu_num) WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); if (counter < always_on_cu_num) @@ -7233,7 +7233,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, * SE6,SH0 --> bitmap[2][1] * SE7,SH0 --> bitmap[3][1] */ - cu_info->bitmap[i % 4][j + i / 4] = bitmap; + cu_info->bitmap[0][i % 4][j + i / 4] = bitmap; for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (bitmap & mask) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 0a26a00074a6..18ce5fe45f6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -4259,7 +4259,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev) } static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, - u32 bitmap) + u32 bitmap, int xcc_id) { u32 data; @@ -4269,15 +4269,15 @@ static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; - WREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG, data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG, data); } -static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev) +static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev, int xcc_id) { u32 data, mask; - data = RREG32_SOC15(GC, GET_INST(GC, 0), regCC_GC_SHADER_ARRAY_CONFIG); - data |= RREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG); + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SHADER_ARRAY_CONFIG); + data |= RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG); data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; @@ -4290,7 +4290,7 @@ static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev) static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info) { - int i, j, k, counter, active_cu_number = 0; + int i, j, k, counter, xcc_id, active_cu_number = 0; u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; unsigned disable_masks[4 * 4]; @@ -4309,46 +4309,38 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, adev->gfx.config.max_sh_per_se); mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { - for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - mask = 1; - ao_bitmap = 0; - counter = 0; - gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, 0); - gfx_v9_4_3_set_user_cu_inactive_bitmap( - adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); - bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev); - - /* - * The bitmap(and ao_cu_bitmap) in cu_info structure is - * 4x4 size array, and it's usually suitable for Vega - * ASICs which has 4*2 SE/SH layout. - * But for Arcturus, SE/SH layout is changed to 8*1. - * To mostly reduce the impact, we make it compatible - * with current bitmap array as below: - * SE4,SH0 --> bitmap[0][1] - * SE5,SH0 --> bitmap[1][1] - * SE6,SH0 --> bitmap[2][1] - * SE7,SH0 --> bitmap[3][1] - */ - cu_info->bitmap[i % 4][j + i / 4] = bitmap; - - for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { - if (bitmap & mask) { - if (counter < adev->gfx.config.max_cu_per_sh) - ao_bitmap |= mask; - counter++; + for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) { + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + mask = 1; + ao_bitmap = 0; + counter = 0; + gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, xcc_id); + gfx_v9_4_3_set_user_cu_inactive_bitmap( + adev, + disable_masks[i * adev->gfx.config.max_sh_per_se + j], + xcc_id); + bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev, xcc_id); + + cu_info->bitmap[xcc_id][i][j] = bitmap; + + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { + if (bitmap & mask) { + if (counter < adev->gfx.config.max_cu_per_sh) + ao_bitmap |= mask; + counter++; + } + mask <<= 1; } - mask <<= 1; + active_cu_number += counter; + if (i < 2 && j < 2) + ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } - active_cu_number += counter; - if (i < 2 && j < 2) - ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); - cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; } + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + xcc_id); } - gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, - 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c index d5ed9e0e1a5f..e5b5b0f4940f 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c @@ -345,6 +345,9 @@ static void nbio_v4_3_init_registers(struct amdgpu_device *adev) data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK; WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data); } + if (amdgpu_sriov_vf(adev)) + adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0, + regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; } static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 40d23738ee4e..8b2ff2b281b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -766,7 +766,7 @@ static int soc21_common_hw_init(void *handle) * for the purpose of expose those registers * to process space */ - if (adev->nbio.funcs->remap_hdp_registers) + if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev)) adev->nbio.funcs->remap_hdp_registers(adev); /* enable the doorbell aperture */ adev->nbio.funcs->enable_doorbell_aperture(adev, true); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 86fb7ac7982a..f76b7aee5c0a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -2087,7 +2087,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info); cu->num_simd_per_cu = cu_info.simd_per_cu; - cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number; + cu->num_simd_cores = cu_info.simd_per_cu * + (cu_info.cu_active_number / kdev->kfd->num_nodes); cu->max_waves_simd = cu_info.max_waves_per_simd; cu->wave_front_size = cu_info.wave_front_size; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h index 387a8ef49385..74c2d7a0d628 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h @@ -79,6 +79,10 @@ struct crat_header { #define CRAT_SUBTYPE_IOLINK_AFFINITY 5 #define CRAT_SUBTYPE_MAX 6 +/* + * Do not change the value of CRAT_SIBLINGMAP_SIZE from 32 + * as it breaks the ABI. + */ #define CRAT_SIBLINGMAP_SIZE 32 /* diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index b166f30f083e..0d3d538b64eb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -216,7 +216,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, if (q->wptr_bo) { wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1); - queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off; + queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->wptr_bo) + wptr_addr_off; } queue_input.is_kfd_process = 1; @@ -1677,8 +1677,7 @@ static int start_cpsch(struct device_queue_manager *dqm) dqm->dev->kfd2kgd->build_grace_period_packet_info( dqm->dev->adev, dqm->wait_times, grace_period, ®_offset, - &dqm->wait_times, - ffs(dqm->dev->xcc_mask) - 1); + &dqm->wait_times); } dqm_unlock(dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index c2e0b79dcc6d..7b38537c7c99 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -162,6 +162,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, return NULL; *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx); + inx *= 2; pr_debug("Get kernel queue doorbell\n" " doorbell offset == 0x%08X\n" @@ -176,6 +177,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) unsigned int inx; inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr); + inx /= 2; mutex_lock(&kfd->doorbell_mutex); __clear_bit(inx, kfd->doorbell_bitmap); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index d01bb57733b3..447829c22295 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -97,18 +97,22 @@ void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd, void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, const uint32_t *cu_mask, uint32_t cu_mask_count, - uint32_t *se_mask) + uint32_t *se_mask, uint32_t inst) { struct kfd_cu_info cu_info; uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0}; bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0); uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1; - int i, se, sh, cu, cu_bitmap_sh_mul, inc = wgp_mode_req ? 2 : 1; + int i, se, sh, cu, cu_bitmap_sh_mul, cu_inc = wgp_mode_req ? 2 : 1; + uint32_t cu_active_per_node; + int inc = cu_inc * NUM_XCC(mm->dev->xcc_mask); + int xcc_inst = inst + ffs(mm->dev->xcc_mask) - 1; amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info); - if (cu_mask_count > cu_info.cu_active_number) - cu_mask_count = cu_info.cu_active_number; + cu_active_per_node = cu_info.cu_active_number / mm->dev->kfd->num_nodes; + if (cu_mask_count > cu_active_per_node) + cu_mask_count = cu_active_per_node; /* Exceeding these bounds corrupts the stack and indicates a coding error. * Returning with no CU's enabled will hang the queue, which should be @@ -141,7 +145,8 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, for (se = 0; se < cu_info.num_shader_engines; se++) for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) cu_per_sh[se][sh] = hweight32( - cu_info.cu_bitmap[se % 4][sh + (se / 4) * cu_bitmap_sh_mul]); + cu_info.cu_bitmap[xcc_inst][se % 4][sh + (se / 4) * + cu_bitmap_sh_mul]); /* Symmetrically map cu_mask to all SEs & SHs: * se_mask programs up to 2 SH in the upper and lower 16 bits. @@ -164,20 +169,33 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, * cu_mask[0] bit8 -> se_mask[0] bit1 (SE0,SH0,CU1) * ... * + * For GFX 9.4.3, the following code only looks at a + * subset of the cu_mask corresponding to the inst parameter. + * If we have n XCCs under one GPU node + * cu_mask[0] bit0 -> XCC0 se_mask[0] bit0 (XCC0,SE0,SH0,CU0) + * cu_mask[0] bit1 -> XCC1 se_mask[0] bit0 (XCC1,SE0,SH0,CU0) + * .. + * cu_mask[0] bitn -> XCCn se_mask[0] bit0 (XCCn,SE0,SH0,CU0) + * cu_mask[0] bit n+1 -> XCC0 se_mask[1] bit0 (XCC0,SE1,SH0,CU0) + * + * For example, if there are 6 XCCs under 1 KFD node, this code + * running for each inst, will look at the bits as: + * inst, inst + 6, inst + 12... + * * First ensure all CUs are disabled, then enable user specified CUs. */ for (i = 0; i < cu_info.num_shader_engines; i++) se_mask[i] = 0; - i = 0; - for (cu = 0; cu < 16; cu += inc) { + i = inst; + for (cu = 0; cu < 16; cu += cu_inc) { for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) { for (se = 0; se < cu_info.num_shader_engines; se++) { if (cu_per_sh[se][sh] > cu) { if (cu_mask[i / 32] & (en_mask << (i % 32))) se_mask[se] |= en_mask << (cu + sh * 16); i += inc; - if (i == cu_mask_count) + if (i >= cu_mask_count) return; } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index 23158db7da03..57bf5e513f4d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -138,7 +138,7 @@ void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd, void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, const uint32_t *cu_mask, uint32_t cu_mask_count, - uint32_t *se_mask); + uint32_t *se_mask, uint32_t inst); int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index ee1d32d957f2..1a4a69943c71 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -52,7 +52,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, return; mqd_symmetrically_map_cu_mask(mm, - minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask); + minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0); m = get_mqd(mqd); m->compute_static_thread_mgmt_se0 = se_mask[0]; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 83699392c808..8b7fed913526 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -52,7 +52,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, return; mqd_symmetrically_map_cu_mask(mm, - minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask); + minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0); m = get_mqd(mqd); m->compute_static_thread_mgmt_se0 = se_mask[0]; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 0bbf0edbabd4..15277f1d5cf0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -71,7 +71,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, } mqd_symmetrically_map_cu_mask(mm, - minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask); + minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0); m->compute_static_thread_mgmt_se0 = se_mask[0]; m->compute_static_thread_mgmt_se1 = se_mask[1]; @@ -321,6 +321,43 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd, return 0; } +static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst) +{ + struct v11_compute_mqd *m; + + m = get_mqd(mqd); + + memcpy(mqd_dst, m, sizeof(struct v11_compute_mqd)); +} + +static void restore_mqd(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *qp, + const void *mqd_src, + const void *ctl_stack_src, const u32 ctl_stack_size) +{ + uint64_t addr; + struct v11_compute_mqd *m; + + m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr; + addr = mqd_mem_obj->gpu_addr; + + memcpy(m, mqd_src, sizeof(*m)); + + *mqd = m; + if (gart_addr) + *gart_addr = addr; + + m->cp_hqd_pq_doorbell_control = + qp->doorbell_off << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + pr_debug("cp_hqd_pq_doorbell_control 0x%x\n", + m->cp_hqd_pq_doorbell_control); + + qp->is_active = 0; +} + + static void init_mqd_hiq(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -458,6 +495,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type, mqd->mqd_size = sizeof(struct v11_compute_mqd); mqd->get_wave_state = get_wave_state; mqd->mqd_stride = kfd_mqd_stride; + mqd->checkpoint_mqd = checkpoint_mqd; + mqd->restore_mqd = restore_mqd; #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif @@ -502,6 +541,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_sdma; mqd->destroy_mqd = kfd_destroy_mqd_sdma; mqd->is_occupied = kfd_is_occupied_sdma; + mqd->checkpoint_mqd = checkpoint_mqd; + mqd->restore_mqd = restore_mqd; mqd->mqd_size = sizeof(struct v11_sdma_mqd); mqd->mqd_stride = kfd_mqd_stride; #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index e23d32f35607..42d881809dc7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -60,7 +60,7 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) } static void update_cu_mask(struct mqd_manager *mm, void *mqd, - struct mqd_update_info *minfo) + struct mqd_update_info *minfo, uint32_t inst) { struct v9_mqd *m; uint32_t se_mask[KFD_MAX_NUM_SE] = {0}; @@ -69,27 +69,36 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, return; mqd_symmetrically_map_cu_mask(mm, - minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask); + minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, inst); m = get_mqd(mqd); + m->compute_static_thread_mgmt_se0 = se_mask[0]; m->compute_static_thread_mgmt_se1 = se_mask[1]; m->compute_static_thread_mgmt_se2 = se_mask[2]; m->compute_static_thread_mgmt_se3 = se_mask[3]; - m->compute_static_thread_mgmt_se4 = se_mask[4]; - m->compute_static_thread_mgmt_se5 = se_mask[5]; - m->compute_static_thread_mgmt_se6 = se_mask[6]; - m->compute_static_thread_mgmt_se7 = se_mask[7]; - - pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n", - m->compute_static_thread_mgmt_se0, - m->compute_static_thread_mgmt_se1, - m->compute_static_thread_mgmt_se2, - m->compute_static_thread_mgmt_se3, - m->compute_static_thread_mgmt_se4, - m->compute_static_thread_mgmt_se5, - m->compute_static_thread_mgmt_se6, - m->compute_static_thread_mgmt_se7); + if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3)) { + m->compute_static_thread_mgmt_se4 = se_mask[4]; + m->compute_static_thread_mgmt_se5 = se_mask[5]; + m->compute_static_thread_mgmt_se6 = se_mask[6]; + m->compute_static_thread_mgmt_se7 = se_mask[7]; + + pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n", + m->compute_static_thread_mgmt_se0, + m->compute_static_thread_mgmt_se1, + m->compute_static_thread_mgmt_se2, + m->compute_static_thread_mgmt_se3, + m->compute_static_thread_mgmt_se4, + m->compute_static_thread_mgmt_se5, + m->compute_static_thread_mgmt_se6, + m->compute_static_thread_mgmt_se7); + } else { + pr_debug("inst: %u, update cu mask to %#x %#x %#x %#x\n", + inst, m->compute_static_thread_mgmt_se0, + m->compute_static_thread_mgmt_se1, + m->compute_static_thread_mgmt_se2, + m->compute_static_thread_mgmt_se3); + } } static void set_priority(struct v9_mqd *m, struct queue_properties *q) @@ -290,7 +299,8 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address) m->cp_hqd_ctx_save_control = 0; - update_cu_mask(mm, mqd, minfo); + if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3)) + update_cu_mask(mm, mqd, minfo, 0); set_priority(m, q); q->is_active = QUEUE_IS_ACTIVE(*q); @@ -676,6 +686,8 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd + size * xcc); update_mqd(mm, m, q, minfo); + update_cu_mask(mm, mqd, minfo, xcc); + if (q->format == KFD_QUEUE_FORMAT_AQL) { switch (xcc) { case 0: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 657c37822980..3e1a574d4ea6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -55,7 +55,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, return; mqd_symmetrically_map_cu_mask(mm, - minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask); + minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0); m = get_mqd(mqd); m->compute_static_thread_mgmt_se0 = se_mask[0]; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index 8ce6f5200905..1a03173e2313 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -299,8 +299,7 @@ static int pm_set_grace_period_v9(struct packet_manager *pm, pm->dqm->wait_times, grace_period, ®_offset, - ®_data, - 0); + ®_data); if (grace_period == USE_DEFAULT_GRACE_PERIOD) reg_data = pm->dqm->wait_times; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 3d9ce44d88da..fa24e1852493 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1466,8 +1466,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type); static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) { - return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) || + return KFD_GC_VERSION(dev) > IP_VERSION(9, 4, 2) || (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) || KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index ff98fded9534..c8c75ff7cea8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -450,8 +450,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count", dev->node_props.cpu_cores_count); sysfs_show_32bit_prop(buffer, offs, "simd_count", - dev->gpu ? (dev->node_props.simd_count * - NUM_XCC(dev->gpu->xcc_mask)) : 0); + dev->gpu ? dev->node_props.simd_count : 0); sysfs_show_32bit_prop(buffer, offs, "mem_banks_count", dev->node_props.mem_banks_count); sysfs_show_32bit_prop(buffer, offs, "caches_count", @@ -1597,14 +1596,17 @@ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext, static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, struct kfd_gpu_cache_info *pcache_info, struct kfd_cu_info *cu_info, - int cache_type, unsigned int cu_processor_id) + int cache_type, unsigned int cu_processor_id, + struct kfd_node *knode) { unsigned int cu_sibling_map_mask; int first_active_cu; - int i, j, k; + int i, j, k, xcc, start, end; struct kfd_cache_properties *pcache = NULL; - cu_sibling_map_mask = cu_info->cu_bitmap[0][0]; + start = ffs(knode->xcc_mask) - 1; + end = start + NUM_XCC(knode->xcc_mask); + cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0]; cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); first_active_cu = ffs(cu_sibling_map_mask); @@ -1639,16 +1641,18 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1); k = 0; - for (i = 0; i < cu_info->num_shader_engines; i++) { - for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) { - pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); - pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); - pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); - pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); - k += 4; - - cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4]; - cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); + for (xcc = start; xcc < end; xcc++) { + for (i = 0; i < cu_info->num_shader_engines; i++) { + for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) { + pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); + pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); + pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); + pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); + k += 4; + + cu_sibling_map_mask = cu_info->cu_bitmap[xcc][i % 4][j + i / 4]; + cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); + } } } pcache->sibling_map_size = k; @@ -1666,7 +1670,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev) { struct kfd_gpu_cache_info *pcache_info = NULL; - int i, j, k; + int i, j, k, xcc, start, end; int ct = 0; unsigned int cu_processor_id; int ret; @@ -1700,37 +1704,42 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct * then it will consider only one CU from * the shared unit */ + start = ffs(kdev->xcc_mask) - 1; + end = start + NUM_XCC(kdev->xcc_mask); + for (ct = 0; ct < num_of_cache_types; ct++) { cu_processor_id = gpu_processor_id; if (pcache_info[ct].cache_level == 1) { - for (i = 0; i < pcu_info->num_shader_engines; i++) { - for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) { - for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) { + for (xcc = start; xcc < end; xcc++) { + for (i = 0; i < pcu_info->num_shader_engines; i++) { + for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) { + for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) { - ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info, - pcu_info->cu_bitmap[i % 4][j + i / 4], ct, + ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info, + pcu_info->cu_bitmap[xcc][i % 4][j + i / 4], ct, cu_processor_id, k); - if (ret < 0) - break; + if (ret < 0) + break; - if (!ret) { - num_of_entries++; - list_add_tail(&props_ext->list, &dev->cache_props); - } + if (!ret) { + num_of_entries++; + list_add_tail(&props_ext->list, &dev->cache_props); + } - /* Move to next CU block */ - num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <= - pcu_info->num_cu_per_sh) ? - pcache_info[ct].num_cu_shared : - (pcu_info->num_cu_per_sh - k); - cu_processor_id += num_cu_shared; + /* Move to next CU block */ + num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <= + pcu_info->num_cu_per_sh) ? + pcache_info[ct].num_cu_shared : + (pcu_info->num_cu_per_sh - k); + cu_processor_id += num_cu_shared; + } } } } } else { ret = fill_in_l2_l3_pcache(&props_ext, pcache_info, - pcu_info, ct, cu_processor_id); + pcu_info, ct, cu_processor_id, kdev); if (ret < 0) break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index dea32a9e5506..27386ce9a021 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -89,7 +89,7 @@ struct kfd_mem_properties { struct attribute attr; }; -#define CACHE_SIBLINGMAP_SIZE 64 +#define CACHE_SIBLINGMAP_SIZE 128 struct kfd_cache_properties { struct list_head list; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 88ba8b66de1f..868946dd7ef1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1274,11 +1274,15 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_ pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); - page_table_start.high_part = (u32)(adev->gmc.gart_start >> 44) & 0xF; - page_table_start.low_part = (u32)(adev->gmc.gart_start >> 12); - page_table_end.high_part = (u32)(adev->gmc.gart_end >> 44) & 0xF; - page_table_end.low_part = (u32)(adev->gmc.gart_end >> 12); - page_table_base.high_part = upper_32_bits(pt_base) & 0xF; + page_table_start.high_part = upper_32_bits(adev->gmc.gart_start >> + AMDGPU_GPU_PAGE_SHIFT); + page_table_start.low_part = lower_32_bits(adev->gmc.gart_start >> + AMDGPU_GPU_PAGE_SHIFT); + page_table_end.high_part = upper_32_bits(adev->gmc.gart_end >> + AMDGPU_GPU_PAGE_SHIFT); + page_table_end.low_part = lower_32_bits(adev->gmc.gart_end >> + AMDGPU_GPU_PAGE_SHIFT); + page_table_base.high_part = upper_32_bits(pt_base); page_table_base.low_part = lower_32_bits(pt_base); pa_config->system_aperture.start_addr = (uint64_t)logical_addr_low << 18; @@ -1640,8 +1644,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) } break; } - if (init_data.flags.gpu_vm_support) - init_data.flags.gpu_vm_support = amdgpu_sg_display_supported(adev); + if (init_data.flags.gpu_vm_support && + (amdgpu_sg_display == 0)) + init_data.flags.gpu_vm_support = false; if (init_data.flags.gpu_vm_support) adev->mode_info.gpu_vm_support = true; @@ -2335,14 +2340,62 @@ static int dm_late_init(void *handle) return detect_mst_link_for_all_connectors(adev_to_drm(adev)); } +static void resume_mst_branch_status(struct drm_dp_mst_topology_mgr *mgr) +{ + int ret; + u8 guid[16]; + u64 tmp64; + + mutex_lock(&mgr->lock); + if (!mgr->mst_primary) + goto out_fail; + + if (drm_dp_read_dpcd_caps(mgr->aux, mgr->dpcd) < 0) { + drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n"); + goto out_fail; + } + + ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, + DP_MST_EN | + DP_UP_REQ_EN | + DP_UPSTREAM_IS_SRC); + if (ret < 0) { + drm_dbg_kms(mgr->dev, "mst write failed - undocked during suspend?\n"); + goto out_fail; + } + + /* Some hubs forget their guids after they resume */ + ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, guid, 16); + if (ret != 16) { + drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n"); + goto out_fail; + } + + if (memchr_inv(guid, 0, 16) == NULL) { + tmp64 = get_jiffies_64(); + memcpy(&guid[0], &tmp64, sizeof(u64)); + memcpy(&guid[8], &tmp64, sizeof(u64)); + + ret = drm_dp_dpcd_write(mgr->aux, DP_GUID, guid, 16); + + if (ret != 16) { + drm_dbg_kms(mgr->dev, "check mstb guid failed - undocked during suspend?\n"); + goto out_fail; + } + } + + memcpy(mgr->mst_primary->guid, guid, 16); + +out_fail: + mutex_unlock(&mgr->lock); +} + static void s3_handle_mst(struct drm_device *dev, bool suspend) { struct amdgpu_dm_connector *aconnector; struct drm_connector *connector; struct drm_connector_list_iter iter; struct drm_dp_mst_topology_mgr *mgr; - int ret; - bool need_hotplug = false; drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { @@ -2364,18 +2417,15 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend) if (!dp_is_lttpr_present(aconnector->dc_link)) try_to_configure_aux_timeout(aconnector->dc_link->ddc, LINK_AUX_DEFAULT_TIMEOUT_PERIOD); - ret = drm_dp_mst_topology_mgr_resume(mgr, true); - if (ret < 0) { - dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx, - aconnector->dc_link); - need_hotplug = true; - } + /* TODO: move resume_mst_branch_status() into drm mst resume again + * once topology probing work is pulled out from mst resume into mst + * resume 2nd step. mst resume 2nd step should be called after old + * state getting restored (i.e. drm_atomic_helper_resume()). + */ + resume_mst_branch_status(mgr); } } drm_connector_list_iter_end(&iter); - - if (need_hotplug) - drm_kms_helper_hotplug_event(dev); } static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev) @@ -2769,7 +2819,8 @@ static int dm_resume(void *handle) struct dm_atomic_state *dm_state = to_dm_atomic_state(dm->atomic_obj.state); enum dc_connection_type new_connection_type = dc_connection_none; struct dc_state *dc_state; - int i, r, j; + int i, r, j, ret; + bool need_hotplug = false; if (amdgpu_in_reset(adev)) { dc_state = dm->cached_dc_state; @@ -2867,7 +2918,7 @@ static int dm_resume(void *handle) continue; /* - * this is the case when traversing through already created + * this is the case when traversing through already created end sink * MST connectors, should be skipped */ if (aconnector && aconnector->mst_root) @@ -2927,6 +2978,27 @@ static int dm_resume(void *handle) dm->cached_state = NULL; + /* Do mst topology probing after resuming cached state*/ + drm_connector_list_iter_begin(ddev, &iter); + drm_for_each_connector_iter(connector, &iter) { + aconnector = to_amdgpu_dm_connector(connector); + if (aconnector->dc_link->type != dc_connection_mst_branch || + aconnector->mst_root) + continue; + + ret = drm_dp_mst_topology_mgr_resume(&aconnector->mst_mgr, true); + + if (ret < 0) { + dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx, + aconnector->dc_link); + need_hotplug = true; + } + } + drm_connector_list_iter_end(&iter); + + if (need_hotplug) + drm_kms_helper_hotplug_event(ddev); + amdgpu_dm_irq_resume_late(adev); amdgpu_dm_smu_write_watermarks_table(adev); @@ -6026,8 +6098,6 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (recalculate_timing) drm_mode_set_crtcinfo(&saved_mode, 0); - else if (!old_stream) - drm_mode_set_crtcinfo(&mode, 0); /* * If scaling is enabled and refresh rate didn't change @@ -6589,6 +6659,8 @@ enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connec goto fail; } + drm_mode_set_crtcinfo(mode, 0); + stream = create_validate_stream_for_sink(aconnector, mode, to_dm_connector_state(connector->state), NULL); @@ -8073,7 +8145,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->surface_updates[planes_count].plane_info = &bundle->plane_infos[planes_count]; - if (acrtc_state->stream->link->psr_settings.psr_feature_enabled) { + if (acrtc_state->stream->link->psr_settings.psr_feature_enabled || + acrtc_state->stream->link->replay_settings.replay_feature_enabled) { fill_dc_dirty_rects(plane, old_plane_state, new_plane_state, new_crtc_state, &bundle->flip_addrs[planes_count], diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index a2d34be82613..9e4cc5eeda76 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -620,7 +620,7 @@ struct amdgpu_hdmi_vsdb_info { unsigned int max_refresh_rate_hz; /** - * @replay mode: Replay supported + * @replay_mode: Replay supported */ bool replay_mode; }; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c index c435f7632e8e..5ee87965a078 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c @@ -157,7 +157,7 @@ void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct int32_t N; int32_t j; - if (!pipe_ctx->stream) + if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER)) continue; /* Virtual encoders don't have this function */ if (!stream_enc->funcs->get_fifo_cal_average_level) @@ -188,7 +188,7 @@ void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct int32_t N; int32_t j; - if (!pipe_ctx->stream) + if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER)) continue; /* Virtual encoders don't have this function */ if (!stream_enc->funcs->get_fifo_cal_average_level) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index 984b52923534..e9345f6554db 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -355,7 +355,7 @@ static void dcn32_update_clocks_update_dentist( int32_t N; int32_t j; - if (!pipe_ctx->stream) + if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER)) continue; /* Virtual encoders don't have this function */ if (!stream_enc->funcs->get_fifo_cal_average_level) @@ -401,7 +401,7 @@ static void dcn32_update_clocks_update_dentist( int32_t N; int32_t j; - if (!pipe_ctx->stream) + if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER)) continue; /* Virtual encoders don't have this function */ if (!stream_enc->funcs->get_fifo_cal_average_level) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c index 30c0644d4418..be5a6d008b29 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c @@ -169,11 +169,23 @@ static void add_link_enc_assignment( /* Return first available DIG link encoder. */ static enum engine_id find_first_avail_link_enc( const struct dc_context *ctx, - const struct dc_state *state) + const struct dc_state *state, + enum engine_id eng_id_requested) { enum engine_id eng_id = ENGINE_ID_UNKNOWN; int i; + if (eng_id_requested != ENGINE_ID_UNKNOWN) { + + for (i = 0; i < ctx->dc->res_pool->res_cap->num_dig_link_enc; i++) { + eng_id = state->res_ctx.link_enc_cfg_ctx.link_enc_avail[i]; + if (eng_id == eng_id_requested) + return eng_id; + } + } + + eng_id = ENGINE_ID_UNKNOWN; + for (i = 0; i < ctx->dc->res_pool->res_cap->num_dig_link_enc; i++) { eng_id = state->res_ctx.link_enc_cfg_ctx.link_enc_avail[i]; if (eng_id != ENGINE_ID_UNKNOWN) @@ -287,7 +299,7 @@ void link_enc_cfg_link_encs_assign( struct dc_stream_state *streams[], uint8_t stream_count) { - enum engine_id eng_id = ENGINE_ID_UNKNOWN; + enum engine_id eng_id = ENGINE_ID_UNKNOWN, eng_id_req = ENGINE_ID_UNKNOWN; int i; int j; @@ -377,8 +389,14 @@ void link_enc_cfg_link_encs_assign( * assigned to that endpoint. */ link_enc = get_link_enc_used_by_link(state, stream->link); - if (link_enc == NULL) - eng_id = find_first_avail_link_enc(stream->ctx, state); + if (link_enc == NULL) { + + if (stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && + stream->link->dpia_preferred_eng_id != ENGINE_ID_UNKNOWN) + eng_id_req = stream->link->dpia_preferred_eng_id; + + eng_id = find_first_avail_link_enc(stream->ctx, state, eng_id_req); + } else eng_id = link_enc->preferred_engine; @@ -402,7 +420,9 @@ void link_enc_cfg_link_encs_assign( DC_LOG_DEBUG("%s: CUR %s(%d) - enc_id(%d)\n", __func__, assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ? "PHY" : "DPIA", - assignment.ep_id.link_id.enum_id - 1, + assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ? + assignment.ep_id.link_id.enum_id : + assignment.ep_id.link_id.enum_id - 1, assignment.eng_id); } for (i = 0; i < MAX_PIPES; i++) { @@ -413,7 +433,9 @@ void link_enc_cfg_link_encs_assign( DC_LOG_DEBUG("%s: NEW %s(%d) - enc_id(%d)\n", __func__, assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ? "PHY" : "DPIA", - assignment.ep_id.link_id.enum_id - 1, + assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ? + assignment.ep_id.link_id.enum_id : + assignment.ep_id.link_id.enum_id - 1, assignment.eng_id); } @@ -478,7 +500,6 @@ struct dc_link *link_enc_cfg_get_link_using_link_enc( if (stream) link = stream->link; - // dm_output_to_console("%s: No link using DIG(%d).\n", __func__, eng_id); return link; } diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 0d0bef8eb331..31e3183497a7 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1496,6 +1496,7 @@ struct dc_link { * object creation. */ enum engine_id eng_id; + enum engine_id dpia_preferred_eng_id; bool test_pattern_enabled; enum dp_test_pattern current_test_pattern; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index ad967b58d7be..2a6157555fd1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -964,7 +964,9 @@ void dce110_edp_backlight_control( return; } - if (link->panel_cntl) { + if (link->panel_cntl && !(link->dpcd_sink_ext_caps.bits.oled || + link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1 || + link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1)) { bool is_backlight_on = link->panel_cntl->funcs->is_panel_backlight_on(link->panel_cntl); if ((enable && is_backlight_on) || (!enable && !is_backlight_on)) { @@ -1176,12 +1178,15 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx) dto_params.otg_inst = tg->inst; dto_params.timing = &pipe_ctx->stream->timing; dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; - dccg->funcs->set_dtbclk_dto(dccg, &dto_params); - dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst); - dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst); - } else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST && dccg->funcs->disable_symclk_se) + if (dccg) { + dccg->funcs->set_dtbclk_dto(dccg, &dto_params); + dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst); + dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst); + } + } else if (dccg && dccg->funcs->disable_symclk_se) { dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst, link_enc->transmitter - TRANSMITTER_UNIPHY_A); + } if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) { /* TODO: This looks like a bug to me as we are disabling HPO IO when @@ -2656,11 +2661,11 @@ void dce110_prepare_bandwidth( struct clk_mgr *dccg = dc->clk_mgr; dce110_set_safe_displaymarks(&context->res_ctx, dc->res_pool); - - dccg->funcs->update_clocks( - dccg, - context, - false); + if (dccg) + dccg->funcs->update_clocks( + dccg, + context, + false); } void dce110_optimize_bandwidth( @@ -2671,10 +2676,11 @@ void dce110_optimize_bandwidth( dce110_set_displaymarks(dc, context); - dccg->funcs->update_clocks( - dccg, - context, - true); + if (dccg) + dccg->funcs->update_clocks( + dccg, + context, + true); } static void dce110_program_front_end_for_pipe( diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index e72f15ac0048..aeadc587433f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -2692,8 +2692,6 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx) struct dce_hwseq *hws = dc->hwseq; unsigned int k1_div = PIXEL_RATE_DIV_NA; unsigned int k2_div = PIXEL_RATE_DIV_NA; - struct link_encoder *link_enc = link_enc_cfg_get_link_enc(pipe_ctx->stream->link); - struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc; if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) { if (dc->hwseq->funcs.setup_hpo_hw_control) @@ -2713,10 +2711,8 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx) dto_params.timing = &pipe_ctx->stream->timing; dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr); dccg->funcs->set_dtbclk_dto(dccg, &dto_params); - } else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST && dccg->funcs->enable_symclk_se) - dccg->funcs->enable_symclk_se(dccg, - stream_enc->stream_enc_inst, link_enc->transmitter - TRANSMITTER_UNIPHY_A); - + } else { + } if (hws->funcs.calculate_dccg_k1_k2_values && dc->res_pool->dccg->funcs->set_pixel_rate_div) { hws->funcs.calculate_dccg_k1_k2_values(pipe_ctx, &k1_div, &k2_div); diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index 1c1fb2fa0822..004beed9bd44 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -1032,6 +1032,28 @@ static const struct dce_i2c_mask i2c_masks = { I2C_COMMON_MASK_SH_LIST_DCN30(_MASK) }; +/* ========================================================== */ + +/* + * DPIA index | Preferred Encoder | Host Router + * 0 | C | 0 + * 1 | First Available | 0 + * 2 | D | 1 + * 3 | First Available | 1 + */ +/* ========================================================== */ +static const enum engine_id dpia_to_preferred_enc_id_table[] = { + ENGINE_ID_DIGC, + ENGINE_ID_DIGC, + ENGINE_ID_DIGD, + ENGINE_ID_DIGD +}; + +static enum engine_id dcn314_get_preferred_eng_id_dpia(unsigned int dpia_index) +{ + return dpia_to_preferred_enc_id_table[dpia_index]; +} + static struct dce_i2c_hw *dcn31_i2c_hw_create( struct dc_context *ctx, uint32_t inst) @@ -1785,6 +1807,7 @@ static struct resource_funcs dcn314_res_pool_funcs = { .update_bw_bounding_box = dcn314_update_bw_bounding_box, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn314_get_panel_config_defaults, + .get_preferred_eng_id_dpia = dcn314_get_preferred_eng_id_dpia, }; static struct clock_source *dcn30_clock_source_create( diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c index 3082da04a63d..1d052f08aff5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c @@ -75,7 +75,7 @@ void mpc32_power_on_blnd_lut( if (power_on) { REG_UPDATE(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_FORCE, 0); REG_WAIT(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_STATE, 0, 1, 5); - } else { + } else if (!mpc->ctx->dc->debug.disable_mem_low_power) { ASSERT(false); /* TODO: change to mpc * dpp_base->ctx->dc->optimized_required = true; diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 027aec70c070..eaad1260bfd1 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -65,6 +65,7 @@ struct resource_context; struct clk_bw_params; struct resource_funcs { + enum engine_id (*get_preferred_eng_id_dpia)(unsigned int dpia_index); void (*destroy)(struct resource_pool **pool); void (*link_init)(struct dc_link *link); struct panel_cntl*(*panel_cntl_create)( diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 195ca9e52eda..0895742a3102 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -791,6 +791,10 @@ static bool construct_dpia(struct dc_link *link, /* Set dpia port index : 0 to number of dpia ports */ link->ddc_hw_inst = init_params->connector_index; + // Assign Dpia preferred eng_id + if (link->dc->res_pool->funcs->get_preferred_eng_id_dpia) + link->dpia_preferred_eng_id = link->dc->res_pool->funcs->get_preferred_eng_id_dpia(link->ddc_hw_inst); + /* TODO: Create link encoder */ link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED; diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 8433f99f6667..3b5a56585c4b 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -31,12 +31,12 @@ #include <linux/types.h> #include <linux/bitmap.h> #include <linux/dma-fence.h> +#include "amdgpu_irq.h" +#include "amdgpu_gfx.h" struct pci_dev; struct amdgpu_device; -#define KGD_MAX_QUEUES 128 - struct kfd_dev; struct kgd_mem; @@ -68,7 +68,7 @@ struct kfd_cu_info { uint32_t wave_front_size; uint32_t max_scratch_slots_per_cu; uint32_t lds_size; - uint32_t cu_bitmap[4][4]; + uint32_t cu_bitmap[AMDGPU_MAX_GC_INSTANCES][4][4]; }; /* For getting GPU local memory information from KGD */ @@ -326,8 +326,7 @@ struct kfd2kgd_calls { uint32_t wait_times, uint32_t grace_period, uint32_t *reg_offset, - uint32_t *reg_data, - uint32_t inst); + uint32_t *reg_data); void (*get_cu_occupancy)(struct amdgpu_device *adev, int pasid, int *wave_cnt, int *max_waves_per_cu, uint32_t inst); void (*program_trap_handler_settings)(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 41147da54458..8bb2da13826f 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2040,6 +2040,7 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): *states = ATTR_STATE_SUPPORTED; break; default: diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 4bb289f9b4b8..da2860da6018 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -2082,36 +2082,41 @@ static int sienna_cichlid_display_disable_memory_clock_switch(struct smu_context return ret; } +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + static int sienna_cichlid_update_pcie_parameters(struct smu_context *smu, uint32_t pcie_gen_cap, uint32_t pcie_width_cap) { struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; struct smu_11_0_pcie_table *pcie_table = &dpm_context->dpm_tables.pcie_table; - u32 smu_pcie_arg; + uint8_t *table_member1, *table_member2; + uint32_t min_gen_speed, max_gen_speed; + uint32_t min_lane_width, max_lane_width; + uint32_t smu_pcie_arg; int ret, i; - /* PCIE gen speed and lane width override */ - if (!amdgpu_device_pcie_dynamic_switching_supported()) { - if (pcie_table->pcie_gen[NUM_LINK_LEVELS - 1] < pcie_gen_cap) - pcie_gen_cap = pcie_table->pcie_gen[NUM_LINK_LEVELS - 1]; + GET_PPTABLE_MEMBER(PcieGenSpeed, &table_member1); + GET_PPTABLE_MEMBER(PcieLaneCount, &table_member2); - if (pcie_table->pcie_lane[NUM_LINK_LEVELS - 1] < pcie_width_cap) - pcie_width_cap = pcie_table->pcie_lane[NUM_LINK_LEVELS - 1]; + min_gen_speed = MAX(0, table_member1[0]); + max_gen_speed = MIN(pcie_gen_cap, table_member1[1]); + min_gen_speed = min_gen_speed > max_gen_speed ? + max_gen_speed : min_gen_speed; + min_lane_width = MAX(1, table_member2[0]); + max_lane_width = MIN(pcie_width_cap, table_member2[1]); + min_lane_width = min_lane_width > max_lane_width ? + max_lane_width : min_lane_width; - /* Force all levels to use the same settings */ - for (i = 0; i < NUM_LINK_LEVELS; i++) { - pcie_table->pcie_gen[i] = pcie_gen_cap; - pcie_table->pcie_lane[i] = pcie_width_cap; - } + if (!amdgpu_device_pcie_dynamic_switching_supported()) { + pcie_table->pcie_gen[0] = max_gen_speed; + pcie_table->pcie_lane[0] = max_lane_width; } else { - for (i = 0; i < NUM_LINK_LEVELS; i++) { - if (pcie_table->pcie_gen[i] > pcie_gen_cap) - pcie_table->pcie_gen[i] = pcie_gen_cap; - if (pcie_table->pcie_lane[i] > pcie_width_cap) - pcie_table->pcie_lane[i] = pcie_width_cap; - } + pcie_table->pcie_gen[0] = min_gen_speed; + pcie_table->pcie_lane[0] = min_lane_width; } + pcie_table->pcie_gen[1] = max_gen_speed; + pcie_table->pcie_lane[1] = max_lane_width; for (i = 0; i < NUM_LINK_LEVELS; i++) { smu_pcie_arg = (i << 16 | diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 199a673b8120..de80e191a92c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -336,7 +336,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) /* Store one-time values in driver PPTable */ if (!pptable->Init) { - while (retry--) { + while (--retry) { ret = smu_v13_0_6_get_metrics_table(smu, NULL, true); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 858c959f7bab..f735b035436c 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -3540,6 +3540,27 @@ enum aux_ch intel_bios_dp_aux_ch(const struct intel_bios_encoder_data *devdata) return map_aux_ch(devdata->i915, devdata->child.aux_channel); } +bool intel_bios_dp_has_shared_aux_ch(const struct intel_bios_encoder_data *devdata) +{ + struct drm_i915_private *i915; + u8 aux_channel; + int count = 0; + + if (!devdata || !devdata->child.aux_channel) + return false; + + i915 = devdata->i915; + aux_channel = devdata->child.aux_channel; + + list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) { + if (intel_bios_encoder_supports_dp(devdata) && + aux_channel == devdata->child.aux_channel) + count++; + } + + return count > 1; +} + int intel_bios_dp_boost_level(const struct intel_bios_encoder_data *devdata) { if (!devdata || devdata->i915->display.vbt.version < 196 || !devdata->child.iboost) diff --git a/drivers/gpu/drm/i915/display/intel_bios.h b/drivers/gpu/drm/i915/display/intel_bios.h index 9680e3e92bb5..49e24b7cf675 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.h +++ b/drivers/gpu/drm/i915/display/intel_bios.h @@ -273,6 +273,7 @@ enum aux_ch intel_bios_dp_aux_ch(const struct intel_bios_encoder_data *devdata); int intel_bios_dp_boost_level(const struct intel_bios_encoder_data *devdata); int intel_bios_dp_max_lane_count(const struct intel_bios_encoder_data *devdata); int intel_bios_dp_max_link_rate(const struct intel_bios_encoder_data *devdata); +bool intel_bios_dp_has_shared_aux_ch(const struct intel_bios_encoder_data *devdata); int intel_bios_hdmi_boost_level(const struct intel_bios_encoder_data *devdata); int intel_bios_hdmi_ddc_pin(const struct intel_bios_encoder_data *devdata); int intel_bios_hdmi_level_shift(const struct intel_bios_encoder_data *devdata); diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 12bd2f322e62..e0e4cb529284 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5512,8 +5512,13 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, /* * VBT and straps are liars. Also check HPD as that seems * to be the most reliable piece of information available. + * + * ... expect on devices that forgot to hook HPD up for eDP + * (eg. Acer Chromebook C710), so we'll check it only if multiple + * ports are attempting to use the same AUX CH, according to VBT. */ - if (!intel_digital_port_connected(encoder)) { + if (intel_bios_dp_has_shared_aux_ch(encoder->devdata) && + !intel_digital_port_connected(encoder)) { /* * If this fails, presume the DPCD answer came * from some other port using the same AUX CH. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 6b6d22c19411..0ba955611dfb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -198,7 +198,7 @@ static void flush_tlb_invalidate(struct drm_i915_gem_object *obj) for_each_gt(gt, i915, id) { if (!obj->mm.tlb[id]) - return; + continue; intel_gt_invalidate_tlb_full(gt, obj->mm.tlb[id]); obj->mm.tlb[id] = 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 8f1633c3fb93..73a4a4eb29e0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -100,6 +100,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, st->nents = 0; for (i = 0; i < page_count; i++) { struct folio *folio; + unsigned long nr_pages; const unsigned int shrink[] = { I915_SHRINK_BOUND | I915_SHRINK_UNBOUND, 0, @@ -150,6 +151,8 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, } } while (1); + nr_pages = min_t(unsigned long, + folio_nr_pages(folio), page_count - i); if (!i || sg->length >= max_segment || folio_pfn(folio) != next_pfn) { @@ -157,13 +160,13 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, sg = sg_next(sg); st->nents++; - sg_set_folio(sg, folio, folio_size(folio), 0); + sg_set_folio(sg, folio, nr_pages * PAGE_SIZE, 0); } else { /* XXX: could overflow? */ - sg->length += folio_size(folio); + sg->length += nr_pages * PAGE_SIZE; } - next_pfn = folio_pfn(folio) + folio_nr_pages(folio); - i += folio_nr_pages(folio) - 1; + next_pfn = folio_pfn(folio) + nr_pages; + i += nr_pages - 1; /* Check that the i965g/gm workaround works. */ GEM_BUG_ON(gfp & __GFP_DMA32 && next_pfn >= 0x00100000UL); diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index a4ff55aa5e55..7ad36198aab2 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -271,8 +271,17 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70)) bit_group_0 |= PIPE_CONTROL_CCS_FLUSH; + /* + * L3 fabric flush is needed for AUX CCS invalidation + * which happens as part of pipe-control so we can + * ignore PIPE_CONTROL_FLUSH_L3. Also PIPE_CONTROL_FLUSH_L3 + * deals with Protected Memory which is not needed for + * AUX CCS invalidation and lead to unwanted side effects. + */ + if (mode & EMIT_FLUSH) + bit_group_1 |= PIPE_CONTROL_FLUSH_L3; + bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH; - bit_group_1 |= PIPE_CONTROL_FLUSH_L3; bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; /* Wa_1409600907:tgl,adl-p */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index ee15486fed0d..e85d70a62123 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -558,7 +558,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, DRIVER_CAPS(i915)->has_logical_contexts = true; ewma__engine_latency_init(&engine->latency); - seqcount_init(&engine->stats.execlists.lock); ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 8a641bcf777c..3292524469d5 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3550,6 +3550,8 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); + seqcount_init(&engine->stats.execlists.lock); + if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) rcs_submission_override(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index dd0ed941441a..da21f2786b5d 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -511,20 +511,31 @@ void intel_ggtt_unbind_vma(struct i915_address_space *vm, vm->clear_range(vm, vma_res->start, vma_res->vma_size); } +/* + * Reserve the top of the GuC address space for firmware images. Addresses + * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC, + * which makes for a suitable range to hold GuC/HuC firmware images if the + * size of the GGTT is 4G. However, on a 32-bit platform the size of the GGTT + * is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk + * of the same size anyway, which is far more than needed, to keep the logic + * in uc_fw_ggtt_offset() simple. + */ +#define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP) + static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) { - u64 size; + u64 offset; int ret; if (!intel_uc_uses_guc(&ggtt->vm.gt->uc)) return 0; - GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP); - size = ggtt->vm.total - GUC_GGTT_TOP; + GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE); + offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE; - ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, size, - GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE, - PIN_NOEVICT); + ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, + GUC_TOP_RESERVE_SIZE, offset, + I915_COLOR_UNEVICTABLE, PIN_NOEVICT); if (ret) drm_dbg(&ggtt->vm.i915->drm, "Failed to reserve top of GGTT for GuC\n"); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 957d0aeb0c02..c378cc7c953c 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1094,6 +1094,9 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) I915_BO_ALLOC_PM_VOLATILE); if (IS_ERR(obj)) { obj = i915_gem_object_create_shmem(engine->i915, context_size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + /* * Wa_22016122933: For Media version 13.0, all Media GT shared * memory needs to be mapped as WC on CPU side and UC (PAT @@ -1102,8 +1105,6 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) if (intel_gt_needs_wa_22016122933(engine->gt)) i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); } - if (IS_ERR(obj)) - return ERR_CAST(obj); vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index b5b7f2fe8c78..dc7b40e06e38 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1433,6 +1433,36 @@ static void guc_timestamp_ping(struct work_struct *wrk) int srcu, ret; /* + * Ideally the busyness worker should take a gt pm wakeref because the + * worker only needs to be active while gt is awake. However, the + * gt_park path cancels the worker synchronously and this complicates + * the flow if the worker is also running at the same time. The cancel + * waits for the worker and when the worker releases the wakeref, that + * would call gt_park and would lead to a deadlock. + * + * The resolution is to take the global pm wakeref if runtime pm is + * already active. If not, we don't need to update the busyness stats as + * the stats would already be updated when the gt was parked. + * + * Note: + * - We do not requeue the worker if we cannot take a reference to runtime + * pm since intel_guc_busyness_unpark would requeue the worker in the + * resume path. + * + * - If the gt was parked longer than time taken for GT timestamp to roll + * over, we ignore those rollovers since we don't care about tracking + * the exact GT time. We only care about roll overs when the gt is + * active and running workloads. + * + * - There is a window of time between gt_park and runtime suspend, + * where the worker may run. This is acceptable since the worker will + * not find any new data to update busyness. + */ + wakeref = intel_runtime_pm_get_if_active(>->i915->runtime_pm); + if (!wakeref) + return; + + /* * Synchronize with gt reset to make sure the worker does not * corrupt the engine/guc stats. NB: can't actually block waiting * for a reset to complete as the reset requires flushing out @@ -1440,10 +1470,9 @@ static void guc_timestamp_ping(struct work_struct *wrk) */ ret = intel_gt_reset_trylock(gt, &srcu); if (ret) - return; + goto err_trylock; - with_intel_runtime_pm(>->i915->runtime_pm, wakeref) - __update_guc_busyness_stats(guc); + __update_guc_busyness_stats(guc); /* adjust context stats for overflow */ xa_for_each(&guc->context_lookup, index, ce) @@ -1452,6 +1481,9 @@ static void guc_timestamp_ping(struct work_struct *wrk) intel_gt_reset_unlock(gt, srcu); guc_enable_busyness_worker(guc); + +err_trylock: + intel_runtime_pm_put(>->i915->runtime_pm, wakeref); } static int guc_action_enable_usage_stats(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1f65bb33dd21..a8551ce322de 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1199,6 +1199,13 @@ int i915_gem_init(struct drm_i915_private *dev_priv) goto err_unlock; } + /* + * Register engines early to ensure the engine list is in its final + * rb-tree form, lowering the amount of code that has to deal with + * the intermediate llist state. + */ + intel_engines_driver_register(dev_priv); + return 0; /* @@ -1246,8 +1253,6 @@ err_unlock: void i915_gem_driver_register(struct drm_i915_private *i915) { i915_gem_driver_register__shrinker(i915); - - intel_engines_driver_register(i915); } void i915_gem_driver_unregister(struct drm_i915_private *i915) |