summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c76
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.h4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c34
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c43
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c46
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c77
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.h2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c119
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c35
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c34
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c23
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/core_types.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_factory.c4
-rw-r--r--drivers/gpu/drm/amd/include/kgd_kfd_interface.h9
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_pm.c1
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c41
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_bios.c21
-rw-r--r--drivers/gpu/drm/i915/display/intel_bios.h1
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.c7
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pages.c2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shmem.c11
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.c11
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt.c23
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c5
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c38
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c9
64 files changed, 550 insertions, 274 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index dc2d53081e80..a79d53bdbe13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1293,7 +1293,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
int amdgpu_device_pci_reset(struct amdgpu_device *adev);
bool amdgpu_device_need_post(struct amdgpu_device *adev);
-bool amdgpu_sg_display_supported(struct amdgpu_device *adev);
bool amdgpu_device_pcie_dynamic_switching_supported(void);
bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
bool amdgpu_device_aspm_support_quirk(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index cdf6087706aa..25d5fda5b243 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -478,7 +478,7 @@ void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *c
cu_info->cu_active_number = acu_info.number;
cu_info->cu_ao_mask = acu_info.ao_cu_mask;
memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
- sizeof(acu_info.bitmap));
+ sizeof(cu_info->cu_bitmap));
cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index f1f2c24de081..69810b3f1c63 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -980,8 +980,7 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
- uint32_t *reg_data,
- uint32_t inst)
+ uint32_t *reg_data)
{
*reg_data = wait_times;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
index ecaead24e8c9..67bcaa3d4226 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
@@ -55,5 +55,4 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
- uint32_t *reg_data,
- uint32_t inst);
+ uint32_t *reg_data);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index fa5ee96f8845..3c45a188b701 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -1103,8 +1103,7 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
- uint32_t *reg_data,
- uint32_t inst)
+ uint32_t *reg_data)
{
*reg_data = wait_times;
@@ -1120,8 +1119,7 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
SCH_WAVE,
grace_period);
- *reg_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
- mmCP_IQ_WAIT_TIME2);
+ *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
}
void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index 936e501908ce..ce424615f59b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -100,5 +100,4 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
- uint32_t *reg_data,
- uint32_t inst);
+ uint32_t *reg_data);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 73ee14f7a9a4..dce9e7d5e4ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1776,7 +1776,7 @@ static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev,
struct amdgpu_device *adev = drm_to_adev(ddev);
struct atom_context *ctx = adev->mode_info.atom_context;
- return sysfs_emit(buf, "%s\n", ctx->vbios_ver_str);
+ return sysfs_emit(buf, "%s\n", ctx->vbios_pn);
}
static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3f001a50b34a..2b8356699f23 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1245,32 +1245,6 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
}
/*
- * On APUs with >= 64GB white flickering has been observed w/ SG enabled.
- * Disable S/G on such systems until we have a proper fix.
- * https://gitlab.freedesktop.org/drm/amd/-/issues/2354
- * https://gitlab.freedesktop.org/drm/amd/-/issues/2735
- */
-bool amdgpu_sg_display_supported(struct amdgpu_device *adev)
-{
- switch (amdgpu_sg_display) {
- case -1:
- break;
- case 0:
- return false;
- case 1:
- return true;
- default:
- return false;
- }
- if ((totalram_pages() << (PAGE_SHIFT - 10)) +
- (adev->gmc.real_vram_size / 1024) >= 64000000) {
- DRM_WARN("Disabling S/G due to >=64GB RAM\n");
- return false;
- }
- return true;
-}
-
-/*
* Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
* speed switching. Until we have confirmation from Intel that a specific host
* supports it, it's safer that we keep it disabled for all.
@@ -2119,7 +2093,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
adev->flags |= AMD_IS_PX;
if (!(adev->flags & AMD_IS_APU)) {
- parent = pci_upstream_bridge(adev->pdev);
+ parent = pcie_find_root_port(adev->pdev);
adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
index 9c66d98af6d8..7cd0dfaeee20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
@@ -170,6 +170,7 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
csum += pia[size - 1];
if (csum) {
DRM_ERROR("Bad Product Info Area checksum: 0x%02x", csum);
+ kfree(pia);
return -EIO;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 395c1768b9fc..0ca95c4d4bfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -43,6 +43,7 @@
#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
#define AMDGPU_MAX_GC_INSTANCES 8
+#define KGD_MAX_QUEUES 128
#define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES
#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
@@ -257,7 +258,7 @@ struct amdgpu_cu_info {
uint32_t number;
uint32_t ao_cu_mask;
uint32_t ao_cu_bitmap[4][4];
- uint32_t bitmap[4][4];
+ uint32_t bitmap[AMDGPU_MAX_GC_INSTANCES][4][4];
};
struct amdgpu_gfx_ras {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 99f4df133ed3..d30dc0b718c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -839,7 +839,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
memcpy(&dev_info->cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0],
sizeof(adev->gfx.cu_info.ao_cu_bitmap));
memcpy(&dev_info->cu_bitmap[0], &adev->gfx.cu_info.bitmap[0],
- sizeof(adev->gfx.cu_info.bitmap));
+ sizeof(dev_info->cu_bitmap));
dev_info->vram_type = adev->gmc.vram_type;
dev_info->vram_bit_width = adev->gmc.vram_width;
dev_info->vce_harvest_config = adev->vce.harvest_config;
@@ -940,12 +940,17 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
struct atom_context *atom_context;
atom_context = adev->mode_info.atom_context;
- memcpy(vbios_info.name, atom_context->name, sizeof(atom_context->name));
- memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, sizeof(atom_context->vbios_pn));
- vbios_info.version = atom_context->version;
- memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str,
- sizeof(atom_context->vbios_ver_str));
- memcpy(vbios_info.date, atom_context->date, sizeof(atom_context->date));
+ if (atom_context) {
+ memcpy(vbios_info.name, atom_context->name,
+ sizeof(atom_context->name));
+ memcpy(vbios_info.vbios_pn, atom_context->vbios_pn,
+ sizeof(atom_context->vbios_pn));
+ vbios_info.version = atom_context->version;
+ memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str,
+ sizeof(atom_context->vbios_ver_str));
+ memcpy(vbios_info.date, atom_context->date,
+ sizeof(atom_context->date));
+ }
return copy_to_user(out, &vbios_info,
min((size_t)size, sizeof(vbios_info))) ? -EFAULT : 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 3c4600e15b86..163445baa4fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -801,6 +801,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
enable ? "enable":"disable",
get_ras_block_str(head),
amdgpu_ras_is_poison_mode_supported(adev), ret);
+ kfree(info);
return ret;
}
@@ -1052,7 +1053,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
info->ce_count = obj->err_data.ce_count;
if (err_data.ce_count) {
- if (adev->smuio.funcs &&
+ if (!adev->aid_mask &&
+ adev->smuio.funcs &&
adev->smuio.funcs->get_socket_id &&
adev->smuio.funcs->get_die_id) {
dev_info(adev->dev, "socket: %d, die: %d "
@@ -1072,7 +1074,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
}
}
if (err_data.ue_count) {
- if (adev->smuio.funcs &&
+ if (!adev->aid_mask &&
+ adev->smuio.funcs &&
adev->smuio.funcs->get_socket_id &&
adev->smuio.funcs->get_die_id) {
dev_info(adev->dev, "socket: %d, die: %d "
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 0aee9c8288a2..9032d7a24d7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -9449,7 +9449,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev);
- cu_info->bitmap[i][j] = bitmap;
+ cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 5c3db694afa8..762d7a19f1be 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -6368,7 +6368,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
* SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
* SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
*/
- cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap;
+ cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index da6caff78c22..34f9211b2679 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -3577,7 +3577,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
gfx_v6_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v6_0_get_cu_enabled(adev);
- cu_info->bitmap[i][j] = bitmap;
+ cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 90b034b173c1..c2faf6b4c2fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -5119,7 +5119,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
gfx_v7_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
- cu_info->bitmap[i][j] = bitmap;
+ cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
if (bitmap & mask) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 51c1745c8369..885ebd703260 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -7121,7 +7121,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
gfx_v8_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]);
bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
- cu_info->bitmap[i][j] = bitmap;
+ cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
if (bitmap & mask) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 458faf657042..fd61574a737c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1499,7 +1499,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
- if (cu_info->bitmap[i][j] & mask) {
+ if (cu_info->bitmap[0][i][j] & mask) {
if (counter == pg_always_on_cu_num)
WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
if (counter < always_on_cu_num)
@@ -7233,7 +7233,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
* SE6,SH0 --> bitmap[2][1]
* SE7,SH0 --> bitmap[3][1]
*/
- cu_info->bitmap[i % 4][j + i / 4] = bitmap;
+ cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
if (bitmap & mask) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 0a26a00074a6..18ce5fe45f6f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -4259,7 +4259,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev)
}
static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
- u32 bitmap)
+ u32 bitmap, int xcc_id)
{
u32 data;
@@ -4269,15 +4269,15 @@ static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
- WREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG, data);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG, data);
}
-static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev)
+static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev, int xcc_id)
{
u32 data, mask;
- data = RREG32_SOC15(GC, GET_INST(GC, 0), regCC_GC_SHADER_ARRAY_CONFIG);
- data |= RREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG);
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SHADER_ARRAY_CONFIG);
+ data |= RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG);
data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
@@ -4290,7 +4290,7 @@ static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev)
static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info)
{
- int i, j, k, counter, active_cu_number = 0;
+ int i, j, k, counter, xcc_id, active_cu_number = 0;
u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
unsigned disable_masks[4 * 4];
@@ -4309,46 +4309,38 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
adev->gfx.config.max_sh_per_se);
mutex_lock(&adev->grbm_idx_mutex);
- for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
- for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- mask = 1;
- ao_bitmap = 0;
- counter = 0;
- gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, 0);
- gfx_v9_4_3_set_user_cu_inactive_bitmap(
- adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
- bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev);
-
- /*
- * The bitmap(and ao_cu_bitmap) in cu_info structure is
- * 4x4 size array, and it's usually suitable for Vega
- * ASICs which has 4*2 SE/SH layout.
- * But for Arcturus, SE/SH layout is changed to 8*1.
- * To mostly reduce the impact, we make it compatible
- * with current bitmap array as below:
- * SE4,SH0 --> bitmap[0][1]
- * SE5,SH0 --> bitmap[1][1]
- * SE6,SH0 --> bitmap[2][1]
- * SE7,SH0 --> bitmap[3][1]
- */
- cu_info->bitmap[i % 4][j + i / 4] = bitmap;
-
- for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
- if (bitmap & mask) {
- if (counter < adev->gfx.config.max_cu_per_sh)
- ao_bitmap |= mask;
- counter++;
+ for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ mask = 1;
+ ao_bitmap = 0;
+ counter = 0;
+ gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, xcc_id);
+ gfx_v9_4_3_set_user_cu_inactive_bitmap(
+ adev,
+ disable_masks[i * adev->gfx.config.max_sh_per_se + j],
+ xcc_id);
+ bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev, xcc_id);
+
+ cu_info->bitmap[xcc_id][i][j] = bitmap;
+
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
+ if (bitmap & mask) {
+ if (counter < adev->gfx.config.max_cu_per_sh)
+ ao_bitmap |= mask;
+ counter++;
+ }
+ mask <<= 1;
}
- mask <<= 1;
+ active_cu_number += counter;
+ if (i < 2 && j < 2)
+ ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+ cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
}
- active_cu_number += counter;
- if (i < 2 && j < 2)
- ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
- cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
}
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
}
- gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
- 0);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
index d5ed9e0e1a5f..e5b5b0f4940f 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
@@ -345,6 +345,9 @@ static void nbio_v4_3_init_registers(struct amdgpu_device *adev)
data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK;
WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data);
}
+ if (amdgpu_sriov_vf(adev))
+ adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+ regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
}
static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 40d23738ee4e..8b2ff2b281b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -766,7 +766,7 @@ static int soc21_common_hw_init(void *handle)
* for the purpose of expose those registers
* to process space
*/
- if (adev->nbio.funcs->remap_hdp_registers)
+ if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev))
adev->nbio.funcs->remap_hdp_registers(adev);
/* enable the doorbell aperture */
adev->nbio.funcs->enable_doorbell_aperture(adev, true);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 86fb7ac7982a..f76b7aee5c0a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -2087,7 +2087,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
cu->num_simd_per_cu = cu_info.simd_per_cu;
- cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number;
+ cu->num_simd_cores = cu_info.simd_per_cu *
+ (cu_info.cu_active_number / kdev->kfd->num_nodes);
cu->max_waves_simd = cu_info.max_waves_per_simd;
cu->wave_front_size = cu_info.wave_front_size;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
index 387a8ef49385..74c2d7a0d628 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -79,6 +79,10 @@ struct crat_header {
#define CRAT_SUBTYPE_IOLINK_AFFINITY 5
#define CRAT_SUBTYPE_MAX 6
+/*
+ * Do not change the value of CRAT_SIBLINGMAP_SIZE from 32
+ * as it breaks the ABI.
+ */
#define CRAT_SIBLINGMAP_SIZE 32
/*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index b166f30f083e..0d3d538b64eb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -216,7 +216,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
if (q->wptr_bo) {
wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1);
- queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off;
+ queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->wptr_bo) + wptr_addr_off;
}
queue_input.is_kfd_process = 1;
@@ -1677,8 +1677,7 @@ static int start_cpsch(struct device_queue_manager *dqm)
dqm->dev->kfd2kgd->build_grace_period_packet_info(
dqm->dev->adev, dqm->wait_times,
grace_period, &reg_offset,
- &dqm->wait_times,
- ffs(dqm->dev->xcc_mask) - 1);
+ &dqm->wait_times);
}
dqm_unlock(dqm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index c2e0b79dcc6d..7b38537c7c99 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -162,6 +162,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
return NULL;
*doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx);
+ inx *= 2;
pr_debug("Get kernel queue doorbell\n"
" doorbell offset == 0x%08X\n"
@@ -176,6 +177,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
unsigned int inx;
inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
+ inx /= 2;
mutex_lock(&kfd->doorbell_mutex);
__clear_bit(inx, kfd->doorbell_bitmap);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index d01bb57733b3..447829c22295 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -97,18 +97,22 @@ void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
- uint32_t *se_mask)
+ uint32_t *se_mask, uint32_t inst)
{
struct kfd_cu_info cu_info;
uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0);
uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1;
- int i, se, sh, cu, cu_bitmap_sh_mul, inc = wgp_mode_req ? 2 : 1;
+ int i, se, sh, cu, cu_bitmap_sh_mul, cu_inc = wgp_mode_req ? 2 : 1;
+ uint32_t cu_active_per_node;
+ int inc = cu_inc * NUM_XCC(mm->dev->xcc_mask);
+ int xcc_inst = inst + ffs(mm->dev->xcc_mask) - 1;
amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info);
- if (cu_mask_count > cu_info.cu_active_number)
- cu_mask_count = cu_info.cu_active_number;
+ cu_active_per_node = cu_info.cu_active_number / mm->dev->kfd->num_nodes;
+ if (cu_mask_count > cu_active_per_node)
+ cu_mask_count = cu_active_per_node;
/* Exceeding these bounds corrupts the stack and indicates a coding error.
* Returning with no CU's enabled will hang the queue, which should be
@@ -141,7 +145,8 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
for (se = 0; se < cu_info.num_shader_engines; se++)
for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++)
cu_per_sh[se][sh] = hweight32(
- cu_info.cu_bitmap[se % 4][sh + (se / 4) * cu_bitmap_sh_mul]);
+ cu_info.cu_bitmap[xcc_inst][se % 4][sh + (se / 4) *
+ cu_bitmap_sh_mul]);
/* Symmetrically map cu_mask to all SEs & SHs:
* se_mask programs up to 2 SH in the upper and lower 16 bits.
@@ -164,20 +169,33 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
* cu_mask[0] bit8 -> se_mask[0] bit1 (SE0,SH0,CU1)
* ...
*
+ * For GFX 9.4.3, the following code only looks at a
+ * subset of the cu_mask corresponding to the inst parameter.
+ * If we have n XCCs under one GPU node
+ * cu_mask[0] bit0 -> XCC0 se_mask[0] bit0 (XCC0,SE0,SH0,CU0)
+ * cu_mask[0] bit1 -> XCC1 se_mask[0] bit0 (XCC1,SE0,SH0,CU0)
+ * ..
+ * cu_mask[0] bitn -> XCCn se_mask[0] bit0 (XCCn,SE0,SH0,CU0)
+ * cu_mask[0] bit n+1 -> XCC0 se_mask[1] bit0 (XCC0,SE1,SH0,CU0)
+ *
+ * For example, if there are 6 XCCs under 1 KFD node, this code
+ * running for each inst, will look at the bits as:
+ * inst, inst + 6, inst + 12...
+ *
* First ensure all CUs are disabled, then enable user specified CUs.
*/
for (i = 0; i < cu_info.num_shader_engines; i++)
se_mask[i] = 0;
- i = 0;
- for (cu = 0; cu < 16; cu += inc) {
+ i = inst;
+ for (cu = 0; cu < 16; cu += cu_inc) {
for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) {
for (se = 0; se < cu_info.num_shader_engines; se++) {
if (cu_per_sh[se][sh] > cu) {
if (cu_mask[i / 32] & (en_mask << (i % 32)))
se_mask[se] |= en_mask << (cu + sh * 16);
i += inc;
- if (i == cu_mask_count)
+ if (i >= cu_mask_count)
return;
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 23158db7da03..57bf5e513f4d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -138,7 +138,7 @@ void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
- uint32_t *se_mask);
+ uint32_t *se_mask, uint32_t inst);
int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index ee1d32d957f2..1a4a69943c71 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -52,7 +52,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
return;
mqd_symmetrically_map_cu_mask(mm,
- minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
+ minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
m = get_mqd(mqd);
m->compute_static_thread_mgmt_se0 = se_mask[0];
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 83699392c808..8b7fed913526 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -52,7 +52,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
return;
mqd_symmetrically_map_cu_mask(mm,
- minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
+ minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
m = get_mqd(mqd);
m->compute_static_thread_mgmt_se0 = se_mask[0];
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index 0bbf0edbabd4..15277f1d5cf0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -71,7 +71,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
}
mqd_symmetrically_map_cu_mask(mm,
- minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
+ minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
m->compute_static_thread_mgmt_se0 = se_mask[0];
m->compute_static_thread_mgmt_se1 = se_mask[1];
@@ -321,6 +321,43 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
return 0;
}
+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
+{
+ struct v11_compute_mqd *m;
+
+ m = get_mqd(mqd);
+
+ memcpy(mqd_dst, m, sizeof(struct v11_compute_mqd));
+}
+
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, const u32 ctl_stack_size)
+{
+ uint64_t addr;
+ struct v11_compute_mqd *m;
+
+ m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memcpy(m, mqd_src, sizeof(*m));
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+
+ m->cp_hqd_pq_doorbell_control =
+ qp->doorbell_off <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ qp->is_active = 0;
+}
+
+
static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -458,6 +495,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
mqd->mqd_size = sizeof(struct v11_compute_mqd);
mqd->get_wave_state = get_wave_state;
mqd->mqd_stride = kfd_mqd_stride;
+ mqd->checkpoint_mqd = checkpoint_mqd;
+ mqd->restore_mqd = restore_mqd;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
@@ -502,6 +541,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
mqd->update_mqd = update_mqd_sdma;
mqd->destroy_mqd = kfd_destroy_mqd_sdma;
mqd->is_occupied = kfd_is_occupied_sdma;
+ mqd->checkpoint_mqd = checkpoint_mqd;
+ mqd->restore_mqd = restore_mqd;
mqd->mqd_size = sizeof(struct v11_sdma_mqd);
mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index e23d32f35607..42d881809dc7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -60,7 +60,7 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
}
static void update_cu_mask(struct mqd_manager *mm, void *mqd,
- struct mqd_update_info *minfo)
+ struct mqd_update_info *minfo, uint32_t inst)
{
struct v9_mqd *m;
uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
@@ -69,27 +69,36 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
return;
mqd_symmetrically_map_cu_mask(mm,
- minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
+ minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, inst);
m = get_mqd(mqd);
+
m->compute_static_thread_mgmt_se0 = se_mask[0];
m->compute_static_thread_mgmt_se1 = se_mask[1];
m->compute_static_thread_mgmt_se2 = se_mask[2];
m->compute_static_thread_mgmt_se3 = se_mask[3];
- m->compute_static_thread_mgmt_se4 = se_mask[4];
- m->compute_static_thread_mgmt_se5 = se_mask[5];
- m->compute_static_thread_mgmt_se6 = se_mask[6];
- m->compute_static_thread_mgmt_se7 = se_mask[7];
-
- pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
- m->compute_static_thread_mgmt_se0,
- m->compute_static_thread_mgmt_se1,
- m->compute_static_thread_mgmt_se2,
- m->compute_static_thread_mgmt_se3,
- m->compute_static_thread_mgmt_se4,
- m->compute_static_thread_mgmt_se5,
- m->compute_static_thread_mgmt_se6,
- m->compute_static_thread_mgmt_se7);
+ if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3)) {
+ m->compute_static_thread_mgmt_se4 = se_mask[4];
+ m->compute_static_thread_mgmt_se5 = se_mask[5];
+ m->compute_static_thread_mgmt_se6 = se_mask[6];
+ m->compute_static_thread_mgmt_se7 = se_mask[7];
+
+ pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
+ m->compute_static_thread_mgmt_se0,
+ m->compute_static_thread_mgmt_se1,
+ m->compute_static_thread_mgmt_se2,
+ m->compute_static_thread_mgmt_se3,
+ m->compute_static_thread_mgmt_se4,
+ m->compute_static_thread_mgmt_se5,
+ m->compute_static_thread_mgmt_se6,
+ m->compute_static_thread_mgmt_se7);
+ } else {
+ pr_debug("inst: %u, update cu mask to %#x %#x %#x %#x\n",
+ inst, m->compute_static_thread_mgmt_se0,
+ m->compute_static_thread_mgmt_se1,
+ m->compute_static_thread_mgmt_se2,
+ m->compute_static_thread_mgmt_se3);
+ }
}
static void set_priority(struct v9_mqd *m, struct queue_properties *q)
@@ -290,7 +299,8 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address)
m->cp_hqd_ctx_save_control = 0;
- update_cu_mask(mm, mqd, minfo);
+ if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3))
+ update_cu_mask(mm, mqd, minfo, 0);
set_priority(m, q);
q->is_active = QUEUE_IS_ACTIVE(*q);
@@ -676,6 +686,8 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
m = get_mqd(mqd + size * xcc);
update_mqd(mm, m, q, minfo);
+ update_cu_mask(mm, mqd, minfo, xcc);
+
if (q->format == KFD_QUEUE_FORMAT_AQL) {
switch (xcc) {
case 0:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 657c37822980..3e1a574d4ea6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -55,7 +55,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
return;
mqd_symmetrically_map_cu_mask(mm,
- minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
+ minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
m = get_mqd(mqd);
m->compute_static_thread_mgmt_se0 = se_mask[0];
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 8ce6f5200905..1a03173e2313 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -299,8 +299,7 @@ static int pm_set_grace_period_v9(struct packet_manager *pm,
pm->dqm->wait_times,
grace_period,
&reg_offset,
- &reg_data,
- 0);
+ &reg_data);
if (grace_period == USE_DEFAULT_GRACE_PERIOD)
reg_data = pm->dqm->wait_times;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 3d9ce44d88da..fa24e1852493 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1466,8 +1466,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
{
- return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
+ return KFD_GC_VERSION(dev) > IP_VERSION(9, 4, 2) ||
(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index ff98fded9534..c8c75ff7cea8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -450,8 +450,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count",
dev->node_props.cpu_cores_count);
sysfs_show_32bit_prop(buffer, offs, "simd_count",
- dev->gpu ? (dev->node_props.simd_count *
- NUM_XCC(dev->gpu->xcc_mask)) : 0);
+ dev->gpu ? dev->node_props.simd_count : 0);
sysfs_show_32bit_prop(buffer, offs, "mem_banks_count",
dev->node_props.mem_banks_count);
sysfs_show_32bit_prop(buffer, offs, "caches_count",
@@ -1597,14 +1596,17 @@ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
struct kfd_gpu_cache_info *pcache_info,
struct kfd_cu_info *cu_info,
- int cache_type, unsigned int cu_processor_id)
+ int cache_type, unsigned int cu_processor_id,
+ struct kfd_node *knode)
{
unsigned int cu_sibling_map_mask;
int first_active_cu;
- int i, j, k;
+ int i, j, k, xcc, start, end;
struct kfd_cache_properties *pcache = NULL;
- cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
+ start = ffs(knode->xcc_mask) - 1;
+ end = start + NUM_XCC(knode->xcc_mask);
+ cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0];
cu_sibling_map_mask &=
((1 << pcache_info[cache_type].num_cu_shared) - 1);
first_active_cu = ffs(cu_sibling_map_mask);
@@ -1639,16 +1641,18 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1);
k = 0;
- for (i = 0; i < cu_info->num_shader_engines; i++) {
- for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
- pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
- pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
- pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
- pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
- k += 4;
-
- cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4];
- cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
+ for (xcc = start; xcc < end; xcc++) {
+ for (i = 0; i < cu_info->num_shader_engines; i++) {
+ for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
+ pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
+ pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
+ pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
+ pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
+ k += 4;
+
+ cu_sibling_map_mask = cu_info->cu_bitmap[xcc][i % 4][j + i / 4];
+ cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
+ }
}
}
pcache->sibling_map_size = k;
@@ -1666,7 +1670,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev)
{
struct kfd_gpu_cache_info *pcache_info = NULL;
- int i, j, k;
+ int i, j, k, xcc, start, end;
int ct = 0;
unsigned int cu_processor_id;
int ret;
@@ -1700,37 +1704,42 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
* then it will consider only one CU from
* the shared unit
*/
+ start = ffs(kdev->xcc_mask) - 1;
+ end = start + NUM_XCC(kdev->xcc_mask);
+
for (ct = 0; ct < num_of_cache_types; ct++) {
cu_processor_id = gpu_processor_id;
if (pcache_info[ct].cache_level == 1) {
- for (i = 0; i < pcu_info->num_shader_engines; i++) {
- for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
- for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
+ for (xcc = start; xcc < end; xcc++) {
+ for (i = 0; i < pcu_info->num_shader_engines; i++) {
+ for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
+ for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
- ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
- pcu_info->cu_bitmap[i % 4][j + i / 4], ct,
+ ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
+ pcu_info->cu_bitmap[xcc][i % 4][j + i / 4], ct,
cu_processor_id, k);
- if (ret < 0)
- break;
+ if (ret < 0)
+ break;
- if (!ret) {
- num_of_entries++;
- list_add_tail(&props_ext->list, &dev->cache_props);
- }
+ if (!ret) {
+ num_of_entries++;
+ list_add_tail(&props_ext->list, &dev->cache_props);
+ }
- /* Move to next CU block */
- num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
- pcu_info->num_cu_per_sh) ?
- pcache_info[ct].num_cu_shared :
- (pcu_info->num_cu_per_sh - k);
- cu_processor_id += num_cu_shared;
+ /* Move to next CU block */
+ num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
+ pcu_info->num_cu_per_sh) ?
+ pcache_info[ct].num_cu_shared :
+ (pcu_info->num_cu_per_sh - k);
+ cu_processor_id += num_cu_shared;
+ }
}
}
}
} else {
ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
- pcu_info, ct, cu_processor_id);
+ pcu_info, ct, cu_processor_id, kdev);
if (ret < 0)
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index dea32a9e5506..27386ce9a021 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -89,7 +89,7 @@ struct kfd_mem_properties {
struct attribute attr;
};
-#define CACHE_SIBLINGMAP_SIZE 64
+#define CACHE_SIBLINGMAP_SIZE 128
struct kfd_cache_properties {
struct list_head list;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 88ba8b66de1f..868946dd7ef1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1274,11 +1274,15 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_
pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
- page_table_start.high_part = (u32)(adev->gmc.gart_start >> 44) & 0xF;
- page_table_start.low_part = (u32)(adev->gmc.gart_start >> 12);
- page_table_end.high_part = (u32)(adev->gmc.gart_end >> 44) & 0xF;
- page_table_end.low_part = (u32)(adev->gmc.gart_end >> 12);
- page_table_base.high_part = upper_32_bits(pt_base) & 0xF;
+ page_table_start.high_part = upper_32_bits(adev->gmc.gart_start >>
+ AMDGPU_GPU_PAGE_SHIFT);
+ page_table_start.low_part = lower_32_bits(adev->gmc.gart_start >>
+ AMDGPU_GPU_PAGE_SHIFT);
+ page_table_end.high_part = upper_32_bits(adev->gmc.gart_end >>
+ AMDGPU_GPU_PAGE_SHIFT);
+ page_table_end.low_part = lower_32_bits(adev->gmc.gart_end >>
+ AMDGPU_GPU_PAGE_SHIFT);
+ page_table_base.high_part = upper_32_bits(pt_base);
page_table_base.low_part = lower_32_bits(pt_base);
pa_config->system_aperture.start_addr = (uint64_t)logical_addr_low << 18;
@@ -1640,8 +1644,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
}
break;
}
- if (init_data.flags.gpu_vm_support)
- init_data.flags.gpu_vm_support = amdgpu_sg_display_supported(adev);
+ if (init_data.flags.gpu_vm_support &&
+ (amdgpu_sg_display == 0))
+ init_data.flags.gpu_vm_support = false;
if (init_data.flags.gpu_vm_support)
adev->mode_info.gpu_vm_support = true;
@@ -2335,14 +2340,62 @@ static int dm_late_init(void *handle)
return detect_mst_link_for_all_connectors(adev_to_drm(adev));
}
+static void resume_mst_branch_status(struct drm_dp_mst_topology_mgr *mgr)
+{
+ int ret;
+ u8 guid[16];
+ u64 tmp64;
+
+ mutex_lock(&mgr->lock);
+ if (!mgr->mst_primary)
+ goto out_fail;
+
+ if (drm_dp_read_dpcd_caps(mgr->aux, mgr->dpcd) < 0) {
+ drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n");
+ goto out_fail;
+ }
+
+ ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL,
+ DP_MST_EN |
+ DP_UP_REQ_EN |
+ DP_UPSTREAM_IS_SRC);
+ if (ret < 0) {
+ drm_dbg_kms(mgr->dev, "mst write failed - undocked during suspend?\n");
+ goto out_fail;
+ }
+
+ /* Some hubs forget their guids after they resume */
+ ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, guid, 16);
+ if (ret != 16) {
+ drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n");
+ goto out_fail;
+ }
+
+ if (memchr_inv(guid, 0, 16) == NULL) {
+ tmp64 = get_jiffies_64();
+ memcpy(&guid[0], &tmp64, sizeof(u64));
+ memcpy(&guid[8], &tmp64, sizeof(u64));
+
+ ret = drm_dp_dpcd_write(mgr->aux, DP_GUID, guid, 16);
+
+ if (ret != 16) {
+ drm_dbg_kms(mgr->dev, "check mstb guid failed - undocked during suspend?\n");
+ goto out_fail;
+ }
+ }
+
+ memcpy(mgr->mst_primary->guid, guid, 16);
+
+out_fail:
+ mutex_unlock(&mgr->lock);
+}
+
static void s3_handle_mst(struct drm_device *dev, bool suspend)
{
struct amdgpu_dm_connector *aconnector;
struct drm_connector *connector;
struct drm_connector_list_iter iter;
struct drm_dp_mst_topology_mgr *mgr;
- int ret;
- bool need_hotplug = false;
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
@@ -2364,18 +2417,15 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend)
if (!dp_is_lttpr_present(aconnector->dc_link))
try_to_configure_aux_timeout(aconnector->dc_link->ddc, LINK_AUX_DEFAULT_TIMEOUT_PERIOD);
- ret = drm_dp_mst_topology_mgr_resume(mgr, true);
- if (ret < 0) {
- dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx,
- aconnector->dc_link);
- need_hotplug = true;
- }
+ /* TODO: move resume_mst_branch_status() into drm mst resume again
+ * once topology probing work is pulled out from mst resume into mst
+ * resume 2nd step. mst resume 2nd step should be called after old
+ * state getting restored (i.e. drm_atomic_helper_resume()).
+ */
+ resume_mst_branch_status(mgr);
}
}
drm_connector_list_iter_end(&iter);
-
- if (need_hotplug)
- drm_kms_helper_hotplug_event(dev);
}
static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev)
@@ -2769,7 +2819,8 @@ static int dm_resume(void *handle)
struct dm_atomic_state *dm_state = to_dm_atomic_state(dm->atomic_obj.state);
enum dc_connection_type new_connection_type = dc_connection_none;
struct dc_state *dc_state;
- int i, r, j;
+ int i, r, j, ret;
+ bool need_hotplug = false;
if (amdgpu_in_reset(adev)) {
dc_state = dm->cached_dc_state;
@@ -2867,7 +2918,7 @@ static int dm_resume(void *handle)
continue;
/*
- * this is the case when traversing through already created
+ * this is the case when traversing through already created end sink
* MST connectors, should be skipped
*/
if (aconnector && aconnector->mst_root)
@@ -2927,6 +2978,27 @@ static int dm_resume(void *handle)
dm->cached_state = NULL;
+ /* Do mst topology probing after resuming cached state*/
+ drm_connector_list_iter_begin(ddev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ aconnector = to_amdgpu_dm_connector(connector);
+ if (aconnector->dc_link->type != dc_connection_mst_branch ||
+ aconnector->mst_root)
+ continue;
+
+ ret = drm_dp_mst_topology_mgr_resume(&aconnector->mst_mgr, true);
+
+ if (ret < 0) {
+ dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx,
+ aconnector->dc_link);
+ need_hotplug = true;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (need_hotplug)
+ drm_kms_helper_hotplug_event(ddev);
+
amdgpu_dm_irq_resume_late(adev);
amdgpu_dm_smu_write_watermarks_table(adev);
@@ -6026,8 +6098,6 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
if (recalculate_timing)
drm_mode_set_crtcinfo(&saved_mode, 0);
- else if (!old_stream)
- drm_mode_set_crtcinfo(&mode, 0);
/*
* If scaling is enabled and refresh rate didn't change
@@ -6589,6 +6659,8 @@ enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connec
goto fail;
}
+ drm_mode_set_crtcinfo(mode, 0);
+
stream = create_validate_stream_for_sink(aconnector, mode,
to_dm_connector_state(connector->state),
NULL);
@@ -8073,7 +8145,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
bundle->surface_updates[planes_count].plane_info =
&bundle->plane_infos[planes_count];
- if (acrtc_state->stream->link->psr_settings.psr_feature_enabled) {
+ if (acrtc_state->stream->link->psr_settings.psr_feature_enabled ||
+ acrtc_state->stream->link->replay_settings.replay_feature_enabled) {
fill_dc_dirty_rects(plane, old_plane_state,
new_plane_state, new_crtc_state,
&bundle->flip_addrs[planes_count],
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index a2d34be82613..9e4cc5eeda76 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -620,7 +620,7 @@ struct amdgpu_hdmi_vsdb_info {
unsigned int max_refresh_rate_hz;
/**
- * @replay mode: Replay supported
+ * @replay_mode: Replay supported
*/
bool replay_mode;
};
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
index c435f7632e8e..5ee87965a078 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
@@ -157,7 +157,7 @@ void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct
int32_t N;
int32_t j;
- if (!pipe_ctx->stream)
+ if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER))
continue;
/* Virtual encoders don't have this function */
if (!stream_enc->funcs->get_fifo_cal_average_level)
@@ -188,7 +188,7 @@ void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct
int32_t N;
int32_t j;
- if (!pipe_ctx->stream)
+ if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER))
continue;
/* Virtual encoders don't have this function */
if (!stream_enc->funcs->get_fifo_cal_average_level)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
index 984b52923534..e9345f6554db 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
@@ -355,7 +355,7 @@ static void dcn32_update_clocks_update_dentist(
int32_t N;
int32_t j;
- if (!pipe_ctx->stream)
+ if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER))
continue;
/* Virtual encoders don't have this function */
if (!stream_enc->funcs->get_fifo_cal_average_level)
@@ -401,7 +401,7 @@ static void dcn32_update_clocks_update_dentist(
int32_t N;
int32_t j;
- if (!pipe_ctx->stream)
+ if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER))
continue;
/* Virtual encoders don't have this function */
if (!stream_enc->funcs->get_fifo_cal_average_level)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
index 30c0644d4418..be5a6d008b29 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
@@ -169,11 +169,23 @@ static void add_link_enc_assignment(
/* Return first available DIG link encoder. */
static enum engine_id find_first_avail_link_enc(
const struct dc_context *ctx,
- const struct dc_state *state)
+ const struct dc_state *state,
+ enum engine_id eng_id_requested)
{
enum engine_id eng_id = ENGINE_ID_UNKNOWN;
int i;
+ if (eng_id_requested != ENGINE_ID_UNKNOWN) {
+
+ for (i = 0; i < ctx->dc->res_pool->res_cap->num_dig_link_enc; i++) {
+ eng_id = state->res_ctx.link_enc_cfg_ctx.link_enc_avail[i];
+ if (eng_id == eng_id_requested)
+ return eng_id;
+ }
+ }
+
+ eng_id = ENGINE_ID_UNKNOWN;
+
for (i = 0; i < ctx->dc->res_pool->res_cap->num_dig_link_enc; i++) {
eng_id = state->res_ctx.link_enc_cfg_ctx.link_enc_avail[i];
if (eng_id != ENGINE_ID_UNKNOWN)
@@ -287,7 +299,7 @@ void link_enc_cfg_link_encs_assign(
struct dc_stream_state *streams[],
uint8_t stream_count)
{
- enum engine_id eng_id = ENGINE_ID_UNKNOWN;
+ enum engine_id eng_id = ENGINE_ID_UNKNOWN, eng_id_req = ENGINE_ID_UNKNOWN;
int i;
int j;
@@ -377,8 +389,14 @@ void link_enc_cfg_link_encs_assign(
* assigned to that endpoint.
*/
link_enc = get_link_enc_used_by_link(state, stream->link);
- if (link_enc == NULL)
- eng_id = find_first_avail_link_enc(stream->ctx, state);
+ if (link_enc == NULL) {
+
+ if (stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
+ stream->link->dpia_preferred_eng_id != ENGINE_ID_UNKNOWN)
+ eng_id_req = stream->link->dpia_preferred_eng_id;
+
+ eng_id = find_first_avail_link_enc(stream->ctx, state, eng_id_req);
+ }
else
eng_id = link_enc->preferred_engine;
@@ -402,7 +420,9 @@ void link_enc_cfg_link_encs_assign(
DC_LOG_DEBUG("%s: CUR %s(%d) - enc_id(%d)\n",
__func__,
assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ? "PHY" : "DPIA",
- assignment.ep_id.link_id.enum_id - 1,
+ assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ?
+ assignment.ep_id.link_id.enum_id :
+ assignment.ep_id.link_id.enum_id - 1,
assignment.eng_id);
}
for (i = 0; i < MAX_PIPES; i++) {
@@ -413,7 +433,9 @@ void link_enc_cfg_link_encs_assign(
DC_LOG_DEBUG("%s: NEW %s(%d) - enc_id(%d)\n",
__func__,
assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ? "PHY" : "DPIA",
- assignment.ep_id.link_id.enum_id - 1,
+ assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ?
+ assignment.ep_id.link_id.enum_id :
+ assignment.ep_id.link_id.enum_id - 1,
assignment.eng_id);
}
@@ -478,7 +500,6 @@ struct dc_link *link_enc_cfg_get_link_using_link_enc(
if (stream)
link = stream->link;
- // dm_output_to_console("%s: No link using DIG(%d).\n", __func__, eng_id);
return link;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 0d0bef8eb331..31e3183497a7 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -1496,6 +1496,7 @@ struct dc_link {
* object creation.
*/
enum engine_id eng_id;
+ enum engine_id dpia_preferred_eng_id;
bool test_pattern_enabled;
enum dp_test_pattern current_test_pattern;
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index ad967b58d7be..2a6157555fd1 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -964,7 +964,9 @@ void dce110_edp_backlight_control(
return;
}
- if (link->panel_cntl) {
+ if (link->panel_cntl && !(link->dpcd_sink_ext_caps.bits.oled ||
+ link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1 ||
+ link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1)) {
bool is_backlight_on = link->panel_cntl->funcs->is_panel_backlight_on(link->panel_cntl);
if ((enable && is_backlight_on) || (!enable && !is_backlight_on)) {
@@ -1176,12 +1178,15 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx)
dto_params.otg_inst = tg->inst;
dto_params.timing = &pipe_ctx->stream->timing;
dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst;
- dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
- dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst);
- dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst);
- } else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST && dccg->funcs->disable_symclk_se)
+ if (dccg) {
+ dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
+ dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst);
+ dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst);
+ }
+ } else if (dccg && dccg->funcs->disable_symclk_se) {
dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst,
link_enc->transmitter - TRANSMITTER_UNIPHY_A);
+ }
if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) {
/* TODO: This looks like a bug to me as we are disabling HPO IO when
@@ -2656,11 +2661,11 @@ void dce110_prepare_bandwidth(
struct clk_mgr *dccg = dc->clk_mgr;
dce110_set_safe_displaymarks(&context->res_ctx, dc->res_pool);
-
- dccg->funcs->update_clocks(
- dccg,
- context,
- false);
+ if (dccg)
+ dccg->funcs->update_clocks(
+ dccg,
+ context,
+ false);
}
void dce110_optimize_bandwidth(
@@ -2671,10 +2676,11 @@ void dce110_optimize_bandwidth(
dce110_set_displaymarks(dc, context);
- dccg->funcs->update_clocks(
- dccg,
- context,
- true);
+ if (dccg)
+ dccg->funcs->update_clocks(
+ dccg,
+ context,
+ true);
}
static void dce110_program_front_end_for_pipe(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
index e72f15ac0048..aeadc587433f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
@@ -2692,8 +2692,6 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx)
struct dce_hwseq *hws = dc->hwseq;
unsigned int k1_div = PIXEL_RATE_DIV_NA;
unsigned int k2_div = PIXEL_RATE_DIV_NA;
- struct link_encoder *link_enc = link_enc_cfg_get_link_enc(pipe_ctx->stream->link);
- struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc;
if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) {
if (dc->hwseq->funcs.setup_hpo_hw_control)
@@ -2713,10 +2711,8 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx)
dto_params.timing = &pipe_ctx->stream->timing;
dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr);
dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
- } else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST && dccg->funcs->enable_symclk_se)
- dccg->funcs->enable_symclk_se(dccg,
- stream_enc->stream_enc_inst, link_enc->transmitter - TRANSMITTER_UNIPHY_A);
-
+ } else {
+ }
if (hws->funcs.calculate_dccg_k1_k2_values && dc->res_pool->dccg->funcs->set_pixel_rate_div) {
hws->funcs.calculate_dccg_k1_k2_values(pipe_ctx, &k1_div, &k2_div);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
index 1c1fb2fa0822..004beed9bd44 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
@@ -1032,6 +1032,28 @@ static const struct dce_i2c_mask i2c_masks = {
I2C_COMMON_MASK_SH_LIST_DCN30(_MASK)
};
+/* ========================================================== */
+
+/*
+ * DPIA index | Preferred Encoder | Host Router
+ * 0 | C | 0
+ * 1 | First Available | 0
+ * 2 | D | 1
+ * 3 | First Available | 1
+ */
+/* ========================================================== */
+static const enum engine_id dpia_to_preferred_enc_id_table[] = {
+ ENGINE_ID_DIGC,
+ ENGINE_ID_DIGC,
+ ENGINE_ID_DIGD,
+ ENGINE_ID_DIGD
+};
+
+static enum engine_id dcn314_get_preferred_eng_id_dpia(unsigned int dpia_index)
+{
+ return dpia_to_preferred_enc_id_table[dpia_index];
+}
+
static struct dce_i2c_hw *dcn31_i2c_hw_create(
struct dc_context *ctx,
uint32_t inst)
@@ -1785,6 +1807,7 @@ static struct resource_funcs dcn314_res_pool_funcs = {
.update_bw_bounding_box = dcn314_update_bw_bounding_box,
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.get_panel_config_defaults = dcn314_get_panel_config_defaults,
+ .get_preferred_eng_id_dpia = dcn314_get_preferred_eng_id_dpia,
};
static struct clock_source *dcn30_clock_source_create(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
index 3082da04a63d..1d052f08aff5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
@@ -75,7 +75,7 @@ void mpc32_power_on_blnd_lut(
if (power_on) {
REG_UPDATE(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_FORCE, 0);
REG_WAIT(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_STATE, 0, 1, 5);
- } else {
+ } else if (!mpc->ctx->dc->debug.disable_mem_low_power) {
ASSERT(false);
/* TODO: change to mpc
* dpp_base->ctx->dc->optimized_required = true;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index 027aec70c070..eaad1260bfd1 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -65,6 +65,7 @@ struct resource_context;
struct clk_bw_params;
struct resource_funcs {
+ enum engine_id (*get_preferred_eng_id_dpia)(unsigned int dpia_index);
void (*destroy)(struct resource_pool **pool);
void (*link_init)(struct dc_link *link);
struct panel_cntl*(*panel_cntl_create)(
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
index 195ca9e52eda..0895742a3102 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
@@ -791,6 +791,10 @@ static bool construct_dpia(struct dc_link *link,
/* Set dpia port index : 0 to number of dpia ports */
link->ddc_hw_inst = init_params->connector_index;
+ // Assign Dpia preferred eng_id
+ if (link->dc->res_pool->funcs->get_preferred_eng_id_dpia)
+ link->dpia_preferred_eng_id = link->dc->res_pool->funcs->get_preferred_eng_id_dpia(link->ddc_hw_inst);
+
/* TODO: Create link encoder */
link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED;
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 8433f99f6667..3b5a56585c4b 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -31,12 +31,12 @@
#include <linux/types.h>
#include <linux/bitmap.h>
#include <linux/dma-fence.h>
+#include "amdgpu_irq.h"
+#include "amdgpu_gfx.h"
struct pci_dev;
struct amdgpu_device;
-#define KGD_MAX_QUEUES 128
-
struct kfd_dev;
struct kgd_mem;
@@ -68,7 +68,7 @@ struct kfd_cu_info {
uint32_t wave_front_size;
uint32_t max_scratch_slots_per_cu;
uint32_t lds_size;
- uint32_t cu_bitmap[4][4];
+ uint32_t cu_bitmap[AMDGPU_MAX_GC_INSTANCES][4][4];
};
/* For getting GPU local memory information from KGD */
@@ -326,8 +326,7 @@ struct kfd2kgd_calls {
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
- uint32_t *reg_data,
- uint32_t inst);
+ uint32_t *reg_data);
void (*get_cu_occupancy)(struct amdgpu_device *adev, int pasid,
int *wave_cnt, int *max_waves_per_cu, uint32_t inst);
void (*program_trap_handler_settings)(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 41147da54458..8bb2da13826f 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2040,6 +2040,7 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
*states = ATTR_STATE_SUPPORTED;
break;
default:
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 4bb289f9b4b8..da2860da6018 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -2082,36 +2082,41 @@ static int sienna_cichlid_display_disable_memory_clock_switch(struct smu_context
return ret;
}
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
static int sienna_cichlid_update_pcie_parameters(struct smu_context *smu,
uint32_t pcie_gen_cap,
uint32_t pcie_width_cap)
{
struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
struct smu_11_0_pcie_table *pcie_table = &dpm_context->dpm_tables.pcie_table;
- u32 smu_pcie_arg;
+ uint8_t *table_member1, *table_member2;
+ uint32_t min_gen_speed, max_gen_speed;
+ uint32_t min_lane_width, max_lane_width;
+ uint32_t smu_pcie_arg;
int ret, i;
- /* PCIE gen speed and lane width override */
- if (!amdgpu_device_pcie_dynamic_switching_supported()) {
- if (pcie_table->pcie_gen[NUM_LINK_LEVELS - 1] < pcie_gen_cap)
- pcie_gen_cap = pcie_table->pcie_gen[NUM_LINK_LEVELS - 1];
+ GET_PPTABLE_MEMBER(PcieGenSpeed, &table_member1);
+ GET_PPTABLE_MEMBER(PcieLaneCount, &table_member2);
- if (pcie_table->pcie_lane[NUM_LINK_LEVELS - 1] < pcie_width_cap)
- pcie_width_cap = pcie_table->pcie_lane[NUM_LINK_LEVELS - 1];
+ min_gen_speed = MAX(0, table_member1[0]);
+ max_gen_speed = MIN(pcie_gen_cap, table_member1[1]);
+ min_gen_speed = min_gen_speed > max_gen_speed ?
+ max_gen_speed : min_gen_speed;
+ min_lane_width = MAX(1, table_member2[0]);
+ max_lane_width = MIN(pcie_width_cap, table_member2[1]);
+ min_lane_width = min_lane_width > max_lane_width ?
+ max_lane_width : min_lane_width;
- /* Force all levels to use the same settings */
- for (i = 0; i < NUM_LINK_LEVELS; i++) {
- pcie_table->pcie_gen[i] = pcie_gen_cap;
- pcie_table->pcie_lane[i] = pcie_width_cap;
- }
+ if (!amdgpu_device_pcie_dynamic_switching_supported()) {
+ pcie_table->pcie_gen[0] = max_gen_speed;
+ pcie_table->pcie_lane[0] = max_lane_width;
} else {
- for (i = 0; i < NUM_LINK_LEVELS; i++) {
- if (pcie_table->pcie_gen[i] > pcie_gen_cap)
- pcie_table->pcie_gen[i] = pcie_gen_cap;
- if (pcie_table->pcie_lane[i] > pcie_width_cap)
- pcie_table->pcie_lane[i] = pcie_width_cap;
- }
+ pcie_table->pcie_gen[0] = min_gen_speed;
+ pcie_table->pcie_lane[0] = min_lane_width;
}
+ pcie_table->pcie_gen[1] = max_gen_speed;
+ pcie_table->pcie_lane[1] = max_lane_width;
for (i = 0; i < NUM_LINK_LEVELS; i++) {
smu_pcie_arg = (i << 16 |
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 199a673b8120..de80e191a92c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -336,7 +336,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu)
/* Store one-time values in driver PPTable */
if (!pptable->Init) {
- while (retry--) {
+ while (--retry) {
ret = smu_v13_0_6_get_metrics_table(smu, NULL, true);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index 858c959f7bab..f735b035436c 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -3540,6 +3540,27 @@ enum aux_ch intel_bios_dp_aux_ch(const struct intel_bios_encoder_data *devdata)
return map_aux_ch(devdata->i915, devdata->child.aux_channel);
}
+bool intel_bios_dp_has_shared_aux_ch(const struct intel_bios_encoder_data *devdata)
+{
+ struct drm_i915_private *i915;
+ u8 aux_channel;
+ int count = 0;
+
+ if (!devdata || !devdata->child.aux_channel)
+ return false;
+
+ i915 = devdata->i915;
+ aux_channel = devdata->child.aux_channel;
+
+ list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) {
+ if (intel_bios_encoder_supports_dp(devdata) &&
+ aux_channel == devdata->child.aux_channel)
+ count++;
+ }
+
+ return count > 1;
+}
+
int intel_bios_dp_boost_level(const struct intel_bios_encoder_data *devdata)
{
if (!devdata || devdata->i915->display.vbt.version < 196 || !devdata->child.iboost)
diff --git a/drivers/gpu/drm/i915/display/intel_bios.h b/drivers/gpu/drm/i915/display/intel_bios.h
index 9680e3e92bb5..49e24b7cf675 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.h
+++ b/drivers/gpu/drm/i915/display/intel_bios.h
@@ -273,6 +273,7 @@ enum aux_ch intel_bios_dp_aux_ch(const struct intel_bios_encoder_data *devdata);
int intel_bios_dp_boost_level(const struct intel_bios_encoder_data *devdata);
int intel_bios_dp_max_lane_count(const struct intel_bios_encoder_data *devdata);
int intel_bios_dp_max_link_rate(const struct intel_bios_encoder_data *devdata);
+bool intel_bios_dp_has_shared_aux_ch(const struct intel_bios_encoder_data *devdata);
int intel_bios_hdmi_boost_level(const struct intel_bios_encoder_data *devdata);
int intel_bios_hdmi_ddc_pin(const struct intel_bios_encoder_data *devdata);
int intel_bios_hdmi_level_shift(const struct intel_bios_encoder_data *devdata);
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 12bd2f322e62..e0e4cb529284 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -5512,8 +5512,13 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp,
/*
* VBT and straps are liars. Also check HPD as that seems
* to be the most reliable piece of information available.
+ *
+ * ... expect on devices that forgot to hook HPD up for eDP
+ * (eg. Acer Chromebook C710), so we'll check it only if multiple
+ * ports are attempting to use the same AUX CH, according to VBT.
*/
- if (!intel_digital_port_connected(encoder)) {
+ if (intel_bios_dp_has_shared_aux_ch(encoder->devdata) &&
+ !intel_digital_port_connected(encoder)) {
/*
* If this fails, presume the DPCD answer came
* from some other port using the same AUX CH.
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 6b6d22c19411..0ba955611dfb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -198,7 +198,7 @@ static void flush_tlb_invalidate(struct drm_i915_gem_object *obj)
for_each_gt(gt, i915, id) {
if (!obj->mm.tlb[id])
- return;
+ continue;
intel_gt_invalidate_tlb_full(gt, obj->mm.tlb[id]);
obj->mm.tlb[id] = 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 8f1633c3fb93..73a4a4eb29e0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -100,6 +100,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
st->nents = 0;
for (i = 0; i < page_count; i++) {
struct folio *folio;
+ unsigned long nr_pages;
const unsigned int shrink[] = {
I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
0,
@@ -150,6 +151,8 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
}
} while (1);
+ nr_pages = min_t(unsigned long,
+ folio_nr_pages(folio), page_count - i);
if (!i ||
sg->length >= max_segment ||
folio_pfn(folio) != next_pfn) {
@@ -157,13 +160,13 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
sg = sg_next(sg);
st->nents++;
- sg_set_folio(sg, folio, folio_size(folio), 0);
+ sg_set_folio(sg, folio, nr_pages * PAGE_SIZE, 0);
} else {
/* XXX: could overflow? */
- sg->length += folio_size(folio);
+ sg->length += nr_pages * PAGE_SIZE;
}
- next_pfn = folio_pfn(folio) + folio_nr_pages(folio);
- i += folio_nr_pages(folio) - 1;
+ next_pfn = folio_pfn(folio) + nr_pages;
+ i += nr_pages - 1;
/* Check that the i965g/gm workaround works. */
GEM_BUG_ON(gfp & __GFP_DMA32 && next_pfn >= 0x00100000UL);
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index a4ff55aa5e55..7ad36198aab2 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -271,8 +271,17 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
+ /*
+ * L3 fabric flush is needed for AUX CCS invalidation
+ * which happens as part of pipe-control so we can
+ * ignore PIPE_CONTROL_FLUSH_L3. Also PIPE_CONTROL_FLUSH_L3
+ * deals with Protected Memory which is not needed for
+ * AUX CCS invalidation and lead to unwanted side effects.
+ */
+ if (mode & EMIT_FLUSH)
+ bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
+
bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH;
- bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
/* Wa_1409600907:tgl,adl-p */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index ee15486fed0d..e85d70a62123 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -558,7 +558,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
DRIVER_CAPS(i915)->has_logical_contexts = true;
ewma__engine_latency_init(&engine->latency);
- seqcount_init(&engine->stats.execlists.lock);
ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 8a641bcf777c..3292524469d5 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3550,6 +3550,8 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
logical_ring_default_vfuncs(engine);
logical_ring_default_irqs(engine);
+ seqcount_init(&engine->stats.execlists.lock);
+
if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)
rcs_submission_override(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index dd0ed941441a..da21f2786b5d 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -511,20 +511,31 @@ void intel_ggtt_unbind_vma(struct i915_address_space *vm,
vm->clear_range(vm, vma_res->start, vma_res->vma_size);
}
+/*
+ * Reserve the top of the GuC address space for firmware images. Addresses
+ * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC,
+ * which makes for a suitable range to hold GuC/HuC firmware images if the
+ * size of the GGTT is 4G. However, on a 32-bit platform the size of the GGTT
+ * is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk
+ * of the same size anyway, which is far more than needed, to keep the logic
+ * in uc_fw_ggtt_offset() simple.
+ */
+#define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP)
+
static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
{
- u64 size;
+ u64 offset;
int ret;
if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
return 0;
- GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
- size = ggtt->vm.total - GUC_GGTT_TOP;
+ GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE);
+ offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE;
- ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, size,
- GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
- PIN_NOEVICT);
+ ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw,
+ GUC_TOP_RESERVE_SIZE, offset,
+ I915_COLOR_UNEVICTABLE, PIN_NOEVICT);
if (ret)
drm_dbg(&ggtt->vm.i915->drm,
"Failed to reserve top of GGTT for GuC\n");
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 957d0aeb0c02..c378cc7c953c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1094,6 +1094,9 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
I915_BO_ALLOC_PM_VOLATILE);
if (IS_ERR(obj)) {
obj = i915_gem_object_create_shmem(engine->i915, context_size);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
/*
* Wa_22016122933: For Media version 13.0, all Media GT shared
* memory needs to be mapped as WC on CPU side and UC (PAT
@@ -1102,8 +1105,6 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
if (intel_gt_needs_wa_22016122933(engine->gt))
i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
}
- if (IS_ERR(obj))
- return ERR_CAST(obj);
vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index b5b7f2fe8c78..dc7b40e06e38 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1433,6 +1433,36 @@ static void guc_timestamp_ping(struct work_struct *wrk)
int srcu, ret;
/*
+ * Ideally the busyness worker should take a gt pm wakeref because the
+ * worker only needs to be active while gt is awake. However, the
+ * gt_park path cancels the worker synchronously and this complicates
+ * the flow if the worker is also running at the same time. The cancel
+ * waits for the worker and when the worker releases the wakeref, that
+ * would call gt_park and would lead to a deadlock.
+ *
+ * The resolution is to take the global pm wakeref if runtime pm is
+ * already active. If not, we don't need to update the busyness stats as
+ * the stats would already be updated when the gt was parked.
+ *
+ * Note:
+ * - We do not requeue the worker if we cannot take a reference to runtime
+ * pm since intel_guc_busyness_unpark would requeue the worker in the
+ * resume path.
+ *
+ * - If the gt was parked longer than time taken for GT timestamp to roll
+ * over, we ignore those rollovers since we don't care about tracking
+ * the exact GT time. We only care about roll overs when the gt is
+ * active and running workloads.
+ *
+ * - There is a window of time between gt_park and runtime suspend,
+ * where the worker may run. This is acceptable since the worker will
+ * not find any new data to update busyness.
+ */
+ wakeref = intel_runtime_pm_get_if_active(&gt->i915->runtime_pm);
+ if (!wakeref)
+ return;
+
+ /*
* Synchronize with gt reset to make sure the worker does not
* corrupt the engine/guc stats. NB: can't actually block waiting
* for a reset to complete as the reset requires flushing out
@@ -1440,10 +1470,9 @@ static void guc_timestamp_ping(struct work_struct *wrk)
*/
ret = intel_gt_reset_trylock(gt, &srcu);
if (ret)
- return;
+ goto err_trylock;
- with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
- __update_guc_busyness_stats(guc);
+ __update_guc_busyness_stats(guc);
/* adjust context stats for overflow */
xa_for_each(&guc->context_lookup, index, ce)
@@ -1452,6 +1481,9 @@ static void guc_timestamp_ping(struct work_struct *wrk)
intel_gt_reset_unlock(gt, srcu);
guc_enable_busyness_worker(guc);
+
+err_trylock:
+ intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
}
static int guc_action_enable_usage_stats(struct intel_guc *guc)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1f65bb33dd21..a8551ce322de 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1199,6 +1199,13 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
goto err_unlock;
}
+ /*
+ * Register engines early to ensure the engine list is in its final
+ * rb-tree form, lowering the amount of code that has to deal with
+ * the intermediate llist state.
+ */
+ intel_engines_driver_register(dev_priv);
+
return 0;
/*
@@ -1246,8 +1253,6 @@ err_unlock:
void i915_gem_driver_register(struct drm_i915_private *i915)
{
i915_gem_driver_register__shrinker(i915);
-
- intel_engines_driver_register(i915);
}
void i915_gem_driver_unregister(struct drm_i915_private *i915)