Diffstat (limited to 'drivers/gpu/drm/amd')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/Makefile  3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h  17
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  253
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c  18
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c  1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c  1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  42
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c  11
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  26
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  54
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c  6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c  62
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h  3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c  88
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h  3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  13
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c  4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  34
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c  55
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c  12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  14
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c  514
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h  103
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  31
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h  7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c  3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  55
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  122
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  13
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  296
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c  1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c  4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h  1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/nv.c  28
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v13_0.c  3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c  24
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15.c  24
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15d.h  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc21.c  47
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c  29
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c  33
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_crat.c  1
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device.c  2
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process.c  24
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.c  125
-rw-r--r--  drivers/gpu/drm/amd/display/Kconfig  7
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c  323
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h  17
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c  23
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c  10
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c  2
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c  4
-rw-r--r--  drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h  1
-rw-r--r--  drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c  4
-rw-r--r--  drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c  12
-rw-r--r--  drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c  39
-rw-r--r--  drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h  1
-rw-r--r--  drivers/gpu/drm/amd/display/dc/core/dc.c  61
-rw-r--r--  drivers/gpu/drm/amd/display/dc/core/dc_link.c  2
-rw-r--r--  drivers/gpu/drm/amd/display/dc/core/dc_stream.c  16
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dc.h  7
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c  1
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dc_link.h  14
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dc_stream.h  13
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c  9
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c  3
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c  34
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c  8
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c  28
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c  44
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c  4
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c  8
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c  32
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c  41
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c  8
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c  2
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c  2
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c  2
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c  40
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h  4
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c  4
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c  29
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h  5
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c  7
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c  6
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c  4
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c  2
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c  1
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c  10
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c  8
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c  29
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c  93
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h  14
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c  17
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c  8
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  18
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c  2
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c  3
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c  2
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c  4
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c  2
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  84
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c  42
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c  69
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h  18
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c  8
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h  4
-rw-r--r--  drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c  4
-rw-r--r--  drivers/gpu/drm/amd/display/dc/inc/core_types.h  5
-rw-r--r--  drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h  2
-rw-r--r--  drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h  9
-rw-r--r--  drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.c  28
-rw-r--r--  drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.h  69
-rw-r--r--  drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h  3
-rw-r--r--  drivers/gpu/drm/amd/display/modules/power/power_helpers.c  9
-rw-r--r--  drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h  1
-rw-r--r--  drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h  1
-rw-r--r--  drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h  3
-rw-r--r--  drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h  27
-rw-r--r--  drivers/gpu/drm/amd/include/atombios.h  2
-rw-r--r--  drivers/gpu/drm/amd/include/mes_v11_api_def.h  6
-rw-r--r--  drivers/gpu/drm/amd/include/yellow_carp_offset.h  1
-rw-r--r--  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c  3
-rw-r--r--  drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h  1
-rw-r--r--  drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h  2
-rw-r--r--  drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h  117
-rw-r--r--  drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h  8
-rw-r--r--  drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  5
-rw-r--r--  drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h  6
-rw-r--r--  drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c  28
-rw-r--r--  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c  51
-rw-r--r--  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  152
-rw-r--r--  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c  17
-rw-r--r--  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  43
150 files changed, 3254 insertions, 925 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 712075a491f2..798d0e9a60b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
- amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
+ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
+ amdgpu_ring_mux.o
amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index f50e3ba4d7a5..0040deaf8a83 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -29,6 +29,7 @@
#include <linux/mm.h>
#include <linux/kthread.h>
#include <linux/workqueue.h>
+#include <linux/mmu_notifier.h>
#include <kgd_kfd_interface.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include "amdgpu_sync.h"
@@ -65,6 +66,7 @@ struct kgd_mem {
struct mutex lock;
struct amdgpu_bo *bo;
struct dma_buf *dmabuf;
+ struct hmm_range *range;
struct list_head attachments;
/* protected by amdkfd_process_info.lock */
struct ttm_validate_buffer validate_list;
@@ -75,7 +77,7 @@ struct kgd_mem {
uint32_t alloc_flags;
- atomic_t invalid;
+ uint32_t invalid;
struct amdkfd_process_info *process_info;
struct amdgpu_sync sync;
@@ -131,7 +133,8 @@ struct amdkfd_process_info {
struct amdgpu_amdkfd_fence *eviction_fence;
/* MMU-notifier related fields */
- atomic_t evicted_bos;
+ struct mutex notifier_lock;
+ uint32_t evicted_bos;
struct delayed_work restore_userptr_work;
struct pid *pid;
bool block_mmu_notifications;
@@ -180,7 +183,8 @@ int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
-int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
+int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
+ unsigned long cur_seq, struct kgd_mem *mem);
#else
static inline
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
@@ -201,7 +205,8 @@ int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
}
static inline
-int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm)
+int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
+ unsigned long cur_seq, struct kgd_mem *mem)
{
return 0;
}
@@ -265,8 +270,10 @@ int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_
(&((struct amdgpu_fpriv *) \
((struct drm_file *)(drm_priv))->driver_priv)->vm)
+int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
+ struct file *filp, u32 pasid);
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
- struct file *filp, u32 pasid,
+ struct file *filp,
void **process_info,
struct dma_fence **ef);
void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index ab450f12c445..92131573cbb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -965,7 +965,9 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
* later stage when it is scheduled by another ioctl called by
* CRIU master process for the target pid for restore.
*/
- atomic_inc(&mem->invalid);
+ mutex_lock(&process_info->notifier_lock);
+ mem->invalid++;
+ mutex_unlock(&process_info->notifier_lock);
mutex_unlock(&process_info->lock);
return 0;
}
@@ -1302,6 +1304,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
return -ENOMEM;
mutex_init(&info->lock);
+ mutex_init(&info->notifier_lock);
INIT_LIST_HEAD(&info->vm_list_head);
INIT_LIST_HEAD(&info->kfd_bo_list);
INIT_LIST_HEAD(&info->userptr_valid_list);
@@ -1318,7 +1321,6 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
}
info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
- atomic_set(&info->evicted_bos, 0);
INIT_DELAYED_WORK(&info->restore_userptr_work,
amdgpu_amdkfd_restore_userptr_worker);
@@ -1373,6 +1375,7 @@ reserve_pd_fail:
put_pid(info->pid);
create_evict_fence_fail:
mutex_destroy(&info->lock);
+ mutex_destroy(&info->notifier_lock);
kfree(info);
}
return ret;
@@ -1427,10 +1430,9 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
amdgpu_bo_unreserve(bo);
}
-int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
- struct file *filp, u32 pasid,
- void **process_info,
- struct dma_fence **ef)
+int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
+ struct file *filp, u32 pasid)
+
{
struct amdgpu_fpriv *drv_priv;
struct amdgpu_vm *avm;
@@ -1441,10 +1443,6 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
return ret;
avm = &drv_priv->vm;
- /* Already a compute VM? */
- if (avm->process_info)
- return -EINVAL;
-
/* Free the original amdgpu allocated pasid,
* will be replaced with kfd allocated pasid.
*/
@@ -1453,14 +1451,36 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
amdgpu_vm_set_pasid(adev, avm, 0);
}
- /* Convert VM into a compute VM */
- ret = amdgpu_vm_make_compute(adev, avm);
+ ret = amdgpu_vm_set_pasid(adev, avm, pasid);
if (ret)
return ret;
- ret = amdgpu_vm_set_pasid(adev, avm, pasid);
+ return 0;
+}
+
+int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
+ struct file *filp,
+ void **process_info,
+ struct dma_fence **ef)
+{
+ struct amdgpu_fpriv *drv_priv;
+ struct amdgpu_vm *avm;
+ int ret;
+
+ ret = amdgpu_file_to_fpriv(filp, &drv_priv);
if (ret)
return ret;
+ avm = &drv_priv->vm;
+
+ /* Already a compute VM? */
+ if (avm->process_info)
+ return -EINVAL;
+
+ /* Convert VM into a compute VM */
+ ret = amdgpu_vm_make_compute(adev, avm);
+ if (ret)
+ return ret;
+
/* Initialize KFD part of the VM and process info */
ret = init_kfd_vm(avm, process_info, ef);
if (ret)
@@ -1497,6 +1517,7 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
cancel_delayed_work_sync(&process_info->restore_userptr_work);
put_pid(process_info->pid);
mutex_destroy(&process_info->lock);
+ mutex_destroy(&process_info->notifier_lock);
kfree(process_info);
}
}
@@ -1549,7 +1570,9 @@ int amdgpu_amdkfd_criu_resume(void *p)
mutex_lock(&pinfo->lock);
pr_debug("scheduling work\n");
- atomic_inc(&pinfo->evicted_bos);
+ mutex_lock(&pinfo->notifier_lock);
+ pinfo->evicted_bos++;
+ mutex_unlock(&pinfo->notifier_lock);
if (!READ_ONCE(pinfo->block_mmu_notifications)) {
ret = -EINVAL;
goto out_unlock;
@@ -1774,8 +1797,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
list_del(&bo_list_entry->head);
mutex_unlock(&process_info->lock);
- /* No more MMU notifiers */
- amdgpu_hmm_unregister(mem->bo);
+ /* Cleanup user pages and MMU notifiers */
+ if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
+ amdgpu_hmm_unregister(mem->bo);
+ mutex_lock(&process_info->notifier_lock);
+ amdgpu_ttm_tt_discard_user_pages(mem->bo->tbo.ttm, mem->range);
+ mutex_unlock(&process_info->notifier_lock);
+ }
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
if (unlikely(ret))
@@ -1865,6 +1893,16 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
*/
mutex_lock(&mem->process_info->lock);
+ /* Lock notifier lock. If we find an invalid userptr BO, we can be
+ * sure that the MMU notifier is no longer running
+ * concurrently and the queues are actually stopped
+ */
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+ mutex_lock(&mem->process_info->notifier_lock);
+ is_invalid_userptr = !!mem->invalid;
+ mutex_unlock(&mem->process_info->notifier_lock);
+ }
+
mutex_lock(&mem->lock);
domain = mem->domain;
@@ -2205,7 +2243,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
if (ret) {
- kfree(mem);
+ kfree(*mem);
return ret;
}
@@ -2242,34 +2280,38 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
*
* Runs in MMU notifier, may be in RECLAIM_FS context. This means it
* cannot do any memory allocations, and cannot take any locks that
- * are held elsewhere while allocating memory. Therefore this is as
- * simple as possible, using atomic counters.
+ * are held elsewhere while allocating memory.
*
* It doesn't do anything to the BO itself. The real work happens in
* restore, where we get updated page addresses. This function only
* ensures that GPU access to the BO is stopped.
*/
-int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
- struct mm_struct *mm)
+int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
+ unsigned long cur_seq, struct kgd_mem *mem)
{
struct amdkfd_process_info *process_info = mem->process_info;
- int evicted_bos;
int r = 0;
- /* Do not process MMU notifications until stage-4 IOCTL is received */
+ /* Do not process MMU notifications during CRIU restore until
+ * KFD_CRIU_OP_RESUME IOCTL is received
+ */
if (READ_ONCE(process_info->block_mmu_notifications))
return 0;
- atomic_inc(&mem->invalid);
- evicted_bos = atomic_inc_return(&process_info->evicted_bos);
- if (evicted_bos == 1) {
+ mutex_lock(&process_info->notifier_lock);
+ mmu_interval_set_seq(mni, cur_seq);
+
+ mem->invalid++;
+ if (++process_info->evicted_bos == 1) {
/* First eviction, stop the queues */
- r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
+ r = kgd2kfd_quiesce_mm(mni->mm,
+ KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
if (r)
pr_err("Failed to quiesce KFD\n");
schedule_delayed_work(&process_info->restore_userptr_work,
msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
}
+ mutex_unlock(&process_info->notifier_lock);
return r;
}
@@ -2286,54 +2328,58 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
struct kgd_mem *mem, *tmp_mem;
struct amdgpu_bo *bo;
struct ttm_operation_ctx ctx = { false, false };
- int invalid, ret;
+ uint32_t invalid;
+ int ret = 0;
- /* Move all invalidated BOs to the userptr_inval_list and
- * release their user pages by migration to the CPU domain
- */
+ mutex_lock(&process_info->notifier_lock);
+
+ /* Move all invalidated BOs to the userptr_inval_list */
list_for_each_entry_safe(mem, tmp_mem,
&process_info->userptr_valid_list,
- validate_list.head) {
- if (!atomic_read(&mem->invalid))
- continue; /* BO is still valid */
-
- bo = mem->bo;
-
- if (amdgpu_bo_reserve(bo, true))
- return -EAGAIN;
- amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
- ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- amdgpu_bo_unreserve(bo);
- if (ret) {
- pr_err("%s: Failed to invalidate userptr BO\n",
- __func__);
- return -EAGAIN;
- }
-
- list_move_tail(&mem->validate_list.head,
- &process_info->userptr_inval_list);
- }
-
- if (list_empty(&process_info->userptr_inval_list))
- return 0; /* All evicted userptr BOs were freed */
+ validate_list.head)
+ if (mem->invalid)
+ list_move_tail(&mem->validate_list.head,
+ &process_info->userptr_inval_list);
/* Go through userptr_inval_list and update any invalid user_pages */
list_for_each_entry(mem, &process_info->userptr_inval_list,
validate_list.head) {
- struct hmm_range *range;
-
- invalid = atomic_read(&mem->invalid);
+ invalid = mem->invalid;
if (!invalid)
/* BO hasn't been invalidated since the last
- * revalidation attempt. Keep its BO list.
+ * revalidation attempt. Keep its page list.
*/
continue;
bo = mem->bo;
+ amdgpu_ttm_tt_discard_user_pages(bo->tbo.ttm, mem->range);
+ mem->range = NULL;
+
+ /* BO reservations and getting user pages (hmm_range_fault)
+ * must happen outside the notifier lock
+ */
+ mutex_unlock(&process_info->notifier_lock);
+
+ /* Move the BO to system (CPU) domain if necessary to unmap
+ * and free the SG table
+ */
+ if (bo->tbo.resource->mem_type != TTM_PL_SYSTEM) {
+ if (amdgpu_bo_reserve(bo, true))
+ return -EAGAIN;
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ amdgpu_bo_unreserve(bo);
+ if (ret) {
+ pr_err("%s: Failed to invalidate userptr BO\n",
+ __func__);
+ return -EAGAIN;
+ }
+ }
+
/* Get updated user pages */
ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages,
- &range);
+ &mem->range);
if (ret) {
pr_debug("Failed %d to get user pages\n", ret);
@@ -2346,30 +2392,32 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
*/
if (ret != -EFAULT)
return ret;
- } else {
- /*
- * FIXME: Cannot ignore the return code, must hold
- * notifier_lock
- */
- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range);
+ ret = 0;
}
+ mutex_lock(&process_info->notifier_lock);
+
/* Mark the BO as valid unless it was invalidated
* again concurrently.
*/
- if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
- return -EAGAIN;
+ if (mem->invalid != invalid) {
+ ret = -EAGAIN;
+ goto unlock_out;
+ }
+ mem->invalid = 0;
}
- return 0;
+unlock_out:
+ mutex_unlock(&process_info->notifier_lock);
+
+ return ret;
}
/* Validate invalid userptr BOs
*
- * Validates BOs on the userptr_inval_list, and moves them back to the
- * userptr_valid_list. Also updates GPUVM page tables with new page
- * addresses and waits for the page table updates to complete.
+ * Validates BOs on the userptr_inval_list. Also updates GPUVM page tables
+ * with new page addresses and waits for the page table updates to complete.
*/
static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
{
@@ -2440,9 +2488,6 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
}
}
- list_move_tail(&mem->validate_list.head,
- &process_info->userptr_valid_list);
-
/* Update mapping. If the BO was not validated
* (because we couldn't get user pages), this will
* clear the page table entries, which will result in
@@ -2458,7 +2503,9 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
if (ret) {
pr_err("%s: update PTE failed\n", __func__);
/* make sure this gets validated again */
- atomic_inc(&mem->invalid);
+ mutex_lock(&process_info->notifier_lock);
+ mem->invalid++;
+ mutex_unlock(&process_info->notifier_lock);
goto unreserve_out;
}
}
@@ -2478,6 +2525,36 @@ out_no_mem:
return ret;
}
+/* Confirm that all user pages are valid while holding the notifier lock
+ *
+ * Moves valid BOs from the userptr_inval_list back to userptr_val_list.
+ */
+static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_info)
+{
+ struct kgd_mem *mem, *tmp_mem;
+ int ret = 0;
+
+ list_for_each_entry_safe(mem, tmp_mem,
+ &process_info->userptr_inval_list,
+ validate_list.head) {
+ bool valid = amdgpu_ttm_tt_get_user_pages_done(
+ mem->bo->tbo.ttm, mem->range);
+
+ mem->range = NULL;
+ if (!valid) {
+ WARN(!mem->invalid, "Invalid BO not marked invalid");
+ ret = -EAGAIN;
+ continue;
+ }
+ WARN(mem->invalid, "Valid BO is marked invalid");
+
+ list_move_tail(&mem->validate_list.head,
+ &process_info->userptr_valid_list);
+ }
+
+ return ret;
+}
+
/* Worker callback to restore evicted userptr BOs
*
* Tries to update and validate all userptr BOs. If successful and no
@@ -2492,9 +2569,11 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
restore_userptr_work);
struct task_struct *usertask;
struct mm_struct *mm;
- int evicted_bos;
+ uint32_t evicted_bos;
- evicted_bos = atomic_read(&process_info->evicted_bos);
+ mutex_lock(&process_info->notifier_lock);
+ evicted_bos = process_info->evicted_bos;
+ mutex_unlock(&process_info->notifier_lock);
if (!evicted_bos)
return;
@@ -2517,9 +2596,6 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
* and we can just restart the queues.
*/
if (!list_empty(&process_info->userptr_inval_list)) {
- if (atomic_read(&process_info->evicted_bos) != evicted_bos)
- goto unlock_out; /* Concurrent eviction, try again */
-
if (validate_invalid_user_pages(process_info))
goto unlock_out;
}
@@ -2528,10 +2604,17 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
* be a first eviction that calls quiesce_mm. The eviction
* reference counting inside KFD will handle this case.
*/
- if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
- evicted_bos)
- goto unlock_out;
- evicted_bos = 0;
+ mutex_lock(&process_info->notifier_lock);
+ if (process_info->evicted_bos != evicted_bos)
+ goto unlock_notifier_out;
+
+ if (confirm_valid_user_pages_locked(process_info)) {
+ WARN(1, "User pages unexpectedly invalid");
+ goto unlock_notifier_out;
+ }
+
+ process_info->evicted_bos = evicted_bos = 0;
+
if (kgd2kfd_resume_mm(mm)) {
pr_err("%s: Failed to resume KFD\n", __func__);
/* No recovery from this failure. Probably the CP is
@@ -2539,6 +2622,8 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
*/
}
+unlock_notifier_out:
+ mutex_unlock(&process_info->notifier_lock);
unlock_out:
mutex_unlock(&process_info->lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 9b97fa39d47a..ac6fe0ae4609 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -104,7 +104,7 @@ void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev)
static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev,
struct vram_usagebyfirmware_v2_1 *fw_usage, int *usage_bytes)
{
- uint32_t start_addr, fw_size, drv_size;
+ u32 start_addr, fw_size, drv_size;
start_addr = le32_to_cpu(fw_usage->start_address_in_kb);
fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb);
@@ -116,7 +116,7 @@ static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev,
drv_size);
if ((start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) ==
- (uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
+ (u32)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
/* Firmware request VRAM reservation for SR-IOV */
adev->mman.fw_vram_usage_start_offset = (start_addr &
@@ -133,7 +133,7 @@ static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev,
static int amdgpu_atomfirmware_allocate_fb_v2_2(struct amdgpu_device *adev,
struct vram_usagebyfirmware_v2_2 *fw_usage, int *usage_bytes)
{
- uint32_t fw_start_addr, fw_size, drv_start_addr, drv_size;
+ u32 fw_start_addr, fw_size, drv_start_addr, drv_size;
fw_start_addr = le32_to_cpu(fw_usage->fw_region_start_address_in_kb);
fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb);
@@ -147,14 +147,18 @@ static int amdgpu_atomfirmware_allocate_fb_v2_2(struct amdgpu_device *adev,
drv_start_addr,
drv_size);
- if ((fw_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << 30)) == 0) {
+ if (amdgpu_sriov_vf(adev) &&
+ ((fw_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION <<
+ ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) {
/* Firmware request VRAM reservation for SR-IOV */
adev->mman.fw_vram_usage_start_offset = (fw_start_addr &
(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
adev->mman.fw_vram_usage_size = fw_size << 10;
}
- if ((drv_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << 30)) == 0) {
+ if (amdgpu_sriov_vf(adev) &&
+ ((drv_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION <<
+ ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) {
/* driver request VRAM reservation for SR-IOV */
adev->mman.drv_vram_usage_start_offset = (drv_start_addr &
(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
@@ -172,8 +176,8 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
vram_usagebyfirmware);
struct vram_usagebyfirmware_v2_1 *fw_usage_v2_1;
struct vram_usagebyfirmware_v2_2 *fw_usage_v2_2;
- uint16_t data_offset;
- uint8_t frev, crev;
+ u16 data_offset;
+ u8 frev, crev;
int usage_bytes = 0;
if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index e363f56c72af..30c28a69e847 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -317,6 +317,7 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
if (!found)
return false;
+ pci_dev_put(pdev);
adev->bios = kmalloc(size, GFP_KERNEL);
if (!adev->bios) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index e1320edfc527..2ebbc6382a06 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -327,7 +327,6 @@ static void amdgpu_connector_free_edid(struct drm_connector *connector)
kfree(amdgpu_connector->edid);
amdgpu_connector->edid = NULL;
- drm_connector_update_edid_property(connector, NULL);
}
static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 74ccbd566777..076ae400d099 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2473,6 +2473,11 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (!amdgpu_sriov_vf(adev)) {
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+ if (WARN_ON(!hive)) {
+ r = -ENOENT;
+ goto init_failed;
+ }
+
if (!hive->reset_domain ||
!amdgpu_reset_get_reset_domain(hive->reset_domain)) {
r = -ENOENT;
@@ -3011,14 +3016,15 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
continue;
}
- /* skip suspend of gfx and psp for S0ix
+ /* skip suspend of gfx/mes and psp for S0ix
* gfx is in gfxoff state, so on resume it will exit gfxoff just
* like at runtime. PSP is also part of the always on hardware
* so no need to suspend it.
*/
if (adev->in_s0ix &&
(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX))
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
continue;
/* XXX handle errors */
@@ -4105,6 +4111,11 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
adev->in_suspend = true;
+ /* Evict the majority of BOs before grabbing the full access */
+ r = amdgpu_device_evict_resources(adev);
+ if (r)
+ return r;
+
if (amdgpu_sriov_vf(adev)) {
amdgpu_virt_fini_data_exchange(adev);
r = amdgpu_virt_request_full_gpu(adev, false);
@@ -4179,21 +4190,15 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
r = amdgpu_device_ip_resume(adev);
- /* no matter what r is, always need to properly release full GPU */
- if (amdgpu_sriov_vf(adev)) {
- amdgpu_virt_init_data_exchange(adev);
- amdgpu_virt_release_full_gpu(adev, true);
- }
-
if (r) {
dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
- return r;
+ goto exit;
}
amdgpu_fence_driver_hw_init(adev);
r = amdgpu_device_ip_late_init(adev);
if (r)
- return r;
+ goto exit;
queue_delayed_work(system_wq, &adev->delayed_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
@@ -4201,12 +4206,19 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
if (!adev->in_s0ix) {
r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
if (r)
- return r;
+ goto exit;
+ }
+
+exit:
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_init_data_exchange(adev);
+ amdgpu_virt_release_full_gpu(adev, true);
}
+ if (r)
+ return r;
+
/* Make sure IB tests flushed */
- if (amdgpu_sriov_vf(adev))
- amdgpu_irq_gpu_reset_resume_helper(adev);
flush_delayed_work(&adev->delayed_init_work);
if (adev->in_s0ix) {
@@ -5043,6 +5055,8 @@ static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
pm_runtime_enable(&(p->dev));
pm_runtime_resume(&(p->dev));
}
+
+ pci_dev_put(p);
}
static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
@@ -5081,6 +5095,7 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
if (expires < ktime_get_mono_fast_ns()) {
dev_warn(adev->dev, "failed to suspend display audio\n");
+ pci_dev_put(p);
/* TODO: abort the succeeding gpu reset? */
return -ETIMEDOUT;
}
@@ -5088,6 +5103,7 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
pm_runtime_disable(&(p->dev));
+ pci_dev_put(p);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 6b48178455bc..1bbd56029a4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -1512,6 +1512,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
amdgpu_device_ip_block_add(adev, &soc21_common_ip_block);
break;
default:
@@ -1556,6 +1557,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block);
break;
default:
@@ -1641,6 +1643,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 7):
case IP_VERSION(13, 0, 8):
case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 11):
amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block);
break;
case IP_VERSION(13, 0, 4):
@@ -1691,6 +1694,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 7):
case IP_VERSION(13, 0, 8):
case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 11):
amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block);
break;
default:
@@ -1702,11 +1706,13 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
return 0;
}
+#if defined(CONFIG_DRM_AMD_DC)
static void amdgpu_discovery_set_sriov_display(struct amdgpu_device *adev)
{
amdgpu_device_set_sriov_virtual_display(adev);
amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
}
+#endif
static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
{
@@ -1802,6 +1808,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block);
break;
default:
@@ -1965,6 +1972,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block);
adev->enable_mes = true;
adev->enable_mes_kiq = true;
@@ -2195,6 +2203,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->family = AMDGPU_FAMILY_GC_11_0_0;
break;
case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
adev->family = AMDGPU_FAMILY_GC_11_0_1;
break;
default:
@@ -2212,6 +2221,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 7):
case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
adev->flags |= AMD_IS_APU;
break;
default:
@@ -2268,6 +2278,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->nbio.hdp_flush_reg = &nbio_v4_3_hdp_flush_reg;
break;
case IP_VERSION(7, 7, 0):
+ case IP_VERSION(7, 7, 1):
adev->nbio.funcs = &nbio_v7_7_funcs;
adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg;
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 5697d456d7d0..b4f2d61ea0d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -231,17 +231,18 @@ module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444);
/**
* DOC: gartsize (uint)
- * Restrict the size of GART in Mib (32, 64, etc.) for testing. The default is -1 (The size depends on asic).
+ * Restrict the size of GART (for kernel use) in Mib (32, 64, etc.) for testing.
+ * The default is -1 (The size depends on asic).
*/
-MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)");
+MODULE_PARM_DESC(gartsize, "Size of kernel GART to setup in megabytes (32, 64, etc., -1=auto)");
module_param_named(gartsize, amdgpu_gart_size, uint, 0600);
/**
* DOC: gttsize (int)
- * Restrict the size of GTT domain in MiB for testing. The default is -1 (It's VRAM size if 3GB < VRAM < 3/4 RAM,
- * otherwise 3/4 RAM size).
+ * Restrict the size of GTT domain (for userspace use) in MiB for testing.
+ * The default is -1 (Use 1/2 RAM, minimum value is 3GB).
*/
-MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)");
+MODULE_PARM_DESC(gttsize, "Size of the GTT userspace domain in megabytes (-1 = auto)");
module_param_named(gttsize, amdgpu_gtt_size, int, 0600);
/**
@@ -2038,6 +2039,15 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
"See modparam exp_hw_support\n");
return -ENODEV;
}
+ /* differentiate between P10 and P11 asics with the same DID */
+ if (pdev->device == 0x67FF &&
+ (pdev->revision == 0xE3 ||
+ pdev->revision == 0xE7 ||
+ pdev->revision == 0xF3 ||
+ pdev->revision == 0xF7)) {
+ flags &= ~AMD_ASIC_MASK;
+ flags |= CHIP_POLARIS10;
+ }
/* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU mapping,
* however, SME requires an indirect IOMMU mapping because the encryption
@@ -2107,12 +2117,12 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
pci_set_drvdata(pdev, ddev);
- ret = amdgpu_driver_load_kms(adev, ent->driver_data);
+ ret = amdgpu_driver_load_kms(adev, flags);
if (ret)
goto err_pci;
retry_init:
- ret = drm_dev_register(ddev, ent->driver_data);
+ ret = drm_dev_register(ddev, flags);
if (ret == -EAGAIN && ++retry <= 3) {
DRM_INFO("retry init %d\n", retry);
/* Don't request EX mode too frequently which is attacking */
@@ -2570,6 +2580,8 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
amdgpu_device_baco_enter(drm_dev);
}
+ dev_dbg(&pdev->dev, "asic/device is runtime suspended\n");
+
return 0;
}
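
Background for the gartsize/gttsize DOC entries in the amdgpu_drv.c hunk above: both are ordinary amdgpu module parameters, so they can be overridden at module load time. A minimal sketch, using illustrative sizes and the conventional /etc/modprobe.d location (neither the file nor the values are part of this patch):

    # /etc/modprobe.d/amdgpu.conf  -- example values only
    # gartsize: kernel GART size in MiB; gttsize: userspace GTT domain size in MiB
    options amdgpu gartsize=512 gttsize=8192

Equivalently, the same settings can be passed on the kernel command line as amdgpu.gartsize=512 amdgpu.gttsize=8192.
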
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index d0d99ed607dd..00444203220d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -55,6 +55,7 @@ struct amdgpu_fence {
/* RB, DMA, etc. */
struct amdgpu_ring *ring;
+ ktime_t start_timestamp;
};
static struct kmem_cache *amdgpu_fence_slab;
@@ -199,6 +200,8 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
}
}
+ to_amdgpu_fence(fence)->start_timestamp = ktime_get();
+
/* This function can't be called concurrently anyway, otherwise
* emitting the fence would mess up the hardware ring buffer.
*/
@@ -407,6 +410,57 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
}
/**
+ * amdgpu_fence_last_unsignaled_time_us - the time fence emitted until now
+ * @ring: ring the fence is associated with
+ *
+ * Find the earliest fence unsignaled until now, calculate the time delta
+ * between the time fence emitted and now.
+ */
+u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ struct dma_fence *fence;
+ uint32_t last_seq, sync_seq;
+
+ last_seq = atomic_read(&ring->fence_drv.last_seq);
+ sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
+ if (last_seq == sync_seq)
+ return 0;
+
+ ++last_seq;
+ last_seq &= drv->num_fences_mask;
+ fence = drv->fences[last_seq];
+ if (!fence)
+ return 0;
+
+ return ktime_us_delta(ktime_get(),
+ to_amdgpu_fence(fence)->start_timestamp);
+}
+
+/**
+ * amdgpu_fence_update_start_timestamp - update the timestamp of the fence
+ * @ring: ring the fence is associated with
+ * @seq: the fence seq number to update.
+ * @timestamp: the start timestamp to update.
+ *
+ * The function called at the time the fence and related ib is about to
+ * resubmit to gpu in MCBP scenario. Thus we do not consider race condition
+ * with amdgpu_fence_process to modify the same fence.
+ */
+void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, ktime_t timestamp)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ struct dma_fence *fence;
+
+ seq &= drv->num_fences_mask;
+ fence = drv->fences[seq];
+ if (!fence)
+ return;
+
+ to_amdgpu_fence(fence)->start_timestamp = timestamp;
+}
+
+/**
* amdgpu_fence_driver_start_ring - make the fence driver
* ready for use on the requested ring.
*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
index 2c38ac7bc643..4620c4712ce3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
@@ -64,7 +64,8 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr)
sizeof(atom_ctx->vbios_version)) ||
strnstr(atom_ctx->vbios_version, "D163",
sizeof(atom_ctx->vbios_version))) {
- *fru_addr = FRU_EEPROM_MADDR_6;
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_6;
return true;
} else {
return false;
@@ -83,7 +84,8 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr)
sizeof(atom_ctx->vbios_version))) {
return false;
} else {
- *fru_addr = FRU_EEPROM_MADDR_6;
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_6;
return true;
}
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index f8f9d68d69ff..00edc7002ee8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -257,7 +257,7 @@ static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_str
* becoming writable and makes is_cow_mapping(vm_flags) false.
*/
if (is_cow_mapping(vma->vm_flags) &&
- !(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
+ !(vma->vm_flags & VM_ACCESS_FLAGS))
vma->vm_flags &= ~VM_MAYWRITE;
return drm_gem_ttm_mmap(obj, vma);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 832b3807f1d6..b3df4787877e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -33,6 +33,7 @@
#include "amdgpu_imu.h"
#include "soc15.h"
#include "amdgpu_ras.h"
+#include "amdgpu_ring_mux.h"
/* GFX current status */
#define AMDGPU_GFX_NORMAL_MODE 0x00000000L
@@ -352,6 +353,9 @@ struct amdgpu_gfx {
struct amdgpu_gfx_ras *ras;
bool is_poweron;
+
+ struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
+ struct amdgpu_ring_mux muxer;
};
#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 3169a942dbbb..12871b71b07b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -480,6 +480,12 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
unsigned i;
unsigned vmhub, inv_eng;
+ if (adev->enable_mes) {
+ /* reserve engine 5 for firmware */
+ for (vmhub = 0; vmhub < AMDGPU_MAX_VMHUBS; vmhub++)
+ vm_inv_engs[vmhub] &= ~(1 << 5);
+ }
+
for (i = 0; i < adev->num_rings; ++i) {
ring = adev->rings[i];
vmhub = ring->funcs->vmhub;
@@ -544,6 +550,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
/* YELLOW_CARP*/
case IP_VERSION(10, 3, 3):
case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
/* Don't enable it by default yet.
*/
if (amdgpu_tmz < 1) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
index a48ea62b12b0..2dadcfe43d03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -51,6 +51,8 @@
#include "amdgpu_amdkfd.h"
#include "amdgpu_hmm.h"
+#define MAX_WALK_BYTE (2UL << 30)
+
/**
* amdgpu_hmm_invalidate_gfx - callback to notify about mm change
*
@@ -103,17 +105,11 @@ static bool amdgpu_hmm_invalidate_hsa(struct mmu_interval_notifier *mni,
unsigned long cur_seq)
{
struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
if (!mmu_notifier_range_blockable(range))
return false;
- mutex_lock(&adev->notifier_lock);
-
- mmu_interval_set_seq(mni, cur_seq);
-
- amdgpu_amdkfd_evict_userptr(bo->kfd_bo, bo->notifier.mm);
- mutex_unlock(&adev->notifier_lock);
+ amdgpu_amdkfd_evict_userptr(mni, cur_seq, bo->kfd_bo);
return true;
}
@@ -163,6 +159,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
struct hmm_range **phmm_range)
{
struct hmm_range *hmm_range;
+ unsigned long end;
unsigned long timeout;
unsigned long i;
unsigned long *pfns;
@@ -184,25 +181,42 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
hmm_range->default_flags |= HMM_PFN_REQ_WRITE;
hmm_range->hmm_pfns = pfns;
hmm_range->start = start;
- hmm_range->end = start + npages * PAGE_SIZE;
+ end = start + npages * PAGE_SIZE;
hmm_range->dev_private_owner = owner;
- /* Assuming 512MB takes maxmium 1 second to fault page address */
- timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT;
- timeout = jiffies + msecs_to_jiffies(timeout);
+ do {
+ hmm_range->end = min(hmm_range->start + MAX_WALK_BYTE, end);
+
+ pr_debug("hmm range: start = 0x%lx, end = 0x%lx",
+ hmm_range->start, hmm_range->end);
+
+ /* Assuming 512MB takes maxmium 1 second to fault page address */
+ timeout = max((hmm_range->end - hmm_range->start) >> 29, 1UL);
+ timeout *= HMM_RANGE_DEFAULT_TIMEOUT;
+ timeout = jiffies + msecs_to_jiffies(timeout);
retry:
- hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
- r = hmm_range_fault(hmm_range);
- if (unlikely(r)) {
- /*
- * FIXME: This timeout should encompass the retry from
- * mmu_interval_read_retry() as well.
- */
- if (r == -EBUSY && !time_after(jiffies, timeout))
- goto retry;
- goto out_free_pfns;
- }
+ hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
+ r = hmm_range_fault(hmm_range);
+ if (unlikely(r)) {
+ /*
+ * FIXME: This timeout should encompass the retry from
+ * mmu_interval_read_retry() as well.
+ */
+ if (r == -EBUSY && !time_after(jiffies, timeout))
+ goto retry;
+ goto out_free_pfns;
+ }
+
+ if (hmm_range->end == end)
+ break;
+ hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT;
+ hmm_range->start = hmm_range->end;
+ schedule();
+ } while (hmm_range->end < end);
+
+ hmm_range->start = start;
+ hmm_range->hmm_pfns = pfns;
/*
* Due to default_flags, all pages are HMM_PFN_VALID or
@@ -224,9 +238,9 @@ out_free_range:
return r;
}
-int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range)
+bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range)
{
- int r;
+ bool r;
r = mmu_interval_read_retry(hmm_range->notifier,
hmm_range->notifier_seq);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
index 13ed94d3b01b..e2edcd010ccc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
@@ -29,12 +29,13 @@
#include <linux/rwsem.h>
#include <linux/workqueue.h>
#include <linux/interval_tree.h>
+#include <linux/mmu_notifier.h>
int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
uint64_t start, uint64_t npages, bool readonly,
void *owner, struct page **pages,
struct hmm_range **phmm_range);
-int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
+bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
#if defined(CONFIG_HMM_MIRROR)
int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 774c77bb8f4e..bcccc348dbe2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
}
}
+ amdgpu_ring_ib_begin(ring);
if (job && ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
@@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
ring->funcs->emit_wave_limit(ring, false);
+ amdgpu_ring_ib_end(ring);
amdgpu_ring_commit(ring);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 2a9a2593dc18..fcb711a11a5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -165,6 +165,26 @@ bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
atomic_read(&adev->gpu_reset_counter);
}
+/* Check if we need to switch to another set of resources */
+static bool amdgpu_vmid_gds_switch_needed(struct amdgpu_vmid *id,
+ struct amdgpu_job *job)
+{
+ return id->gds_base != job->gds_base ||
+ id->gds_size != job->gds_size ||
+ id->gws_base != job->gws_base ||
+ id->gws_size != job->gws_size ||
+ id->oa_base != job->oa_base ||
+ id->oa_size != job->oa_size;
+}
+
+/* Check if the id is compatible with the job */
+static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id,
+ struct amdgpu_job *job)
+{
+ return id->pd_gpu_addr == job->vm_pd_addr &&
+ !amdgpu_vmid_gds_switch_needed(id, job);
+}
+
/**
* amdgpu_vmid_grab_idle - grab idle VMID
*
@@ -258,14 +278,15 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
{
struct amdgpu_device *adev = ring->adev;
unsigned vmhub = ring->funcs->vmhub;
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
uint64_t fence_context = adev->fence_context + ring->idx;
bool needs_flush = vm->use_cpu_for_update;
uint64_t updates = amdgpu_vm_tlb_seq(vm);
int r;
- *id = vm->reserved_vmid[vmhub];
+ *id = id_mgr->reserved;
if ((*id)->owner != vm->immediate.fence_context ||
- (*id)->pd_gpu_addr != job->vm_pd_addr ||
+ !amdgpu_vmid_compatible(*id, job) ||
(*id)->flushed_updates < updates ||
!(*id)->last_flush ||
((*id)->last_flush->context != fence_context &&
@@ -294,8 +315,8 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
if (r)
return r;
- (*id)->flushed_updates = updates;
job->vm_needs_flush = needs_flush;
+ job->spm_update_needed = true;
return 0;
}
@@ -333,7 +354,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
if ((*id)->owner != vm->immediate.fence_context)
continue;
- if ((*id)->pd_gpu_addr != job->vm_pd_addr)
+ if (!amdgpu_vmid_compatible(*id, job))
continue;
if (!(*id)->last_flush ||
@@ -355,7 +376,6 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
if (r)
return r;
- (*id)->flushed_updates = updates;
job->vm_needs_flush |= needs_flush;
return 0;
}
@@ -408,22 +428,30 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
if (r)
goto error;
- id->flushed_updates = amdgpu_vm_tlb_seq(vm);
job->vm_needs_flush = true;
}
list_move_tail(&id->list, &id_mgr->ids_lru);
}
- id->pd_gpu_addr = job->vm_pd_addr;
- id->owner = vm->immediate.fence_context;
-
+ job->gds_switch_needed = amdgpu_vmid_gds_switch_needed(id, job);
if (job->vm_needs_flush) {
+ id->flushed_updates = amdgpu_vm_tlb_seq(vm);
dma_fence_put(id->last_flush);
id->last_flush = NULL;
}
job->vmid = id - id_mgr->ids;
job->pasid = vm->pasid;
+
+ id->gds_base = job->gds_base;
+ id->gds_size = job->gds_size;
+ id->gws_base = job->gws_base;
+ id->gws_size = job->gws_size;
+ id->oa_base = job->oa_base;
+ id->oa_size = job->oa_size;
+ id->pd_gpu_addr = job->vm_pd_addr;
+ id->owner = vm->immediate.fence_context;
+
trace_amdgpu_vm_grab_id(vm, ring, job);
error:
@@ -435,31 +463,27 @@ int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
unsigned vmhub)
{
- struct amdgpu_vmid_mgr *id_mgr;
- struct amdgpu_vmid *idle;
- int r = 0;
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
- id_mgr = &adev->vm_manager.id_mgr[vmhub];
mutex_lock(&id_mgr->lock);
if (vm->reserved_vmid[vmhub])
goto unlock;
- if (atomic_inc_return(&id_mgr->reserved_vmid_num) >
- AMDGPU_VM_MAX_RESERVED_VMID) {
- DRM_ERROR("Over limitation of reserved vmid\n");
- atomic_dec(&id_mgr->reserved_vmid_num);
- r = -EINVAL;
- goto unlock;
+
+ ++id_mgr->reserved_use_count;
+ if (!id_mgr->reserved) {
+ struct amdgpu_vmid *id;
+
+ id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid,
+ list);
+ /* Remove from normal round robin handling */
+ list_del_init(&id->list);
+ id_mgr->reserved = id;
}
- /* Select the first entry VMID */
- idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list);
- list_del_init(&idle->list);
- vm->reserved_vmid[vmhub] = idle;
- mutex_unlock(&id_mgr->lock);
+ vm->reserved_vmid[vmhub] = true;
- return 0;
unlock:
mutex_unlock(&id_mgr->lock);
- return r;
+ return 0;
}
void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
@@ -469,12 +493,12 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
mutex_lock(&id_mgr->lock);
- if (vm->reserved_vmid[vmhub]) {
- list_add(&vm->reserved_vmid[vmhub]->list,
- &id_mgr->ids_lru);
- vm->reserved_vmid[vmhub] = NULL;
- atomic_dec(&id_mgr->reserved_vmid_num);
+ if (vm->reserved_vmid[vmhub] &&
+ !--id_mgr->reserved_use_count) {
+ /* give the reserved ID back to normal round robin */
+ list_add(&id_mgr->reserved->list, &id_mgr->ids_lru);
}
+ vm->reserved_vmid[vmhub] = false;
mutex_unlock(&id_mgr->lock);
}
@@ -541,7 +565,7 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
mutex_init(&id_mgr->lock);
INIT_LIST_HEAD(&id_mgr->ids_lru);
- atomic_set(&id_mgr->reserved_vmid_num, 0);
+ id_mgr->reserved_use_count = 0;
/* manage only VMIDs not used by KFD */
id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index 57efe61dceed..d1cc09b45da4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -67,7 +67,8 @@ struct amdgpu_vmid_mgr {
unsigned num_ids;
struct list_head ids_lru;
struct amdgpu_vmid ids[AMDGPU_NUM_VMID];
- atomic_t reserved_vmid_num;
+ struct amdgpu_vmid *reserved;
+ unsigned int reserved_use_count;
};
int amdgpu_pasid_alloc(unsigned int bits);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 032651a655f0..9e549923622b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -174,7 +174,12 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
drm_sched_job_cleanup(s_job);
amdgpu_sync_free(&job->explicit_sync);
- dma_fence_put(&job->hw_fence);
+
+ /* only put the hw fence if has embedded fence */
+ if (!job->hw_fence.ops)
+ kfree(job);
+ else
+ dma_fence_put(&job->hw_fence);
}
void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
@@ -245,15 +250,15 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
struct dma_fence *fence = NULL;
int r;
+ if (!fence && job->gang_submit)
+ fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit);
+
while (!fence && job->vm && !job->vmid) {
r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
if (r)
DRM_ERROR("Error getting VM ID (%d)\n", r);
}
- if (!fence && job->gang_submit)
- fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit);
-
return fence;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index a372802ea4e0..52f2e313ea17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -53,6 +53,8 @@ struct amdgpu_job {
uint32_t preamble_status;
uint32_t preemption_status;
bool vm_needs_flush;
+ bool gds_switch_needed;
+ bool spm_update_needed;
uint64_t vm_pd_addr;
unsigned vmid;
unsigned pasid;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index ba348046fcec..7aa7e52ca784 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -431,7 +431,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_VCN_DEC:
type = AMD_IP_BLOCK_TYPE_VCN;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- if (adev->uvd.harvest_config & (1 << i))
+ if (adev->vcn.harvest_config & (1 << i))
continue;
if (adev->vcn.inst[i].ring_dec.sched.ready)
@@ -443,7 +443,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_VCN_ENC:
type = AMD_IP_BLOCK_TYPE_VCN;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- if (adev->uvd.harvest_config & (1 << i))
+ if (adev->vcn.harvest_config & (1 << i))
continue;
for (j = 0; j < adev->vcn.num_enc_rings; j++)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 974e85d8b6cc..4e684c2afc70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -346,17 +346,16 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
* @adev: amdgpu device object
* @offset: offset of the BO
* @size: size of the BO
- * @domain: where to place it
* @bo_ptr: used to initialize BOs in structures
* @cpu_addr: optional CPU address mapping
*
- * Creates a kernel BO at a specific offset in the address space of the domain.
+ * Creates a kernel BO at a specific offset in VRAM.
*
* Returns:
* 0 on success, negative error code otherwise.
*/
int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
- uint64_t offset, uint64_t size, uint32_t domain,
+ uint64_t offset, uint64_t size,
struct amdgpu_bo **bo_ptr, void **cpu_addr)
{
struct ttm_operation_ctx ctx = { false, false };
@@ -366,8 +365,9 @@ int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
offset &= PAGE_MASK;
size = ALIGN(size, PAGE_SIZE);
- r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE, domain, bo_ptr,
- NULL, cpu_addr);
+ r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM, bo_ptr, NULL,
+ cpu_addr);
if (r)
return r;
@@ -422,6 +422,8 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
if (*bo == NULL)
return;
+ WARN_ON(amdgpu_ttm_adev((*bo)->tbo.bdev)->in_suspend);
+
if (likely(amdgpu_bo_reserve(*bo, true) == 0)) {
if (cpu_addr)
amdgpu_bo_kunmap(*bo);
@@ -446,27 +448,24 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
/*
* If GTT is part of requested domains the check must succeed to
- * allow fall back to GTT
+ * allow fall back to GTT.
*/
if (domain & AMDGPU_GEM_DOMAIN_GTT) {
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
- if (size < man->size)
+ if (man && size < man->size)
return true;
- else
- goto fail;
- }
-
- if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
+ else if (!man)
+ WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized");
+ goto fail;
+ } else if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
- if (size < man->size)
+ if (man && size < man->size)
return true;
- else
- goto fail;
+ goto fail;
}
-
/* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */
return true;
@@ -1510,7 +1509,8 @@ u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo)
uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
uint32_t domain)
{
- if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) {
+ if ((domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) &&
+ ((adev->asic_type == CHIP_CARRIZO) || (adev->asic_type == CHIP_STONEY))) {
domain = AMDGPU_GEM_DOMAIN_VRAM;
if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD)
domain = AMDGPU_GEM_DOMAIN_GTT;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 147b79c10cbb..93207badf83f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -284,7 +284,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
u32 domain, struct amdgpu_bo **bo_ptr,
u64 *gpu_addr, void **cpu_addr);
int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
- uint64_t offset, uint64_t size, uint32_t domain,
+ uint64_t offset, uint64_t size,
struct amdgpu_bo **bo_ptr, void **cpu_addr);
int amdgpu_bo_create_user(struct amdgpu_device *adev,
struct amdgpu_bo_param *bp,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 98dbf4e5aae9..7a2fc920739b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -165,6 +165,7 @@ static int psp_early_init(void *handle)
case IP_VERSION(13, 0, 5):
case IP_VERSION(13, 0, 8):
case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 11):
psp_v13_0_set_psp_funcs(psp);
psp->autoload_supported = true;
break;
@@ -198,6 +199,7 @@ void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx)
{
amdgpu_bo_free_kernel(&mem_ctx->shared_bo, &mem_ctx->shared_mc_addr,
&mem_ctx->shared_buf);
+ mem_ctx->shared_bo = NULL;
}
static void psp_free_shared_bufs(struct psp_context *psp)
@@ -208,6 +210,7 @@ static void psp_free_shared_bufs(struct psp_context *psp)
/* free TMR memory buffer */
pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr);
+ psp->tmr_bo = NULL;
/* free xgmi shared memory */
psp_ta_free_shared_buf(&psp->xgmi_context.context.mem_context);
@@ -510,26 +513,22 @@ static int psp_sw_fini(void *handle)
struct psp_gfx_cmd_resp *cmd = psp->cmd;
psp_memory_training_fini(psp);
- if (psp->sos_fw) {
- release_firmware(psp->sos_fw);
- psp->sos_fw = NULL;
- }
- if (psp->asd_fw) {
- release_firmware(psp->asd_fw);
- psp->asd_fw = NULL;
- }
- if (psp->ta_fw) {
- release_firmware(psp->ta_fw);
- psp->ta_fw = NULL;
- }
- if (psp->cap_fw) {
- release_firmware(psp->cap_fw);
- psp->cap_fw = NULL;
- }
- if (psp->toc_fw) {
- release_firmware(psp->toc_fw);
- psp->toc_fw = NULL;
- }
+
+ release_firmware(psp->sos_fw);
+ psp->sos_fw = NULL;
+
+ release_firmware(psp->asd_fw);
+ psp->asd_fw = NULL;
+
+ release_firmware(psp->ta_fw);
+ psp->ta_fw = NULL;
+
+ release_firmware(psp->cap_fw);
+ psp->cap_fw = NULL;
+
+ release_firmware(psp->toc_fw);
+ psp->toc_fw = NULL;
+
if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) ||
adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7))
psp_sysfs_fini(adev);
@@ -769,7 +768,7 @@ static int psp_load_toc(struct psp_context *psp,
/* Set up Trusted Memory Region */
static int psp_tmr_init(struct psp_context *psp)
{
- int ret;
+ int ret = 0;
int tmr_size;
void *tmr_buf;
void **pptr;
@@ -796,10 +795,12 @@ static int psp_tmr_init(struct psp_context *psp)
}
}
- pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
- ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT,
- AMDGPU_GEM_DOMAIN_VRAM,
- &psp->tmr_bo, &psp->tmr_mc_addr, pptr);
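+ /* Skip the allocation if the TMR BO already exists, e.g. when it was kept across suspend/resume */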
+ if (!psp->tmr_bo) {
+ pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
+ ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &psp->tmr_bo, &psp->tmr_mc_addr, pptr);
+ }
return ret;
}
@@ -857,7 +858,7 @@ static int psp_tmr_unload(struct psp_context *psp)
struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
psp_prep_tmr_unload_cmd_buf(psp, cmd);
- dev_info(psp->adev->dev, "free PSP TMR buffer\n");
+ dev_dbg(psp->adev->dev, "free PSP TMR buffer\n");
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
@@ -2727,8 +2728,6 @@ static int psp_suspend(void *handle)
}
out:
- psp_free_shared_bufs(psp);
-
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 077404a9c935..ad490c1e2f57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1267,7 +1267,7 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
struct amdgpu_ras *con =
container_of(attr, struct amdgpu_ras, features_attr);
- return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features);
+ return sysfs_emit(buf, "feature mask: 0x%x\n", con->features);
}
static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index d3558c34d406..dc474b809604 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -569,3 +569,15 @@ int amdgpu_ring_init_mqd(struct amdgpu_ring *ring)
return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop);
}
+
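+/* Forward IB begin/end notifications to the software ring muxer for software rings */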
+void amdgpu_ring_ib_begin(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_begin(ring);
+}
+
+void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_end(ring);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 82c178a9033a..f752c7ae7f60 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -39,6 +39,7 @@ struct amdgpu_vm;
#define AMDGPU_MAX_RINGS 28
#define AMDGPU_MAX_HWIP_RINGS 8
#define AMDGPU_MAX_GFX_RINGS 2
+#define AMDGPU_MAX_SW_GFX_RINGS 2
#define AMDGPU_MAX_COMPUTE_RINGS 8
#define AMDGPU_MAX_VCE_RINGS 3
#define AMDGPU_MAX_UVD_ENC_RINGS 2
@@ -59,6 +60,7 @@ enum amdgpu_ring_priority_level {
#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
#define AMDGPU_FENCE_FLAG_INT (1 << 1)
#define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2)
+#define AMDGPU_FENCE_FLAG_EXEC (1 << 3)
#define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
@@ -143,8 +145,13 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
uint32_t wait_seq,
signed long timeout);
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
+
void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop);
+u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring);
+void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq,
+ ktime_t timestamp);
+
/*
* Rings.
*/
@@ -279,6 +286,10 @@ struct amdgpu_ring {
bool is_mes_queue;
uint32_t hw_queue_id;
struct amdgpu_mes_ctx_data *mes_ctx;
+
+ bool is_sw_ring;
+ unsigned int entry_index;
+
};
#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
@@ -307,6 +318,9 @@ struct amdgpu_ring {
#define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
+void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_end(struct amdgpu_ring *ring);
+
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
void amdgpu_ring_commit(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
new file mode 100644
index 000000000000..62079f0e3ee8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -0,0 +1,514 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/slab.h>
+#include <drm/drm_print.h>
+
+#include "amdgpu_ring_mux.h"
+#include "amdgpu_ring.h"
+#include "amdgpu.h"
+
+#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
+#define AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US 10000
+
+static const struct ring_info {
+ unsigned int hw_pio;
+ const char *ring_name;
+} sw_ring_info[] = {
+ { AMDGPU_RING_PRIO_DEFAULT, "gfx_low"},
+ { AMDGPU_RING_PRIO_2, "gfx_high"},
+};
+
+static struct kmem_cache *amdgpu_mux_chunk_slab;
+
+static inline struct amdgpu_mux_entry *amdgpu_ring_mux_sw_entry(struct amdgpu_ring_mux *mux,
+ struct amdgpu_ring *ring)
+{
+ return ring->entry_index < mux->ring_entry_size ?
+ &mux->ring_entry[ring->entry_index] : NULL;
+}
+
+/* copy packets from the software ring in the range [begin, end) */
+static void amdgpu_ring_mux_copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
+ struct amdgpu_ring *ring,
+ u64 s_start, u64 s_end)
+{
+ u64 start, end;
+ struct amdgpu_ring *real_ring = mux->real_ring;
+
+ start = s_start & ring->buf_mask;
+ end = s_end & ring->buf_mask;
+
+ if (start == end) {
+ DRM_ERROR("no more data copied from sw ring\n");
+ return;
+ }
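+ /* The range can wrap around the software ring: copy the tail first, then the head. */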
+ if (start > end) {
+ amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end - start);
+ amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start],
+ (ring->ring_size >> 2) - start);
+ amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[0], end);
+ } else {
+ amdgpu_ring_alloc(real_ring, end - start);
+ amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start], end - start);
+ }
+}
+
+static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)
+{
+ struct amdgpu_mux_entry *e = NULL;
+ struct amdgpu_mux_chunk *chunk;
+ uint32_t seq, last_seq;
+ int i;
+
+ /* find the low priority entry */
+ if (!mux->s_resubmit)
+ return;
+
+ for (i = 0; i < mux->num_ring_entries; i++) {
+ if (mux->ring_entry[i].ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
+ e = &mux->ring_entry[i];
+ break;
+ }
+ }
+
+ if (!e) {
+ DRM_ERROR("%s no low priority ring found\n", __func__);
+ return;
+ }
+
+ last_seq = atomic_read(&e->ring->fence_drv.last_seq);
+ seq = mux->seqno_to_resubmit;
+ if (last_seq < seq) {
+ /* resubmit all the fences between (last_seq, seq] */
+ list_for_each_entry(chunk, &e->list, entry) {
+ if (chunk->sync_seq > last_seq && chunk->sync_seq <= seq) {
+ amdgpu_fence_update_start_timestamp(e->ring,
+ chunk->sync_seq,
+ ktime_get());
+ amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, e->ring,
+ chunk->start,
+ chunk->end);
+ mux->wptr_resubmit = chunk->end;
+ amdgpu_ring_commit(mux->real_ring);
+ }
+ }
+ }
+
+ del_timer(&mux->resubmit_timer);
+ mux->s_resubmit = false;
+}
+
+static void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux)
+{
+ mod_timer(&mux->resubmit_timer, jiffies + AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT);
+}
+
+static void amdgpu_mux_resubmit_fallback(struct timer_list *t)
+{
+ struct amdgpu_ring_mux *mux = from_timer(mux, t, resubmit_timer);
+
+ if (!spin_trylock(&mux->lock)) {
+ amdgpu_ring_mux_schedule_resubmit(mux);
+ DRM_ERROR("reschedule resubmit\n");
+ return;
+ }
+ amdgpu_mux_resubmit_chunks(mux);
+ spin_unlock(&mux->lock);
+}
+
+int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
+ unsigned int entry_size)
+{
+ mux->real_ring = ring;
+ mux->num_ring_entries = 0;
+
+ mux->ring_entry = kcalloc(entry_size, sizeof(struct amdgpu_mux_entry), GFP_KERNEL);
+ if (!mux->ring_entry)
+ return -ENOMEM;
+
+ mux->ring_entry_size = entry_size;
+ mux->s_resubmit = false;
+
+ amdgpu_mux_chunk_slab = kmem_cache_create("amdgpu_mux_chunk",
+ sizeof(struct amdgpu_mux_chunk), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!amdgpu_mux_chunk_slab) {
+ DRM_ERROR("create amdgpu_mux_chunk cache failed\n");
+ return -ENOMEM;
+ }
+
+ spin_lock_init(&mux->lock);
+ timer_setup(&mux->resubmit_timer, amdgpu_mux_resubmit_fallback, 0);
+
+ return 0;
+}
+
+void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
+{
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_mux_chunk *chunk, *chunk2;
+ int i;
+
+ for (i = 0; i < mux->num_ring_entries; i++) {
+ e = &mux->ring_entry[i];
+ list_for_each_entry_safe(chunk, chunk2, &e->list, entry) {
+ list_del(&chunk->entry);
+ kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
+ }
+ }
+ kmem_cache_destroy(amdgpu_mux_chunk_slab);
+ kfree(mux->ring_entry);
+ mux->ring_entry = NULL;
+ mux->num_ring_entries = 0;
+ mux->ring_entry_size = 0;
+}
+
+int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ struct amdgpu_mux_entry *e;
+
+ if (mux->num_ring_entries >= mux->ring_entry_size) {
+ DRM_ERROR("add sw ring exceeding max entry size\n");
+ return -ENOENT;
+ }
+
+ e = &mux->ring_entry[mux->num_ring_entries];
+ ring->entry_index = mux->num_ring_entries;
+ e->ring = ring;
+
+ INIT_LIST_HEAD(&e->list);
+ mux->num_ring_entries += 1;
+ return 0;
+}
+
+void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr)
+{
+ struct amdgpu_mux_entry *e;
+
+ spin_lock(&mux->lock);
+
+ if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT)
+ amdgpu_mux_resubmit_chunks(mux);
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry for sw ring\n");
+ spin_unlock(&mux->lock);
+ return;
+ }
+
+ /* Skip this wptr update while a preemption is in progress. */
+ if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && mux->pending_trailing_fence_signaled) {
+ spin_unlock(&mux->lock);
+ return;
+ }
+
+ e->sw_cptr = e->sw_wptr;
+ /* Update cptr if the packet was already copied by the resubmit path */
+ if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && e->sw_cptr < mux->wptr_resubmit)
+ e->sw_cptr = mux->wptr_resubmit;
+ e->sw_wptr = wptr;
+ e->start_ptr_in_hw_ring = mux->real_ring->wptr;
+
+ /* Skip copying packets that were already resubmitted. */
+ if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT || mux->wptr_resubmit < wptr) {
+ amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr);
+ e->end_ptr_in_hw_ring = mux->real_ring->wptr;
+ amdgpu_ring_commit(mux->real_ring);
+ } else {
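+ /* Packets were already copied during resubmission; just record where they end on the real ring. */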
+ e->end_ptr_in_hw_ring = mux->real_ring->wptr;
+ }
+ spin_unlock(&mux->lock);
+}
+
+u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ struct amdgpu_mux_entry *e;
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry for sw ring\n");
+ return 0;
+ }
+
+ return e->sw_wptr;
+}
+
+/**
+ * amdgpu_ring_mux_get_rptr - get the readptr of the software ring
+ * @mux: the multiplexer the software rings attach to
+ * @ring: the software ring of which we calculate the readptr
+ *
+ * The return value of the readptr is not precise while other rings could
+ * write data onto the real ring buffer. After overwriting on the real ring, we
+ * cannot decide whether our packets have been executed or not yet read. However,
+ * this function is only called by tools such as umr to collect the latest
+ * packets for hang analysis. We assume the hang happens near our latest
+ * submit. Thus we use the following logic to give a clue:
+ * If the readptr is between start and end, then we return the copy pointer
+ * plus the distance from start to readptr. If the readptr is before start, we
+ * return the copy pointer. Lastly, if the readptr is past end, we return the
+ * write pointer.
+ */
+u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ struct amdgpu_mux_entry *e;
+ u64 readp, offset, start, end;
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("no sw entry found!\n");
+ return 0;
+ }
+
+ readp = amdgpu_ring_get_rptr(mux->real_ring);
+
+ start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
+ end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
+ if (start > end) {
+ if (readp <= end)
+ readp += mux->real_ring->ring_size >> 2;
+ end += mux->real_ring->ring_size >> 2;
+ }
+
+ if (start <= readp && readp <= end) {
+ offset = readp - start;
+ e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
+ } else if (readp < start) {
+ e->sw_rptr = e->sw_cptr;
+ } else {
+ /* end < readptr */
+ e->sw_rptr = e->sw_wptr;
+ }
+
+ return e->sw_rptr;
+}
+
+u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+ WARN_ON(!ring->is_sw_ring);
+ return amdgpu_ring_mux_get_rptr(mux, ring);
+}
+
+u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+ WARN_ON(!ring->is_sw_ring);
+ return amdgpu_ring_mux_get_wptr(mux, ring);
+}
+
+void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+ WARN_ON(!ring->is_sw_ring);
+ amdgpu_ring_mux_set_wptr(mux, ring, ring->wptr);
+}
+
+/* Override insert_nop to prevent emitting nops to the software rings */
+void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ WARN_ON(!ring->is_sw_ring);
+}
+
+const char *amdgpu_sw_ring_name(int idx)
+{
+ return idx < ARRAY_SIZE(sw_ring_info) ?
+ sw_ring_info[idx].ring_name : NULL;
+}
+
+unsigned int amdgpu_sw_ring_priority(int idx)
+{
+ return idx < ARRAY_SIZE(sw_ring_info) ?
+ sw_ring_info[idx].hw_pio : AMDGPU_RING_PRIO_DEFAULT;
+}
+
+/* Scan the low priority rings for an old unsignaled fence while the high priority ring has none. */
+static int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux)
+{
+ struct amdgpu_ring *ring;
+ int i, need_preempt;
+
+ need_preempt = 0;
+ for (i = 0; i < mux->num_ring_entries; i++) {
+ ring = mux->ring_entry[i].ring;
+ if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT &&
+ amdgpu_fence_count_emitted(ring) > 0)
+ return 0;
+ if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT &&
+ amdgpu_fence_last_unsignaled_time_us(ring) >
+ AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US)
+ need_preempt = 1;
+ }
+ return need_preempt && !mux->s_resubmit;
+}
+
+/* Trigger Mid-Command Buffer Preemption (MCBP) and find if we need to resubmit. */
+static int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux)
+{
+ int r;
+
+ spin_lock(&mux->lock);
+ mux->pending_trailing_fence_signaled = true;
+ r = amdgpu_ring_preempt_ib(mux->real_ring);
+ spin_unlock(&mux->lock);
+ return r;
+}
+
+void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+ WARN_ON(!ring->is_sw_ring);
+ if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
+ if (amdgpu_mcbp_scan(mux) > 0)
+ amdgpu_mcbp_trigger_preempt(mux);
+ return;
+ }
+
+ amdgpu_ring_mux_start_ib(mux, ring);
+}
+
+void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+ WARN_ON(!ring->is_sw_ring);
+ if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
+ return;
+ amdgpu_ring_mux_end_ib(mux, ring);
+}
+
+void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_mux_chunk *chunk;
+
+ spin_lock(&mux->lock);
+ amdgpu_mux_resubmit_chunks(mux);
+ spin_unlock(&mux->lock);
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry!\n");
+ return;
+ }
+
+ chunk = kmem_cache_alloc(amdgpu_mux_chunk_slab, GFP_KERNEL);
+ if (!chunk) {
+ DRM_ERROR("alloc amdgpu_mux_chunk_slab failed\n");
+ return;
+ }
+
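+ /* Record where this IB starts on the software ring; the end and fence seqno are filled in by amdgpu_ring_mux_end_ib(). */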
+ chunk->start = ring->wptr;
+ list_add_tail(&chunk->entry, &e->list);
+}
+
+static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ uint32_t last_seq = 0;
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_mux_chunk *chunk, *tmp;
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry!\n");
+ return;
+ }
+
+ last_seq = atomic_read(&ring->fence_drv.last_seq);
+
+ list_for_each_entry_safe(chunk, tmp, &e->list, entry) {
+ if (chunk->sync_seq <= last_seq) {
+ list_del(&chunk->entry);
+ kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
+ }
+ }
+}
+
+void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_mux_chunk *chunk;
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry!\n");
+ return;
+ }
+
+ chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
+ if (!chunk) {
+ DRM_ERROR("cannot find chunk!\n");
+ return;
+ }
+
+ chunk->end = ring->wptr;
+ chunk->sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
+
+ scan_and_remove_signaled_chunk(mux, ring);
+}
+
+bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux)
+{
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_ring *ring = NULL;
+ int i;
+
+ if (!mux->pending_trailing_fence_signaled)
+ return false;
+
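+ /* Preemption has only completed once the trailing fence value has been written back by the CP. */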
+ if (mux->real_ring->trail_seq != le32_to_cpu(*mux->real_ring->trail_fence_cpu_addr))
+ return false;
+
+ for (i = 0; i < mux->num_ring_entries; i++) {
+ e = &mux->ring_entry[i];
+ if (e->ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
+ ring = e->ring;
+ break;
+ }
+ }
+
+ if (!ring) {
+ DRM_ERROR("cannot find low priority ring\n");
+ return false;
+ }
+
+ amdgpu_fence_process(ring);
+ if (amdgpu_fence_count_emitted(ring) > 0) {
+ mux->s_resubmit = true;
+ mux->seqno_to_resubmit = ring->fence_drv.sync_seq;
+ amdgpu_ring_mux_schedule_resubmit(mux);
+ }
+
+ mux->pending_trailing_fence_signaled = false;
+ return true;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
new file mode 100644
index 000000000000..4be45fc14954
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_RING_MUX__
+#define __AMDGPU_RING_MUX__
+
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include "amdgpu_ring.h"
+
+struct amdgpu_ring;
+
+/**
+ * struct amdgpu_mux_entry - the entry recording a software ring's copying information.
+ * @ring: the pointer to the software ring.
+ * @start_ptr_in_hw_ring: last start location copied to in the hardware ring.
+ * @end_ptr_in_hw_ring: last end location copied to in the hardware ring.
+ * @sw_cptr: the position of the copy pointer in the sw ring.
+ * @sw_rptr: the read pointer in software ring.
+ * @sw_wptr: the write pointer in software ring.
+ * @list: list head for amdgpu_mux_chunk
+ */
+struct amdgpu_mux_entry {
+ struct amdgpu_ring *ring;
+ u64 start_ptr_in_hw_ring;
+ u64 end_ptr_in_hw_ring;
+ u64 sw_cptr;
+ u64 sw_rptr;
+ u64 sw_wptr;
+ struct list_head list;
+};
+
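+/*
+ * struct amdgpu_ring_mux - multiplexes software rings onto one real hardware
+ * ring. It tracks per-ring copy state, the resubmission state needed after an
+ * MCBP preemption and a fallback timer for resubmission.
+ */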
+struct amdgpu_ring_mux {
+ struct amdgpu_ring *real_ring;
+
+ struct amdgpu_mux_entry *ring_entry;
+ unsigned int num_ring_entries;
+ unsigned int ring_entry_size;
+ /* the lock for copying data from different software rings */
+ spinlock_t lock;
+ bool s_resubmit;
+ uint32_t seqno_to_resubmit;
+ u64 wptr_resubmit;
+ struct timer_list resubmit_timer;
+
+ bool pending_trailing_fence_signaled;
+};
+
+/**
+ * struct amdgpu_mux_chunk - saves the location of an indirect buffer's packets on the software ring.
+ * @entry: the list entry.
+ * @sync_seq: the fence seqno related with the saved IB.
+ * @start: start location on the software ring.
+ * @end: end location on the software ring.
+ */
+struct amdgpu_mux_chunk {
+ struct list_head entry;
+ uint32_t sync_seq;
+ u64 start;
+ u64 end;
+};
+
+int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
+ unsigned int entry_size);
+void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux);
+int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr);
+u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux);
+
+u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
+u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
+void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
+const char *amdgpu_sw_ring_name(int idx);
+unsigned int amdgpu_sw_ring_priority(int idx);
+
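+/*
+ * Typical usage (see gfx_v9_0_sw_init): amdgpu_ring_mux_init() is called with
+ * the real gfx ring, each software ring is registered with
+ * amdgpu_ring_mux_add_sw_ring(), and the amdgpu_sw_ring_* callbacks above
+ * forward rptr/wptr accesses and IB begin/end notifications to the muxer.
+ */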
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 068c2d8495fd..28a7d2ea6661 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -695,8 +695,19 @@ out_unlock:
return r;
}
+/* amdgpu_ttm_tt_discard_user_pages - Discard range and pfn array allocations
+ */
+void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
+ struct hmm_range *range)
+{
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+
+ if (gtt && gtt->userptr && range)
+ amdgpu_hmm_range_get_pages_done(range);
+}
+
/*
- * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
+ * amdgpu_ttm_tt_get_user_pages_done - stop HMM from tracking CPU page table changes
* Check if the pages backing this ttm range have been invalidated
*
* Returns: true if pages are still valid
@@ -714,10 +725,6 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
WARN_ONCE(!range->hmm_pfns, "No user pages to check\n");
- /*
- * FIXME: Must always hold notifier_lock for this, and must
- * not ignore the return code.
- */
return !amdgpu_hmm_range_get_pages_done(range);
}
#endif
@@ -1545,7 +1552,7 @@ static void amdgpu_ttm_drv_reserve_vram_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->mman.drv_vram_usage_reserved_bo,
NULL,
- NULL);
+ &adev->mman.drv_vram_usage_va);
}
/**
@@ -1569,7 +1576,6 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
return amdgpu_bo_create_kernel_at(adev,
adev->mman.fw_vram_usage_start_offset,
adev->mman.fw_vram_usage_size,
- AMDGPU_GEM_DOMAIN_VRAM,
&adev->mman.fw_vram_usage_reserved_bo,
&adev->mman.fw_vram_usage_va);
}
@@ -1583,8 +1589,9 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
*/
static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev)
{
- uint64_t vram_size = adev->gmc.visible_vram_size;
+ u64 vram_size = adev->gmc.visible_vram_size;
+ adev->mman.drv_vram_usage_va = NULL;
adev->mman.drv_vram_usage_reserved_bo = NULL;
if (adev->mman.drv_vram_usage_size == 0 ||
@@ -1594,9 +1601,8 @@ static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev)
return amdgpu_bo_create_kernel_at(adev,
adev->mman.drv_vram_usage_start_offset,
adev->mman.drv_vram_usage_size,
- AMDGPU_GEM_DOMAIN_VRAM,
&adev->mman.drv_vram_usage_reserved_bo,
- NULL);
+ &adev->mman.drv_vram_usage_va);
}
/*
@@ -1675,7 +1681,6 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
ret = amdgpu_bo_create_kernel_at(adev,
ctx->c2p_train_data_offset,
ctx->train_data_size,
- AMDGPU_GEM_DOMAIN_VRAM,
&ctx->c2p_bo,
NULL);
if (ret) {
@@ -1689,7 +1694,6 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
ret = amdgpu_bo_create_kernel_at(adev,
adev->gmc.real_vram_size - adev->mman.discovery_tmr_size,
adev->mman.discovery_tmr_size,
- AMDGPU_GEM_DOMAIN_VRAM,
&adev->mman.discovery_memory,
NULL);
if (ret) {
@@ -1790,21 +1794,18 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
* avoid display artifacts while transitioning between pre-OS
* and driver. */
r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size,
- AMDGPU_GEM_DOMAIN_VRAM,
&adev->mman.stolen_vga_memory,
NULL);
if (r)
return r;
r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
adev->mman.stolen_extended_size,
- AMDGPU_GEM_DOMAIN_VRAM,
&adev->mman.stolen_extended_memory,
NULL);
if (r)
return r;
r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset,
adev->mman.stolen_reserved_size,
- AMDGPU_GEM_DOMAIN_VRAM,
&adev->mman.stolen_reserved_memory,
NULL);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index b391c8d076ff..e2cd5894afc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -90,6 +90,7 @@ struct amdgpu_mman {
u64 drv_vram_usage_start_offset;
u64 drv_vram_usage_size;
struct amdgpu_bo *drv_vram_usage_reserved_bo;
+ void *drv_vram_usage_va;
/* PAGE_SIZE'd BO for process memory r/w over SDMA. */
struct amdgpu_bo *sdma_access_bo;
@@ -158,6 +159,8 @@ uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
struct hmm_range **range);
+void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
+ struct hmm_range *range);
bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
struct hmm_range *range);
#else
@@ -167,6 +170,10 @@ static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
{
return -EPERM;
}
+static inline void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
+ struct hmm_range *range)
+{
+}
static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
struct hmm_range *range)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 33f3415096f7..b1622ac9949f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -156,6 +156,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
break;
case IP_VERSION(3, 0, 2):
fw_name = FIRMWARE_VANGOGH;
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
+ adev->vcn.indirect_sram = true;
break;
case IP_VERSION(3, 0, 16):
fw_name = FIRMWARE_DIMGREY_CAVEFISH;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index a226a6c48fb7..2994b9db196f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -395,7 +395,6 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
*/
if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
AMDGPU_GPU_PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
&bo, NULL))
DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
@@ -428,11 +427,17 @@ static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev,
struct eeprom_table_record bp;
uint64_t retired_page;
uint32_t bp_idx, bp_cnt;
+ void *vram_usage_va = NULL;
+
+ if (adev->mman.fw_vram_usage_va)
+ vram_usage_va = adev->mman.fw_vram_usage_va;
+ else
+ vram_usage_va = adev->mman.drv_vram_usage_va;
if (bp_block_size) {
bp_cnt = bp_block_size / sizeof(uint64_t);
for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) {
- retired_page = *(uint64_t *)(adev->mman.fw_vram_usage_va +
+ retired_page = *(uint64_t *)(vram_usage_va +
bp_block_offset + bp_idx * sizeof(uint64_t));
bp.retired_page = retired_page;
@@ -643,7 +648,9 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
adev->virt.fw_reserve.p_vf2pf = NULL;
adev->virt.vf2pf_update_interval_ms = 0;
- if (adev->mman.fw_vram_usage_va != NULL) {
+ if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
+ DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!");
+ } else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
/* go through this logic in ip_init and reset to init workqueue*/
amdgpu_virt_exchange_data(adev);
@@ -666,32 +673,40 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
uint32_t bp_block_size = 0;
struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL;
- if (adev->mman.fw_vram_usage_va != NULL) {
-
- adev->virt.fw_reserve.p_pf2vf =
- (struct amd_sriov_msg_pf2vf_info_header *)
- (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
- adev->virt.fw_reserve.p_vf2pf =
- (struct amd_sriov_msg_vf2pf_info_header *)
- (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+ if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
+ if (adev->mman.fw_vram_usage_va) {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+ adev->virt.fw_reserve.p_vf2pf =
+ (struct amd_sriov_msg_vf2pf_info_header *)
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+ } else if (adev->mman.drv_vram_usage_va) {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+ adev->virt.fw_reserve.p_vf2pf =
+ (struct amd_sriov_msg_vf2pf_info_header *)
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+ }
amdgpu_virt_read_pf2vf_data(adev);
amdgpu_virt_write_vf2pf_data(adev);
/* bad page handling for version 2 */
if (adev->virt.fw_reserve.p_pf2vf->version == 2) {
- pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf;
+ pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf;
- bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) |
- ((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000);
- bp_block_size = pf2vf_v2->bp_block_size;
+ bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) |
+ ((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000);
+ bp_block_size = pf2vf_v2->bp_block_size;
- if (bp_block_size && !adev->virt.ras_init_done)
- amdgpu_virt_init_ras_err_handler_data(adev);
+ if (bp_block_size && !adev->virt.ras_init_done)
+ amdgpu_virt_init_ras_err_handler_data(adev);
- if (adev->virt.ras_init_done)
- amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
- }
+ if (adev->virt.ras_init_done)
+ amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
+ }
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index fea25519227f..b9441ab457ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -46,22 +46,43 @@
/**
* DOC: GPUVM
*
- * GPUVM is similar to the legacy gart on older asics, however
- * rather than there being a single global gart table
- * for the entire GPU, there are multiple VM page tables active
- * at any given time. The VM page tables can contain a mix
- * vram pages and system memory pages and system memory pages
+ * GPUVM is the MMU functionality provided on the GPU.
+ * GPUVM is similar to the legacy GART on older asics, however
+ * rather than there being a single global GART table
+ * for the entire GPU, there can be multiple GPUVM page tables active
+ * at any given time. The GPUVM page tables can contain a mix
+ * of VRAM pages and system pages (both memory and MMIO), and system pages
* can be mapped as snooped (cached system pages) or unsnooped
* (uncached system pages).
- * Each VM has an ID associated with it and there is a page table
- * associated with each VMID. When executing a command buffer,
- * the kernel tells the ring what VMID to use for that command
+ *
+ * Each active GPUVM has an ID associated with it and there is a page table
+ * linked with each VMID. When executing a command buffer,
+ * the kernel tells the engine what VMID to use for that command
* buffer. VMIDs are allocated dynamically as commands are submitted.
* The userspace drivers maintain their own address space and the kernel
* sets up their pages tables accordingly when they submit their
* command buffers and a VMID is assigned.
- * Cayman/Trinity support up to 8 active VMs at any given time;
- * SI supports 16.
+ * The hardware supports up to 16 active GPUVMs at any given time.
+ *
+ * Each GPUVM is represented by a 1-2 or 1-5 level page table, depending
+ * on the ASIC family. GPUVM supports RWX attributes on each page as well
+ * as other features such as encryption and caching attributes.
+ *
+ * VMID 0 is special. It is the GPUVM used for the kernel driver. In
+ * addition to an aperture managed by a page table, VMID 0 also has
+ * several other apertures. There is an aperture for direct access to VRAM
+ * and there is a legacy AGP aperture which just forwards accesses directly
+ * to the matching system physical addresses (or IOVAs when an IOMMU is
+ * present). These apertures provide direct access to these memories without
+ * incurring the overhead of a page table. VMID 0 is used by the kernel
+ * driver for tasks like memory management.
+ *
+ * GPU clients (i.e., engines on the GPU) use GPUVM VMIDs to access memory.
+ * For user applications, each application can have their own unique GPUVM
+ * address space. The application manages the address space and the kernel
+ * driver manages the GPUVM page tables for each process. If a GPU client
+ * accesses an invalid page, it will generate a GPU page fault, similar to
+ * accessing an invalid page on a CPU.
*/
#define START(node) ((node)->start)
@@ -464,25 +485,20 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
unsigned vmhub = ring->funcs->vmhub;
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
- struct amdgpu_vmid *id;
- bool gds_switch_needed;
- bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
if (job->vmid == 0)
return false;
- id = &id_mgr->ids[job->vmid];
- gds_switch_needed = ring->funcs->emit_gds_switch && (
- id->gds_base != job->gds_base ||
- id->gds_size != job->gds_size ||
- id->gws_base != job->gws_base ||
- id->gws_size != job->gws_size ||
- id->oa_base != job->oa_base ||
- id->oa_size != job->oa_size);
-
- if (amdgpu_vmid_had_gpu_reset(adev, id))
+
+ if (job->vm_needs_flush || ring->has_compute_vm_bug)
return true;
- return vm_flush_needed || gds_switch_needed;
+ if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
+ return true;
+
+ if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
+ return true;
+
+ return false;
}
/**
@@ -504,27 +520,20 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
unsigned vmhub = ring->funcs->vmhub;
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
- bool gds_switch_needed = ring->funcs->emit_gds_switch && (
- id->gds_base != job->gds_base ||
- id->gds_size != job->gds_size ||
- id->gws_base != job->gws_base ||
- id->gws_size != job->gws_size ||
- id->oa_base != job->oa_base ||
- id->oa_size != job->oa_size);
+ bool spm_update_needed = job->spm_update_needed;
+ bool gds_switch_needed = ring->funcs->emit_gds_switch &&
+ job->gds_switch_needed;
bool vm_flush_needed = job->vm_needs_flush;
struct dma_fence *fence = NULL;
bool pasid_mapping_needed = false;
unsigned patch_offset = 0;
- bool update_spm_vmid_needed = (job->vm && (job->vm->reserved_vmid[vmhub] != NULL));
int r;
- if (update_spm_vmid_needed && adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid);
-
if (amdgpu_vmid_had_gpu_reset(adev, id)) {
gds_switch_needed = true;
vm_flush_needed = true;
pasid_mapping_needed = true;
+ spm_update_needed = true;
}
mutex_lock(&id_mgr->lock);
@@ -542,6 +551,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
return 0;
+ amdgpu_ring_ib_begin(ring);
if (ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
@@ -556,6 +566,17 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
if (pasid_mapping_needed)
amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
+ if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid);
+
+ if (!ring->is_mes_queue && ring->funcs->emit_gds_switch &&
+ gds_switch_needed) {
+ amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
+ job->gds_size, job->gws_base,
+ job->gws_size, job->oa_base,
+ job->oa_size);
+ }
+
if (vm_flush_needed || pasid_mapping_needed) {
r = amdgpu_fence_emit(ring, &fence, NULL, 0);
if (r)
@@ -580,20 +601,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
}
dma_fence_put(fence);
- if (!ring->is_mes_queue && ring->funcs->emit_gds_switch &&
- gds_switch_needed) {
- id->gds_base = job->gds_base;
- id->gds_size = job->gds_size;
- id->gws_base = job->gws_base;
- id->gws_size = job->gws_size;
- id->oa_base = job->oa_base;
- id->oa_size = job->oa_size;
- amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
- job->gds_size, job->gws_base,
- job->gws_size, job->oa_base,
- job->oa_size);
- }
-
if (ring->funcs->patch_cond_exec)
amdgpu_ring_patch_cond_exec(ring, patch_offset);
@@ -602,6 +609,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
amdgpu_ring_emit_switch_buffer(ring);
amdgpu_ring_emit_switch_buffer(ring);
}
+ amdgpu_ring_ib_end(ring);
return 0;
}
@@ -2361,7 +2369,6 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
union drm_amdgpu_vm *args = data;
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_fpriv *fpriv = filp->driver_priv;
- long timeout = msecs_to_jiffies(2000);
int r;
switch (args->in.op) {
@@ -2373,21 +2380,6 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return r;
break;
case AMDGPU_VM_OP_UNRESERVE_VMID:
- if (amdgpu_sriov_runtime(adev))
- timeout = 8 * timeout;
-
- /* Wait vm idle to make sure the vmid set in SPM_VMID is
- * not referenced anymore.
- */
- r = amdgpu_bo_reserve(fpriv->vm.root.bo, true);
- if (r)
- return r;
-
- r = amdgpu_vm_wait_idle(&fpriv->vm, timeout);
- if (r < 0)
- return r;
-
- amdgpu_bo_unreserve(fpriv->vm.root.bo);
amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0);
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 44157c6c0804..856a64bc7a89 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -119,9 +119,6 @@ struct amdgpu_bo_vm;
/* Reserve 2MB at top/bottom of address space for kernel use */
#define AMDGPU_VA_RESERVED_SIZE (2ULL << 20)
-/* max vmids dedicated for process */
-#define AMDGPU_VM_MAX_RESERVED_VMID 1
-
/* See vm_update_mode */
#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
#define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
@@ -298,8 +295,7 @@ struct amdgpu_vm {
struct dma_fence *last_unlocked;
unsigned int pasid;
- /* dedicated to vm */
- struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS];
+ bool reserved_vmid[AMDGPU_MAX_VMHUBS];
/* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
bool use_cpu_for_update;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index 59cf64216fbb..535cd6569bcc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -238,8 +238,10 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
/* Wait for PD/PT moves to be completed */
dma_resv_iter_begin(&cursor, bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL);
dma_resv_for_each_fence_unlocked(&cursor, fence) {
+ dma_fence_get(fence);
r = drm_sched_job_add_dependency(&p->job->base, fence);
if (r) {
+ dma_fence_put(fence);
dma_resv_iter_end(&cursor);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 47159e9a0884..4b9e7b050ccd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -386,7 +386,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
if (ret) {
dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n");
kobject_put(&hive->kobj);
- kfree(hive);
hive = NULL;
goto pro_end;
}
@@ -410,7 +409,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n");
ret = -ENOMEM;
kobject_put(&hive->kobj);
- kfree(hive);
hive = NULL;
goto pro_end;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 9d2c6523f546..a56c6e106d00 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -77,6 +77,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
{
@@ -262,6 +266,7 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
{
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
soc15_program_register_sequence(adev,
golden_settings_gc_11_0_1,
(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
@@ -855,6 +860,7 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
break;
case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -1284,6 +1290,7 @@ static int gfx_v11_0_sw_init(void *handle)
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
adev->gfx.me.num_queue_per_pipe = 1;
@@ -2486,7 +2493,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
for (i = 0; i < adev->usec_timeout; i++) {
cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1))
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1) ||
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 4))
bootload_status = RREG32_SOC15(GC, 0,
regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
else
@@ -5050,6 +5058,7 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
break;
default:
@@ -5083,6 +5092,7 @@ static int gfx_v11_0_set_powergating_state(void *handle,
amdgpu_gfx_off_ctrl(adev, enable);
break;
case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
gfx_v11_cntl_pg(adev, enable);
amdgpu_gfx_off_ctrl(adev, enable);
break;
@@ -5106,6 +5116,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle,
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
gfx_v11_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE);
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 676832da75eb..f202b45c413c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -47,6 +47,7 @@
#include "amdgpu_ras.h"
+#include "amdgpu_ring_mux.h"
#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
#include "gfx_v9_4_2.h"
@@ -56,6 +57,7 @@
#include "asic_reg/gc/gc_9_0_default.h"
#define GFX9_NUM_GFX_RINGS 1
+#define GFX9_NUM_SW_GFX_RINGS 2
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
@@ -753,7 +755,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status);
@@ -826,9 +828,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
if (action == PREEMPT_QUEUES_NO_UNMAP) {
- amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
- amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
- amdgpu_ring_write(kiq_ring, seq);
+ amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+
} else {
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
@@ -2103,6 +2106,7 @@ static int gfx_v9_0_sw_init(void *handle)
struct amdgpu_ring *ring;
struct amdgpu_kiq *kiq;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ unsigned int hw_prio;
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 0, 1):
@@ -2186,6 +2190,9 @@ static int gfx_v9_0_sw_init(void *handle)
sprintf(ring->name, "gfx_%d", i);
ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+
+ /* disable scheduler on the real ring */
+ ring->no_scheduler = true;
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
AMDGPU_RING_PRIO_DEFAULT, NULL);
@@ -2193,6 +2200,41 @@ static int gfx_v9_0_sw_init(void *handle)
return r;
}
+ /* set up the software rings */
+ if (adev->gfx.num_gfx_rings) {
+ for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
+ ring = &adev->gfx.sw_gfx_ring[i];
+ ring->ring_obj = NULL;
+ sprintf(ring->name, amdgpu_sw_ring_name(i));
+ ring->use_doorbell = true;
+ ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+ ring->is_sw_ring = true;
+ hw_prio = amdgpu_sw_ring_priority(i);
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
+ NULL);
+ if (r)
+ return r;
+ ring->wptr = 0;
+ }
+
+ /* init the muxer and add software rings */
+ r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
+ GFX9_NUM_SW_GFX_RINGS);
+ if (r) {
+ DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
+ return r;
+ }
+ for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
+ r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
+ &adev->gfx.sw_gfx_ring[i]);
+ if (r) {
+ DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
+ return r;
+ }
+ }
+ }
+
/* set up the compute queues - allocate horizontally across pipes */
ring_id = 0;
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
@@ -2243,6 +2285,12 @@ static int gfx_v9_0_sw_fini(void *handle)
int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (adev->gfx.num_gfx_rings) {
+ for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
+ amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
+ amdgpu_ring_mux_fini(&adev->gfx.muxer);
+ }
+
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
for (i = 0; i < adev->gfx.num_compute_rings; i++)
@@ -5157,11 +5205,17 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
control |= ib->length_dw | (vmid << 24);
- if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+ if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
+ if (flags & AMDGPU_IB_PREEMPTED)
+ control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
- gfx_v9_0_ring_emit_de_meta(ring);
+ gfx_v9_0_ring_emit_de_meta(ring,
+ (!amdgpu_sriov_vf(ring->adev) &&
+ flags & AMDGPU_IB_PREEMPTED) ?
+ true : false);
}
amdgpu_ring_write(ring, header);
@@ -5216,17 +5270,24 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+ bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
+ uint32_t dw2 = 0;
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
- amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
- EOP_TC_NC_ACTION_EN) :
- (EOP_TCL1_ACTION_EN |
- EOP_TC_ACTION_EN |
- EOP_TC_WB_ACTION_EN |
- EOP_TC_MD_ACTION_EN)) |
- EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
- EVENT_INDEX(5)));
+
+ if (writeback) {
+ dw2 = EOP_TC_NC_ACTION_EN;
+ } else {
+ dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
+ EOP_TC_MD_ACTION_EN;
+ }
+ dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ EVENT_INDEX(5);
+ if (exec)
+ dw2 |= EOP_EXEC;
+
+ amdgpu_ring_write(ring, dw2);
amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
/*
@@ -5331,33 +5392,135 @@ static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 0);
}
-static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
{
+ struct amdgpu_device *adev = ring->adev;
struct v9_ce_ib_state ce_payload = {0};
- uint64_t csa_addr;
+ uint64_t offset, ce_payload_gpu_addr;
+ void *ce_payload_cpu_addr;
int cnt;
cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
- csa_addr = amdgpu_csa_vaddr(ring->adev);
+
+ if (ring->is_mes_queue) {
+ offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+ gfx[0].gfx_meta_data) +
+ offsetof(struct v9_gfx_meta_data, ce_payload);
+ ce_payload_gpu_addr =
+ amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ ce_payload_cpu_addr =
+ amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+ } else {
+ offset = offsetof(struct v9_gfx_meta_data, ce_payload);
+ ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+ }
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
WRITE_DATA_DST_SEL(8) |
WR_CONFIRM) |
WRITE_DATA_CACHE_POLICY(0));
- amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
- amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
- amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
+ amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
+
+ if (resume)
+ amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
+ sizeof(ce_payload) >> 2);
+ else
+ amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
+ sizeof(ce_payload) >> 2);
+}
+
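
Both the CE and the DE meta writes compute their destination as a base address plus offsetof() of the payload member inside the gfx metadata structure, using the MES context for MES queues and the per-VM CSA otherwise; on resume the saved CPU-side copy of the payload is replayed instead of a zeroed one. A minimal, self-contained illustration of the offset arithmetic follows; the struct layout below is a stand-in, not the real v9_gfx_meta_data.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct ce_state { uint32_t words[8]; };          /* stand-in for v9_ce_ib_state */
struct de_state { uint32_t words[12]; };         /* stand-in for v9_de_ib_state */

struct gfx_meta {                                /* stand-in for v9_gfx_meta_data */
	struct ce_state ce_payload;
	struct de_state de_payload;
};

int main(void)
{
	uint64_t csa_gpu_va = 0x100000;          /* pretend CSA GPU virtual address */
	void *csa_cpu = &(struct gfx_meta){0};   /* pretend CPU mapping of the CSA */

	size_t off = offsetof(struct gfx_meta, de_payload);

	/* GPU address the WRITE_DATA packet targets ... */
	uint64_t de_gpu_addr = csa_gpu_va + off;
	/* ... and the CPU pointer a resume would replay the saved payload from. */
	void *de_cpu_addr = (char *)csa_cpu + off;

	printf("de_payload at +%zu -> gpu 0x%llx, cpu %p\n",
	       off, (unsigned long long)de_gpu_addr, de_cpu_addr);
	return 0;
}
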
+static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ ring->trail_seq += 1;
+ amdgpu_ring_alloc(ring, 13);
+ gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+ ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
+ /* reset the CP_VMID_PREEMPT after trailing fence */
+ amdgpu_ring_emit_wreg(ring,
+ SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
+ 0x0);
+
+ /* assert IB preemption, emit the trailing fence */
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
+ ring->trail_fence_gpu_addr,
+ ring->trail_seq);
+
+ amdgpu_ring_commit(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*ring->trail_fence_cpu_addr))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
+ }
+
+ amdgpu_ring_commit(ring);
+
+ /* deassert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
}
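
The preemption request itself completes asynchronously: the KIQ UNMAP_QUEUES packet makes the CP write the trailing fence once the gfx queue has actually been preempted, and the driver then busy-waits on the CPU-visible fence slot, as above. The loop below is a self-contained model of that poll; fence_cpu, trail_seq and the injected udelay_us callback are illustrative names.

#include <stdint.h>
#include <stdbool.h>

/* Poll a CPU-visible fence slot until it reaches the expected sequence
 * number or the timeout budget (in microseconds) runs out. */
static bool wait_trailing_fence(volatile uint32_t *fence_cpu,
				uint32_t trail_seq,
				unsigned int usec_timeout,
				void (*udelay_us)(unsigned int))
{
	unsigned int i;

	for (i = 0; i < usec_timeout; i++) {
		if (*fence_cpu == trail_seq)
			return true;     /* the CP signalled the preemption */
		udelay_us(1);            /* back off 1us, like udelay(1) above */
	}
	return false;                    /* caller reports the timeout as -EINVAL */
}
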
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
{
+ struct amdgpu_device *adev = ring->adev;
struct v9_de_ib_state de_payload = {0};
- uint64_t csa_addr, gds_addr;
+ uint64_t offset, gds_addr, de_payload_gpu_addr;
+ void *de_payload_cpu_addr;
int cnt;
- csa_addr = amdgpu_csa_vaddr(ring->adev);
- gds_addr = csa_addr + 4096;
+ if (ring->is_mes_queue) {
+ offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+ gfx[0].gfx_meta_data) +
+ offsetof(struct v9_gfx_meta_data, de_payload);
+ de_payload_gpu_addr =
+ amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ de_payload_cpu_addr =
+ amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+
+ offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+ gfx[0].gds_backup) +
+ offsetof(struct v9_gfx_meta_data, de_payload);
+ gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ } else {
+ offset = offsetof(struct v9_gfx_meta_data, de_payload);
+ de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+
+ gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+ AMDGPU_CSA_SIZE - adev->gds.gds_size,
+ PAGE_SIZE);
+ }
+
de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
@@ -5367,9 +5530,15 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
WRITE_DATA_DST_SEL(8) |
WR_CONFIRM) |
WRITE_DATA_CACHE_POLICY(0));
- amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
- amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
- amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
+ amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
+
+ if (resume)
+ amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
+ sizeof(de_payload) >> 2);
+ else
+ amdgpu_ring_write_multiple(ring, (void *)&de_payload,
+ sizeof(de_payload) >> 2);
}
static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
@@ -5385,8 +5554,9 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
uint32_t dw2 = 0;
- if (amdgpu_sriov_vf(ring->adev))
- gfx_v9_0_ring_emit_ce_meta(ring);
+ gfx_v9_0_ring_emit_ce_meta(ring,
+ (!amdgpu_sriov_vf(ring->adev) &&
+ flags & AMDGPU_IB_PREEMPTED) ? true : false);
dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
if (flags & AMDGPU_HAVE_CTX_SWITCH) {
@@ -5712,7 +5882,12 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
switch (me_id) {
case 0:
- amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ if (adev->gfx.num_gfx_rings &&
+ !amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
+ /* Fence signals are handled on the software rings */
+ for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
+ amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
+ }
break;
case 1:
case 2:
@@ -6709,6 +6884,62 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
+ .preempt_ib = gfx_v9_0_ring_preempt_ib,
+ .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
+ .emit_wreg = gfx_v9_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
+ .soft_recovery = gfx_v9_0_ring_soft_recovery,
+ .emit_mem_sync = gfx_v9_0_emit_mem_sync,
+};
+
+static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .vmhub = AMDGPU_GFXHUB_0,
+ .get_rptr = amdgpu_sw_ring_get_rptr_gfx,
+ .get_wptr = amdgpu_sw_ring_get_wptr_gfx,
+ .set_wptr = amdgpu_sw_ring_set_wptr_gfx,
+ .emit_frame_size = /* totally 242 maximum if 16 IBs */
+ 5 + /* COND_EXEC */
+ 7 + /* PIPELINE_SYNC */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* VM_FLUSH */
+ 8 + /* FENCE for VM_FLUSH */
+ 20 + /* GDS switch */
+ 4 + /* double SWITCH_BUFFER,
+ * the first COND_EXEC jump to the place just
+ * prior to this double SWITCH_BUFFER
+ */
+ 5 + /* COND_EXEC */
+ 7 + /* HDP_flush */
+ 4 + /* VGT_flush */
+ 14 + /* CE_META */
+ 31 + /* DE_META */
+ 3 + /* CNTX_CTRL */
+ 5 + /* HDP_INVL */
+ 8 + 8 + /* FENCE x2 */
+ 2 + /* SWITCH_BUFFER */
+ 7, /* gfx_v9_0_emit_mem_sync */
+ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
+ .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v9_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v9_0_ring_test_ring,
+ .test_ib = gfx_v9_0_ring_test_ib,
+ .insert_nop = amdgpu_sw_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_switch_buffer = gfx_v9_ring_emit_sb,
+ .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
+ .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
+ .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
@@ -6794,6 +7025,11 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
+ if (adev->gfx.num_gfx_rings) {
+ for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
+ adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
+ }
+
for (i = 0; i < adev->gfx.num_compute_rings; i++)
adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 96e52ec0fb69..4326078689cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -759,6 +759,7 @@ static int gmc_v11_0_sw_init(void *handle)
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
adev->num_vmhubs = 2;
/*
* To fulfill 4-level page support,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 50386eb2eec8..08d6cf79fb15 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1185,6 +1185,8 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping,
uint64_t *flags)
{
+ struct amdgpu_bo *bo = mapping->bo_va->base.bo;
+
*flags &= ~AMDGPU_PTE_EXECUTABLE;
*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
@@ -1196,7 +1198,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
*flags &= ~AMDGPU_PTE_VALID;
}
- if (mapping->bo_va->base.bo)
+ if (bo && bo->tbo.resource)
gmc_v9_0_get_coherence_flags(adev, mapping->bo_va->base.bo,
mapping, flags);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 8d9c1e841353..970b066b37bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -388,6 +388,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
mes_set_hw_res_pkt.disable_reset = 1;
mes_set_hw_res_pkt.disable_mes_log = 1;
mes_set_hw_res_pkt.use_different_vmid_compute = 1;
+ mes_set_hw_res_pkt.enable_reg_active_poll = 1;
mes_set_hw_res_pkt.oversubscription_timer = 50;
return mes_v11_0_submit_pkt_and_poll_completion(mes,
@@ -1341,7 +1342,8 @@ static int mes_v11_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (!amdgpu_in_reset(adev) &&
+ /* this is only intended for the mes_self_test case, not for s0ix or reset */
+ if (!amdgpu_in_reset(adev) && !adev->in_s0ix &&
(adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3)))
amdgpu_mes_self_test(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index 998b5d17b271..0e664d0cc8d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -319,7 +319,7 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
tmp = mmMMVM_L2_CNTL5_DEFAULT;
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
- WREG32_SOC15(GC, 0, mmMMVM_L2_CNTL5, tmp);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL5, tmp);
}
static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev)
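
The one-word change in this and the following mmhub files matters because WREG32_SOC15() resolves the register offset against the base aperture of the named IP block; MMVM_L2_CNTL5 belongs to MMHUB, so routing it through the GC base wrote to the wrong aperture. A simplified, self-contained model of that lookup is shown below; the macro, enum and base table are illustrative, not the real soc15 register accessors.

#include <stdint.h>

enum ip_block { IP_GC = 0, IP_MMHUB = 1 };     /* illustrative IP identifiers */

/* Pretend per-IP MMIO base offsets, as an ASIC header would provide. */
static const uint32_t ip_base[] = {
	[IP_GC]    = 0x00002000,
	[IP_MMHUB] = 0x0001A000,
};

#define REG_OFFSET(ip, reg)  (ip_base[ip] + (reg))

/* Model of WREG32_SOC15(ip, 0, reg, val): the same register offset lands in
 * a completely different aperture when the wrong IP block is named. */
static void wreg32_soc15_model(volatile uint32_t *mmio, enum ip_block ip,
			       uint32_t reg, uint32_t val)
{
	mmio[REG_OFFSET(ip, reg)] = val;
}
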
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
index 1b027d069ab4..4638ea7c2eec 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
@@ -243,7 +243,7 @@ static void mmhub_v2_3_init_cache_regs(struct amdgpu_device *adev)
tmp = mmMMVM_L2_CNTL5_DEFAULT;
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
- WREG32_SOC15(GC, 0, mmMMVM_L2_CNTL5, tmp);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL5, tmp);
}
static void mmhub_v2_3_enable_system_domain(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
index a1d26c4d80b8..16cc82215e2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
@@ -275,7 +275,7 @@ static void mmhub_v3_0_init_cache_regs(struct amdgpu_device *adev)
tmp = regMMVM_L2_CNTL5_DEFAULT;
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
- WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
}
static void mmhub_v3_0_enable_system_domain(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
index e8058edc1d10..6bdf2ef0298d 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
@@ -269,7 +269,7 @@ static void mmhub_v3_0_1_init_cache_regs(struct amdgpu_device *adev)
tmp = regMMVM_L2_CNTL5_DEFAULT;
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
- WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
}
static void mmhub_v3_0_1_enable_system_domain(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
index 770be0a8f7ce..45465acaa943 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
@@ -268,7 +268,7 @@ static void mmhub_v3_0_2_init_cache_regs(struct amdgpu_device *adev)
tmp = regMMVM_L2_CNTL5_DEFAULT;
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
- WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
}
static void mmhub_v3_0_2_enable_system_domain(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h
index f772bb499f3e..0312c71c3af9 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h
@@ -32,7 +32,6 @@
#define RB_ENABLED (1 << 0)
#define RB4_ENABLED (1 << 1)
-#define MMSCH_DOORBELL_OFFSET 0x8
#define MMSCH_VF_ENGINE_STATUS__PASS 0x1
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index b3fba8dea63c..6853b93ac82e 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -82,10 +82,10 @@ static const struct amdgpu_video_codecs nv_video_codecs_encode =
/* Navi1x */
static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] =
{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
@@ -100,10 +100,10 @@ static const struct amdgpu_video_codecs nv_video_codecs_decode =
/* Sienna Cichlid */
static const struct amdgpu_video_codec_info sc_video_codecs_decode_array[] =
{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
@@ -125,10 +125,10 @@ static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] =
static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array[] =
{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
@@ -149,7 +149,7 @@ static struct amdgpu_video_codecs sriov_sc_video_codecs_decode =
/* Beige Goby*/
static const struct amdgpu_video_codec_info bg_video_codecs_decode_array[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};
@@ -166,7 +166,7 @@ static const struct amdgpu_video_codecs bg_video_codecs_encode = {
/* Yellow Carp*/
static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index ee27bfaba6fd..e6a26a7e5e5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -46,6 +46,8 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_10_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin");
/* For large FW files the time to complete can be very long */
#define USBC_PD_POLLING_LIMIT_S 240
@@ -102,6 +104,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp)
case IP_VERSION(13, 0, 3):
case IP_VERSION(13, 0, 5):
case IP_VERSION(13, 0, 8):
+ case IP_VERSION(13, 0, 11):
err = psp_init_toc_microcode(psp, chip_name);
if (err)
return err;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 1122bd4eae98..4d780e4430e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -907,13 +907,13 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
/**
- * sdma_v4_0_gfx_stop - stop the gfx async dma engines
+ * sdma_v4_0_gfx_enable - enable the gfx async dma engines
*
* @adev: amdgpu_device pointer
- *
- * Stop the gfx async dma ring buffers (VEGA10).
+ * @enable: enable SDMA RB/IB
+ *
+ * Control the gfx async dma ring buffers (VEGA10).
*/
-static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
+static void sdma_v4_0_gfx_enable(struct amdgpu_device *adev, bool enable)
{
u32 rb_cntl, ib_cntl;
int i;
@@ -922,10 +922,10 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, enable ? 1 : 0);
WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, enable ? 1 : 0);
WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
}
}
@@ -1044,7 +1044,7 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
int i;
if (!enable) {
- sdma_v4_0_gfx_stop(adev);
+ sdma_v4_0_gfx_enable(adev, enable);
sdma_v4_0_rlc_stop(adev);
if (adev->sdma.has_page_queue)
sdma_v4_0_page_stop(adev);
@@ -1960,8 +1960,10 @@ static int sdma_v4_0_suspend(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* SMU saves SDMA state for us */
- if (adev->in_s0ix)
+ if (adev->in_s0ix) {
+ sdma_v4_0_gfx_enable(adev, false);
return 0;
+ }
return sdma_v4_0_hw_fini(adev);
}
@@ -1971,8 +1973,12 @@ static int sdma_v4_0_resume(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* SMU restores SDMA state for us */
- if (adev->in_s0ix)
+ if (adev->in_s0ix) {
+ sdma_v4_0_enable(adev, true);
+ sdma_v4_0_gfx_enable(adev, true);
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
return 0;
+ }
return sdma_v4_0_hw_init(adev);
}
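
Turning sdma_v4_0_gfx_stop() into sdma_v4_0_gfx_enable() lets the s0ix suspend/resume path merely toggle the ring-buffer and IB enable bits while the SMU preserves the rest of the SDMA state, instead of running the full hw_fini()/hw_init() sequence. The enable bit is flipped with the usual read-modify-write of a register field; the standalone sketch below reproduces that pattern with made-up shift/mask values rather than the real SDMA register definitions.

#include <stdint.h>
#include <stdbool.h>

/* Illustrative field layout: pretend RB_ENABLE is bit 0 of SDMA0_GFX_RB_CNTL. */
#define RB_ENABLE_SHIFT  0
#define RB_ENABLE_MASK   (1u << RB_ENABLE_SHIFT)

/* Equivalent of REG_SET_FIELD(): clear the field, then or in the new value. */
static uint32_t set_field(uint32_t reg, uint32_t mask,
			  unsigned int shift, uint32_t val)
{
	return (reg & ~mask) | ((val << shift) & mask);
}

/* Read-modify-write one control register to start or stop the ring buffer,
 * mirroring the RREG32/REG_SET_FIELD/WREG32 sequence done per SDMA instance. */
static void sdma_gfx_enable_model(volatile uint32_t *rb_cntl, bool enable)
{
	uint32_t tmp = *rb_cntl;

	tmp = set_field(tmp, RB_ENABLE_MASK, RB_ENABLE_SHIFT, enable ? 1 : 0);
	*rb_cntl = tmp;
}
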
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index e3b2b6b4f1a6..7cd17dda32ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -103,10 +103,10 @@ static const struct amdgpu_video_codecs vega_video_codecs_encode =
/* Vega */
static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] =
{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
};
@@ -120,10 +120,10 @@ static const struct amdgpu_video_codecs vega_video_codecs_decode =
/* Raven */
static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] =
{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)},
@@ -138,10 +138,10 @@ static const struct amdgpu_video_codecs rv_video_codecs_decode =
/* Renoir, Arcturus */
static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] =
{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index 799925d22fc8..2357ff39323f 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -162,6 +162,7 @@
* 2 - Bypass
*/
#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
+#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
#define PACKET3_COPY_DATA 0x40
#define PACKET3_PFP_SYNC_ME 0x42
#define PACKET3_COND_WRITE 0x45
@@ -184,6 +185,7 @@
#define EOP_TC_ACTION_EN (1 << 17) /* L2 */
#define EOP_TC_NC_ACTION_EN (1 << 19)
#define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */
+#define EOP_EXEC (1 << 28) /* For Trailing Fence */
#define DATA_SEL(x) ((x) << 29)
/* 0 - discard
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index b258e9aa0558..5562670b7b52 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -43,6 +43,7 @@
#include "soc15.h"
#include "soc15_common.h"
#include "soc21.h"
+#include "mxgpu_nv.h"
static const struct amd_ip_funcs soc21_common_ip_funcs;
@@ -61,7 +62,7 @@ static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode =
static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array[] =
{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
@@ -325,6 +326,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 10):
return AMD_RESET_METHOD_MODE1;
case IP_VERSION(13, 0, 4):
+ case IP_VERSION(13, 0, 11):
return AMD_RESET_METHOD_MODE2;
default:
if (amdgpu_dpm_is_baco_supported(adev))
@@ -644,24 +646,62 @@ static int soc21_common_early_init(void *handle)
AMD_PG_SUPPORT_JPEG;
adev->external_rev_id = adev->rev_id + 0x20;
break;
+ case IP_VERSION(11, 0, 4):
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_JPEG;
+ adev->external_rev_id = adev->rev_id + 0x1;
+ break;
+
default:
/* FIXME: not supported yet */
return -EINVAL;
}
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev)) {
amdgpu_virt_init_setting(adev);
+ xgpu_nv_mailbox_set_irq_funcs(adev);
+ }
return 0;
}
static int soc21_common_late_init(void *handle)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ xgpu_nv_mailbox_get_irq(adev);
+
return 0;
}
static int soc21_common_sw_init(void *handle)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ xgpu_nv_mailbox_add_irq_id(adev);
+
return 0;
}
@@ -699,6 +739,9 @@ static int soc21_common_hw_fini(void *handle)
/* disable the doorbell aperture */
soc21_enable_doorbell_aperture(adev, false);
+ if (amdgpu_sriov_vf(adev))
+ xgpu_nv_mailbox_put_irq(adev);
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index ce8374ee824d..ec87b00f2e05 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -770,6 +770,33 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev)
}
}
+static void vcn_v2_6_enable_ras(struct amdgpu_device *adev, int inst_idx,
+ bool indirect)
+{
+ uint32_t tmp;
+
+ if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(2, 6, 0))
+ return;
+
+ tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, mmVCN_RAS_CNTL),
+ tmp, 0, indirect);
+
+ tmp = UVD_VCPU_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, mmUVD_VCPU_INT_EN),
+ tmp, 0, indirect);
+
+ tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, mmUVD_SYS_INT_EN),
+ tmp, 0, indirect);
+}
+
static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
{
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
@@ -849,6 +876,8 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_CTRL2), 0, 0, indirect);
+ vcn_v2_6_enable_ras(adev, inst_idx, indirect);
+
/* unblock VCPU register access */
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect);
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 403d054cf51b..1e2b22299975 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -103,7 +103,6 @@ static int vcn_v4_0_sw_init(void *handle)
struct amdgpu_ring *ring;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i, r;
- int vcn_doorbell_index = 0;
r = amdgpu_vcn_sw_init(adev);
if (r)
@@ -115,12 +114,6 @@ static int vcn_v4_0_sw_init(void *handle)
if (r)
return r;
- if (amdgpu_sriov_vf(adev)) {
- vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 - MMSCH_DOORBELL_OFFSET;
- /* get DWORD offset */
- vcn_doorbell_index = vcn_doorbell_index << 1;
- }
-
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
volatile struct amdgpu_vcn4_fw_shared *fw_shared;
@@ -144,7 +137,7 @@ static int vcn_v4_0_sw_init(void *handle)
ring = &adev->vcn.inst[i].ring_enc[0];
ring->use_doorbell = true;
if (amdgpu_sriov_vf(adev))
- ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1;
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i * (adev->vcn.num_enc_rings + 1) + 1;
else
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i;
@@ -869,6 +862,28 @@ static void vcn_v4_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
return;
}
+static void vcn_v4_0_enable_ras(struct amdgpu_device *adev, int inst_idx,
+ bool indirect)
+{
+ uint32_t tmp;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
+ return;
+
+ tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL),
+ tmp, 0, indirect);
+
+ tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN),
+ tmp, 0, indirect);
+}
+
/**
* vcn_v4_0_start_dpg_mode - VCN start with dpg mode
*
@@ -957,6 +972,8 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect);
+ vcn_v4_0_enable_ras(adev, inst_idx, indirect);
+
/* enable master interrupt */
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, inst_idx, regUVD_MASTINT_EN),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index af01ba061e1b..3251f4783ba1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1521,6 +1521,7 @@ int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pca
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
num_of_cache_types =
kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info);
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index c8dd948966c2..b8936340742b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -153,6 +153,7 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
break;
default:
@@ -394,6 +395,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
f2g = &gfx_v11_kfd2kgd;
break;
case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
gfx_target_version = 110003;
f2g = &gfx_v11_kfd2kgd;
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index a26257171ab7..51b1683ac5c1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -689,13 +689,13 @@ void kfd_process_destroy_wq(void)
}
static void kfd_process_free_gpuvm(struct kgd_mem *mem,
- struct kfd_process_device *pdd, void *kptr)
+ struct kfd_process_device *pdd, void **kptr)
{
struct kfd_dev *dev = pdd->dev;
- if (kptr) {
+ if (kptr && *kptr) {
amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
- kptr = NULL;
+ *kptr = NULL;
}
amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->adev, mem, pdd->drm_priv);
@@ -795,7 +795,7 @@ static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd)
if (!qpd->ib_kaddr || !qpd->ib_base)
return;
- kfd_process_free_gpuvm(qpd->ib_mem, pdd, qpd->ib_kaddr);
+ kfd_process_free_gpuvm(qpd->ib_mem, pdd, &qpd->ib_kaddr);
}
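
Changing kptr to void ** is what lets kfd_process_free_gpuvm() actually clear the caller's qpd->ib_kaddr and qpd->cwsr_kaddr; with a plain void * the NULL assignment only ever touched the local copy. A minimal, self-contained illustration of the difference:

#include <stdio.h>

static void free_mapping_by_value(void *kptr)
{
	kptr = NULL;            /* only clears the local copy of the pointer */
}

static void free_mapping_by_reference(void **kptr)
{
	if (kptr && *kptr)
		*kptr = NULL;   /* clears the caller's pointer as well */
}

int main(void)
{
	void *kaddr = (void *)0x1234;

	free_mapping_by_value(kaddr);
	printf("by value:     %p\n", kaddr);   /* still 0x1234 */

	free_mapping_by_reference(&kaddr);
	printf("by reference: %p\n", kaddr);   /* now NULL */
	return 0;
}
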
struct kfd_process *kfd_create_process(struct file *filep)
@@ -1277,7 +1277,7 @@ static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd)
if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base)
return;
- kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, qpd->cwsr_kaddr);
+ kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, &qpd->cwsr_kaddr);
}
void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
@@ -1576,9 +1576,9 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
p = pdd->process;
dev = pdd->dev;
- ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
- dev->adev, drm_file, p->pasid,
- &p->kgd_process_info, &p->ef);
+ ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, drm_file,
+ &p->kgd_process_info,
+ &p->ef);
if (ret) {
pr_err("Failed to create process VM object\n");
return ret;
@@ -1593,13 +1593,19 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
if (ret)
goto err_init_cwsr;
+ ret = amdgpu_amdkfd_gpuvm_set_vm_pasid(dev->adev, drm_file, p->pasid);
+ if (ret)
+ goto err_set_pasid;
+
pdd->drm_file = drm_file;
return 0;
+err_set_pasid:
+ kfd_process_device_destroy_cwsr_dgpu(pdd);
err_init_cwsr:
+ kfd_process_device_destroy_ib_mem(pdd);
err_reserve_ib_mem:
- kfd_process_device_free_bos(pdd);
pdd->drm_priv = NULL;
return ret;
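
The reworked error path in kfd_process_device_init_vm() follows the usual kernel convention of unwinding in reverse order of setup: a PASID failure tears down the CWSR area, which falls through to the IB-memory teardown, instead of the previous blanket kfd_process_device_free_bos() call. The self-contained ladder below models that pattern with stub step functions; the names are illustrative, not the KFD helpers.

#include <stdio.h>

/* Stubs standing in for the real setup/teardown steps. */
static int acquire_vm(void)      { return 0; }
static int init_ib_mem(void)     { return 0; }
static int init_cwsr(void)       { return 0; }
static int set_pasid(void)       { return -1; }   /* pretend this step fails */
static void destroy_cwsr(void)   { puts("undo cwsr"); }
static void destroy_ib_mem(void) { puts("undo ib mem"); }
static void release_vm(void)     { puts("undo vm"); }

static int init_vm_model(void)
{
	int ret;

	ret = acquire_vm();
	if (ret)
		return ret;
	ret = init_ib_mem();
	if (ret)
		goto err_ib;
	ret = init_cwsr();
	if (ret)
		goto err_cwsr;
	ret = set_pasid();
	if (ret)
		goto err_pasid;
	return 0;

err_pasid:
	destroy_cwsr();     /* unwind step 3 */
err_cwsr:
	destroy_ib_mem();   /* unwind step 2 */
err_ib:
	release_vm();       /* unwind step 1 */
	return ret;
}

int main(void) { return init_vm_model() ? 1 : 0; }
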
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index ef9c6fdfb88d..bceb1a5b2518 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1805,21 +1805,75 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
pr_debug("Added [%d] GPU cache entries\n", num_of_entries);
}
+static int kfd_topology_add_device_locked(struct kfd_dev *gpu, uint32_t gpu_id,
+ struct kfd_topology_device **dev)
+{
+ int proximity_domain = ++topology_crat_proximity_domain;
+ struct list_head temp_topology_device_list;
+ void *crat_image = NULL;
+ size_t image_size = 0;
+ int res;
+
+ res = kfd_create_crat_image_virtual(&crat_image, &image_size,
+ COMPUTE_UNIT_GPU, gpu,
+ proximity_domain);
+ if (res) {
+ pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
+ gpu_id);
+ topology_crat_proximity_domain--;
+ goto err;
+ }
+
+ INIT_LIST_HEAD(&temp_topology_device_list);
+
+ res = kfd_parse_crat_table(crat_image,
+ &temp_topology_device_list,
+ proximity_domain);
+ if (res) {
+ pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
+ gpu_id);
+ topology_crat_proximity_domain--;
+ goto err;
+ }
+
+ kfd_topology_update_device_list(&temp_topology_device_list,
+ &topology_device_list);
+
+ *dev = kfd_assign_gpu(gpu);
+ if (WARN_ON(!*dev)) {
+ res = -ENODEV;
+ goto err;
+ }
+
+ /* Fill the cache affinity information here for the GPUs
+ * using VCRAT
+ */
+ kfd_fill_cache_non_crat_info(*dev, gpu);
+
+ /* Update the SYSFS tree, since we added another topology
+ * device
+ */
+ res = kfd_topology_update_sysfs();
+ if (!res)
+ sys_props.generation_count++;
+ else
+ pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
+ gpu_id, res);
+
+err:
+ kfd_destroy_crat_image(crat_image);
+ return res;
+}
+
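
Pulling the VCRAT work out into kfd_topology_add_device_locked() keeps one clear rule: the helper runs entirely under the write side of topology_lock taken by its single caller, so every error path can fall through to the shared crat_image cleanup without juggling the lock. A small self-contained model of that caller/helper split, using a pthread rwlock purely for illustration:

#include <pthread.h>

static pthread_rwlock_t topo_lock = PTHREAD_RWLOCK_INITIALIZER;
static int device_count;

/* Must be called with topo_lock held for writing. */
static int topology_add_locked(int id)
{
	if (id < 0)
		return -1;        /* error paths never touch the lock */
	device_count++;
	return 0;
}

static int topology_add(int id)
{
	int res;

	pthread_rwlock_wrlock(&topo_lock);
	res = topology_add_locked(id);    /* helper assumes the lock is held */
	pthread_rwlock_unlock(&topo_lock);
	return res;
}
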
int kfd_topology_add_device(struct kfd_dev *gpu)
{
uint32_t gpu_id;
struct kfd_topology_device *dev;
struct kfd_cu_info cu_info;
int res = 0;
- struct list_head temp_topology_device_list;
- void *crat_image = NULL;
- size_t image_size = 0;
- int proximity_domain;
int i;
const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];
- INIT_LIST_HEAD(&temp_topology_device_list);
-
gpu_id = kfd_generate_gpu_id(gpu);
pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
@@ -1831,54 +1885,11 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
*/
down_write(&topology_lock);
dev = kfd_assign_gpu(gpu);
- if (!dev) {
- proximity_domain = ++topology_crat_proximity_domain;
-
- res = kfd_create_crat_image_virtual(&crat_image, &image_size,
- COMPUTE_UNIT_GPU, gpu,
- proximity_domain);
- if (res) {
- pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
- gpu_id);
- topology_crat_proximity_domain--;
- return res;
- }
-
- res = kfd_parse_crat_table(crat_image,
- &temp_topology_device_list,
- proximity_domain);
- if (res) {
- pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
- gpu_id);
- topology_crat_proximity_domain--;
- goto err;
- }
-
- kfd_topology_update_device_list(&temp_topology_device_list,
- &topology_device_list);
-
- dev = kfd_assign_gpu(gpu);
- if (WARN_ON(!dev)) {
- res = -ENODEV;
- goto err;
- }
-
- /* Fill the cache affinity information here for the GPUs
- * using VCRAT
- */
- kfd_fill_cache_non_crat_info(dev, gpu);
-
- /* Update the SYSFS tree, since we added another topology
- * device
- */
- res = kfd_topology_update_sysfs();
- if (!res)
- sys_props.generation_count++;
- else
- pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
- gpu_id, res);
- }
+ if (!dev)
+ res = kfd_topology_add_device_locked(gpu, gpu_id, &dev);
up_write(&topology_lock);
+ if (res)
+ return res;
dev->gpu_id = gpu_id;
gpu->id = gpu_id;
@@ -2001,11 +2012,9 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
kfd_debug_print_topology();
- if (!res)
- kfd_notify_gpu_change(gpu_id, 1);
-err:
- kfd_destroy_crat_image(crat_image);
- return res;
+ kfd_notify_gpu_change(gpu_id, 1);
+
+ return 0;
}
/**
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 1cdb379a90d7..2efe93f74f84 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -5,6 +5,7 @@ menu "Display Engine Configuration"
config DRM_AMD_DC
bool "AMD DC - Enable new display engine"
default y
+ depends on BROKEN || !CC_IS_CLANG || X86_64 || SPARC64 || ARM64
select SND_HDA_COMPONENT if SND_HDA_CORE
# !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752
select DRM_AMD_DC_DCN if (X86 || PPC_LONG_DOUBLE_128 || (ARM64 && KERNEL_MODE_NEON && !CC_IS_CLANG))
@@ -13,6 +14,12 @@ config DRM_AMD_DC
support for AMDGPU. This adds required support for Vega and
Raven ASICs.
+ calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64)
+ architectures built with Clang (all released versions), whereby the stack
+ frame gets blown up to well over 5k. This would cause an immediate kernel
+ panic on most architectures. We'll revert this when the following bug report
+ has been resolved: https://github.com/llvm/llvm-project/issues/41896.
+
config DRM_AMD_DC_DCN
def_bool n
help
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index a8772082883e..86bc23a67d97 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -146,14 +146,6 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU);
/* Number of bytes in PSP footer for firmware. */
#define PSP_FOOTER_BYTES 0x100
-/*
- * DMUB Async to Sync Mechanism Status
- */
-#define DMUB_ASYNC_TO_SYNC_ACCESS_FAIL 1
-#define DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT 2
-#define DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS 3
-#define DMUB_ASYNC_TO_SYNC_ACCESS_INVALID 4
-
/**
* DOC: overview
*
@@ -1104,7 +1096,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
/* Initialize hardware. */
memset(&hw_params, 0, sizeof(hw_params));
hw_params.fb_base = adev->gmc.fb_start;
- hw_params.fb_offset = adev->gmc.aper_base;
+ hw_params.fb_offset = adev->vm_manager.vram_base_offset;
/* backdoor load firmware and trigger dmub running */
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
@@ -1226,7 +1218,7 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_
pa_config->system_aperture.agp_top = (uint64_t)agp_top << 24;
pa_config->system_aperture.fb_base = adev->gmc.fb_start;
- pa_config->system_aperture.fb_offset = adev->gmc.aper_base;
+ pa_config->system_aperture.fb_offset = adev->vm_manager.vram_base_offset;
pa_config->system_aperture.fb_top = adev->gmc.fb_end;
pa_config->gart_config.page_table_start_addr = page_table_start.quad_part << 12;
@@ -1371,7 +1363,44 @@ static const struct dmi_system_id hpd_disconnect_quirk_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"),
},
},
+ {
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower Plus 7010"),
+ },
+ },
+ {
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower 7010"),
+ },
+ },
+ {
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF Plus 7010"),
+ },
+ },
+ {
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF 7010"),
+ },
+ },
+ {
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro Plus 7010"),
+ },
+ },
+ {
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro 7010"),
+ },
+ },
{}
+ /* TODO: refactor this from a fixed table to a dynamic option */
};
static void retrieve_dmi_info(struct amdgpu_display_manager *dm)
@@ -1404,6 +1433,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
memset(&init_params, 0, sizeof(init_params));
#endif
+ mutex_init(&adev->dm.dpia_aux_lock);
mutex_init(&adev->dm.dc_lock);
mutex_init(&adev->dm.audio_lock);
@@ -1473,6 +1503,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
case IP_VERSION(3, 0, 1):
case IP_VERSION(3, 1, 2):
case IP_VERSION(3, 1, 3):
+ case IP_VERSION(3, 1, 4):
case IP_VERSION(3, 1, 5):
case IP_VERSION(3, 1, 6):
init_data.flags.gpu_vm_support = true;
@@ -1768,6 +1799,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
mutex_destroy(&adev->dm.audio_lock);
mutex_destroy(&adev->dm.dc_lock);
+ mutex_destroy(&adev->dm.dpia_aux_lock);
return;
}
@@ -4329,6 +4361,10 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
amdgpu_set_panel_orientation(&aconnector->base);
}
+ /* If we didn't find a panel, notify the acpi video detection */
+ if (dm->adev->flags & AMD_IS_APU && dm->num_of_edps == 0)
+ acpi_video_report_nolcd();
+
/* Software is initialized. Now we can register interrupt handlers. */
switch (adev->asic_type) {
#if defined(CONFIG_DRM_AMD_DC_SI)
@@ -4838,6 +4874,35 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
return 0;
}
+static inline void fill_dc_dirty_rect(struct drm_plane *plane,
+ struct rect *dirty_rect, int32_t x,
+ int32_t y, int32_t width, int32_t height,
+ int *i, bool ffu)
+{
+ if (*i > DC_MAX_DIRTY_RECTS)
+ return;
+
+ if (*i == DC_MAX_DIRTY_RECTS)
+ goto out;
+
+ dirty_rect->x = x;
+ dirty_rect->y = y;
+ dirty_rect->width = width;
+ dirty_rect->height = height;
+
+ if (ffu)
+ drm_dbg(plane->dev,
+ "[PLANE:%d] PSR FFU dirty rect size (%d, %d)\n",
+ plane->base.id, width, height);
+ else
+ drm_dbg(plane->dev,
+ "[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)",
+ plane->base.id, x, y, width, height);
+
+out:
+ (*i)++;
+}
+
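
fill_dc_dirty_rect() takes x/y/width/height, while the damage clips handed in by userspace are stored as two corner points (x1, y1)-(x2, y2); the callers below therefore pass clips->x2 - clips->x1 and clips->y2 - clips->y1 as the size. A tiny self-contained version of that conversion, with plain structs standing in for struct drm_mode_rect and struct rect:

#include <stdint.h>

struct mode_rect { int32_t x1, y1, x2, y2; };       /* stand-in for drm_mode_rect */
struct dc_rect   { int32_t x, y, width, height; };  /* stand-in for struct rect */

static struct dc_rect clip_to_dirty_rect(const struct mode_rect *clip)
{
	struct dc_rect r = {
		.x      = clip->x1,
		.y      = clip->y1,
		.width  = clip->x2 - clip->x1,   /* exclusive corner -> size */
		.height = clip->y2 - clip->y1,
	};
	return r;
}
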
/**
* fill_dc_dirty_rects() - Fill DC dirty regions for PSR selective updates
*
@@ -4858,10 +4923,6 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
* addition, certain use cases - such as cursor and multi-plane overlay (MPO) -
* implicitly provide damage clips without any client support via the plane
* bounds.
- *
- * Today, amdgpu_dm only supports the MPO and cursor usecase.
- *
- * TODO: Also enable for FB_DAMAGE_CLIPS
*/
static void fill_dc_dirty_rects(struct drm_plane *plane,
struct drm_plane_state *old_plane_state,
@@ -4872,12 +4933,11 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state);
struct rect *dirty_rects = flip_addrs->dirty_rects;
uint32_t num_clips;
+ struct drm_mode_rect *clips;
bool bb_changed;
bool fb_changed;
uint32_t i = 0;
- flip_addrs->dirty_rect_count = 0;
-
/*
* Cursor plane has it's own dirty rect update interface. See
* dcn10_dmub_update_cursor_data and dmub_cmd_update_cursor_info_data
@@ -4885,20 +4945,20 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
if (plane->type == DRM_PLANE_TYPE_CURSOR)
return;
- /*
- * Today, we only consider MPO use-case for PSR SU. If MPO not
- * requested, and there is a plane update, do FFU.
- */
+ num_clips = drm_plane_get_damage_clips_count(new_plane_state);
+ clips = drm_plane_get_damage_clips(new_plane_state);
+
if (!dm_crtc_state->mpo_requested) {
- dirty_rects[0].x = 0;
- dirty_rects[0].y = 0;
- dirty_rects[0].width = dm_crtc_state->base.mode.crtc_hdisplay;
- dirty_rects[0].height = dm_crtc_state->base.mode.crtc_vdisplay;
- flip_addrs->dirty_rect_count = 1;
- DRM_DEBUG_DRIVER("[PLANE:%d] PSR FFU dirty rect size (%d, %d)\n",
- new_plane_state->plane->base.id,
- dm_crtc_state->base.mode.crtc_hdisplay,
- dm_crtc_state->base.mode.crtc_vdisplay);
+ if (!num_clips || num_clips > DC_MAX_DIRTY_RECTS)
+ goto ffu;
+
+ for (; flip_addrs->dirty_rect_count < num_clips; clips++)
+ fill_dc_dirty_rect(new_plane_state->plane,
+ &dirty_rects[i], clips->x1,
+ clips->y1, clips->x2 - clips->x1,
+ clips->y2 - clips->y1,
+ &flip_addrs->dirty_rect_count,
+ false);
return;
}
@@ -4909,7 +4969,6 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
* If plane is moved or resized, also add old bounding box to dirty
* rects.
*/
- num_clips = drm_plane_get_damage_clips_count(new_plane_state);
fb_changed = old_plane_state->fb->base.id !=
new_plane_state->fb->base.id;
bb_changed = (old_plane_state->crtc_x != new_plane_state->crtc_x ||
@@ -4917,36 +4976,51 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
old_plane_state->crtc_w != new_plane_state->crtc_w ||
old_plane_state->crtc_h != new_plane_state->crtc_h);
- DRM_DEBUG_DRIVER("[PLANE:%d] PSR bb_changed:%d fb_changed:%d num_clips:%d\n",
- new_plane_state->plane->base.id,
- bb_changed, fb_changed, num_clips);
-
- if (num_clips || fb_changed || bb_changed) {
- dirty_rects[i].x = new_plane_state->crtc_x;
- dirty_rects[i].y = new_plane_state->crtc_y;
- dirty_rects[i].width = new_plane_state->crtc_w;
- dirty_rects[i].height = new_plane_state->crtc_h;
- DRM_DEBUG_DRIVER("[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)\n",
- new_plane_state->plane->base.id,
- dirty_rects[i].x, dirty_rects[i].y,
- dirty_rects[i].width, dirty_rects[i].height);
- i += 1;
- }
+ drm_dbg(plane->dev,
+ "[PLANE:%d] PSR bb_changed:%d fb_changed:%d num_clips:%d\n",
+ new_plane_state->plane->base.id,
+ bb_changed, fb_changed, num_clips);
- /* Add old plane bounding-box if plane is moved or resized */
if (bb_changed) {
- dirty_rects[i].x = old_plane_state->crtc_x;
- dirty_rects[i].y = old_plane_state->crtc_y;
- dirty_rects[i].width = old_plane_state->crtc_w;
- dirty_rects[i].height = old_plane_state->crtc_h;
- DRM_DEBUG_DRIVER("[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)\n",
- old_plane_state->plane->base.id,
- dirty_rects[i].x, dirty_rects[i].y,
- dirty_rects[i].width, dirty_rects[i].height);
- i += 1;
- }
+ fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i],
+ new_plane_state->crtc_x,
+ new_plane_state->crtc_y,
+ new_plane_state->crtc_w,
+ new_plane_state->crtc_h, &i, false);
+
+ /* Add old plane bounding-box if plane is moved or resized */
+ fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i],
+ old_plane_state->crtc_x,
+ old_plane_state->crtc_y,
+ old_plane_state->crtc_w,
+ old_plane_state->crtc_h, &i, false);
+ }
+
+ if (num_clips) {
+ for (; i < num_clips; clips++)
+ fill_dc_dirty_rect(new_plane_state->plane,
+ &dirty_rects[i], clips->x1,
+ clips->y1, clips->x2 - clips->x1,
+ clips->y2 - clips->y1, &i, false);
+ } else if (fb_changed && !bb_changed) {
+ fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i],
+ new_plane_state->crtc_x,
+ new_plane_state->crtc_y,
+ new_plane_state->crtc_w,
+ new_plane_state->crtc_h, &i, false);
+ }
+
+ if (i > DC_MAX_DIRTY_RECTS)
+ goto ffu;
flip_addrs->dirty_rect_count = i;
+ return;
+
+ffu:
+ fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[0], 0, 0,
+ dm_crtc_state->base.mode.crtc_hdisplay,
+ dm_crtc_state->base.mode.crtc_vdisplay,
+ &flip_addrs->dirty_rect_count, true);
}
static void update_stream_scaling_settings(const struct drm_display_mode *mode,
@@ -10167,91 +10241,92 @@ uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address,
return value;
}
-static int amdgpu_dm_set_dmub_async_sync_status(bool is_cmd_aux,
- struct dc_context *ctx,
- uint8_t status_type,
- uint32_t *operation_result)
+int amdgpu_dm_process_dmub_aux_transfer_sync(
+ struct dc_context *ctx,
+ unsigned int link_index,
+ struct aux_payload *payload,
+ enum aux_return_code_type *operation_result)
{
struct amdgpu_device *adev = ctx->driver_context;
- int return_status = -1;
struct dmub_notification *p_notify = adev->dm.dmub_notify;
+ int ret = -1;
- if (is_cmd_aux) {
- if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) {
- return_status = p_notify->aux_reply.length;
- *operation_result = p_notify->result;
- } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT) {
- *operation_result = AUX_RET_ERROR_TIMEOUT;
- } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_FAIL) {
- *operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE;
- } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_INVALID) {
- *operation_result = AUX_RET_ERROR_INVALID_REPLY;
- } else {
- *operation_result = AUX_RET_ERROR_UNKNOWN;
+ mutex_lock(&adev->dm.dpia_aux_lock);
+ if (!dc_process_dmub_aux_transfer_async(ctx->dc, link_index, payload)) {
+ *operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE;
+ goto out;
+ }
+
+ if (!wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) {
+ DRM_ERROR("wait_for_completion_timeout timeout!");
+ *operation_result = AUX_RET_ERROR_TIMEOUT;
+ goto out;
+ }
+
+ if (p_notify->result != AUX_RET_SUCCESS) {
+ /*
+ * Transient states before tunneling is enabled could
+ * lead to this error. We can ignore this for now.
+ */
+ if (p_notify->result != AUX_RET_ERROR_PROTOCOL_ERROR) {
+ DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n",
+ payload->address, payload->length,
+ p_notify->result);
}
- } else {
- if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) {
- return_status = 0;
- *operation_result = p_notify->sc_status;
- } else {
- *operation_result = SET_CONFIG_UNKNOWN_ERROR;
+ *operation_result = AUX_RET_ERROR_INVALID_REPLY;
+ goto out;
+ }
+
+
+ payload->reply[0] = adev->dm.dmub_notify->aux_reply.command;
+ if (!payload->write && p_notify->aux_reply.length &&
+ (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) {
+
+ if (payload->length != p_notify->aux_reply.length) {
+ DRM_WARN("invalid read length %d from DPIA AUX 0x%x(%d)!\n",
+ p_notify->aux_reply.length,
+ payload->address, payload->length);
+ *operation_result = AUX_RET_ERROR_INVALID_REPLY;
+ goto out;
}
+
+ memcpy(payload->data, p_notify->aux_reply.data,
+ p_notify->aux_reply.length);
}
- return return_status;
+ /* success */
+ ret = p_notify->aux_reply.length;
+ *operation_result = p_notify->result;
+out:
+ mutex_unlock(&adev->dm.dpia_aux_lock);
+ return ret;
}
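
The reworked DPIA AUX helper serialises callers with dm.dpia_aux_lock and then turns the asynchronous DMUB notification into a synchronous result by sleeping on a completion with a ten-second budget; a zero return from wait_for_completion_timeout() means the firmware never answered. The kernel-style sketch below strips that pattern to its core; the aux_channel container and submit_async callback are placeholders, not the real DMUB interfaces.

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/mutex.h>

struct aux_channel {                       /* illustrative container */
	struct mutex lock;                 /* one transfer in flight at a time */
	struct completion done;            /* completed from the notify path */
	int result;                        /* filled in by the notify handler */
};

/* mutex_init()/init_completion() are assumed to have run at setup time. */
static int aux_transfer_sync(struct aux_channel *ch,
			     int (*submit_async)(void *req), void *req)
{
	int ret;

	mutex_lock(&ch->lock);
	reinit_completion(&ch->done);

	ret = submit_async(req);           /* fire-and-forget toward firmware */
	if (ret)
		goto out;

	/* 0 means the wait timed out, otherwise the remaining jiffies. */
	if (!wait_for_completion_timeout(&ch->done, 10 * HZ)) {
		ret = -ETIMEDOUT;
		goto out;
	}
	ret = ch->result;                  /* reply was copied by the notifier */
out:
	mutex_unlock(&ch->lock);
	return ret;
}
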
-int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, struct dc_context *ctx,
- unsigned int link_index, void *cmd_payload, void *operation_result)
+int amdgpu_dm_process_dmub_set_config_sync(
+ struct dc_context *ctx,
+ unsigned int link_index,
+ struct set_config_cmd_payload *payload,
+ enum set_config_status *operation_result)
{
struct amdgpu_device *adev = ctx->driver_context;
- int ret = 0;
+ bool is_cmd_complete;
+ int ret;
- if (is_cmd_aux) {
- dc_process_dmub_aux_transfer_async(ctx->dc,
- link_index, (struct aux_payload *)cmd_payload);
- } else if (dc_process_dmub_set_config_async(ctx->dc, link_index,
- (struct set_config_cmd_payload *)cmd_payload,
- adev->dm.dmub_notify)) {
- return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
- ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS,
- (uint32_t *)operation_result);
- }
+ mutex_lock(&adev->dm.dpia_aux_lock);
+ is_cmd_complete = dc_process_dmub_set_config_async(ctx->dc,
+ link_index, payload, adev->dm.dmub_notify);
- ret = wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ);
- if (ret == 0) {
+ if (is_cmd_complete || wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) {
+ ret = 0;
+ *operation_result = adev->dm.dmub_notify->sc_status;
+ } else {
DRM_ERROR("wait_for_completion_timeout timeout!");
- return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
- ctx, DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT,
- (uint32_t *)operation_result);
- }
-
- if (is_cmd_aux) {
- if (adev->dm.dmub_notify->result == AUX_RET_SUCCESS) {
- struct aux_payload *payload = (struct aux_payload *)cmd_payload;
-
- payload->reply[0] = adev->dm.dmub_notify->aux_reply.command;
- if (!payload->write && adev->dm.dmub_notify->aux_reply.length &&
- payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK) {
-
- if (payload->length != adev->dm.dmub_notify->aux_reply.length) {
- DRM_WARN("invalid read from DPIA AUX %x(%d) got length %d!\n",
- payload->address, payload->length,
- adev->dm.dmub_notify->aux_reply.length);
- return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, ctx,
- DMUB_ASYNC_TO_SYNC_ACCESS_INVALID,
- (uint32_t *)operation_result);
- }
-
- memcpy(payload->data, adev->dm.dmub_notify->aux_reply.data,
- adev->dm.dmub_notify->aux_reply.length);
- }
- }
+ ret = -1;
+ *operation_result = SET_CONFIG_UNKNOWN_ERROR;
}
- return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
- ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS,
- (uint32_t *)operation_result);
+ mutex_unlock(&adev->dm.dpia_aux_lock);
+ return ret;
}
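
Editorial note: both DPIA helpers above share the same sync-over-async shape — take adev->dm.dpia_aux_lock, fire the asynchronous DMUB request, then block on dmub_aux_transfer_done with a 10 * HZ timeout before reading the notify data back under the same lock. The fragment below is a minimal user-space sketch of that ordering using pthreads; dpia_lock, reply_cv, reply_ready and send_async_request() are illustrative placeholders, not driver symbols.

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t dpia_lock = PTHREAD_MUTEX_INITIALIZER; /* models adev->dm.dpia_aux_lock */
static pthread_mutex_t reply_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  reply_cv  = PTHREAD_COND_INITIALIZER;  /* models the completion */
static bool reply_ready;

/* Placeholder for the async DMUB submission; a real reply path would set
 * reply_ready from the notify handler and signal reply_cv. */
static bool send_async_request(void) { return true; }

static int transfer_sync(void)
{
	struct timespec deadline;
	int ret = 0;

	pthread_mutex_lock(&dpia_lock);          /* serialize DPIA AUX users */
	if (!send_async_request()) {
		ret = -1;                        /* engine acquire failed */
		goto out;
	}

	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += 10;                   /* mirrors the 10 * HZ wait above */

	pthread_mutex_lock(&reply_mtx);
	while (!reply_ready) {
		if (pthread_cond_timedwait(&reply_cv, &reply_mtx, &deadline) == ETIMEDOUT) {
			ret = -2;                /* timeout */
			break;
		}
	}
	reply_ready = false;                     /* consume the reply for the next caller */
	pthread_mutex_unlock(&reply_mtx);
out:
	pthread_mutex_unlock(&dpia_lock);
	return ret;
}

int main(void)
{
	printf("transfer_sync() -> %d\n", transfer_sync());
	return 0;
}

The important ordering is that the outer lock is held across both the submission and the wait, so a second caller cannot reuse the single dmub_notify buffer while a reply is still pending.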
/*
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 83436ef3b26b..df3c25e32c65 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -59,7 +59,9 @@
#include "signal_types.h"
#include "amdgpu_dm_crc.h"
struct aux_payload;
+struct set_config_cmd_payload;
enum aux_return_code_type;
+enum set_config_status;
/* Forward declarations */
struct amdgpu_device;
@@ -542,6 +544,13 @@ struct amdgpu_display_manager {
* occurred on certain intel platform
*/
bool aux_hpd_discon_quirk;
+
+ /**
+ * @dpia_aux_lock:
+ *
+ * Guards access to DPIA AUX
+ */
+ struct mutex dpia_aux_lock;
};
enum dsc_clock_force_state {
@@ -785,9 +794,11 @@ void amdgpu_dm_update_connector_after_detect(
extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs;
-int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux,
- struct dc_context *ctx, unsigned int link_index,
- void *payload, void *operation_result);
+int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int link_index,
+ struct aux_payload *payload, enum aux_return_code_type *operation_result);
+
+int amdgpu_dm_process_dmub_set_config_sync(struct dc_context *ctx, unsigned int link_index,
+ struct set_config_cmd_payload *payload, enum set_config_status *operation_result);
bool check_seamless_boot_capability(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 2c43cdd2e707..461037a3dd75 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -2639,6 +2639,25 @@ static int dp_mst_progress_status_show(struct seq_file *m, void *unused)
return 0;
}
+/*
+ * Reports whether the connected display is a USB4 DPIA tunneled display
+ * Example usage: cat /sys/kernel/debug/dri/0/DP-8/is_dpia_link
+ */
+static int is_dpia_link_show(struct seq_file *m, void *data)
+{
+ struct drm_connector *connector = m->private;
+ struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+ struct dc_link *link = aconnector->dc_link;
+
+ if (connector->status != connector_status_connected)
+ return -ENODEV;
+
+ seq_printf(m, "%s\n", (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) ? "yes" :
+ (link->ep_type == DISPLAY_ENDPOINT_PHY) ? "no" : "unknown");
+
+ return 0;
+}
+
DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support);
DEFINE_SHOW_ATTRIBUTE(dmub_fw_state);
DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer);
@@ -2650,6 +2669,7 @@ DEFINE_SHOW_ATTRIBUTE(internal_display);
DEFINE_SHOW_ATTRIBUTE(psr_capability);
DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector);
DEFINE_SHOW_ATTRIBUTE(dp_mst_progress_status);
+DEFINE_SHOW_ATTRIBUTE(is_dpia_link);
static const struct file_operations dp_dsc_clock_en_debugfs_fops = {
.owner = THIS_MODULE,
@@ -2794,7 +2814,8 @@ static const struct {
{"max_bpc", &dp_max_bpc_debugfs_fops},
{"dsc_disable_passthrough", &dp_dsc_disable_passthrough_debugfs_fops},
{"is_mst_connector", &dp_is_mst_connector_fops},
- {"mst_progress_status", &dp_mst_progress_status_fops}
+ {"mst_progress_status", &dp_mst_progress_status_fops},
+ {"is_dpia_link", &is_dpia_link_fops}
};
#ifdef CONFIG_DRM_AMD_DC_HDCP
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index e47098fa5aac..6994c9a1ed85 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -817,9 +817,8 @@ int dm_helper_dmub_aux_transfer_sync(
struct aux_payload *payload,
enum aux_return_code_type *operation_result)
{
- return amdgpu_dm_process_dmub_aux_transfer_sync(true, ctx,
- link->link_index, (void *)payload,
- (void *)operation_result);
+ return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload,
+ operation_result);
}
int dm_helpers_dmub_set_config_sync(struct dc_context *ctx,
@@ -827,9 +826,8 @@ int dm_helpers_dmub_set_config_sync(struct dc_context *ctx,
struct set_config_cmd_payload *payload,
enum set_config_status *operation_result)
{
- return amdgpu_dm_process_dmub_aux_transfer_sync(false, ctx,
- link->link_index, (void *)payload,
- (void *)operation_result);
+ return amdgpu_dm_process_dmub_set_config_sync(ctx, link->link_index, payload,
+ operation_result);
}
void dm_set_dcn_clocks(struct dc_context *ctx, struct dc_clocks *clks)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 24d859ad4712..1edf7385f8d8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -1187,7 +1187,7 @@ static int pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
struct amdgpu_dm_connector *aconnector;
struct drm_dp_mst_topology_mgr *mst_mgr;
int link_vars_start_index = 0;
- int ret;
+ int ret = 0;
for (i = 0; i < dc_state->stream_count; i++)
computed_streams[i] = false;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index e6854f7270a6..3c50b3ff7954 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1600,6 +1600,10 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
supported_rotations);
+ if (dm->adev->ip_versions[DCE_HWIP][0] > IP_VERSION(3, 0, 1) &&
+ plane->type != DRM_PLANE_TYPE_CURSOR)
+ drm_plane_enable_fb_damage_clips(plane);
+
drm_plane_helper_add(plane, &dm_plane_helper_funcs);
#ifdef CONFIG_DRM_AMD_DC_HDR
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h
index 8ea8ee57b39f..61bb1d86182e 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
// This is a stripped-down version of the smu11_driver_if.h file for the relevant DAL interfaces.
#define SMU11_DRIVER_IF_VERSION 0x40
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
index 090b2c02aee1..0827c7df2855 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
@@ -333,8 +333,8 @@ void dcn31_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zst
(support == DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY))
support = DCN_ZSTATE_SUPPORT_DISALLOW;
-
- if (support == DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY)
+ if (support == DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY ||
+ support == DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY)
param = 1;
else
param = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c
index 2db595672a46..f47cfe6b42bd 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c
@@ -346,8 +346,6 @@ void dcn314_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zs
if (!clk_mgr->smu_present)
return;
- // Arg[15:0] = 8/9/0 for Z8/Z9/disallow -> existing bits
- // Arg[16] = Disallow Z9 -> new bit
switch (support) {
case DCN_ZSTATE_SUPPORT_ALLOW:
@@ -366,6 +364,16 @@ void dcn314_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zs
param = (1 << 10);
break;
+ case DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY:
+ msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
+ param = (1 << 10) | (1 << 8);
+ break;
+
+ case DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY:
+ msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
+ param = (1 << 8);
+ break;
+
default: //DCN_ZSTATE_SUPPORT_UNKNOWN
msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
param = 0;
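
Editorial note: the dcn314 hunk above encodes the newly added Z-state modes into the VBIOSSMC_MSG_AllowZstatesEntry argument with one bit per state — bit 8 allows Z8 entry and bit 10 allows Z10 entry, so Z8+Z10 becomes 0x500. Below is a standalone restatement of just the cases visible in this hunk, purely illustrative; the remaining states follow the full switch in dcn314_smu.c.

#include <stdint.h>
#include <stdio.h>

/* Only the cases visible in the hunk above; names are illustrative. */
enum zstate_mode {
	ZSTATE_ALLOW_Z10_ONLY,
	ZSTATE_ALLOW_Z8_Z10_ONLY,
	ZSTATE_ALLOW_Z8_ONLY,
	ZSTATE_UNKNOWN,
};

/* Bit 8 allows Z8 entry, bit 10 allows Z10 entry. */
static uint32_t zstate_to_param(enum zstate_mode m)
{
	switch (m) {
	case ZSTATE_ALLOW_Z10_ONLY:    return 1u << 10;               /* 0x400 */
	case ZSTATE_ALLOW_Z8_Z10_ONLY: return (1u << 10) | (1u << 8); /* 0x500 */
	case ZSTATE_ALLOW_Z8_ONLY:     return 1u << 8;                /* 0x100 */
	default:                       return 0;                      /* allow nothing */
	}
}

int main(void)
{
	printf("Z8+Z10 allow param = 0x%x\n", zstate_to_param(ZSTATE_ALLOW_Z8_Z10_ONLY));
	return 0;
}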
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
index 6f77d8e538ab..200fcec19186 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
@@ -233,41 +233,6 @@ void dcn32_init_clocks(struct clk_mgr *clk_mgr_base)
DC_FP_END();
}
-static void dcn32_update_clocks_update_dtb_dto(struct clk_mgr_internal *clk_mgr,
- struct dc_state *context,
- int ref_dtbclk_khz)
-{
- struct dccg *dccg = clk_mgr->dccg;
- uint32_t tg_mask = 0;
- int i;
-
- for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
- struct dtbclk_dto_params dto_params = {0};
-
- /* use mask to program DTO once per tg */
- if (pipe_ctx->stream_res.tg &&
- !(tg_mask & (1 << pipe_ctx->stream_res.tg->inst))) {
- tg_mask |= (1 << pipe_ctx->stream_res.tg->inst);
-
- dto_params.otg_inst = pipe_ctx->stream_res.tg->inst;
- dto_params.ref_dtbclk_khz = ref_dtbclk_khz;
-
- if (is_dp_128b_132b_signal(pipe_ctx)) {
- dto_params.pixclk_khz = pipe_ctx->stream->phy_pix_clk;
-
- if (pipe_ctx->stream_res.audio != NULL)
- dto_params.req_audio_dtbclk_khz = 24000;
- }
- if (dc_is_hdmi_signal(pipe_ctx->stream->signal))
- dto_params.is_hdmi = true;
-
- dccg->funcs->set_dtbclk_dto(clk_mgr->dccg, &dto_params);
- //dccg->funcs->set_audio_dtbclk_dto(clk_mgr->dccg, &dto_params);
- }
- }
-}
-
/* Since DPPCLK request to PMFW needs to be exact (due to DPP DTO programming),
* update DPPCLK to be the exact frequency that will be set after the DPPCLK
* divider is updated. This will prevent rounding issues that could cause DPP
@@ -438,7 +403,7 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
}
if (!new_clocks->dtbclk_en) {
- new_clocks->ref_dtbclk_khz = 0;
+ new_clocks->ref_dtbclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz * 1000;
}
/* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */
@@ -447,8 +412,6 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
/* DCCG requires KHz precision for DTBCLK */
clk_mgr_base->clks.ref_dtbclk_khz =
dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DTBCLK, khz_to_mhz_ceil(new_clocks->ref_dtbclk_khz));
-
- dcn32_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz);
}
if (dc->config.forced_clocks == false || (force_reset && safe_to_lower)) {
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h
index d30fbbdd1792..d3d5a8caccf8 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
// This is a stripped-down version of the smu13_driver_if.h file for the relevant DAL interfaces.
#define SMU13_DRIVER_IF_VERSION 0x18
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 1c3de3a1671e..0cb8d1f934d1 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1192,7 +1192,7 @@ static void disable_vbios_mode_if_required(
if (pix_clk_100hz != requested_pix_clk_100hz) {
core_link_disable_stream(pipe);
- pipe->stream->dpms_off = true;
+ pipe->stream->dpms_off = false;
}
}
}
@@ -1556,6 +1556,9 @@ bool dc_validate_boot_timing(const struct dc *dc,
if (tg_inst >= dc->res_pool->timing_generator_count)
return false;
+ if (tg_inst != link->link_enc->preferred_engine)
+ return false;
+
tg = dc->res_pool->timing_generators[tg_inst];
if (!tg->funcs->get_hw_timing)
@@ -1985,7 +1988,7 @@ context_alloc_fail:
DC_LOG_DC("%s Finished.\n", __func__);
- return (res == DC_OK);
+ return res;
}
/* TODO: When the transition to the new commit sequence is done, remove this
@@ -3061,7 +3064,7 @@ static bool update_planes_and_stream_state(struct dc *dc,
* Ensures that we have enough pipes for newly added MPO planes
*/
if (dc->res_pool->funcs->remove_phantom_pipes)
- dc->res_pool->funcs->remove_phantom_pipes(dc, context);
+ dc->res_pool->funcs->remove_phantom_pipes(dc, context, false);
/*remove old surfaces from context */
if (!dc_rem_all_planes_for_stream(dc, stream, context)) {
@@ -3098,6 +3101,19 @@ static bool update_planes_and_stream_state(struct dc *dc,
if (update_type == UPDATE_TYPE_FULL) {
if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) {
+ /* For phantom pipes we remove and create a new set of phantom pipes
+ * for each full update (because we don't know if we'll need phantom
+ * pipes until after the first round of validation). However, if validation
+ * fails we need to keep the existing phantom pipes (because we don't update
+ * the dc->current_state).
+ *
+ * The phantom stream/plane refcount is decremented for validation because
+ * we assume it'll be removed (the free comes when the dc_state is freed),
+ * but if validation fails we have to increment back the refcount so it's
+ * consistent.
+ */
+ if (dc->res_pool->funcs->retain_phantom_pipes)
+ dc->res_pool->funcs->retain_phantom_pipes(dc, dc->current_state);
BREAK_TO_DEBUGGER();
goto fail;
}
@@ -3740,6 +3756,8 @@ static bool could_mpcc_tree_change_for_active_pipes(struct dc *dc,
struct dc_stream_status *cur_stream_status = stream_get_status(dc->current_state, stream);
bool force_minimal_pipe_splitting = false;
+ bool subvp_active = false;
+ uint32_t i;
*is_plane_addition = false;
@@ -3771,11 +3789,25 @@ static bool could_mpcc_tree_change_for_active_pipes(struct dc *dc,
}
}
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) {
+ subvp_active = true;
+ break;
+ }
+ }
+
/* For SubVP when adding or removing planes we need to add a minimal transition
* (even when disabling all planes). Whenever disabling a phantom pipe, we
* must use the minimal transition path to disable the pipe correctly.
+ *
+ * We want to use the minimal transition whenever subvp is active, not only if
+ * a plane is being added / removed from a subvp stream (MPO plane can be added
+ * to a DRR pipe of SubVP + DRR config, in which case we still want to run through
+	 * a min transition to disable subvp).
*/
- if (cur_stream_status && stream->mall_stream_config.type == SUBVP_MAIN) {
+ if (cur_stream_status && subvp_active) {
/* determine if minimal transition is required due to SubVP*/
if (cur_stream_status->plane_count > surface_count) {
force_minimal_pipe_splitting = true;
@@ -3925,6 +3957,7 @@ bool dc_update_planes_and_stream(struct dc *dc,
struct dc_state *context;
enum surface_update_type update_type;
int i;
+ struct mall_temp_config mall_temp_config;
/* In cases where MPO and split or ODM are used transitions can
* cause underflow. Apply stream configuration with minimal pipe
@@ -3956,11 +3989,29 @@ bool dc_update_planes_and_stream(struct dc *dc,
/* on plane removal, minimal state is the new one */
if (force_minimal_pipe_splitting && !is_plane_addition) {
+ /* Since all phantom pipes are removed in full validation,
+ * we have to save and restore the subvp/mall config when
+ * we do a minimal transition since the flags marking the
+ * pipe as subvp/phantom will be cleared (dc copy constructor
+ * creates a shallow copy).
+ */
+ if (dc->res_pool->funcs->save_mall_state)
+ dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config);
if (!commit_minimal_transition_state(dc, context)) {
dc_release_state(context);
return false;
}
-
+ if (dc->res_pool->funcs->restore_mall_state)
+ dc->res_pool->funcs->restore_mall_state(dc, context, &mall_temp_config);
+
+ /* If we do a minimal transition with plane removal and the context
+ * has subvp we also have to retain back the phantom stream / planes
+ * since the refcount is decremented as part of the min transition
+ * (we commit a state with no subvp, so the phantom streams / planes
+ * had to be removed).
+ */
+ if (dc->res_pool->funcs->retain_phantom_pipes)
+ dc->res_pool->funcs->retain_phantom_pipes(dc, context);
update_type = UPDATE_TYPE_FULL;
}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 5304e9daf90a..342e906ae26e 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -3378,7 +3378,7 @@ bool dc_link_setup_psr(struct dc_link *link,
case FAMILY_YELLOW_CARP:
case AMDGPU_FAMILY_GC_10_3_6:
case AMDGPU_FAMILY_GC_11_0_1:
- if (dc->debug.disable_z10)
+ if (dc->debug.disable_z10 || dc->debug.psr_skip_crtc_disable)
psr_context->psr_level.bits.SKIP_CRTC_DISABLE = true;
break;
default:
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 38d71b5c1f2d..20e534f73513 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -332,9 +332,21 @@ bool dc_stream_set_cursor_attributes(
dc = stream->ctx->dc;
- if (dc->debug.allow_sw_cursor_fallback && attributes->height * attributes->width * 4 > 16384)
- if (stream->mall_stream_config.type == SUBVP_MAIN)
+ /* SubVP is not compatible with HW cursor larger than 64 x 64 x 4.
+	 * Therefore, if cursor is greater than 64 x 64 x 4, fall back to SW cursor in the following cases:
+ * 1. For single display cases, if resolution is >= 5K and refresh rate < 120hz
+ * 2. For multi display cases, if resolution is >= 4K and refresh rate < 120hz
+ *
+ * [< 120hz is a requirement for SubVP configs]
+ */
+ if (dc->debug.allow_sw_cursor_fallback && attributes->height * attributes->width * 4 > 16384) {
+ if (dc->current_state->stream_count == 1 && stream->timing.v_addressable >= 2880 &&
+ ((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120)
return false;
+ else if (dc->current_state->stream_count > 1 && stream->timing.v_addressable >= 2160 &&
+ ((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120)
+ return false;
+ }
stream->cursor_attributes = *attributes;
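
Editorial note: the fallback condition above derives the refresh rate as (pix_clk_100hz * 100) / v_total / h_total. A small worked example with made-up 5K/60 Hz timing values (not taken from any real EDID) shows how an oversized cursor trips the single-display rule:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical single-display 5K timing; values chosen for illustration only. */
	uint32_t h_total = 5200, v_total = 2950, v_addressable = 2880;
	uint32_t pix_clk_100hz = 9204000;        /* 920.4 MHz pixel clock in 100 Hz units */
	uint32_t cursor_w = 128, cursor_h = 128; /* 128x128 ARGB cursor */

	uint32_t refresh_hz = (pix_clk_100hz * 100) / v_total / h_total; /* = 60 */
	int cursor_too_big = cursor_h * cursor_w * 4 > 16384;            /* 65536 > 16384 */

	/* Single-display rule from the hunk: >= 5K (v_addressable >= 2880) and < 120 Hz. */
	int use_sw_cursor = cursor_too_big && v_addressable >= 2880 && refresh_hz < 120;

	printf("refresh=%u Hz, sw cursor fallback=%d\n", refresh_hz, use_sw_cursor);
	return 0;
}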
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 57fc9193c770..85ebeaa2de18 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -47,7 +47,7 @@ struct aux_payload;
struct set_config_cmd_payload;
struct dmub_notification;
-#define DC_VER "3.2.212"
+#define DC_VER "3.2.215"
#define MAX_SURFACES 3
#define MAX_PLANES 6
@@ -267,6 +267,7 @@ struct dc_caps {
uint16_t subvp_pstate_allow_width_us;
uint16_t subvp_vertical_int_margin_us;
bool seamless_odm;
+ uint8_t subvp_drr_vblank_start_margin_us;
};
struct dc_bug_wa {
@@ -492,6 +493,8 @@ enum dcn_pwr_state {
enum dcn_zstate_support_state {
DCN_ZSTATE_SUPPORT_UNKNOWN,
DCN_ZSTATE_SUPPORT_ALLOW,
+ DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY,
+ DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY,
DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY,
DCN_ZSTATE_SUPPORT_DISALLOW,
};
@@ -841,6 +844,7 @@ struct dc_debug_options {
int crb_alloc_policy_min_disp_count;
bool disable_z10;
bool enable_z9_disable_interface;
+ bool psr_skip_crtc_disable;
union dpia_debug_options dpia_debug;
bool disable_fixed_vs_aux_timeout_wa;
bool force_disable_subvp;
@@ -849,6 +853,7 @@ struct dc_debug_options {
unsigned int force_subvp_num_ways;
unsigned int force_mall_ss_num_ways;
bool alloc_extra_way_for_cursor;
+ uint32_t subvp_extra_lines;
bool force_usr_allow;
/* uses value at boot and disables switch */
bool disable_dtb_ref_clk_switch;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 097556f7b32c..6ccf477d1c4d 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -493,6 +493,7 @@ static void populate_subvp_cmd_drr_info(struct dc *dc,
pipe_data->pipe_config.vblank_data.drr_info.min_vtotal_supported = min_vtotal_supported;
pipe_data->pipe_config.vblank_data.drr_info.max_vtotal_supported = max_vtotal_supported;
+ pipe_data->pipe_config.vblank_data.drr_info.drr_vblank_start_margin = dc->caps.subvp_drr_vblank_start_margin_us;
}
/**
diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h
index dc6afe33bca2..2e18bcf6b11a 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
@@ -151,6 +151,20 @@ struct dc_panel_config {
bool optimize_edp_link_rate; /* eDP ILR */
} ilr;
};
+
+/*
+ * USB4 DPIA BW ALLOCATION STRUCTS
+ */
+struct dc_dpia_bw_alloc {
+	int sink_verified_bw;  // The Verified BW that the sink can allocate and use, which has been verified already
+	int sink_allocated_bw; // The Actual Allocated BW that the sink has currently allocated
+	int padding_bw;        // The Padding "Un-used" BW allocated by CM for padding reasons
+	int sink_max_bw;       // The Max BW that the sink can require/support
+	int estimated_bw;      // The estimated available BW for this DPIA
+	int bw_granularity;    // BW Granularity
+	bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3: DP-Tx & Dpia & CM
+};
+
/*
* A link contains one or more sinks and their connected status.
* The currently active signal type (HDMI, DP-SST, DP-MST) is also reported.
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index f4dfd3a49b68..dfd3df1d2f7e 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -144,7 +144,7 @@ struct test_pattern {
unsigned int cust_pattern_size;
};
-#define SUBVP_DRR_MARGIN_US 500 // 500us for DRR margin (SubVP + DRR)
+#define SUBVP_DRR_MARGIN_US 600 // 600us for DRR margin (SubVP + DRR)
enum mall_stream_type {
SUBVP_NONE, // subvp not in use
@@ -160,6 +160,17 @@ struct mall_stream_config {
struct dc_stream_state *paired_stream; // master / slave stream
};
+/* Temp struct used to save and restore MALL config
+ * during validation.
+ *
+ * TODO: Move MALL config into dc_state instead of stream struct
+ * to avoid needing to save/restore.
+ */
+struct mall_temp_config {
+ struct mall_stream_config mall_stream_config[MAX_PIPES];
+ bool is_phantom_plane[MAX_PIPES];
+};
+
struct dc_stream_state {
// sink is deprecated, new code should not reference
// this pointer
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index c5380ce70653..913a1fe6b3da 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -1561,8 +1561,13 @@ static enum dc_status apply_single_controller_ctx_to_hw(
pipe_ctx->plane_res.scl_data.lb_params.alpha_en = pipe_ctx->bottom_pipe != NULL;
- pipe_ctx->stream->link->psr_settings.psr_feature_enabled = false;
-
+ /* Phantom and main stream share the same link (because the stream
+ * is constructed with the same sink). Make sure not to override
+	 * any link programming on the main.
+ */
+ if (pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+ pipe_ctx->stream->link->psr_settings.psr_feature_enabled = false;
+ }
return DC_OK;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
index 1b70b78e2fa1..af631085e88c 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
@@ -359,7 +359,8 @@ static const struct dce_audio_registers audio_regs[] = {
audio_regs(2),
audio_regs(3),
audio_regs(4),
- audio_regs(5)
+ audio_regs(5),
+ audio_regs(6),
};
#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
index b9765b3899e1..ef52e6b6eccf 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
@@ -436,34 +436,48 @@ void dpp1_set_cursor_position(
uint32_t height)
{
struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base);
- int src_x_offset = pos->x - pos->x_hotspot - param->viewport.x;
- int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y;
+ int x_pos = pos->x - param->viewport.x;
+ int y_pos = pos->y - param->viewport.y;
+ int x_hotspot = pos->x_hotspot;
+ int y_hotspot = pos->y_hotspot;
+ int src_x_offset = x_pos - pos->x_hotspot;
+ int src_y_offset = y_pos - pos->y_hotspot;
+ int cursor_height = (int)height;
+ int cursor_width = (int)width;
uint32_t cur_en = pos->enable ? 1 : 0;
- // Cursor width/height and hotspots need to be rotated for offset calculation
+ // Transform cursor width / height and hotspots for offset calculations
if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) {
- swap(width, height);
+ swap(cursor_height, cursor_width);
+ swap(x_hotspot, y_hotspot);
+
if (param->rotation == ROTATION_ANGLE_90) {
- src_x_offset = pos->x - pos->y_hotspot - param->viewport.x;
- src_y_offset = pos->y - pos->x_hotspot - param->viewport.y;
+ // hotspot = (-y, x)
+ src_x_offset = x_pos - (cursor_width - x_hotspot);
+ src_y_offset = y_pos - y_hotspot;
+ } else if (param->rotation == ROTATION_ANGLE_270) {
+ // hotspot = (y, -x)
+ src_x_offset = x_pos - x_hotspot;
+ src_y_offset = y_pos - (cursor_height - y_hotspot);
}
} else if (param->rotation == ROTATION_ANGLE_180) {
+ // hotspot = (-x, -y)
if (!param->mirror)
- src_x_offset = pos->x - param->viewport.x;
+ src_x_offset = x_pos - (cursor_width - x_hotspot);
- src_y_offset = pos->y - param->viewport.y;
+ src_y_offset = y_pos - (cursor_height - y_hotspot);
}
if (src_x_offset >= (int)param->viewport.width)
cur_en = 0; /* not visible beyond right edge*/
- if (src_x_offset + (int)width <= 0)
+ if (src_x_offset + cursor_width <= 0)
cur_en = 0; /* not visible beyond left edge*/
if (src_y_offset >= (int)param->viewport.height)
cur_en = 0; /* not visible beyond bottom edge*/
- if (src_y_offset + (int)height <= 0)
+ if (src_y_offset + cursor_height <= 0)
cur_en = 0; /* not visible beyond top edge*/
REG_UPDATE(CURSOR0_CONTROL,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
index 0f746bb4e500..d51f1ce02874 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
@@ -55,7 +55,7 @@ void hubbub1_wm_read_state(struct hubbub *hubbub,
s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A);
s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A);
}
- s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A);
+ s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A);
s = &wm->sets[1];
s->wm_set = 1;
@@ -65,7 +65,7 @@ void hubbub1_wm_read_state(struct hubbub *hubbub,
s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B);
s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B);
}
- s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B);
+ s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B);
s = &wm->sets[2];
s->wm_set = 2;
@@ -75,7 +75,7 @@ void hubbub1_wm_read_state(struct hubbub *hubbub,
s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C);
s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C);
}
- s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C);
+ s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C);
s = &wm->sets[3];
s->wm_set = 3;
@@ -85,7 +85,7 @@ void hubbub1_wm_read_state(struct hubbub *hubbub,
s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D);
s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D);
}
- s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D);
+ s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D);
}
void hubbub1_allow_self_refresh_control(struct hubbub *hubbub, bool allow)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
index 52e201e9b091..a142a00bc432 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
@@ -1179,10 +1179,12 @@ void hubp1_cursor_set_position(
const struct dc_cursor_mi_param *param)
{
struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
- int src_x_offset = pos->x - pos->x_hotspot - param->viewport.x;
- int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y;
+ int x_pos = pos->x - param->viewport.x;
+ int y_pos = pos->y - param->viewport.y;
int x_hotspot = pos->x_hotspot;
int y_hotspot = pos->y_hotspot;
+ int src_x_offset = x_pos - pos->x_hotspot;
+ int src_y_offset = y_pos - pos->y_hotspot;
int cursor_height = (int)hubp->curs_attr.height;
int cursor_width = (int)hubp->curs_attr.width;
uint32_t dst_x_offset;
@@ -1200,18 +1202,26 @@ void hubp1_cursor_set_position(
if (hubp->curs_attr.address.quad_part == 0)
return;
- // Rotated cursor width/height and hotspots tweaks for offset calculation
+ // Transform cursor width / height and hotspots for offset calculations
if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) {
swap(cursor_height, cursor_width);
+ swap(x_hotspot, y_hotspot);
+
if (param->rotation == ROTATION_ANGLE_90) {
- src_x_offset = pos->x - pos->y_hotspot - param->viewport.x;
- src_y_offset = pos->y - pos->x_hotspot - param->viewport.y;
+ // hotspot = (-y, x)
+ src_x_offset = x_pos - (cursor_width - x_hotspot);
+ src_y_offset = y_pos - y_hotspot;
+ } else if (param->rotation == ROTATION_ANGLE_270) {
+ // hotspot = (y, -x)
+ src_x_offset = x_pos - x_hotspot;
+ src_y_offset = y_pos - (cursor_height - y_hotspot);
}
} else if (param->rotation == ROTATION_ANGLE_180) {
+ // hotspot = (-x, -y)
if (!param->mirror)
- src_x_offset = pos->x - param->viewport.x;
+ src_x_offset = x_pos - (cursor_width - x_hotspot);
- src_y_offset = pos->y - param->viewport.y;
+ src_y_offset = y_pos - (cursor_height - y_hotspot);
}
dst_x_offset = (src_x_offset >= 0) ? src_x_offset : 0;
@@ -1248,8 +1258,8 @@ void hubp1_cursor_set_position(
CURSOR_Y_POSITION, pos->y);
REG_SET_2(CURSOR_HOT_SPOT, 0,
- CURSOR_HOT_SPOT_X, x_hotspot,
- CURSOR_HOT_SPOT_Y, y_hotspot);
+ CURSOR_HOT_SPOT_X, pos->x_hotspot,
+ CURSOR_HOT_SPOT_Y, pos->y_hotspot);
REG_SET(CURSOR_DST_OFFSET, 0,
CURSOR_DST_X_OFFSET, dst_x_offset);
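
Editorial note: the rotated-hotspot math repeated in the dpp1/hubp1/hubp2 hunks is easiest to see with numbers. The snippet below walks the ROTATION_ANGLE_90 branch with arbitrary example values; variable names mirror the hunk but nothing here comes from real hardware state.

#include <stdio.h>

int main(void)
{
	int viewport_x = 0, viewport_y = 0;
	int pos_x = 500, pos_y = 300;               /* cursor position */
	int pos_x_hotspot = 10, pos_y_hotspot = 20; /* unrotated hotspot */
	int width = 64, height = 64;                /* cursor attributes */

	int x_pos = pos_x - viewport_x;
	int y_pos = pos_y - viewport_y;
	int cursor_width = width, cursor_height = height;
	int x_hotspot = pos_x_hotspot, y_hotspot = pos_y_hotspot;

	/* 90 degrees: swap extents and hotspots, then hotspot = (-y, x). */
	int tmp = cursor_width;  cursor_width = cursor_height;  cursor_height = tmp;
	tmp = x_hotspot;         x_hotspot = y_hotspot;         y_hotspot = tmp;

	int src_x_offset = x_pos - (cursor_width - x_hotspot); /* 500 - (64 - 20) = 456 */
	int src_y_offset = y_pos - y_hotspot;                   /* 300 - 10 = 290 */

	printf("src offsets after 90 deg rotation: (%d, %d)\n", src_x_offset, src_y_offset);
	return 0;
}

Note that the hunks now program CURSOR_HOT_SPOT from the untransformed pos->x_hotspot / pos->y_hotspot, while the transformed values are used only for the visibility and offset calculations.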
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index 587b04b8712d..fe2023f18b7d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -159,7 +159,7 @@ static void dcn10_log_hubbub_state(struct dc *dc,
DTN_INFO_MICRO_SEC(s->pte_meta_urgent);
DTN_INFO_MICRO_SEC(s->sr_enter);
DTN_INFO_MICRO_SEC(s->sr_exit);
- DTN_INFO_MICRO_SEC(s->dram_clk_chanage);
+ DTN_INFO_MICRO_SEC(s->dram_clk_change);
DTN_INFO("\n");
}
@@ -869,6 +869,32 @@ static void false_optc_underflow_wa(
tg->funcs->clear_optc_underflow(tg);
}
+static int calculate_vready_offset_for_group(struct pipe_ctx *pipe)
+{
+ struct pipe_ctx *other_pipe;
+ int vready_offset = pipe->pipe_dlg_param.vready_offset;
+
+ /* Always use the largest vready_offset of all connected pipes */
+ for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) {
+ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
+ vready_offset = other_pipe->pipe_dlg_param.vready_offset;
+ }
+ for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) {
+ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
+ vready_offset = other_pipe->pipe_dlg_param.vready_offset;
+ }
+ for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) {
+ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
+ vready_offset = other_pipe->pipe_dlg_param.vready_offset;
+ }
+ for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) {
+ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
+ vready_offset = other_pipe->pipe_dlg_param.vready_offset;
+ }
+
+ return vready_offset;
+}
+
enum dc_status dcn10_enable_stream_timing(
struct pipe_ctx *pipe_ctx,
struct dc_state *context,
@@ -912,7 +938,7 @@ enum dc_status dcn10_enable_stream_timing(
pipe_ctx->stream_res.tg->funcs->program_timing(
pipe_ctx->stream_res.tg,
&stream->timing,
- pipe_ctx->pipe_dlg_param.vready_offset,
+ calculate_vready_offset_for_group(pipe_ctx),
pipe_ctx->pipe_dlg_param.vstartup_start,
pipe_ctx->pipe_dlg_param.vupdate_offset,
pipe_ctx->pipe_dlg_param.vupdate_width,
@@ -2190,6 +2216,12 @@ void dcn10_enable_vblanks_synchronization(
opp = grouped_pipes[i]->stream_res.opp;
tg = grouped_pipes[i]->stream_res.tg;
tg->funcs->get_otg_active_size(tg, &width, &height);
+
+ if (!tg->funcs->is_tg_enabled(tg)) {
+ DC_SYNC_INFO("Skipping timing sync on disabled OTG\n");
+ return;
+ }
+
if (opp->funcs->opp_program_dpg_dimensions)
opp->funcs->opp_program_dpg_dimensions(opp, width, 2*(height) + 1);
}
@@ -2252,6 +2284,12 @@ void dcn10_enable_timing_synchronization(
opp = grouped_pipes[i]->stream_res.opp;
tg = grouped_pipes[i]->stream_res.tg;
tg->funcs->get_otg_active_size(tg, &width, &height);
+
+ if (!tg->funcs->is_tg_enabled(tg)) {
+ DC_SYNC_INFO("Skipping timing sync on disabled OTG\n");
+ return;
+ }
+
if (opp->funcs->opp_program_dpg_dimensions)
opp->funcs->opp_program_dpg_dimensions(opp, width, 2*(height) + 1);
}
@@ -2902,7 +2940,7 @@ void dcn10_program_pipe(
pipe_ctx->stream_res.tg->funcs->program_global_sync(
pipe_ctx->stream_res.tg,
- pipe_ctx->pipe_dlg_param.vready_offset,
+ calculate_vready_offset_for_group(pipe_ctx),
pipe_ctx->pipe_dlg_param.vstartup_start,
pipe_ctx->pipe_dlg_param.vupdate_offset,
pipe_ctx->pipe_dlg_param.vupdate_width);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
index e8b6065fffad..a0f8e31d2adc 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
@@ -83,7 +83,7 @@ static unsigned int dcn10_get_hubbub_state(struct dc *dc, char *pBuf, unsigned i
memset(&wm, 0, sizeof(struct dcn_hubbub_wm));
dc->res_pool->hubbub->funcs->wm_read_state(dc->res_pool->hubbub, &wm);
- chars_printed = snprintf_count(pBuf, remaining_buffer, "wm_set_index,data_urgent,pte_meta_urgent,sr_enter,sr_exit,dram_clk_chanage\n");
+ chars_printed = snprintf_count(pBuf, remaining_buffer, "wm_set_index,data_urgent,pte_meta_urgent,sr_enter,sr_exit,dram_clk_change\n");
remaining_buffer -= chars_printed;
pBuf += chars_printed;
@@ -98,7 +98,7 @@ static unsigned int dcn10_get_hubbub_state(struct dc *dc, char *pBuf, unsigned i
(s->pte_meta_urgent * frac) / ref_clk_mhz / frac, (s->pte_meta_urgent * frac) / ref_clk_mhz % frac,
(s->sr_enter * frac) / ref_clk_mhz / frac, (s->sr_enter * frac) / ref_clk_mhz % frac,
(s->sr_exit * frac) / ref_clk_mhz / frac, (s->sr_exit * frac) / ref_clk_mhz % frac,
- (s->dram_clk_chanage * frac) / ref_clk_mhz / frac, (s->dram_clk_chanage * frac) / ref_clk_mhz % frac);
+ (s->dram_clk_change * frac) / ref_clk_mhz / frac, (s->dram_clk_change * frac) / ref_clk_mhz % frac);
remaining_buffer -= chars_printed;
pBuf += chars_printed;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c
index aacb1fb5c73e..24bd93219936 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c
@@ -500,7 +500,7 @@ void hubbub2_wm_read_state(struct hubbub *hubbub,
s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A);
s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A);
}
- s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A);
+ s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A);
s = &wm->sets[1];
s->wm_set = 1;
@@ -511,7 +511,7 @@ void hubbub2_wm_read_state(struct hubbub *hubbub,
s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B);
s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B);
}
- s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B);
+ s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B);
s = &wm->sets[2];
s->wm_set = 2;
@@ -522,7 +522,7 @@ void hubbub2_wm_read_state(struct hubbub *hubbub,
s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C);
s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C);
}
- s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C);
+ s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C);
s = &wm->sets[3];
s->wm_set = 3;
@@ -533,7 +533,7 @@ void hubbub2_wm_read_state(struct hubbub *hubbub,
s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D);
s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D);
}
- s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D);
+ s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D);
}
void hubbub2_get_dchub_ref_freq(struct hubbub *hubbub,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
index 938dba5249d4..4566bc7abf17 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
@@ -973,10 +973,12 @@ void hubp2_cursor_set_position(
const struct dc_cursor_mi_param *param)
{
struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
- int src_x_offset = pos->x - pos->x_hotspot - param->viewport.x;
- int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y;
+ int x_pos = pos->x - param->viewport.x;
+ int y_pos = pos->y - param->viewport.y;
int x_hotspot = pos->x_hotspot;
int y_hotspot = pos->y_hotspot;
+ int src_x_offset = x_pos - pos->x_hotspot;
+ int src_y_offset = y_pos - pos->y_hotspot;
int cursor_height = (int)hubp->curs_attr.height;
int cursor_width = (int)hubp->curs_attr.width;
uint32_t dst_x_offset;
@@ -994,18 +996,26 @@ void hubp2_cursor_set_position(
if (hubp->curs_attr.address.quad_part == 0)
return;
- // Rotated cursor width/height and hotspots tweaks for offset calculation
+ // Transform cursor width / height and hotspots for offset calculations
if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) {
swap(cursor_height, cursor_width);
+ swap(x_hotspot, y_hotspot);
+
if (param->rotation == ROTATION_ANGLE_90) {
- src_x_offset = pos->x - pos->y_hotspot - param->viewport.x;
- src_y_offset = pos->y - pos->x_hotspot - param->viewport.y;
+ // hotspot = (-y, x)
+ src_x_offset = x_pos - (cursor_width - x_hotspot);
+ src_y_offset = y_pos - y_hotspot;
+ } else if (param->rotation == ROTATION_ANGLE_270) {
+ // hotspot = (y, -x)
+ src_x_offset = x_pos - x_hotspot;
+ src_y_offset = y_pos - (cursor_height - y_hotspot);
}
} else if (param->rotation == ROTATION_ANGLE_180) {
+ // hotspot = (-x, -y)
if (!param->mirror)
- src_x_offset = pos->x - param->viewport.x;
+ src_x_offset = x_pos - (cursor_width - x_hotspot);
- src_y_offset = pos->y - param->viewport.y;
+ src_y_offset = y_pos - (cursor_height - y_hotspot);
}
dst_x_offset = (src_x_offset >= 0) ? src_x_offset : 0;
@@ -1042,8 +1052,8 @@ void hubp2_cursor_set_position(
CURSOR_Y_POSITION, pos->y);
REG_SET_2(CURSOR_HOT_SPOT, 0,
- CURSOR_HOT_SPOT_X, x_hotspot,
- CURSOR_HOT_SPOT_Y, y_hotspot);
+ CURSOR_HOT_SPOT_X, pos->x_hotspot,
+ CURSOR_HOT_SPOT_Y, pos->y_hotspot);
REG_SET(CURSOR_DST_OFFSET, 0,
CURSOR_DST_X_OFFSET, dst_x_offset);
@@ -1052,8 +1062,8 @@ void hubp2_cursor_set_position(
hubp->pos.cur_ctl.bits.cur_enable = cur_en;
hubp->pos.position.bits.x_pos = pos->x;
hubp->pos.position.bits.y_pos = pos->y;
- hubp->pos.hot_spot.bits.x_hot = x_hotspot;
- hubp->pos.hot_spot.bits.y_hot = y_hotspot;
+ hubp->pos.hot_spot.bits.x_hot = pos->x_hotspot;
+ hubp->pos.hot_spot.bits.y_hot = pos->y_hotspot;
hubp->pos.dst_offset.bits.dst_x_offset = dst_x_offset;
/* Cursor Rectangle Cache
* Cursor bitmaps have different hotspot values
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
index 3f3d4daa6294..6291a241158a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
@@ -1652,6 +1652,31 @@ static void dcn20_update_dchubp_dpp(
hubp->funcs->phantom_hubp_post_enable(hubp);
}
+static int calculate_vready_offset_for_group(struct pipe_ctx *pipe)
+{
+ struct pipe_ctx *other_pipe;
+ int vready_offset = pipe->pipe_dlg_param.vready_offset;
+
+ /* Always use the largest vready_offset of all connected pipes */
+ for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) {
+ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
+ vready_offset = other_pipe->pipe_dlg_param.vready_offset;
+ }
+ for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) {
+ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
+ vready_offset = other_pipe->pipe_dlg_param.vready_offset;
+ }
+ for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) {
+ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
+ vready_offset = other_pipe->pipe_dlg_param.vready_offset;
+ }
+ for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) {
+ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
+ vready_offset = other_pipe->pipe_dlg_param.vready_offset;
+ }
+
+ return vready_offset;
+}
static void dcn20_program_pipe(
struct dc *dc,
@@ -1670,7 +1695,7 @@ static void dcn20_program_pipe(
&& !pipe_ctx->prev_odm_pipe) {
pipe_ctx->stream_res.tg->funcs->program_global_sync(
pipe_ctx->stream_res.tg,
- pipe_ctx->pipe_dlg_param.vready_offset,
+ calculate_vready_offset_for_group(pipe_ctx),
pipe_ctx->pipe_dlg_param.vstartup_start,
pipe_ctx->pipe_dlg_param.vupdate_offset,
pipe_ctx->pipe_dlg_param.vupdate_width);
@@ -1716,7 +1741,10 @@ static void dcn20_program_pipe(
* only do gamma programming for powering on, internal memcmp to avoid
* updating on slave planes
*/
- if (pipe_ctx->update_flags.bits.enable || pipe_ctx->stream->update_flags.bits.out_tf)
+ if (pipe_ctx->update_flags.bits.enable ||
+ pipe_ctx->update_flags.bits.plane_changed ||
+ pipe_ctx->stream->update_flags.bits.out_tf ||
+ pipe_ctx->plane_state->update_flags.bits.output_tf_change)
hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream);
/* If the pipe has been enabled or has a different opp, we
@@ -1983,10 +2011,13 @@ void dcn20_prepare_bandwidth(
/* decrease compbuf size */
if (hubbub->funcs->program_compbuf_size) {
- if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes)
+ if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes) {
compbuf_size_kb = context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes;
- else
+ dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes);
+ } else {
compbuf_size_kb = context->bw_ctx.bw.dcn.compbuf_size_kb;
+ dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb);
+ }
hubbub->funcs->program_compbuf_size(hubbub, compbuf_size_kb, false);
}
@@ -2067,7 +2098,7 @@ bool dcn20_update_bandwidth(
pipe_ctx->stream_res.tg->funcs->program_global_sync(
pipe_ctx->stream_res.tg,
- pipe_ctx->pipe_dlg_param.vready_offset,
+ calculate_vready_offset_for_group(pipe_ctx),
pipe_ctx->pipe_dlg_param.vstartup_start,
pipe_ctx->pipe_dlg_param.vupdate_offset,
pipe_ctx->pipe_dlg_param.vupdate_width);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
index c5e200d09038..aeb0e0d9b70a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
@@ -635,7 +635,7 @@ void hubbub21_wm_read_state(struct hubbub *hubbub,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, &s->sr_exit);
REG_GET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A,
- DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, &s->dram_clk_chanage);
+ DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, &s->dram_clk_change);
s = &wm->sets[1];
s->wm_set = 1;
@@ -649,7 +649,7 @@ void hubbub21_wm_read_state(struct hubbub *hubbub,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, &s->sr_exit);
REG_GET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B,
- DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, &s->dram_clk_chanage);
+ DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, &s->dram_clk_change);
s = &wm->sets[2];
s->wm_set = 2;
@@ -663,7 +663,7 @@ void hubbub21_wm_read_state(struct hubbub *hubbub,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, &s->sr_exit);
REG_GET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C,
- DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, &s->dram_clk_chanage);
+ DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, &s->dram_clk_change);
s = &wm->sets[3];
s->wm_set = 3;
@@ -677,7 +677,7 @@ void hubbub21_wm_read_state(struct hubbub *hubbub,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, &s->sr_exit);
REG_GET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D,
- DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, &s->dram_clk_chanage);
+ DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, &s->dram_clk_change);
}
static void hubbub21_apply_DEDCN21_147_wa(struct hubbub *hubbub)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
index 480145f09246..8cf10351f271 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
@@ -1493,6 +1493,8 @@ static bool dcn301_resource_construct(
dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
dc->caps.color.mpc.ocsc = 1;
+ dc->caps.dp_hdmi21_pcon_support = true;
+
/* read VBIOS LTTPR caps */
if (ctx->dc_bios->funcs->get_lttpr_caps) {
enum bp_result bp_query_result;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
index 7d11c2a43cbe..47cffd0e6830 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
@@ -1281,6 +1281,8 @@ static bool dcn302_resource_construct(
dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
dc->caps.color.mpc.ocsc = 1;
+ dc->caps.dp_hdmi21_pcon_support = true;
+
/* read VBIOS LTTPR caps */
if (ctx->dc_bios->funcs->get_lttpr_caps) {
enum bp_result bp_query_result;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
index 92393b04cc44..c14d35894b2e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
@@ -1212,6 +1212,8 @@ static bool dcn303_resource_construct(
dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
dc->caps.color.mpc.ocsc = 1;
+ dc->caps.dp_hdmi21_pcon_support = true;
+
/* read VBIOS LTTPR caps */
if (ctx->dc_bios->funcs->get_lttpr_caps) {
enum bp_result bp_query_result;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
index 165c920ca776..4226a051df41 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
@@ -623,3 +623,43 @@ void dcn31_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable)
if (hws->ctx->dc->debug.hpo_optimization)
REG_UPDATE(HPO_TOP_HW_CONTROL, HPO_IO_EN, !!enable);
}
+void dcn31_set_drr(struct pipe_ctx **pipe_ctx,
+ int num_pipes, struct dc_crtc_timing_adjust adjust)
+{
+ int i = 0;
+ struct drr_params params = {0};
+ unsigned int event_triggers = 0x2;/*Bit[1]: OTG_TRIG_A*/
+ unsigned int num_frames = 2;
+ params.vertical_total_max = adjust.v_total_max;
+ params.vertical_total_min = adjust.v_total_min;
+ params.vertical_total_mid = adjust.v_total_mid;
+ params.vertical_total_mid_frame_num = adjust.v_total_mid_frame_num;
+ for (i = 0; i < num_pipes; i++) {
+ if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) {
+ if (pipe_ctx[i]->stream_res.tg->funcs->set_drr)
+ pipe_ctx[i]->stream_res.tg->funcs->set_drr(
+ pipe_ctx[i]->stream_res.tg, &params);
+ if (adjust.v_total_max != 0 && adjust.v_total_min != 0)
+ if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control)
+ pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(
+ pipe_ctx[i]->stream_res.tg,
+ event_triggers, num_frames);
+ }
+ }
+}
+void dcn31_set_static_screen_control(struct pipe_ctx **pipe_ctx,
+ int num_pipes, const struct dc_static_screen_params *params)
+{
+ unsigned int i;
+ unsigned int triggers = 0;
+ if (params->triggers.surface_update)
+ triggers |= 0x600;/*bit 9 and bit10 : 110 0000 0000*/
+ if (params->triggers.cursor_update)
+ triggers |= 0x10;/*bit4*/
+ if (params->triggers.force_trigger)
+ triggers |= 0x1;
+ for (i = 0; i < num_pipes; i++)
+ pipe_ctx[i]->stream_res.tg->funcs->
+ set_static_screen_control(pipe_ctx[i]->stream_res.tg,
+ triggers, params->num_frames);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h
index edfc01d6ad73..e7e03a8722e0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h
@@ -56,4 +56,8 @@ bool dcn31_is_abm_supported(struct dc *dc,
void dcn31_init_pipes(struct dc *dc, struct dc_state *context);
void dcn31_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable);
+void dcn31_set_static_screen_control(struct pipe_ctx **pipe_ctx,
+ int num_pipes, const struct dc_static_screen_params *params);
+void dcn31_set_drr(struct pipe_ctx **pipe_ctx,
+ int num_pipes, struct dc_crtc_timing_adjust adjust);
#endif /* __DC_HWSS_DCN31_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
index 3a32810bbe38..7c2da70ffe21 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
@@ -64,9 +64,9 @@ static const struct hw_sequencer_funcs dcn31_funcs = {
.prepare_bandwidth = dcn20_prepare_bandwidth,
.optimize_bandwidth = dcn20_optimize_bandwidth,
.update_bandwidth = dcn20_update_bandwidth,
- .set_drr = dcn10_set_drr,
+ .set_drr = dcn31_set_drr,
.get_position = dcn10_get_position,
- .set_static_screen_control = dcn10_set_static_screen_control,
+ .set_static_screen_control = dcn31_set_static_screen_control,
.setup_stereo = dcn10_setup_stereo,
.set_avmute = dcn30_set_avmute,
.log_hw_state = dcn10_log_hw_state,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c
index 63a677c8ee27..fe449f7aa771 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c
@@ -40,6 +40,7 @@
#define FN(reg_name, field_name) \
optc1->tg_shift->field_name, optc1->tg_mask->field_name
+#define STATIC_SCREEN_EVENT_MASK_DRR_DOUBLE_BUFFER_UPDATE_EN 0x2000 /*bit 13*/
static void optc31_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt,
struct dc_crtc_timing *timing)
{
@@ -231,6 +232,32 @@ void optc3_init_odm(struct timing_generator *optc)
OPTC_MEM_SEL, 0);
optc1->opp_count = 1;
}
+void optc31_set_static_screen_control(
+ struct timing_generator *optc,
+ uint32_t event_triggers,
+ uint32_t num_frames)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+ uint32_t framecount;
+ uint32_t events;
+
+ if (num_frames > 0xFF)
+ num_frames = 0xFF;
+ REG_GET_2(OTG_STATIC_SCREEN_CONTROL,
+ OTG_STATIC_SCREEN_EVENT_MASK, &events,
+ OTG_STATIC_SCREEN_FRAME_COUNT, &framecount);
+
+ if (events == event_triggers && num_frames == framecount)
+ return;
+ if ((event_triggers & STATIC_SCREEN_EVENT_MASK_DRR_DOUBLE_BUFFER_UPDATE_EN)
+ != 0)
+ event_triggers = event_triggers &
+ ~STATIC_SCREEN_EVENT_MASK_DRR_DOUBLE_BUFFER_UPDATE_EN;
+
+ REG_UPDATE_2(OTG_STATIC_SCREEN_CONTROL,
+ OTG_STATIC_SCREEN_EVENT_MASK, event_triggers,
+ OTG_STATIC_SCREEN_FRAME_COUNT, num_frames);
+}
static struct timing_generator_funcs dcn31_tg_funcs = {
.validate_timing = optc1_validate_timing,
@@ -266,7 +293,7 @@ static struct timing_generator_funcs dcn31_tg_funcs = {
.set_drr = optc31_set_drr,
.get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal,
.set_vtotal_min_max = optc1_set_vtotal_min_max,
- .set_static_screen_control = optc1_set_static_screen_control,
+ .set_static_screen_control = optc31_set_static_screen_control,
.program_stereo = optc1_program_stereo,
.is_stereo_left_eye = optc1_is_stereo_left_eye,
.tg_init = optc3_tg_init,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h
index 30b81a448ce2..5fc6c63580d7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h
@@ -263,5 +263,8 @@ bool optc31_immediate_disable_crtc(struct timing_generator *optc);
void optc31_set_drr(struct timing_generator *optc, const struct drr_params *params);
void optc3_init_odm(struct timing_generator *optc);
-
+void optc31_set_static_screen_control(
+ struct timing_generator *optc,
+ uint32_t event_triggers,
+ uint32_t num_frames);
#endif /* __DC_OPTC_DCN31_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c
index 367cb6e6d074..0b769ee71405 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c
@@ -96,6 +96,13 @@ static void dccg314_set_pixel_rate_div(
struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
enum pixel_rate_div cur_k1 = PIXEL_RATE_DIV_NA, cur_k2 = PIXEL_RATE_DIV_NA;
+ // Don't program 0xF into the register field. Not valid since
+ // K1 / K2 field is only 1 / 2 bits wide
+ if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) {
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
dccg314_get_pixel_rate_div(dccg, otg_inst, &cur_k1, &cur_k2);
if (k1 == cur_k1 && k2 == cur_k2)
return;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c
index 588c1c71241f..a0741794db62 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c
@@ -348,10 +348,8 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig
two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing);
odm_combine_factor = get_odm_config(pipe_ctx, NULL);
- if (pipe_ctx->stream->signal == SIGNAL_TYPE_VIRTUAL)
- return odm_combine_factor;
-
if (is_dp_128b_132b_signal(pipe_ctx)) {
+ *k1_div = PIXEL_RATE_DIV_BY_1;
*k2_div = PIXEL_RATE_DIV_BY_1;
} else if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal) || dc_is_dvi_signal(pipe_ctx->stream->signal)) {
*k1_div = PIXEL_RATE_DIV_BY_1;
@@ -359,7 +357,7 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig
*k2_div = PIXEL_RATE_DIV_BY_2;
else
*k2_div = PIXEL_RATE_DIV_BY_4;
- } else if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
+ } else if (dc_is_dp_signal(pipe_ctx->stream->signal) || dc_is_virtual_signal(pipe_ctx->stream->signal)) {
if (two_pix_per_container) {
*k1_div = PIXEL_RATE_DIV_BY_1;
*k2_div = PIXEL_RATE_DIV_BY_2;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
index 5b6c2d94ec71..31feb4b0edee 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
@@ -66,9 +66,9 @@ static const struct hw_sequencer_funcs dcn314_funcs = {
.prepare_bandwidth = dcn20_prepare_bandwidth,
.optimize_bandwidth = dcn20_optimize_bandwidth,
.update_bandwidth = dcn20_update_bandwidth,
- .set_drr = dcn10_set_drr,
+ .set_drr = dcn31_set_drr,
.get_position = dcn10_get_position,
- .set_static_screen_control = dcn10_set_static_screen_control,
+ .set_static_screen_control = dcn31_set_static_screen_control,
.setup_stereo = dcn10_setup_stereo,
.set_avmute = dcn30_set_avmute,
.log_hw_state = dcn10_log_hw_state,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c
index f246aab23050..41edbd64ea21 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c
@@ -228,7 +228,7 @@ static struct timing_generator_funcs dcn314_tg_funcs = {
.set_drr = optc31_set_drr,
.get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal,
.set_vtotal_min_max = optc1_set_vtotal_min_max,
- .set_static_screen_control = optc1_set_static_screen_control,
+ .set_static_screen_control = optc31_set_static_screen_control,
.program_stereo = optc1_program_stereo,
.is_stereo_left_eye = optc1_is_stereo_left_eye,
.tg_init = optc3_tg_init,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
index 4fffc7bb8088..f9ea1e86707f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
@@ -886,6 +886,7 @@ static const struct dc_plane_cap plane_cap = {
static const struct dc_debug_options debug_defaults_drv = {
.disable_z10 = false,
.enable_z9_disable_interface = true,
+ .psr_skip_crtc_disable = true,
.disable_dmcu = true,
.force_abm_enable = false,
.timing_trace = false,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
index e4daed44ef5f..e4472c6be6c3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
@@ -96,8 +96,10 @@ static void dccg32_set_pixel_rate_div(
// Don't program 0xF into the register field. Not valid since
// K1 / K2 field is only 1 / 2 bits wide
- if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA)
+ if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) {
+ BREAK_TO_DEBUGGER();
return;
+ }
dccg32_get_pixel_rate_div(dccg, otg_inst, &cur_k1, &cur_k2);
if (k1 == cur_k1 && k2 == cur_k2)
@@ -223,11 +225,7 @@ static void dccg32_set_dtbclk_dto(
} else {
REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst],
DTBCLK_DTO_ENABLE[params->otg_inst], 0,
- PIPE_DTO_SRC_SEL[params->otg_inst], 1);
- if (params->is_hdmi)
- REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
- PIPE_DTO_SRC_SEL[params->otg_inst], 0);
-
+ PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 0 : 1);
REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0);
REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c
index 5947c2cb0f30..9501403a48a9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c
@@ -865,7 +865,7 @@ static void hubbub32_wm_read_state(struct hubbub *hubbub,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, &s->sr_exit);
REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A,
- DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, &s->dram_clk_chanage);
+ DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, &s->dram_clk_change);
REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A,
DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, &s->usr_retrain);
@@ -885,7 +885,7 @@ static void hubbub32_wm_read_state(struct hubbub *hubbub,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, &s->sr_exit);
REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B,
- DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, &s->dram_clk_chanage);
+ DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, &s->dram_clk_change);
REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B,
DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, &s->usr_retrain);
@@ -905,7 +905,7 @@ static void hubbub32_wm_read_state(struct hubbub *hubbub,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, &s->sr_exit);
REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_C,
- DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_C, &s->dram_clk_chanage);
+ DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_C, &s->dram_clk_change);
REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_C,
DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_C, &s->usr_retrain);
@@ -925,7 +925,7 @@ static void hubbub32_wm_read_state(struct hubbub *hubbub,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, &s->sr_exit);
REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_D,
- DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_D, &s->dram_clk_chanage);
+ DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_D, &s->dram_clk_change);
REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_D,
DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_D, &s->usr_retrain);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
index 763311ffb967..b8767be1e4c5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
@@ -227,8 +227,13 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ /* If PSR is supported on an eDP panel that's connected, but that panel is
+ * not in PSR at the time of trying to enter MALL SS, we have to include it
+ * in the static screen CAB calculation
+ */
if (!pipe->stream || !pipe->plane_state ||
- pipe->stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED ||
+ (pipe->stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED &&
+ pipe->stream->link->psr_settings.psr_allow_active) ||
pipe->stream->mall_stream_config.type == SUBVP_PHANTOM)
continue;
@@ -257,11 +262,11 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) *
((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height);
- /* For DCC:
- * meta_num_mblk = CEILING(full_mblk_width_ub_l*full_mblk_height_ub_l*Bpe/256/mblk_bytes, 1)
+ /*For DCC:
+ * meta_num_mblk = CEILING(meta_pitch*full_vp_height*Bpe/256/mblk_bytes, 1)
*/
if (pipe->plane_state->dcc.enable)
- num_mblks += (mall_alloc_width_blk_aligned * mall_alloc_width_blk_aligned * bytes_per_pixel +
+ num_mblks += (pipe->plane_state->dcc.meta_pitch * pipe->plane_res.scl_data.viewport.height * bytes_per_pixel +
(256 * DCN3_2_MALL_MBLK_SIZE_BYTES) - 1) / (256 * DCN3_2_MALL_MBLK_SIZE_BYTES);
bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES;
@@ -311,8 +316,8 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
cache_lines_used += (((cursor_size + DCN3_2_MALL_MBLK_SIZE_BYTES - 1) /
DCN3_2_MALL_MBLK_SIZE_BYTES) * DCN3_2_MALL_MBLK_SIZE_BYTES) /
dc->caps.cache_line_size + 2;
+ break;
}
- break;
}
}
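/* The DCC term added above is an integer ceiling,
 * CEILING(meta_pitch * viewport_height * Bpe / (256 * mblk_bytes), 1),
 * written with the usual (a + b - 1) / b idiom. A minimal stand-alone sketch
 * (not from this patch) with made-up numbers; MBLK_BYTES stands in for
 * DCN3_2_MALL_MBLK_SIZE_BYTES.
 */
#include <stdint.h>

#define MBLK_BYTES 65536u	/* assumed 64 KiB MALL meta block */

static uint32_t dcc_meta_mblks(uint32_t meta_pitch, uint32_t vp_height, uint32_t bpp)
{
	uint64_t meta_bytes = (uint64_t)meta_pitch * vp_height * bpp;
	uint64_t divisor = 256ull * MBLK_BYTES;

	return (uint32_t)((meta_bytes + divisor - 1) / divisor);	/* ceiling division */
}
/* e.g. dcc_meta_mblks(3840, 2160, 4) == 2: 33177600 / 16777216 rounds up from ~1.98 */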
@@ -698,11 +703,7 @@ void dcn32_subvp_update_force_pstate(struct dc *dc, struct dc_state *context)
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- // For SubVP + DRR, also force disallow on the DRR pipe
- // (We will force allow in the DMUB sequence -- some DRR timings by default won't allow P-State so we have
- // to force once the vblank is stretched).
- if (pipe->stream && pipe->plane_state && (pipe->stream->mall_stream_config.type == SUBVP_MAIN ||
- (pipe->stream->mall_stream_config.type == SUBVP_NONE && pipe->stream->ignore_msa_timing_param))) {
+ if (pipe->stream && pipe->plane_state && (pipe->stream->mall_stream_config.type == SUBVP_MAIN)) {
struct hubp *hubp = pipe->plane_res.hubp;
if (hubp && hubp->funcs->hubp_update_force_pstate_disallow)
@@ -780,6 +781,10 @@ void dcn32_program_mall_pipe_config(struct dc *dc, struct dc_state *context)
if (hws && hws->funcs.update_mall_sel)
hws->funcs.update_mall_sel(dc, context);
+ //update subvp force pstate
+ if (hws && hws->funcs.subvp_update_force_pstate)
+ dc->hwseq->funcs.subvp_update_force_pstate(dc, context);
+
// Program FORCE_ONE_ROW_FOR_FRAME and CURSOR_REQ_MODE for main subvp pipes
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
@@ -1170,10 +1175,8 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign
two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing);
odm_combine_factor = get_odm_config(pipe_ctx, NULL);
- if (pipe_ctx->stream->signal == SIGNAL_TYPE_VIRTUAL)
- return odm_combine_factor;
-
if (is_dp_128b_132b_signal(pipe_ctx)) {
+ *k1_div = PIXEL_RATE_DIV_BY_1;
*k2_div = PIXEL_RATE_DIV_BY_1;
} else if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal) || dc_is_dvi_signal(pipe_ctx->stream->signal)) {
*k1_div = PIXEL_RATE_DIV_BY_1;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index cdeff6de725d..e4dbc8353ea3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -723,7 +723,7 @@ static const struct dc_debug_options debug_defaults_drv = {
/* Must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions*/
.enable_double_buffered_dsc_pg_support = true,
.enable_dp_dig_pixel_rate_div_policy = 1,
- .allow_sw_cursor_fallback = false,
+ .allow_sw_cursor_fallback = false, // Linux can't do SW cursor "fallback"
.alloc_extra_way_for_cursor = true,
.min_prefetch_in_strobe_ns = 60000, // 60us
};
@@ -1721,8 +1721,29 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc,
return phantom_stream;
}
+void dcn32_retain_phantom_pipes(struct dc *dc, struct dc_state *context)
+{
+ int i;
+ struct dc_plane_state *phantom_plane = NULL;
+ struct dc_stream_state *phantom_stream = NULL;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe->top_pipe && !pipe->prev_odm_pipe &&
+ pipe->plane_state && pipe->stream &&
+ pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+ phantom_plane = pipe->plane_state;
+ phantom_stream = pipe->stream;
+
+ dc_plane_state_retain(phantom_plane);
+ dc_stream_retain(phantom_stream);
+ }
+ }
+}
+
// return true if removed piped from ctx, false otherwise
-bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context)
+bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context, bool fast_update)
{
int i;
bool removed_pipe = false;
@@ -1749,14 +1770,23 @@ bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context)
removed_pipe = true;
}
- // Clear all phantom stream info
- if (pipe->stream) {
- pipe->stream->mall_stream_config.type = SUBVP_NONE;
- pipe->stream->mall_stream_config.paired_stream = NULL;
- }
+ /* For non-full updates, a shallow copy of the current state
+ * is created. In this case we don't want to erase the current
+ * state (there can be 2 HIRQL threads, one in flip, and one in
+ * checkMPO) that can cause a race condition.
+ *
+ * This is just a workaround, needs a proper fix.
+ */
+ if (!fast_update) {
+ // Clear all phantom stream info
+ if (pipe->stream) {
+ pipe->stream->mall_stream_config.type = SUBVP_NONE;
+ pipe->stream->mall_stream_config.paired_stream = NULL;
+ }
- if (pipe->plane_state) {
- pipe->plane_state->is_phantom = false;
+ if (pipe->plane_state) {
+ pipe->plane_state->is_phantom = false;
+ }
}
}
return removed_pipe;
@@ -1903,7 +1933,7 @@ int dcn32_populate_dml_pipes_from_context(
pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal;
if (context->stream_count == 1 &&
- context->stream_status[0].plane_count <= 1 &&
+ context->stream_status[0].plane_count == 1 &&
!dc_is_hdmi_signal(res_ctx->pipe_ctx[i].stream->signal) &&
is_h_timing_divisible_by_2(res_ctx->pipe_ctx[i].stream) &&
pipe->stream->timing.pix_clk_100hz * 100 > DCN3_2_VMIN_DISPCLK_HZ &&
@@ -1929,23 +1959,28 @@ int dcn32_populate_dml_pipes_from_context(
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_19;
- switch (pipe->stream->mall_stream_config.type) {
- case SUBVP_MAIN:
- pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport;
- subvp_in_use = true;
- break;
- case SUBVP_PHANTOM:
- pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_phantom_pipe;
- pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable;
- // Disallow unbounded req for SubVP according to DCHUB programming guide
- pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
- break;
- case SUBVP_NONE:
- pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_disable;
- pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable;
- break;
- default:
- break;
+ /* Only populate DML input with subvp info for full updates.
+ * This is just a workaround -- needs a proper fix.
+ */
+ if (!fast_validate) {
+ switch (pipe->stream->mall_stream_config.type) {
+ case SUBVP_MAIN:
+ pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport;
+ subvp_in_use = true;
+ break;
+ case SUBVP_PHANTOM:
+ pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_phantom_pipe;
+ pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable;
+ // Disallow unbounded req for SubVP according to DCHUB programming guide
+ pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
+ break;
+ case SUBVP_NONE:
+ pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_disable;
+ pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable;
+ break;
+ default:
+ break;
+ }
}
pipes[pipe_cnt].dout.dsc_input_bpc = 0;
@@ -2033,6 +2068,9 @@ static struct resource_funcs dcn32_res_pool_funcs = {
.update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
.add_phantom_pipes = dcn32_add_phantom_pipes,
.remove_phantom_pipes = dcn32_remove_phantom_pipes,
+ .retain_phantom_pipes = dcn32_retain_phantom_pipes,
+ .save_mall_state = dcn32_save_mall_state,
+ .restore_mall_state = dcn32_restore_mall_state,
};
@@ -2124,6 +2162,7 @@ static bool dcn32_resource_construct(
dc->caps.subvp_swath_height_margin_lines = 16;
dc->caps.subvp_pstate_allow_width_us = 20;
dc->caps.subvp_vertical_int_margin_us = 30;
+ dc->caps.subvp_drr_vblank_start_margin_us = 100; // 100us margin
dc->caps.max_slave_planes = 2;
dc->caps.max_slave_yuv_planes = 2;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
index f6bc9bd5da31..13fbc574910b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
@@ -45,17 +45,6 @@
extern struct _vcs_dpi_ip_params_st dcn3_2_ip;
extern struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc;
-/* Temp struct used to save and restore MALL config
- * during validation.
- *
- * TODO: Move MALL config into dc_state instead of stream struct
- * to avoid needing to save/restore.
- */
-struct mall_temp_config {
- struct mall_stream_config mall_stream_config[MAX_PIPES];
- bool is_phantom_plane[MAX_PIPES];
-};
-
struct dcn32_resource_pool {
struct resource_pool base;
};
@@ -81,6 +70,9 @@ bool dcn32_release_post_bldn_3dlut(
struct dc_transfer_func **shaper);
bool dcn32_remove_phantom_pipes(struct dc *dc,
+ struct dc_state *context, bool fast_update);
+
+void dcn32_retain_phantom_pipes(struct dc *dc,
struct dc_state *context);
void dcn32_add_phantom_pipes(struct dc *dc,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index b03a7814e96d..783935c4e664 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -97,21 +97,21 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat
* FLOOR(vp_x_start, blk_width)
*/
full_vp_width_blk_aligned = ((pipe->plane_res.scl_data.viewport.x +
- pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) +
+ pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) -
(pipe->plane_res.scl_data.viewport.x / mblk_width * mblk_width);
/* full_vp_height_blk_aligned = FLOOR(vp_y_start + full_vp_height + blk_height - 1, blk_height) -
* FLOOR(vp_y_start, blk_height)
*/
full_vp_height_blk_aligned = ((pipe->plane_res.scl_data.viewport.y +
- full_vp_height + mblk_height - 1) / mblk_height * mblk_height) +
+ full_vp_height + mblk_height - 1) / mblk_height * mblk_height) -
(pipe->plane_res.scl_data.viewport.y / mblk_height * mblk_height);
/* mall_alloc_width_blk_aligned_l/c = full_vp_width_blk_aligned_l/c */
mall_alloc_width_blk_aligned = full_vp_width_blk_aligned;
/* mall_alloc_height_blk_aligned_l/c = CEILING(sub_vp_height_l/c - 1, blk_height_l/c) + blk_height_l/c */
- mall_alloc_height_blk_aligned = (pipe->stream->timing.v_addressable - 1 + mblk_height - 1) /
+ mall_alloc_height_blk_aligned = (pipe->plane_res.scl_data.viewport.height - 1 + mblk_height - 1) /
mblk_height * mblk_height + mblk_height;
/* full_mblk_width_ub_l/c = mall_alloc_width_blk_aligned_l/c;
@@ -121,14 +121,19 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat
*/
num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) *
((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height);
+
+ /*For DCC:
+ * meta_num_mblk = CEILING(meta_pitch*full_vp_height*Bpe/256/mblk_bytes, 1)
+ */
+ if (pipe->plane_state->dcc.enable)
+ num_mblks += (pipe->plane_state->dcc.meta_pitch * pipe->plane_res.scl_data.viewport.height * bytes_per_pixel +
+ (256 * DCN3_2_MALL_MBLK_SIZE_BYTES) - 1) / (256 * DCN3_2_MALL_MBLK_SIZE_BYTES);
+
bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES;
// cache lines used is total bytes / cache_line size. Add +2 for worst case alignment
// (MALL is 64-byte aligned)
cache_lines_per_plane = bytes_in_mall / dc->caps.cache_line_size + 2;
- /* For DCC divide by 256 */
- if (pipe->plane_state->dcc.enable)
- cache_lines_per_plane = cache_lines_per_plane + (cache_lines_per_plane / 256) + 1;
cache_lines_used += cache_lines_per_plane;
}
}
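/* The '+' to '-' fixes above matter because the block-aligned span is "end
 * rounded up to a block, minus start rounded down to a block". A hypothetical
 * helper (not from this patch) with made-up numbers:
 */
static unsigned int blk_aligned_span(unsigned int start, unsigned int len, unsigned int blk)
{
	unsigned int right = (start + len + blk - 1) / blk * blk;	/* round end up */
	unsigned int left = start / blk * blk;				/* round start down */

	return right - left;	/* adding the two, as before, double-counts the left edge */
}
/* e.g. blk_aligned_span(200, 1000, 128) == 1152, where the old '+' gave 1408 */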
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
index 6c79a47b6336..d1f36df03c2e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
@@ -721,7 +721,7 @@ static const struct dc_debug_options debug_defaults_drv = {
/*must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions*/
.enable_double_buffered_dsc_pg_support = true,
.enable_dp_dig_pixel_rate_div_policy = 1,
- .allow_sw_cursor_fallback = false,
+ .allow_sw_cursor_fallback = false, // Linux can't do SW cursor "fallback"
.alloc_extra_way_for_cursor = true,
.min_prefetch_in_strobe_ns = 60000, // 60us
};
@@ -743,7 +743,7 @@ static const struct dc_debug_options debug_defaults_diags = {
.dmub_command_table = true,
.enable_tri_buf = true,
.use_max_lb = true,
- .force_disable_subvp = true
+ .force_disable_subvp = true,
};
@@ -1621,6 +1621,9 @@ static struct resource_funcs dcn321_res_pool_funcs = {
.update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
.add_phantom_pipes = dcn32_add_phantom_pipes,
.remove_phantom_pipes = dcn32_remove_phantom_pipes,
+ .retain_phantom_pipes = dcn32_retain_phantom_pipes,
+ .save_mall_state = dcn32_save_mall_state,
+ .restore_mall_state = dcn32_restore_mall_state,
};
@@ -1711,6 +1714,7 @@ static bool dcn321_resource_construct(
dc->caps.subvp_swath_height_margin_lines = 16;
dc->caps.subvp_pstate_allow_width_us = 20;
dc->caps.subvp_vertical_int_margin_us = 30;
+ dc->caps.subvp_drr_vblank_start_margin_us = 100; // 100us margin
dc->caps.max_slave_planes = 1;
dc->caps.max_slave_yuv_planes = 1;
dc->caps.max_slave_rgb_planes = 1;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index 75dbb7ee193b..c26da3bb2892 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -565,7 +565,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = {
.dppclk_mhz = 847.06,
.phyclk_mhz = 810.0,
.socclk_mhz = 953.0,
- .dscclk_mhz = 489.0,
+ .dscclk_mhz = 300.0,
.dram_speed_mts = 2400.0,
},
{
@@ -576,7 +576,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = {
.dppclk_mhz = 960.00,
.phyclk_mhz = 810.0,
.socclk_mhz = 278.0,
- .dscclk_mhz = 287.67,
+ .dscclk_mhz = 342.86,
.dram_speed_mts = 2666.0,
},
{
@@ -587,7 +587,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = {
.dppclk_mhz = 1028.57,
.phyclk_mhz = 810.0,
.socclk_mhz = 715.0,
- .dscclk_mhz = 318.334,
+ .dscclk_mhz = 369.23,
.dram_speed_mts = 3200.0,
},
{
@@ -949,6 +949,7 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc
int plane_count;
int i;
unsigned int optimized_min_dst_y_next_start_us;
+ bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > 1000.0;
plane_count = 0;
optimized_min_dst_y_next_start_us = 0;
@@ -963,6 +964,8 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc
* 2. single eDP, on link 0, 1 plane and stutter period > 5ms
* Z10 only cases:
* 1. single eDP, on link 0, 1 plane and stutter period >= 5ms
+ * Z8 cases:
+ * 1. stutter period sufficient (StutterPeriod > 1000 us)
* Zstate not allowed cases:
* 1. Everything else
*/
@@ -990,11 +993,14 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc
if (context->bw_ctx.dml.vba.StutterPeriod > 5000.0 || optimized_min_dst_y_next_start_us > 5000)
return DCN_ZSTATE_SUPPORT_ALLOW;
else if (link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr)
- return DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY;
+ return allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY;
else
- return DCN_ZSTATE_SUPPORT_DISALLOW;
- } else
+ return allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY : DCN_ZSTATE_SUPPORT_DISALLOW;
+ } else if (allow_z8) {
+ return DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY;
+ } else {
return DCN_ZSTATE_SUPPORT_DISALLOW;
+ }
}
void dcn20_calculate_dlg_params(
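/* A sketch (not from this patch) of the post-change decision tree in
 * decide_zstate_support(), reduced to plain inputs. zstate_t and the flags
 * below are hypothetical stand-ins for the DCN_ZSTATE_SUPPORT_* enum and the
 * dml/vba state the real function inspects.
 */
typedef enum { Z_DISALLOW, Z_Z8_ONLY, Z_Z10_ONLY, Z_Z8_Z10_ONLY, Z_ALLOW } zstate_t;

static zstate_t decide_zstate(double stutter_period_us, double min_dst_y_next_start_us,
			      int single_edp_one_plane, int psr1_usable)
{
	int allow_z8 = stutter_period_us > 1000.0;	/* new: Z8 needs > 1 ms of stutter */

	if (!single_edp_one_plane)
		return allow_z8 ? Z_Z8_ONLY : Z_DISALLOW;

	if (stutter_period_us > 5000.0 || min_dst_y_next_start_us > 5000.0)
		return Z_ALLOW;

	if (psr1_usable)
		return allow_z8 ? Z_Z8_Z10_ONLY : Z_Z10_ONLY;

	return allow_z8 ? Z_Z8_ONLY : Z_DISALLOW;
}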
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
index 479e2c1a1301..379729b02847 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
@@ -4851,7 +4851,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->SwathHeightYThisState[k],
v->SwathHeightCThisState[k],
v->HTotal[k] / v->PixelClock[k],
- v->UrgentLatency,
+ v->UrgLatency[i],
v->CursorBufferSize,
v->CursorWidth[k][0],
v->CursorBPP[k][0],
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
index 12b23bd50e19..b37d14369a62 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
@@ -559,6 +559,9 @@ void dcn31_calculate_wm_and_dlg_fp(
context->bw_ctx.bw.dcn.clk.dramclk_khz = 0;
context->bw_ctx.bw.dcn.clk.fclk_khz = 0;
context->bw_ctx.bw.dcn.clk.p_state_change_support = true;
+ for (i = 0; i < dc->res_pool->pipe_count; i++)
+ if (context->res_ctx.pipe_ctx[i].stream)
+ context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = 0;
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
index 4e45c6d9ecdc..ec351c8418cb 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
@@ -5082,7 +5082,7 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->SwathHeightYThisState[k],
v->SwathHeightCThisState[k],
v->HTotal[k] / v->PixelClock[k],
- v->UrgentLatency,
+ v->UrgLatency[i],
v->CursorBufferSize,
v->CursorWidth[k][0],
v->CursorBPP[k][0],
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
index 1dd51c4b6804..6a1cf6adea77 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
@@ -149,8 +149,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = {
.num_states = 5,
.sr_exit_time_us = 16.5,
.sr_enter_plus_exit_time_us = 18.5,
- .sr_exit_z8_time_us = 442.0,
- .sr_enter_plus_exit_z8_time_us = 560.0,
+ .sr_exit_z8_time_us = 280.0,
+ .sr_enter_plus_exit_z8_time_us = 350.0,
.writeback_latency_us = 12.0,
.dram_channel_width_bytes = 4,
.round_trip_ping_latency_dcfclk_cycles = 106,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
index 41f0b4c1c72f..950669f2c10d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
@@ -5179,7 +5179,7 @@ void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_
v->SwathHeightYThisState[k],
v->SwathHeightCThisState[k],
v->HTotal[k] / v->PixelClock[k],
- v->UrgentLatency,
+ v->UrgLatency[i],
v->CursorBufferSize,
v->CursorWidth[k][0],
v->CursorBPP[k][0],
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 97b333b230d1..f94abd124021 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -157,7 +157,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = {
.dispclk_dppclk_vco_speed_mhz = 4300.0,
.do_urgent_latency_adjustment = true,
.urgent_latency_adjustment_fabric_clock_component_us = 1.0,
- .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000,
};
void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr)
@@ -211,7 +211,7 @@ void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr)
/* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */
if (clk_mgr->base.ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) {
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true;
- clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 38;
+ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 50;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us = fclk_change_latency_us;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
@@ -221,7 +221,7 @@ void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr)
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF;
clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz * 16;
- clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38;
+ clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 50;
clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[1].memclk_mhz * 16;
clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9;
clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz * 16;
@@ -256,16 +256,24 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
int vlevel)
{
const int max_latency_table_entries = 4;
- const struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+ struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
int dummy_latency_index = 0;
+ enum clock_change_support temp_clock_change_support = vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
dc_assert_fp_enabled();
while (dummy_latency_index < max_latency_table_entries) {
+ if (temp_clock_change_support != dm_dram_clock_change_unsupported)
+ vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = temp_clock_change_support;
context->bw_ctx.dml.soc.dram_clock_change_latency_us =
dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false);
+ /* for subvp + DRR case, if subvp pipes are still present we support pstate */
+ if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported &&
+ dcn32_subvp_in_use(dc, context))
+ vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = temp_clock_change_support;
+
if (vlevel < context->bw_ctx.dml.vba.soc.num_states &&
vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported)
break;
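/* A sketch (not from this patch) of the shape of the search above: walk the
 * dummy p-state latency table, revalidating with DML each time, until the DRAM
 * clock change is reported as supported. All names below are placeholders.
 */
#include <stdbool.h>

static int find_dummy_latency_index(const double *dummy_latency_us, int num_entries,
				    bool (*validate_with_latency)(double latency_us))
{
	int i;

	for (i = 0; i < num_entries; i++) {
		if (validate_with_latency(dummy_latency_us[i]))
			break;		/* supported at this dummy latency */
	}
	return i;			/* may equal num_entries if none worked */
}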
@@ -577,6 +585,9 @@ void dcn32_set_phantom_stream_timing(struct dc *dc,
num_dpp = vba->NoOfDPP[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]];
phantom_vactive += num_dpp > 1 ? vba->meta_row_height[vba->pipe_plane[pipe_idx]] : 0;
+ /* dc->debug.subvp_extra_lines is 0 by default */
+ phantom_vactive += dc->debug.subvp_extra_lines;
+
// For backporch of phantom pipe, use vstartup of the main pipe
phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
@@ -1141,7 +1152,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final ==
dm_prefetch_support_uclk_fclk_and_stutter) {
context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
- dm_prefetch_support_stutter;
+ dm_prefetch_support_fclk_and_stutter;
/* There are params (such as FabricClock) that need to be recalculated
* after validation fails (otherwise it will be 0). Calculation for
* phantom vactive requires call into DML, so we must ensure all the
@@ -1192,7 +1203,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
// If SubVP pipe config is unsupported (or cannot be used for UCLK switching)
// remove phantom pipes and repopulate dml pipes
if (!found_supported_config) {
- dc->res_pool->funcs->remove_phantom_pipes(dc, context);
+ dc->res_pool->funcs->remove_phantom_pipes(dc, context, false);
vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported;
*pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false);
@@ -1309,7 +1320,10 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
- context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
+ if (context->res_ctx.pipe_ctx[i].plane_state)
+ context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
+ else
+ context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = 0;
context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest;
pipe_idx++;
}
@@ -1504,7 +1518,7 @@ bool dcn32_internal_validate_bw(struct dc *dc,
return false;
// For each full update, remove all existing phantom pipes first
- dc->res_pool->funcs->remove_phantom_pipes(dc, context);
+ dc->res_pool->funcs->remove_phantom_pipes(dc, context, fast_validate);
dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
@@ -1756,6 +1770,7 @@ bool dcn32_internal_validate_bw(struct dc *dc,
if (repopulate_pipes) {
int flag_max_mpc_comb = vba->maxMpcComb;
int flag_vlevel = vlevel;
+ int i;
pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
@@ -1774,7 +1789,7 @@ bool dcn32_internal_validate_bw(struct dc *dc,
flag_max_mpc_comb != context->bw_ctx.dml.vba.maxMpcComb) {
/* check the context constructed with pipe split flags is still valid*/
bool flags_valid = false;
- for (int i = flag_vlevel; i < context->bw_ctx.dml.soc.num_states; i++) {
+ for (i = flag_vlevel; i < context->bw_ctx.dml.soc.num_states; i++) {
if (vba->ModeSupport[i][flag_max_mpc_comb]) {
vba->maxMpcComb = flag_max_mpc_comb;
vba->VoltageLevel = i;
@@ -1816,14 +1831,38 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
unsigned int dummy_latency_index = 0;
int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
+ bool subvp_in_use = dcn32_subvp_in_use(dc, context);
unsigned int min_dram_speed_mts_margin;
+ bool need_fclk_lat_as_dummy = false;
+ bool is_subvp_p_drr = true;
dc_assert_fp_enabled();
- // Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK
- if (!pstate_en && dcn32_subvp_in_use(dc, context)) {
- context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp;
- pstate_en = true;
+ /* need to find dummy latency index for subvp */
+ if (subvp_in_use) {
+ /* Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching its VBLANK */
+ if (!pstate_en) {
+ context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp;
+ pstate_en = true;
+ is_subvp_p_drr = true;
+ }
+ dummy_latency_index = dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(dc,
+ context, pipes, pipe_cnt, vlevel);
+
+ /* For DCN32/321 need to validate with fclk pstate change latency equal to dummy so prefetch is
+ * scheduled correctly to account for dummy pstate.
+ */
+ if (context->bw_ctx.dml.soc.fclk_change_latency_us < dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us) {
+ need_fclk_lat_as_dummy = true;
+ context->bw_ctx.dml.soc.fclk_change_latency_us =
+ dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
+ }
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
+ dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false);
+ if (is_subvp_p_drr) {
+ context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp;
+ }
}
context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false;
@@ -1847,9 +1886,11 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
/* For DCN32/321 need to validate with fclk pstate change latency equal to dummy so
* prefetch is scheduled correctly to account for dummy pstate.
*/
- if (dummy_latency_index == 0)
+ if (context->bw_ctx.dml.soc.fclk_change_latency_us < dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us) {
+ need_fclk_lat_as_dummy = true;
context->bw_ctx.dml.soc.fclk_change_latency_us =
dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
+ }
dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false);
maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
dcfclk_from_fw_based_mclk_switching = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
@@ -1951,13 +1992,13 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] ==
dm_dram_clock_change_unsupported) {
- int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries - 1;
+ int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels - 1;
min_dram_speed_mts =
dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16;
}
- if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
+ if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_in_use) {
/* find largest table entry that is lower than dram speed,
* but lower than DPM0 still uses DPM0
*/
@@ -2037,7 +2078,8 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod;
- if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && dummy_latency_index == 0)
+ /* for proper prefetch calculations, if dummy lat > fclk lat, use fclk lat = dummy lat */
+ if (need_fclk_lat_as_dummy)
context->bw_ctx.dml.soc.fclk_change_latency_us =
dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
@@ -2050,10 +2092,12 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(dc, context);
- if (dummy_latency_index == 0)
- context->bw_ctx.dml.soc.fclk_change_latency_us =
- dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us;
}
+
+ /* revert fclk lat changes if required */
+ if (need_fclk_lat_as_dummy)
+ context->bw_ctx.dml.soc.fclk_change_latency_us =
+ dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us;
}
static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index e5c8f6a71b5b..4b8f5fa0f0ad 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -670,6 +670,25 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
v->cursor_bw[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] / 8 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
}
+ v->NotEnoughDETSwathFillLatencyHiding = dml32_CalculateDETSwathFillLatencyHiding(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBW,
+ v->UrgentLatency,
+ mode_lib->vba.SwathHeightY,
+ mode_lib->vba.SwathHeightC,
+ v->swath_width_luma_ub,
+ v->swath_width_chroma_ub,
+ v->BytePerPixelDETY,
+ v->BytePerPixelDETC,
+ mode_lib->vba.DETBufferSizeY,
+ mode_lib->vba.DETBufferSizeC,
+ mode_lib->vba.DPPPerPlane,
+ mode_lib->vba.HTotal,
+ mode_lib->vba.PixelClock,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.VRatioChroma,
+ mode_lib->vba.UsesMALLForPStateChange);
+
for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
v->MaxVStartupLines[k] = ((mode_lib->vba.Interlace[k] &&
!mode_lib->vba.ProgressiveToInterlaceUnitInOPP) ?
@@ -1665,6 +1684,8 @@ static void mode_support_configuration(struct vba_vars_st *v,
&& mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] == true
&& mode_lib->vba.NonsupportedDSCInputBPC == false
&& !mode_lib->vba.ExceededMALLSize
+ && (mode_lib->vba.NotEnoughDETSwathFillLatencyHidingPerState[i][j] == false
+ || i == v->soc.num_states - 1)
&& ((mode_lib->vba.HostVMEnable == false
&& !mode_lib->vba.ImmediateFlipRequiredFinal)
|| mode_lib->vba.ImmediateFlipSupportedForState[i][j])
@@ -2286,7 +2307,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
&& (mode_lib->vba.Output[k] == dm_dp || mode_lib->vba.Output[k] == dm_dp2p0
|| mode_lib->vba.Output[k] == dm_edp
|| mode_lib->vba.Output[k] == dm_hdmi)
- && mode_lib->vba.OutputBppPerState[i][k] == 0 && (mode_lib->vba.UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)) {
+ && mode_lib->vba.OutputBppPerState[i][k] == 0) {
mode_lib->vba.LinkCapacitySupport[i] = false;
}
}
@@ -3158,6 +3179,25 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
mode_lib->vba.UrgentBurstFactorChroma,
mode_lib->vba.UrgentBurstFactorCursor);
+ mode_lib->vba.NotEnoughDETSwathFillLatencyHidingPerState[i][j] = dml32_CalculateDETSwathFillLatencyHiding(
+ mode_lib->vba.NumberOfActiveSurfaces,
+ mode_lib->vba.ReturnBWPerState[i][j],
+ mode_lib->vba.UrgLatency[i],
+ mode_lib->vba.SwathHeightYThisState,
+ mode_lib->vba.SwathHeightCThisState,
+ mode_lib->vba.swath_width_luma_ub_this_state,
+ mode_lib->vba.swath_width_chroma_ub_this_state,
+ mode_lib->vba.BytePerPixelInDETY,
+ mode_lib->vba.BytePerPixelInDETC,
+ mode_lib->vba.DETBufferSizeYThisState,
+ mode_lib->vba.DETBufferSizeCThisState,
+ mode_lib->vba.NoOfDPPThisState,
+ mode_lib->vba.HTotal,
+ mode_lib->vba.PixelClock,
+ mode_lib->vba.VRatio,
+ mode_lib->vba.VRatioChroma,
+ mode_lib->vba.UsesMALLForPStateChange);
+
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState = dml32_get_return_bw_mbps_vm_only(&mode_lib->vba.soc, i,
mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.FabricClockPerState[i],
mode_lib->vba.DRAMSpeedPerState[i]);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index debe46b24a3e..5af601cff1a0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -6228,3 +6228,72 @@ void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurf
*ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
*FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
}
+
+bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ double UrgentLatency,
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ unsigned int SwathWidthY[],
+ unsigned int SwathWidthC[],
+ double BytePerPixelInDETY[],
+ double BytePerPixelInDETC[],
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ unsigned int NumOfDPP[],
+ unsigned int HTotal[],
+ double PixelClock[],
+ double VRatioY[],
+ double VRatioC[],
+ enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[DC__NUM_DPP__MAX])
+{
+ int k;
+ double SwathSizeAllSurfaces = 0;
+ double SwathSizeAllSurfacesInFetchTimeUs;
+ double DETSwathLatencyHidingUs;
+ double DETSwathLatencyHidingYUs;
+ double DETSwathLatencyHidingCUs;
+ double SwathSizePerSurfaceY[DC__NUM_DPP__MAX];
+ double SwathSizePerSurfaceC[DC__NUM_DPP__MAX];
+ bool NotEnoughDETSwathFillLatencyHiding = false;
+
+ /* calculate sum of single swath size for all pipes in bytes*/
+ for (k = 0; k < NumberOfActiveSurfaces; k++) {
+ SwathSizePerSurfaceY[k] += SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k];
+
+ if (SwathHeightC[k] != 0)
+ SwathSizePerSurfaceC[k] += SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k];
+ else
+ SwathSizePerSurfaceC[k] = 0;
+
+ SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k];
+ }
+
+ SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency;
+
+ /* ensure every DET, minus one swath, can hide the time to fetch one swath for all surfaces */
+ for (k = 0; k < NumberOfActiveSurfaces; k++) {
+ double LineTime = HTotal[k] / PixelClock[k];
+
+ /* only care if surface is not phantom */
+ if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
+ DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime;
+
+ if (SwathHeightC[k] != 0) {
+ DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime;
+
+ DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs);
+ } else {
+ DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs;
+ }
+
+ /* DET must be able to hide time to fetch 1 swath for each surface */
+ if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) {
+ NotEnoughDETSwathFillLatencyHiding = true;
+ break;
+ }
+ }
+ }
+
+ return NotEnoughDETSwathFillLatencyHiding;
+}
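/* Per surface, the check above boils down to
 *   (det_rows - swath_height) / v_ratio * line_time >= all_swaths_bytes / ReturnBW + UrgentLatency,
 * i.e. the DET minus one in-flight swath must hide the time it takes to fetch
 * one swath for every surface. A toy single-surface version (not from this
 * patch) with made-up numbers:
 */
static int det_hides_swath_fill(double det_rows, double swath_height_rows,
				double v_ratio, double line_time_us, double fetch_time_us)
{
	double hiding_us = (det_rows - swath_height_rows) / v_ratio * line_time_us;

	return hiding_us >= fetch_time_us;
}
/* e.g. det_hides_swath_fill(96, 8, 1.0, 7.4, 180.0) is true, since 88 * 7.4 us = 651.2 us */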
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
index 3989c2a28fae..779c6805f599 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
@@ -1141,4 +1141,22 @@ void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurf
double *FractionOfUrgentBandwidth,
bool *ImmediateFlipBandwidthSupport);
+bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,
+ double ReturnBW,
+ double UrgentLatency,
+ unsigned int SwathHeightY[],
+ unsigned int SwathHeightC[],
+ unsigned int SwathWidthY[],
+ unsigned int SwathWidthC[],
+ double BytePerPixelInDETY[],
+ double BytePerPixelInDETC[],
+ unsigned int DETBufferSizeY[],
+ unsigned int DETBufferSizeC[],
+ unsigned int NumOfDPP[],
+ unsigned int HTotal[],
+ double PixelClock[],
+ double VRatioY[],
+ double VRatioC[],
+ enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[DC__NUM_DPP__MAX]);
+
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
index 432b4ecd01a7..f4b176599be7 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
@@ -126,9 +126,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = {
.sr_enter_plus_exit_z8_time_us = 320,
.writeback_latency_us = 12.0,
.round_trip_ping_latency_dcfclk_cycles = 263,
- .urgent_latency_pixel_data_only_us = 9.35,
- .urgent_latency_pixel_mixed_with_vm_data_us = 9.35,
- .urgent_latency_vm_data_only_us = 9.35,
+ .urgent_latency_pixel_data_only_us = 4,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4,
+ .urgent_latency_vm_data_only_us = 4,
.fclk_change_latency_us = 20,
.usr_retraining_latency_us = 2,
.smn_latency_us = 2,
@@ -156,7 +156,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = {
.dispclk_dppclk_vco_speed_mhz = 4300.0,
.do_urgent_latency_adjustment = true,
.urgent_latency_adjustment_fabric_clock_component_us = 1.0,
- .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000,
};
static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
index d46aa4817e70..81e53e67cd0b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
@@ -1050,6 +1050,7 @@ struct vba_vars_st {
double MinFullDETBufferingTime;
double AverageReadBandwidthGBytePerSecond;
bool FirstMainPlane;
+ bool NotEnoughDETSwathFillLatencyHiding;
unsigned int ViewportWidthChroma[DC__NUM_DPP__MAX];
unsigned int ViewportHeightChroma[DC__NUM_DPP__MAX];
@@ -1162,7 +1163,7 @@ struct vba_vars_st {
double UrgBurstFactorLumaPre[DC__NUM_DPP__MAX];
double UrgBurstFactorChromaPre[DC__NUM_DPP__MAX];
bool NotUrgentLatencyHidingPre[DC__NUM_DPP__MAX];
- bool LinkCapacitySupport[DC__NUM_DPP__MAX];
+ bool LinkCapacitySupport[DC__VOLTAGE_STATES];
bool VREADY_AT_OR_AFTER_VSYNC[DC__NUM_DPP__MAX];
unsigned int MIN_DST_Y_NEXT_START[DC__NUM_DPP__MAX];
unsigned int VFrontPorch[DC__NUM_DPP__MAX];
@@ -1233,6 +1234,7 @@ struct vba_vars_st {
unsigned int BlockWidthC[DC__NUM_DPP__MAX];
unsigned int SubViewportLinesNeededInMALL[DC__NUM_DPP__MAX];
bool VActiveBandwithSupport[DC__VOLTAGE_STATES][2];
+ bool NotEnoughDETSwathFillLatencyHidingPerState[DC__VOLTAGE_STATES][2];
struct dummy_vars dummy_vars;
};
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c
index 0ea52ba5ac82..9fd8b269dd79 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c
@@ -256,8 +256,8 @@ static const struct hw_factory_funcs funcs = {
*/
void dal_hw_factory_dcn32_init(struct hw_factory *factory)
{
- factory->number_of_pins[GPIO_ID_DDC_DATA] = 6;
- factory->number_of_pins[GPIO_ID_DDC_CLOCK] = 6;
+ factory->number_of_pins[GPIO_ID_DDC_DATA] = 8;
+ factory->number_of_pins[GPIO_ID_DDC_CLOCK] = 8;
factory->number_of_pins[GPIO_ID_GENERIC] = 4;
factory->number_of_pins[GPIO_ID_HPD] = 5;
factory->number_of_pins[GPIO_ID_GPIO_PAD] = 28;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index 85495ef026f5..525f8f0b8732 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -240,8 +240,11 @@ struct resource_funcs {
unsigned int pipe_cnt,
unsigned int index);
- bool (*remove_phantom_pipes)(struct dc *dc, struct dc_state *context);
+ bool (*remove_phantom_pipes)(struct dc *dc, struct dc_state *context, bool fast_update);
+ void (*retain_phantom_pipes)(struct dc *dc, struct dc_state *context);
void (*get_panel_config_defaults)(struct dc_panel_config *panel_config);
+ void (*save_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config);
+ void (*restore_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config);
};
struct audio_support{
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
index f2e1fcb668fb..5b0265c0df61 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
@@ -46,7 +46,7 @@ struct dcn_hubbub_wm_set {
uint32_t pte_meta_urgent;
uint32_t sr_enter;
uint32_t sr_exit;
- uint32_t dram_clk_chanage;
+ uint32_t dram_clk_change;
uint32_t usr_retrain;
uint32_t fclk_pstate_change;
};
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
index dcb80c4747b0..131fcfa28bca 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
@@ -83,10 +83,15 @@ static const struct dpp_input_csc_matrix __maybe_unused dpp_input_csc_matrix[] =
{COLOR_SPACE_YCBCR709,
{0x3265, 0x2000, 0, 0xe6ce, 0xf105, 0x2000, 0xfa01, 0xa7d, 0,
0x2000, 0x3b61, 0xe24f} },
-
{COLOR_SPACE_YCBCR709_LIMITED,
{0x39a6, 0x2568, 0, 0xe0d6, 0xeedd, 0x2568, 0xf925, 0x9a8, 0,
- 0x2568, 0x43ee, 0xdbb2} }
+ 0x2568, 0x43ee, 0xdbb2} },
+ {COLOR_SPACE_2020_YCBCR,
+ {0x2F30, 0x2000, 0, 0xE869, 0xEDB7, 0x2000, 0xFABC, 0xBC6, 0,
+ 0x2000, 0x3C34, 0xE1E6} },
+ {COLOR_SPACE_2020_RGB_LIMITEDRANGE,
+ {0x35E0, 0x255F, 0, 0xE2B3, 0xEB20, 0x255F, 0xF9FD, 0xB1E, 0,
+ 0x255F, 0x44BD, 0xDB43} }
};
struct dpp_grph_csc_adjustment {
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.c
new file mode 100644
index 000000000000..801a95b34e8c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.c
@@ -0,0 +1,28 @@
+
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+/*********************************************************************/
+// USB4 DPIA BANDWIDTH ALLOCATION LOGIC
+/*********************************************************************/
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.h
new file mode 100644
index 000000000000..669e995f825f
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef DC_INC_LINK_DP_DPIA_BW_H_
+#define DC_INC_LINK_DP_DPIA_BW_H_
+
+// XXX: TODO: Re-add for Phase 2
+/* Number of Host Routers per motherboard is 2, with 2 DPIAs per Host Router */
+#define MAX_HR_NUM 2
+
+struct dc_host_router_bw_alloc {
+ int max_bw[MAX_HR_NUM]; // The max BW each Host Router has available to share between DPIAs
+ int total_estimated_bw[MAX_HR_NUM]; // The total verified and available BW that the Host Router has
+};
+
+/*
+ * Enable BW Allocation Mode Support from the DP-Tx side
+ *
+ * @link: pointer to the dc_link struct instance
+ *
+ * return: SUCCESS or FAILURE
+ */
+bool set_dptx_usb4_bw_alloc_support(struct dc_link *link);
+
+/*
+ * Send a request from DP-Tx requesting to allocate BW remotely after
+ * allocating it locally. This will get processed by CM and a CB function
+ * will be called.
+ *
+ * @link: pointer to the dc_link struct instance
+ * @req_bw: The requested bw in Kbyte to be allocated
+ *
+ * return: none
+ */
+void set_usb4_req_bw_req(struct dc_link *link, int req_bw);
+
+/*
+ * CB function invoked when the status of the request above is complete. We
+ * read the result of the allocation from the CM and update the structs accordingly.
+ *
+ * @link: pointer to the dc_link struct instance
+ *
+ * return: none
+ */
+void get_usb4_req_bw_resp(struct dc_link *link);
+
+#endif /* DC_INC_LINK_DP_DPIA_BW_H_ */
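/* A sketch (not from this patch) of how a caller might sequence the three
 * entry points declared above once later phases wire them up. The link pointer
 * and the 2000 Kbyte request are placeholders.
 */
static void dpia_bw_alloc_example(struct dc_link *link)
{
	/* 1. Ask the DP-Tx side to enable BW allocation mode; give up if it refuses. */
	if (!set_dptx_usb4_bw_alloc_support(link))
		return;

	/* 2. Request bandwidth (in Kbyte) from the Connection Manager. */
	set_usb4_req_bw_req(link, 2000);

	/* 3. When the CM reports completion, read the result back and update the structs. */
	get_usb4_req_bw_resp(link);
}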
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index 795d8811af9a..33907feefebb 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -1029,13 +1029,14 @@ struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 {
uint16_t vtotal;
uint16_t htotal;
uint8_t vblank_pipe_index;
- uint8_t padding[2];
+ uint8_t padding[1];
struct {
uint8_t drr_in_use;
uint8_t drr_window_size_ms; // Indicates largest VMIN/VMAX adjustment per frame
uint16_t min_vtotal_supported; // Min VTOTAL that supports switching in VBLANK
uint16_t max_vtotal_supported; // Max VTOTAL that can support SubVP static scheduling
uint8_t use_ramping; // Use ramping or not
+ uint8_t drr_vblank_start_margin;
} drr_info; // DRR considered as part of SubVP + VBLANK case
} vblank_data;
} pipe_config;
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
index 235259d6c5a1..9b5d9b2c9a6a 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
@@ -102,9 +102,18 @@ static const struct abm_parameters abm_settings_config1[abm_defines_max_level] =
{0x82, 0x4d, 0x20, 0x00, 0x00, 0xff, 0xb3, 0x70, 0x70, 0xcccc, 0xcccc},
};
+static const struct abm_parameters abm_settings_config2[abm_defines_max_level] = {
+// min_red max_red bright_pos dark_pos bright_gain contrast dev min_knee max_knee blRed blStart
+ {0xf0, 0xbf, 0x20, 0x00, 0x88, 0x99, 0xb3, 0x40, 0xe0, 0x0000, 0xcccc},
+ {0xd8, 0x85, 0x20, 0x00, 0x70, 0x90, 0xa8, 0x40, 0xc8, 0x0700, 0xb333},
+ {0xb8, 0x58, 0x20, 0x00, 0x64, 0x88, 0x78, 0x70, 0xa0, 0x7000, 0x9999},
+ {0x82, 0x40, 0x20, 0x00, 0x00, 0xb8, 0xb3, 0x70, 0x70, 0xc333, 0xb333},
+};
+
static const struct abm_parameters * const abm_settings[] = {
abm_settings_config0,
abm_settings_config1,
+ abm_settings_config2,
};
#define NUM_AMBI_LEVEL 5
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h
index 483769fb1736..537aee0536d3 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
#ifndef _dcn_3_0_0_OFFSET_HEADER
#define _dcn_3_0_0_OFFSET_HEADER
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h
index b79be3a25a80..f9d90b098519 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
#ifndef _dcn_3_0_0_SH_MASK_HEADER
#define _dcn_3_0_0_SH_MASK_HEADER
diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h
index 363d2139cea2..db7e22720d00 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h
@@ -993,7 +993,8 @@
#define mmUVD_RAS_VCPU_VCODEC_STATUS_BASE_IDX 1
#define mmUVD_RAS_MMSCH_FATAL_ERROR 0x0058
#define mmUVD_RAS_MMSCH_FATAL_ERROR_BASE_IDX 1
-
+#define mmVCN_RAS_CNTL 0x04b9
+#define mmVCN_RAS_CNTL_BASE_IDX 1
/* JPEG 2_6_0 regs */
#define mmUVD_RAS_JPEG0_STATUS 0x0059
diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h
index 8de883b76d90..874a8b7e1feb 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h
@@ -3618,6 +3618,33 @@
#define UVD_RAS_MMSCH_FATAL_ERROR__POISONED_VF_MASK 0x7FFFFFFFL
#define UVD_RAS_MMSCH_FATAL_ERROR__POISONED_PF_MASK 0x80000000L
+//VCN 2_6_0 VCN_RAS_CNTL
+#define VCN_RAS_CNTL__VCPU_VCODEC_IH_EN__SHIFT 0x0
+#define VCN_RAS_CNTL__MMSCH_FATAL_ERROR_EN__SHIFT 0x1
+#define VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN__SHIFT 0x4
+#define VCN_RAS_CNTL__MMSCH_PMI_EN__SHIFT 0x5
+#define VCN_RAS_CNTL__VCPU_VCODEC_REARM__SHIFT 0x8
+#define VCN_RAS_CNTL__MMSCH_REARM__SHIFT 0x9
+#define VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN__SHIFT 0xc
+#define VCN_RAS_CNTL__VCPU_VCODEC_READY__SHIFT 0x10
+#define VCN_RAS_CNTL__MMSCH_READY__SHIFT 0x11
+#define VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK 0x00000001L
+#define VCN_RAS_CNTL__MMSCH_FATAL_ERROR_EN_MASK 0x00000002L
+#define VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK 0x00000010L
+#define VCN_RAS_CNTL__MMSCH_PMI_EN_MASK 0x00000020L
+#define VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK 0x00000100L
+#define VCN_RAS_CNTL__MMSCH_REARM_MASK 0x00000200L
+#define VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK 0x00001000L
+#define VCN_RAS_CNTL__VCPU_VCODEC_READY_MASK 0x00010000L
+#define VCN_RAS_CNTL__MMSCH_READY_MASK 0x00020000L
+
+//VCN 2_6_0 UVD_VCPU_INT_EN
+#define UVD_VCPU_INT_EN__RASCNTL_VCPU_VCODEC_EN__SHIFT 0x16
+#define UVD_VCPU_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK 0x00400000L
+
+//VCN 2_6_0 UVD_SYS_INT_EN
+#define UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK 0x04000000L
+
/* JPEG 2_6_0 UVD_RAS_JPEG0_STATUS */
#define UVD_RAS_JPEG0_STATUS__POISONED_VF__SHIFT 0x0
#define UVD_RAS_JPEG0_STATUS__POISONED_PF__SHIFT 0x1f
diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h
index 4dc738c51771..b78360a71bc9 100644
--- a/drivers/gpu/drm/amd/include/atombios.h
+++ b/drivers/gpu/drm/amd/include/atombios.h
@@ -9292,7 +9292,7 @@ typedef struct {
typedef struct {
VFCT_IMAGE_HEADER VbiosHeader;
- UCHAR VbiosContent[1];
+ UCHAR VbiosContent[];
}GOP_VBIOS_CONTENT;
typedef struct {
diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
index 7e85cdc5bd34..dc694cb246d9 100644
--- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h
+++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
@@ -222,7 +222,11 @@ union MESAPI_SET_HW_RESOURCES {
uint32_t apply_grbm_remote_register_dummy_read_wa : 1;
uint32_t second_gfx_pipe_enabled : 1;
uint32_t enable_level_process_quantum_check : 1;
- uint32_t reserved : 25;
+ uint32_t legacy_sch_mode : 1;
+ uint32_t disable_add_queue_wptr_mc_addr : 1;
+ uint32_t enable_mes_event_int_logging : 1;
+ uint32_t enable_reg_active_poll : 1;
+ uint32_t reserved : 21;
};
uint32_t uint32_t_all;
};
diff --git a/drivers/gpu/drm/amd/include/yellow_carp_offset.h b/drivers/gpu/drm/amd/include/yellow_carp_offset.h
index 28a56b56bcaf..0fea6a746611 100644
--- a/drivers/gpu/drm/amd/include/yellow_carp_offset.h
+++ b/drivers/gpu/drm/amd/include/yellow_carp_offset.h
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
#ifndef YELLOW_CARP_OFFSET_H
#define YELLOW_CARP_OFFSET_H
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 20e5f66f853f..ca3beb5d8f27 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -161,7 +161,7 @@ int smu_get_dpm_freq_range(struct smu_context *smu,
int smu_set_gfx_power_up_by_imu(struct smu_context *smu)
{
- if (!smu->ppt_funcs && !smu->ppt_funcs->set_gfx_power_up_by_imu)
+ if (!smu->ppt_funcs || !smu->ppt_funcs->set_gfx_power_up_by_imu)
return -EOPNOTSUPP;
return smu->ppt_funcs->set_gfx_power_up_by_imu(smu);
@@ -585,6 +585,7 @@ static int smu_set_funcs(struct amdgpu_device *adev)
yellow_carp_set_ppt_funcs(smu);
break;
case IP_VERSION(13, 0, 4):
+ case IP_VERSION(13, 0, 11):
smu_v13_0_4_set_ppt_funcs(smu);
break;
case IP_VERSION(13, 0, 5):
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 44bbf17e4bef..3bc4128a22ac 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -168,6 +168,7 @@ struct smu_temperature_range {
int mem_crit_max;
int mem_emergency_max;
int software_shutdown_temp;
+ int software_shutdown_temp_offset;
};
struct smu_state_validation_block {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
index b76f0f7e4299..d6b964cf73bd 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
@@ -522,9 +522,9 @@ typedef enum {
TEMP_HOTSPOT_M,
TEMP_MEM,
TEMP_VR_GFX,
+ TEMP_VR_SOC,
TEMP_VR_MEM0,
TEMP_VR_MEM1,
- TEMP_VR_SOC,
TEMP_VR_U,
TEMP_LIQUID0,
TEMP_LIQUID1,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
index 25c08f963f49..d6b13933a98f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
@@ -25,10 +25,10 @@
// *** IMPORTANT ***
// PMFW TEAM: Always increment the interface version on any change to this file
-#define SMU13_DRIVER_IF_VERSION 0x2C
+#define SMU13_DRIVER_IF_VERSION 0x35
//Increment this version if SkuTable_t or BoardTable_t change
-#define PPTABLE_VERSION 0x20
+#define PPTABLE_VERSION 0x27
#define NUM_GFXCLK_DPM_LEVELS 16
#define NUM_SOCCLK_DPM_LEVELS 8
@@ -96,7 +96,7 @@
#define FEATURE_MEM_TEMP_READ_BIT 47
#define FEATURE_ATHUB_MMHUB_PG_BIT 48
#define FEATURE_SOC_PCC_BIT 49
-#define FEATURE_SPARE_50_BIT 50
+#define FEATURE_EDC_PWRBRK_BIT 50
#define FEATURE_SPARE_51_BIT 51
#define FEATURE_SPARE_52_BIT 52
#define FEATURE_SPARE_53_BIT 53
@@ -282,15 +282,15 @@ typedef enum {
} I2cControllerPort_e;
typedef enum {
- I2C_CONTROLLER_NAME_VR_GFX = 0,
- I2C_CONTROLLER_NAME_VR_SOC,
- I2C_CONTROLLER_NAME_VR_VMEMP,
- I2C_CONTROLLER_NAME_VR_VDDIO,
- I2C_CONTROLLER_NAME_LIQUID0,
- I2C_CONTROLLER_NAME_LIQUID1,
- I2C_CONTROLLER_NAME_PLX,
- I2C_CONTROLLER_NAME_OTHER,
- I2C_CONTROLLER_NAME_COUNT,
+ I2C_CONTROLLER_NAME_VR_GFX = 0,
+ I2C_CONTROLLER_NAME_VR_SOC,
+ I2C_CONTROLLER_NAME_VR_VMEMP,
+ I2C_CONTROLLER_NAME_VR_VDDIO,
+ I2C_CONTROLLER_NAME_LIQUID0,
+ I2C_CONTROLLER_NAME_LIQUID1,
+ I2C_CONTROLLER_NAME_PLX,
+ I2C_CONTROLLER_NAME_FAN_INTAKE,
+ I2C_CONTROLLER_NAME_COUNT,
} I2cControllerName_e;
typedef enum {
@@ -302,6 +302,7 @@ typedef enum {
I2C_CONTROLLER_THROTTLER_LIQUID0,
I2C_CONTROLLER_THROTTLER_LIQUID1,
I2C_CONTROLLER_THROTTLER_PLX,
+ I2C_CONTROLLER_THROTTLER_FAN_INTAKE,
I2C_CONTROLLER_THROTTLER_INA3221,
I2C_CONTROLLER_THROTTLER_COUNT,
} I2cControllerThrottler_e;
@@ -309,8 +310,9 @@ typedef enum {
typedef enum {
I2C_CONTROLLER_PROTOCOL_VR_XPDE132G5,
I2C_CONTROLLER_PROTOCOL_VR_IR35217,
- I2C_CONTROLLER_PROTOCOL_TMP_TMP102A,
+ I2C_CONTROLLER_PROTOCOL_TMP_MAX31875,
I2C_CONTROLLER_PROTOCOL_INA3221,
+ I2C_CONTROLLER_PROTOCOL_TMP_MAX6604,
I2C_CONTROLLER_PROTOCOL_COUNT,
} I2cControllerProtocol_e;
@@ -690,6 +692,9 @@ typedef struct {
#define PP_OD_FEATURE_UCLK_BIT 8
#define PP_OD_FEATURE_ZERO_FAN_BIT 9
#define PP_OD_FEATURE_TEMPERATURE_BIT 10
+#define PP_OD_FEATURE_POWER_FEATURE_CTRL_BIT 11
+#define PP_OD_FEATURE_ASIC_TDC_BIT 12
+#define PP_OD_FEATURE_COUNT 13
typedef enum {
PP_OD_POWER_FEATURE_ALWAYS_ENABLED,
@@ -697,6 +702,11 @@ typedef enum {
PP_OD_POWER_FEATURE_ALWAYS_DISABLED,
} PP_OD_POWER_FEATURE_e;
+typedef enum {
+ FAN_MODE_AUTO = 0,
+ FAN_MODE_MANUAL_LINEAR,
+} FanMode_e;
+
typedef struct {
uint32_t FeatureCtrlMask;
@@ -708,8 +718,8 @@ typedef struct {
uint8_t RuntimePwrSavingFeaturesCtrl;
//Frequency changes
- int16_t GfxclkFmin; // MHz
- int16_t GfxclkFmax; // MHz
+ int16_t GfxclkFmin; // MHz
+ int16_t GfxclkFmax; // MHz
uint16_t UclkFmin; // MHz
uint16_t UclkFmax; // MHz
@@ -730,7 +740,12 @@ typedef struct {
uint8_t MaxOpTemp;
uint8_t Padding[4];
- uint32_t Spare[12];
+ uint16_t GfxVoltageFullCtrlMode;
+ uint16_t GfxclkFullCtrlMode;
+ uint16_t UclkFullCtrlMode;
+ int16_t AsicTdc;
+
+ uint32_t Spare[10];
uint32_t MmHubPadding[8]; // SMU internal use. Adding here instead of external as a workaround
} OverDriveTable_t;
@@ -748,8 +763,8 @@ typedef struct {
uint8_t IdlePwrSavingFeaturesCtrl;
uint8_t RuntimePwrSavingFeaturesCtrl;
- uint16_t GfxclkFmin; // MHz
- uint16_t GfxclkFmax; // MHz
+ int16_t GfxclkFmin; // MHz
+ int16_t GfxclkFmax; // MHz
uint16_t UclkFmin; // MHz
uint16_t UclkFmax; // MHz
@@ -769,7 +784,12 @@ typedef struct {
uint8_t MaxOpTemp;
uint8_t Padding[4];
- uint32_t Spare[12];
+ uint16_t GfxVoltageFullCtrlMode;
+ uint16_t GfxclkFullCtrlMode;
+ uint16_t UclkFullCtrlMode;
+ int16_t AsicTdc;
+
+ uint32_t Spare[10];
} OverDriveLimits_t;
@@ -903,7 +923,8 @@ typedef struct {
uint16_t FanStartTempMin;
uint16_t FanStartTempMax;
- uint32_t Spare[12];
+ uint16_t PowerMinPpt0[POWER_SOURCE_COUNT];
+ uint32_t Spare[11];
} MsgLimits_t;
@@ -1086,11 +1107,13 @@ typedef struct {
uint32_t GfxoffSpare[15];
// GFX GPO
- float DfllBtcMasterScalerM;
+ uint32_t DfllBtcMasterScalerM;
int32_t DfllBtcMasterScalerB;
- float DfllBtcSlaveScalerM;
+ uint32_t DfllBtcSlaveScalerM;
int32_t DfllBtcSlaveScalerB;
- uint32_t GfxGpoSpare[12];
+ uint32_t DfllPccAsWaitCtrl; //GDFLL_AS_WAIT_CTRL_PCC register value to be passed to RLC msg
+ uint32_t DfllPccAsStepCtrl; //GDFLL_AS_STEP_CTRL_PCC register value to be passed to RLC msg
+ uint32_t GfxGpoSpare[10];
// GFX DCS
@@ -1106,7 +1129,10 @@ typedef struct {
uint16_t DcsTimeout; //This is the amount of time SMU FW waits for RLC to put GFX into GFXOFF before reverting to the fallback mechanism of throttling GFXCLK to Fmin.
- uint32_t DcsSpare[16];
+ uint32_t DcsSpare[14];
+
+ // UCLK section
+ uint16_t ShadowFreqTableUclk[NUM_UCLK_DPM_LEVELS]; // In MHz
// UCLK section
uint8_t UseStrobeModeOptimizations; //Set to indicate that FW should use strobe mode optimizations
@@ -1163,13 +1189,14 @@ typedef struct {
uint16_t IntakeTempHighIntakeAcousticLimit;
uint16_t IntakeTempAcouticLimitReleaseRate;
- uint16_t FanStalledTempLimitOffset;
+ int16_t FanAbnormalTempLimitOffset;
uint16_t FanStalledTriggerRpm;
- uint16_t FanAbnormalTriggerRpm;
- uint16_t FanPadding;
-
- uint32_t FanSpare[14];
+ uint16_t FanAbnormalTriggerRpmCoeff;
+ uint16_t FanAbnormalDetectionEnable;
+ uint8_t FanIntakeSensorSupport;
+ uint8_t FanIntakePadding[3];
+ uint32_t FanSpare[13];
// SECTION: VDD_GFX AVFS
uint8_t OverrideGfxAvfsFuses;
@@ -1193,7 +1220,6 @@ typedef struct {
uint32_t dGbV_dT_vmin;
uint32_t dGbV_dT_vmax;
- //Unused: PMFW-9370
uint32_t V2F_vmin_range_low;
uint32_t V2F_vmin_range_high;
uint32_t V2F_vmax_range_low;
@@ -1238,8 +1264,21 @@ typedef struct {
// SECTION: Advanced Options
uint32_t DebugOverrides;
+ // Section: Total Board Power idle vs active coefficients
+ uint8_t TotalBoardPowerSupport;
+ uint8_t TotalBoardPowerPadding[3];
+
+ int16_t TotalIdleBoardPowerM;
+ int16_t TotalIdleBoardPowerB;
+ int16_t TotalBoardPowerM;
+ int16_t TotalBoardPowerB;
+
+ QuadraticInt_t qFeffCoeffGameClock[POWER_SOURCE_COUNT];
+ QuadraticInt_t qFeffCoeffBaseClock[POWER_SOURCE_COUNT];
+ QuadraticInt_t qFeffCoeffBoostClock[POWER_SOURCE_COUNT];
+
// SECTION: Sku Reserved
- uint32_t Spare[64];
+ uint32_t Spare[43];
// Padding for MMHUB - do not modify this
uint32_t MmHubPadding[8];
@@ -1304,7 +1343,8 @@ typedef struct {
// SECTION: Clock Spread Spectrum
// UCLK Spread Spectrum
- uint16_t UclkSpreadPadding;
+ uint8_t UclkTrainingModeSpreadPercent; // Q4.4
+ uint8_t UclkSpreadPadding;
uint16_t UclkSpreadFreq; // kHz
// UCLK Spread Spectrum
@@ -1317,11 +1357,7 @@ typedef struct {
// Section: Memory Config
uint8_t DramWidth; // Width of interface to the channel for each DRAM module. See DRAM_BIT_WIDTH_TYPE_e
- uint8_t PaddingMem1[3];
-
- // Section: Total Board Power
- uint16_t TotalBoardPower; //Only needed for TCP Estimated case, where TCP = TGP+Total Board Power
- uint16_t BoardPowerPadding;
+ uint8_t PaddingMem1[7];
// SECTION: UMC feature flags
uint8_t HsrEnabled;
@@ -1423,8 +1459,11 @@ typedef struct {
uint16_t Vcn1ActivityPercentage ;
uint32_t EnergyAccumulator;
- uint16_t AverageSocketPower ;
+ uint16_t AverageSocketPower;
+ uint16_t AverageTotalBoardPower;
+
uint16_t AvgTemperature[TEMP_COUNT];
+ uint16_t AvgTemperatureFanIntake;
uint8_t PcieRate ;
uint8_t PcieWidth ;
@@ -1592,5 +1631,7 @@ typedef struct {
#define IH_INTERRUPT_CONTEXT_ID_AUDIO_D0 0x5
#define IH_INTERRUPT_CONTEXT_ID_AUDIO_D3 0x6
#define IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING 0x7
+#define IH_INTERRUPT_CONTEXT_ID_FAN_ABNORMAL 0x8
+#define IH_INTERRUPT_CONTEXT_ID_FAN_RECOVERY 0x9
#endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
index 9ebb8f39732a..8b8266890a10 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
@@ -131,7 +131,13 @@
#define PPSMC_MSG_EnableAudioStutterWA 0x44
#define PPSMC_MSG_PowerUpUmsch 0x45
#define PPSMC_MSG_PowerDownUmsch 0x46
-#define PPSMC_Message_Count 0x47
+#define PPSMC_MSG_SetDcsArch 0x47
+#define PPSMC_MSG_TriggerVFFLR 0x48
+#define PPSMC_MSG_SetNumBadMemoryPagesRetired 0x49
+#define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A
+#define PPSMC_MSG_SetPriorityDeltaGain 0x4B
+#define PPSMC_MSG_AllowIHHostInterrupt 0x4C
+#define PPSMC_Message_Count 0x4D
//Debug Dump Message
#define DEBUGSMC_MSG_TestMessage 0x1
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index 58098b82df66..4180c71d930f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -239,7 +239,10 @@
__SMU_DUMMY_MAP(DriverMode2Reset), \
__SMU_DUMMY_MAP(GetGfxOffStatus), \
__SMU_DUMMY_MAP(GetGfxOffEntryCount), \
- __SMU_DUMMY_MAP(LogGfxOffResidency),
+ __SMU_DUMMY_MAP(LogGfxOffResidency), \
+ __SMU_DUMMY_MAP(SetNumBadMemoryPagesRetired), \
+ __SMU_DUMMY_MAP(SetBadMemoryPagesRetiredFlagsPerChannel), \
+ __SMU_DUMMY_MAP(AllowGpo),
#undef __SMU_DUMMY_MAP
#define __SMU_DUMMY_MAP(type) SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index b7f4569aff2a..e8c6febb8b64 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -28,10 +28,11 @@
#define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF
#define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04
#define SMU13_DRIVER_IF_VERSION_ALDE 0x08
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0 0x34
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x35
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_10 0x1D
#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms
@@ -272,6 +273,9 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu);
int smu_v13_0_run_btc(struct smu_context *smu);
+int smu_v13_0_gpo_control(struct smu_context *smu,
+ bool enablement);
+
int smu_v13_0_deep_sleep_control(struct smu_context *smu,
bool enablement);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index 70b560737687..ad66d57aa102 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -79,6 +79,17 @@ MODULE_FIRMWARE("amdgpu/beige_goby_smc.bin");
#define mmTHM_BACO_CNTL_ARCT 0xA7
#define mmTHM_BACO_CNTL_ARCT_BASE_IDX 0
+static void smu_v11_0_poll_baco_exit(struct smu_context *smu)
+{
+ struct amdgpu_device *adev = smu->adev;
+ uint32_t data, loop = 0;
+
+ do {
+ usleep_range(1000, 1100);
+ data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL);
+ } while ((data & 0x100) && (++loop < 100));
+}
+
int smu_v11_0_init_microcode(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
@@ -1588,6 +1599,10 @@ bool smu_v11_0_baco_is_support(struct smu_context *smu)
if (amdgpu_sriov_vf(smu->adev) || !smu_baco->platform_support)
return false;
+ /* return true if ASIC is in BACO state already */
+ if (smu_v11_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER)
+ return true;
+
/* Arcturus does not support this bit mask */
if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) &&
!smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT))
@@ -1685,7 +1700,18 @@ int smu_v11_0_baco_enter(struct smu_context *smu)
int smu_v11_0_baco_exit(struct smu_context *smu)
{
- return smu_v11_0_baco_set_state(smu, SMU_BACO_STATE_EXIT);
+ int ret;
+
+ ret = smu_v11_0_baco_set_state(smu, SMU_BACO_STATE_EXIT);
+ if (!ret) {
+ /*
+ * Poll the BACO exit status to ensure the FW has completed
+ * the BACO exit process and to avoid timing issues.
+ */
+ smu_v11_0_poll_baco_exit(smu);
+ }
+
+ return ret;
}
int smu_v11_0_mode1_reset(struct smu_context *smu)
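Note on the change above: the new smu_v11_0_poll_baco_exit() helper sleeps 1000-1100 us per iteration for at most 100 iterations, so smu_v11_0_baco_exit() now waits up to roughly 100 * 1.1 ms = 110 ms for bit 8 (mask 0x100) of THM_BACO_CNTL to clear before returning.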
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 89f0f6eb19f3..e54b760b875b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -250,6 +250,7 @@ int smu_v13_0_check_fw_status(struct smu_context *smu)
switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 4):
+ case IP_VERSION(13, 0, 11):
mp1_fw_flags = RREG32_PCIE(MP1_Public |
(smnMP1_V13_0_4_FIRMWARE_FLAGS & 0xffffffff));
break;
@@ -289,6 +290,8 @@ int smu_v13_0_check_fw_version(struct smu_context *smu)
smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_ALDE;
break;
case IP_VERSION(13, 0, 0):
+ smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0;
+ break;
case IP_VERSION(13, 0, 10):
smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10;
break;
@@ -301,6 +304,7 @@ int smu_v13_0_check_fw_version(struct smu_context *smu)
smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_YELLOW_CARP;
break;
case IP_VERSION(13, 0, 4):
+ case IP_VERSION(13, 0, 11):
smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_4;
break;
case IP_VERSION(13, 0, 5):
@@ -841,6 +845,7 @@ int smu_v13_0_gfx_off_control(struct smu_context *smu, bool enable)
case IP_VERSION(13, 0, 7):
case IP_VERSION(13, 0, 8):
case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 11):
if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
return 0;
if (enable)
@@ -1376,6 +1381,7 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev,
*/
uint32_t ctxid = entry->src_data[0];
uint32_t data;
+ uint32_t high;
if (client_id == SOC15_IH_CLIENTID_THM) {
switch (src_id) {
@@ -1432,6 +1438,36 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev,
schedule_work(&smu->throttling_logging_work);
break;
+ case 0x8:
+ high = smu->thermal_range.software_shutdown_temp +
+ smu->thermal_range.software_shutdown_temp_offset;
+ high = min_t(typeof(high),
+ SMU_THERMAL_MAXIMUM_ALERT_TEMP,
+ high);
+ dev_emerg(adev->dev, "Reduce soft CTF limit to %d (by an offset %d)\n",
+ high,
+ smu->thermal_range.software_shutdown_temp_offset);
+
+ data = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL);
+ data = REG_SET_FIELD(data, THM_THERMAL_INT_CTRL,
+ DIG_THERM_INTH,
+ (high & 0xff));
+ data = data & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK);
+ WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, data);
+ break;
+ case 0x9:
+ high = min_t(typeof(high),
+ SMU_THERMAL_MAXIMUM_ALERT_TEMP,
+ smu->thermal_range.software_shutdown_temp);
+ dev_emerg(adev->dev, "Recover soft CTF limit to %d\n", high);
+
+ data = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL);
+ data = REG_SET_FIELD(data, THM_THERMAL_INT_CTRL,
+ DIG_THERM_INTH,
+ (high & 0xff));
+ data = data & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK);
+ WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, data);
+ break;
}
}
}
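The new ctxid values 0x8 and 0x9 handled above correspond to the IH_INTERRUPT_CONTEXT_ID_FAN_ABNORMAL and IH_INTERRUPT_CONTEXT_ID_FAN_RECOVERY defines added earlier in this patch. A minimal sketch of the 0x8 (fan abnormal) computation, with assumed example temperatures that are not taken from the patch:

    /* Assumed example values, in degrees C. */
    int shutdown_temp = 110;                   /* thermal_range.software_shutdown_temp */
    int temp_offset   = -10;                   /* thermal_range.software_shutdown_temp_offset */
    int high = shutdown_temp + temp_offset;    /* reduced soft CTF limit: 100 */
    if (high > SMU_THERMAL_MAXIMUM_ALERT_TEMP) /* same clamp the patch applies with min_t() */
            high = SMU_THERMAL_MAXIMUM_ALERT_TEMP;
    /* Only the low byte (high & 0xff) is then programmed into the DIG_THERM_INTH
     * field of THM_THERMAL_INT_CTRL; ctxid 0x9 restores software_shutdown_temp. */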
@@ -2146,6 +2182,21 @@ int smu_v13_0_run_btc(struct smu_context *smu)
return res;
}
+int smu_v13_0_gpo_control(struct smu_context *smu,
+ bool enablement)
+{
+ int res;
+
+ res = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_AllowGpo,
+ enablement ? 1 : 0,
+ NULL);
+ if (res)
+ dev_err(smu->adev->dev, "SetGpoAllow %d failed!\n", enablement);
+
+ return res;
+}
+
int smu_v13_0_deep_sleep_control(struct smu_context *smu,
bool enablement)
{
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 5bcb61f77e41..9643b21c636a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -141,6 +141,10 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0),
MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0),
MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0),
+ MSG_MAP(SetNumBadMemoryPagesRetired, PPSMC_MSG_SetNumBadMemoryPagesRetired, 0),
+ MSG_MAP(SetBadMemoryPagesRetiredFlagsPerChannel,
+ PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel, 0),
+ MSG_MAP(AllowGpo, PPSMC_MSG_SetGpoAllow, 0),
};
static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = {
@@ -207,6 +211,8 @@ static struct cmn2asic_mapping smu_v13_0_0_feature_mask_map[SMU_FEATURE_COUNT] =
FEA_MAP(MEM_TEMP_READ),
FEA_MAP(ATHUB_MMHUB_PG),
FEA_MAP(SOC_PCC),
+ [SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
+ [SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
};
static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = {
@@ -537,6 +543,23 @@ static int smu_v13_0_0_set_default_dpm_table(struct smu_context *smu)
dpm_table);
if (ret)
return ret;
+
+ /*
+ * Update the reported maximum shader clock to a value that is
+ * guaranteed to be achievable on all cards. This matches the
+ * Windows setting. Since that value may not be the peak frequency
+ * the card can actually reach, it is normal for the real-time
+ * clock frequency to exceed this labelled maximum (for example
+ * in the pp_dpm_sclk sysfs output).
+ */
+ if (skutable->DriverReportedClocks.GameClockAc &&
+ (dpm_table->dpm_levels[dpm_table->count - 1].value >
+ skutable->DriverReportedClocks.GameClockAc)) {
+ dpm_table->dpm_levels[dpm_table->count - 1].value =
+ skutable->DriverReportedClocks.GameClockAc;
+ dpm_table->max = skutable->DriverReportedClocks.GameClockAc;
+ }
} else {
dpm_table->count = 1;
dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100;
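Worked example for the cap above (numbers assumed, not from the patch): if the highest gfxclk DPM level reads 2600 MHz while SkuTable.DriverReportedClocks.GameClockAc is 2500 MHz, the reported maximum becomes 2500 MHz; the hardware may still briefly clock above that value, which is why pp_dpm_sclk can show higher real-time frequencies.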
@@ -799,6 +822,57 @@ static int smu_v13_0_0_get_smu_metrics_data(struct smu_context *smu,
return ret;
}
+static int smu_v13_0_0_get_dpm_ultimate_freq(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t *min,
+ uint32_t *max)
+{
+ struct smu_13_0_dpm_context *dpm_context =
+ smu->smu_dpm.dpm_context;
+ struct smu_13_0_dpm_table *dpm_table;
+
+ switch (clk_type) {
+ case SMU_MCLK:
+ case SMU_UCLK:
+ /* uclk dpm table */
+ dpm_table = &dpm_context->dpm_tables.uclk_table;
+ break;
+ case SMU_GFXCLK:
+ case SMU_SCLK:
+ /* gfxclk dpm table */
+ dpm_table = &dpm_context->dpm_tables.gfx_table;
+ break;
+ case SMU_SOCCLK:
+ /* socclk dpm table */
+ dpm_table = &dpm_context->dpm_tables.soc_table;
+ break;
+ case SMU_FCLK:
+ /* fclk dpm table */
+ dpm_table = &dpm_context->dpm_tables.fclk_table;
+ break;
+ case SMU_VCLK:
+ case SMU_VCLK1:
+ /* vclk dpm table */
+ dpm_table = &dpm_context->dpm_tables.vclk_table;
+ break;
+ case SMU_DCLK:
+ case SMU_DCLK1:
+ /* dclk dpm table */
+ dpm_table = &dpm_context->dpm_tables.dclk_table;
+ break;
+ default:
+ dev_err(smu->adev->dev, "Unsupported clock type!\n");
+ return -EINVAL;
+ }
+
+ if (min)
+ *min = dpm_table->min;
+ if (max)
+ *max = dpm_table->max;
+
+ return 0;
+}
+
static int smu_v13_0_0_read_sensor(struct smu_context *smu,
enum amd_pp_sensors sensor,
void *data,
@@ -1301,9 +1375,17 @@ static int smu_v13_0_0_populate_umd_state_clk(struct smu_context *smu)
&dpm_context->dpm_tables.fclk_table;
struct smu_umd_pstate_table *pstate_table =
&smu->pstate_table;
+ struct smu_table_context *table_context = &smu->smu_table;
+ PPTable_t *pptable = table_context->driver_pptable;
+ DriverReportedClocks_t driver_clocks =
+ pptable->SkuTable.DriverReportedClocks;
pstate_table->gfxclk_pstate.min = gfx_table->min;
- pstate_table->gfxclk_pstate.peak = gfx_table->max;
+ if (driver_clocks.GameClockAc &&
+ (driver_clocks.GameClockAc < gfx_table->max))
+ pstate_table->gfxclk_pstate.peak = driver_clocks.GameClockAc;
+ else
+ pstate_table->gfxclk_pstate.peak = gfx_table->max;
pstate_table->uclk_pstate.min = mem_table->min;
pstate_table->uclk_pstate.peak = mem_table->max;
@@ -1320,12 +1402,12 @@ static int smu_v13_0_0_populate_umd_state_clk(struct smu_context *smu)
pstate_table->fclk_pstate.min = fclk_table->min;
pstate_table->fclk_pstate.peak = fclk_table->max;
- /*
- * For now, just use the mininum clock frequency.
- * TODO: update them when the real pstate settings available
- */
- pstate_table->gfxclk_pstate.standard = gfx_table->min;
- pstate_table->uclk_pstate.standard = mem_table->min;
+ if (driver_clocks.BaseClockAc &&
+ driver_clocks.BaseClockAc < gfx_table->max)
+ pstate_table->gfxclk_pstate.standard = driver_clocks.BaseClockAc;
+ else
+ pstate_table->gfxclk_pstate.standard = gfx_table->max;
+ pstate_table->uclk_pstate.standard = mem_table->max;
pstate_table->socclk_pstate.standard = soc_table->min;
pstate_table->vclk_pstate.standard = vclk_table->min;
pstate_table->dclk_pstate.standard = dclk_table->min;
@@ -1359,12 +1441,23 @@ out:
static int smu_v13_0_0_get_fan_speed_pwm(struct smu_context *smu,
uint32_t *speed)
{
+ int ret;
+
if (!speed)
return -EINVAL;
- return smu_v13_0_0_get_smu_metrics_data(smu,
- METRICS_CURR_FANPWM,
- speed);
+ ret = smu_v13_0_0_get_smu_metrics_data(smu,
+ METRICS_CURR_FANPWM,
+ speed);
+ if (ret) {
+ dev_err(smu->adev->dev, "Failed to get fan speed(PWM)!");
+ return ret;
+ }
+
+ /* Convert the PMFW output, which is in percent, to a 0-255 PWM value */
+ *speed = MIN(*speed * 255 / 100, 255);
+
+ return 0;
}
static int smu_v13_0_0_get_fan_speed_rpm(struct smu_context *smu,
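Worked example for the conversion above: a PMFW reading of 60 (percent) becomes MIN(60 * 255 / 100, 255) = 153 on the 0-255 PWM scale; readings of 100 percent and above clamp to 255. The smu_v13_0_7 variant later in this patch applies the same conversion.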
@@ -1838,6 +1931,40 @@ static void smu_v13_0_0_set_smu_mailbox_registers(struct smu_context *smu)
smu->debug_resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_54);
}
+static int smu_v13_0_0_smu_send_bad_mem_page_num(struct smu_context *smu,
+ uint32_t size)
+{
+ int ret = 0;
+
+ /* message SMU to update the bad page number on SMUBUS */
+ ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_SetNumBadMemoryPagesRetired,
+ size, NULL);
+ if (ret)
+ dev_err(smu->adev->dev,
+ "[%s] failed to message SMU to update bad memory pages number\n",
+ __func__);
+
+ return ret;
+}
+
+static int smu_v13_0_0_send_bad_mem_channel_flag(struct smu_context *smu,
+ uint32_t size)
+{
+ int ret = 0;
+
+ /* message SMU to update the bad channel info on SMUBUS */
+ ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_SetBadMemoryPagesRetiredFlagsPerChannel,
+ size, NULL);
+ if (ret)
+ dev_err(smu->adev->dev,
+ "[%s] failed to message SMU to update bad memory pages channel info\n",
+ __func__);
+
+ return ret;
+}
+
static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
.get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask,
.set_default_dpm_table = smu_v13_0_0_set_default_dpm_table,
@@ -1862,7 +1989,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
.get_enabled_mask = smu_cmn_get_enabled_mask,
.dpm_set_vcn_enable = smu_v13_0_set_vcn_enable,
.dpm_set_jpeg_enable = smu_v13_0_set_jpeg_enable,
- .get_dpm_ultimate_freq = smu_v13_0_get_dpm_ultimate_freq,
+ .get_dpm_ultimate_freq = smu_v13_0_0_get_dpm_ultimate_freq,
.get_vbios_bootup_values = smu_v13_0_get_vbios_bootup_values,
.read_sensor = smu_v13_0_0_read_sensor,
.feature_is_enabled = smu_cmn_feature_is_enabled,
@@ -1908,6 +2035,9 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
.mode1_reset = smu_v13_0_0_mode1_reset,
.set_mp1_state = smu_v13_0_0_set_mp1_state,
.set_df_cstate = smu_v13_0_0_set_df_cstate,
+ .send_hbm_bad_pages_num = smu_v13_0_0_smu_send_bad_mem_page_num,
+ .send_hbm_bad_channel_flag = smu_v13_0_0_send_bad_mem_channel_flag,
+ .gpo_control = smu_v13_0_gpo_control,
};
void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
index 97e1d55dcaad..8fa9a36c38b6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
@@ -1026,6 +1026,15 @@ static const struct pptable_funcs smu_v13_0_4_ppt_funcs = {
.set_gfx_power_up_by_imu = smu_v13_0_set_gfx_power_up_by_imu,
};
+static void smu_v13_0_4_set_smu_mailbox_registers(struct smu_context *smu)
+{
+ struct amdgpu_device *adev = smu->adev;
+
+ smu->param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_82);
+ smu->msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_66);
+ smu->resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90);
+}
+
void smu_v13_0_4_set_ppt_funcs(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
@@ -1035,7 +1044,9 @@ void smu_v13_0_4_set_ppt_funcs(struct smu_context *smu)
smu->feature_map = smu_v13_0_4_feature_mask_map;
smu->table_map = smu_v13_0_4_table_map;
smu->is_apu = true;
- smu->param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_82);
- smu->msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_66);
- smu->resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90);
+
+ if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 4))
+ smu_v13_0_4_set_smu_mailbox_registers(smu);
+ else
+ smu_v13_0_set_smu_mailbox_registers(smu);
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index d74debc584f8..5c6c6ad011ca 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -123,6 +123,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(SetMGpuFanBoostLimitRpm, PPSMC_MSG_SetMGpuFanBoostLimitRpm, 0),
MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0),
MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0),
+ MSG_MAP(AllowGpo, PPSMC_MSG_SetGpoAllow, 0),
};
static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = {
@@ -189,6 +190,8 @@ static struct cmn2asic_mapping smu_v13_0_7_feature_mask_map[SMU_FEATURE_COUNT] =
FEA_MAP(MEM_TEMP_READ),
FEA_MAP(ATHUB_MMHUB_PG),
FEA_MAP(SOC_PCC),
+ [SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
+ [SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
};
static struct cmn2asic_mapping smu_v13_0_7_table_map[SMU_TABLE_COUNT] = {
@@ -1223,6 +1226,7 @@ static int smu_v13_0_7_get_thermal_temperature_range(struct smu_context *smu,
range->mem_emergency_max = (pptable->SkuTable.TemperatureLimit[TEMP_MEM] + CTF_OFFSET_MEM)*
SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
range->software_shutdown_temp = powerplay_table->software_shutdown_temp;
+ range->software_shutdown_temp_offset = pptable->SkuTable.FanAbnormalTempLimitOffset;
return 0;
}
@@ -1359,12 +1363,23 @@ static int smu_v13_0_7_populate_umd_state_clk(struct smu_context *smu)
static int smu_v13_0_7_get_fan_speed_pwm(struct smu_context *smu,
uint32_t *speed)
{
+ int ret;
+
if (!speed)
return -EINVAL;
- return smu_v13_0_7_get_smu_metrics_data(smu,
- METRICS_CURR_FANPWM,
- speed);
+ ret = smu_v13_0_7_get_smu_metrics_data(smu,
+ METRICS_CURR_FANPWM,
+ speed);
+ if (ret) {
+ dev_err(smu->adev->dev, "Failed to get fan speed(PWM)!");
+ return ret;
+ }
+
+ /* Convert the PMFW output, which is in percent, to a 0-255 PWM value */
+ *speed = MIN(*speed * 255 / 100, 255);
+
+ return 0;
}
static int smu_v13_0_7_get_fan_speed_rpm(struct smu_context *smu,
@@ -1436,7 +1451,7 @@ static int smu_v13_0_7_get_power_limit(struct smu_context *smu,
static int smu_v13_0_7_get_power_profile_mode(struct smu_context *smu, char *buf)
{
- DpmActivityMonitorCoeffIntExternal_t activity_monitor_external[PP_SMC_POWER_PROFILE_COUNT];
+ DpmActivityMonitorCoeffIntExternal_t *activity_monitor_external;
uint32_t i, j, size = 0;
int16_t workload_type = 0;
int result = 0;
@@ -1444,6 +1459,12 @@ static int smu_v13_0_7_get_power_profile_mode(struct smu_context *smu, char *buf
if (!buf)
return -EINVAL;
+ activity_monitor_external = kcalloc(PP_SMC_POWER_PROFILE_COUNT,
+ sizeof(*activity_monitor_external),
+ GFP_KERNEL);
+ if (!activity_monitor_external)
+ return -ENOMEM;
+
size += sysfs_emit_at(buf, size, " ");
for (i = 0; i <= PP_SMC_POWER_PROFILE_WINDOW3D; i++)
size += sysfs_emit_at(buf, size, "%-14s%s", amdgpu_pp_profile_name[i],
@@ -1456,15 +1477,17 @@ static int smu_v13_0_7_get_power_profile_mode(struct smu_context *smu, char *buf
workload_type = smu_cmn_to_asic_specific_index(smu,
CMN2ASIC_MAPPING_WORKLOAD,
i);
- if (workload_type < 0)
- return -EINVAL;
+ if (workload_type < 0) {
+ result = -EINVAL;
+ goto out;
+ }
result = smu_cmn_update_table(smu,
SMU_TABLE_ACTIVITY_MONITOR_COEFF, workload_type,
(void *)(&activity_monitor_external[i]), false);
if (result) {
dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
- return result;
+ goto out;
}
}
@@ -1492,7 +1515,10 @@ do { \
PRINT_DPM_MONITOR(Fclk_BoosterFreq);
#undef PRINT_DPM_MONITOR
- return size;
+ result = size;
+out:
+ kfree(activity_monitor_external);
+ return result;
}
static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
@@ -1687,6 +1713,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
.mode1_reset = smu_v13_0_mode1_reset,
.set_mp1_state = smu_v13_0_7_set_mp1_state,
.set_df_cstate = smu_v13_0_7_set_df_cstate,
+ .gpo_control = smu_v13_0_gpo_control,
};
void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu)