From 42547ec5f0c6b8d0c995078cd805736fcdf51dfb Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Thu, 11 Apr 2024 17:14:17 +0800 Subject: drm/amdkfd: handle duplicate BOs in reserve_bo_and_cond_vms commit 2a705f3e49d20b59cd9e5cc3061b2d92ebe1e5f0 upstream. Observed on gfx8 ASIC where KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM is used. Two attachments use the same VM, root PD would be locked twice. [ 57.910418] Call Trace: [ 57.793726] ? reserve_bo_and_cond_vms+0x111/0x1c0 [amdgpu] [ 57.793820] amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu+0x6c/0x1c0 [amdgpu] [ 57.793923] ? idr_get_next_ul+0xbe/0x100 [ 57.793933] kfd_process_device_free_bos+0x7e/0xf0 [amdgpu] [ 57.794041] kfd_process_wq_release+0x2ae/0x3c0 [amdgpu] [ 57.794141] ? process_scheduled_works+0x29c/0x580 [ 57.794147] process_scheduled_works+0x303/0x580 [ 57.794157] ? __pfx_worker_thread+0x10/0x10 [ 57.794160] worker_thread+0x1a2/0x370 [ 57.794165] ? __pfx_worker_thread+0x10/0x10 [ 57.794167] kthread+0x11b/0x150 [ 57.794172] ? __pfx_kthread+0x10/0x10 [ 57.794177] ret_from_fork+0x3d/0x60 [ 57.794181] ? __pfx_kthread+0x10/0x10 [ 57.794184] ret_from_fork_asm+0x1b/0x30 Signed-off-by: Lang Yu Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index e4d4e55c08ad..0535b07987d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1188,7 +1188,8 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, int ret; ctx->sync = &mem->sync; - drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT | + DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&ctx->exec) { ctx->n_vms = 0; list_for_each_entry(entry, &mem->attachments, list) { -- cgit v1.2.3 From ce5a22d22db691d14516c3b8fdbf69139eb2ea8f Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 17 Apr 2024 07:56:46 -0700 Subject: drm/i915/hwmon: Get rid of devm commit 5bc9de065b8bb9b8dd8799ecb4592d0403b54281 upstream. When both hwmon and hwmon drvdata (on which hwmon depends) are device managed resources, the expectation, on device unbind, is that hwmon will be released before drvdata. However, in i915 there are two separate code paths, which both release either drvdata or hwmon and either can be released before the other. These code paths (for device unbind) are as follows (see also the bug referenced below): Call Trace: release_nodes+0x11/0x70 devres_release_group+0xb2/0x110 component_unbind_all+0x8d/0xa0 component_del+0xa5/0x140 intel_pxp_tee_component_fini+0x29/0x40 [i915] intel_pxp_fini+0x33/0x80 [i915] i915_driver_remove+0x4c/0x120 [i915] i915_pci_remove+0x19/0x30 [i915] pci_device_remove+0x32/0xa0 device_release_driver_internal+0x19c/0x200 unbind_store+0x9c/0xb0 and Call Trace: release_nodes+0x11/0x70 devres_release_all+0x8a/0xc0 device_unbind_cleanup+0x9/0x70 device_release_driver_internal+0x1c1/0x200 unbind_store+0x9c/0xb0 This means that in i915, if use devm, we cannot gurantee that hwmon will always be released before drvdata. Which means that we have a uaf if hwmon sysfs is accessed when drvdata has been released but hwmon hasn't. The only way out of this seems to be do get rid of devm_ and release/free everything explicitly during device unbind. v2: Change commit message and other minor code changes v3: Cleanup from i915_hwmon_register on error (Armin Wolf) v4: Eliminate potential static analyzer warning (Rodrigo) Eliminate fetch_and_zero (Jani) v5: Restore previous logic for ddat_gt->hwmon_dev error return (Andi) Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10366 Reviewed-by: Rodrigo Vivi Signed-off-by: Ashutosh Dixit Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240417145646.793223-1-ashutosh.dixit@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_hwmon.c | 46 +++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index b758fd110c20..c0662a022f59 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -793,7 +793,7 @@ void i915_hwmon_register(struct drm_i915_private *i915) if (!IS_DGFX(i915)) return; - hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL); + hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL); if (!hwmon) return; @@ -819,14 +819,12 @@ void i915_hwmon_register(struct drm_i915_private *i915) hwm_get_preregistration_info(i915); /* hwmon_dev points to device hwmon */ - hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name, - ddat, - &hwm_chip_info, - hwm_groups); - if (IS_ERR(hwmon_dev)) { - i915->hwmon = NULL; - return; - } + hwmon_dev = hwmon_device_register_with_info(dev, ddat->name, + ddat, + &hwm_chip_info, + hwm_groups); + if (IS_ERR(hwmon_dev)) + goto err; ddat->hwmon_dev = hwmon_dev; @@ -839,16 +837,36 @@ void i915_hwmon_register(struct drm_i915_private *i915) if (!hwm_gt_is_visible(ddat_gt, hwmon_energy, hwmon_energy_input, 0)) continue; - hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat_gt->name, - ddat_gt, - &hwm_gt_chip_info, - NULL); + hwmon_dev = hwmon_device_register_with_info(dev, ddat_gt->name, + ddat_gt, + &hwm_gt_chip_info, + NULL); if (!IS_ERR(hwmon_dev)) ddat_gt->hwmon_dev = hwmon_dev; } + return; +err: + i915_hwmon_unregister(i915); } void i915_hwmon_unregister(struct drm_i915_private *i915) { - fetch_and_zero(&i915->hwmon); + struct i915_hwmon *hwmon = i915->hwmon; + struct intel_gt *gt; + int i; + + if (!hwmon) + return; + + for_each_gt(gt, i915, i) + if (hwmon->ddat_gt[i].hwmon_dev) + hwmon_device_unregister(hwmon->ddat_gt[i].hwmon_dev); + + if (hwmon->ddat.hwmon_dev) + hwmon_device_unregister(hwmon->ddat.hwmon_dev); + + mutex_destroy(&hwmon->hwmon_lock); + + kfree(i915->hwmon); + i915->hwmon = NULL; } -- cgit v1.2.3 From 0964c84b93db7fbf74f357c1e20957850e092db3 Mon Sep 17 00:00:00 2001 From: Bob Zhou Date: Tue, 23 Apr 2024 16:58:11 +0800 Subject: drm/amdgpu: add error handle to avoid out-of-bounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8b2faf1a4f3b6c748c0da36cda865a226534d520 upstream. if the sdma_v4_0_irq_id_to_seq return -EINVAL, the process should be stop to avoid out-of-bounds read, so directly return -EINVAL. Signed-off-by: Bob Zhou Acked-by: Christian König Reviewed-by: Le Ma Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 43775cb67ff5..8f231766756a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2021,6 +2021,9 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, DRM_DEBUG("IH: SDMA trap\n"); instance = sdma_v4_0_irq_id_to_seq(entry->client_id); + if (instance < 0) + return instance; + switch (entry->ring_id) { case 0: amdgpu_fence_process(&adev->sdma.instance[instance].ring); -- cgit v1.2.3 From 6b84900ebb91ef0945fd9969d1620ea66c4779ba Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 20 Mar 2024 11:27:31 +0000 Subject: drm/xe/bb: assert width in xe_bb_create_job() commit 1008368e1c7e36bdec01b3cce1e76606dc3ad46f upstream. The queue width will determine the number of batch buffer emitted into the ring. In the case of xe_bb_create_job() we pass exactly one batch address, therefore add an assert for the width to make sure we don't go out of bounds. While here also convert to the helper to determine if the queue is migration based. Signed-off-by: Matthew Auld Cc: Nirmoy Das Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240320112730.219854-3-matthew.auld@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/xe/xe_bb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 7c124475c428..a35e0781b7b9 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -96,7 +96,8 @@ struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q, { u64 addr = xe_sa_bo_gpu_addr(bb->bo); - xe_gt_assert(q->gt, !(q->vm && q->vm->flags & XE_VM_FLAG_MIGRATION)); + xe_gt_assert(q->gt, !xe_sched_job_is_migration(q)); + xe_gt_assert(q->gt, q->width == 1); return __xe_bb_create_job(q, bb, &addr); } -- cgit v1.2.3 From 309f1a7ffa4e2db309af1db4c968327faf6e89f3 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 19 Apr 2024 10:28:54 +0200 Subject: drm/fbdev-generic: Do not set physical framebuffer address commit 87cb4a612a89690b123e68f6602d9f6581b03597 upstream. Framebuffer memory is allocated via vzalloc() from non-contiguous physical pages. The physical framebuffer start address is therefore meaningless. Do not set it. The value is not used within the kernel and only exported to userspace on dedicated ARM configs. No functional change is expected. v2: - refer to vzalloc() in commit message (Javier) Signed-off-by: Thomas Zimmermann Fixes: a5b44c4adb16 ("drm/fbdev-generic: Always use shadow buffering") Cc: Thomas Zimmermann Cc: Javier Martinez Canillas Cc: Zack Rusin Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: # v6.4+ Reviewed-by: Javier Martinez Canillas Reviewed-by: Zack Rusin Reviewed-by: Sui Jingfeng Tested-by: Sui Jingfeng Acked-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240419083331.7761-2-tzimmermann@suse.de (cherry picked from commit 73ef0aecba78aa9ebd309b10b6cd17d94e632892) Signed-off-by: Maarten Lankhorst Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_fbdev_generic.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/drm_fbdev_generic.c b/drivers/gpu/drm/drm_fbdev_generic.c index d647d89764cb..b4659cd6285a 100644 --- a/drivers/gpu/drm/drm_fbdev_generic.c +++ b/drivers/gpu/drm/drm_fbdev_generic.c @@ -113,7 +113,6 @@ static int drm_fbdev_generic_helper_fb_probe(struct drm_fb_helper *fb_helper, /* screen */ info->flags |= FBINFO_VIRTFB | FBINFO_READS_FAST; info->screen_buffer = screen_buffer; - info->fix.smem_start = page_to_phys(vmalloc_to_page(info->screen_buffer)); info->fix.smem_len = screen_size; /* deferred I/O */ -- cgit v1.2.3 From 438c129ef72a76330c07d8a8c684d1cd78c74851 Mon Sep 17 00:00:00 2001 From: Li Ma Date: Mon, 20 May 2024 18:43:55 +0800 Subject: drm/amdgpu/atomfirmware: add intergrated info v2.3 table commit e64e8f7c178e5228e0b2dbb504b9dc75953a319f upstream. [Why] The vram width value is 0. Because the integratedsysteminfo table in VBIOS has updated to 2.3. [How] Driver needs a new intergrated info v2.3 table too. Then the vram width value will be correct. Signed-off-by: Li Ma Reviewed-by: Yifan Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 15 +++++++++ drivers/gpu/drm/amd/include/atomfirmware.h | 43 ++++++++++++++++++++++++ 2 files changed, 58 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 6857c586ded7..c8c23dbb9091 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -211,6 +211,7 @@ union igp_info { struct atom_integrated_system_info_v1_11 v11; struct atom_integrated_system_info_v1_12 v12; struct atom_integrated_system_info_v2_1 v21; + struct atom_integrated_system_info_v2_3 v23; }; union umc_info { @@ -359,6 +360,20 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, if (vram_type) *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); break; + case 3: + mem_channel_number = igp_info->v23.umachannelnumber; + if (!mem_channel_number) + mem_channel_number = 1; + mem_type = igp_info->v23.memorytype; + if (mem_type == LpDdr5MemType) + mem_channel_width = 32; + else + mem_channel_width = 64; + if (vram_width) + *vram_width = mem_channel_number * mem_channel_width; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index af3eebb4c9bc..1acb2d2c5597 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -1657,6 +1657,49 @@ struct atom_integrated_system_info_v2_2 uint32_t reserved4[189]; }; +struct uma_carveout_option { + char optionName[29]; //max length of string is 28chars + '\0'. Current design is for "minimum", "Medium", "High". This makes entire struct size 64bits + uint8_t memoryCarvedGb; //memory carved out with setting + uint8_t memoryRemainingGb; //memory remaining on system + union { + struct _flags { + uint8_t Auto : 1; + uint8_t Custom : 1; + uint8_t Reserved : 6; + } flags; + uint8_t all8; + } uma_carveout_option_flags; +}; + +struct atom_integrated_system_info_v2_3 { + struct atom_common_table_header table_header; + uint32_t vbios_misc; // enum of atom_system_vbiosmisc_def + uint32_t gpucapinfo; // enum of atom_system_gpucapinf_def + uint32_t system_config; + uint32_t cpucapinfo; + uint16_t gpuclk_ss_percentage; // unit of 0.001%, 1000 mean 1% + uint16_t gpuclk_ss_type; + uint16_t dpphy_override; // bit vector, enum of atom_sysinfo_dpphy_override_def + uint8_t memorytype; // enum of atom_dmi_t17_mem_type_def, APU memory type indication. + uint8_t umachannelnumber; // number of memory channels + uint8_t htc_hyst_limit; + uint8_t htc_tmp_limit; + uint8_t reserved1; // dp_ss_control + uint8_t gpu_package_id; + struct edp_info_table edp1_info; + struct edp_info_table edp2_info; + uint32_t reserved2[8]; + struct atom_external_display_connection_info extdispconninfo; + uint8_t UMACarveoutVersion; + uint8_t UMACarveoutIndexMax; + uint8_t UMACarveoutTypeDefault; + uint8_t UMACarveoutIndexDefault; + uint8_t UMACarveoutType; //Auto or Custom + uint8_t UMACarveoutIndex; + struct uma_carveout_option UMASizeControlOption[20]; + uint8_t reserved3[110]; +}; + // system_config enum atom_system_vbiosmisc_def{ INTEGRATED_SYSTEM_INFO__GET_EDID_CALLBACK_FUNC_SUPPORT = 0x01, -- cgit v1.2.3 From 40ee4866d25f69b0428e24a8f5d35e64d407f5d3 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sun, 26 May 2024 07:59:08 -0500 Subject: drm/amd: Fix shutdown (again) on some SMU v13.0.4/11 platforms commit 267cace556e8a53d703119f7435ab556209e5b6a upstream. commit cd94d1b182d2 ("dm/amd/pm: Fix problems with reboot/shutdown for some SMU 13.0.4/13.0.11 users") attempted to fix shutdown issues that were reported since commit 31729e8c21ec ("drm/amd/pm: fixes a random hang in S4 for SMU v13.0.4/11") but caused issues for some people. Adjust the workaround flow to properly only apply in the S4 case: -> For shutdown go through SMU_MSG_PrepareMp1ForUnload -> For S4 go through SMU_MSG_GfxDeviceDriverReset and SMU_MSG_PrepareMp1ForUnload Reported-and-tested-by: lectrode Closes: https://github.com/void-linux/void-packages/issues/50417 Cc: stable@vger.kernel.org Fixes: cd94d1b182d2 ("dm/amd/pm: Fix problems with reboot/shutdown for some SMU 13.0.4/13.0.11 users") Reviewed-by: Tim Huang Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c index 4abfcd32747d..2fb6c9cb0f14 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c @@ -226,15 +226,17 @@ static int smu_v13_0_4_system_features_control(struct smu_context *smu, bool en) struct amdgpu_device *adev = smu->adev; int ret = 0; - if (!en && adev->in_s4) { - /* Adds a GFX reset as workaround just before sending the - * MP1_UNLOAD message to prevent GC/RLC/PMFW from entering - * an invalid state. - */ - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, - SMU_RESET_MODE_2, NULL); - if (ret) - return ret; + if (!en && !adev->in_s0ix) { + if (adev->in_s4) { + /* Adds a GFX reset as workaround just before sending the + * MP1_UNLOAD message to prevent GC/RLC/PMFW from entering + * an invalid state. + */ + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, + SMU_RESET_MODE_2, NULL); + if (ret) + return ret; + } ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL); } -- cgit v1.2.3 From 46579f06206d877b076b6835244fd36c97db82e1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 20 May 2024 14:41:31 -0400 Subject: Revert "drm/amdkfd: fix gfx_target_version for certain 11.0.3 devices" commit dd2b75fd9a79bf418e088656822af06fc253dbe3 upstream. This reverts commit 28ebbb4981cb1fad12e0b1227dbecc88810b1ee8. Revert this commit as apparently the LLVM code to take advantage of this never landed. Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher Cc: Feifei Xu Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 719d6d365e15..ff01610fbce3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -408,15 +408,8 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) f2g = &gfx_v11_kfd2kgd; break; case IP_VERSION(11, 0, 3): - if ((adev->pdev->device == 0x7460 && - adev->pdev->revision == 0x00) || - (adev->pdev->device == 0x7461 && - adev->pdev->revision == 0x00)) - /* Note: Compiler version is 11.0.5 while HW version is 11.0.3 */ - gfx_target_version = 110005; - else - /* Note: Compiler version is 11.0.1 while HW version is 11.0.3 */ - gfx_target_version = 110001; + /* Note: Compiler version is 11.0.1 while HW version is 11.0.3 */ + gfx_target_version = 110001; f2g = &gfx_v11_kfd2kgd; break; case IP_VERSION(11, 5, 0): -- cgit v1.2.3