From cfbb6b0047448e2d986160d9f30d60f604d9ad0f Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Fri, 21 Jan 2022 17:23:32 -0500 Subject: drm/amdgpu: Rework reset domain to be refcounted. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reset domain contains register access semaphor now and so needs to be present as long as each device in a hive needs it and so it cannot be binded to XGMI hive life cycle. Adress this by making reset domain refcounted and pointed by each member of the hive and the hive itself. v4: Fix crash on boot witrh XGMI hive by adding type to reset_domain. XGMI will only create a new reset_domain if prevoius was of single device type meaning it's first boot. Otherwsie it will take a refocunt to exsiting reset_domain from the amdgou device. Add a wrapper around reset_domain->refcount get/put and a wrapper around send to reset wq (Lijo) Signed-off-by: Andrey Grodzovsky Acked-by: Christian König Link: https://www.spinics.net/lists/amd-gfx/msg74121.html --- drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c') diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 5869d51d8bee..6740eef84ee1 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -32,6 +32,8 @@ #include "soc15_common.h" #include "mxgpu_ai.h" +#include "amdgpu_reset.h" + static void xgpu_ai_mailbox_send_ack(struct amdgpu_device *adev) { WREG8(AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2); @@ -308,8 +310,8 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev, switch (event) { case IDH_FLR_NOTIFICATION: if (amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev)) - WARN_ONCE(!queue_work(adev->reset_domain.wq, - &adev->virt.flr_work), + WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, + &adev->virt.flr_work), "Failed to queue work! at %s", __func__); break; -- cgit v1.2.3