summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c53
1 files changed, 34 insertions, 19 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index c4c6a29052ac..e1c21d250611 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -144,7 +144,8 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
uint16_t pasid, uint16_t client_id)
{
enum amdgpu_ras_block block = 0;
- int old_poison, ret = -EINVAL;
+ int old_poison;
+ uint32_t reset = 0;
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
if (!p)
@@ -162,8 +163,13 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
case SOC15_IH_CLIENTID_SE2SH:
case SOC15_IH_CLIENTID_SE3SH:
case SOC15_IH_CLIENTID_UTCL2:
- ret = kfd_dqm_evict_pasid(dev->dqm, pasid);
block = AMDGPU_RAS_BLOCK__GFX;
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ break;
+ case SOC15_IH_CLIENTID_VMC:
+ case SOC15_IH_CLIENTID_VMC1:
+ block = AMDGPU_RAS_BLOCK__MMHUB;
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
break;
case SOC15_IH_CLIENTID_SDMA0:
case SOC15_IH_CLIENTID_SDMA1:
@@ -171,27 +177,21 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
case SOC15_IH_CLIENTID_SDMA3:
case SOC15_IH_CLIENTID_SDMA4:
block = AMDGPU_RAS_BLOCK__SDMA;
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
break;
default:
- break;
+ dev_warn(dev->adev->dev,
+ "client %d does not support poison consumption\n", client_id);
+ return;
}
kfd_signal_poison_consumed_event(dev, pasid);
- /* resetting queue passes, do page retirement without gpu reset
- * resetting queue fails, fallback to gpu reset solution
- */
- if (!ret) {
- dev_warn(dev->adev->dev,
- "RAS poison consumption, unmap queue flow succeeded: client id %d\n",
- client_id);
- amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, false);
- } else {
- dev_warn(dev->adev->dev,
- "RAS poison consumption, fall back to gpu reset flow: client id %d\n",
- client_id);
- amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, true);
- }
+ dev_warn(dev->adev->dev,
+ "poison is consumed by client %d, kick off gpu reset flow\n", client_id);
+
+ amdgpu_amdkfd_ras_pasid_poison_consumption_handler(dev->adev,
+ block, pasid, NULL, NULL, reset);
}
static bool context_id_expected(struct kfd_dev *dev)
@@ -414,10 +414,25 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
client_id == SOC15_IH_CLIENTID_UTCL2) {
struct kfd_vm_fault_info info = {0};
uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
+ uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry);
+ uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry);
+ int hub_inst = 0;
struct kfd_hsa_memory_exception_data exception_data;
- if (client_id == SOC15_IH_CLIENTID_UTCL2 &&
- amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) {
+ /* gfxhub */
+ if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
+ hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev,
+ node_id);
+ if (hub_inst < 0)
+ hub_inst = 0;
+ }
+
+ /* mmhub */
+ if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC)
+ hub_inst = node_id / 4;
+
+ if (amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev,
+ hub_inst, vmid_type)) {
event_interrupt_poison_consumption_v9(dev, pasid, client_id);
return;
}