summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
authorYong Zhao <yong.zhao@amd.com>2018-07-13 23:17:46 +0300
committerOded Gabbay <oded.gabbay@gmail.com>2018-07-13 23:17:46 +0300
commit8725aecac331954e0827d6bed7be02eb7b8f1e9e (patch)
treebb992397ad45432c85b1a3f98c951b15a829ee56 /drivers/gpu/drm/amd/amdkfd
parenteab69801cf4388aeba2c730ce4db746ae164eada (diff)
downloadlinux-8725aecac331954e0827d6bed7be02eb7b8f1e9e.tar.xz
drm/amdkfd: Workaround to accommodate Raven too many PPR issue
On Raven multiple PPRs can be queued up by the hardware. When the first of those requests is handled by the IOMMU driver, the memory access succeeds. After that the application may be done with the memory and unmap it. At that point the page table entries are invalidated, but there are still outstanding duplicate PPRs for those addresses. When the IOMMU driver processes those duplicate requests, it finds invalid page table entries and triggers an invalid PPR fault. As a workaround, don't signal invalid PPR faults on Raven to avoid segfaulting applications that haven't done anything wrong. As a side effect, real GPU memory access faults may go unnoticed by the application. Signed-off-by: Yong Zhao <yong.zhao@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c21
1 files changed, 16 insertions, 5 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 820133cdef83..4dcacce2db86 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -932,13 +932,24 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
up_read(&mm->mmap_sem);
mmput(mm);
- mutex_lock(&p->event_mutex);
+ pr_debug("notpresent %d, noexecute %d, readonly %d\n",
+ memory_exception_data.failure.NotPresent,
+ memory_exception_data.failure.NoExecute,
+ memory_exception_data.failure.ReadOnly);
- /* Lookup events by type and signal them */
- lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY,
- &memory_exception_data);
+ /* Workaround on Raven to not kill the process when memory is freed
+ * before IOMMU is able to finish processing all the excessive PPRs
+ */
+ if (dev->device_info->asic_family != CHIP_RAVEN) {
+ mutex_lock(&p->event_mutex);
+
+ /* Lookup events by type and signal them */
+ lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY,
+ &memory_exception_data);
+
+ mutex_unlock(&p->event_mutex);
+ }
- mutex_unlock(&p->event_mutex);
kfd_unref_process(p);
}
#endif /* KFD_SUPPORT_IOMMU_V2 */