summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZhigang Luo <Zhigang.Luo@amd.com>2024-03-18 21:13:10 +0300
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2024-05-17 13:02:16 +0300
commitb6f6626528fe724b512c34f3fb5946c36a135f58 (patch)
tree5e010fe11b61ff44f22c364d7845fc1143e16f6c
parenta624829bba2752a2b726f227574d6ca1d2e39671 (diff)
downloadlinux-b6f6626528fe724b512c34f3fb5946c36a135f58.tar.xz
amd/amdkfd: sync all devices to wait all processes being evicted
[ Upstream commit d06af584be5a769d124b7302b32a033e9559761d ] If there are more than one device doing reset in parallel, the first device will call kfd_suspend_all_processes() to evict all processes on all devices, this call takes time to finish. other device will start reset and recover without waiting. if the process has not been evicted before doing recover, it will be restored, then caused page fault. Signed-off-by: Zhigang Luo <Zhigang.Luo@amd.com> Reviewed-by: Felix Kuehling <felix.kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c17
1 files changed, 6 insertions, 11 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 93ce181eb3ba..913c70a0ef44 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -935,7 +935,6 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{
struct kfd_node *node;
int i;
- int count;
if (!kfd->init_complete)
return;
@@ -943,12 +942,10 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
/* for runtime suspend, skip locking kfd */
if (!run_pm) {
mutex_lock(&kfd_processes_mutex);
- count = ++kfd_locked;
- mutex_unlock(&kfd_processes_mutex);
-
/* For first KFD device suspend all the KFD processes */
- if (count == 1)
+ if (++kfd_locked == 1)
kfd_suspend_all_processes();
+ mutex_unlock(&kfd_processes_mutex);
}
for (i = 0; i < kfd->num_nodes; i++) {
@@ -959,7 +956,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
{
- int ret, count, i;
+ int ret, i;
if (!kfd->init_complete)
return 0;
@@ -973,12 +970,10 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
/* for runtime resume, skip unlocking kfd */
if (!run_pm) {
mutex_lock(&kfd_processes_mutex);
- count = --kfd_locked;
- mutex_unlock(&kfd_processes_mutex);
-
- WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
- if (count == 0)
+ if (--kfd_locked == 0)
ret = kfd_resume_all_processes();
+ WARN_ONCE(kfd_locked < 0, "KFD suspend / resume ref. error");
+ mutex_unlock(&kfd_processes_mutex);
}
return ret;