summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNiranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>2023-05-09 08:08:24 +0300
committerRodrigo Vivi <rodrigo.vivi@intel.com>2023-12-20 02:33:49 +0300
commit34f89ac8e66cd5121fb05c765acc3c67ddbef7a0 (patch)
tree21948eb5197c59918e94ffc0c3105a8817ec74c0
parent9ca14f94d294862d6f5ee30a6b73f295cfaa5d08 (diff)
downloadlinux-34f89ac8e66cd5121fb05c765acc3c67ddbef7a0.tar.xz
drm/xe: Handle -EDEADLK case in exec ioctl
With multiple active VMs, under memory pressure, it is possible for ttm_bo_validate() to run into -EDEADLK in ttm_mem_evict_wait_busy() and return -ENOMEM. Until ttm properly handles locking in such scenarios, the best thing the driver can do is unwind the lock and retry. Update xe_exec_begin() to retry validating BOs with a timeout upon -ENOMEM. Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
-rw-r--r--drivers/gpu/drm/xe/xe_exec.c27
1 files changed, 24 insertions, 3 deletions
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index ea869f2452ef..3db1b159586e 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -8,6 +8,7 @@
#include <drm/drm_device.h>
#include <drm/drm_file.h>
#include <drm/xe_drm.h>
+#include <linux/delay.h>
#include "xe_bo.h"
#include "xe_device.h"
@@ -91,6 +92,8 @@
* Unlock all
*/
+#define XE_EXEC_BIND_RETRY_TIMEOUT_MS 1000
+
static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
struct ttm_validate_buffer tv_onstack[],
struct ttm_validate_buffer **tv,
@@ -99,12 +102,14 @@ static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
struct xe_vm *vm = e->vm;
struct xe_vma *vma;
LIST_HEAD(dups);
- int err;
+ ktime_t end = 0;
+ int err = 0;
*tv = NULL;
if (xe_vm_no_dma_fences(e->vm))
return 0;
+retry:
err = xe_vm_lock_dma_resv(vm, ww, tv_onstack, tv, objs, true, 1);
if (err)
return err;
@@ -122,11 +127,27 @@ static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
if (err) {
xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs);
*tv = NULL;
- return err;
+ break;
+ }
+ }
+
+ /*
+ * With multiple active VMs, under memory pressure, ttm_bo_validate()
+ * may run into -EDEADLK and in that case return -ENOMEM. Until ttm
+ * properly handles locking in such scenarios, the best thing the
+ * driver can do is retry with a timeout.
+ */
+ if (err == -ENOMEM) {
+ ktime_t cur = ktime_get();
+
+ end = end ? : ktime_add_ms(cur, XE_EXEC_BIND_RETRY_TIMEOUT_MS);
+ if (ktime_before(cur, end)) {
+ msleep(20);
+ goto retry;
}
}
- return 0;
+ return err;
}
static void xe_exec_end(struct xe_engine *e,