diff options
author | Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com> | 2023-05-09 08:08:24 +0300 |
---|---|---|
committer | Rodrigo Vivi <rodrigo.vivi@intel.com> | 2023-12-20 02:33:49 +0300 |
commit | 34f89ac8e66cd5121fb05c765acc3c67ddbef7a0 (patch) | |
tree | 21948eb5197c59918e94ffc0c3105a8817ec74c0 | |
parent | 9ca14f94d294862d6f5ee30a6b73f295cfaa5d08 (diff) | |
download | linux-34f89ac8e66cd5121fb05c765acc3c67ddbef7a0.tar.xz |
drm/xe: Handle -EDEADLK case in exec ioctl
With multiple active VMs, under memory pressure, it is possible that
ttm_bo_validate() runs into -EDEADLK in ttm_mem_evict_wait_busy() and
returns -ENOMEM.
Until ttm properly handles locking in such scenarios, the best thing the
driver can do is unwind the lock and retry.
Update xe_exec_begin() to retry validating BOs upon -ENOMEM, giving up
once a timeout expires.
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
-rw-r--r-- | drivers/gpu/drm/xe/xe_exec.c | 27 |
1 files changed, 24 insertions, 3 deletions
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index ea869f2452ef..3db1b159586e 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -8,6 +8,7 @@ #include <drm/drm_device.h> #include <drm/drm_file.h> #include <drm/xe_drm.h> +#include <linux/delay.h> #include "xe_bo.h" #include "xe_device.h" @@ -91,6 +92,8 @@ * Unlock all */ +#define XE_EXEC_BIND_RETRY_TIMEOUT_MS 1000 + static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww, struct ttm_validate_buffer tv_onstack[], struct ttm_validate_buffer **tv, @@ -99,12 +102,14 @@ static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww, struct xe_vm *vm = e->vm; struct xe_vma *vma; LIST_HEAD(dups); - int err; + ktime_t end = 0; + int err = 0; *tv = NULL; if (xe_vm_no_dma_fences(e->vm)) return 0; +retry: err = xe_vm_lock_dma_resv(vm, ww, tv_onstack, tv, objs, true, 1); if (err) return err; @@ -122,11 +127,27 @@ static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww, if (err) { xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs); *tv = NULL; - return err; + break; + } + } + + /* + * With multiple active VMs, under memory pressure, it is possible that + * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM. + * Until ttm properly handles locking in such scenarios, best thing the + * driver can do is retry with a timeout. + */ + if (err == -ENOMEM) { + ktime_t cur = ktime_get(); + + end = end ? : ktime_add_ms(cur, XE_EXEC_BIND_RETRY_TIMEOUT_MS); + if (ktime_before(cur, end)) { + msleep(20); + goto retry; } } - return 0; + return err; } static void xe_exec_end(struct xe_engine *e, |