author    Thomas Hellström <thomas.hellstrom@linux.intel.com>	2023-09-08 12:17:14 +0300
committer Rodrigo Vivi <rodrigo.vivi@intel.com>	2023-12-21 19:41:07 +0300
commit    d490ecf577903ce5a9e6a3bb3bd08b5a550719c7 (patch)
tree      23e512301a622080206a97a940bdfe36e582724a /drivers/gpu/drm/xe/xe_exec.c
parent    b7ab8c4f028f87b8c79c9f99e12b891fd5430483 (diff)
drm/xe: Rework xe_exec and the VM rebind worker to use the drm_exec helper
Replace the calls to ttm_eu_reserve_buffers() by using the drm_exec helper instead. Also make sure the locking loop covers any calls to xe_bo_validate() / ttm_bo_validate(), so that these function calls may easily benefit from being called from within an unsealed locking transaction and may thus perform blocking dma_resv locks in the future.

For the unlock, we remove an assert that the vm->rebind_list is empty when locks are released. Since that assert may no longer hold true if the error path is hit with a partly locked list, we chose to remove it.

v3:
- Don't accept duplicate bo locks in the rebind worker.
v5:
- Loop over drm_exec objects in reverse when unlocking.
v6:
- We can't keep the WW ticket when retrying validation on OOM. Fix.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230908091716.36984-5-thomas.hellstrom@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
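For reference, the drm_exec pattern this series moves to looks roughly like the sketch below. This is a minimal illustration of the helper as used in the patch, not the actual driver code; lock_and_prepare_obj() is a hypothetical function name.

#include <drm/drm_exec.h>

/* Lock one GEM object and reserve a dma-fence slot, transparently
 * backing off and restarting the transaction on ww-mutex contention.
 */
static int lock_and_prepare_obj(struct drm_gem_object *obj)
{
	struct drm_exec exec;
	int err = 0;

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
	drm_exec_until_all_locked(&exec) {
		/* Lock @obj and reserve one fence slot on its dma_resv. */
		err = drm_exec_prepare_obj(&exec, obj, 1);
		/* On -EDEADLK: drop all locks and rerun the loop body. */
		drm_exec_retry_on_contention(&exec);
		if (err)
			break;
	}

	/* ... validate / submit while all locked objects stay locked ... */

	drm_exec_fini(&exec);	/* drops every lock taken above */
	return err;
}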
Diffstat (limited to 'drivers/gpu/drm/xe/xe_exec.c')
-rw-r--r--  drivers/gpu/drm/xe/xe_exec.c  77
1 file changed, 23 insertions(+), 54 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 629d81a789e7..eb7fc3192c22 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -6,6 +6,7 @@
#include "xe_exec.h"
#include <drm/drm_device.h>
+#include <drm/drm_exec.h>
#include <drm/drm_file.h>
#include <drm/xe_drm.h>
#include <linux/delay.h>
@@ -93,25 +94,16 @@
* Unlock all
*/
-#define XE_EXEC_BIND_RETRY_TIMEOUT_MS 1000
-
-static int xe_exec_begin(struct xe_exec_queue *q, struct ww_acquire_ctx *ww,
- struct ttm_validate_buffer tv_onstack[],
- struct ttm_validate_buffer **tv,
- struct list_head *objs)
+static int xe_exec_begin(struct drm_exec *exec, struct xe_vm *vm)
{
- struct xe_vm *vm = q->vm;
struct xe_vma *vma;
LIST_HEAD(dups);
- ktime_t end = 0;
int err = 0;
- *tv = NULL;
- if (xe_vm_no_dma_fences(q->vm))
+ if (xe_vm_no_dma_fences(vm))
return 0;
-retry:
- err = xe_vm_lock_dma_resv(vm, ww, tv_onstack, tv, objs, true, 1);
+ err = xe_vm_lock_dma_resv(vm, exec, 1, true);
if (err)
return err;
@@ -127,42 +119,13 @@ retry:
continue;
err = xe_bo_validate(xe_vma_bo(vma), vm, false);
- if (err) {
- xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs);
- *tv = NULL;
+ if (err)
break;
- }
- }
-
- /*
- * With multiple active VMs, under memory pressure, it is possible that
- * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM.
- * Until ttm properly handles locking in such scenarios, best thing the
- * driver can do is retry with a timeout.
- */
- if (err == -ENOMEM) {
- ktime_t cur = ktime_get();
-
- end = end ? : ktime_add_ms(cur, XE_EXEC_BIND_RETRY_TIMEOUT_MS);
- if (ktime_before(cur, end)) {
- msleep(20);
- goto retry;
- }
}
return err;
}
-static void xe_exec_end(struct xe_exec_queue *q,
- struct ttm_validate_buffer *tv_onstack,
- struct ttm_validate_buffer *tv,
- struct ww_acquire_ctx *ww,
- struct list_head *objs)
-{
- if (!xe_vm_no_dma_fences(q->vm))
- xe_vm_unlock_dma_resv(q->vm, tv_onstack, tv, ww, objs);
-}
-
int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct xe_device *xe = to_xe_device(dev);
@@ -173,15 +136,13 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
struct xe_exec_queue *q;
struct xe_sync_entry *syncs = NULL;
u64 addresses[XE_HW_ENGINE_MAX_INSTANCE];
- struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
- struct ttm_validate_buffer *tv = NULL;
+ struct drm_exec exec;
u32 i, num_syncs = 0;
struct xe_sched_job *job;
struct dma_fence *rebind_fence;
struct xe_vm *vm;
- struct ww_acquire_ctx ww;
- struct list_head objs;
bool write_locked;
+ ktime_t end = 0;
int err = 0;
if (XE_IOCTL_DBG(xe, args->extensions) ||
@@ -294,26 +255,34 @@ retry:
goto err_unlock_list;
}
- err = xe_exec_begin(q, &ww, tv_onstack, &tv, &objs);
- if (err)
- goto err_unlock_list;
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_until_all_locked(&exec) {
+ err = xe_exec_begin(&exec, vm);
+ drm_exec_retry_on_contention(&exec);
+ if (err && xe_vm_validate_should_retry(&exec, err, &end)) {
+ err = -EAGAIN;
+ goto err_unlock_list;
+ }
+ if (err)
+ goto err_exec;
+ }
if (xe_vm_is_closed_or_banned(q->vm)) {
drm_warn(&xe->drm, "Trying to schedule after vm is closed or banned\n");
err = -ECANCELED;
- goto err_exec_queue_end;
+ goto err_exec;
}
if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) {
err = -EWOULDBLOCK;
- goto err_exec_queue_end;
+ goto err_exec;
}
job = xe_sched_job_create(q, xe_exec_queue_is_parallel(q) ?
addresses : &args->address);
if (IS_ERR(job)) {
err = PTR_ERR(job);
- goto err_exec_queue_end;
+ goto err_exec;
}
/*
@@ -412,8 +381,8 @@ err_repin:
err_put_job:
if (err)
xe_sched_job_put(job);
-err_exec_queue_end:
- xe_exec_end(q, tv_onstack, tv, &ww, &objs);
+err_exec:
+ drm_exec_fini(&exec);
err_unlock_list:
if (write_locked)
up_write(&vm->lock);
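A note on the retry path: the open-coded -ENOMEM retry loop deleted from xe_exec_begin() is folded into the xe_vm_validate_should_retry() helper called above, with the retry now restarted from scratch via -EAGAIN so the WW ticket is not held across it (the v6 note). Judging from the removed code, the helper plausibly looks like the sketch below; the constant name, exact signature, and definition are assumptions, as the real code lives elsewhere in the driver.

#include <drm/drm_exec.h>
#include <linux/delay.h>
#include <linux/ktime.h>

#define XE_VALIDATE_RETRY_TIMEOUT_MS 1000	/* assumed name and value */

/* Under memory pressure ttm_bo_validate() can run into -EDEADLK and
 * report it as -ENOMEM; mirror the removed loop by sleeping and
 * retrying until a ~1s deadline (tracked in *end across calls) expires.
 */
bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
{
	ktime_t cur;

	if (err != -ENOMEM)
		return false;

	cur = ktime_get();
	*end = *end ? : ktime_add_ms(cur, XE_VALIDATE_RETRY_TIMEOUT_MS);
	if (!ktime_before(cur, *end))
		return false;

	msleep(20);	/* back off before the caller restarts with -EAGAIN */
	return true;
}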