summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2020-07-29 19:42:17 +0300
committerJoonas Lahtinen <joonas.lahtinen@linux.intel.com>2020-09-07 14:24:05 +0300
commitcd0452aa2a0d146ff2c695877a4f5d3e4d09b435 (patch)
tree084636daa2d992d9521681cd120ab4cf78c9abaa /drivers/gpu/drm/i915/gt/gen8_ppgtt.c
parentb3786b29379c0e5d1e4e162ad5464d77aa4bc4db (diff)
downloadlinux-cd0452aa2a0d146ff2c695877a4f5d3e4d09b435.tar.xz
drm/i915: Preallocate stashes for vma page-directories
We need to make the DMA allocations used for page directories to be performed up front so that we can include those allocations in our memory reservation pass. The downside is that we have to assume the worst case, even before we know the final layout, and always allocate enough page directories for this object, even when there will be overlap. This unfortunately can be quite expensive, especially as we have to clear/reset the page directories and DMA pages, but it should only be required during early phases of a workload when new objects are being discovered, or after memory/eviction pressure when we need to rebind. Once we reach steady state, the objects should not be moved and we no longer need to preallocating the pages tables. It should be noted that the lifetime for the page directories DMA is more or less decoupled from individual fences as they will be shared across objects across timelines. v2: Only allocate enough PD space for the PTE we may use, we do not need to allocate PD that will be left as scratch. v3: Store the shift unto the first PD level to encapsulate the different PTE counts for gen6/gen8. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20200729164219.5737-1-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915/gt/gen8_ppgtt.c')
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.c78
1 files changed, 23 insertions, 55 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 699125928272..08cb65bfb313 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -269,14 +269,12 @@ static void gen8_ppgtt_clear(struct i915_address_space *vm,
start, start + length, vm->top);
}
-static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
- struct i915_page_directory * const pd,
- u64 * const start, const u64 end, int lvl)
+static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
+ struct i915_vm_pt_stash *stash,
+ struct i915_page_directory * const pd,
+ u64 * const start, const u64 end, int lvl)
{
- const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
- struct i915_page_table *alloc = NULL;
unsigned int idx, len;
- int ret = 0;
GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
@@ -297,49 +295,30 @@ static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
__func__, vm, lvl + 1, idx);
- pt = fetch_and_zero(&alloc);
- if (lvl) {
- if (!pt) {
- pt = &alloc_pd(vm)->pt;
- if (IS_ERR(pt)) {
- ret = PTR_ERR(pt);
- goto out;
- }
- }
+ pt = stash->pt[!!lvl];
+ GEM_BUG_ON(!pt);
+ if (lvl ||
+ gen8_pt_count(*start, end) < I915_PDES ||
+ intel_vgpu_active(vm->i915))
fill_px(pt, vm->scratch[lvl].encode);
- } else {
- if (!pt) {
- pt = alloc_pt(vm);
- if (IS_ERR(pt)) {
- ret = PTR_ERR(pt);
- goto out;
- }
- }
-
- if (intel_vgpu_active(vm->i915) ||
- gen8_pt_count(*start, end) < I915_PDES)
- fill_px(pt, vm->scratch[lvl].encode);
- }
spin_lock(&pd->lock);
- if (likely(!pd->entry[idx]))
+ if (likely(!pd->entry[idx])) {
+ stash->pt[!!lvl] = pt->stash;
+ atomic_set(&pt->used, 0);
set_pd_entry(pd, idx, pt);
- else
- alloc = pt, pt = pd->entry[idx];
+ } else {
+ pt = pd->entry[idx];
+ }
}
if (lvl) {
atomic_inc(&pt->used);
spin_unlock(&pd->lock);
- ret = __gen8_ppgtt_alloc(vm, as_pd(pt),
- start, end, lvl);
- if (unlikely(ret)) {
- if (release_pd_entry(pd, idx, pt, scratch))
- free_px(vm, pt);
- goto out;
- }
+ __gen8_ppgtt_alloc(vm, stash,
+ as_pd(pt), start, end, lvl);
spin_lock(&pd->lock);
atomic_dec(&pt->used);
@@ -359,18 +338,12 @@ static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
}
} while (idx++, --len);
spin_unlock(&pd->lock);
-out:
- if (alloc)
- free_px(vm, alloc);
- return ret;
}
-static int gen8_ppgtt_alloc(struct i915_address_space *vm,
- u64 start, u64 length)
+static void gen8_ppgtt_alloc(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ u64 start, u64 length)
{
- u64 from;
- int err;
-
GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
GEM_BUG_ON(range_overflows(start, length, vm->total));
@@ -378,15 +351,9 @@ static int gen8_ppgtt_alloc(struct i915_address_space *vm,
start >>= GEN8_PTE_SHIFT;
length >>= GEN8_PTE_SHIFT;
GEM_BUG_ON(length == 0);
- from = start;
-
- err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd,
- &start, start + length, vm->top);
- if (unlikely(err && from != start))
- __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
- from, start, vm->top);
- return err;
+ __gen8_ppgtt_alloc(vm, stash, i915_vm_to_ppgtt(vm)->pd,
+ &start, start + length, vm->top);
}
static __always_inline void
@@ -703,6 +670,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
ppgtt_init(ppgtt, gt);
ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
+ ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t));
/*
* From bdw, there is hw support for read-only pages in the PPGTT.