diff options
author | Dave Airlie <airlied@redhat.com> | 2020-11-13 08:01:13 +0300 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2020-11-13 08:01:57 +0300 |
commit | 334a1683935fceba346768b62cb3bb2d3e045578 (patch) | |
tree | 3730aa9bfefc4ef4b8ee7160c2fd04cd69a3e56f /drivers/gpu/drm/i915/gem | |
parent | 24bdae6993f7c430a48b1e3a16f27f98e414ee28 (diff) | |
parent | 695dc55b573985569259e18f8e6261a77924342b (diff) | |
download | linux-334a1683935fceba346768b62cb3bb2d3e045578.tar.xz |
Merge tag 'drm-intel-gt-next-2020-11-12-1' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
Cross-subsystem Changes:
- DMA mapped scatterlist fixes in i915 to unblock merging of
https://lkml.org/lkml/2020/9/27/70 (Tvrtko, Tom)
Driver Changes:
- Fix for user reported issue #2381 (Graphical output stops with "switching to inteldrmfb from simple"):
Mark ininitial fb obj as WT on eLLC machines to avoid rcu lockup during fbdev init (Ville, Chris)
- Fix for Tigerlake (and earlier) to avoid spurious empty CSB events leading to hang (Chris, Bruce)
- Delay execlist processing for Tigerlake to avoid hang (Chris)
- Fix for Tigerlake RCS engine health check through heartbeat (Chris)
- Fix for Tigerlake reserved MOCS entries (Ayaz, Chris)
- Fix Media power gate sequence on Tigerlake (Rodrigo)
- Enable eLLC caching of display buffers for SKL+ (Ville)
- Support parsing of oversize batches on Gen9 (Matt, Chris)
- Exclude low pages (128KiB) of stolen from use to avoid thrashing during reset (Chris)
- Flush engines before Tigerlake breadcrumbs (Chris)
- Use the local HWSP offset during submission (Chris)
- Flush coherency domains on first set-domain-ioctl (Chris, Zbigniew)
- Use the active reference on the vma while capturing to avoid use-after-free (Chris)
- Fix MOCS PTE setting for gen9+ (Ville)
- Avoid NULL dereference on IPS driver callback while unbinding i915 (Chris)
- Avoid NULL dereference from PT/PD stash allocation error (Matt)
- Hold request reference for canceling an active context (Chris)
- Avoid infinite loop on x86-32 when mapping a lot of objects (Chris)
- Disallow WC mappings when processor doesn't support them (Chris)
- Return correct error in i915_gem_object_copy_blt() error path (Dan)
- Return correct error in intel_context_create_request() error path (Maarten)
- Tune down GuC communication enabled/disabled messages to debug (Jani)
- Fix rebased commit "Remove i915_request.lock requirement for execution callbacks" (Chris)
- Cancel outstanding work after disabling heartbeats on an engine (Chris)
- Signal cancelled requests (Chris)
- Retire cancelled requests on unload (Chris)
- Scrub HW state on driver remove (Chris)
- Undo forced context restores after trivial preemptions (Chris)
- Handle PCI unbind in PMU code (Tvrtko)
- Fix CPU hotplug with multiple GPUs in PMU code (Trtkko)
- Correctly set SFC capability for video engines (Venkata)
- Update GuC code to use firmware v49.0.1 (John, Matthew B., Daniele, Oscar, Michel, Rodrigo, Michal)
- Improve GuC warnings on loading failure (John)
- Avoid ownership race in buffer pool by clearing age (Chris)
- Use MMIO to read CSB in case of failure (Chris, Mika)
- Show engine properties in engine state dump to indicate changes (Chris, Joonas)
- Break up error capture compression loops with cond_resched() (Chris)
- Reduce GPU error capture mutex hold time to avoid khungtaskd (Chris)
- Serialise debugfs i915_gem_objects with ctx->mutex (Chris)
- Always test execution status on closing the context and close if not persistent (Chris)
- Avoid mixing integer types during batch copies (Chris, Jared)
- Skip over MI_NOOP when parsing to avoid overhead (Chris)
- Hold onto an explicit ref to i915_vma_work.pinned (Chris)
- Perform all asynchronous waits prior to marking payload start (Chris)
- Pull phys pread/pwrite implementations to the backend (Matt)
- Improve record of hung engines in error state (Tvrtko)
- Allow backends to override pread implementation (Matt)
- Reinforce LRC poisoning checks to confirm context survives execution (Chris)
- Fix memory region max size calculation (Matt)
- Fix order when adding blocks to memory region (Matt)
- Eliminate unused intel_virtual_engine_get_sibling func (Chris)
- Cleanup kasan warning for on-stack (unsigned long) casting (Chris)
- Onion unwind for scratch page allocation failure (Chris)
- Poison stolen pages before use (Chris)
- Selftest improvements (Chris)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201112163407.GA20320@jlahtine-mobl.ger.corp.intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/gem')
-rw-r--r-- | drivers/gpu/drm/i915/gem/i915_gem_client_blt.c | 18 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gem/i915_gem_object.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gem/i915_gem_object.h | 20 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 19 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gem/i915_gem_pages.c | 21 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gem/i915_gem_phys.c | 55 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 48 |
7 files changed, 158 insertions, 25 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c index 272cf3ea68d5..44821d94544f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c @@ -202,12 +202,6 @@ retry: if (unlikely(err)) goto out_request; - if (w->ce->engine->emit_init_breadcrumb) { - err = w->ce->engine->emit_init_breadcrumb(rq); - if (unlikely(err)) - goto out_request; - } - /* * w->dma is already exported via (vma|obj)->resv we need only * keep track of the GPU activity within this vma/request, and @@ -217,9 +211,15 @@ retry: if (err) goto out_request; - err = w->ce->engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - 0); + if (rq->engine->emit_init_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (unlikely(err)) + goto out_request; + } + + err = rq->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + 0); out_request: if (unlikely(err)) { i915_request_set_error_once(rq, err); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 3389ac972d16..00d24000b5e8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -82,6 +82,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, obj->mm.madv = I915_MADV_WILLNEED; INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); mutex_init(&obj->mm.get_page.lock); + INIT_RADIX_TREE(&obj->mm.get_dma_page.radix, GFP_KERNEL | __GFP_NOWARN); + mutex_init(&obj->mm.get_dma_page.lock); if (IS_ENABLED(CONFIG_LOCKDEP) && i915_gem_object_is_shrinkable(obj)) i915_gem_shrinker_taints_mutex(to_i915(obj->base.dev), diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index eaf3d4147be0..be14486f63a7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -272,8 +272,26 @@ int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, unsigned int tiling, unsigned int stride); struct scatterlist * +__i915_gem_object_get_sg(struct drm_i915_gem_object *obj, + struct i915_gem_object_page_iter *iter, + unsigned int n, + unsigned int *offset); + +static inline struct scatterlist * i915_gem_object_get_sg(struct drm_i915_gem_object *obj, - unsigned int n, unsigned int *offset); + unsigned int n, + unsigned int *offset) +{ + return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset); +} + +static inline struct scatterlist * +i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj, + unsigned int n, + unsigned int *offset) +{ + return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset); +} struct page * i915_gem_object_get_page(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index b5c15557cc87..e2d9b7e1e152 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -56,6 +56,8 @@ struct drm_i915_gem_object_ops { void (*truncate)(struct drm_i915_gem_object *obj); void (*writeback)(struct drm_i915_gem_object *obj); + int (*pread)(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pread *arg); int (*pwrite)(struct drm_i915_gem_object *obj, const struct drm_i915_gem_pwrite *arg); @@ -80,6 +82,14 @@ struct i915_mmap_offset { struct rb_node offset; }; +struct i915_gem_object_page_iter { + struct scatterlist *sg_pos; + unsigned int sg_idx; /* in pages, but 32bit eek! */ + + struct radix_tree_root radix; + struct mutex lock; /* protects this cache */ +}; + struct drm_i915_gem_object { struct drm_gem_object base; @@ -246,13 +256,8 @@ struct drm_i915_gem_object { I915_SELFTEST_DECLARE(unsigned int page_mask); - struct i915_gem_object_page_iter { - struct scatterlist *sg_pos; - unsigned int sg_idx; /* in pages, but 32bit eek! */ - - struct radix_tree_root radix; - struct mutex lock; /* protects this cache */ - } get_page; + struct i915_gem_object_page_iter get_page; + struct i915_gem_object_page_iter get_dma_page; /** * Element within i915->mm.unbound_list or i915->mm.bound_list, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index f60ca6dc911f..e2c7b2a7895f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -33,6 +33,8 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, obj->mm.get_page.sg_pos = pages->sgl; obj->mm.get_page.sg_idx = 0; + obj->mm.get_dma_page.sg_pos = pages->sgl; + obj->mm.get_dma_page.sg_idx = 0; obj->mm.pages = pages; @@ -155,6 +157,8 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) rcu_read_lock(); radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) radix_tree_delete(&obj->mm.get_page.radix, iter.index); + radix_tree_for_each_slot(slot, &obj->mm.get_dma_page.radix, &iter, 0) + radix_tree_delete(&obj->mm.get_dma_page.radix, iter.index); rcu_read_unlock(); } @@ -438,11 +442,12 @@ void __i915_gem_object_release_map(struct drm_i915_gem_object *obj) } struct scatterlist * -i915_gem_object_get_sg(struct drm_i915_gem_object *obj, - unsigned int n, - unsigned int *offset) +__i915_gem_object_get_sg(struct drm_i915_gem_object *obj, + struct i915_gem_object_page_iter *iter, + unsigned int n, + unsigned int *offset) { - struct i915_gem_object_page_iter *iter = &obj->mm.get_page; + const bool dma = iter == &obj->mm.get_dma_page; struct scatterlist *sg; unsigned int idx, count; @@ -471,7 +476,7 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj, sg = iter->sg_pos; idx = iter->sg_idx; - count = __sg_page_count(sg); + count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg); while (idx + count <= n) { void *entry; @@ -499,7 +504,7 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj, idx += count; sg = ____sg_next(sg); - count = __sg_page_count(sg); + count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg); } scan: @@ -517,7 +522,7 @@ scan: while (idx + count <= n) { idx += count; sg = ____sg_next(sg); - count = __sg_page_count(sg); + count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg); } *offset = n - idx; @@ -584,7 +589,7 @@ i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, struct scatterlist *sg; unsigned int offset; - sg = i915_gem_object_get_sg(obj, n, &offset); + sg = i915_gem_object_get_sg_dma(obj, n, &offset); if (len) *len = sg_dma_len(sg) - (offset << PAGE_SHIFT); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 28147aab47b9..3a4dfe2ef1da 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -134,6 +134,58 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, vaddr, dma); } +static int +phys_pwrite(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pwrite *args) +{ + void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset; + char __user *user_data = u64_to_user_ptr(args->data_ptr); + int err; + + err = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT); + if (err) + return err; + + /* + * We manually control the domain here and pretend that it + * remains coherent i.e. in the GTT domain, like shmem_pwrite. + */ + i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); + + if (copy_from_user(vaddr, user_data, args->size)) + return -EFAULT; + + drm_clflush_virt_range(vaddr, args->size); + intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); + + i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); + return 0; +} + +static int +phys_pread(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pread *args) +{ + void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset; + char __user *user_data = u64_to_user_ptr(args->data_ptr); + int err; + + err = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); + if (err) + return err; + + drm_clflush_virt_range(vaddr, args->size); + if (copy_to_user(user_data, vaddr, args->size)) + return -EFAULT; + + return 0; +} + static void phys_release(struct drm_i915_gem_object *obj) { fput(obj->base.filp); @@ -144,6 +196,9 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { .get_pages = i915_gem_object_get_pages_phys, .put_pages = i915_gem_object_put_pages_phys, + .pread = phys_pread, + .pwrite = phys_pwrite, + .release = phys_release, }; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 84b2707d8b17..29bffc6afcc1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -497,6 +497,43 @@ static int i915_gem_init_stolen(struct drm_i915_private *i915) return 0; } +static void dbg_poison(struct i915_ggtt *ggtt, + dma_addr_t addr, resource_size_t size, + u8 x) +{ +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) + if (!drm_mm_node_allocated(&ggtt->error_capture)) + return; + + if (ggtt->vm.bind_async_flags & I915_VMA_GLOBAL_BIND) + return; /* beware stop_machine() inversion */ + + GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); + + mutex_lock(&ggtt->error_mutex); + while (size) { + void __iomem *s; + + ggtt->vm.insert_page(&ggtt->vm, addr, + ggtt->error_capture.start, + I915_CACHE_NONE, 0); + mb(); + + s = io_mapping_map_wc(&ggtt->iomap, + ggtt->error_capture.start, + PAGE_SIZE); + memset_io(s, x, PAGE_SIZE); + io_mapping_unmap(s); + + addr += PAGE_SIZE; + size -= PAGE_SIZE; + } + mb(); + ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE); + mutex_unlock(&ggtt->error_mutex); +#endif +} + static struct sg_table * i915_pages_create_for_stolen(struct drm_device *dev, resource_size_t offset, resource_size_t size) @@ -540,6 +577,11 @@ static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) if (IS_ERR(pages)) return PTR_ERR(pages); + dbg_poison(&to_i915(obj->base.dev)->ggtt, + sg_dma_address(pages->sgl), + sg_dma_len(pages->sgl), + POISON_INUSE); + __i915_gem_object_set_pages(obj, pages, obj->stolen->size); return 0; @@ -549,6 +591,12 @@ static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj, struct sg_table *pages) { /* Should only be called from i915_gem_object_release_stolen() */ + + dbg_poison(&to_i915(obj->base.dev)->ggtt, + sg_dma_address(pages->sgl), + sg_dma_len(pages->sgl), + POISON_FREE); + sg_free_table(pages); kfree(pages); } |