summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915')
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c10
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.c48
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c314
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.h4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pages.c14
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c21
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c15
-rw-r--r--drivers/gpu/drm/i915/i915_request.c25
-rw-r--r--drivers/gpu/drm/i915/i915_sw_fence.c10
9 files changed, 401 insertions, 60 deletions
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 68325678f5ef..b18c5ac2934d 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -14956,12 +14956,6 @@ static int intel_atomic_check(struct drm_device *dev,
if (dev_priv->wm.distrust_bios_wm)
any_ms = true;
- if (any_ms) {
- ret = intel_modeset_checks(state);
- if (ret)
- goto fail;
- }
-
intel_fbc_choose_crtc(dev_priv, state);
ret = calc_watermark_data(state);
if (ret)
@@ -14976,6 +14970,10 @@ static int intel_atomic_check(struct drm_device *dev,
goto fail;
if (any_ms) {
+ ret = intel_modeset_checks(state);
+ if (ret)
+ goto fail;
+
ret = intel_modeset_calc_cdclk(state);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index d0bdb6d447ed..ef755dd5e68f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -439,29 +439,36 @@ static bool __cancel_engine(struct intel_engine_cs *engine)
return __reset_engine(engine);
}
-static struct intel_engine_cs *__active_engine(struct i915_request *rq)
+static bool
+__active_engine(struct i915_request *rq, struct intel_engine_cs **active)
{
struct intel_engine_cs *engine, *locked;
+ bool ret = false;
/*
* Serialise with __i915_request_submit() so that it sees
* is-banned?, or we know the request is already inflight.
+ *
+ * Note that rq->engine is unstable, and so we double
+ * check that we have acquired the lock on the final engine.
*/
locked = READ_ONCE(rq->engine);
spin_lock_irq(&locked->active.lock);
while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
spin_unlock(&locked->active.lock);
- spin_lock(&engine->active.lock);
locked = engine;
+ spin_lock(&locked->active.lock);
}
- engine = NULL;
- if (i915_request_is_active(rq) && rq->fence.error != -EIO)
- engine = rq->engine;
+ if (!i915_request_completed(rq)) {
+ if (i915_request_is_active(rq) && rq->fence.error != -EIO)
+ *active = locked;
+ ret = true;
+ }
spin_unlock_irq(&locked->active.lock);
- return engine;
+ return ret;
}
static struct intel_engine_cs *active_engine(struct intel_context *ce)
@@ -472,17 +479,16 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
if (!ce->timeline)
return NULL;
- mutex_lock(&ce->timeline->mutex);
- list_for_each_entry_reverse(rq, &ce->timeline->requests, link) {
- if (i915_request_completed(rq))
- break;
+ rcu_read_lock();
+ list_for_each_entry_rcu(rq, &ce->timeline->requests, link) {
+ if (i915_request_is_active(rq) && i915_request_completed(rq))
+ continue;
/* Check with the backend if the request is inflight */
- engine = __active_engine(rq);
- if (engine)
+ if (__active_engine(rq, &engine))
break;
}
- mutex_unlock(&ce->timeline->mutex);
+ rcu_read_unlock();
return engine;
}
@@ -713,6 +719,7 @@ __create_context(struct drm_i915_private *i915)
ctx->i915 = i915;
ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
mutex_init(&ctx->mutex);
+ INIT_LIST_HEAD(&ctx->link);
spin_lock_init(&ctx->stale.lock);
INIT_LIST_HEAD(&ctx->stale.engines);
@@ -740,10 +747,6 @@ __create_context(struct drm_i915_private *i915)
for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
- spin_lock(&i915->gem.contexts.lock);
- list_add_tail(&ctx->link, &i915->gem.contexts.list);
- spin_unlock(&i915->gem.contexts.lock);
-
return ctx;
err_free:
@@ -931,6 +934,7 @@ static int gem_context_register(struct i915_gem_context *ctx,
struct drm_i915_file_private *fpriv,
u32 *id)
{
+ struct drm_i915_private *i915 = ctx->i915;
struct i915_address_space *vm;
int ret;
@@ -949,8 +953,16 @@ static int gem_context_register(struct i915_gem_context *ctx,
/* And finally expose ourselves to userspace via the idr */
ret = xa_alloc(&fpriv->context_xa, id, ctx, xa_limit_32b, GFP_KERNEL);
if (ret)
- put_pid(fetch_and_zero(&ctx->pid));
+ goto err_pid;
+
+ spin_lock(&i915->gem.contexts.lock);
+ list_add_tail(&ctx->link, &i915->gem.contexts.list);
+ spin_unlock(&i915->gem.contexts.lock);
+
+ return 0;
+err_pid:
+ put_pid(fetch_and_zero(&ctx->pid));
return ret;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 6b4ec66cb558..446e76e95c38 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -45,6 +45,13 @@ struct eb_vma_array {
struct eb_vma vma[];
};
+enum {
+ FORCE_CPU_RELOC = 1,
+ FORCE_GTT_RELOC,
+ FORCE_GPU_RELOC,
+#define DBG_FORCE_RELOC 0 /* choose one of the above! */
+};
+
#define __EXEC_OBJECT_HAS_PIN BIT(31)
#define __EXEC_OBJECT_HAS_FENCE BIT(30)
#define __EXEC_OBJECT_NEEDS_MAP BIT(29)
@@ -253,6 +260,8 @@ struct i915_execbuffer {
*/
struct reloc_cache {
struct drm_mm_node node; /** temporary GTT binding */
+ unsigned long vaddr; /** Current kmap address */
+ unsigned long page; /** Currently mapped page index */
unsigned int gen; /** Cached value of INTEL_GEN */
bool use_64bit_reloc : 1;
bool has_llc : 1;
@@ -596,6 +605,23 @@ eb_add_vma(struct i915_execbuffer *eb,
}
}
+static inline int use_cpu_reloc(const struct reloc_cache *cache,
+ const struct drm_i915_gem_object *obj)
+{
+ if (!i915_gem_object_has_struct_page(obj))
+ return false;
+
+ if (DBG_FORCE_RELOC == FORCE_CPU_RELOC)
+ return true;
+
+ if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
+ return false;
+
+ return (cache->has_llc ||
+ obj->cache_dirty ||
+ obj->cache_level != I915_CACHE_NONE);
+}
+
static int eb_reserve_vma(const struct i915_execbuffer *eb,
struct eb_vma *ev,
u64 pin_flags)
@@ -926,6 +952,8 @@ relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
static void reloc_cache_init(struct reloc_cache *cache,
struct drm_i915_private *i915)
{
+ cache->page = -1;
+ cache->vaddr = 0;
/* Must be a variable in the struct to allow GCC to unroll. */
cache->gen = INTEL_GEN(i915);
cache->has_llc = HAS_LLC(i915);
@@ -937,6 +965,25 @@ static void reloc_cache_init(struct reloc_cache *cache,
cache->target = NULL;
}
+static inline void *unmask_page(unsigned long p)
+{
+ return (void *)(uintptr_t)(p & PAGE_MASK);
+}
+
+static inline unsigned int unmask_flags(unsigned long p)
+{
+ return p & ~PAGE_MASK;
+}
+
+#define KMAP 0x4 /* after CLFLUSH_FLAGS */
+
+static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
+{
+ struct drm_i915_private *i915 =
+ container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
+ return &i915->ggtt;
+}
+
#define RELOC_TAIL 4
static int reloc_gpu_chain(struct reloc_cache *cache)
@@ -1049,6 +1096,181 @@ static int reloc_gpu_flush(struct reloc_cache *cache)
return err;
}
+static void reloc_cache_reset(struct reloc_cache *cache)
+{
+ void *vaddr;
+
+ if (!cache->vaddr)
+ return;
+
+ vaddr = unmask_page(cache->vaddr);
+ if (cache->vaddr & KMAP) {
+ if (cache->vaddr & CLFLUSH_AFTER)
+ mb();
+
+ kunmap_atomic(vaddr);
+ i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
+ } else {
+ struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+
+ intel_gt_flush_ggtt_writes(ggtt->vm.gt);
+ io_mapping_unmap_atomic((void __iomem *)vaddr);
+
+ if (drm_mm_node_allocated(&cache->node)) {
+ ggtt->vm.clear_range(&ggtt->vm,
+ cache->node.start,
+ cache->node.size);
+ mutex_lock(&ggtt->vm.mutex);
+ drm_mm_remove_node(&cache->node);
+ mutex_unlock(&ggtt->vm.mutex);
+ } else {
+ i915_vma_unpin((struct i915_vma *)cache->node.mm);
+ }
+ }
+
+ cache->vaddr = 0;
+ cache->page = -1;
+}
+
+static void *reloc_kmap(struct drm_i915_gem_object *obj,
+ struct reloc_cache *cache,
+ unsigned long page)
+{
+ void *vaddr;
+
+ if (cache->vaddr) {
+ kunmap_atomic(unmask_page(cache->vaddr));
+ } else {
+ unsigned int flushes;
+ int err;
+
+ err = i915_gem_object_prepare_write(obj, &flushes);
+ if (err)
+ return ERR_PTR(err);
+
+ BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
+ BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
+
+ cache->vaddr = flushes | KMAP;
+ cache->node.mm = (void *)obj;
+ if (flushes)
+ mb();
+ }
+
+ vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
+ cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
+ cache->page = page;
+
+ return vaddr;
+}
+
+static void *reloc_iomap(struct drm_i915_gem_object *obj,
+ struct reloc_cache *cache,
+ unsigned long page)
+{
+ struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+ unsigned long offset;
+ void *vaddr;
+
+ if (cache->vaddr) {
+ intel_gt_flush_ggtt_writes(ggtt->vm.gt);
+ io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
+ } else {
+ struct i915_vma *vma;
+ int err;
+
+ if (i915_gem_object_is_tiled(obj))
+ return ERR_PTR(-EINVAL);
+
+ if (use_cpu_reloc(cache, obj))
+ return NULL;
+
+ i915_gem_object_lock(obj);
+ err = i915_gem_object_set_to_gtt_domain(obj, true);
+ i915_gem_object_unlock(obj);
+ if (err)
+ return ERR_PTR(err);
+
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+ PIN_MAPPABLE |
+ PIN_NONBLOCK /* NOWARN */ |
+ PIN_NOEVICT);
+ if (IS_ERR(vma)) {
+ memset(&cache->node, 0, sizeof(cache->node));
+ mutex_lock(&ggtt->vm.mutex);
+ err = drm_mm_insert_node_in_range
+ (&ggtt->vm.mm, &cache->node,
+ PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
+ 0, ggtt->mappable_end,
+ DRM_MM_INSERT_LOW);
+ mutex_unlock(&ggtt->vm.mutex);
+ if (err) /* no inactive aperture space, use cpu reloc */
+ return NULL;
+ } else {
+ cache->node.start = vma->node.start;
+ cache->node.mm = (void *)vma;
+ }
+ }
+
+ offset = cache->node.start;
+ if (drm_mm_node_allocated(&cache->node)) {
+ ggtt->vm.insert_page(&ggtt->vm,
+ i915_gem_object_get_dma_address(obj, page),
+ offset, I915_CACHE_NONE, 0);
+ } else {
+ offset += page << PAGE_SHIFT;
+ }
+
+ vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap,
+ offset);
+ cache->page = page;
+ cache->vaddr = (unsigned long)vaddr;
+
+ return vaddr;
+}
+
+static void *reloc_vaddr(struct drm_i915_gem_object *obj,
+ struct reloc_cache *cache,
+ unsigned long page)
+{
+ void *vaddr;
+
+ if (cache->page == page) {
+ vaddr = unmask_page(cache->vaddr);
+ } else {
+ vaddr = NULL;
+ if ((cache->vaddr & KMAP) == 0)
+ vaddr = reloc_iomap(obj, cache, page);
+ if (!vaddr)
+ vaddr = reloc_kmap(obj, cache, page);
+ }
+
+ return vaddr;
+}
+
+static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
+{
+ if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
+ if (flushes & CLFLUSH_BEFORE) {
+ clflushopt(addr);
+ mb();
+ }
+
+ *addr = value;
+
+ /*
+ * Writes to the same cacheline are serialised by the CPU
+ * (including clflush). On the write path, we only require
+ * that it hits memory in an orderly fashion and place
+ * mb barriers at the start and end of the relocation phase
+ * to ensure ordering of clflush wrt to the system.
+ */
+ if (flushes & CLFLUSH_AFTER)
+ clflushopt(addr);
+ } else
+ *addr = value;
+}
+
static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
{
struct drm_i915_gem_object *obj = vma->obj;
@@ -1214,6 +1436,17 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
return cmd;
}
+static inline bool use_reloc_gpu(struct i915_vma *vma)
+{
+ if (DBG_FORCE_RELOC == FORCE_GPU_RELOC)
+ return true;
+
+ if (DBG_FORCE_RELOC)
+ return false;
+
+ return !dma_resv_test_signaled_rcu(vma->resv, true);
+}
+
static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset)
{
struct page *page;
@@ -1228,10 +1461,10 @@ static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset)
return addr + offset_in_page(offset);
}
-static int __reloc_entry_gpu(struct i915_execbuffer *eb,
- struct i915_vma *vma,
- u64 offset,
- u64 target_addr)
+static bool __reloc_entry_gpu(struct i915_execbuffer *eb,
+ struct i915_vma *vma,
+ u64 offset,
+ u64 target_addr)
{
const unsigned int gen = eb->reloc_cache.gen;
unsigned int len;
@@ -1247,7 +1480,7 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb,
batch = reloc_gpu(eb, vma, len);
if (IS_ERR(batch))
- return PTR_ERR(batch);
+ return false;
addr = gen8_canonical_addr(vma->node.start + offset);
if (gen >= 8) {
@@ -1296,21 +1529,55 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb,
*batch++ = target_addr;
}
- return 0;
+ return true;
+}
+
+static bool reloc_entry_gpu(struct i915_execbuffer *eb,
+ struct i915_vma *vma,
+ u64 offset,
+ u64 target_addr)
+{
+ if (eb->reloc_cache.vaddr)
+ return false;
+
+ if (!use_reloc_gpu(vma))
+ return false;
+
+ return __reloc_entry_gpu(eb, vma, offset, target_addr);
}
static u64
-relocate_entry(struct i915_execbuffer *eb,
- struct i915_vma *vma,
+relocate_entry(struct i915_vma *vma,
const struct drm_i915_gem_relocation_entry *reloc,
+ struct i915_execbuffer *eb,
const struct i915_vma *target)
{
u64 target_addr = relocation_target(reloc, target);
- int err;
-
- err = __reloc_entry_gpu(eb, vma, reloc->offset, target_addr);
- if (err)
- return err;
+ u64 offset = reloc->offset;
+
+ if (!reloc_entry_gpu(eb, vma, offset, target_addr)) {
+ bool wide = eb->reloc_cache.use_64bit_reloc;
+ void *vaddr;
+
+repeat:
+ vaddr = reloc_vaddr(vma->obj,
+ &eb->reloc_cache,
+ offset >> PAGE_SHIFT);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+
+ GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)));
+ clflush_write32(vaddr + offset_in_page(offset),
+ lower_32_bits(target_addr),
+ eb->reloc_cache.vaddr);
+
+ if (wide) {
+ offset += sizeof(u32);
+ target_addr >>= 32;
+ wide = false;
+ goto repeat;
+ }
+ }
return target->node.start | UPDATE;
}
@@ -1375,7 +1642,8 @@ eb_relocate_entry(struct i915_execbuffer *eb,
* If the relocation already has the right value in it, no
* more work needs to be done.
*/
- if (gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset)
+ if (!DBG_FORCE_RELOC &&
+ gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset)
return 0;
/* Check that the relocation address is valid... */
@@ -1407,7 +1675,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
ev->flags &= ~EXEC_OBJECT_ASYNC;
/* and update the user's relocation entry */
- return relocate_entry(eb, ev->vma, reloc, target->vma);
+ return relocate_entry(ev->vma, reloc, eb, target->vma);
}
static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
@@ -1445,8 +1713,10 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
* this is bad and so lockdep complains vehemently.
*/
copied = __copy_from_user(r, urelocs, count * sizeof(r[0]));
- if (unlikely(copied))
- return -EFAULT;
+ if (unlikely(copied)) {
+ remain = -EFAULT;
+ goto out;
+ }
remain -= count;
do {
@@ -1454,7 +1724,8 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
if (likely(offset == 0)) {
} else if ((s64)offset < 0) {
- return (int)offset;
+ remain = (int)offset;
+ goto out;
} else {
/*
* Note that reporting an error now
@@ -1484,8 +1755,9 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
} while (r++, --count);
urelocs += ARRAY_SIZE(stack);
} while (remain);
-
- return 0;
+out:
+ reloc_cache_reset(&eb->reloc_cache);
+ return remain;
}
static int eb_relocate(struct i915_execbuffer *eb)
@@ -2392,7 +2664,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.i915 = i915;
eb.file = file;
eb.args = args;
- if (!(args->flags & I915_EXEC_NO_RELOC))
+ if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
args->flags |= __EXEC_HAS_RELOC;
eb.exec = exec;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index e5b9276d254c..9cf4ad78ece6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -258,6 +258,10 @@ struct page *
i915_gem_object_get_page(struct drm_i915_gem_object *obj,
unsigned int n);
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+ unsigned int n);
+
dma_addr_t
i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
unsigned long n,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index d15ff6748a50..e8a083743e09 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -548,6 +548,20 @@ i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
return nth_page(sg_page(sg), offset);
}
+/* Like i915_gem_object_get_page(), but mark the returned page dirty */
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+ unsigned int n)
+{
+ struct page *page;
+
+ page = i915_gem_object_get_page(obj, n);
+ if (!obj->mm.dirty)
+ set_page_dirty(page);
+
+ return page;
+}
+
dma_addr_t
i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
unsigned long n,
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
index 57c14d3340cd..a49016f8ee0d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
@@ -37,14 +37,20 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
return err;
/* 8-Byte aligned */
- err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0);
- if (err)
+ if (!__reloc_entry_gpu(eb, vma,
+ offsets[0] * sizeof(u32),
+ 0)) {
+ err = -EIO;
goto unpin_vma;
+ }
/* !8-Byte aligned */
- err = __reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1);
- if (err)
+ if (!__reloc_entry_gpu(eb, vma,
+ offsets[1] * sizeof(u32),
+ 1)) {
+ err = -EIO;
goto unpin_vma;
+ }
/* Skip to the end of the cmd page */
i = PAGE_SIZE / sizeof(u32) - RELOC_TAIL - 1;
@@ -54,9 +60,12 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
eb->reloc_cache.rq_size += i;
/* Force batch chaining */
- err = __reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2);
- if (err)
+ if (!__reloc_entry_gpu(eb, vma,
+ offsets[2] * sizeof(u32),
+ 2)) {
+ err = -EIO;
goto unpin_vma;
+ }
GEM_BUG_ON(!eb->reloc_cache.rq);
rq = i915_request_get(eb->reloc_cache.rq);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 24322ef08aa4..9eeaca957a7e 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2060,6 +2060,14 @@ static inline void clear_ports(struct i915_request **ports, int count)
memset_p((void **)ports, NULL, count);
}
+static inline void
+copy_ports(struct i915_request **dst, struct i915_request **src, int count)
+{
+ /* A memcpy_p() would be very useful here! */
+ while (count--)
+ WRITE_ONCE(*dst++, *src++); /* avoid write tearing */
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -2648,10 +2656,9 @@ static void process_csb(struct intel_engine_cs *engine)
/* switch pending to inflight */
GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
- memcpy(execlists->inflight,
- execlists->pending,
- execlists_num_ports(execlists) *
- sizeof(*execlists->pending));
+ copy_ports(execlists->inflight,
+ execlists->pending,
+ execlists_num_ports(execlists));
smp_wmb(); /* complete the seqlock */
WRITE_ONCE(execlists->active, execlists->inflight);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 0b2fe55e6194..781a6783affe 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -388,17 +388,38 @@ static bool __request_in_flight(const struct i915_request *signal)
* As we know that there are always preemption points between
* requests, we know that only the currently executing request
* may be still active even though we have cleared the flag.
- * However, we can't rely on our tracking of ELSP[0] to known
+ * However, we can't rely on our tracking of ELSP[0] to know
* which request is currently active and so maybe stuck, as
* the tracking maybe an event behind. Instead assume that
* if the context is still inflight, then it is still active
* even if the active flag has been cleared.
+ *
+ * To further complicate matters, if there a pending promotion, the HW
+ * may either perform a context switch to the second inflight execlists,
+ * or it may switch to the pending set of execlists. In the case of the
+ * latter, it may send the ACK and we process the event copying the
+ * pending[] over top of inflight[], _overwriting_ our *active. Since
+ * this implies the HW is arbitrating and not struck in *active, we do
+ * not worry about complete accuracy, but we do require no read/write
+ * tearing of the pointer [the read of the pointer must be valid, even
+ * as the array is being overwritten, for which we require the writes
+ * to avoid tearing.]
+ *
+ * Note that the read of *execlists->active may race with the promotion
+ * of execlists->pending[] to execlists->inflight[], overwritting
+ * the value at *execlists->active. This is fine. The promotion implies
+ * that we received an ACK from the HW, and so the context is not
+ * stuck -- if we do not see ourselves in *active, the inflight status
+ * is valid. If instead we see ourselves being copied into *active,
+ * we are inflight and may signal the callback.
*/
if (!intel_context_inflight(signal->context))
return false;
rcu_read_lock();
- for (port = __engine_active(signal->engine); (rq = *port); port++) {
+ for (port = __engine_active(signal->engine);
+ (rq = READ_ONCE(*port)); /* may race with promotion of pending[] */
+ port++) {
if (rq->context == signal->context) {
inflight = i915_seqno_passed(rq->fence.seqno,
signal->fence.seqno);
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 295b9829e2da..4cd2038cbe35 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -164,9 +164,13 @@ static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
do {
list_for_each_entry_safe(pos, next, &x->head, entry) {
- pos->func(pos,
- TASK_NORMAL, fence->error,
- &extra);
+ int wake_flags;
+
+ wake_flags = fence->error;
+ if (pos->func == autoremove_wake_function)
+ wake_flags = 0;
+
+ pos->func(pos, TASK_NORMAL, wake_flags, &extra);
}
if (list_empty(&extra))