summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/accel/ivpu/ivpu_drv.c5
-rw-r--r--drivers/accel/ivpu/ivpu_fw.c1
-rw-r--r--drivers/accel/ivpu/ivpu_hw_37xx.c2
-rw-r--r--drivers/accel/ivpu/ivpu_hw_40xx.c7
-rw-r--r--drivers/accel/ivpu/ivpu_job.c4
-rw-r--r--drivers/accel/ivpu/ivpu_mmu.c36
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c77
-rw-r--r--drivers/gpu/drm/scheduler/sched_main.c15
-rw-r--r--drivers/gpu/drm/xe/xe_display.c6
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.c8
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c5
-rw-r--r--drivers/gpu/drm/xe/xe_gt_pagefault.c2
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c28
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job.c1
-rw-r--r--drivers/gpu/drm/xe/xe_sync.c2
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c62
-rw-r--r--drivers/gpu/drm/xe/xe_vm_types.h8
-rw-r--r--include/uapi/drm/ivpu_accel.h1
19 files changed, 143 insertions, 129 deletions
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 9418c73ee8ef..4b0640226986 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -480,9 +480,8 @@ static int ivpu_pci_init(struct ivpu_device *vdev)
/* Clear any pending errors */
pcie_capability_clear_word(pdev, PCI_EXP_DEVSTA, 0x3f);
- /* VPU 37XX does not require 10m D3hot delay */
- if (ivpu_hw_gen(vdev) == IVPU_HW_37XX)
- pdev->d3hot_delay = 0;
+ /* NPU does not require 10m D3hot delay */
+ pdev->d3hot_delay = 0;
ret = pcim_enable_device(pdev);
if (ret) {
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index 6576232f3e67..5fa8bd4603d5 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -222,7 +222,6 @@ ivpu_fw_init_wa(struct ivpu_device *vdev)
const struct vpu_firmware_header *fw_hdr = (const void *)vdev->fw->file->data;
if (IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, BOOT, 3, 17) ||
- (ivpu_hw_gen(vdev) > IVPU_HW_37XX) ||
(ivpu_test_mode & IVPU_TEST_MODE_D0I3_MSG_DISABLE))
vdev->wa.disable_d0i3_msg = true;
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c
index f15a93d83057..77accd029c4a 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_37xx.c
@@ -525,7 +525,7 @@ static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev)
u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES);
val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, NOSNOOP_OVERRIDE_EN, val);
- val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val);
+ val = REG_CLR_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val);
val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AR_NOSNOOP_OVERRIDE, val);
REGV_WR32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, val);
diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c
index 704288084f37..1c995307c113 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
@@ -530,7 +530,7 @@ static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev)
u32 val = REGV_RD32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES);
val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, SNOOP_OVERRIDE_EN, val);
- val = REG_CLR_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AW_SNOOP_OVERRIDE, val);
+ val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AW_SNOOP_OVERRIDE, val);
val = REG_CLR_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AR_SNOOP_OVERRIDE, val);
REGV_WR32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, val);
@@ -704,7 +704,6 @@ static int ivpu_hw_40xx_info_init(struct ivpu_device *vdev)
{
struct ivpu_hw_info *hw = vdev->hw;
u32 tile_disable;
- u32 tile_enable;
u32 fuse;
fuse = REGB_RD32(VPU_40XX_BUTTRESS_TILE_FUSE);
@@ -725,10 +724,6 @@ static int ivpu_hw_40xx_info_init(struct ivpu_device *vdev)
else
ivpu_dbg(vdev, MISC, "Fuse: All %d tiles enabled\n", TILE_MAX_NUM);
- tile_enable = (~tile_disable) & TILE_MAX_MASK;
-
- hw->sku = REG_SET_FLD_NUM(SKU, HW_ID, LNL_HW_ID, hw->sku);
- hw->sku = REG_SET_FLD_NUM(SKU, TILE, tile_enable, hw->sku);
hw->tile_fuse = tile_disable;
hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT;
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index 0440bee3ecaf..e70cfb859339 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -294,7 +294,7 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32
return -ENOENT;
if (job->file_priv->has_mmu_faults)
- job_status = VPU_JSM_STATUS_ABORTED;
+ job_status = DRM_IVPU_JOB_STATUS_ABORTED;
job->bos[CMD_BUF_IDX]->job_status = job_status;
dma_fence_signal(job->done_fence);
@@ -315,7 +315,7 @@ void ivpu_jobs_abort_all(struct ivpu_device *vdev)
unsigned long id;
xa_for_each(&vdev->submitted_jobs_xa, id, job)
- ivpu_job_signal_and_destroy(vdev, id, VPU_JSM_STATUS_ABORTED);
+ ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED);
}
static int ivpu_job_submit(struct ivpu_job *job)
diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
index 9a3122ffce03..91bd640655ab 100644
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -72,10 +72,10 @@
#define IVPU_MMU_Q_COUNT_LOG2 4 /* 16 entries */
#define IVPU_MMU_Q_COUNT ((u32)1 << IVPU_MMU_Q_COUNT_LOG2)
-#define IVPU_MMU_Q_WRAP_BIT (IVPU_MMU_Q_COUNT << 1)
-#define IVPU_MMU_Q_WRAP_MASK (IVPU_MMU_Q_WRAP_BIT - 1)
-#define IVPU_MMU_Q_IDX_MASK (IVPU_MMU_Q_COUNT - 1)
+#define IVPU_MMU_Q_WRAP_MASK GENMASK(IVPU_MMU_Q_COUNT_LOG2, 0)
+#define IVPU_MMU_Q_IDX_MASK (IVPU_MMU_Q_COUNT - 1)
#define IVPU_MMU_Q_IDX(val) ((val) & IVPU_MMU_Q_IDX_MASK)
+#define IVPU_MMU_Q_WRP(val) ((val) & IVPU_MMU_Q_COUNT)
#define IVPU_MMU_CMDQ_CMD_SIZE 16
#define IVPU_MMU_CMDQ_SIZE (IVPU_MMU_Q_COUNT * IVPU_MMU_CMDQ_CMD_SIZE)
@@ -475,20 +475,32 @@ static int ivpu_mmu_cmdq_wait_for_cons(struct ivpu_device *vdev)
return 0;
}
+static bool ivpu_mmu_queue_is_full(struct ivpu_mmu_queue *q)
+{
+ return ((IVPU_MMU_Q_IDX(q->prod) == IVPU_MMU_Q_IDX(q->cons)) &&
+ (IVPU_MMU_Q_WRP(q->prod) != IVPU_MMU_Q_WRP(q->cons)));
+}
+
+static bool ivpu_mmu_queue_is_empty(struct ivpu_mmu_queue *q)
+{
+ return ((IVPU_MMU_Q_IDX(q->prod) == IVPU_MMU_Q_IDX(q->cons)) &&
+ (IVPU_MMU_Q_WRP(q->prod) == IVPU_MMU_Q_WRP(q->cons)));
+}
+
static int ivpu_mmu_cmdq_cmd_write(struct ivpu_device *vdev, const char *name, u64 data0, u64 data1)
{
- struct ivpu_mmu_queue *q = &vdev->mmu->cmdq;
- u64 *queue_buffer = q->base;
- int idx = IVPU_MMU_Q_IDX(q->prod) * (IVPU_MMU_CMDQ_CMD_SIZE / sizeof(*queue_buffer));
+ struct ivpu_mmu_queue *cmdq = &vdev->mmu->cmdq;
+ u64 *queue_buffer = cmdq->base;
+ int idx = IVPU_MMU_Q_IDX(cmdq->prod) * (IVPU_MMU_CMDQ_CMD_SIZE / sizeof(*queue_buffer));
- if (!CIRC_SPACE(IVPU_MMU_Q_IDX(q->prod), IVPU_MMU_Q_IDX(q->cons), IVPU_MMU_Q_COUNT)) {
+ if (ivpu_mmu_queue_is_full(cmdq)) {
ivpu_err(vdev, "Failed to write MMU CMD %s\n", name);
return -EBUSY;
}
queue_buffer[idx] = data0;
queue_buffer[idx + 1] = data1;
- q->prod = (q->prod + 1) & IVPU_MMU_Q_WRAP_MASK;
+ cmdq->prod = (cmdq->prod + 1) & IVPU_MMU_Q_WRAP_MASK;
ivpu_dbg(vdev, MMU, "CMD write: %s data: 0x%llx 0x%llx\n", name, data0, data1);
@@ -560,7 +572,6 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev)
mmu->cmdq.cons = 0;
memset(mmu->evtq.base, 0, IVPU_MMU_EVTQ_SIZE);
- clflush_cache_range(mmu->evtq.base, IVPU_MMU_EVTQ_SIZE);
mmu->evtq.prod = 0;
mmu->evtq.cons = 0;
@@ -874,14 +885,10 @@ static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev)
u32 *evt = evtq->base + (idx * IVPU_MMU_EVTQ_CMD_SIZE);
evtq->prod = REGV_RD32(IVPU_MMU_REG_EVTQ_PROD_SEC);
- if (!CIRC_CNT(IVPU_MMU_Q_IDX(evtq->prod), IVPU_MMU_Q_IDX(evtq->cons), IVPU_MMU_Q_COUNT))
+ if (ivpu_mmu_queue_is_empty(evtq))
return NULL;
- clflush_cache_range(evt, IVPU_MMU_EVTQ_CMD_SIZE);
-
evtq->cons = (evtq->cons + 1) & IVPU_MMU_Q_WRAP_MASK;
- REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, evtq->cons);
-
return evt;
}
@@ -902,6 +909,7 @@ void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev)
}
ivpu_mmu_user_context_mark_invalid(vdev, ssid);
+ REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, vdev->mmu->evtq.cons);
}
}
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
index d1437c08645f..6f5d376d8fcc 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
@@ -9,7 +9,7 @@
#define GSP_PAGE_SIZE BIT(GSP_PAGE_SHIFT)
struct nvkm_gsp_mem {
- u32 size;
+ size_t size;
void *data;
dma_addr_t addr;
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index 5e1fa176aac4..a41735ab6068 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -997,6 +997,32 @@ r535_gsp_rpc_get_gsp_static_info(struct nvkm_gsp *gsp)
return 0;
}
+static void
+nvkm_gsp_mem_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_mem *mem)
+{
+ if (mem->data) {
+ /*
+ * Poison the buffer to catch any unexpected access from
+ * GSP-RM if the buffer was prematurely freed.
+ */
+ memset(mem->data, 0xFF, mem->size);
+
+ dma_free_coherent(gsp->subdev.device->dev, mem->size, mem->data, mem->addr);
+ memset(mem, 0, sizeof(*mem));
+ }
+}
+
+static int
+nvkm_gsp_mem_ctor(struct nvkm_gsp *gsp, size_t size, struct nvkm_gsp_mem *mem)
+{
+ mem->size = size;
+ mem->data = dma_alloc_coherent(gsp->subdev.device->dev, size, &mem->addr, GFP_KERNEL);
+ if (WARN_ON(!mem->data))
+ return -ENOMEM;
+
+ return 0;
+}
+
static int
r535_gsp_postinit(struct nvkm_gsp *gsp)
{
@@ -1024,6 +1050,13 @@ r535_gsp_postinit(struct nvkm_gsp *gsp)
nvkm_inth_allow(&gsp->subdev.inth);
nvkm_wr32(device, 0x110004, 0x00000040);
+
+ /* Release the DMA buffers that were needed only for boot and init */
+ nvkm_gsp_mem_dtor(gsp, &gsp->boot.fw);
+ nvkm_gsp_mem_dtor(gsp, &gsp->libos);
+ nvkm_gsp_mem_dtor(gsp, &gsp->rmargs);
+ nvkm_gsp_mem_dtor(gsp, &gsp->wpr_meta);
+
return ret;
}
@@ -1532,27 +1565,6 @@ r535_gsp_msg_run_cpu_sequencer(void *priv, u32 fn, void *repv, u32 repc)
return 0;
}
-static void
-nvkm_gsp_mem_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_mem *mem)
-{
- if (mem->data) {
- dma_free_coherent(gsp->subdev.device->dev, mem->size, mem->data, mem->addr);
- mem->data = NULL;
- }
-}
-
-static int
-nvkm_gsp_mem_ctor(struct nvkm_gsp *gsp, u32 size, struct nvkm_gsp_mem *mem)
-{
- mem->size = size;
- mem->data = dma_alloc_coherent(gsp->subdev.device->dev, size, &mem->addr, GFP_KERNEL);
- if (WARN_ON(!mem->data))
- return -ENOMEM;
-
- return 0;
-}
-
-
static int
r535_gsp_booter_unload(struct nvkm_gsp *gsp, u32 mbox0, u32 mbox1)
{
@@ -1938,20 +1950,20 @@ nvkm_gsp_radix3_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_radix3 *rx3)
* See kgspCreateRadix3_IMPL
*/
static int
-nvkm_gsp_radix3_sg(struct nvkm_device *device, struct sg_table *sgt, u64 size,
+nvkm_gsp_radix3_sg(struct nvkm_gsp *gsp, struct sg_table *sgt, u64 size,
struct nvkm_gsp_radix3 *rx3)
{
u64 addr;
for (int i = ARRAY_SIZE(rx3->mem) - 1; i >= 0; i--) {
u64 *ptes;
- int idx;
+ size_t bufsize;
+ int ret, idx;
- rx3->mem[i].size = ALIGN((size / GSP_PAGE_SIZE) * sizeof(u64), GSP_PAGE_SIZE);
- rx3->mem[i].data = dma_alloc_coherent(device->dev, rx3->mem[i].size,
- &rx3->mem[i].addr, GFP_KERNEL);
- if (WARN_ON(!rx3->mem[i].data))
- return -ENOMEM;
+ bufsize = ALIGN((size / GSP_PAGE_SIZE) * sizeof(u64), GSP_PAGE_SIZE);
+ ret = nvkm_gsp_mem_ctor(gsp, bufsize, &rx3->mem[i]);
+ if (ret)
+ return ret;
ptes = rx3->mem[i].data;
if (i == 2) {
@@ -1991,7 +2003,7 @@ r535_gsp_fini(struct nvkm_gsp *gsp, bool suspend)
if (ret)
return ret;
- ret = nvkm_gsp_radix3_sg(gsp->subdev.device, &gsp->sr.sgt, len, &gsp->sr.radix3);
+ ret = nvkm_gsp_radix3_sg(gsp, &gsp->sr.sgt, len, &gsp->sr.radix3);
if (ret)
return ret;
@@ -2150,6 +2162,11 @@ r535_gsp_dtor(struct nvkm_gsp *gsp)
mutex_destroy(&gsp->cmdq.mutex);
r535_gsp_dtor_fws(gsp);
+
+ nvkm_gsp_mem_dtor(gsp, &gsp->shm.mem);
+ nvkm_gsp_mem_dtor(gsp, &gsp->loginit);
+ nvkm_gsp_mem_dtor(gsp, &gsp->logintr);
+ nvkm_gsp_mem_dtor(gsp, &gsp->logrm);
}
int
@@ -2194,7 +2211,7 @@ r535_gsp_oneinit(struct nvkm_gsp *gsp)
memcpy(gsp->sig.data, data, size);
/* Build radix3 page table for ELF image. */
- ret = nvkm_gsp_radix3_sg(device, &gsp->fw.mem.sgt, gsp->fw.len, &gsp->radix3);
+ ret = nvkm_gsp_radix3_sg(gsp, &gsp->fw.mem.sgt, gsp->fw.len, &gsp->radix3);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 85f082396d42..d442b893275b 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -1178,21 +1178,24 @@ static void drm_sched_run_job_work(struct work_struct *w)
struct drm_sched_entity *entity;
struct dma_fence *fence;
struct drm_sched_fence *s_fence;
- struct drm_sched_job *sched_job = NULL;
+ struct drm_sched_job *sched_job;
int r;
if (READ_ONCE(sched->pause_submit))
return;
/* Find entity with a ready job */
- while (!sched_job && (entity = drm_sched_select_entity(sched))) {
- sched_job = drm_sched_entity_pop_job(entity);
- if (!sched_job)
- complete_all(&entity->entity_idle);
- }
+ entity = drm_sched_select_entity(sched);
if (!entity)
return; /* No more work */
+ sched_job = drm_sched_entity_pop_job(entity);
+ if (!sched_job) {
+ complete_all(&entity->entity_idle);
+ drm_sched_run_job_queue(sched);
+ return;
+ }
+
s_fence = sched_job->s_fence;
atomic_add(sched_job->credits, &sched->credit_count);
diff --git a/drivers/gpu/drm/xe/xe_display.c b/drivers/gpu/drm/xe/xe_display.c
index 74391d9b11ae..e4db069f0db3 100644
--- a/drivers/gpu/drm/xe/xe_display.c
+++ b/drivers/gpu/drm/xe/xe_display.c
@@ -134,8 +134,6 @@ static void xe_display_fini_nommio(struct drm_device *dev, void *dummy)
int xe_display_init_nommio(struct xe_device *xe)
{
- int err;
-
if (!xe->info.enable_display)
return 0;
@@ -145,10 +143,6 @@ int xe_display_init_nommio(struct xe_device *xe)
/* This must be called before any calls to HAS_PCH_* */
intel_detect_pch(xe);
- err = intel_power_domains_init(xe);
- if (err)
- return err;
-
return drmm_add_action_or_reset(&xe->drm, xe_display_fini_nommio, xe);
}
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index bcfc4127c7c5..254b1d3af4cb 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -926,20 +926,24 @@ void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q)
* @q: The exec queue
* @vm: The VM the engine does a bind or exec for
*
- * Get last fence, does not take a ref
+ * Get last fence, takes a ref
*
* Returns: last fence if not signaled, dma fence stub if signaled
*/
struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
struct xe_vm *vm)
{
+ struct dma_fence *fence;
+
xe_exec_queue_last_fence_lockdep_assert(q, vm);
if (q->last_fence &&
test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
xe_exec_queue_last_fence_put(q, vm);
- return q->last_fence ? q->last_fence : dma_fence_get_stub();
+ fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
+ dma_fence_get(fence);
+ return fence;
}
/**
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 3af2adec1295..35474ddbaf97 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -437,7 +437,10 @@ static int all_fw_domain_init(struct xe_gt *gt)
* USM has its only SA pool to non-block behind user operations
*/
if (gt_to_xe(gt)->info.has_usm) {
- gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt), SZ_1M, 16);
+ struct xe_device *xe = gt_to_xe(gt);
+
+ gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt),
+ IS_DGFX(xe) ? SZ_1M : SZ_512K, 16);
if (IS_ERR(gt->usm.bb_pool)) {
err = PTR_ERR(gt->usm.bb_pool);
goto err_force_wake;
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 9c2fe1697d6e..73f08f1924df 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -335,7 +335,7 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
return -EPROTO;
asid = FIELD_GET(PFD_ASID, msg[1]);
- pf_queue = &gt->usm.pf_queue[asid % NUM_PF_QUEUE];
+ pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE);
spin_lock_irqsave(&pf_queue->lock, flags);
full = pf_queue_full(pf_queue);
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 5c6c54624252..70480c305602 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -170,11 +170,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
if (!IS_DGFX(xe)) {
/* Write out batch too */
m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE;
- if (xe->info.has_usm) {
- batch = tile->primary_gt->usm.bb_pool->bo;
- m->usm_batch_base_ofs = m->batch_base_ofs;
- }
-
for (i = 0; i < batch->size;
i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE :
XE_PAGE_SIZE) {
@@ -185,6 +180,24 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
entry);
level++;
}
+ if (xe->info.has_usm) {
+ xe_tile_assert(tile, batch->size == SZ_1M);
+
+ batch = tile->primary_gt->usm.bb_pool->bo;
+ m->usm_batch_base_ofs = m->batch_base_ofs + SZ_1M;
+ xe_tile_assert(tile, batch->size == SZ_512K);
+
+ for (i = 0; i < batch->size;
+ i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE :
+ XE_PAGE_SIZE) {
+ entry = vm->pt_ops->pte_encode_bo(batch, i,
+ pat_index, 0);
+
+ xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64,
+ entry);
+ level++;
+ }
+ }
} else {
u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);
@@ -1204,8 +1217,11 @@ static bool no_in_syncs(struct xe_vm *vm, struct xe_exec_queue *q,
}
if (q) {
fence = xe_exec_queue_last_fence_get(q, vm);
- if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+ if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
+ dma_fence_put(fence);
return false;
+ }
+ dma_fence_put(fence);
}
return true;
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index 01106a1156ad..4e2ccad0e52f 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -274,7 +274,6 @@ int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm)
struct dma_fence *fence;
fence = xe_exec_queue_last_fence_get(job->q, vm);
- dma_fence_get(fence);
return drm_sched_job_add_dependency(&job->drm, fence);
}
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index e4c220cf9115..aab92bee1d7c 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -307,7 +307,6 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
/* Easy case... */
if (!num_in_fence) {
fence = xe_exec_queue_last_fence_get(q, vm);
- dma_fence_get(fence);
return fence;
}
@@ -322,7 +321,6 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
}
}
fences[current_fence++] = xe_exec_queue_last_fence_get(q, vm);
- dma_fence_get(fences[current_fence - 1]);
cf = dma_fence_array_create(num_in_fence, fences,
vm->composite_fence_ctx,
vm->composite_fence_seqno++,
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 30db264d34a3..865e10d0a06a 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -37,8 +37,6 @@
#include "generated/xe_wa_oob.h"
#include "xe_wa.h"
-#define TEST_VM_ASYNC_OPS_ERROR
-
static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
return vm->gpuvm.r_obj;
@@ -114,11 +112,8 @@ retry:
num_pages - pinned,
read_only ? 0 : FOLL_WRITE,
&pages[pinned]);
- if (ret < 0) {
- if (in_kthread)
- ret = 0;
+ if (ret < 0)
break;
- }
pinned += ret;
ret = 0;
@@ -1984,6 +1979,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
xe_exec_queue_last_fence_get(wait_exec_queue, vm);
xe_sync_entry_signal(&syncs[i], NULL, fence);
+ dma_fence_put(fence);
}
}
@@ -2064,7 +2060,6 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
struct drm_gpuva_ops *ops;
struct drm_gpuva_op *__op;
- struct xe_vma_op *op;
struct drm_gpuvm_bo *vm_bo;
int err;
@@ -2111,15 +2106,6 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
if (IS_ERR(ops))
return ops;
-#ifdef TEST_VM_ASYNC_OPS_ERROR
- if (operation & FORCE_ASYNC_OP_ERROR) {
- op = list_first_entry_or_null(&ops->list, struct xe_vma_op,
- base.entry);
- if (op)
- op->inject_error = true;
- }
-#endif
-
drm_gpuva_for_each_op(__op, ops) {
struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
@@ -2199,8 +2185,10 @@ static u64 xe_vma_max_pte_size(struct xe_vma *vma)
return SZ_1G;
else if (vma->gpuva.flags & XE_VMA_PTE_2M)
return SZ_2M;
+ else if (vma->gpuva.flags & XE_VMA_PTE_4K)
+ return SZ_4K;
- return SZ_4K;
+ return SZ_1G; /* Uninitialized, used max size */
}
static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
@@ -2530,13 +2518,25 @@ retry_userptr:
}
drm_exec_fini(&exec);
- if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
+ if (err == -EAGAIN) {
lockdep_assert_held_write(&vm->lock);
- err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
- if (!err)
- goto retry_userptr;
- trace_xe_vma_fail(vma);
+ if (op->base.op == DRM_GPUVA_OP_REMAP) {
+ if (!op->remap.unmap_done)
+ vma = gpuva_to_vma(op->base.remap.unmap->va);
+ else if (op->remap.prev)
+ vma = op->remap.prev;
+ else
+ vma = op->remap.next;
+ }
+
+ if (xe_vma_is_userptr(vma)) {
+ err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
+ if (!err)
+ goto retry_userptr;
+
+ trace_xe_vma_fail(vma);
+ }
}
return err;
@@ -2548,13 +2548,6 @@ static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
lockdep_assert_held_write(&vm->lock);
-#ifdef TEST_VM_ASYNC_OPS_ERROR
- if (op->inject_error) {
- op->inject_error = false;
- return -ENOMEM;
- }
-#endif
-
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
ret = __xe_vma_op_execute(vm, op->map.vma, op);
@@ -2669,7 +2662,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
{
int i;
- for (i = num_ops_list - 1; i; ++i) {
+ for (i = num_ops_list - 1; i >= 0; --i) {
struct drm_gpuva_ops *__ops = ops[i];
struct drm_gpuva_op *__op;
@@ -2714,16 +2707,9 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
return 0;
}
-#ifdef TEST_VM_ASYNC_OPS_ERROR
-#define SUPPORTED_FLAGS \
- (FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_READONLY | \
- DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | 0xffff)
-#else
#define SUPPORTED_FLAGS \
(DRM_XE_VM_BIND_FLAG_READONLY | \
- DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \
- 0xffff)
-#endif
+ DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL)
#define XE_64K_PAGE_MASK 0xffffull
#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 1fec66ae2eb2..5ac9c5bebabc 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -21,9 +21,6 @@ struct xe_bo;
struct xe_sync_entry;
struct xe_vm;
-#define TEST_VM_ASYNC_OPS_ERROR
-#define FORCE_ASYNC_OP_ERROR BIT(31)
-
#define XE_VMA_READ_ONLY DRM_GPUVA_USERBITS
#define XE_VMA_DESTROYED (DRM_GPUVA_USERBITS << 1)
#define XE_VMA_ATOMIC_PTE_BIT (DRM_GPUVA_USERBITS << 2)
@@ -360,11 +357,6 @@ struct xe_vma_op {
/** @flags: operation flags */
enum xe_vma_op_flags flags;
-#ifdef TEST_VM_ASYNC_OPS_ERROR
- /** @inject_error: inject error to test async op error handling */
- bool inject_error;
-#endif
-
union {
/** @map: VMA map operation specific data */
struct xe_vma_op_map map;
diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h
index 63c49318a863..19a13468eca5 100644
--- a/include/uapi/drm/ivpu_accel.h
+++ b/include/uapi/drm/ivpu_accel.h
@@ -305,6 +305,7 @@ struct drm_ivpu_submit {
/* drm_ivpu_bo_wait job status codes */
#define DRM_IVPU_JOB_STATUS_SUCCESS 0
+#define DRM_IVPU_JOB_STATUS_ABORTED 256
/**
* struct drm_ivpu_bo_wait - Wait for BO to become inactive