summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 9
-rw-r--r-- drivers/gpu/drm/xe/tests/xe_bo.c | 2
-rw-r--r-- drivers/gpu/drm/xe/tests/xe_migrate.c | 18
-rw-r--r-- drivers/gpu/drm/xe/xe_bo.c | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_device_types.h | 2
-rw-r--r-- drivers/gpu/drm/xe/xe_migrate.c | 65
-rw-r--r-- drivers/gpu/drm/xe/xe_migrate.h | 3
-rw-r--r-- drivers/gpu/drm/xe/xe_pci.c | 3
8 files changed, 80 insertions, 24 deletions
diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
index 288576035ce3..e60372a82723 100644
--- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h
@@ -6,6 +6,8 @@
#ifndef _XE_GPU_COMMANDS_H_
#define _XE_GPU_COMMANDS_H_
+#include "regs/xe_reg_defs.h"
+
#define INSTR_CLIENT_SHIFT 29
#define INSTR_MI_CLIENT 0x0
#define __INSTR(client) ((client) << INSTR_CLIENT_SHIFT)
@@ -56,6 +58,13 @@
#define GEN9_XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22)
#define BLT_DEPTH_32 (3<<24)
+#define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22)
+#define PVC_MEM_SET_CMD_LEN_DW 7
+#define PVC_MS_MATRIX REG_BIT(17)
+#define PVC_MS_DATA_FIELD GENMASK(31, 24)
+/* Bspec lists field as [6:0], but index alone is from [6:1] */
+#define PVC_MS_MOCS_INDEX_MASK GENMASK(6, 1)
+
#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
#define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28)
#define PIPE_CONTROL_AMFS_FLUSH (1<<25)
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index f03fb907b59a..3c60cbdf516c 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -32,7 +32,7 @@ static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo,
/* Optionally clear bo *and* CCS data in VRAM. */
if (clear) {
- fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource, 0);
+ fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource);
if (IS_ERR(fence)) {
KUNIT_FAIL(test, "Failed to submit bo clear.\n");
return PTR_ERR(fence);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index e50b6ceb56e6..17829f878757 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -99,7 +99,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
struct kunit *test)
{
struct xe_device *xe = gt_to_xe(m->gt);
- u64 retval, expected = 0xc0c0c0c0c0c0c0c0ULL;
+ u64 retval, expected = 0;
bool big = bo->size >= SZ_2M;
struct dma_fence *fence;
const char *str = big ? "Copying big bo" : "Copying small bo";
@@ -130,7 +130,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
}
xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size);
- fence = xe_migrate_clear(m, sysmem, sysmem->ttm.resource, 0xc0c0c0c0);
+ fence = xe_migrate_clear(m, sysmem, sysmem->ttm.resource);
if (!sanity_fence_failed(xe, fence, big ? "Clearing sysmem big bo" :
"Clearing sysmem small bo", test)) {
retval = xe_map_rd(xe, &sysmem->vmap, 0, u64);
@@ -311,10 +311,10 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
bb->len = 0;
bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
xe_map_wr(xe, &pt->vmap, 0, u32, 0xdeaddead);
- expected = 0x12345678U;
+ expected = 0;
emit_clear(m->gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4,
- expected, IS_DGFX(xe));
+ IS_DGFX(xe));
run_sanity_job(m, xe, bb, 1, "Writing to our newly mapped pagetable",
test);
@@ -326,8 +326,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
/* Clear a small bo */
kunit_info(test, "Clearing small buffer object\n");
xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size);
- expected = 0x224488ff;
- fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, expected);
+ expected = 0;
+ fence = xe_migrate_clear(m, tiny, tiny->ttm.resource);
if (sanity_fence_failed(xe, fence, "Clearing small bo", test))
goto out;
@@ -342,11 +342,11 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
test_copy(m, tiny, test);
}
- /* Clear a big bo with a fixed value */
+ /* Clear a big bo */
kunit_info(test, "Clearing big buffer object\n");
xe_map_memset(xe, &big->vmap, 0, 0x11, big->size);
- expected = 0x11223344U;
- fence = xe_migrate_clear(m, big, big->ttm.resource, expected);
+ expected = 0;
+ fence = xe_migrate_clear(m, big, big->ttm.resource);
if (sanity_fence_failed(xe, fence, "Clearing big bo", test))
goto out;
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 3ca28f84dff7..ba156a85460c 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -686,7 +686,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
}
} else {
if (move_lacks_source)
- fence = xe_migrate_clear(gt->migrate, bo, new_mem, 0);
+ fence = xe_migrate_clear(gt->migrate, bo, new_mem);
else
fence = xe_migrate_copy(gt->migrate, bo, old_mem, new_mem);
if (IS_ERR(fence)) {
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 8d99450f0bf4..377a8979bc06 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -89,6 +89,8 @@ struct xe_device {
bool has_4tile;
/** @has_range_tlb_invalidation: Has range based TLB invalidations */
bool has_range_tlb_invalidation;
+ /** @has_link_copy_engine: Whether the platform has link copy engines */
+ bool has_link_copy_engine;
} info;
/** @irq: device interrupt state */
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 3ee3d707a8ca..9102fa1d8759 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -747,14 +747,35 @@ err_sync:
return fence;
}
-static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
- u32 size, u32 pitch, u32 value, bool is_vram)
+static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+ u32 size, u32 pitch)
+{
+ u32 *cs = bb->cs + bb->len;
+ u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
+ u32 len = PVC_MEM_SET_CMD_LEN_DW;
+ /*
+  * Append one PVC_MEM_SET_CMD packet of len dwords to bb, starting at
+  * its current write position (bb->cs + bb->len). The header carries
+  * PVC_MS_MATRIX and (len - 2); the payload is pitch - 1,
+  * (size / pitch) - 1, pitch - 1, src_ofs split into low/high halves,
+  * and the MOCS field. pitch must be non-zero because of the division.
+  * PVC_MS_DATA_FIELD is not referenced by this function.
+  */
+ *cs++ = PVC_MEM_SET_CMD | PVC_MS_MATRIX | (len - 2);
+ *cs++ = pitch - 1;
+ *cs++ = (size / pitch) - 1;
+ *cs++ = pitch - 1;
+ *cs++ = lower_32_bits(src_ofs);
+ *cs++ = upper_32_bits(src_ofs);
+ *cs++ = FIELD_PREP(PVC_MS_MOCS_INDEX_MASK, mocs);
+ /*
+  * NOTE(review): mocs has already gone through xe_mocs_index_to_value()
+  * and is then positioned again by FIELD_PREP() into bits [6:1]. If the
+  * helper itself shifts the index into field position, the index ends
+  * up shifted twice -- confirm against the helper's definition.
+  *
+  * The check below trips if the number of dwords written above does
+  * not equal len.
+  */
+ XE_BUG_ON(cs - bb->cs != len + bb->len);
+ /* Advance the batch length past the packet just written. */
+ bb->len += len;
+}
+
+static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
+ u64 src_ofs, u32 size, u32 pitch, bool is_vram)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ u32 *cs = bb->cs + bb->len;
u32 len = XY_FAST_COLOR_BLT_DW;
u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
- if (GRAPHICS_VERx100(gt->xe) < 1250)
+ if (GRAPHICS_VERx100(xe) < 1250)
len = 11;
*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
@@ -766,7 +787,7 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
*cs++ = lower_32_bits(src_ofs);
*cs++ = upper_32_bits(src_ofs);
*cs++ = (is_vram ? 0x0 : 0x1) << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT;
- *cs++ = value;
+ *cs++ = 0;
*cs++ = 0;
*cs++ = 0;
*cs++ = 0;
@@ -780,7 +801,30 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
}
XE_BUG_ON(cs - bb->cs != len + bb->len);
+
bb->len += len;
+}
+/*
+ * Return the dword count of the clear packet for this device, keyed on
+ * the same xe->info.has_link_copy_engine flag that emit_clear() uses to
+ * pick its emitter. xe_migrate_clear() passes the result to
+ * pte_update_size() when computing the batch size.
+ *
+ * NOTE(review): emit_clear_main_copy() uses len = 11 when
+ * GRAPHICS_VERx100(xe) < 1250, while XY_FAST_COLOR_BLT_DW is returned
+ * here unconditionally; presumably the constant is >= 11 so this only
+ * over-reserves -- confirm.
+ */
+static u32 emit_clear_cmd_len(struct xe_device *xe)
+{
+ if (xe->info.has_link_copy_engine)
+ return PVC_MEM_SET_CMD_LEN_DW;
+ else
+ return XY_FAST_COLOR_BLT_DW;
+}
+/*
+ * Emit a single clear packet into bb, choosing the emitter from
+ * xe->info.has_link_copy_engine: emit_clear_link_copy() when the flag
+ * is set, emit_clear_main_copy() otherwise. is_vram is forwarded only
+ * on the main-copy path; the link-copy emitter does not take it.
+ * Always returns 0.
+ */
+static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+ u32 size, u32 pitch, bool is_vram)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ if (xe->info.has_link_copy_engine) {
+ emit_clear_link_copy(gt, bb, src_ofs, size, pitch);
+
+ } else {
+ emit_clear_main_copy(gt, bb, src_ofs, size, pitch,
+ is_vram);
+ }
return 0;
}
@@ -790,10 +834,9 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
* @m: The migration context.
* @bo: The buffer object @dst is currently bound to.
* @dst: The dst TTM resource to be cleared.
- * @value: Clear value.
*
- * Clear the contents of @dst. On flat CCS devices,
- * the CCS metadata is cleared to zero as well on VRAM destionations.
+ * Clear the contents of @dst to zero. On flat CCS devices,
+ * the CCS metadata is cleared to zero as well on VRAM destinations.
* TODO: Eliminate the @bo argument.
*
* Return: Pointer to a dma_fence representing the last clear batch, or
@@ -802,8 +845,7 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
*/
struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
struct xe_bo *bo,
- struct ttm_resource *dst,
- u32 value)
+ struct ttm_resource *dst)
{
bool clear_vram = mem_type_is_vram(dst->mem_type);
struct xe_gt *gt = m->gt;
@@ -837,7 +879,8 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
batch_size = 2 +
pte_update_size(m, clear_vram, &src_it,
&clear_L0, &clear_L0_ofs, &clear_L0_pt,
- XY_FAST_COLOR_BLT_DW, 0, NUM_PT_PER_BLIT);
+ emit_clear_cmd_len(xe), 0,
+ NUM_PT_PER_BLIT);
if (xe_device_has_flat_ccs(xe) && clear_vram)
batch_size += EMIT_COPY_CCS_DW;
@@ -868,7 +911,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
update_idx = bb->len;
emit_clear(gt, bb, clear_L0_ofs, clear_L0, GEN8_PAGE_SIZE,
- value, clear_vram);
+ clear_vram);
if (xe_device_has_flat_ccs(xe) && clear_vram) {
emit_copy_ccs(gt, bb, clear_L0_ofs, true,
m->cleared_vram_ofs, false, clear_L0);
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index a569851db6f7..1ff6e0a90de5 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -79,8 +79,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
struct xe_bo *bo,
- struct ttm_resource *dst,
- u32 value);
+ struct ttm_resource *dst);
struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m);
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 6dcefb8cc7c3..0a3b61f08d37 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -71,6 +71,7 @@ struct xe_device_desc {
bool has_4tile;
bool has_range_tlb_invalidation;
bool has_asid;
+ bool has_link_copy_engine;
};
#define PLATFORM(x) \
@@ -226,6 +227,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = {
.vm_max_level = 4,
.supports_usm = true,
.has_asid = true,
+ .has_link_copy_engine = true,
};
#define MTL_MEDIA_ENGINES \
@@ -413,6 +415,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
xe->info.has_flat_ccs = desc->has_flat_ccs;
xe->info.has_4tile = desc->has_4tile;
xe->info.has_range_tlb_invalidation = desc->has_range_tlb_invalidation;
+ xe->info.has_link_copy_engine = desc->has_link_copy_engine;
spd = subplatform_get(xe, desc);
xe->info.subplatform = spd ? spd->subplatform : XE_SUBPLATFORM_NONE;