Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/drm/Kconfig | 2
-rw-r--r--  drivers/gpu/drm/Makefile | 1
-rw-r--r--  drivers/gpu/drm/xe/.gitignore | 2
-rw-r--r--  drivers/gpu/drm/xe/Kconfig | 63
-rw-r--r--  drivers/gpu/drm/xe/Kconfig.debug | 96
-rw-r--r--  drivers/gpu/drm/xe/Makefile | 121
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_actions_abi.h | 219
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h | 249
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h | 189
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h | 49
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_errors_abi.h | 37
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 322
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_messages_abi.h | 234
-rw-r--r--  drivers/gpu/drm/xe/tests/Makefile | 4
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_bo.c | 303
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_bo_test.c | 25
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_dma_buf.c | 259
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_dma_buf_test.c | 23
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_migrate.c | 378
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_migrate_test.c | 23
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_test.h | 66
-rw-r--r--  drivers/gpu/drm/xe/xe_bb.c | 97
-rw-r--r--  drivers/gpu/drm/xe/xe_bb.h | 27
-rw-r--r--  drivers/gpu/drm/xe/xe_bb_types.h | 20
-rw-r--r--  drivers/gpu/drm/xe/xe_bo.c | 1698
-rw-r--r--  drivers/gpu/drm/xe/xe_bo.h | 290
-rw-r--r--  drivers/gpu/drm/xe/xe_bo_doc.h | 179
-rw-r--r--  drivers/gpu/drm/xe/xe_bo_evict.c | 225
-rw-r--r--  drivers/gpu/drm/xe/xe_bo_evict.h | 15
-rw-r--r--  drivers/gpu/drm/xe/xe_bo_types.h | 73
-rw-r--r--  drivers/gpu/drm/xe/xe_debugfs.c | 129
-rw-r--r--  drivers/gpu/drm/xe/xe_debugfs.h | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_device.c | 359
-rw-r--r--  drivers/gpu/drm/xe/xe_device.h | 126
-rw-r--r--  drivers/gpu/drm/xe/xe_device_types.h | 214
-rw-r--r--  drivers/gpu/drm/xe/xe_dma_buf.c | 307
-rw-r--r--  drivers/gpu/drm/xe/xe_dma_buf.h | 15
-rw-r--r--  drivers/gpu/drm/xe/xe_drv.h | 24
-rw-r--r--  drivers/gpu/drm/xe/xe_engine.c | 734
-rw-r--r--  drivers/gpu/drm/xe/xe_engine.h | 54
-rw-r--r--  drivers/gpu/drm/xe/xe_engine_types.h | 208
-rw-r--r--  drivers/gpu/drm/xe/xe_exec.c | 390
-rw-r--r--  drivers/gpu/drm/xe/xe_exec.h | 14
-rw-r--r--  drivers/gpu/drm/xe/xe_execlist.c | 489
-rw-r--r--  drivers/gpu/drm/xe/xe_execlist.h | 21
-rw-r--r--  drivers/gpu/drm/xe/xe_execlist_types.h | 49
-rw-r--r--  drivers/gpu/drm/xe/xe_force_wake.c | 203
-rw-r--r--  drivers/gpu/drm/xe/xe_force_wake.h | 40
-rw-r--r--  drivers/gpu/drm/xe/xe_force_wake_types.h | 84
-rw-r--r--  drivers/gpu/drm/xe/xe_ggtt.c | 304
-rw-r--r--  drivers/gpu/drm/xe/xe_ggtt.h | 28
-rw-r--r--  drivers/gpu/drm/xe/xe_ggtt_types.h | 28
-rw-r--r--  drivers/gpu/drm/xe/xe_gpu_scheduler.c | 101
-rw-r--r--  drivers/gpu/drm/xe/xe_gpu_scheduler.h | 73
-rw-r--r--  drivers/gpu/drm/xe/xe_gpu_scheduler_types.h | 57
-rw-r--r--  drivers/gpu/drm/xe/xe_gt.c | 830
-rw-r--r--  drivers/gpu/drm/xe/xe_gt.h | 64
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_clock.c | 83
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_clock.h | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_debugfs.c | 160
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_debugfs.h | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_mcr.c | 552
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_mcr.h | 26
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_pagefault.c | 750
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_pagefault.h | 22
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sysfs.c | 55
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sysfs.h | 19
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sysfs_types.h | 26
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_topology.c | 144
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_topology.h | 20
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_types.h | 320
-rw-r--r--  drivers/gpu/drm/xe/xe_guc.c | 875
-rw-r--r--  drivers/gpu/drm/xe/xe_guc.h | 57
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ads.c | 676
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ads.h | 17
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ads_types.h | 25
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ct.c | 1196
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ct.h | 62
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ct_types.h | 87
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_debugfs.c | 105
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_debugfs.h | 14
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_engine_types.h | 52
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_fwif.h | 392
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_hwconfig.c | 125
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_hwconfig.h | 17
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_log.c | 109
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_log.h | 48
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_log_types.h | 23
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_pc.c | 843
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_pc.h | 15
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_pc_types.h | 34
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_reg.h | 147
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit.c | 1695
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit.h | 30
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_types.h | 71
-rw-r--r--  drivers/gpu/drm/xe/xe_huc.c | 131
-rw-r--r--  drivers/gpu/drm/xe/xe_huc.h | 19
-rw-r--r--  drivers/gpu/drm/xe/xe_huc_debugfs.c | 71
-rw-r--r--  drivers/gpu/drm/xe/xe_huc_debugfs.h | 14
-rw-r--r--  drivers/gpu/drm/xe/xe_huc_types.h | 19
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine.c | 658
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine.h | 27
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine_types.h | 107
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_fence.c | 230
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_fence.h | 27
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_fence_types.h | 72
-rw-r--r--  drivers/gpu/drm/xe/xe_irq.c | 565
-rw-r--r--  drivers/gpu/drm/xe/xe_irq.h | 18
-rw-r--r--  drivers/gpu/drm/xe/xe_lrc.c | 841
-rw-r--r--  drivers/gpu/drm/xe/xe_lrc.h | 50
-rw-r--r--  drivers/gpu/drm/xe/xe_lrc_types.h | 47
-rw-r--r--  drivers/gpu/drm/xe/xe_macros.h | 20
-rw-r--r--  drivers/gpu/drm/xe/xe_map.h | 93
-rw-r--r--  drivers/gpu/drm/xe/xe_migrate.c | 1168
-rw-r--r--  drivers/gpu/drm/xe/xe_migrate.h | 88
-rw-r--r--  drivers/gpu/drm/xe/xe_migrate_doc.h | 88
-rw-r--r--  drivers/gpu/drm/xe/xe_mmio.c | 466
-rw-r--r--  drivers/gpu/drm/xe/xe_mmio.h | 110
-rw-r--r--  drivers/gpu/drm/xe/xe_mocs.c | 557
-rw-r--r--  drivers/gpu/drm/xe/xe_mocs.h | 29
-rw-r--r--  drivers/gpu/drm/xe/xe_module.c | 76
-rw-r--r--  drivers/gpu/drm/xe/xe_module.h | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_pci.c | 651
-rw-r--r--  drivers/gpu/drm/xe/xe_pci.h | 21
-rw-r--r--  drivers/gpu/drm/xe/xe_pcode.c | 296
-rw-r--r--  drivers/gpu/drm/xe/xe_pcode.h | 25
-rw-r--r--  drivers/gpu/drm/xe/xe_pcode_api.h | 40
-rw-r--r--  drivers/gpu/drm/xe/xe_platform_types.h | 32
-rw-r--r--  drivers/gpu/drm/xe/xe_pm.c | 207
-rw-r--r--  drivers/gpu/drm/xe/xe_pm.h | 24
-rw-r--r--  drivers/gpu/drm/xe/xe_preempt_fence.c | 157
-rw-r--r--  drivers/gpu/drm/xe/xe_preempt_fence.h | 61
-rw-r--r--  drivers/gpu/drm/xe/xe_preempt_fence_types.h | 33
-rw-r--r--  drivers/gpu/drm/xe/xe_pt.c | 1542
-rw-r--r--  drivers/gpu/drm/xe/xe_pt.h | 54
-rw-r--r--  drivers/gpu/drm/xe/xe_pt_types.h | 57
-rw-r--r--  drivers/gpu/drm/xe/xe_pt_walk.c | 160
-rw-r--r--  drivers/gpu/drm/xe/xe_pt_walk.h | 161
-rw-r--r--  drivers/gpu/drm/xe/xe_query.c | 387
-rw-r--r--  drivers/gpu/drm/xe/xe_query.h | 14
-rw-r--r--  drivers/gpu/drm/xe/xe_reg_sr.c | 248
-rw-r--r--  drivers/gpu/drm/xe/xe_reg_sr.h | 28
-rw-r--r--  drivers/gpu/drm/xe/xe_reg_sr_types.h | 44
-rw-r--r--  drivers/gpu/drm/xe/xe_reg_whitelist.c | 73
-rw-r--r--  drivers/gpu/drm/xe/xe_reg_whitelist.h | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_res_cursor.h | 226
-rw-r--r--  drivers/gpu/drm/xe/xe_ring_ops.c | 373
-rw-r--r--  drivers/gpu/drm/xe/xe_ring_ops.h | 17
-rw-r--r--  drivers/gpu/drm/xe/xe_ring_ops_types.h | 22
-rw-r--r--  drivers/gpu/drm/xe/xe_rtp.c | 144
-rw-r--r--  drivers/gpu/drm/xe/xe_rtp.h | 340
-rw-r--r--  drivers/gpu/drm/xe/xe_rtp_types.h | 105
-rw-r--r--  drivers/gpu/drm/xe/xe_sa.c | 96
-rw-r--r--  drivers/gpu/drm/xe/xe_sa.h | 42
-rw-r--r--  drivers/gpu/drm/xe/xe_sa_types.h | 19
-rw-r--r--  drivers/gpu/drm/xe/xe_sched_job.c | 246
-rw-r--r--  drivers/gpu/drm/xe/xe_sched_job.h | 76
-rw-r--r--  drivers/gpu/drm/xe/xe_sched_job_types.h | 46
-rw-r--r--  drivers/gpu/drm/xe/xe_step.c | 189
-rw-r--r--  drivers/gpu/drm/xe/xe_step.h | 18
-rw-r--r--  drivers/gpu/drm/xe/xe_step_types.h | 51
-rw-r--r--  drivers/gpu/drm/xe/xe_sync.c | 276
-rw-r--r--  drivers/gpu/drm/xe/xe_sync.h | 27
-rw-r--r--  drivers/gpu/drm/xe/xe_sync_types.h | 27
-rw-r--r--  drivers/gpu/drm/xe/xe_trace.c | 9
-rw-r--r--  drivers/gpu/drm/xe/xe_trace.h | 513
-rw-r--r--  drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c | 130
-rw-r--r--  drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h | 16
-rw-r--r--  drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h | 18
-rw-r--r--  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 403
-rw-r--r--  drivers/gpu/drm/xe/xe_ttm_vram_mgr.h | 41
-rw-r--r--  drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h | 44
-rw-r--r--  drivers/gpu/drm/xe/xe_tuning.c | 39
-rw-r--r--  drivers/gpu/drm/xe/xe_tuning.h | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_uc.c | 226
-rw-r--r--  drivers/gpu/drm/xe/xe_uc.h | 21
-rw-r--r--  drivers/gpu/drm/xe/xe_uc_debugfs.c | 26
-rw-r--r--  drivers/gpu/drm/xe/xe_uc_debugfs.h | 14
-rw-r--r--  drivers/gpu/drm/xe/xe_uc_fw.c | 406
-rw-r--r--  drivers/gpu/drm/xe/xe_uc_fw.h | 180
-rw-r--r--  drivers/gpu/drm/xe/xe_uc_fw_abi.h | 81
-rw-r--r--  drivers/gpu/drm/xe/xe_uc_fw_types.h | 112
-rw-r--r--  drivers/gpu/drm/xe/xe_uc_types.h | 25
-rw-r--r--  drivers/gpu/drm/xe/xe_vm.c | 3407
-rw-r--r--  drivers/gpu/drm/xe/xe_vm.h | 141
-rw-r--r--  drivers/gpu/drm/xe/xe_vm_doc.h | 555
-rw-r--r--  drivers/gpu/drm/xe/xe_vm_madvise.c | 347
-rw-r--r--  drivers/gpu/drm/xe/xe_vm_madvise.h | 15
-rw-r--r--  drivers/gpu/drm/xe/xe_vm_types.h | 337
-rw-r--r--  drivers/gpu/drm/xe/xe_wa.c | 326
-rw-r--r--  drivers/gpu/drm/xe/xe_wa.h | 18
-rw-r--r--  drivers/gpu/drm/xe/xe_wait_user_fence.c | 202
-rw-r--r--  drivers/gpu/drm/xe/xe_wait_user_fence.h | 15
-rw-r--r--  drivers/gpu/drm/xe/xe_wopcm.c | 263
-rw-r--r--  drivers/gpu/drm/xe/xe_wopcm.h | 16
-rw-r--r--  drivers/gpu/drm/xe/xe_wopcm_types.h | 26
196 files changed, 39422 insertions, 0 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 31cfe2c2a2af..2520db0b776e 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -276,6 +276,8 @@ source "drivers/gpu/drm/nouveau/Kconfig"
source "drivers/gpu/drm/i915/Kconfig"
+source "drivers/gpu/drm/xe/Kconfig"
+
source "drivers/gpu/drm/kmb/Kconfig"
config DRM_VGEM
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 8ac6f4b9546e..104b42df2e95 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -134,6 +134,7 @@ obj-$(CONFIG_DRM_RADEON)+= radeon/
obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
obj-$(CONFIG_DRM_AMDGPU)+= amd/amdxcp/
obj-$(CONFIG_DRM_I915) += i915/
+obj-$(CONFIG_DRM_XE) += xe/
obj-$(CONFIG_DRM_KMB_DISPLAY) += kmb/
obj-$(CONFIG_DRM_MGAG200) += mgag200/
obj-$(CONFIG_DRM_V3D) += v3d/
diff --git a/drivers/gpu/drm/xe/.gitignore b/drivers/gpu/drm/xe/.gitignore
new file mode 100644
index 000000000000..81972dce1aff
--- /dev/null
+++ b/drivers/gpu/drm/xe/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+*.hdrtest
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
new file mode 100644
index 000000000000..62f54e6d62d9
--- /dev/null
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config DRM_XE
+ tristate "Intel Xe Graphics"
+ depends on DRM && PCI && MMU
+ select INTERVAL_TREE
+ # we need shmfs for the swappable backing store, and in particular
+ # the shmem_readpage() which depends upon tmpfs
+ select SHMEM
+ select TMPFS
+ select DRM_BUDDY
+ select DRM_KMS_HELPER
+ select DRM_PANEL
+ select DRM_SUBALLOC_HELPER
+ select RELAY
+ select IRQ_WORK
+ select SYNC_FILE
+ select IOSF_MBI
+ select CRC32
+ select SND_HDA_I915 if SND_HDA_CORE
+ select CEC_CORE if CEC_NOTIFIER
+ select VMAP_PFN
+ select DRM_TTM
+ select DRM_TTM_HELPER
+ select DRM_SCHED
+ select MMU_NOTIFIER
+ help
+ Experimental driver for Intel Xe series GPUs
+
+ If "M" is selected, the module will be called xe.
+
+config DRM_XE_FORCE_PROBE
+ string "Force probe xe for selected Intel hardware IDs"
+ depends on DRM_XE
+ help
+ This is the default value for the xe.force_probe module
+ parameter. Using the module parameter overrides this option.
+
+ Force probe the xe for Intel graphics devices that are
+ recognized but not properly supported by this kernel version. It is
+ recommended to upgrade to a kernel version with proper support as soon
+ as it is available.
+
+ It can also be used to block the probe of recognized and fully
+ supported devices.
+
+ Use "" to disable force probe. If in doubt, use this.
+
+ Use "<pci-id>[,<pci-id>,...]" to force probe the xe for listed
+ devices. For example, "4500" or "4500,4571".
+
+ Use "*" to force probe the driver for all known devices.
+
+ Use "!" right before the ID to block the probe of the device. For
+ example, "4500,!4571" forces the probe of 4500 and blocks the probe of
+ 4571.
+
+ Use "!*" to block the probe of the driver for all known devices.
+
+menu "drm/Xe Debugging"
+depends on DRM_XE
+depends on EXPERT
+source "drivers/gpu/drm/xe/Kconfig.debug"
+endmenu
diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
new file mode 100644
index 000000000000..b61fd43a76fe
--- /dev/null
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config DRM_XE_WERROR
+ bool "Force GCC to throw an error instead of a warning when compiling"
+ # As this may inadvertently break the build, only allow the user
+ # to shoot oneself in the foot iff they aim really hard
+ depends on EXPERT
+ # We use the dependency on !COMPILE_TEST to not be enabled in
+ # allmodconfig or allyesconfig configurations
+ depends on !COMPILE_TEST
+ default n
+ help
+ Add -Werror to the build flags for (and only for) xe.ko.
+ Do not enable this unless you are writing code for the xe.ko module.
+
+ Recommended for driver developers only.
+
+ If in doubt, say "N".
+
+config DRM_XE_DEBUG
+ bool "Enable additional driver debugging"
+ depends on DRM_XE
+ depends on EXPERT
+ depends on !COMPILE_TEST
+ default n
+ help
+ Choose this option to turn on extra driver debugging that may affect
+ performance but will catch some internal issues.
+
+ Recommended for driver developers only.
+
+ If in doubt, say "N".
+
+config DRM_XE_DEBUG_VM
+ bool "Enable extra VM debugging info"
+ default n
+ help
+ Enable extra VM debugging info
+
+ Recommended for driver developers only.
+
+ If in doubt, say "N".
+
+config DRM_XE_DEBUG_MEM
+ bool "Enable passing SYS/LMEM addresses to user space"
+ default n
+ help
+ Pass object location through uapi. Intended for extended
+ testing and development only.
+
+ Recommended for driver developers only.
+
+ If in doubt, say "N".
+
+config DRM_XE_SIMPLE_ERROR_CAPTURE
+ bool "Enable simple error capture to dmesg on job timeout"
+ default n
+ help
+ Choose this option when debugging an unexpected job timeout
+
+ Recommended for driver developers only.
+
+ If in doubt, say "N".
+
+config DRM_XE_KUNIT_TEST
+ tristate "KUnit tests for the drm xe driver" if !KUNIT_ALL_TESTS
+ depends on DRM_XE && KUNIT
+ default KUNIT_ALL_TESTS
+ select DRM_EXPORT_FOR_TESTS if m
+ help
+ Choose this option to allow the driver to perform selftests under
+ the kunit framework
+
+ Recommended for driver developers only.
+
+ If in doubt, say "N".
+
+config DRM_XE_LARGE_GUC_BUFFER
+ bool "Enable larger guc log buffer"
+ default n
+ help
+ Choose this option when debugging guc issues.
+ Buffer should be large enough for complex issues.
+
+ Recommended for driver developers only.
+
+ If in doubt, say "N".
+
+config DRM_XE_USERPTR_INVAL_INJECT
+ bool "Inject userptr invalidation -EINVAL errors"
+ default n
+ help
+ Choose this option when debugging error paths that
+ are hit during checks for userptr invalidations.
+
+ Recommended for driver developers only.
+ If in doubt, say "N".
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
new file mode 100644
index 000000000000..228a87f2fe7b
--- /dev/null
+++ b/drivers/gpu/drm/xe/Makefile
@@ -0,0 +1,121 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the drm device driver. This driver provides support for the
+# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
+
+# Add a set of useful warning flags and enable -Werror for CI to prevent
+# trivial mistakes from creeping in. We have to do this piecemeal as we reject
+# any patch that isn't warning clean, so when turning on -Wall -Wextra (or W=1)
+# we need to filter out dubious warnings. Still, it is in our interest
+# to keep running locally with W=1 C=1 until we are completely clean.
+#
+# Note the danger in using -Wall -Wextra is that when CI updates gcc we
+# will most likely get a sudden build breakage... Hopefully we will fix
+# new warnings before CI updates!
+subdir-ccflags-y := -Wall -Wextra
+# Making these use $(call cc-disable-warning, ...) breaks the build of xe.mod.o
+# when building with make M=drivers/gpu/drm/xe. This doesn't happen in the
+# upstream tree, so it was apparently fixed by changes in the build system.
+# Move these back to $(call cc-disable-warning, ...) after the rebase.
+subdir-ccflags-y += -Wno-unused-parameter
+subdir-ccflags-y += -Wno-type-limits
+#subdir-ccflags-y += $(call cc-disable-warning, unused-parameter)
+#subdir-ccflags-y += $(call cc-disable-warning, type-limits)
+subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers)
+subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable)
+# clang warnings
+subdir-ccflags-y += $(call cc-disable-warning, sign-compare)
+subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized)
+subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides)
+subdir-ccflags-y += $(call cc-disable-warning, frame-address)
+subdir-ccflags-$(CONFIG_DRM_XE_WERROR) += -Werror
+
+# Fine grained warnings disable
+CFLAGS_xe_pci.o = $(call cc-disable-warning, override-init)
+
+subdir-ccflags-y += -I$(srctree)/$(src)
+
+# Please keep these build lists sorted!
+
+# core driver code
+
+xe-y += xe_bb.o \
+ xe_bo.o \
+ xe_bo_evict.o \
+ xe_debugfs.o \
+ xe_device.o \
+ xe_dma_buf.o \
+ xe_engine.o \
+ xe_exec.o \
+ xe_execlist.o \
+ xe_force_wake.o \
+ xe_ggtt.o \
+ xe_gpu_scheduler.o \
+ xe_gt.o \
+ xe_gt_clock.o \
+ xe_gt_debugfs.o \
+ xe_gt_mcr.o \
+ xe_gt_pagefault.o \
+ xe_gt_sysfs.o \
+ xe_gt_topology.o \
+ xe_guc.o \
+ xe_guc_ads.o \
+ xe_guc_ct.o \
+ xe_guc_debugfs.o \
+ xe_guc_hwconfig.o \
+ xe_guc_log.o \
+ xe_guc_pc.o \
+ xe_guc_submit.o \
+ xe_hw_engine.o \
+ xe_hw_fence.o \
+ xe_huc.o \
+ xe_huc_debugfs.o \
+ xe_irq.o \
+ xe_lrc.o \
+ xe_migrate.o \
+ xe_mmio.o \
+ xe_mocs.o \
+ xe_module.o \
+ xe_pci.o \
+ xe_pcode.o \
+ xe_pm.o \
+ xe_preempt_fence.o \
+ xe_pt.o \
+ xe_pt_walk.o \
+ xe_query.o \
+ xe_reg_sr.o \
+ xe_reg_whitelist.o \
+ xe_rtp.o \
+ xe_ring_ops.o \
+ xe_sa.o \
+ xe_sched_job.o \
+ xe_step.o \
+ xe_sync.o \
+ xe_trace.o \
+ xe_ttm_gtt_mgr.o \
+ xe_ttm_vram_mgr.o \
+ xe_tuning.o \
+ xe_uc.o \
+ xe_uc_debugfs.o \
+ xe_uc_fw.o \
+ xe_vm.o \
+ xe_vm_madvise.o \
+ xe_wait_user_fence.o \
+ xe_wa.o \
+ xe_wopcm.o
+
+# XXX: Needed for i915 register definitions. Will be removed after xe-regs.
+subdir-ccflags-y += -I$(srctree)/drivers/gpu/drm/i915/
+
+obj-$(CONFIG_DRM_XE) += xe.o
+obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/
+\
+# header test
+always-$(CONFIG_DRM_XE_WERROR) += \
+ $(patsubst %.h,%.hdrtest, $(shell cd $(srctree)/$(src) && find * -name '*.h'))
+
+quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@)
+ cmd_hdrtest = $(CC) -DHDRTEST $(filter-out $(CFLAGS_GCOV), $(c_flags)) -S -o /dev/null -x c /dev/null -include $<; touch $@
+
+$(obj)/%.hdrtest: $(src)/%.h FORCE
+ $(call if_changed_dep,hdrtest)
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
new file mode 100644
index 000000000000..3062e0e0d467
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
@@ -0,0 +1,219 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_ACTIONS_ABI_H
+#define _ABI_GUC_ACTIONS_ABI_H
+
+/**
+ * DOC: HOST2GUC_SELF_CFG
+ *
+ * This message is used by Host KMD to set up the `GuC Self Config KLVs`_.
+ *
+ * This message must be sent as `MMIO HXG Message`_.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | DATA0 = MBZ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_SELF_CFG` = 0x0508 |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:16 | **KLV_KEY** - KLV key, see `GuC Self Config KLVs`_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | **KLV_LEN** - KLV length |
+ * | | | |
+ * | | | - 32 bit KLV = 1 |
+ * | | | - 64 bit KLV = 2 |
+ * +---+-------+--------------------------------------------------------------+
+ * | 2 | 31:0 | **VALUE32** - Bits 31-0 of the KLV value |
+ * +---+-------+--------------------------------------------------------------+
+ * | 3 | 31:0 | **VALUE64** - Bits 63-32 of the KLV value (**KLV_LEN** = 2) |
+ * +---+-------+--------------------------------------------------------------+
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:0 | DATA0 = **NUM** - 1 if KLV was parsed, 0 if not recognized |
+ * +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_HOST2GUC_SELF_CFG 0x0508
+
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY (0xffff << 16)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN (0xffff << 0)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32 GUC_HXG_REQUEST_MSG_n_DATAn
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64 GUC_HXG_REQUEST_MSG_n_DATAn
+
+#define HOST2GUC_SELF_CFG_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define HOST2GUC_SELF_CFG_RESPONSE_MSG_0_NUM GUC_HXG_RESPONSE_MSG_0_DATA0
+
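The tables above fully specify the wire format, so a sender can assemble the request with FIELD_PREP() against these masks. A minimal sketch follows (illustrative only, not part of the patch); it assumes the GUC_HXG_MSG_0_ORIGIN/TYPE and GUC_HXG_REQUEST_MSG_0_ACTION fields come from abi/guc_messages_abi.h, which is in the diffstat but not shown in this section:

#include <linux/bitfield.h>

/* Illustrative only: pack a HOST2GUC_SELF_CFG request for a 64-bit KLV.
 * The GUC_HXG_* names are assumed to come from abi/guc_messages_abi.h.
 */
static void pack_self_cfg64(u32 *msg, u16 key, u32 lo, u32 hi)
{
	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
		 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
			    GUC_ACTION_HOST2GUC_SELF_CFG);
	msg[1] = FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY, key) |
		 FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN, 2);
	msg[2] = lo;	/* HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32 */
	msg[3] = hi;	/* HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64 */
}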
+/**
+ * DOC: HOST2GUC_CONTROL_CTB
+ *
+ * This H2G action allows the VF Host to enable or disable H2G and G2H `CT Buffer`_.
+ *
+ * This message must be sent as `MMIO HXG Message`_.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | DATA0 = MBZ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_CONTROL_CTB` = 0x4509 |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | **CONTROL** - control `CTB based communication`_ |
+ * | | | |
+ * | | | - _`GUC_CTB_CONTROL_DISABLE` = 0 |
+ * | | | - _`GUC_CTB_CONTROL_ENABLE` = 1 |
+ * +---+-------+--------------------------------------------------------------+
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:0 | DATA0 = MBZ |
+ * +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_HOST2GUC_CONTROL_CTB 0x4509
+
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u)
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL GUC_HXG_REQUEST_MSG_n_DATAn
+#define GUC_CTB_CONTROL_DISABLE 0u
+#define GUC_CTB_CONTROL_ENABLE 1u
+
+#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0
+
+/* legacy definitions */
+
+enum xe_guc_action {
+ XE_GUC_ACTION_DEFAULT = 0x0,
+ XE_GUC_ACTION_REQUEST_PREEMPTION = 0x2,
+ XE_GUC_ACTION_REQUEST_ENGINE_RESET = 0x3,
+ XE_GUC_ACTION_ALLOCATE_DOORBELL = 0x10,
+ XE_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20,
+ XE_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30,
+ XE_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x40,
+ XE_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302,
+ XE_GUC_ACTION_ENTER_S_STATE = 0x501,
+ XE_GUC_ACTION_EXIT_S_STATE = 0x502,
+ XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506,
+ XE_GUC_ACTION_SCHED_CONTEXT = 0x1000,
+ XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001,
+ XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,
+ XE_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003,
+ XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004,
+ XE_GUC_ACTION_SET_CONTEXT_PRIORITY = 0x1005,
+ XE_GUC_ACTION_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006,
+ XE_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007,
+ XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008,
+ XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
+ XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B,
+ XE_GUC_ACTION_SETUP_PC_GUCRC = 0x3004,
+ XE_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
+ XE_GUC_ACTION_GET_HWCONFIG = 0x4100,
+ XE_GUC_ACTION_REGISTER_CONTEXT = 0x4502,
+ XE_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503,
+ XE_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
+ XE_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
+ XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
+ XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
+ XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
+ XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
+ XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000,
+ XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002,
+ XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003,
+ XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY = 0x6004,
+ XE_GUC_ACTION_TLB_INVALIDATION = 0x7000,
+ XE_GUC_ACTION_TLB_INVALIDATION_DONE = 0x7001,
+ XE_GUC_ACTION_TLB_INVALIDATION_ALL = 0x7002,
+ XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002,
+ XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003,
+ XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004,
+ XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005,
+ XE_GUC_ACTION_LIMIT
+};
+
+enum xe_guc_rc_options {
+ XE_GUCRC_HOST_CONTROL,
+ XE_GUCRC_FIRMWARE_CONTROL,
+};
+
+enum xe_guc_preempt_options {
+ XE_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4,
+ XE_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8,
+};
+
+enum xe_guc_report_status {
+ XE_GUC_REPORT_STATUS_UNKNOWN = 0x0,
+ XE_GUC_REPORT_STATUS_ACKED = 0x1,
+ XE_GUC_REPORT_STATUS_ERROR = 0x2,
+ XE_GUC_REPORT_STATUS_COMPLETE = 0x4,
+};
+
+enum xe_guc_sleep_state_status {
+ XE_GUC_SLEEP_STATE_SUCCESS = 0x1,
+ XE_GUC_SLEEP_STATE_PREEMPT_TO_IDLE_FAILED = 0x2,
+ XE_GUC_SLEEP_STATE_ENGINE_RESET_FAILED = 0x3
+#define XE_GUC_SLEEP_STATE_INVALID_MASK 0x80000000
+};
+
+#define GUC_LOG_CONTROL_LOGGING_ENABLED (1 << 0)
+#define GUC_LOG_CONTROL_VERBOSITY_SHIFT 4
+#define GUC_LOG_CONTROL_VERBOSITY_MASK (0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT)
+#define GUC_LOG_CONTROL_DEFAULT_LOGGING (1 << 8)
+
+#define XE_GUC_TLB_INVAL_TYPE_SHIFT 0
+#define XE_GUC_TLB_INVAL_MODE_SHIFT 8
+/* Flush PPC or SMRO caches along with TLB invalidation request */
+#define XE_GUC_TLB_INVAL_FLUSH_CACHE (1 << 31)
+
+enum xe_guc_tlb_invalidation_type {
+ XE_GUC_TLB_INVAL_FULL = 0x0,
+ XE_GUC_TLB_INVAL_PAGE_SELECTIVE = 0x1,
+ XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX = 0x2,
+ XE_GUC_TLB_INVAL_GUC = 0x3,
+};
+
+/*
+ * 0: Heavy mode of Invalidation:
+ * The pipeline of the engine(s) for which the invalidation is targeted to is
+ * blocked, and all the in-flight transactions are guaranteed to be Globally
+ * Observed before completing the TLB invalidation
+ * 1: Lite mode of Invalidation:
+ * TLBs of the targeted engine(s) are immediately invalidated.
+ * In-flight transactions are NOT guaranteed to be Globally Observed before
+ * completing TLB invalidation.
+ * Light Invalidation Mode is to be used only when
+ * it can be guaranteed (by SW) that the address translations remain invariant
+ * for the in-flight transactions across the TLB invalidation. In other words,
+ * this mode can be used when the TLB invalidation is intended to clear out the
+ * stale cached translations that are no longer in use. Light Invalidation Mode
+ * is much faster than the Heavy Invalidation Mode, as it does not wait for the
+ * in-flight transactions to be Globally Observed.
+ */
+enum xe_guc_tlb_inval_mode {
+ XE_GUC_TLB_INVAL_MODE_HEAVY = 0x0,
+ XE_GUC_TLB_INVAL_MODE_LITE = 0x1,
+};
+
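For reference, a request encodes one of the types and modes from the enums above, plus optional flags, into a single dword via the *_SHIFT values; a minimal sketch (illustrative only, not part of the patch):

/* Compose the action dword for a full, heavy TLB invalidation that also
 * flushes the PPC/SMRO caches. Purely illustrative.
 */
static u32 tlb_inval_action(void)
{
	return XE_GUC_TLB_INVAL_FULL << XE_GUC_TLB_INVAL_TYPE_SHIFT |
	       XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT |
	       XE_GUC_TLB_INVAL_FLUSH_CACHE;
}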
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
new file mode 100644
index 000000000000..811add10c30d
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _GUC_ACTIONS_SLPC_ABI_H_
+#define _GUC_ACTIONS_SLPC_ABI_H_
+
+#include <linux/types.h>
+
+/**
+ * DOC: SLPC SHARED DATA STRUCTURE
+ *
+ * +----+------+--------------------------------------------------------------+
+ * | CL | Bytes| Description |
+ * +====+======+==============================================================+
+ * | 1 | 0-3 | SHARED DATA SIZE |
+ * | +------+--------------------------------------------------------------+
+ * | | 4-7 | GLOBAL STATE |
+ * | +------+--------------------------------------------------------------+
+ * | | 8-11 | DISPLAY DATA ADDRESS |
+ * | +------+--------------------------------------------------------------+
+ * | | 12:63| PADDING |
+ * +----+------+--------------------------------------------------------------+
+ * | 2 | 0:63 | PADDING (PLATFORM INFO) |
+ * +----+------+--------------------------------------------------------------+
+ * | 3 | 0-3 | TASK STATE DATA |
+ * + +------+--------------------------------------------------------------+
+ * | | 4:63 | PADDING |
+ * +----+------+--------------------------------------------------------------+
+ * |4-21|0:1087| OVERRIDE PARAMS AND BIT FIELDS |
+ * +----+------+--------------------------------------------------------------+
+ * | | | PADDING + EXTRA RESERVED PAGE |
+ * +----+------+--------------------------------------------------------------+
+ */
+
+/*
+ * SLPC exposes certain parameters for global configuration by the host.
+ * These are referred to as override parameters, because in most cases
+ * the host will not need to modify the default values used by SLPC.
+ * SLPC remembers the default values which allows the host to easily restore
+ * them by simply unsetting the override. The host can set or unset override
+ * parameters during SLPC (re-)initialization using the SLPC Reset event.
+ * The host can also set or unset override parameters on the fly using the
+ * Parameter Set and Parameter Unset events
+ */
+
+#define SLPC_MAX_OVERRIDE_PARAMETERS 256
+#define SLPC_OVERRIDE_BITFIELD_SIZE \
+ (SLPC_MAX_OVERRIDE_PARAMETERS / 32)
+
+#define SLPC_PAGE_SIZE_BYTES 4096
+#define SLPC_CACHELINE_SIZE_BYTES 64
+#define SLPC_SHARED_DATA_SIZE_BYTE_HEADER SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE SLPC_PAGE_SIZE_BYTES
+#define SLPC_SHARED_DATA_SIZE_BYTE_MAX (2 * SLPC_PAGE_SIZE_BYTES)
+
+/*
+ * Cacheline size aligned (Total size needed for
+ * SLPM_KMD_MAX_OVERRIDE_PARAMETERS=256 is 1088 bytes)
+ */
+#define SLPC_OVERRIDE_PARAMS_TOTAL_BYTES (((((SLPC_MAX_OVERRIDE_PARAMETERS * 4) \
+ + ((SLPC_MAX_OVERRIDE_PARAMETERS / 32) * 4)) \
+ + (SLPC_CACHELINE_SIZE_BYTES - 1)) / SLPC_CACHELINE_SIZE_BYTES) * \
+ SLPC_CACHELINE_SIZE_BYTES)
+
+#define SLPC_SHARED_DATA_SIZE_BYTE_OTHER (SLPC_SHARED_DATA_SIZE_BYTE_MAX - \
+ (SLPC_SHARED_DATA_SIZE_BYTE_HEADER \
+ + SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO \
+ + SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE \
+ + SLPC_OVERRIDE_PARAMS_TOTAL_BYTES \
+ + SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE))
+
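Working through the arithmetic above: 256 four-byte values plus a 256-bit override bitfield is 1024 + 32 = 1056 bytes, which rounds up to 17 cachelines, i.e. the 1088 bytes quoted in the comment, leaving 8192 - (64 + 64 + 64 + 1088 + 4096) = 2816 bytes of "other" padding. If one wanted that checked at build time, compile-time assertions along these lines would do (illustrative, not in the patch):

#include <linux/build_bug.h>

/* Sanity-check the SLPC shared-data sizing maths (illustrative). */
static_assert(SLPC_OVERRIDE_PARAMS_TOTAL_BYTES == 1088);
static_assert(SLPC_SHARED_DATA_SIZE_BYTE_OTHER == 2816);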
+enum slpc_task_enable {
+ SLPC_PARAM_TASK_DEFAULT = 0,
+ SLPC_PARAM_TASK_ENABLED,
+ SLPC_PARAM_TASK_DISABLED,
+ SLPC_PARAM_TASK_UNKNOWN
+};
+
+enum slpc_global_state {
+ SLPC_GLOBAL_STATE_NOT_RUNNING = 0,
+ SLPC_GLOBAL_STATE_INITIALIZING = 1,
+ SLPC_GLOBAL_STATE_RESETTING = 2,
+ SLPC_GLOBAL_STATE_RUNNING = 3,
+ SLPC_GLOBAL_STATE_SHUTTING_DOWN = 4,
+ SLPC_GLOBAL_STATE_ERROR = 5
+};
+
+enum slpc_param_id {
+ SLPC_PARAM_TASK_ENABLE_GTPERF = 0,
+ SLPC_PARAM_TASK_DISABLE_GTPERF = 1,
+ SLPC_PARAM_TASK_ENABLE_BALANCER = 2,
+ SLPC_PARAM_TASK_DISABLE_BALANCER = 3,
+ SLPC_PARAM_TASK_ENABLE_DCC = 4,
+ SLPC_PARAM_TASK_DISABLE_DCC = 5,
+ SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ = 6,
+ SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ = 7,
+ SLPC_PARAM_GLOBAL_MIN_GT_SLICE_FREQ_MHZ = 8,
+ SLPC_PARAM_GLOBAL_MAX_GT_SLICE_FREQ_MHZ = 9,
+ SLPC_PARAM_GTPERF_THRESHOLD_MAX_FPS = 10,
+ SLPC_PARAM_GLOBAL_DISABLE_GT_FREQ_MANAGEMENT = 11,
+ SLPC_PARAM_GTPERF_ENABLE_FRAMERATE_STALLING = 12,
+ SLPC_PARAM_GLOBAL_DISABLE_RC6_MODE_CHANGE = 13,
+ SLPC_PARAM_GLOBAL_OC_UNSLICE_FREQ_MHZ = 14,
+ SLPC_PARAM_GLOBAL_OC_SLICE_FREQ_MHZ = 15,
+ SLPC_PARAM_GLOBAL_ENABLE_IA_GT_BALANCING = 16,
+ SLPC_PARAM_GLOBAL_ENABLE_ADAPTIVE_BURST_TURBO = 17,
+ SLPC_PARAM_GLOBAL_ENABLE_EVAL_MODE = 18,
+ SLPC_PARAM_GLOBAL_ENABLE_BALANCER_IN_NON_GAMING_MODE = 19,
+ SLPC_PARAM_GLOBAL_RT_MODE_TURBO_FREQ_DELTA_MHZ = 20,
+ SLPC_PARAM_PWRGATE_RC_MODE = 21,
+ SLPC_PARAM_EDR_MODE_COMPUTE_TIMEOUT_MS = 22,
+ SLPC_PARAM_EDR_QOS_FREQ_MHZ = 23,
+ SLPC_PARAM_MEDIA_FF_RATIO_MODE = 24,
+ SLPC_PARAM_ENABLE_IA_FREQ_LIMITING = 25,
+ SLPC_PARAM_STRATEGIES = 26,
+ SLPC_PARAM_POWER_PROFILE = 27,
+ SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY = 28,
+ SLPC_MAX_PARAM = 32,
+};
+
+enum slpc_media_ratio_mode {
+ SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL = 0,
+ SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_ONE = 1,
+ SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO = 2,
+};
+
+enum slpc_gucrc_mode {
+ SLPC_GUCRC_MODE_HW = 0,
+ SLPC_GUCRC_MODE_GUCRC_NO_RC6 = 1,
+ SLPC_GUCRC_MODE_GUCRC_STATIC_TIMEOUT = 2,
+ SLPC_GUCRC_MODE_GUCRC_DYNAMIC_HYSTERESIS = 3,
+
+ SLPC_GUCRC_MODE_MAX,
+};
+
+enum slpc_event_id {
+ SLPC_EVENT_RESET = 0,
+ SLPC_EVENT_SHUTDOWN = 1,
+ SLPC_EVENT_PLATFORM_INFO_CHANGE = 2,
+ SLPC_EVENT_DISPLAY_MODE_CHANGE = 3,
+ SLPC_EVENT_FLIP_COMPLETE = 4,
+ SLPC_EVENT_QUERY_TASK_STATE = 5,
+ SLPC_EVENT_PARAMETER_SET = 6,
+ SLPC_EVENT_PARAMETER_UNSET = 7,
+};
+
+struct slpc_task_state_data {
+ union {
+ u32 task_status_padding;
+ struct {
+ u32 status;
+#define SLPC_GTPERF_TASK_ENABLED REG_BIT(0)
+#define SLPC_DCC_TASK_ENABLED REG_BIT(11)
+#define SLPC_IN_DCC REG_BIT(12)
+#define SLPC_BALANCER_ENABLED REG_BIT(15)
+#define SLPC_IBC_TASK_ENABLED REG_BIT(16)
+#define SLPC_BALANCER_IA_LMT_ENABLED REG_BIT(17)
+#define SLPC_BALANCER_IA_LMT_ACTIVE REG_BIT(18)
+ };
+ };
+ union {
+ u32 freq_padding;
+ struct {
+#define SLPC_MAX_UNSLICE_FREQ_MASK REG_GENMASK(7, 0)
+#define SLPC_MIN_UNSLICE_FREQ_MASK REG_GENMASK(15, 8)
+#define SLPC_MAX_SLICE_FREQ_MASK REG_GENMASK(23, 16)
+#define SLPC_MIN_SLICE_FREQ_MASK REG_GENMASK(31, 24)
+ u32 freq;
+ };
+ };
+} __packed;
+
+struct slpc_shared_data_header {
+ /* Total size in bytes of this shared buffer. */
+ u32 size;
+ u32 global_state;
+ u32 display_data_addr;
+} __packed;
+
+struct slpc_override_params {
+ u32 bits[SLPC_OVERRIDE_BITFIELD_SIZE];
+ u32 values[SLPC_MAX_OVERRIDE_PARAMETERS];
+} __packed;
+
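The bitfield marks which entries of values[] carry an override: parameter id N maps to bit (N % 32) of bits[N / 32]. A hedged sketch of recording an override on the host side (illustrative, not code from this patch):

#include <linux/bits.h>

/* Mark parameter 'id' as overridden and record its value (illustrative). */
static void slpc_set_override(struct slpc_override_params *p, u32 id, u32 value)
{
	if (id >= SLPC_MAX_OVERRIDE_PARAMETERS)
		return;

	p->bits[id / 32] |= BIT(id % 32);
	p->values[id] = value;
}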
+struct slpc_shared_data {
+ struct slpc_shared_data_header header;
+ u8 shared_data_header_pad[SLPC_SHARED_DATA_SIZE_BYTE_HEADER -
+ sizeof(struct slpc_shared_data_header)];
+
+ u8 platform_info_pad[SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO];
+
+ struct slpc_task_state_data task_state_data;
+ u8 task_state_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE -
+ sizeof(struct slpc_task_state_data)];
+
+ struct slpc_override_params override_params;
+ u8 override_params_pad[SLPC_OVERRIDE_PARAMS_TOTAL_BYTES -
+ sizeof(struct slpc_override_params)];
+
+ u8 shared_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_OTHER];
+
+ /* PAGE 2 (4096 bytes), mode based parameter will be removed soon */
+ u8 reserved_mode_definition[4096];
+} __packed;
+
+/**
+ * DOC: SLPC H2G MESSAGE FORMAT
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | DATA0 = MBZ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST` = 0x3003 |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:8 | **EVENT_ID** |
+ * + +-------+--------------------------------------------------------------+
+ * | | 7:0 | **EVENT_ARGC** - number of data arguments |
+ * +---+-------+--------------------------------------------------------------+
+ * | 2 | 31:0 | **EVENT_DATA1** |
+ * +---+-------+--------------------------------------------------------------+
+ * |...| 31:0 | ... |
+ * +---+-------+--------------------------------------------------------------+
+ * |2+n| 31:0 | **EVENT_DATAn** |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST 0x3003
+
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_MIN_LEN \
+ (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u)
+#define HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS 9
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_MAX_LEN \
+ (HOST2GUC_PC_SLPC_REQUEST_MSG_MIN_LEN + \
+ HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS)
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID (0xff << 8)
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC (0xff << 0)
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_N_EVENT_DATA_N GUC_HXG_REQUEST_MSG_n_DATAn
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
new file mode 100644
index 000000000000..41244055cc0c
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_COMMUNICATION_CTB_ABI_H
+#define _ABI_GUC_COMMUNICATION_CTB_ABI_H
+
+#include <linux/types.h>
+#include <linux/build_bug.h>
+
+#include "guc_messages_abi.h"
+
+/**
+ * DOC: CT Buffer
+ *
+ * Circular buffer used to send `CTB Message`_
+ */
+
+/**
+ * DOC: CTB Descriptor
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31:0 | **HEAD** - offset (in dwords) to the last dword that was |
+ * | | | read from the `CT Buffer`_. |
+ * | | | It can only be updated by the receiver. |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | **TAIL** - offset (in dwords) to the last dword that was |
+ * | | | written to the `CT Buffer`_. |
+ * | | | It can only be updated by the sender. |
+ * +---+-------+--------------------------------------------------------------+
+ * | 2 | 31:0 | **STATUS** - status of the CTB |
+ * | | | |
+ * | | | - _`GUC_CTB_STATUS_NO_ERROR` = 0 (normal operation) |
+ * | | | - _`GUC_CTB_STATUS_OVERFLOW` = 1 (head/tail too large) |
+ * | | | - _`GUC_CTB_STATUS_UNDERFLOW` = 2 (truncated message) |
+ * | | | - _`GUC_CTB_STATUS_MISMATCH` = 4 (head/tail modified) |
+ * +---+-------+--------------------------------------------------------------+
+ * |...| | RESERVED = MBZ |
+ * +---+-------+--------------------------------------------------------------+
+ * | 15| 31:0 | RESERVED = MBZ |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+struct guc_ct_buffer_desc {
+ u32 head;
+ u32 tail;
+ u32 status;
+#define GUC_CTB_STATUS_NO_ERROR 0
+#define GUC_CTB_STATUS_OVERFLOW (1 << 0)
+#define GUC_CTB_STATUS_UNDERFLOW (1 << 1)
+#define GUC_CTB_STATUS_MISMATCH (1 << 2)
+ u32 reserved[13];
+} __packed;
+static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
+
+/**
+ * DOC: CTB Message
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31:16 | **FENCE** - message identifier |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:12 | **FORMAT** - format of the CTB message |
+ * | | | - _`GUC_CTB_FORMAT_HXG` = 0 - see `CTB HXG Message`_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 11:8 | **RESERVED** |
+ * | +-------+--------------------------------------------------------------+
+ * | | 7:0 | **NUM_DWORDS** - length of the CTB message (w/o header) |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | optional (depends on FORMAT) |
+ * +---+-------+ |
+ * |...| | |
+ * +---+-------+ |
+ * | n | 31:0 | |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_CTB_HDR_LEN 1u
+#define GUC_CTB_MSG_MIN_LEN GUC_CTB_HDR_LEN
+#define GUC_CTB_MSG_MAX_LEN 256u
+#define GUC_CTB_MSG_0_FENCE (0xffff << 16)
+#define GUC_CTB_MSG_0_FORMAT (0xf << 12)
+#define GUC_CTB_FORMAT_HXG 0u
+#define GUC_CTB_MSG_0_RESERVED (0xf << 8)
+#define GUC_CTB_MSG_0_NUM_DWORDS (0xff << 0)
+
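Given these fields, the first dword of a CTB message can be assembled with FIELD_PREP(); for example (illustrative only, not part of the patch):

#include <linux/bitfield.h>

/* Build the header dword for an HXG payload of 'len' dwords tagged with
 * 'fence' (illustrative).
 */
static u32 ctb_msg_header(u32 fence, u32 len)
{
	return FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) |
	       FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
	       FIELD_PREP(GUC_CTB_MSG_0_FENCE, fence);
}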
+/**
+ * DOC: CTB HXG Message
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31:16 | FENCE |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:12 | FORMAT = GUC_CTB_FORMAT_HXG_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 11:8 | RESERVED = MBZ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 7:0 | NUM_DWORDS = length (in dwords) of the embedded HXG message |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | |
+ * +---+-------+ |
+ * |...| | [Embedded `HXG Message`_] |
+ * +---+-------+ |
+ * | n | 31:0 | |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_CTB_HXG_MSG_MIN_LEN (GUC_CTB_MSG_MIN_LEN + GUC_HXG_MSG_MIN_LEN)
+#define GUC_CTB_HXG_MSG_MAX_LEN GUC_CTB_MSG_MAX_LEN
+
+/**
+ * DOC: CTB based communication
+ *
+ * The CTB (command transport buffer) communication between Host and GuC
+ * is based on u32 data stream written to the shared buffer. One buffer can
+ * be used to transmit data only in one direction (one-directional channel).
+ *
+ * The current status of each buffer is stored in the buffer descriptor.
+ * The buffer descriptor holds tail and head fields that represent the active
+ * data stream. The tail field is updated by the data producer (sender), and
+ * the head field is updated by the data consumer (receiver)::
+ *
+ * +------------+
+ * | DESCRIPTOR | +=================+============+========+
+ * +============+ | | MESSAGE(s) | |
+ * | address |--------->+=================+============+========+
+ * +------------+
+ * | head | ^-----head--------^
+ * +------------+
+ * | tail | ^---------tail-----------------^
+ * +------------+
+ * | size | ^---------------size--------------------^
+ * +------------+
+ *
+ * Each message in the data stream starts with a single u32 treated as a header,
+ * followed by an optional set of u32 data that makes up the message-specific payload::
+ *
+ * +------------+---------+---------+---------+
+ * | MESSAGE |
+ * +------------+---------+---------+---------+
+ * | msg[0] | [1] | ... | [n-1] |
+ * +------------+---------+---------+---------+
+ * | MESSAGE | MESSAGE PAYLOAD |
+ * + HEADER +---------+---------+---------+
+ * | | 0 | ... | n |
+ * +======+=====+=========+=========+=========+
+ * | 31:16| code| | | |
+ * +------+-----+ | | |
+ * | 15:5|flags| | | |
+ * +------+-----+ | | |
+ * | 4:0| len| | | |
+ * +------+-----+---------+---------+---------+
+ *
+ * ^-------------len-------------^
+ *
+ * The message header consists of:
+ *
+ * - **len**, indicates length of the message payload (in u32)
+ * - **code**, indicates message code
+ * - **flags**, holds various bits to control message handling
+ */
+
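Because the buffer is circular, the sender computes the space left between its own tail and the receiver-owned head before writing; a hedged sketch of that calculation, with head/tail/size in dwords (illustrative, not the driver's actual helper):

/* Dwords the sender may still write without catching up to 'head'.
 * One slot is left unused so that head == tail always means "empty"
 * (illustrative).
 */
static u32 ctb_space_dwords(u32 head, u32 tail, u32 size)
{
	return (head > tail ? head - tail : size - tail + head) - 1;
}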
+/*
+ * Definition of the command transport message header (DW0)
+ *
+ * bit[4..0] message len (in dwords)
+ * bit[7..5] reserved
+ * bit[8] response (G2H only)
+ * bit[8] write fence to desc (H2G only)
+ * bit[9] write status to H2G buff (H2G only)
+ * bit[10] send status back via G2H (H2G only)
+ * bit[15..11] reserved
+ * bit[31..16] action code
+ */
+#define GUC_CT_MSG_LEN_SHIFT 0
+#define GUC_CT_MSG_LEN_MASK 0x1F
+#define GUC_CT_MSG_IS_RESPONSE (1 << 8)
+#define GUC_CT_MSG_WRITE_FENCE_TO_DESC (1 << 8)
+#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF (1 << 9)
+#define GUC_CT_MSG_SEND_STATUS (1 << 10)
+#define GUC_CT_MSG_ACTION_SHIFT 16
+#define GUC_CT_MSG_ACTION_MASK 0xFFFF
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h
new file mode 100644
index 000000000000..ef538e34f894
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_COMMUNICATION_MMIO_ABI_H
+#define _ABI_GUC_COMMUNICATION_MMIO_ABI_H
+
+/**
+ * DOC: GuC MMIO based communication
+ *
+ * The MMIO based communication between Host and GuC relies on special
+ * hardware registers whose format can be defined by the software
+ * (so-called scratch registers).
+ *
+ * Each MMIO based message, both Host to GuC (H2G) and GuC to Host (G2H),
+ * whose maximum length depends on the number of available scratch
+ * registers, is written directly into those scratch registers.
+ *
+ * For Gen9+, there are 16 software scratch registers 0xC180-0xC1B8,
+ * but no H2G command takes more than 4 parameters and the GuC firmware
+ * itself uses a 4-element array to store the H2G message.
+ *
+ * For Gen11+, there are 4 additional registers 0x190240-0x19024C, which
+ * are preferred over the legacy ones despite their lower count.
+ *
+ * The MMIO based communication is mainly used during driver initialization
+ * phase to setup the `CTB based communication`_ that will be used afterwards.
+ */
+
+#define GUC_MAX_MMIO_MSG_LEN 4
+
+/**
+ * DOC: MMIO HXG Message
+ *
+ * Format of the MMIO messages follows definitions of `HXG Message`_.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31:0 | |
+ * +---+-------+ |
+ * |...| | [Embedded `HXG Message`_] |
+ * +---+-------+ |
+ * | n | 31:0 | |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_errors_abi.h b/drivers/gpu/drm/xe/abi/guc_errors_abi.h
new file mode 100644
index 000000000000..ec83551bf9c0
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_errors_abi.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_ERRORS_ABI_H
+#define _ABI_GUC_ERRORS_ABI_H
+
+enum xe_guc_response_status {
+ XE_GUC_RESPONSE_STATUS_SUCCESS = 0x0,
+ XE_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000,
+};
+
+enum xe_guc_load_status {
+ XE_GUC_LOAD_STATUS_DEFAULT = 0x00,
+ XE_GUC_LOAD_STATUS_START = 0x01,
+ XE_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH = 0x02,
+ XE_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH = 0x03,
+ XE_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE = 0x04,
+ XE_GUC_LOAD_STATUS_GDT_DONE = 0x10,
+ XE_GUC_LOAD_STATUS_IDT_DONE = 0x20,
+ XE_GUC_LOAD_STATUS_LAPIC_DONE = 0x30,
+ XE_GUC_LOAD_STATUS_GUCINT_DONE = 0x40,
+ XE_GUC_LOAD_STATUS_DPC_READY = 0x50,
+ XE_GUC_LOAD_STATUS_DPC_ERROR = 0x60,
+ XE_GUC_LOAD_STATUS_EXCEPTION = 0x70,
+ XE_GUC_LOAD_STATUS_INIT_DATA_INVALID = 0x71,
+ XE_GUC_LOAD_STATUS_PXP_TEARDOWN_CTRL_ENABLED = 0x72,
+ XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START,
+ XE_GUC_LOAD_STATUS_MPU_DATA_INVALID = 0x73,
+ XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID = 0x74,
+ XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END,
+
+ XE_GUC_LOAD_STATUS_READY = 0xF0,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
new file mode 100644
index 000000000000..47094b9b044c
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -0,0 +1,322 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_KLVS_ABI_H
+#define _ABI_GUC_KLVS_ABI_H
+
+#include <linux/types.h>
+
+/**
+ * DOC: GuC KLV
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31:16 | **KEY** - KLV key identifier |
+ * | | | - `GuC Self Config KLVs`_ |
+ * | | | - `GuC VGT Policy KLVs`_ |
+ * | | | - `GuC VF Configuration KLVs`_ |
+ * | | | |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | **LEN** - length of VALUE (in 32bit dwords) |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | **VALUE** - actual value of the KLV (format depends on KEY) |
+ * +---+-------+ |
+ * |...| | |
+ * +---+-------+ |
+ * | n | 31:0 | |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_KLV_LEN_MIN 1u
+#define GUC_KLV_0_KEY (0xffff << 16)
+#define GUC_KLV_0_LEN (0xffff << 0)
+#define GUC_KLV_n_VALUE (0xffffffff << 0)
+
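A KLV is therefore a one-dword key/length header followed by LEN value dwords; a short illustrative helper for emitting a 32-bit KLV (not part of the patch):

#include <linux/bitfield.h>

/* Emit a single 32-bit KLV at 'klv' and return the number of dwords
 * written (illustrative).
 */
static u32 emit_klv32(u32 *klv, u16 key, u32 value)
{
	klv[0] = FIELD_PREP(GUC_KLV_0_KEY, key) |
		 FIELD_PREP(GUC_KLV_0_LEN, 1);
	klv[1] = value;

	return 2;
}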
+/**
+ * DOC: GuC Self Config KLVs
+ *
+ * `GuC KLV`_ keys available for use with HOST2GUC_SELF_CFG_.
+ *
+ * _`GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR` : 0x0900
+ * Refers to 64 bit Global Gfx address (in bytes) of memory based interrupts
+ * status vector for use by the GuC.
+ *
+ * _`GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR` : 0x0901
+ * Refers to 64 bit Global Gfx address (in bytes) of memory based interrupts
+ * source vector for use by the GuC.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_ADDR` : 0x0902
+ * Refers to 64 bit Global Gfx address of H2G `CT Buffer`_.
+ * Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR` : 0x0903
+ * Refers to 64 bit Global Gfx address of H2G `CTB Descriptor`_.
+ * Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_SIZE` : 0x0904
+ * Refers to size of H2G `CT Buffer`_ in bytes.
+ * Should be a multiple of 4K.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_ADDR` : 0x0905
+ * Refers to 64 bit Global Gfx address of G2H `CT Buffer`_.
+ * Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR` : 0x0906
+ * Refers to 64 bit Global Gfx address of G2H `CTB Descriptor`_.
+ * Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_SIZE` : 0x0907
+ * Refers to size of G2H `CT Buffer`_ in bytes.
+ * Should be a multiple of 4K.
+ */
+
+#define GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_KEY 0x0900
+#define GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_LEN 2u
+
+#define GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_KEY 0x0901
+#define GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_LEN 2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY 0x0902
+#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_LEN 2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY 0x0903
+#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_LEN 2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY 0x0904
+#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_LEN 1u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY 0x0905
+#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_LEN 2u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY 0x0906
+#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_LEN 2u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY 0x0907
+#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN 1u
+
+/*
+ * Per context scheduling policy update keys.
+ */
+enum {
+ GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM = 0x2001,
+ GUC_CONTEXT_POLICIES_KLV_ID_PREEMPTION_TIMEOUT = 0x2002,
+ GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY = 0x2003,
+ GUC_CONTEXT_POLICIES_KLV_ID_PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY = 0x2004,
+ GUC_CONTEXT_POLICIES_KLV_ID_SLPM_GT_FREQUENCY = 0x2005,
+
+ GUC_CONTEXT_POLICIES_KLV_NUM_IDS = 5,
+};
+
+/**
+ * DOC: GuC VGT Policy KLVs
+ *
+ * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VGT_POLICY.
+ *
+ * _`GUC_KLV_VGT_POLICY_SCHED_IF_IDLE` : 0x8001
+ * This config sets whether strict scheduling is enabled whereby any VF
+ * that doesn’t have work to submit is still allocated a fixed execution
+ * time-slice to ensure active VFs execution is always consitent even
+ * during other VF reprovisiong / rebooting events. Changing this KLV
+ * impacts all VFs and takes effect on the next VF-Switch event.
+ *
+ * :0: don't schedule idle (default)
+ * :1: schedule if idle
+ *
+ * _`GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD` : 0x8002
+ * This config sets the sample period for tracking adverse event counters.
+ * A sample period is the period in milliseconds during which events are counted.
+ * This is applicable for all the VFs.
+ *
+ * :0: adverse events are not counted (default)
+ * :n: sample period in milliseconds
+ *
+ * _`GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH` : 0x8D00
+ * This config controls whether utilized HW engines are reset after a VF
+ * switch (i.e. to clean up stale HW registers left behind by the previous VF).
+ *
+ * :0: don't reset (default)
+ * :1: reset
+ */
+
+#define GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY 0x8001
+#define GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_LEN 1u
+
+#define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY 0x8002
+#define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_LEN 1u
+
+#define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY 0x8D00
+#define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_LEN 1u
+
+/**
+ * DOC: GuC VF Configuration KLVs
+ *
+ * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VF_CFG.
+ *
+ * _`GUC_KLV_VF_CFG_GGTT_START` : 0x0001
+ * A 4K aligned start GTT address/offset assigned to VF.
+ * Value is 64 bits.
+ *
+ * _`GUC_KLV_VF_CFG_GGTT_SIZE` : 0x0002
+ * A 4K aligned size of GGTT assigned to VF.
+ * Value is 64 bits.
+ *
+ * _`GUC_KLV_VF_CFG_LMEM_SIZE` : 0x0003
+ * A 2M aligned size of local memory assigned to VF.
+ * Value is 64 bits.
+ *
+ * _`GUC_KLV_VF_CFG_NUM_CONTEXTS` : 0x0004
+ * Refers to the number of contexts allocated to this VF.
+ *
+ * :0: no contexts (default)
+ * :1-65535: number of contexts (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_TILE_MASK` : 0x0005
+ * For multi-tiled products, this field contains the bitwise-OR of tiles
+ * assigned to the VF. Bit-0-set means VF has access to Tile-0,
+ * Bit-31-set means VF has access to Tile-31, etc.
+ * At least one tile will always be allocated.
+ * If all bits are zero, VF KMD should treat this as a fatal error.
+ * For single-tile products, this KLV config is ignored.
+ *
+ * _`GUC_KLV_VF_CFG_NUM_DOORBELLS` : 0x0006
+ * Refers to the number of doorbells allocated to this VF.
+ *
+ * :0: no doorbells (default)
+ * :1-255: number of doorbells (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_EXEC_QUANTUM` : 0x8A01
+ * This config sets the VFs-execution-quantum in milliseconds.
+ * GUC will attempt to obey the maximum values as much as HW is capable
+ * of and this will never be perfectly-exact (accumulated nano-second
+ * granularity) since the GPU's clock time runs off a different crystal
+ * from the CPU's clock. Changing this KLV on a VF that is currently
+ * running a context won't take effect until a new context is scheduled in.
+ * That said, when the PF is changing this value from 0xFFFFFFFF to
+ * something else, it might never take effect if the VF is running an
+ * infinitely long compute or shader kernel. In such a scenario, the
+ * PF would need to trigger a VM PAUSE and then change the KLV to force
+ * it to take effect. Such cases might typically happen on a 1PF+1VF
+ * Virtualization config enabled for heavier workloads like AI/ML.
+ *
+ * :0: infinite exec quantum (default)
+ *
+ * _`GUC_KLV_VF_CFG_PREEMPT_TIMEOUT` : 0x8A02
+ * This config sets the VF's preemption timeout in microseconds.
+ * GUC will attempt to obey the minimum and maximum values as much as
+ * HW is capable of, and this will never be perfectly exact (accumulated
+ * nano-second granularity) since the GPU's clock runs off a
+ * different crystal from the CPU's clock. Changing this KLV on a VF
+ * that is currently running a context won't take effect until a new
+ * context is scheduled in.
+ * That said, when the PF is changing this value from 0xFFFFFFFF to
+ * something else, it might never take effect if the VF is running an
+ * infinitely long compute or shader kernel.
+ * In this case, the PF would need to trigger a VM PAUSE and then change
+ * the KLV to force it to take effect. Such cases might typically happen
+ * on a 1PF+1VF virtualization config enabled for heavier workloads like
+ * AI/ML.
+ *
+ * :0: no preemption timeout (default)
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR` : 0x8A03
+ * This config sets the threshold for CAT errors caused by the VF.
+ *
+ * :0: adverse events or errors will not be reported (default)
+ * :n: event occurrence count per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET` : 0x8A04
+ * This config sets the threshold for engine resets caused by the VF.
+ *
+ * :0: adverse events or errors will not be reported (default)
+ * :n: event occurrence count per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT` : 0x8A05
+ * This config sets the threshold for page fault errors caused by the VF.
+ *
+ * :0: adverse events or errors will not be reported (default)
+ * :n: event occurrence count per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM` : 0x8A06
+ * This config sets the threshold for H2G interrupts triggered by the VF.
+ *
+ * :0: adverse events or errors will not be reported (default)
+ * :n: time (us) per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM` : 0x8A07
+ * This config sets the threshold for GT interrupts triggered by the VF's
+ * workloads.
+ *
+ * :0: adverse events or errors will not be reported (default)
+ * :n: time (us) per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM` : 0x8A08
+ * This config sets the threshold for doorbell rings triggered by the VF.
+ *
+ * :0: adverse events or errors will not be reported (default)
+ * :n: time (us) per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID` : 0x8A0A
+ * Refers to the start index of the doorbells assigned to this VF.
+ *
+ * :0: (default)
+ * :1-255: number of doorbells (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID` : 0x8A0B
+ * Refers to the start index in the context array allocated to this VF's use.
+ *
+ * :0: (default)
+ * :1-65535: number of contexts (Gen12)
+ */
+
+#define GUC_KLV_VF_CFG_GGTT_START_KEY 0x0001
+#define GUC_KLV_VF_CFG_GGTT_START_LEN 2u
+
+#define GUC_KLV_VF_CFG_GGTT_SIZE_KEY 0x0002
+#define GUC_KLV_VF_CFG_GGTT_SIZE_LEN 2u
+
+#define GUC_KLV_VF_CFG_LMEM_SIZE_KEY 0x0003
+#define GUC_KLV_VF_CFG_LMEM_SIZE_LEN 2u
+
+#define GUC_KLV_VF_CFG_NUM_CONTEXTS_KEY 0x0004
+#define GUC_KLV_VF_CFG_NUM_CONTEXTS_LEN 1u
+
+#define GUC_KLV_VF_CFG_TILE_MASK_KEY 0x0005
+#define GUC_KLV_VF_CFG_TILE_MASK_LEN 1u
+
+#define GUC_KLV_VF_CFG_NUM_DOORBELLS_KEY 0x0006
+#define GUC_KLV_VF_CFG_NUM_DOORBELLS_LEN 1u
+
+#define GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY 0x8a01
+#define GUC_KLV_VF_CFG_EXEC_QUANTUM_LEN 1u
+
+#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY 0x8a02
+#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_LEN 1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_KEY 0x8a03
+#define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_LEN 1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET_KEY 0x8a04
+#define GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET_LEN 1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT_KEY 0x8a05
+#define GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT_LEN 1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM_KEY 0x8a06
+#define GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM_LEN 1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM_KEY 0x8a07
+#define GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM_LEN 1u
+
+#define GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM_KEY 0x8a08
+#define GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM_LEN 1u
+
+#define GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_KEY 0x8a0a
+#define GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_LEN 1u
+
+#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY 0x8a0b
+#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_LEN 1u
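+
+/*
+ * Illustrative sketch only (not part of the ABI): a 64-bit VF config
+ * value such as GUC_KLV_VF_CFG_GGTT_SIZE (LEN = 2) is carried as two
+ * value dwords; the lower dword is assumed to come first here.
+ * GUC_KLV_0_KEY / GUC_KLV_0_LEN are the generic KLV header fields
+ * assumed from earlier in this file; klv[] and ggtt_size are
+ * hypothetical.
+ *
+ *	u64 ggtt_size = SZ_4G;
+ *	u32 klv[1 + GUC_KLV_VF_CFG_GGTT_SIZE_LEN];
+ *
+ *	klv[0] = FIELD_PREP(GUC_KLV_0_KEY, GUC_KLV_VF_CFG_GGTT_SIZE_KEY) |
+ *		 FIELD_PREP(GUC_KLV_0_LEN, GUC_KLV_VF_CFG_GGTT_SIZE_LEN);
+ *	klv[1] = lower_32_bits(ggtt_size);
+ *	klv[2] = upper_32_bits(ggtt_size);
+ */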
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_messages_abi.h b/drivers/gpu/drm/xe/abi/guc_messages_abi.h
new file mode 100644
index 000000000000..3d199016cf88
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_messages_abi.h
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_MESSAGES_ABI_H
+#define _ABI_GUC_MESSAGES_ABI_H
+
+/**
+ * DOC: HXG Message
+ *
+ * All messages exchanged with GuC are defined using 32-bit dwords.
+ * First dword is treated as a message header. Remaining dwords are optional.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | | | |
+ * | 0 | 31 | **ORIGIN** - originator of the message |
+ * | | | - _`GUC_HXG_ORIGIN_HOST` = 0 |
+ * | | | - _`GUC_HXG_ORIGIN_GUC` = 1 |
+ * | | | |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | **TYPE** - message type |
+ * | | | - _`GUC_HXG_TYPE_REQUEST` = 0 |
+ * | | | - _`GUC_HXG_TYPE_EVENT` = 1 |
+ * | | | - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3 |
+ * | | | - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5 |
+ * | | | - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6 |
+ * | | | - _`GUC_HXG_TYPE_RESPONSE_SUCCESS` = 7 |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:0 | **AUX** - auxiliary data (depends on TYPE) |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | |
+ * +---+-------+ |
+ * |...| | **PAYLOAD** - optional payload (depends on TYPE) |
+ * +---+-------+ |
+ * | n | 31:0 | |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_MSG_MIN_LEN 1u
+#define GUC_HXG_MSG_0_ORIGIN (0x1 << 31)
+#define GUC_HXG_ORIGIN_HOST 0u
+#define GUC_HXG_ORIGIN_GUC 1u
+#define GUC_HXG_MSG_0_TYPE (0x7 << 28)
+#define GUC_HXG_TYPE_REQUEST 0u
+#define GUC_HXG_TYPE_EVENT 1u
+#define GUC_HXG_TYPE_NO_RESPONSE_BUSY 3u
+#define GUC_HXG_TYPE_NO_RESPONSE_RETRY 5u
+#define GUC_HXG_TYPE_RESPONSE_FAILURE 6u
+#define GUC_HXG_TYPE_RESPONSE_SUCCESS 7u
+#define GUC_HXG_MSG_0_AUX (0xfffffff << 0)
+#define GUC_HXG_MSG_n_PAYLOAD (0xffffffff << 0)
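+
+/*
+ * Illustrative sketch only (not part of the ABI): decoding the header
+ * dword of a received message using FIELD_GET() from <linux/bitfield.h>.
+ * msg[], len and handle_guc_event() are hypothetical.
+ *
+ *	u32 origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]);
+ *	u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
+ *
+ *	if (origin == GUC_HXG_ORIGIN_GUC && type == GUC_HXG_TYPE_EVENT)
+ *		handle_guc_event(msg, len);
+ */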
+
+/**
+ * DOC: HXG Request
+ *
+ * The `HXG Request`_ message should be used to initiate synchronous activity
+ * for which confirmation or return data is expected.
+ *
+ * The recipient of this message shall use `HXG Response`_, `HXG Failure`_
+ * or `HXG Retry`_ message as a definite reply, and may use the `HXG Busy`_
+ * message as an intermediate reply.
+ *
+ * Format of @DATA0 and all @DATAn fields depends on the @ACTION code.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | **DATA0** - request data (depends on ACTION) |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | **ACTION** - requested action code |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | |
+ * +---+-------+ |
+ * |...| | **DATAn** - optional data (depends on ACTION) |
+ * +---+-------+ |
+ * | n | 31:0 | |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_REQUEST_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_REQUEST_MSG_0_DATA0 (0xfff << 16)
+#define GUC_HXG_REQUEST_MSG_0_ACTION (0xffff << 0)
+#define GUC_HXG_REQUEST_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD
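+
+/*
+ * Illustrative sketch only (not part of the ABI): composing dword 0 of
+ * an `HXG Request`_ with FIELD_PREP() from <linux/bitfield.h>; action
+ * stands for any 16-bit action code and msg[] is hypothetical.
+ *
+ *	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ *		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ *		 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, 0) |
+ *		 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, action);
+ */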
+
+/**
+ * DOC: HXG Event
+ *
+ * The `HXG Event`_ message should be used to initiate asynchronous activity
+ * that does not involve immediate confirmation or return data.
+ *
+ * Format of @DATA0 and all @DATAn fields depends on the @ACTION code.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | **DATA0** - event data (depends on ACTION) |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | **ACTION** - event action code |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | |
+ * +---+-------+ |
+ * |...| | **DATAn** - optional event data (depends on ACTION) |
+ * +---+-------+ |
+ * | n | 31:0 | |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_EVENT_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_EVENT_MSG_0_DATA0 (0xfff << 16)
+#define GUC_HXG_EVENT_MSG_0_ACTION (0xffff << 0)
+#define GUC_HXG_EVENT_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD
+
+/**
+ * DOC: HXG Busy
+ *
+ * The `HXG Busy`_ message may be used to acknowledge reception of the `HXG Request`_
+ * message if the recipient expects that its processing will take longer than the
+ * default timeout.
+ *
+ * The @COUNTER field may be used as a progress indicator.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_BUSY_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:0 | **COUNTER** - progress indicator |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_BUSY_MSG_LEN GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_BUSY_MSG_0_COUNTER GUC_HXG_MSG_0_AUX
+
+/**
+ * DOC: HXG Retry
+ *
+ * The `HXG Retry`_ message should be used by the recipient to indicate that
+ * the `HXG Request`_ message was dropped and should be resent.
+ *
+ * The @REASON field may be used to provide additional information.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_RETRY_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:0 | **REASON** - reason for retry |
+ * | | | - _`GUC_HXG_RETRY_REASON_UNSPECIFIED` = 0 |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_RETRY_MSG_LEN GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_RETRY_MSG_0_REASON GUC_HXG_MSG_0_AUX
+#define GUC_HXG_RETRY_REASON_UNSPECIFIED 0u
+
+/**
+ * DOC: HXG Failure
+ *
+ * The `HXG Failure`_ message shall be used as a reply to the `HXG Request`_
+ * message that could not be processed due to an error.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_FAILURE_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | **HINT** - additional error hint |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | **ERROR** - error/result code |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_FAILURE_MSG_LEN GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_FAILURE_MSG_0_HINT (0xfff << 16)
+#define GUC_HXG_FAILURE_MSG_0_ERROR (0xffff << 0)
+
+/**
+ * DOC: HXG Response
+ *
+ * The `HXG Response`_ message shall be used as a reply to the `HXG Request`_
+ * message that was successfully processed without an error.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:0 | **DATA0** - data (depends on ACTION from `HXG Request`_) |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | |
+ * +---+-------+ |
+ * |...| | **DATAn** - data (depends on ACTION from `HXG Request`_) |
+ * +---+-------+ |
+ * | n | 31:0 | |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_HXG_RESPONSE_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN
+#define GUC_HXG_RESPONSE_MSG_0_DATA0 GUC_HXG_MSG_0_AUX
+#define GUC_HXG_RESPONSE_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD
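+
+/*
+ * Illustrative sketch only (not part of the ABI): inspecting a reply to
+ * an `HXG Request`_ using the masks above; msg[], data0 and err are
+ * hypothetical.
+ *
+ *	u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
+ *
+ *	if (type == GUC_HXG_TYPE_RESPONSE_SUCCESS)
+ *		data0 = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, msg[0]);
+ *	else if (type == GUC_HXG_TYPE_RESPONSE_FAILURE)
+ *		err = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[0]);
+ */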
+
+/* deprecated */
+#define INTEL_GUC_MSG_TYPE_SHIFT 28
+#define INTEL_GUC_MSG_TYPE_MASK (0xF << INTEL_GUC_MSG_TYPE_SHIFT)
+#define INTEL_GUC_MSG_DATA_SHIFT 16
+#define INTEL_GUC_MSG_DATA_MASK (0xFFF << INTEL_GUC_MSG_DATA_SHIFT)
+#define INTEL_GUC_MSG_CODE_SHIFT 0
+#define INTEL_GUC_MSG_CODE_MASK (0xFFFF << INTEL_GUC_MSG_CODE_SHIFT)
+
+enum intel_guc_msg_type {
+ INTEL_GUC_MSG_TYPE_REQUEST = 0x0,
+ INTEL_GUC_MSG_TYPE_RESPONSE = 0xF,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile
new file mode 100644
index 000000000000..47056b6459e3
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_bo_test.o xe_dma_buf_test.o \
+ xe_migrate_test.o
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
new file mode 100644
index 000000000000..87ac21cc8ca9
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+#include "xe_bo_evict.h"
+#include "xe_pci.h"
+
+static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo,
+ bool clear, u64 get_val, u64 assign_val,
+ struct kunit *test)
+{
+ struct dma_fence *fence;
+ struct ttm_tt *ttm;
+ struct page *page;
+ pgoff_t ccs_page;
+ long timeout;
+ u64 *cpu_map;
+ int ret;
+ u32 offset;
+
+ /* Move bo to VRAM if not already there. */
+ ret = xe_bo_validate(bo, NULL, false);
+ if (ret) {
+ KUNIT_FAIL(test, "Failed to validate bo.\n");
+ return ret;
+ }
+
+ /* Optionally clear bo *and* CCS data in VRAM. */
+ if (clear) {
+ fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource, 0);
+ if (IS_ERR(fence)) {
+ KUNIT_FAIL(test, "Failed to submit bo clear.\n");
+ return PTR_ERR(fence);
+ }
+ dma_fence_put(fence);
+ }
+
+ /* Evict to system. CCS data should be copied. */
+ ret = xe_bo_evict(bo, true);
+ if (ret) {
+ KUNIT_FAIL(test, "Failed to evict bo.\n");
+ return ret;
+ }
+
+ /* Sync all migration blits */
+ timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
+ DMA_RESV_USAGE_KERNEL,
+ true,
+ 5 * HZ);
+ if (timeout <= 0) {
+ KUNIT_FAIL(test, "Failed to sync bo eviction.\n");
+ return -ETIME;
+ }
+
+ /*
+ * Bo with CCS data is now in system memory. Verify backing store
+ * and data integrity. Then assign for the next testing round while
+ * we still have a CPU map.
+ */
+ ttm = bo->ttm.ttm;
+ if (!ttm || !ttm_tt_is_populated(ttm)) {
+ KUNIT_FAIL(test, "Bo was not in expected placement.\n");
+ return -EINVAL;
+ }
+
+ ccs_page = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT;
+ if (ccs_page >= ttm->num_pages) {
+ KUNIT_FAIL(test, "No TTM CCS pages present.\n");
+ return -EINVAL;
+ }
+
+ page = ttm->pages[ccs_page];
+ cpu_map = kmap_local_page(page);
+
+ /* Check first CCS value */
+ if (cpu_map[0] != get_val) {
+ KUNIT_FAIL(test,
+ "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
+ (unsigned long long)get_val,
+ (unsigned long long)cpu_map[0]);
+ ret = -EINVAL;
+ }
+
+ /* Check last CCS value, or at least last value in page. */
+ offset = xe_device_ccs_bytes(gt->xe, bo->size);
+ offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1;
+ if (cpu_map[offset] != get_val) {
+ KUNIT_FAIL(test,
+ "Expected CCS readout 0x%016llx, got 0x%016llx.\n",
+ (unsigned long long)get_val,
+ (unsigned long long)cpu_map[offset]);
+ ret = -EINVAL;
+ }
+
+ cpu_map[0] = assign_val;
+ cpu_map[offset] = assign_val;
+ kunmap_local(cpu_map);
+
+ return ret;
+}
+
+static void ccs_test_run_gt(struct xe_device *xe, struct xe_gt *gt,
+ struct kunit *test)
+{
+ struct xe_bo *bo;
+ u32 vram_bit;
+ int ret;
+
+ /* TODO: Sanity check */
+ vram_bit = XE_BO_CREATE_VRAM0_BIT << gt->info.vram_id;
+ kunit_info(test, "Testing gt id %u vram id %u\n", gt->info.id,
+ gt->info.vram_id);
+
+ bo = xe_bo_create_locked(xe, NULL, NULL, SZ_1M, ttm_bo_type_device,
+ vram_bit);
+ if (IS_ERR(bo)) {
+ KUNIT_FAIL(test, "Failed to create bo.\n");
+ return;
+ }
+
+ kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
+ ret = ccs_test_migrate(gt, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
+ test);
+ if (ret)
+ goto out_unlock;
+
+ kunit_info(test, "Verifying that CCS data survives migration.\n");
+ ret = ccs_test_migrate(gt, bo, false, 0xdeadbeefdeadbeefULL,
+ 0xdeadbeefdeadbeefULL, test);
+ if (ret)
+ goto out_unlock;
+
+ kunit_info(test, "Verifying that CCS data can be properly cleared.\n");
+ ret = ccs_test_migrate(gt, bo, true, 0ULL, 0ULL, test);
+
+out_unlock:
+ xe_bo_unlock_no_vm(bo);
+ xe_bo_put(bo);
+}
+
+static int ccs_test_run_device(struct xe_device *xe)
+{
+ struct kunit *test = xe_cur_kunit();
+ struct xe_gt *gt;
+ int id;
+
+ if (!xe_device_has_flat_ccs(xe)) {
+ kunit_info(test, "Skipping non-flat-ccs device.\n");
+ return 0;
+ }
+
+ for_each_gt(gt, xe, id)
+ ccs_test_run_gt(xe, gt, test);
+
+ return 0;
+}
+
+void xe_ccs_migrate_kunit(struct kunit *test)
+{
+ xe_call_for_each_device(ccs_test_run_device);
+}
+EXPORT_SYMBOL(xe_ccs_migrate_kunit);
+
+static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kunit *test)
+{
+ struct xe_bo *bo, *external;
+ unsigned int bo_flags = XE_BO_CREATE_USER_BIT |
+ XE_BO_CREATE_VRAM_IF_DGFX(gt);
+ struct xe_vm *vm = xe_migrate_get_vm(xe->gt[0].migrate);
+ struct ww_acquire_ctx ww;
+ int err, i;
+
+ kunit_info(test, "Testing device %s gt id %u vram id %u\n",
+ dev_name(xe->drm.dev), gt->info.id, gt->info.vram_id);
+
+ for (i = 0; i < 2; ++i) {
+ xe_vm_lock(vm, &ww, 0, false);
+ bo = xe_bo_create(xe, NULL, vm, 0x10000, ttm_bo_type_device,
+ bo_flags);
+ xe_vm_unlock(vm, &ww);
+ if (IS_ERR(bo)) {
+ KUNIT_FAIL(test, "bo create err=%pe\n", bo);
+ break;
+ }
+
+ external = xe_bo_create(xe, NULL, NULL, 0x10000,
+ ttm_bo_type_device, bo_flags);
+ if (IS_ERR(external)) {
+ KUNIT_FAIL(test, "external bo create err=%pe\n", external);
+ goto cleanup_bo;
+ }
+
+ xe_bo_lock(external, &ww, 0, false);
+ err = xe_bo_pin_external(external);
+ xe_bo_unlock(external, &ww);
+ if (err) {
+ KUNIT_FAIL(test, "external bo pin err=%pe\n",
+ ERR_PTR(err));
+ goto cleanup_external;
+ }
+
+ err = xe_bo_evict_all(xe);
+ if (err) {
+ KUNIT_FAIL(test, "evict err=%pe\n", ERR_PTR(err));
+ goto cleanup_all;
+ }
+
+ err = xe_bo_restore_kernel(xe);
+ if (err) {
+ KUNIT_FAIL(test, "restore kernel err=%pe\n",
+ ERR_PTR(err));
+ goto cleanup_all;
+ }
+
+ err = xe_bo_restore_user(xe);
+ if (err) {
+ KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err));
+ goto cleanup_all;
+ }
+
+ if (!xe_bo_is_vram(external)) {
+ KUNIT_FAIL(test, "external bo is not vram\n");
+ err = -EPROTO;
+ goto cleanup_all;
+ }
+
+ if (xe_bo_is_vram(bo)) {
+ KUNIT_FAIL(test, "bo is vram\n");
+ err = -EPROTO;
+ goto cleanup_all;
+ }
+
+ if (i) {
+ down_read(&vm->lock);
+ xe_vm_lock(vm, &ww, 0, false);
+ err = xe_bo_validate(bo, bo->vm, false);
+ xe_vm_unlock(vm, &ww);
+ up_read(&vm->lock);
+ if (err) {
+ KUNIT_FAIL(test, "bo valid err=%pe\n",
+ ERR_PTR(err));
+ goto cleanup_all;
+ }
+ xe_bo_lock(external, &ww, 0, false);
+ err = xe_bo_validate(external, NULL, false);
+ xe_bo_unlock(external, &ww);
+ if (err) {
+ KUNIT_FAIL(test, "external bo valid err=%pe\n",
+ ERR_PTR(err));
+ goto cleanup_all;
+ }
+ }
+
+ xe_bo_lock(external, &ww, 0, false);
+ xe_bo_unpin_external(external);
+ xe_bo_unlock(external, &ww);
+
+ xe_bo_put(external);
+ xe_bo_put(bo);
+ continue;
+
+cleanup_all:
+ xe_bo_lock(external, &ww, 0, false);
+ xe_bo_unpin_external(external);
+ xe_bo_unlock(external, &ww);
+cleanup_external:
+ xe_bo_put(external);
+cleanup_bo:
+ xe_bo_put(bo);
+ break;
+ }
+
+ xe_vm_put(vm);
+
+ return 0;
+}
+
+static int evict_test_run_device(struct xe_device *xe)
+{
+ struct kunit *test = xe_cur_kunit();
+ struct xe_gt *gt;
+ int id;
+
+ if (!IS_DGFX(xe)) {
+ kunit_info(test, "Skipping non-discrete device %s.\n",
+ dev_name(xe->drm.dev));
+ return 0;
+ }
+
+ for_each_gt(gt, xe, id)
+ evict_test_run_gt(xe, gt, test);
+
+ return 0;
+}
+
+void xe_bo_evict_kunit(struct kunit *test)
+{
+ xe_call_for_each_device(evict_test_run_device);
+}
+EXPORT_SYMBOL(xe_bo_evict_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c
new file mode 100644
index 000000000000..c8fa29b0b3b2
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_bo_test.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+void xe_ccs_migrate_kunit(struct kunit *test);
+void xe_bo_evict_kunit(struct kunit *test);
+
+static struct kunit_case xe_bo_tests[] = {
+ KUNIT_CASE(xe_ccs_migrate_kunit),
+ KUNIT_CASE(xe_bo_evict_kunit),
+ {}
+};
+
+static struct kunit_suite xe_bo_test_suite = {
+ .name = "xe_bo",
+ .test_cases = xe_bo_tests,
+};
+
+kunit_test_suite(xe_bo_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
new file mode 100644
index 000000000000..615d22e3f731
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+#include "xe_pci.h"
+
+static bool p2p_enabled(struct dma_buf_test_params *params)
+{
+ return IS_ENABLED(CONFIG_PCI_P2PDMA) && params->attach_ops &&
+ params->attach_ops->allow_peer2peer;
+}
+
+static bool is_dynamic(struct dma_buf_test_params *params)
+{
+ return IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY) && params->attach_ops &&
+ params->attach_ops->move_notify;
+}
+
+static void check_residency(struct kunit *test, struct xe_bo *exported,
+ struct xe_bo *imported, struct dma_buf *dmabuf)
+{
+ struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv);
+ u32 mem_type;
+ int ret;
+
+ xe_bo_assert_held(exported);
+ xe_bo_assert_held(imported);
+
+ mem_type = XE_PL_VRAM0;
+ if (!(params->mem_mask & XE_BO_CREATE_VRAM0_BIT))
+ /* No VRAM allowed */
+ mem_type = XE_PL_TT;
+ else if (params->force_different_devices && !p2p_enabled(params))
+ /* No P2P */
+ mem_type = XE_PL_TT;
+ else if (params->force_different_devices && !is_dynamic(params) &&
+ (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT))
+ /* Pin migrated to TT */
+ mem_type = XE_PL_TT;
+
+ if (!xe_bo_is_mem_type(exported, mem_type)) {
+ KUNIT_FAIL(test, "Exported bo was not in expected memory type.\n");
+ return;
+ }
+
+ if (xe_bo_is_pinned(exported))
+ return;
+
+ /*
+ * Evict exporter. Note that the gem object dma_buf member isn't
+ * set from xe_gem_prime_export(), and it's needed for the move_notify()
+ * functionality, so hack that up here. Evicting the exported bo will
+	 * functionality, so hack that up here. Evicting the exported bo will
+	 * also evict the imported bo through the move_notify() functionality if
+	 * the importer is on a different device. If they're on the same device,
+ */
+ swap(exported->ttm.base.dma_buf, dmabuf);
+ ret = xe_bo_evict(exported, true);
+ swap(exported->ttm.base.dma_buf, dmabuf);
+ if (ret) {
+ if (ret != -EINTR && ret != -ERESTARTSYS)
+ KUNIT_FAIL(test, "Evicting exporter failed with err=%d.\n",
+ ret);
+ return;
+ }
+
+ /* Verify that also importer has been evicted to SYSTEM */
+ if (!xe_bo_is_mem_type(imported, XE_PL_SYSTEM)) {
+ KUNIT_FAIL(test, "Importer wasn't properly evicted.\n");
+ return;
+ }
+
+ /* Re-validate the importer. This should move also exporter in. */
+ ret = xe_bo_validate(imported, NULL, false);
+ if (ret) {
+ if (ret != -EINTR && ret != -ERESTARTSYS)
+ KUNIT_FAIL(test, "Validating importer failed with err=%d.\n",
+ ret);
+ return;
+ }
+
+ /*
+ * If on different devices, the exporter is kept in system if
+	 * possible, saving a migration step, as the transfer is likely
+	 * just as fast from system memory.
+ */
+ if (params->force_different_devices &&
+ params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)
+ KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT));
+ else
+ KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type));
+
+ if (params->force_different_devices)
+ KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(imported, XE_PL_TT));
+ else
+ KUNIT_EXPECT_TRUE(test, exported == imported);
+}
+
+static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
+{
+ struct kunit *test = xe_cur_kunit();
+ struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv);
+ struct drm_gem_object *import;
+ struct dma_buf *dmabuf;
+ struct xe_bo *bo;
+
+ /* No VRAM on this device? */
+ if (!ttm_manager_type(&xe->ttm, XE_PL_VRAM0) &&
+ (params->mem_mask & XE_BO_CREATE_VRAM0_BIT))
+ return;
+
+ kunit_info(test, "running %s\n", __func__);
+ bo = xe_bo_create(xe, NULL, NULL, PAGE_SIZE, ttm_bo_type_device,
+ XE_BO_CREATE_USER_BIT | params->mem_mask);
+ if (IS_ERR(bo)) {
+ KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
+ PTR_ERR(bo));
+ return;
+ }
+
+ dmabuf = xe_gem_prime_export(&bo->ttm.base, 0);
+ if (IS_ERR(dmabuf)) {
+ KUNIT_FAIL(test, "xe_gem_prime_export() failed with err=%ld\n",
+ PTR_ERR(dmabuf));
+ goto out;
+ }
+
+ import = xe_gem_prime_import(&xe->drm, dmabuf);
+ if (!IS_ERR(import)) {
+ struct xe_bo *import_bo = gem_to_xe_bo(import);
+
+ /*
+ * Did import succeed when it shouldn't due to lack of p2p support?
+ */
+ if (params->force_different_devices &&
+ !p2p_enabled(params) &&
+ !(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) {
+ KUNIT_FAIL(test,
+ "xe_gem_prime_import() succeeded when it shouldn't have\n");
+ } else {
+ int err;
+
+ /* Is everything where we expect it to be? */
+ xe_bo_lock_no_vm(import_bo, NULL);
+ err = xe_bo_validate(import_bo, NULL, false);
+ if (err && err != -EINTR && err != -ERESTARTSYS)
+ KUNIT_FAIL(test,
+ "xe_bo_validate() failed with err=%d\n", err);
+
+ check_residency(test, bo, import_bo, dmabuf);
+ xe_bo_unlock_no_vm(import_bo);
+ }
+ drm_gem_object_put(import);
+ } else if (PTR_ERR(import) != -EOPNOTSUPP) {
+ /* Unexpected error code. */
+ KUNIT_FAIL(test,
+ "xe_gem_prime_import failed with the wrong err=%ld\n",
+ PTR_ERR(import));
+ } else if (!params->force_different_devices ||
+ p2p_enabled(params) ||
+ (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) {
+ /* Shouldn't fail if we can reuse same bo, use p2p or use system */
+ KUNIT_FAIL(test, "dynamic p2p attachment failed with err=%ld\n",
+ PTR_ERR(import));
+ }
+ dma_buf_put(dmabuf);
+out:
+ drm_gem_object_put(&bo->ttm.base);
+}
+
+static const struct dma_buf_attach_ops nop2p_attach_ops = {
+ .allow_peer2peer = false,
+ .move_notify = xe_dma_buf_move_notify
+};
+
+/*
+ * We test the implementation with bos of different residency and with
+ * importers with different capabilities; some lacking p2p support and some
+ * lacking dynamic capabilities (attach_ops == NULL). We also fake
+ * different devices avoiding the import shortcut that just reuses the same
+ * gem object.
+ */
+static const struct dma_buf_test_params test_params[] = {
+ {.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+ .attach_ops = &xe_dma_buf_attach_ops},
+ {.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+ .attach_ops = &xe_dma_buf_attach_ops,
+ .force_different_devices = true},
+
+ {.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+ .attach_ops = &nop2p_attach_ops},
+ {.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+ .attach_ops = &nop2p_attach_ops,
+ .force_different_devices = true},
+
+ {.mem_mask = XE_BO_CREATE_VRAM0_BIT},
+ {.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+ .force_different_devices = true},
+
+ {.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+ .attach_ops = &xe_dma_buf_attach_ops},
+ {.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+ .attach_ops = &xe_dma_buf_attach_ops,
+ .force_different_devices = true},
+
+ {.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+ .attach_ops = &nop2p_attach_ops},
+ {.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+ .attach_ops = &nop2p_attach_ops,
+ .force_different_devices = true},
+
+ {.mem_mask = XE_BO_CREATE_SYSTEM_BIT},
+ {.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+ .force_different_devices = true},
+
+ {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+ .attach_ops = &xe_dma_buf_attach_ops},
+ {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+ .attach_ops = &xe_dma_buf_attach_ops,
+ .force_different_devices = true},
+
+ {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+ .attach_ops = &nop2p_attach_ops},
+ {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+ .attach_ops = &nop2p_attach_ops,
+ .force_different_devices = true},
+
+ {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT},
+ {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+ .force_different_devices = true},
+
+ {}
+};
+
+static int dma_buf_run_device(struct xe_device *xe)
+{
+ const struct dma_buf_test_params *params;
+ struct kunit *test = xe_cur_kunit();
+
+ for (params = test_params; params->mem_mask; ++params) {
+ struct dma_buf_test_params p = *params;
+
+ p.base.id = XE_TEST_LIVE_DMA_BUF;
+ test->priv = &p;
+ xe_test_dmabuf_import_same_driver(xe);
+ }
+
+ /* A non-zero return would halt iteration over driver devices */
+ return 0;
+}
+
+void xe_dma_buf_kunit(struct kunit *test)
+{
+ xe_call_for_each_device(dma_buf_run_device);
+}
+EXPORT_SYMBOL(xe_dma_buf_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
new file mode 100644
index 000000000000..7bb292da1193
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+void xe_dma_buf_kunit(struct kunit *test);
+
+static struct kunit_case xe_dma_buf_tests[] = {
+ KUNIT_CASE(xe_dma_buf_kunit),
+ {}
+};
+
+static struct kunit_suite xe_dma_buf_test_suite = {
+ .name = "xe_dma_buf",
+ .test_cases = xe_dma_buf_tests,
+};
+
+kunit_test_suite(xe_dma_buf_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
new file mode 100644
index 000000000000..0f3b819f0a34
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -0,0 +1,378 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020-2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+#include "xe_pci.h"
+
+static bool sanity_fence_failed(struct xe_device *xe, struct dma_fence *fence,
+ const char *str, struct kunit *test)
+{
+ long ret;
+
+ if (IS_ERR(fence)) {
+ KUNIT_FAIL(test, "Failed to create fence for %s: %li\n", str,
+ PTR_ERR(fence));
+ return true;
+ }
+ if (!fence)
+ return true;
+
+ ret = dma_fence_wait_timeout(fence, false, 5 * HZ);
+ if (ret <= 0) {
+ KUNIT_FAIL(test, "Fence timed out for %s: %li\n", str, ret);
+ return true;
+ }
+
+ return false;
+}
+
+static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe,
+ struct xe_bb *bb, u32 second_idx, const char *str,
+ struct kunit *test)
+{
+ struct xe_sched_job *job = xe_bb_create_migration_job(m->eng, bb,
+ m->batch_base_ofs,
+ second_idx);
+ struct dma_fence *fence;
+
+ if (IS_ERR(job)) {
+		KUNIT_FAIL(test, "Failed to create sanity job: %li\n",
+ PTR_ERR(job));
+ return PTR_ERR(job);
+ }
+
+ xe_sched_job_arm(job);
+ fence = dma_fence_get(&job->drm.s_fence->finished);
+ xe_sched_job_push(job);
+
+ if (sanity_fence_failed(xe, fence, str, test))
+ return -ETIMEDOUT;
+
+ dma_fence_put(fence);
+ kunit_info(test, "%s: Job completed\n", str);
+ return 0;
+}
+
+static void
+sanity_populate_cb(struct xe_migrate_pt_update *pt_update,
+ struct xe_gt *gt, struct iosys_map *map, void *dst,
+ u32 qword_ofs, u32 num_qwords,
+ const struct xe_vm_pgtable_update *update)
+{
+ int i;
+ u64 *ptr = dst;
+
+ for (i = 0; i < num_qwords; i++)
+ ptr[i] = (qword_ofs + i - update->ofs) * 0x1111111111111111ULL;
+}
+
+static const struct xe_migrate_pt_update_ops sanity_ops = {
+ .populate = sanity_populate_cb,
+};
+
+#define check(_retval, _expected, str, _test) \
+ do { if ((_retval) != (_expected)) { \
+ KUNIT_FAIL(_test, "Sanity check failed: " str \
+ " expected %llx, got %llx\n", \
+ (u64)(_expected), (u64)(_retval)); \
+ } } while (0)
+
+static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
+ struct kunit *test)
+{
+ struct xe_device *xe = gt_to_xe(m->gt);
+ u64 retval, expected = 0xc0c0c0c0c0c0c0c0ULL;
+ bool big = bo->size >= SZ_2M;
+ struct dma_fence *fence;
+ const char *str = big ? "Copying big bo" : "Copying small bo";
+ int err;
+
+ struct xe_bo *sysmem = xe_bo_create_locked(xe, m->gt, NULL,
+ bo->size,
+ ttm_bo_type_kernel,
+ XE_BO_CREATE_SYSTEM_BIT);
+ if (IS_ERR(sysmem)) {
+ KUNIT_FAIL(test, "Failed to allocate sysmem bo for %s: %li\n",
+ str, PTR_ERR(sysmem));
+ return;
+ }
+
+ err = xe_bo_validate(sysmem, NULL, false);
+ if (err) {
+		KUNIT_FAIL(test, "Failed to validate system bo for %s: %i\n",
+ str, err);
+ goto out_unlock;
+ }
+
+ err = xe_bo_vmap(sysmem);
+ if (err) {
+		KUNIT_FAIL(test, "Failed to vmap system bo for %s: %i\n",
+ str, err);
+ goto out_unlock;
+ }
+
+ xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size);
+ fence = xe_migrate_clear(m, sysmem, sysmem->ttm.resource, 0xc0c0c0c0);
+ if (!sanity_fence_failed(xe, fence, big ? "Clearing sysmem big bo" :
+ "Clearing sysmem small bo", test)) {
+ retval = xe_map_rd(xe, &sysmem->vmap, 0, u64);
+ check(retval, expected, "sysmem first offset should be cleared",
+ test);
+ retval = xe_map_rd(xe, &sysmem->vmap, sysmem->size - 8, u64);
+ check(retval, expected, "sysmem last offset should be cleared",
+ test);
+ }
+ dma_fence_put(fence);
+
+ /* Try to copy 0xc0 from sysmem to lmem with 2MB or 64KiB/4KiB pages */
+ xe_map_memset(xe, &sysmem->vmap, 0, 0xc0, sysmem->size);
+ xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size);
+
+ fence = xe_migrate_copy(m, sysmem, sysmem->ttm.resource,
+ bo->ttm.resource);
+ if (!sanity_fence_failed(xe, fence, big ? "Copying big bo sysmem -> vram" :
+ "Copying small bo sysmem -> vram", test)) {
+ retval = xe_map_rd(xe, &bo->vmap, 0, u64);
+ check(retval, expected,
+ "sysmem -> vram bo first offset should be copied", test);
+ retval = xe_map_rd(xe, &bo->vmap, bo->size - 8, u64);
+ check(retval, expected,
+		      "sysmem -> vram bo last offset should be copied", test);
+ }
+ dma_fence_put(fence);
+
+ /* And other way around.. slightly hacky.. */
+ xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size);
+ xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size);
+
+ fence = xe_migrate_copy(m, sysmem, bo->ttm.resource,
+ sysmem->ttm.resource);
+ if (!sanity_fence_failed(xe, fence, big ? "Copying big bo vram -> sysmem" :
+ "Copying small bo vram -> sysmem", test)) {
+ retval = xe_map_rd(xe, &sysmem->vmap, 0, u64);
+ check(retval, expected,
+ "vram -> sysmem bo first offset should be copied", test);
+ retval = xe_map_rd(xe, &sysmem->vmap, bo->size - 8, u64);
+ check(retval, expected,
+ "vram -> sysmem bo last offset should be copied", test);
+ }
+ dma_fence_put(fence);
+
+ xe_bo_vunmap(sysmem);
+out_unlock:
+ xe_bo_unlock_no_vm(sysmem);
+ xe_bo_put(sysmem);
+}
+
+static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt,
+ struct kunit *test)
+{
+ struct xe_device *xe = gt_to_xe(m->gt);
+ struct dma_fence *fence;
+ u64 retval, expected;
+ int i;
+
+ struct xe_vm_pgtable_update update = {
+ .ofs = 1,
+ .qwords = 0x10,
+ .pt_bo = pt,
+ };
+ struct xe_migrate_pt_update pt_update = {
+ .ops = &sanity_ops,
+ };
+
+ /* Test xe_migrate_update_pgtables() updates the pagetable as expected */
+ expected = 0xf0f0f0f0f0f0f0f0ULL;
+ xe_map_memset(xe, &pt->vmap, 0, (u8)expected, pt->size);
+
+ fence = xe_migrate_update_pgtables(m, NULL, NULL, m->eng, &update, 1,
+ NULL, 0, &pt_update);
+ if (sanity_fence_failed(xe, fence, "Migration pagetable update", test))
+ return;
+
+ dma_fence_put(fence);
+ retval = xe_map_rd(xe, &pt->vmap, 0, u64);
+ check(retval, expected, "PTE[0] must stay untouched", test);
+
+ for (i = 0; i < update.qwords; i++) {
+ retval = xe_map_rd(xe, &pt->vmap, (update.ofs + i) * 8, u64);
+ check(retval, i * 0x1111111111111111ULL, "PTE update", test);
+ }
+
+ retval = xe_map_rd(xe, &pt->vmap, 8 * (update.ofs + update.qwords),
+ u64);
+ check(retval, expected, "PTE[0x11] must stay untouched", test);
+}
+
+static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
+{
+ struct xe_gt *gt = m->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_bo *pt, *bo = m->pt_bo, *big, *tiny;
+ struct xe_res_cursor src_it;
+ struct dma_fence *fence;
+ u64 retval, expected;
+ struct xe_bb *bb;
+ int err;
+ u8 id = gt->info.id;
+
+ err = xe_bo_vmap(bo);
+ if (err) {
+		KUNIT_FAIL(test, "Failed to vmap our pagetables: %i\n", err);
+ return;
+ }
+
+ big = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, SZ_4M,
+ ttm_bo_type_kernel,
+ XE_BO_CREATE_VRAM_IF_DGFX(m->gt) |
+ XE_BO_CREATE_PINNED_BIT);
+ if (IS_ERR(big)) {
+ KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big));
+ goto vunmap;
+ }
+
+ pt = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, GEN8_PAGE_SIZE,
+ ttm_bo_type_kernel,
+ XE_BO_CREATE_VRAM_IF_DGFX(m->gt) |
+ XE_BO_CREATE_PINNED_BIT);
+ if (IS_ERR(pt)) {
+ KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n",
+ PTR_ERR(pt));
+ goto free_big;
+ }
+
+ tiny = xe_bo_create_pin_map(xe, m->gt, m->eng->vm,
+ 2 * SZ_4K,
+ ttm_bo_type_kernel,
+ XE_BO_CREATE_VRAM_IF_DGFX(m->gt) |
+ XE_BO_CREATE_PINNED_BIT);
+ if (IS_ERR(tiny)) {
+		KUNIT_FAIL(test, "Failed to allocate tiny bo: %li\n",
+			   PTR_ERR(tiny));
+ goto free_pt;
+ }
+
+ bb = xe_bb_new(m->gt, 32, xe->info.supports_usm);
+ if (IS_ERR(bb)) {
+ KUNIT_FAIL(test, "Failed to create batchbuffer: %li\n",
+ PTR_ERR(bb));
+ goto free_tiny;
+ }
+
+ kunit_info(test, "Starting tests, top level PT addr: %llx, special pagetable base addr: %llx\n",
+ xe_bo_main_addr(m->eng->vm->pt_root[id]->bo, GEN8_PAGE_SIZE),
+ xe_bo_main_addr(m->pt_bo, GEN8_PAGE_SIZE));
+
+ /* First part of the test, are we updating our pagetable bo with a new entry? */
+ xe_map_wr(xe, &bo->vmap, GEN8_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64, 0xdeaddeadbeefbeef);
+ expected = gen8_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0, 0);
+ if (m->eng->vm->flags & XE_VM_FLAGS_64K)
+ expected |= GEN12_PTE_PS64;
+ xe_res_first(pt->ttm.resource, 0, pt->size, &src_it);
+ emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt),
+ &src_it, GEN8_PAGE_SIZE, pt);
+ run_sanity_job(m, xe, bb, bb->len, "Writing PTE for our fake PT", test);
+
+ retval = xe_map_rd(xe, &bo->vmap, GEN8_PAGE_SIZE * (NUM_KERNEL_PDE - 1),
+ u64);
+ check(retval, expected, "PTE entry write", test);
+
+ /* Now try to write data to our newly mapped 'pagetable', see if it succeeds */
+ bb->len = 0;
+ bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+ xe_map_wr(xe, &pt->vmap, 0, u32, 0xdeaddead);
+ expected = 0x12345678U;
+
+ emit_clear(m->gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4,
+ expected, IS_DGFX(xe));
+ run_sanity_job(m, xe, bb, 1, "Writing to our newly mapped pagetable",
+ test);
+
+ retval = xe_map_rd(xe, &pt->vmap, 0, u32);
+ check(retval, expected, "Write to PT after adding PTE", test);
+
+ /* Sanity checks passed, try the full ones! */
+
+ /* Clear a small bo */
+ kunit_info(test, "Clearing small buffer object\n");
+ xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size);
+ expected = 0x224488ff;
+ fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, expected);
+ if (sanity_fence_failed(xe, fence, "Clearing small bo", test))
+ goto out;
+
+ dma_fence_put(fence);
+ retval = xe_map_rd(xe, &tiny->vmap, 0, u32);
+ check(retval, expected, "Command clear small first value", test);
+ retval = xe_map_rd(xe, &tiny->vmap, tiny->size - 4, u32);
+ check(retval, expected, "Command clear small last value", test);
+
+ if (IS_DGFX(xe)) {
+ kunit_info(test, "Copying small buffer object to system\n");
+ test_copy(m, tiny, test);
+ }
+
+ /* Clear a big bo with a fixed value */
+ kunit_info(test, "Clearing big buffer object\n");
+ xe_map_memset(xe, &big->vmap, 0, 0x11, big->size);
+ expected = 0x11223344U;
+ fence = xe_migrate_clear(m, big, big->ttm.resource, expected);
+ if (sanity_fence_failed(xe, fence, "Clearing big bo", test))
+ goto out;
+
+ dma_fence_put(fence);
+ retval = xe_map_rd(xe, &big->vmap, 0, u32);
+ check(retval, expected, "Command clear big first value", test);
+ retval = xe_map_rd(xe, &big->vmap, big->size - 4, u32);
+ check(retval, expected, "Command clear big last value", test);
+
+ if (IS_DGFX(xe)) {
+ kunit_info(test, "Copying big buffer object to system\n");
+ test_copy(m, big, test);
+ }
+
+ test_pt_update(m, pt, test);
+
+out:
+ xe_bb_free(bb, NULL);
+free_tiny:
+ xe_bo_unpin(tiny);
+ xe_bo_put(tiny);
+free_pt:
+ xe_bo_unpin(pt);
+ xe_bo_put(pt);
+free_big:
+ xe_bo_unpin(big);
+ xe_bo_put(big);
+vunmap:
+ xe_bo_vunmap(m->pt_bo);
+}
+
+static int migrate_test_run_device(struct xe_device *xe)
+{
+ struct kunit *test = xe_cur_kunit();
+ struct xe_gt *gt;
+ int id;
+
+ for_each_gt(gt, xe, id) {
+ struct xe_migrate *m = gt->migrate;
+ struct ww_acquire_ctx ww;
+
+ kunit_info(test, "Testing gt id %d.\n", id);
+ xe_vm_lock(m->eng->vm, &ww, 0, true);
+ xe_migrate_sanity_test(m, test);
+ xe_vm_unlock(m->eng->vm, &ww);
+ }
+
+ return 0;
+}
+
+void xe_migrate_sanity_kunit(struct kunit *test)
+{
+ xe_call_for_each_device(migrate_test_run_device);
+}
+EXPORT_SYMBOL(xe_migrate_sanity_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
new file mode 100644
index 000000000000..ad779e2bd071
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+void xe_migrate_sanity_kunit(struct kunit *test);
+
+static struct kunit_case xe_migrate_tests[] = {
+ KUNIT_CASE(xe_migrate_sanity_kunit),
+ {}
+};
+
+static struct kunit_suite xe_migrate_test_suite = {
+ .name = "xe_migrate",
+ .test_cases = xe_migrate_tests,
+};
+
+kunit_test_suite(xe_migrate_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/xe/tests/xe_test.h b/drivers/gpu/drm/xe/tests/xe_test.h
new file mode 100644
index 000000000000..1ec502b5acf3
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_test.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __XE_TEST_H__
+#define __XE_TEST_H__
+
+#include <linux/types.h>
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include <linux/sched.h>
+#include <kunit/test.h>
+
+/*
+ * Each test that provides a kunit private test structure should place a test
+ * id here and point kunit->priv to an embedded struct xe_test_priv.
+ */
+enum xe_test_priv_id {
+ XE_TEST_LIVE_DMA_BUF,
+};
+
+/**
+ * struct xe_test_priv - Base class for test private info
+ * @id: enum xe_test_priv_id to identify the subclass.
+ */
+struct xe_test_priv {
+ enum xe_test_priv_id id;
+};
+
+#define XE_TEST_DECLARE(x) x
+#define XE_TEST_ONLY(x) unlikely(x)
+#define XE_TEST_EXPORT
+#define xe_cur_kunit() current->kunit_test
+
+/**
+ * xe_cur_kunit_priv - Obtain the struct xe_test_priv pointed to by
+ * current->kunit->priv if it exists and is embedded in the expected subclass.
+ * @id: Id of the expected subclass.
+ *
+ * Return: NULL if the process is not a kunit test, or if the
+ * current kunit->priv pointer does not point to an object of the expected
+ * subclass. A pointer to the embedded struct xe_test_priv otherwise.
+ */
+static inline struct xe_test_priv *
+xe_cur_kunit_priv(enum xe_test_priv_id id)
+{
+ struct xe_test_priv *priv;
+
+ if (!xe_cur_kunit())
+ return NULL;
+
+ priv = xe_cur_kunit()->priv;
+ return priv->id == id ? priv : NULL;
+}
+
+#else /* if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) */
+
+#define XE_TEST_DECLARE(x)
+#define XE_TEST_ONLY(x) 0
+#define XE_TEST_EXPORT static
+#define xe_cur_kunit() NULL
+#define xe_cur_kunit_priv(_id) NULL
+
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
new file mode 100644
index 000000000000..8b9209571fd0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bb.h"
+#include "xe_sa.h"
+#include "xe_device.h"
+#include "xe_engine_types.h"
+#include "xe_hw_fence.h"
+#include "xe_sched_job.h"
+#include "xe_vm_types.h"
+
+#include "gt/intel_gpu_commands.h"
+
+struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
+{
+ struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
+ int err;
+
+ if (!bb)
+ return ERR_PTR(-ENOMEM);
+
+ bb->bo = xe_sa_bo_new(!usm ? &gt->kernel_bb_pool :
+ &gt->usm.bb_pool, 4 * dwords + 4);
+ if (IS_ERR(bb->bo)) {
+ err = PTR_ERR(bb->bo);
+ goto err;
+ }
+
+ bb->cs = xe_sa_bo_cpu_addr(bb->bo);
+ bb->len = 0;
+
+ return bb;
+err:
+ kfree(bb);
+ return ERR_PTR(err);
+}
+
+static struct xe_sched_job *
+__xe_bb_create_job(struct xe_engine *kernel_eng, struct xe_bb *bb, u64 *addr)
+{
+ u32 size = drm_suballoc_size(bb->bo);
+
+	XE_BUG_ON((bb->len + 1) * 4 > size);
+
+ bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+
+ xe_sa_bo_flush_write(bb->bo);
+
+ return xe_sched_job_create(kernel_eng, addr);
+}
+
+struct xe_sched_job *xe_bb_create_wa_job(struct xe_engine *wa_eng,
+ struct xe_bb *bb, u64 batch_base_ofs)
+{
+ u64 addr = batch_base_ofs + drm_suballoc_soffset(bb->bo);
+
+ XE_BUG_ON(!(wa_eng->vm->flags & XE_VM_FLAG_MIGRATION));
+
+ return __xe_bb_create_job(wa_eng, bb, &addr);
+}
+
+struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng,
+ struct xe_bb *bb,
+ u64 batch_base_ofs,
+ u32 second_idx)
+{
+ u64 addr[2] = {
+ batch_base_ofs + drm_suballoc_soffset(bb->bo),
+ batch_base_ofs + drm_suballoc_soffset(bb->bo) +
+ 4 * second_idx,
+ };
+
+ BUG_ON(second_idx > bb->len);
+ BUG_ON(!(kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION));
+
+ return __xe_bb_create_job(kernel_eng, bb, addr);
+}
+
+struct xe_sched_job *xe_bb_create_job(struct xe_engine *kernel_eng,
+ struct xe_bb *bb)
+{
+ u64 addr = xe_sa_bo_gpu_addr(bb->bo);
+
+ BUG_ON(kernel_eng->vm && kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION);
+ return __xe_bb_create_job(kernel_eng, bb, &addr);
+}
+
+void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence)
+{
+ if (!bb)
+ return;
+
+ xe_sa_bo_free(bb->bo, fence);
+ kfree(bb);
+}
diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h
new file mode 100644
index 000000000000..0cc9260c9634
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bb.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BB_H_
+#define _XE_BB_H_
+
+#include "xe_bb_types.h"
+
+struct dma_fence;
+
+struct xe_gt;
+struct xe_engine;
+struct xe_sched_job;
+
+struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm);
+struct xe_sched_job *xe_bb_create_job(struct xe_engine *kernel_eng,
+ struct xe_bb *bb);
+struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng,
+ struct xe_bb *bb, u64 batch_ofs,
+ u32 second_idx);
+struct xe_sched_job *xe_bb_create_wa_job(struct xe_engine *wa_eng,
+ struct xe_bb *bb, u64 batch_ofs);
+void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence);
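+
+/*
+ * Typical usage (illustrative sketch only, error handling omitted):
+ * allocate a batch buffer, emit command dwords into bb->cs, turn it
+ * into a scheduler job and free the buffer against the job's fence.
+ * MI_NOOP is just a placeholder command; gt, eng and fence are
+ * hypothetical.
+ *
+ *	struct xe_bb *bb = xe_bb_new(gt, 32, false);
+ *	struct xe_sched_job *job;
+ *
+ *	bb->cs[bb->len++] = MI_NOOP;
+ *	job = xe_bb_create_job(eng, bb);
+ *	xe_sched_job_arm(job);
+ *	fence = dma_fence_get(&job->drm.s_fence->finished);
+ *	xe_sched_job_push(job);
+ *	xe_bb_free(bb, fence);
+ */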
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bb_types.h b/drivers/gpu/drm/xe/xe_bb_types.h
new file mode 100644
index 000000000000..b7d30308cf90
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bb_types.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BB_TYPES_H_
+#define _XE_BB_TYPES_H_
+
+#include <linux/types.h>
+
+struct drm_suballoc;
+
+struct xe_bb {
+ struct drm_suballoc *bo;
+
+ u32 *cs;
+ u32 len; /* in dwords */
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
new file mode 100644
index 000000000000..ef2c9196c113
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -0,0 +1,1698 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+
+#include "xe_bo.h"
+
+#include <linux/dma-buf.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_gem_ttm_helper.h>
+#include <drm/ttm/ttm_device.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/xe_drm.h>
+
+#include "xe_device.h"
+#include "xe_dma_buf.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_map.h"
+#include "xe_migrate.h"
+#include "xe_preempt_fence.h"
+#include "xe_res_cursor.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+static const struct ttm_place sys_placement_flags = {
+ .fpfn = 0,
+ .lpfn = 0,
+ .mem_type = XE_PL_SYSTEM,
+ .flags = 0,
+};
+
+static struct ttm_placement sys_placement = {
+ .num_placement = 1,
+ .placement = &sys_placement_flags,
+ .num_busy_placement = 1,
+ .busy_placement = &sys_placement_flags,
+};
+
+bool mem_type_is_vram(u32 mem_type)
+{
+ return mem_type >= XE_PL_VRAM0;
+}
+
+static bool resource_is_vram(struct ttm_resource *res)
+{
+ return mem_type_is_vram(res->mem_type);
+}
+
+bool xe_bo_is_vram(struct xe_bo *bo)
+{
+ return resource_is_vram(bo->ttm.resource);
+}
+
+static bool xe_bo_is_user(struct xe_bo *bo)
+{
+ return bo->flags & XE_BO_CREATE_USER_BIT;
+}
+
+static struct xe_gt *
+mem_type_to_gt(struct xe_device *xe, u32 mem_type)
+{
+ XE_BUG_ON(!mem_type_is_vram(mem_type));
+
+ return xe_device_get_gt(xe, mem_type - XE_PL_VRAM0);
+}
+
+static void try_add_system(struct xe_bo *bo, struct ttm_place *places,
+ u32 bo_flags, u32 *c)
+{
+ if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) {
+ places[*c] = (struct ttm_place) {
+ .mem_type = XE_PL_TT,
+ };
+ *c += 1;
+
+ if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
+ bo->props.preferred_mem_type = XE_PL_TT;
+ }
+}
+
+static void try_add_vram0(struct xe_device *xe, struct xe_bo *bo,
+ struct ttm_place *places, u32 bo_flags, u32 *c)
+{
+ struct xe_gt *gt;
+
+ if (bo_flags & XE_BO_CREATE_VRAM0_BIT) {
+ gt = mem_type_to_gt(xe, XE_PL_VRAM0);
+ XE_BUG_ON(!gt->mem.vram.size);
+
+ places[*c] = (struct ttm_place) {
+ .mem_type = XE_PL_VRAM0,
+ /*
+ * For eviction / restore on suspend / resume objects
+ * pinned in VRAM must be contiguous
+ */
+ .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
+ XE_BO_CREATE_GGTT_BIT) ?
+ TTM_PL_FLAG_CONTIGUOUS : 0,
+ };
+ *c += 1;
+
+ if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
+ bo->props.preferred_mem_type = XE_PL_VRAM0;
+ }
+}
+
+static void try_add_vram1(struct xe_device *xe, struct xe_bo *bo,
+ struct ttm_place *places, u32 bo_flags, u32 *c)
+{
+ struct xe_gt *gt;
+
+ if (bo_flags & XE_BO_CREATE_VRAM1_BIT) {
+ gt = mem_type_to_gt(xe, XE_PL_VRAM1);
+ XE_BUG_ON(!gt->mem.vram.size);
+
+ places[*c] = (struct ttm_place) {
+ .mem_type = XE_PL_VRAM1,
+ /*
+ * For eviction / restore on suspend / resume objects
+ * pinned in VRAM must be contiguous
+ */
+ .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
+ XE_BO_CREATE_GGTT_BIT) ?
+ TTM_PL_FLAG_CONTIGUOUS : 0,
+ };
+ *c += 1;
+
+ if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
+ bo->props.preferred_mem_type = XE_PL_VRAM1;
+ }
+}
+
+static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
+ u32 bo_flags)
+{
+ struct ttm_place *places = bo->placements;
+ u32 c = 0;
+
+ bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
+
+ /* The order of placements should indicate preferred location */
+
+ if (bo->props.preferred_mem_class == XE_MEM_REGION_CLASS_SYSMEM) {
+ try_add_system(bo, places, bo_flags, &c);
+ if (bo->props.preferred_gt == XE_GT1) {
+ try_add_vram1(xe, bo, places, bo_flags, &c);
+ try_add_vram0(xe, bo, places, bo_flags, &c);
+ } else {
+ try_add_vram0(xe, bo, places, bo_flags, &c);
+ try_add_vram1(xe, bo, places, bo_flags, &c);
+ }
+ } else if (bo->props.preferred_gt == XE_GT1) {
+ try_add_vram1(xe, bo, places, bo_flags, &c);
+ try_add_vram0(xe, bo, places, bo_flags, &c);
+ try_add_system(bo, places, bo_flags, &c);
+ } else {
+ try_add_vram0(xe, bo, places, bo_flags, &c);
+ try_add_vram1(xe, bo, places, bo_flags, &c);
+ try_add_system(bo, places, bo_flags, &c);
+ }
+
+ if (!c)
+ return -EINVAL;
+
+ bo->placement = (struct ttm_placement) {
+ .num_placement = c,
+ .placement = places,
+ .num_busy_placement = c,
+ .busy_placement = places,
+ };
+
+ return 0;
+}
+
+int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
+ u32 bo_flags)
+{
+ xe_bo_assert_held(bo);
+ return __xe_bo_placement_for_flags(xe, bo, bo_flags);
+}
+
+static void xe_evict_flags(struct ttm_buffer_object *tbo,
+ struct ttm_placement *placement)
+{
+ struct xe_bo *bo;
+
+ if (!xe_bo_is_xe_bo(tbo)) {
+ /* Don't handle scatter gather BOs */
+ if (tbo->type == ttm_bo_type_sg) {
+ placement->num_placement = 0;
+ placement->num_busy_placement = 0;
+ return;
+ }
+
+ *placement = sys_placement;
+ return;
+ }
+
+ /*
+	 * For xe, sg bos that are evicted to system just trigger a
+ * rebind of the sg list upon subsequent validation to XE_PL_TT.
+ */
+
+ bo = ttm_to_xe_bo(tbo);
+ switch (tbo->resource->mem_type) {
+ case XE_PL_VRAM0:
+ case XE_PL_VRAM1:
+ case XE_PL_TT:
+ default:
+ /* for now kick out to system */
+ *placement = sys_placement;
+ break;
+ }
+}
+
+struct xe_ttm_tt {
+ struct ttm_tt ttm;
+ struct device *dev;
+ struct sg_table sgt;
+ struct sg_table *sg;
+};
+
+static int xe_tt_map_sg(struct ttm_tt *tt)
+{
+ struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+ unsigned long num_pages = tt->num_pages;
+ int ret;
+
+ XE_BUG_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
+
+ if (xe_tt->sg)
+ return 0;
+
+ ret = sg_alloc_table_from_pages(&xe_tt->sgt, tt->pages, num_pages,
+ 0, (u64)num_pages << PAGE_SHIFT,
+ GFP_KERNEL);
+ if (ret)
+ return ret;
+
+ xe_tt->sg = &xe_tt->sgt;
+ ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ if (ret) {
+ sg_free_table(xe_tt->sg);
+ xe_tt->sg = NULL;
+ return ret;
+ }
+
+ return 0;
+}
+
+struct sg_table *xe_bo_get_sg(struct xe_bo *bo)
+{
+ struct ttm_tt *tt = bo->ttm.ttm;
+ struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+
+ return xe_tt->sg;
+}
+
+static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
+ u32 page_flags)
+{
+ struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+ struct xe_device *xe = xe_bo_device(bo);
+ struct xe_ttm_tt *tt;
+ int err;
+
+ tt = kzalloc(sizeof(*tt), GFP_KERNEL);
+ if (!tt)
+ return NULL;
+
+ tt->dev = xe->drm.dev;
+
+ /* TODO: Select caching mode */
+ err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags,
+ bo->flags & XE_BO_SCANOUT_BIT ? ttm_write_combined : ttm_cached,
+ DIV_ROUND_UP(xe_device_ccs_bytes(xe_bo_device(bo),
+ bo->ttm.base.size),
+ PAGE_SIZE));
+ if (err) {
+ kfree(tt);
+ return NULL;
+ }
+
+ return &tt->ttm;
+}
+
+static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
+ struct ttm_operation_ctx *ctx)
+{
+ int err;
+
+ /*
+ * dma-bufs are not populated with pages, and the dma-
+ * addresses are set up when moved to XE_PL_TT.
+ */
+ if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
+ return 0;
+
+ err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
+ if (err)
+ return err;
+
+	/* A follow-up may move this to xe_bo_move() when BO is moved to XE_PL_TT */
+ err = xe_tt_map_sg(tt);
+ if (err)
+ ttm_pool_free(&ttm_dev->pool, tt);
+
+ return err;
+}
+
+static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
+{
+ struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+
+ if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
+ return;
+
+ if (xe_tt->sg) {
+ dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
+ DMA_BIDIRECTIONAL, 0);
+ sg_free_table(xe_tt->sg);
+ xe_tt->sg = NULL;
+ }
+
+ return ttm_pool_free(&ttm_dev->pool, tt);
+}
+
+static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
+{
+ ttm_tt_fini(tt);
+ kfree(tt);
+}
+
+static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
+ struct ttm_resource *mem)
+{
+ struct xe_device *xe = ttm_to_xe_device(bdev);
+ struct xe_gt *gt;
+
+ switch (mem->mem_type) {
+ case XE_PL_SYSTEM:
+ case XE_PL_TT:
+ return 0;
+ case XE_PL_VRAM0:
+ case XE_PL_VRAM1:
+ gt = mem_type_to_gt(xe, mem->mem_type);
+ mem->bus.offset = mem->start << PAGE_SHIFT;
+
+ if (gt->mem.vram.mapping &&
+ mem->placement & TTM_PL_FLAG_CONTIGUOUS)
+ mem->bus.addr = (u8 *)gt->mem.vram.mapping +
+ mem->bus.offset;
+
+ mem->bus.offset += gt->mem.vram.io_start;
+ mem->bus.is_iomem = true;
+
+#if !defined(CONFIG_X86)
+ mem->bus.caching = ttm_write_combined;
+#endif
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
+ const struct ttm_operation_ctx *ctx)
+{
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
+ struct xe_vma *vma;
+ int ret = 0;
+
+ dma_resv_assert_held(bo->ttm.base.resv);
+
+ if (!xe_device_in_fault_mode(xe) && !list_empty(&bo->vmas)) {
+ dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
+ DMA_RESV_USAGE_BOOKKEEP);
+ dma_resv_for_each_fence_unlocked(&cursor, fence)
+ dma_fence_enable_sw_signaling(fence);
+ dma_resv_iter_end(&cursor);
+ }
+
+ list_for_each_entry(vma, &bo->vmas, bo_link) {
+ struct xe_vm *vm = vma->vm;
+
+ trace_xe_vma_evict(vma);
+
+ if (xe_vm_in_fault_mode(vm)) {
+ /* Wait for pending binds / unbinds. */
+ long timeout;
+
+ if (ctx->no_wait_gpu &&
+ !dma_resv_test_signaled(bo->ttm.base.resv,
+ DMA_RESV_USAGE_BOOKKEEP))
+ return -EBUSY;
+
+ timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
+ DMA_RESV_USAGE_BOOKKEEP,
+ ctx->interruptible,
+ MAX_SCHEDULE_TIMEOUT);
+ if (timeout > 0) {
+ ret = xe_vm_invalidate_vma(vma);
+ XE_WARN_ON(ret);
+ } else if (!timeout) {
+ ret = -ETIME;
+ } else {
+ ret = timeout;
+ }
+
+ } else {
+			bool vm_resv_locked = false;
+
+ /*
+ * We need to put the vma on the vm's rebind_list,
+ * but need the vm resv to do so. If we can't verify
+			 * that we indeed have it locked, put the vma on the
+			 * vm's notifier.rebind_list instead and scoop it up later.
+ */
+ if (dma_resv_trylock(&vm->resv))
+ vm_resv_locked = true;
+ else if (ctx->resv != &vm->resv) {
+ spin_lock(&vm->notifier.list_lock);
+ list_move_tail(&vma->notifier.rebind_link,
+ &vm->notifier.rebind_list);
+ spin_unlock(&vm->notifier.list_lock);
+ continue;
+ }
+
+ xe_vm_assert_held(vm);
+ if (list_empty(&vma->rebind_link) && vma->gt_present)
+ list_add_tail(&vma->rebind_link, &vm->rebind_list);
+
+ if (vm_resv_locked)
+ dma_resv_unlock(&vm->resv);
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
+ * Note that unmapping the attachment is deferred to the next
+ * map_attachment time, or to bo destroy (after idling), whichever comes first.
+ * This is to avoid syncing before unmap_attachment(), assuming that the
+ * caller relies on idling the reservation object before moving the
+ * backing store out. Should that assumption not hold, then we will be able
+ * to unconditionally call unmap_attachment() when moving out to system.
+ */
+static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
+ struct ttm_resource *old_res,
+ struct ttm_resource *new_res)
+{
+ struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
+ struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
+ ttm);
+ struct sg_table *sg;
+
+ XE_BUG_ON(!attach);
+ XE_BUG_ON(!ttm_bo->ttm);
+
+ if (new_res->mem_type == XE_PL_SYSTEM)
+ goto out;
+
+ if (ttm_bo->sg) {
+ dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
+ ttm_bo->sg = NULL;
+ }
+
+ sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+ if (IS_ERR(sg))
+ return PTR_ERR(sg);
+
+ ttm_bo->sg = sg;
+ xe_tt->sg = sg;
+
+out:
+ ttm_bo_move_null(ttm_bo, new_res);
+
+ return 0;
+}
+
+/**
+ * xe_bo_move_notify - Notify subsystems of a pending move
+ * @bo: The buffer object
+ * @ctx: The struct ttm_operation_ctx controlling locking and waits.
+ *
+ * This function notifies subsystems of an upcoming buffer move.
+ * Upon receiving such a notification, subsystems should schedule
+ * halting access to the underlying pages and optionally add a fence
+ * to the buffer object's dma_resv object, that signals when access is
+ * stopped. The caller will wait on all dma_resv fences before
+ * starting the move.
+ *
+ * A subsystem may commence access to the object after obtaining
+ * bindings to the new backing memory under the object lock.
+ *
+ * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
+ * negative error code on error.
+ */
+static int xe_bo_move_notify(struct xe_bo *bo,
+ const struct ttm_operation_ctx *ctx)
+{
+ struct ttm_buffer_object *ttm_bo = &bo->ttm;
+ struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+ int ret;
+
+ /*
+ * If this starts to call into many components, consider
+ * using a notification chain here.
+ */
+
+ if (xe_bo_is_pinned(bo))
+ return -EINVAL;
+
+ xe_bo_vunmap(bo);
+ ret = xe_bo_trigger_rebind(xe, bo, ctx);
+ if (ret)
+ return ret;
+
+ /* Don't call move_notify() for imported dma-bufs. */
+ if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
+ dma_buf_move_notify(ttm_bo->base.dma_buf);
+
+ return 0;
+}
+
+static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
+ struct ttm_operation_ctx *ctx,
+ struct ttm_resource *new_mem,
+ struct ttm_place *hop)
+{
+ struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+ struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+ struct ttm_resource *old_mem = ttm_bo->resource;
+ struct ttm_tt *ttm = ttm_bo->ttm;
+ struct xe_gt *gt = NULL;
+ struct dma_fence *fence;
+ bool move_lacks_source;
+ bool needs_clear;
+ int ret = 0;
+
+ if (!old_mem) {
+ if (new_mem->mem_type != TTM_PL_SYSTEM) {
+ hop->mem_type = TTM_PL_SYSTEM;
+ hop->flags = TTM_PL_FLAG_TEMPORARY;
+ ret = -EMULTIHOP;
+ goto out;
+ }
+
+ ttm_bo_move_null(ttm_bo, new_mem);
+ goto out;
+ }
+
+ if (ttm_bo->type == ttm_bo_type_sg) {
+ ret = xe_bo_move_notify(bo, ctx);
+ if (!ret)
+ ret = xe_bo_move_dmabuf(ttm_bo, old_mem, new_mem);
+ goto out;
+ }
+
+ move_lacks_source = !resource_is_vram(old_mem) &&
+ (!ttm || !ttm_tt_is_populated(ttm));
+
+ needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
+ (!ttm && ttm_bo->type == ttm_bo_type_device);
+
+ if ((move_lacks_source && !needs_clear) ||
+ (old_mem->mem_type == XE_PL_SYSTEM &&
+ new_mem->mem_type == XE_PL_TT)) {
+ ttm_bo_move_null(ttm_bo, new_mem);
+ goto out;
+ }
+
+ if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
+ ret = xe_bo_move_notify(bo, ctx);
+ if (ret)
+ goto out;
+ }
+
+ if (old_mem->mem_type == XE_PL_TT &&
+ new_mem->mem_type == XE_PL_SYSTEM) {
+ long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
+ DMA_RESV_USAGE_BOOKKEEP,
+ true,
+ MAX_SCHEDULE_TIMEOUT);
+ if (timeout < 0) {
+ ret = timeout;
+ goto out;
+ }
+ ttm_bo_move_null(ttm_bo, new_mem);
+ goto out;
+ }
+
+ if (!move_lacks_source &&
+ ((old_mem->mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
+ (resource_is_vram(old_mem) &&
+ new_mem->mem_type == XE_PL_SYSTEM))) {
+ hop->fpfn = 0;
+ hop->lpfn = 0;
+ hop->mem_type = XE_PL_TT;
+ hop->flags = TTM_PL_FLAG_TEMPORARY;
+ ret = -EMULTIHOP;
+ goto out;
+ }
+
+ if (bo->gt)
+ gt = bo->gt;
+ else if (resource_is_vram(new_mem))
+ gt = mem_type_to_gt(xe, new_mem->mem_type);
+ else if (resource_is_vram(old_mem))
+ gt = mem_type_to_gt(xe, old_mem->mem_type);
+
+ XE_BUG_ON(!gt);
+ XE_BUG_ON(!gt->migrate);
+
+ trace_xe_bo_move(bo);
+ xe_device_mem_access_get(xe);
+
+ if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
+ /*
+		 * Kernel memory that is pinned should only be moved on suspend
+		 * / resume; some of the pinned memory is required for the
+		 * device to resume / use the GPU to move other evicted memory
+		 * (user memory) around. This could likely be optimized a bit
+		 * further by finding the minimum set of pinned memory
+		 * required for resume, but for simplicity we do a memcpy for all
+		 * pinned memory.
+ */
+ ret = xe_bo_vmap(bo);
+ if (!ret) {
+ ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
+
+ /* Create a new VMAP once kernel BO back in VRAM */
+ if (!ret && resource_is_vram(new_mem)) {
+ void *new_addr = gt->mem.vram.mapping +
+ (new_mem->start << PAGE_SHIFT);
+
+ XE_BUG_ON(new_mem->start !=
+ bo->placements->fpfn);
+
+ iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
+ }
+ }
+ } else {
+ if (move_lacks_source)
+ fence = xe_migrate_clear(gt->migrate, bo, new_mem, 0);
+ else
+ fence = xe_migrate_copy(gt->migrate, bo, old_mem, new_mem);
+ if (IS_ERR(fence)) {
+ ret = PTR_ERR(fence);
+ xe_device_mem_access_put(xe);
+ goto out;
+ }
+ ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true,
+ new_mem);
+ dma_fence_put(fence);
+ }
+
+ xe_device_mem_access_put(xe);
+ trace_printk("new_mem->mem_type=%d\n", new_mem->mem_type);
+
+out:
+	return ret;
+}
+
+static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
+ unsigned long page_offset)
+{
+ struct xe_device *xe = ttm_to_xe_device(bo->bdev);
+ struct xe_gt *gt = mem_type_to_gt(xe, bo->resource->mem_type);
+ struct xe_res_cursor cursor;
+
+ xe_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
+ return (gt->mem.vram.io_start + cursor.start) >> PAGE_SHIFT;
+}
+
+static void __xe_bo_vunmap(struct xe_bo *bo);
+
+/*
+ * TODO: Move this function to TTM so we don't rely on how TTM does its
+ * locking, thereby abusing TTM internals.
+ */
+static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
+{
+ bool locked;
+
+ XE_WARN_ON(kref_read(&ttm_bo->kref));
+
+ /*
+ * We can typically only race with TTM trylocking under the
+ * lru_lock, which will immediately be unlocked again since
+ * the ttm_bo refcount is zero at this point. So trylocking *should*
+ * always succeed here, as long as we hold the lru lock.
+ */
+ spin_lock(&ttm_bo->bdev->lru_lock);
+ locked = dma_resv_trylock(ttm_bo->base.resv);
+ spin_unlock(&ttm_bo->bdev->lru_lock);
+ XE_WARN_ON(!locked);
+
+ return locked;
+}
+
+static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
+{
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
+ struct dma_fence *replacement = NULL;
+ struct xe_bo *bo;
+
+ if (!xe_bo_is_xe_bo(ttm_bo))
+ return;
+
+ bo = ttm_to_xe_bo(ttm_bo);
+ XE_WARN_ON(bo->created && kref_read(&ttm_bo->base.refcount));
+
+ /*
+	 * Corner case where TTM fails to allocate memory and this BO's resv
+	 * still points to the VM's resv
+ */
+ if (ttm_bo->base.resv != &ttm_bo->base._resv)
+ return;
+
+ if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
+ return;
+
+ /*
+ * Scrub the preempt fences if any. The unbind fence is already
+ * attached to the resv.
+ * TODO: Don't do this for external bos once we scrub them after
+ * unbind.
+ */
+ dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
+ DMA_RESV_USAGE_BOOKKEEP, fence) {
+ if (xe_fence_is_xe_preempt(fence) &&
+ !dma_fence_is_signaled(fence)) {
+ if (!replacement)
+ replacement = dma_fence_get_stub();
+
+ dma_resv_replace_fences(ttm_bo->base.resv,
+ fence->context,
+ replacement,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+ }
+ dma_fence_put(replacement);
+
+ dma_resv_unlock(ttm_bo->base.resv);
+}
+
+static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
+{
+ if (!xe_bo_is_xe_bo(ttm_bo))
+ return;
+
+ /*
+ * Object is idle and about to be destroyed. Release the
+ * dma-buf attachment.
+ */
+ if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
+ struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
+ struct xe_ttm_tt, ttm);
+
+ dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
+ DMA_BIDIRECTIONAL);
+ ttm_bo->sg = NULL;
+ xe_tt->sg = NULL;
+ }
+}
+
+struct ttm_device_funcs xe_ttm_funcs = {
+ .ttm_tt_create = xe_ttm_tt_create,
+ .ttm_tt_populate = xe_ttm_tt_populate,
+ .ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
+ .ttm_tt_destroy = xe_ttm_tt_destroy,
+ .evict_flags = xe_evict_flags,
+ .move = xe_bo_move,
+ .io_mem_reserve = xe_ttm_io_mem_reserve,
+ .io_mem_pfn = xe_ttm_io_mem_pfn,
+ .release_notify = xe_ttm_bo_release_notify,
+ .eviction_valuable = ttm_bo_eviction_valuable,
+ .delete_mem_notify = xe_ttm_bo_delete_mem_notify,
+};
+
+static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
+{
+ struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+
+ if (bo->ttm.base.import_attach)
+ drm_prime_gem_destroy(&bo->ttm.base, NULL);
+ drm_gem_object_release(&bo->ttm.base);
+
+ WARN_ON(!list_empty(&bo->vmas));
+
+ if (bo->ggtt_node.size)
+ xe_ggtt_remove_bo(bo->gt->mem.ggtt, bo);
+
+ if (bo->vm && xe_bo_is_user(bo))
+ xe_vm_put(bo->vm);
+
+ kfree(bo);
+}
+
+static void xe_gem_object_free(struct drm_gem_object *obj)
+{
+ /* Our BO reference counting scheme works as follows:
+ *
+ * The gem object kref is typically used throughout the driver,
+ * and the gem object holds a ttm_buffer_object refcount, so
+ * that when the last gem object reference is put, which is when
+	 * we end up in this function, we also put that ttm_buffer_object
+	 * refcount. Anything using gem interfaces is then no longer
+	 * allowed to access the object in a way that requires a gem
+	 * refcount, including locking the object.
+	 *
+	 * Driver TTM callbacks are allowed to use the ttm_buffer_object
+	 * refcount directly if needed.
+ */
+ __xe_bo_vunmap(gem_to_xe_bo(obj));
+ ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
+}
+
+static bool should_migrate_to_system(struct xe_bo *bo)
+{
+ struct xe_device *xe = xe_bo_device(bo);
+
+ return xe_device_in_fault_mode(xe) && bo->props.cpu_atomic;
+}
+
+static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
+{
+ struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
+ struct drm_device *ddev = tbo->base.dev;
+ vm_fault_t ret;
+ int idx, r = 0;
+
+ ret = ttm_bo_vm_reserve(tbo, vmf);
+ if (ret)
+ return ret;
+
+ if (drm_dev_enter(ddev, &idx)) {
+ struct xe_bo *bo = ttm_to_xe_bo(tbo);
+
+ trace_xe_bo_cpu_fault(bo);
+
+ if (should_migrate_to_system(bo)) {
+ r = xe_bo_migrate(bo, XE_PL_TT);
+ if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
+ ret = VM_FAULT_NOPAGE;
+ else if (r)
+ ret = VM_FAULT_SIGBUS;
+ }
+ if (!ret)
+ ret = ttm_bo_vm_fault_reserved(vmf,
+ vmf->vma->vm_page_prot,
+ TTM_BO_VM_NUM_PREFAULT);
+
+ drm_dev_exit(idx);
+ } else {
+ ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+ }
+ if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+ return ret;
+
+ dma_resv_unlock(tbo->base.resv);
+ return ret;
+}
+
+static const struct vm_operations_struct xe_gem_vm_ops = {
+ .fault = xe_gem_fault,
+ .open = ttm_bo_vm_open,
+ .close = ttm_bo_vm_close,
+ .access = ttm_bo_vm_access
+};
+
+static const struct drm_gem_object_funcs xe_gem_object_funcs = {
+ .free = xe_gem_object_free,
+ .mmap = drm_gem_ttm_mmap,
+ .export = xe_gem_prime_export,
+ .vm_ops = &xe_gem_vm_ops,
+};
+
+/**
+ * xe_bo_alloc - Allocate storage for a struct xe_bo
+ *
+ * This function is intended to allocate storage to be used as input
+ * to __xe_bo_create_locked(), in the case a pointer to the bo to be
+ * created is needed before the call to __xe_bo_create_locked().
+ * If __xe_bo_create_locked() ends up never being called, then the
+ * storage allocated with this function needs to be freed using
+ * xe_bo_free().
+ *
+ * Return: A pointer to an uninitialized struct xe_bo on success,
+ * ERR_PTR(-ENOMEM) on error.
+ */
+struct xe_bo *xe_bo_alloc(void)
+{
+ struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+
+ if (!bo)
+ return ERR_PTR(-ENOMEM);
+
+ return bo;
+}
+
+/**
+ * xe_bo_free - Free storage allocated using xe_bo_alloc()
+ * @bo: The buffer object storage.
+ *
+ * Refer to xe_bo_alloc() documentation for valid use-cases.
+ */
+void xe_bo_free(struct xe_bo *bo)
+{
+ kfree(bo);
+}
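A minimal usage sketch of the xe_bo_alloc() / xe_bo_free() contract described in the kernel-doc above, as it might appear inside the driver (relevant headers assumed already included). The helper name example_publish_pointer() and the kernel-BO arguments are illustrative assumptions, not part of this patch.

	static struct xe_bo *example_precreate(struct xe_device *xe, struct xe_gt *gt,
					       size_t size, u32 flags)
	{
		struct xe_bo *bo = xe_bo_alloc();
		int err;

		if (IS_ERR(bo))
			return bo;

		/* Hypothetical step that needs the pointer before creation. */
		err = example_publish_pointer(bo);
		if (err) {
			/* __xe_bo_create_locked() never called: free the storage. */
			xe_bo_free(bo);
			return ERR_PTR(err);
		}

		return __xe_bo_create_locked(xe, bo, gt, NULL, size,
					     ttm_bo_type_kernel, flags);
	}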
+
+struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
+ struct xe_gt *gt, struct dma_resv *resv,
+ size_t size, enum ttm_bo_type type,
+ u32 flags)
+{
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = false,
+ };
+ struct ttm_placement *placement;
+ uint32_t alignment;
+ int err;
+
+ /* Only kernel objects should set GT */
+ XE_BUG_ON(gt && type != ttm_bo_type_kernel);
+
+ if (!bo) {
+ bo = xe_bo_alloc();
+ if (IS_ERR(bo))
+ return bo;
+ }
+
+ if (flags & (XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_VRAM1_BIT) &&
+ !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) &&
+ xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) {
+ size = ALIGN(size, SZ_64K);
+ flags |= XE_BO_INTERNAL_64K;
+ alignment = SZ_64K >> PAGE_SHIFT;
+ } else {
+ alignment = SZ_4K >> PAGE_SHIFT;
+ }
+
+ bo->gt = gt;
+ bo->size = size;
+ bo->flags = flags;
+ bo->ttm.base.funcs = &xe_gem_object_funcs;
+ bo->props.preferred_mem_class = XE_BO_PROPS_INVALID;
+ bo->props.preferred_gt = XE_BO_PROPS_INVALID;
+ bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
+ bo->ttm.priority = DRM_XE_VMA_PRIORITY_NORMAL;
+ INIT_LIST_HEAD(&bo->vmas);
+ INIT_LIST_HEAD(&bo->pinned_link);
+
+ drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
+
+ if (resv) {
+ ctx.allow_res_evict = true;
+ ctx.resv = resv;
+ }
+
+ err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
+ if (WARN_ON(err))
+ return ERR_PTR(err);
+
+ /* Defer populating type_sg bos */
+ placement = (type == ttm_bo_type_sg ||
+ bo->flags & XE_BO_DEFER_BACKING) ? &sys_placement :
+ &bo->placement;
+ err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
+ placement, alignment,
+ &ctx, NULL, resv, xe_ttm_bo_destroy);
+ if (err)
+ return ERR_PTR(err);
+
+ bo->created = true;
+ ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+
+ return bo;
+}
+
+struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt,
+ struct xe_vm *vm, size_t size,
+ enum ttm_bo_type type, u32 flags)
+{
+ struct xe_bo *bo;
+ int err;
+
+ if (vm)
+ xe_vm_assert_held(vm);
+ bo = __xe_bo_create_locked(xe, NULL, gt, vm ? &vm->resv : NULL, size,
+ type, flags);
+ if (IS_ERR(bo))
+ return bo;
+
+ if (vm && xe_bo_is_user(bo))
+ xe_vm_get(vm);
+ bo->vm = vm;
+
+ if (flags & XE_BO_CREATE_GGTT_BIT) {
+ XE_BUG_ON(!gt);
+
+ err = xe_ggtt_insert_bo(gt->mem.ggtt, bo);
+ if (err)
+ goto err_unlock_put_bo;
+ }
+
+ return bo;
+
+err_unlock_put_bo:
+ xe_bo_unlock_vm_held(bo);
+ xe_bo_put(bo);
+ return ERR_PTR(err);
+}
+
+struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt,
+ struct xe_vm *vm, size_t size,
+ enum ttm_bo_type type, u32 flags)
+{
+ struct xe_bo *bo = xe_bo_create_locked(xe, gt, vm, size, type, flags);
+
+ if (!IS_ERR(bo))
+ xe_bo_unlock_vm_held(bo);
+
+ return bo;
+}
+
+struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt,
+ struct xe_vm *vm, size_t size,
+ enum ttm_bo_type type, u32 flags)
+{
+ struct xe_bo *bo = xe_bo_create_locked(xe, gt, vm, size, type, flags);
+ int err;
+
+ if (IS_ERR(bo))
+ return bo;
+
+ err = xe_bo_pin(bo);
+ if (err)
+ goto err_put;
+
+ err = xe_bo_vmap(bo);
+ if (err)
+ goto err_unpin;
+
+ xe_bo_unlock_vm_held(bo);
+
+ return bo;
+
+err_unpin:
+ xe_bo_unpin(bo);
+err_put:
+ xe_bo_unlock_vm_held(bo);
+ xe_bo_put(bo);
+ return ERR_PTR(err);
+}
+
+struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt,
+ const void *data, size_t size,
+ enum ttm_bo_type type, u32 flags)
+{
+ struct xe_bo *bo = xe_bo_create_pin_map(xe, gt, NULL,
+ ALIGN(size, PAGE_SIZE),
+ type, flags);
+ if (IS_ERR(bo))
+ return bo;
+
+ xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
+
+ return bo;
+}
+
+/*
+ * XXX: This is in the VM bind data path; we should likely calculate this once,
+ * store it, and recalculate if the BO is moved.
+ */
+static uint64_t vram_region_io_offset(struct xe_bo *bo)
+{
+ struct xe_device *xe = xe_bo_device(bo);
+ struct xe_gt *gt = mem_type_to_gt(xe, bo->ttm.resource->mem_type);
+
+ return gt->mem.vram.io_start - xe->mem.vram.io_start;
+}
+
+/**
+ * xe_bo_pin_external - pin an external BO
+ * @bo: buffer object to be pinned
+ *
+ * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
+ * BO. Unique call compared to xe_bo_pin as this function has its own set of
+ * asserts and code to ensure evict / restore on suspend / resume.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_bo_pin_external(struct xe_bo *bo)
+{
+ struct xe_device *xe = xe_bo_device(bo);
+ int err;
+
+ XE_BUG_ON(bo->vm);
+ XE_BUG_ON(!xe_bo_is_user(bo));
+
+ if (!xe_bo_is_pinned(bo)) {
+ err = xe_bo_validate(bo, NULL, false);
+ if (err)
+ return err;
+
+ if (xe_bo_is_vram(bo)) {
+ spin_lock(&xe->pinned.lock);
+ list_add_tail(&bo->pinned_link,
+ &xe->pinned.external_vram);
+ spin_unlock(&xe->pinned.lock);
+ }
+ }
+
+ ttm_bo_pin(&bo->ttm);
+
+ /*
+ * FIXME: If we always use the reserve / unreserve functions for locking
+ * we do not need this.
+ */
+ ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+
+ return 0;
+}
+
+int xe_bo_pin(struct xe_bo *bo)
+{
+ struct xe_device *xe = xe_bo_device(bo);
+ int err;
+
+ /* We currently don't expect user BO to be pinned */
+ XE_BUG_ON(xe_bo_is_user(bo));
+
+ /* Pinned object must be in GGTT or have pinned flag */
+ XE_BUG_ON(!(bo->flags & (XE_BO_CREATE_PINNED_BIT |
+ XE_BO_CREATE_GGTT_BIT)));
+
+ /*
+ * No reason we can't support pinning imported dma-bufs we just don't
+ * expect to pin an imported dma-buf.
+ */
+ XE_BUG_ON(bo->ttm.base.import_attach);
+
+ /* We only expect at most 1 pin */
+ XE_BUG_ON(xe_bo_is_pinned(bo));
+
+ err = xe_bo_validate(bo, NULL, false);
+ if (err)
+ return err;
+
+ /*
+	 * For pinned objects on DGFX, we expect these objects to be in
+	 * contiguous VRAM memory. They require eviction / restore during
+	 * suspend / resume (with a forced restore to the same physical address).
+ */
+ if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
+ bo->flags & XE_BO_INTERNAL_TEST)) {
+ struct ttm_place *place = &(bo->placements[0]);
+ bool lmem;
+
+ XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS));
+ XE_BUG_ON(!mem_type_is_vram(place->mem_type));
+
+ place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &lmem) -
+ vram_region_io_offset(bo)) >> PAGE_SHIFT;
+ place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
+
+ spin_lock(&xe->pinned.lock);
+ list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
+ spin_unlock(&xe->pinned.lock);
+ }
+
+ ttm_bo_pin(&bo->ttm);
+
+ /*
+ * FIXME: If we always use the reserve / unreserve functions for locking
+ * we do not need this.
+ */
+ ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+
+ return 0;
+}
+
+/**
+ * xe_bo_unpin_external - unpin an external BO
+ * @bo: buffer object to be unpinned
+ *
+ * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
+ * BO. Unique call compared to xe_bo_unpin as this function has its own set of
+ * asserts and code to ensure evict / restore on suspend / resume.
+ */
+void xe_bo_unpin_external(struct xe_bo *bo)
+{
+ struct xe_device *xe = xe_bo_device(bo);
+
+ XE_BUG_ON(bo->vm);
+ XE_BUG_ON(!xe_bo_is_pinned(bo));
+ XE_BUG_ON(!xe_bo_is_user(bo));
+
+ if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) {
+ spin_lock(&xe->pinned.lock);
+ list_del_init(&bo->pinned_link);
+ spin_unlock(&xe->pinned.lock);
+ }
+
+ ttm_bo_unpin(&bo->ttm);
+
+ /*
+ * FIXME: If we always use the reserve / unreserve functions for locking
+ * we do not need this.
+ */
+ ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
+}
+
+void xe_bo_unpin(struct xe_bo *bo)
+{
+ struct xe_device *xe = xe_bo_device(bo);
+
+ XE_BUG_ON(bo->ttm.base.import_attach);
+ XE_BUG_ON(!xe_bo_is_pinned(bo));
+
+ if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
+ bo->flags & XE_BO_INTERNAL_TEST)) {
+ XE_BUG_ON(list_empty(&bo->pinned_link));
+
+ spin_lock(&xe->pinned.lock);
+ list_del_init(&bo->pinned_link);
+ spin_unlock(&xe->pinned.lock);
+ }
+
+ ttm_bo_unpin(&bo->ttm);
+}
+
+/**
+ * xe_bo_validate() - Make sure the bo is in an allowed placement
+ * @bo: The bo,
+ * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
+ * NULL. Used together with @allow_res_evict.
+ * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
+ * reservation object.
+ *
+ * Make sure the bo is in allowed placement, migrating it if necessary. If
+ * needed, other bos will be evicted. If the bos selected for eviction share
+ * the @vm's reservation object, they can be evicted iff @allow_res_evict is
+ * set to true; otherwise they will be bypassed.
+ *
+ * Return: 0 on success, negative error code on failure. May return
+ * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
+ */
+int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
+{
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = false,
+ };
+
+ if (vm) {
+ lockdep_assert_held(&vm->lock);
+ xe_vm_assert_held(vm);
+
+ ctx.allow_res_evict = allow_res_evict;
+ ctx.resv = &vm->resv;
+ }
+
+ return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
+}
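A minimal sketch of locking and validating a BO that is not tied to a VM, using the xe_bo_lock() / xe_bo_unlock() helpers declared in xe_bo.h. The helper name and the simplified error handling are assumptions; a real caller may need to retry on -EINTR / -ERESTARTSYS.

	static int example_make_resident(struct xe_bo *bo)
	{
		struct ww_acquire_ctx ww;
		int err;

		err = xe_bo_lock(bo, &ww, 0, true);
		if (err)
			return err;

		/* No shared VM resv, so same-VM eviction is not relevant here. */
		err = xe_bo_validate(bo, NULL, false);
		xe_bo_unlock(bo, &ww);

		return err;
	}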
+
+bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
+{
+ if (bo->destroy == &xe_ttm_bo_destroy)
+ return true;
+
+ return false;
+}
+
+dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset,
+ size_t page_size, bool *is_lmem)
+{
+ struct xe_res_cursor cur;
+ u64 page;
+
+ if (!READ_ONCE(bo->ttm.pin_count))
+ xe_bo_assert_held(bo);
+
+ XE_BUG_ON(page_size > PAGE_SIZE);
+ page = offset >> PAGE_SHIFT;
+ offset &= (PAGE_SIZE - 1);
+
+ *is_lmem = xe_bo_is_vram(bo);
+
+ if (!*is_lmem) {
+ XE_BUG_ON(!bo->ttm.ttm);
+
+ xe_res_first_sg(xe_bo_get_sg(bo), page << PAGE_SHIFT,
+ page_size, &cur);
+ return xe_res_dma(&cur) + offset;
+ } else {
+ struct xe_res_cursor cur;
+
+ xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
+ page_size, &cur);
+ return cur.start + offset + vram_region_io_offset(bo);
+ }
+}
+
+int xe_bo_vmap(struct xe_bo *bo)
+{
+ void *virtual;
+ bool is_iomem;
+ int ret;
+
+ xe_bo_assert_held(bo);
+
+ if (!iosys_map_is_null(&bo->vmap))
+ return 0;
+
+ /*
+ * We use this more or less deprecated interface for now since
+ * ttm_bo_vmap() doesn't offer the optimization of kmapping
+ * single page bos, which is done here.
+ * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
+ * to use struct iosys_map.
+ */
+ ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap);
+ if (ret)
+ return ret;
+
+ virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
+ if (is_iomem)
+ iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
+ else
+ iosys_map_set_vaddr(&bo->vmap, virtual);
+
+ return 0;
+}
+
+static void __xe_bo_vunmap(struct xe_bo *bo)
+{
+ if (!iosys_map_is_null(&bo->vmap)) {
+ iosys_map_clear(&bo->vmap);
+ ttm_bo_kunmap(&bo->kmap);
+ }
+}
+
+void xe_bo_vunmap(struct xe_bo *bo)
+{
+ xe_bo_assert_held(bo);
+ __xe_bo_vunmap(bo);
+}
+
+int xe_gem_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = to_xe_file(file);
+ struct drm_xe_gem_create *args = data;
+ struct ww_acquire_ctx ww;
+ struct xe_vm *vm = NULL;
+ struct xe_bo *bo;
+ unsigned bo_flags = XE_BO_CREATE_USER_BIT;
+ u32 handle;
+ int err;
+
+ if (XE_IOCTL_ERR(xe, args->extensions))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, args->flags &
+ ~(XE_GEM_CREATE_FLAG_DEFER_BACKING |
+ XE_GEM_CREATE_FLAG_SCANOUT |
+ xe->info.mem_region_mask)))
+ return -EINVAL;
+
+ /* at least one memory type must be specified */
+ if (XE_IOCTL_ERR(xe, !(args->flags & xe->info.mem_region_mask)))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, args->handle))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, args->size > SIZE_MAX))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, args->size & ~PAGE_MASK))
+ return -EINVAL;
+
+ if (args->vm_id) {
+ vm = xe_vm_lookup(xef, args->vm_id);
+ if (XE_IOCTL_ERR(xe, !vm))
+ return -ENOENT;
+ err = xe_vm_lock(vm, &ww, 0, true);
+ if (err) {
+ xe_vm_put(vm);
+ return err;
+ }
+ }
+
+ if (args->flags & XE_GEM_CREATE_FLAG_DEFER_BACKING)
+ bo_flags |= XE_BO_DEFER_BACKING;
+
+ if (args->flags & XE_GEM_CREATE_FLAG_SCANOUT)
+ bo_flags |= XE_BO_SCANOUT_BIT;
+
+ bo_flags |= args->flags << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1);
+ bo = xe_bo_create(xe, NULL, vm, args->size, ttm_bo_type_device,
+ bo_flags);
+ if (vm) {
+ xe_vm_unlock(vm, &ww);
+ xe_vm_put(vm);
+ }
+
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
+ xe_bo_put(bo);
+ if (err)
+ return err;
+
+ args->handle = handle;
+
+ return 0;
+}
+
+int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct drm_xe_gem_mmap_offset *args = data;
+ struct drm_gem_object *gem_obj;
+
+ if (XE_IOCTL_ERR(xe, args->extensions))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, args->flags))
+ return -EINVAL;
+
+ gem_obj = drm_gem_object_lookup(file, args->handle);
+ if (XE_IOCTL_ERR(xe, !gem_obj))
+ return -ENOENT;
+
+ /* The mmap offset was set up at BO allocation time. */
+ args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
+
+ xe_bo_put(gem_to_xe_bo(gem_obj));
+ return 0;
+}
+
+int xe_bo_lock(struct xe_bo *bo, struct ww_acquire_ctx *ww,
+ int num_resv, bool intr)
+{
+ struct ttm_validate_buffer tv_bo;
+ LIST_HEAD(objs);
+ LIST_HEAD(dups);
+
+ XE_BUG_ON(!ww);
+
+ tv_bo.num_shared = num_resv;
+	tv_bo.bo = &bo->ttm;
+ list_add_tail(&tv_bo.head, &objs);
+
+ return ttm_eu_reserve_buffers(ww, &objs, intr, &dups);
+}
+
+void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww)
+{
+ dma_resv_unlock(bo->ttm.base.resv);
+ ww_acquire_fini(ww);
+}
+
+/**
+ * xe_bo_can_migrate - Whether a buffer object likely can be migrated
+ * @bo: The buffer object to migrate
+ * @mem_type: The TTM memory type intended to migrate to
+ *
+ * Check whether the buffer object supports migration to the
+ * given memory type. Note that pinning may affect the ability to migrate as
+ * returned by this function.
+ *
+ * This function is primarily intended as a helper for checking the
+ * possibility to migrate buffer objects and can be called without
+ * the object lock held.
+ *
+ * Return: true if migration is possible, false otherwise.
+ */
+bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
+{
+ unsigned int cur_place;
+
+ if (bo->ttm.type == ttm_bo_type_kernel)
+ return true;
+
+ if (bo->ttm.type == ttm_bo_type_sg)
+ return false;
+
+ for (cur_place = 0; cur_place < bo->placement.num_placement;
+ cur_place++) {
+ if (bo->placements[cur_place].mem_type == mem_type)
+ return true;
+ }
+
+ return false;
+}
+
+static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
+{
+ memset(place, 0, sizeof(*place));
+ place->mem_type = mem_type;
+}
+
+/**
+ * xe_bo_migrate - Migrate an object to the desired region id
+ * @bo: The buffer object to migrate.
+ * @mem_type: The TTM region type to migrate to.
+ *
+ * Attempt to migrate the buffer object to the desired memory region. The
+ * buffer object may not be pinned, and must be locked.
+ * On successful completion, the object memory type will be updated,
+ * but an async migration task may not yet have completed. To ensure the
+ * move has completed, wait for the object's kernel fences to signal with
+ * the object lock held.
+ *
+ * Return: 0 on success. Negative error code on failure. In particular may
+ * return -EINTR or -ERESTARTSYS if signal pending.
+ */
+int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
+{
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = false,
+ };
+ struct ttm_placement placement;
+ struct ttm_place requested;
+
+ xe_bo_assert_held(bo);
+
+ if (bo->ttm.resource->mem_type == mem_type)
+ return 0;
+
+ if (xe_bo_is_pinned(bo))
+ return -EBUSY;
+
+ if (!xe_bo_can_migrate(bo, mem_type))
+ return -EINVAL;
+
+ xe_place_from_ttm_type(mem_type, &requested);
+ placement.num_placement = 1;
+ placement.num_busy_placement = 1;
+ placement.placement = &requested;
+ placement.busy_placement = &requested;
+
+ return ttm_bo_validate(&bo->ttm, &placement, &ctx);
+}
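A minimal sketch of the pattern described in the kernel-doc above: lock, migrate, then wait for the kernel fences under the object lock so the async move has completed before the data is accessed. The helper name is hypothetical and the sketch assumes a BO that is not tied to a VM (so xe_bo_lock() is the appropriate locking helper).

	static int example_migrate_sync(struct xe_bo *bo, u32 mem_type)
	{
		struct ww_acquire_ctx ww;
		int err;

		err = xe_bo_lock(bo, &ww, 0, true);
		if (err)
			return err;

		err = xe_bo_migrate(bo, mem_type);
		if (!err)
			/* Wait for the async move scheduled by the migrate. */
			dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
					      false, MAX_SCHEDULE_TIMEOUT);

		xe_bo_unlock(bo, &ww);
		return err;
	}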
+
+/**
+ * xe_bo_evict - Evict an object to evict placement
+ * @bo: The buffer object to migrate.
+ * @force_alloc: Set force_alloc in ttm_operation_ctx
+ *
+ * On successful completion, the object memory will be moved to the evict
+ * placement. This function blocks until the object has been fully moved.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
+{
+ struct ttm_operation_ctx ctx = {
+ .interruptible = false,
+ .no_wait_gpu = false,
+ .force_alloc = force_alloc,
+ };
+ struct ttm_placement placement;
+ int ret;
+
+ xe_evict_flags(&bo->ttm, &placement);
+ ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
+ if (ret)
+ return ret;
+
+ dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+ false, MAX_SCHEDULE_TIMEOUT);
+
+ return 0;
+}
+
+/**
+ * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
+ * placed in system memory.
+ * @bo: The xe_bo
+ *
+ * If a bo has an allowable placement in XE_PL_TT memory, it can't use
+ * flat CCS compression, because the GPU then has no way to access the
+ * CCS metadata using relevant commands. In the opposite case (VRAM-only
+ * placement), we need to allocate storage for the CCS metadata so it can be
+ * preserved while the BO is not resident in VRAM.
+ *
+ * Return: true if extra pages need to be allocated, false otherwise.
+ */
+bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
+{
+ return bo->ttm.type == ttm_bo_type_device &&
+ !(bo->flags & XE_BO_CREATE_SYSTEM_BIT) &&
+ (bo->flags & (XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_VRAM1_BIT));
+}
+
+/**
+ * __xe_bo_release_dummy() - Dummy kref release function
+ * @kref: The embedded struct kref.
+ *
+ * Dummy release function for xe_bo_put_deferred(). Keep off.
+ */
+void __xe_bo_release_dummy(struct kref *kref)
+{
+}
+
+/**
+ * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
+ * @deferred: The lockless list used for the call to xe_bo_put_deferred().
+ *
+ * Puts all bos whose put was deferred by xe_bo_put_deferred().
+ * The @deferred list can be either an onstack local list or a global
+ * shared list used by a workqueue.
+ */
+void xe_bo_put_commit(struct llist_head *deferred)
+{
+ struct llist_node *freed;
+ struct xe_bo *bo, *next;
+
+ if (!deferred)
+ return;
+
+ freed = llist_del_all(deferred);
+ if (!freed)
+ return;
+
+ llist_for_each_entry_safe(bo, next, freed, freed)
+ drm_gem_object_free(&bo->ttm.base.refcount);
+}
+
+/**
+ * xe_bo_dumb_create - Create a dumb bo as backing for a fb
+ * @file_priv: ...
+ * @dev: ...
+ * @args: ...
+ *
+ * See dumb_create() hook in include/drm/drm_drv.h
+ *
+ * Return: ...
+ */
+int xe_bo_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_bo *bo;
+ uint32_t handle;
+ int cpp = DIV_ROUND_UP(args->bpp, 8);
+ int err;
+ u32 page_size = max_t(u32, PAGE_SIZE,
+ xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
+
+ args->pitch = ALIGN(args->width * cpp, 64);
+ args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
+ page_size);
+
+ bo = xe_bo_create(xe, NULL, NULL, args->size, ttm_bo_type_device,
+ XE_BO_CREATE_VRAM_IF_DGFX(to_gt(xe)) |
+ XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
+ /* drop reference from allocate - handle holds it now */
+ drm_gem_object_put(&bo->ttm.base);
+ if (!err)
+ args->handle = handle;
+ return err;
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_bo.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
new file mode 100644
index 000000000000..1a49c0a3c4c6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -0,0 +1,290 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_BO_H_
+#define _XE_BO_H_
+
+#include "xe_bo_types.h"
+#include "xe_macros.h"
+#include "xe_vm_types.h"
+
+#define XE_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
+
+#define XE_BO_CREATE_USER_BIT BIT(1)
+#define XE_BO_CREATE_SYSTEM_BIT BIT(2)
+#define XE_BO_CREATE_VRAM0_BIT BIT(3)
+#define XE_BO_CREATE_VRAM1_BIT BIT(4)
+#define XE_BO_CREATE_VRAM_IF_DGFX(gt) \
+ (IS_DGFX(gt_to_xe(gt)) ? XE_BO_CREATE_VRAM0_BIT << gt->info.vram_id : \
+ XE_BO_CREATE_SYSTEM_BIT)
+#define XE_BO_CREATE_GGTT_BIT BIT(5)
+#define XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT BIT(6)
+#define XE_BO_CREATE_PINNED_BIT BIT(7)
+#define XE_BO_DEFER_BACKING BIT(8)
+#define XE_BO_SCANOUT_BIT BIT(9)
+/* this one is triggered internally only */
+#define XE_BO_INTERNAL_TEST BIT(30)
+#define XE_BO_INTERNAL_64K BIT(31)
+
+#define PPAT_UNCACHED GENMASK_ULL(4, 3)
+#define PPAT_CACHED_PDE 0
+#define PPAT_CACHED BIT_ULL(7)
+#define PPAT_DISPLAY_ELLC BIT_ULL(4)
+
+#define GEN8_PTE_SHIFT 12
+#define GEN8_PAGE_SIZE (1 << GEN8_PTE_SHIFT)
+#define GEN8_PTE_MASK (GEN8_PAGE_SIZE - 1)
+#define GEN8_PDE_SHIFT (GEN8_PTE_SHIFT - 3)
+#define GEN8_PDES (1 << GEN8_PDE_SHIFT)
+#define GEN8_PDE_MASK (GEN8_PDES - 1)
+
+#define GEN8_64K_PTE_SHIFT 16
+#define GEN8_64K_PAGE_SIZE (1 << GEN8_64K_PTE_SHIFT)
+#define GEN8_64K_PTE_MASK (GEN8_64K_PAGE_SIZE - 1)
+#define GEN8_64K_PDE_MASK (GEN8_PDE_MASK >> 4)
+
+#define GEN8_PDE_PS_2M BIT_ULL(7)
+#define GEN8_PDPE_PS_1G BIT_ULL(7)
+#define GEN8_PDE_IPS_64K BIT_ULL(11)
+
+#define GEN12_GGTT_PTE_LM BIT_ULL(1)
+#define GEN12_USM_PPGTT_PTE_AE BIT_ULL(10)
+#define GEN12_PPGTT_PTE_LM BIT_ULL(11)
+#define GEN12_PDE_64K BIT_ULL(6)
+#define GEN12_PTE_PS64 BIT_ULL(8)
+
+#define GEN8_PAGE_PRESENT BIT_ULL(0)
+#define GEN8_PAGE_RW BIT_ULL(1)
+
+#define PTE_READ_ONLY BIT(0)
+
+#define XE_PL_SYSTEM TTM_PL_SYSTEM
+#define XE_PL_TT TTM_PL_TT
+#define XE_PL_VRAM0 TTM_PL_VRAM
+#define XE_PL_VRAM1 (XE_PL_VRAM0 + 1)
+
+#define XE_BO_PROPS_INVALID (-1)
+
+struct sg_table;
+
+struct xe_bo *xe_bo_alloc(void);
+void xe_bo_free(struct xe_bo *bo);
+
+struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
+ struct xe_gt *gt, struct dma_resv *resv,
+ size_t size, enum ttm_bo_type type,
+ u32 flags);
+struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt,
+ struct xe_vm *vm, size_t size,
+ enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt,
+ struct xe_vm *vm, size_t size,
+ enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt,
+ struct xe_vm *vm, size_t size,
+ enum ttm_bo_type type, u32 flags);
+struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt,
+ const void *data, size_t size,
+ enum ttm_bo_type type, u32 flags);
+
+int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
+ u32 bo_flags);
+
+static inline struct xe_bo *ttm_to_xe_bo(const struct ttm_buffer_object *bo)
+{
+ return container_of(bo, struct xe_bo, ttm);
+}
+
+static inline struct xe_bo *gem_to_xe_bo(const struct drm_gem_object *obj)
+{
+ return container_of(obj, struct xe_bo, ttm.base);
+}
+
+#define xe_bo_device(bo) ttm_to_xe_device((bo)->ttm.bdev)
+
+static inline struct xe_bo *xe_bo_get(struct xe_bo *bo)
+{
+ if (bo)
+ drm_gem_object_get(&bo->ttm.base);
+
+ return bo;
+}
+
+static inline void xe_bo_put(struct xe_bo *bo)
+{
+ if (bo)
+ drm_gem_object_put(&bo->ttm.base);
+}
+
+static inline void xe_bo_assert_held(struct xe_bo *bo)
+{
+ if (bo)
+ dma_resv_assert_held((bo)->ttm.base.resv);
+}
+
+int xe_bo_lock(struct xe_bo *bo, struct ww_acquire_ctx *ww,
+ int num_resv, bool intr);
+
+void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww);
+
+static inline void xe_bo_unlock_vm_held(struct xe_bo *bo)
+{
+ if (bo) {
+ XE_BUG_ON(bo->vm && bo->ttm.base.resv != &bo->vm->resv);
+ if (bo->vm)
+ xe_vm_assert_held(bo->vm);
+ else
+ dma_resv_unlock(bo->ttm.base.resv);
+ }
+}
+
+static inline void xe_bo_lock_no_vm(struct xe_bo *bo,
+ struct ww_acquire_ctx *ctx)
+{
+ if (bo) {
+ XE_BUG_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg &&
+ bo->ttm.base.resv != &bo->ttm.base._resv));
+ dma_resv_lock(bo->ttm.base.resv, ctx);
+ }
+}
+
+static inline void xe_bo_unlock_no_vm(struct xe_bo *bo)
+{
+ if (bo) {
+ XE_BUG_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg &&
+ bo->ttm.base.resv != &bo->ttm.base._resv));
+ dma_resv_unlock(bo->ttm.base.resv);
+ }
+}
+
+int xe_bo_pin_external(struct xe_bo *bo);
+int xe_bo_pin(struct xe_bo *bo);
+void xe_bo_unpin_external(struct xe_bo *bo);
+void xe_bo_unpin(struct xe_bo *bo);
+int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict);
+
+static inline bool xe_bo_is_pinned(struct xe_bo *bo)
+{
+ return bo->ttm.pin_count;
+}
+
+static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo)
+{
+ if (likely(bo)) {
+ xe_bo_lock_no_vm(bo, NULL);
+ xe_bo_unpin(bo);
+ xe_bo_unlock_no_vm(bo);
+
+ xe_bo_put(bo);
+ }
+}
+
+bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo);
+dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset,
+ size_t page_size, bool *is_lmem);
+
+static inline dma_addr_t
+xe_bo_main_addr(struct xe_bo *bo, size_t page_size)
+{
+ bool is_lmem;
+
+ return xe_bo_addr(bo, 0, page_size, &is_lmem);
+}
+
+static inline u32
+xe_bo_ggtt_addr(struct xe_bo *bo)
+{
+ XE_BUG_ON(bo->ggtt_node.size > bo->size);
+ XE_BUG_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32));
+ return bo->ggtt_node.start;
+}
+
+int xe_bo_vmap(struct xe_bo *bo);
+void xe_bo_vunmap(struct xe_bo *bo);
+
+bool mem_type_is_vram(u32 mem_type);
+bool xe_bo_is_vram(struct xe_bo *bo);
+
+bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
+
+int xe_bo_migrate(struct xe_bo *bo, u32 mem_type);
+int xe_bo_evict(struct xe_bo *bo, bool force_alloc);
+
+extern struct ttm_device_funcs xe_ttm_funcs;
+
+int xe_gem_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int xe_bo_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args);
+
+bool xe_bo_needs_ccs_pages(struct xe_bo *bo);
+
+static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo)
+{
+ return PAGE_ALIGN(bo->ttm.base.size);
+}
+
+void __xe_bo_release_dummy(struct kref *kref);
+
+/**
+ * xe_bo_put_deferred() - Put a buffer object with delayed final freeing
+ * @bo: The bo to put.
+ * @deferred: List to which to add the buffer object if we cannot put, or
+ * NULL if the function is to put unconditionally.
+ *
+ * Since the final freeing of an object includes both sleeping and (!)
+ * memory allocation in the dma_resv individualization, it's not ok
+ * to put an object from atomic context nor from within a held lock
+ * tainted by reclaim. In such situations we want to defer the final
+ * freeing until we've exited the restricting context, or in the worst
+ * case to a workqueue.
+ * This function either puts the object if possible without the refcount
+ * reaching zero, or adds it to the @deferred list if that was not possible.
+ * The caller needs to follow up with a call to xe_bo_put_commit() to actually
+ * put the bo iff this function returns true. It's safe to always
+ * follow up with a call to xe_bo_put_commit().
+ * TODO: It's TTM that is the villain here. Perhaps TTM should add an
+ * interface like this.
+ *
+ * Return: true if @bo was the first object put on the @deferred list,
+ * false otherwise.
+ */
+static inline bool
+xe_bo_put_deferred(struct xe_bo *bo, struct llist_head *deferred)
+{
+ if (!deferred) {
+ xe_bo_put(bo);
+ return false;
+ }
+
+ if (!kref_put(&bo->ttm.base.refcount, __xe_bo_release_dummy))
+ return false;
+
+ return llist_add(&bo->freed, deferred);
+}
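A minimal sketch of the deferred-put pattern documented above, assuming the put happens under a spinlock that must not be held across the final free. The lock and helper name are hypothetical, and the snippet is shown as it would appear inside the driver where the llist and spinlock headers are already available.

	static void example_drop_ref(struct xe_bo *bo, spinlock_t *example_lock)
	{
		LLIST_HEAD(deferred);

		spin_lock(example_lock);
		/* The final free (sleeping, allocating) is postponed... */
		xe_bo_put_deferred(bo, &deferred);
		spin_unlock(example_lock);

		/* ...and performed here, outside the restricted context. */
		xe_bo_put_commit(&deferred);
	}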
+
+void xe_bo_put_commit(struct llist_head *deferred);
+
+struct sg_table *xe_bo_get_sg(struct xe_bo *bo);
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+/**
+ * xe_bo_is_mem_type - Whether the bo currently resides in the given
+ * TTM memory type
+ * @bo: The bo to check.
+ * @mem_type: The TTM memory type.
+ *
+ * Return: true iff the bo resides in @mem_type, false otherwise.
+ */
+static inline bool xe_bo_is_mem_type(struct xe_bo *bo, u32 mem_type)
+{
+ xe_bo_assert_held(bo);
+ return bo->ttm.resource->mem_type == mem_type;
+}
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo_doc.h b/drivers/gpu/drm/xe/xe_bo_doc.h
new file mode 100644
index 000000000000..f57d440cc95a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo_doc.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BO_DOC_H_
+#define _XE_BO_DOC_H_
+
+/**
+ * DOC: Buffer Objects (BO)
+ *
+ * BO management
+ * =============
+ *
+ * TTM manages (placement, eviction, etc...) all BOs in XE.
+ *
+ * BO creation
+ * ===========
+ *
+ * Create a chunk of memory which can be used by the GPU. Placement rules
+ * (sysmem or vram region) passed in upon creation. TTM handles placement of BO
+ * and can trigger eviction of other BOs to make space for the new BO.
+ *
+ * Kernel BOs
+ * ----------
+ *
+ * A kernel BO is created as part of driver load (e.g. uC firmware images, GuC
+ * ADS, etc...) or a BO created as part of a user operation which requires
+ * a kernel BO (e.g. engine state, memory for page tables, etc...). These BOs
+ * are typically mapped in the GGTT (any kernel BOs aside from memory for page tables
+ * are in the GGTT), are pinned (can't move or be evicted at runtime), have a
+ * vmap (XE can access the memory via xe_map layer) and have contiguous physical
+ * memory.
+ *
+ * More details of why kernel BOs are pinned and contiguous below.
+ *
+ * User BOs
+ * --------
+ *
+ * A user BO is created via the DRM_IOCTL_XE_GEM_CREATE IOCTL. Once it is
+ * created the BO can be mmap'd (via DRM_IOCTL_XE_GEM_MMAP_OFFSET) for user
+ * access and it can be bound for GPU access (via DRM_IOCTL_XE_VM_BIND). All
+ * user BOs are evictable and user BOs are never pinned by XE. The allocation of
+ * the backing store can be deferred from creation time until first use, which is
+ * either mmap, bind, or pagefault.
+ *
+ * Private BOs
+ * ~~~~~~~~~~~
+ *
+ * A private BO is a user BO created with a valid VM argument passed into the
+ * create IOCTL. If a BO is private it cannot be exported via prime FD and
+ * mappings can only be created for the BO within the VM it is tied to. Lastly,
+ * the BO dma-resv slots / lock point to the VM's dma-resv slots / lock (all
+ * private BOs to a VM share common dma-resv slots / lock).
+ *
+ * External BOs
+ * ~~~~~~~~~~~~
+ *
+ * An external BO is a user BO created with a NULL VM argument passed into the
+ * create IOCTL. An external BO can be shared with different UMDs / devices via
+ * prime FD and the BO can be mapped into multiple VMs. An external BO has its
+ * own unique dma-resv slots / lock. An external BO will be in an array of all
+ * VMs which has a mapping of the BO. This allows VMs to lookup and lock all
+ * external BOs mapped in the VM as needed.
+ *
+ * BO placement
+ * ~~~~~~~~~~~~
+ *
+ * When a user BO is created, a mask of valid placements is passed indicating
+ * which memory regions are considered valid.
+ *
+ * The memory region information is available via query uAPI (TODO: add link).
+ *
+ * BO validation
+ * =============
+ *
+ * BO validation (ttm_bo_validate) refers to ensuring a BO has a valid
+ * placement. If a BO was swapped to temporary storage, a validation call will
+ * trigger a move back to a valid (location where GPU can access BO) placement.
+ * Validation of a BO may evict other BOs to make room for the BO being
+ * validated.
+ *
+ * BO eviction / moving
+ * ====================
+ *
+ * All eviction (or in other words, moving a BO from one memory location to
+ * another) is routed through TTM with a callback into XE.
+ *
+ * Runtime eviction
+ * ----------------
+ *
+ * Runtime eviction refers to TTM deciding, during normal operation, that it
+ * needs to move a BO. Typically this is because TTM needs to make room for
+ * another BO and the evicted BO is the first BO on the LRU list that is not locked.
+ *
+ * An example of this is a new BO which can only be placed in VRAM but there is
+ * no space in VRAM. There could be multiple BOs which have sysmem and VRAM
+ * placement rules and currently reside in VRAM; TTM will trigger a move of
+ * one (or multiple) of these BO(s) until there is room in VRAM to place the new
+ * BO. The evicted BO(s) are valid but still need new bindings before the BO is
+ * used again (exec or compute mode rebind worker).
+ *
+ * Another example would be, TTM can't find a BO to evict which has another
+ * valid placement. In this case TTM will evict one (or multiple) unlocked BO(s)
+ * to a temporary unreachable (invalid) placement. The evicted BO(s) are invalid
+ * and before next use need to be moved to a valid placement and rebound.
+ *
+ * In both cases, moves of these BOs are scheduled behind the fences in the BO's
+ * dma-resv slots.
+ *
+ * WW locking tries to ensure that if 2 VMs use 51% of the memory, forward progress
+ * is made on both VMs.
+ *
+ * Runtime eviction uses a per-GT migration engine (TODO: link to migration
+ * engine doc) to do a GPU memcpy from one location to another.
+ *
+ * Rebinds after runtime eviction
+ * ------------------------------
+ *
+ * When BOs are moved, every mapping (VMA) of the BO needs to be rebound before
+ * the BO is used again. Every VMA is added to an evicted list of its VM when
+ * the BO is moved. This is safe because of the VM locking structure (TODO: link
+ * to VM locking doc). On the next use of a VM (exec or compute mode rebind
+ * worker) the evicted VMA list is checked and rebinds are triggered. In the
+ * case of a faulting VM, the rebind is done in the page fault handler.
+ *
+ * Suspend / resume eviction of VRAM
+ * ---------------------------------
+ *
+ * During device suspend / resume, VRAM may lose power, which means the contents
+ * of VRAM are blown away. Thus BOs present in VRAM at the time of
+ * suspend must be moved to sysmem in order for their contents to be saved.
+ *
+ * A simple TTM call (ttm_resource_manager_evict_all) can move all non-pinned
+ * (user) BOs to sysmem. External BOs that are pinned need to be manually
+ * evicted with a simple loop + xe_bo_evict call. It gets a little trickier
+ * with kernel BOs.
+ *
+ * Some kernel BOs are used by the GT migration engine to do moves, thus we
+ * can't move all of the BOs via the GT migration engine. For simplicity, we use a
+ * TTM memcpy (CPU) to move any kernel (pinned) BO on either suspend or resume.
+ *
+ * Some kernel BOs need to be restored to the exact same physical location. TTM
+ * makes this rather easy but the caveat is the memory must be contiguous. Again
+ * for simplicity, we enforce that all kernel (pinned) BOs are contiguous and
+ * restored to the same physical location.
+ *
+ * Pinned external BOs in VRAM are restored on resume via the GPU.
+ *
+ * Rebinds after suspend / resume
+ * ------------------------------
+ *
+ * Most kernel BOs have GGTT mappings which must be restored during the resume
+ * process. All user BOs are rebound after validation on their next use.
+ *
+ * Future work
+ * ===========
+ *
+ * Trim the list of BOs which is saved / restored via TTM memcpy on suspend /
+ * resume. All we really need to save / restore via TTM memcpy is the memory
+ * required for the GuC to load and the memory for the GT migrate engine to
+ * operate.
+ *
+ * Do not require kernel BOs to be contiguous in physical memory / restored to
+ * the same physical address on resume. In all likelihood the only memory that
+ * needs to be restored to the same physical address is memory used for page
+ * tables. All of that memory is allocated 1 page at a time, so the contiguous
+ * requirement isn't needed. Some work on the vmap code would also need to be done
+ * if kernel BOs are not contiguous.
+ *
+ * Make some kernel BOs evictable rather than pinned. An example of this would be
+ * engine state; in all likelihood, if the dma-resv slots of these BOs were properly
+ * used rather than pinning, we could safely evict + rebind these BOs as needed.
+ *
+ * Some kernel BOs do not need to be restored on resume (e.g. GuC ADS as that is
+ * repopulated on resume), add flag to mark such objects as no save / restore.
+ */
+
+#endif
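To make the "User BOs" flow above concrete, here is a userspace-side sketch of creating an external (VM-less) user BO and mapping it, using the DRM_IOCTL_XE_GEM_CREATE and DRM_IOCTL_XE_GEM_MMAP_OFFSET ioctls mentioned in the DOC section. The struct field names follow the xe uAPI handled by xe_gem_create_ioctl() / xe_gem_mmap_offset_ioctl(); the placement flags value and the bare-bones error handling are illustrative assumptions.

	#include <stdint.h>
	#include <sys/mman.h>
	#include <xf86drm.h>
	#include "xe_drm.h"

	static void *example_create_and_map(int fd, uint64_t size, uint32_t placement_flags)
	{
		struct drm_xe_gem_create create = {
			.size = size,			/* must be page aligned */
			.flags = placement_flags,	/* memory region mask */
			.vm_id = 0,			/* 0 == external BO */
		};
		struct drm_xe_gem_mmap_offset mmo = {};

		if (drmIoctl(fd, DRM_IOCTL_XE_GEM_CREATE, &create))
			return NULL;

		mmo.handle = create.handle;
		if (drmIoctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo))
			return NULL;

		/* Map the BO through the DRM device using the returned offset. */
		return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
			    fd, mmo.offset);
	}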
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
new file mode 100644
index 000000000000..7046dc203138
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bo.h"
+#include "xe_bo_evict.h"
+#include "xe_device.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+
+/**
+ * xe_bo_evict_all - evict all BOs from VRAM
+ *
+ * @xe: xe device
+ *
+ * Evict non-pinned user BOs first (via GPU), evict pinned external BOs next
+ * (via GPU), wait for evictions, and finally evict pinned kernel BOs via CPU.
+ * All eviction magic done via TTM calls.
+ *
+ * Evict == move VRAM BOs to temporary (typically system) memory.
+ *
+ * This function should be called before the device goes into a suspend state
+ * where the VRAM loses power.
+ */
+int xe_bo_evict_all(struct xe_device *xe)
+{
+ struct ttm_device *bdev = &xe->ttm;
+ struct ww_acquire_ctx ww;
+ struct xe_bo *bo;
+ struct xe_gt *gt;
+ struct list_head still_in_list;
+ u32 mem_type;
+ u8 id;
+ int ret;
+
+ if (!IS_DGFX(xe))
+ return 0;
+
+ /* User memory */
+ for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
+ struct ttm_resource_manager *man =
+ ttm_manager_type(bdev, mem_type);
+
+ if (man) {
+ ret = ttm_resource_manager_evict_all(bdev, man);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /* Pinned user memory in VRAM */
+ INIT_LIST_HEAD(&still_in_list);
+ spin_lock(&xe->pinned.lock);
+ for (;;) {
+ bo = list_first_entry_or_null(&xe->pinned.external_vram,
+ typeof(*bo), pinned_link);
+ if (!bo)
+ break;
+ xe_bo_get(bo);
+ list_move_tail(&bo->pinned_link, &still_in_list);
+ spin_unlock(&xe->pinned.lock);
+
+ xe_bo_lock(bo, &ww, 0, false);
+ ret = xe_bo_evict(bo, true);
+ xe_bo_unlock(bo, &ww);
+ xe_bo_put(bo);
+ if (ret) {
+ spin_lock(&xe->pinned.lock);
+ list_splice_tail(&still_in_list,
+ &xe->pinned.external_vram);
+ spin_unlock(&xe->pinned.lock);
+ return ret;
+ }
+
+ spin_lock(&xe->pinned.lock);
+ }
+ list_splice_tail(&still_in_list, &xe->pinned.external_vram);
+ spin_unlock(&xe->pinned.lock);
+
+ /*
+	 * Wait for all user BOs to be evicted as those evictions depend on the
+ * memory moved below.
+ */
+ for_each_gt(gt, xe, id)
+ xe_gt_migrate_wait(gt);
+
+ spin_lock(&xe->pinned.lock);
+ for (;;) {
+ bo = list_first_entry_or_null(&xe->pinned.kernel_bo_present,
+ typeof(*bo), pinned_link);
+ if (!bo)
+ break;
+ xe_bo_get(bo);
+ list_move_tail(&bo->pinned_link, &xe->pinned.evicted);
+ spin_unlock(&xe->pinned.lock);
+
+ xe_bo_lock(bo, &ww, 0, false);
+ ret = xe_bo_evict(bo, true);
+ xe_bo_unlock(bo, &ww);
+ xe_bo_put(bo);
+ if (ret)
+ return ret;
+
+ spin_lock(&xe->pinned.lock);
+ }
+ spin_unlock(&xe->pinned.lock);
+
+ return 0;
+}
+
+/**
+ * xe_bo_restore_kernel - restore kernel BOs to VRAM
+ *
+ * @xe: xe device
+ *
+ * Move kernel BOs from temporary (typically system) memory to VRAM via CPU. All
+ * moves done via TTM calls.
+ *
+ * This function should be called early, before trying to init the GT, on device
+ * resume.
+ */
+int xe_bo_restore_kernel(struct xe_device *xe)
+{
+ struct ww_acquire_ctx ww;
+ struct xe_bo *bo;
+ int ret;
+
+ if (!IS_DGFX(xe))
+ return 0;
+
+ spin_lock(&xe->pinned.lock);
+ for (;;) {
+ bo = list_first_entry_or_null(&xe->pinned.evicted,
+ typeof(*bo), pinned_link);
+ if (!bo)
+ break;
+ xe_bo_get(bo);
+ list_move_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
+ spin_unlock(&xe->pinned.lock);
+
+ xe_bo_lock(bo, &ww, 0, false);
+ ret = xe_bo_validate(bo, NULL, false);
+ xe_bo_unlock(bo, &ww);
+ if (ret) {
+ xe_bo_put(bo);
+ return ret;
+ }
+
+ if (bo->flags & XE_BO_CREATE_GGTT_BIT)
+ xe_ggtt_map_bo(bo->gt->mem.ggtt, bo);
+
+ /*
+		 * We expect validate to trigger a move to VRAM and our move code
+		 * should set up the iosys map.
+ */
+ XE_BUG_ON(iosys_map_is_null(&bo->vmap));
+ XE_BUG_ON(!xe_bo_is_vram(bo));
+
+ xe_bo_put(bo);
+
+ spin_lock(&xe->pinned.lock);
+ }
+ spin_unlock(&xe->pinned.lock);
+
+ return 0;
+}
+
+/**
+ * xe_bo_restore_user - restore pinned user BOs to VRAM
+ *
+ * @xe: xe device
+ *
+ * Move pinned user BOs from temporary (typically system) memory to VRAM via
+ * CPU. All moves done via TTM calls.
+ *
+ * This function should be called late, after GT init, on device resume.
+ */
+int xe_bo_restore_user(struct xe_device *xe)
+{
+ struct ww_acquire_ctx ww;
+ struct xe_bo *bo;
+ struct xe_gt *gt;
+ struct list_head still_in_list;
+ u8 id;
+ int ret;
+
+ if (!IS_DGFX(xe))
+ return 0;
+
+ /* Pinned user memory in VRAM should be validated on resume */
+ INIT_LIST_HEAD(&still_in_list);
+ spin_lock(&xe->pinned.lock);
+ for (;;) {
+ bo = list_first_entry_or_null(&xe->pinned.external_vram,
+ typeof(*bo), pinned_link);
+ if (!bo)
+ break;
+ list_move_tail(&bo->pinned_link, &still_in_list);
+ xe_bo_get(bo);
+ spin_unlock(&xe->pinned.lock);
+
+ xe_bo_lock(bo, &ww, 0, false);
+ ret = xe_bo_validate(bo, NULL, false);
+ xe_bo_unlock(bo, &ww);
+ xe_bo_put(bo);
+ if (ret) {
+ spin_lock(&xe->pinned.lock);
+ list_splice_tail(&still_in_list,
+ &xe->pinned.external_vram);
+ spin_unlock(&xe->pinned.lock);
+ return ret;
+ }
+
+ spin_lock(&xe->pinned.lock);
+ }
+ list_splice_tail(&still_in_list, &xe->pinned.external_vram);
+ spin_unlock(&xe->pinned.lock);
+
+ /* Wait for validate to complete */
+ for_each_gt(gt, xe, id)
+ xe_gt_migrate_wait(gt);
+
+ return 0;
+}
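
For orientation, here is a minimal sketch (not part of this patch) of how a device suspend/resume path might sequence the three helpers above; the wrapper function names are assumptions, only the xe_bo_* calls come from this file:

static int example_suspend(struct xe_device *xe)
{
	/* Evict pinned kernel BOs and pinned user VRAM BOs to system memory. */
	return xe_bo_evict_all(xe);
}

static int example_resume(struct xe_device *xe)
{
	int err;

	/* Kernel BOs must be back in VRAM before the GT is reinitialized. */
	err = xe_bo_restore_kernel(xe);
	if (err)
		return err;

	/* ... GT init would run here ... */

	/* Pinned user BOs are restored last, after GT init. */
	return xe_bo_restore_user(xe);
}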
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.h b/drivers/gpu/drm/xe/xe_bo_evict.h
new file mode 100644
index 000000000000..746894798852
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo_evict.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BO_EVICT_H_
+#define _XE_BO_EVICT_H_
+
+struct xe_device;
+
+int xe_bo_evict_all(struct xe_device *xe);
+int xe_bo_restore_kernel(struct xe_device *xe);
+int xe_bo_restore_user(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
new file mode 100644
index 000000000000..06de3330211d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_BO_TYPES_H_
+#define _XE_BO_TYPES_H_
+
+#include <linux/iosys-map.h>
+
+#include <drm/drm_mm.h>
+#include <drm/ttm/ttm_bo.h>
+#include <drm/ttm/ttm_device.h>
+#include <drm/ttm/ttm_execbuf_util.h>
+#include <drm/ttm/ttm_placement.h>
+
+struct xe_device;
+struct xe_vm;
+
+#define XE_BO_MAX_PLACEMENTS 3
+
+/**
+ * struct xe_bo - XE buffer object
+ */
+struct xe_bo {
+ /** @ttm: TTM base buffer object */
+ struct ttm_buffer_object ttm;
+ /** @size: Size of this buffer object */
+ size_t size;
+ /** @flags: flags for this buffer object */
+ u32 flags;
+ /** @vm: VM this BO is attached to, for extobj this will be NULL */
+ struct xe_vm *vm;
+ /** @gt: GT this BO is attached to (kernel BO only) */
+ struct xe_gt *gt;
+ /** @vmas: List of VMAs for this BO */
+ struct list_head vmas;
+ /** @placements: valid placements for this BO */
+ struct ttm_place placements[XE_BO_MAX_PLACEMENTS];
+ /** @placement: current placement for this BO */
+ struct ttm_placement placement;
+ /** @ggtt_node: GGTT node if this BO is mapped in the GGTT */
+ struct drm_mm_node ggtt_node;
+ /** @vmap: iosys map of this buffer */
+ struct iosys_map vmap;
+ /** @kmap: TTM bo kmap object for internal use only. Keep off. */
+ struct ttm_bo_kmap_obj kmap;
+ /** @pinned_link: link to present / evicted list of pinned BO */
+ struct list_head pinned_link;
+ /** @props: BO user controlled properties */
+ struct {
+ /** @preferred_mem_class: preferred memory class for this BO */
+ s16 preferred_mem_class;
+ /** @preferred_gt: preferred GT for this BO */
+ s16 preferred_gt;
+ /** @preferred_mem_type: preferred memory type */
+ s32 preferred_mem_type;
+ /**
+ * @cpu_atomic: the CPU expects to do atomic operations on
+ * this BO
+ */
+ bool cpu_atomic;
+ /**
+ * @device_atomic: the device expects to do atomic operations
+ * on this BO
+ */
+ bool device_atomic;
+ } props;
+ /** @freed: List node for delayed put. */
+ struct llist_node freed;
+ /** @created: Whether the bo has passed initial creation */
+ bool created;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
new file mode 100644
index 000000000000..84db7b3f501e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/string_helpers.h>
+
+#include <drm/drm_debugfs.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_debugfs.h"
+#include "xe_gt_debugfs.h"
+#include "xe_step.h"
+
+#ifdef CONFIG_DRM_XE_DEBUG
+#include "xe_bo_evict.h"
+#include "xe_migrate.h"
+#include "xe_vm.h"
+#endif
+
+static struct xe_device *node_to_xe(struct drm_info_node *node)
+{
+ return to_xe_device(node->minor->dev);
+}
+
+static int info(struct seq_file *m, void *data)
+{
+ struct xe_device *xe = node_to_xe(m->private);
+ struct drm_printer p = drm_seq_file_printer(m);
+ struct xe_gt *gt;
+ u8 id;
+
+ drm_printf(&p, "graphics_verx100 %d\n", xe->info.graphics_verx100);
+ drm_printf(&p, "media_verx100 %d\n", xe->info.media_verx100);
+ drm_printf(&p, "stepping G:%s M:%s D:%s B:%s\n",
+ xe_step_name(xe->info.step.graphics),
+ xe_step_name(xe->info.step.media),
+ xe_step_name(xe->info.step.display),
+ xe_step_name(xe->info.step.basedie));
+ drm_printf(&p, "is_dgfx %s\n", str_yes_no(xe->info.is_dgfx));
+ drm_printf(&p, "platform %d\n", xe->info.platform);
+ drm_printf(&p, "subplatform %d\n",
+ xe->info.subplatform > XE_SUBPLATFORM_NONE ? xe->info.subplatform : 0);
+ drm_printf(&p, "devid 0x%x\n", xe->info.devid);
+ drm_printf(&p, "revid %d\n", xe->info.revid);
+ drm_printf(&p, "tile_count %d\n", xe->info.tile_count);
+ drm_printf(&p, "vm_max_level %d\n", xe->info.vm_max_level);
+ drm_printf(&p, "enable_guc %s\n", str_yes_no(xe->info.enable_guc));
+ drm_printf(&p, "supports_usm %s\n", str_yes_no(xe->info.supports_usm));
+ drm_printf(&p, "has_flat_ccs %s\n", str_yes_no(xe->info.has_flat_ccs));
+ for_each_gt(gt, xe, id) {
+ drm_printf(&p, "gt%d force wake %d\n", id,
+ xe_force_wake_ref(gt_to_fw(gt), XE_FW_GT));
+ drm_printf(&p, "gt%d engine_mask 0x%llx\n", id,
+ gt->info.engine_mask);
+ }
+
+ return 0;
+}
+
+static const struct drm_info_list debugfs_list[] = {
+ {"info", info, 0},
+};
+
+static int forcewake_open(struct inode *inode, struct file *file)
+{
+ struct xe_device *xe = inode->i_private;
+ struct xe_gt *gt;
+ u8 id;
+
+ for_each_gt(gt, xe, id)
+ XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+
+ return 0;
+}
+
+static int forcewake_release(struct inode *inode, struct file *file)
+{
+ struct xe_device *xe = inode->i_private;
+ struct xe_gt *gt;
+ u8 id;
+
+ for_each_gt(gt, xe, id)
+ XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+
+ return 0;
+}
+
+static const struct file_operations forcewake_all_fops = {
+ .owner = THIS_MODULE,
+ .open = forcewake_open,
+ .release = forcewake_release,
+};
+
+void xe_debugfs_register(struct xe_device *xe)
+{
+ struct ttm_device *bdev = &xe->ttm;
+ struct drm_minor *minor = xe->drm.primary;
+ struct dentry *root = minor->debugfs_root;
+ struct ttm_resource_manager *man;
+ struct xe_gt *gt;
+ u32 mem_type;
+ u8 id;
+
+ drm_debugfs_create_files(debugfs_list,
+ ARRAY_SIZE(debugfs_list),
+ root, minor);
+
+ debugfs_create_file("forcewake_all", 0400, root, xe,
+ &forcewake_all_fops);
+
+ for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
+ man = ttm_manager_type(bdev, mem_type);
+
+ if (man) {
+ char name[16];
+
+ sprintf(name, "vram%d_mm", mem_type - XE_PL_VRAM0);
+ ttm_resource_manager_create_debugfs(man, root, name);
+ }
+ }
+
+ man = ttm_manager_type(bdev, XE_PL_TT);
+ ttm_resource_manager_create_debugfs(man, root, "gtt_mm");
+
+ for_each_gt(gt, xe, id)
+ xe_gt_debugfs_register(gt);
+}
diff --git a/drivers/gpu/drm/xe/xe_debugfs.h b/drivers/gpu/drm/xe/xe_debugfs.h
new file mode 100644
index 000000000000..715b8e2e0bd9
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_debugfs.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_DEBUGFS_H_
+#define _XE_DEBUGFS_H_
+
+struct xe_device;
+
+void xe_debugfs_register(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
new file mode 100644
index 000000000000..93dea2b9c464
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -0,0 +1,359 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_device.h"
+
+#include <drm/drm_gem_ttm_helper.h>
+#include <drm/drm_aperture.h>
+#include <drm/drm_ioctl.h>
+#include <drm/xe_drm.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_atomic_helper.h>
+
+#include "xe_bo.h"
+#include "xe_debugfs.h"
+#include "xe_dma_buf.h"
+#include "xe_drv.h"
+#include "xe_engine.h"
+#include "xe_exec.h"
+#include "xe_gt.h"
+#include "xe_irq.h"
+#include "xe_module.h"
+#include "xe_mmio.h"
+#include "xe_pcode.h"
+#include "xe_pm.h"
+#include "xe_query.h"
+#include "xe_vm.h"
+#include "xe_vm_madvise.h"
+#include "xe_wait_user_fence.h"
+
+static int xe_file_open(struct drm_device *dev, struct drm_file *file)
+{
+ struct xe_file *xef;
+
+ xef = kzalloc(sizeof(*xef), GFP_KERNEL);
+ if (!xef)
+ return -ENOMEM;
+
+ xef->drm = file;
+
+ mutex_init(&xef->vm.lock);
+ xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);
+
+ mutex_init(&xef->engine.lock);
+ xa_init_flags(&xef->engine.xa, XA_FLAGS_ALLOC1);
+
+ file->driver_priv = xef;
+ return 0;
+}
+
+static void device_kill_persitent_engines(struct xe_device *xe,
+ struct xe_file *xef);
+
+static void xe_file_close(struct drm_device *dev, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = file->driver_priv;
+ struct xe_vm *vm;
+ struct xe_engine *e;
+ unsigned long idx;
+
+ mutex_lock(&xef->engine.lock);
+ xa_for_each(&xef->engine.xa, idx, e) {
+ xe_engine_kill(e);
+ xe_engine_put(e);
+ }
+ mutex_unlock(&xef->engine.lock);
+ mutex_destroy(&xef->engine.lock);
+ device_kill_persitent_engines(xe, xef);
+
+ mutex_lock(&xef->vm.lock);
+ xa_for_each(&xef->vm.xa, idx, vm)
+ xe_vm_close_and_put(vm);
+ mutex_unlock(&xef->vm.lock);
+ mutex_destroy(&xef->vm.lock);
+
+ kfree(xef);
+}
+
+static const struct drm_ioctl_desc xe_ioctls[] = {
+ DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_ENGINE_CREATE, xe_engine_create_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_ENGINE_DESTROY, xe_engine_destroy_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_MMIO, xe_mmio_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_ENGINE_SET_PROPERTY, xe_engine_set_property_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_MADVISE, xe_vm_madvise_ioctl, DRM_RENDER_ALLOW),
+};
+
+static const struct file_operations xe_driver_fops = {
+ .owner = THIS_MODULE,
+ .open = drm_open,
+ .release = drm_release_noglobal,
+ .unlocked_ioctl = drm_ioctl,
+ .mmap = drm_gem_mmap,
+ .poll = drm_poll,
+ .read = drm_read,
+// .compat_ioctl = i915_ioc32_compat_ioctl,
+ .llseek = noop_llseek,
+};
+
+static void xe_driver_release(struct drm_device *dev)
+{
+ struct xe_device *xe = to_xe_device(dev);
+
+ pci_set_drvdata(to_pci_dev(xe->drm.dev), NULL);
+}
+
+static struct drm_driver driver = {
+ /* Don't use MTRRs here; the Xserver or userspace app should
+ * deal with them for Intel hardware.
+ */
+ .driver_features =
+ DRIVER_GEM |
+ DRIVER_RENDER | DRIVER_SYNCOBJ |
+ DRIVER_SYNCOBJ_TIMELINE,
+ .open = xe_file_open,
+ .postclose = xe_file_close,
+
+ .gem_prime_import = xe_gem_prime_import,
+
+ .dumb_create = xe_bo_dumb_create,
+ .dumb_map_offset = drm_gem_ttm_dumb_map_offset,
+ .release = &xe_driver_release,
+
+ .ioctls = xe_ioctls,
+ .num_ioctls = ARRAY_SIZE(xe_ioctls),
+ .fops = &xe_driver_fops,
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .date = DRIVER_DATE,
+ .major = DRIVER_MAJOR,
+ .minor = DRIVER_MINOR,
+ .patchlevel = DRIVER_PATCHLEVEL,
+};
+
+static void xe_device_destroy(struct drm_device *dev, void *dummy)
+{
+ struct xe_device *xe = to_xe_device(dev);
+
+ destroy_workqueue(xe->ordered_wq);
+ mutex_destroy(&xe->persitent_engines.lock);
+ ttm_device_fini(&xe->ttm);
+}
+
+struct xe_device *xe_device_create(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct xe_device *xe;
+ int err;
+
+ err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver);
+ if (err)
+ return ERR_PTR(err);
+
+ xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm);
+ if (IS_ERR(xe))
+ return xe;
+
+ err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
+ xe->drm.anon_inode->i_mapping,
+ xe->drm.vma_offset_manager, false, false);
+ if (WARN_ON(err))
+ goto err_put;
+
+ xe->info.devid = pdev->device;
+ xe->info.revid = pdev->revision;
+ xe->info.enable_guc = enable_guc;
+
+ spin_lock_init(&xe->irq.lock);
+
+ init_waitqueue_head(&xe->ufence_wq);
+
+ mutex_init(&xe->usm.lock);
+ xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC1);
+
+ mutex_init(&xe->persitent_engines.lock);
+ INIT_LIST_HEAD(&xe->persitent_engines.list);
+
+ spin_lock_init(&xe->pinned.lock);
+ INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
+ INIT_LIST_HEAD(&xe->pinned.external_vram);
+ INIT_LIST_HEAD(&xe->pinned.evicted);
+
+ xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
+
+ mutex_init(&xe->sb_lock);
+ xe->enabled_irq_mask = ~0;
+
+ err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
+ if (err)
+ goto err_put;
+
+ mutex_init(&xe->mem_access.lock);
+ return xe;
+
+err_put:
+ drm_dev_put(&xe->drm);
+
+ return ERR_PTR(err);
+}
+
+int xe_device_probe(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ int err;
+ u8 id;
+
+ xe->info.mem_region_mask = 1;
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_alloc(xe, gt);
+ if (err)
+ return err;
+ }
+
+ err = xe_mmio_init(xe);
+ if (err)
+ return err;
+
+ for_each_gt(gt, xe, id) {
+ err = xe_pcode_probe(gt);
+ if (err)
+ return err;
+ }
+
+ err = xe_irq_install(xe);
+ if (err)
+ return err;
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_init_early(gt);
+ if (err)
+ goto err_irq_shutdown;
+ }
+
+ err = xe_mmio_probe_vram(xe);
+ if (err)
+ goto err_irq_shutdown;
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_init_noalloc(gt);
+ if (err)
+ goto err_irq_shutdown;
+ }
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_init(gt);
+ if (err)
+ goto err_irq_shutdown;
+ }
+
+ err = drm_dev_register(&xe->drm, 0);
+ if (err)
+ goto err_irq_shutdown;
+
+ xe_debugfs_register(xe);
+
+ return 0;
+
+err_irq_shutdown:
+ xe_irq_shutdown(xe);
+ return err;
+}
+
+void xe_device_remove(struct xe_device *xe)
+{
+ xe_irq_shutdown(xe);
+}
+
+void xe_device_shutdown(struct xe_device *xe)
+{
+}
+
+void xe_device_add_persitent_engines(struct xe_device *xe, struct xe_engine *e)
+{
+ mutex_lock(&xe->persitent_engines.lock);
+ list_add_tail(&e->persitent.link, &xe->persitent_engines.list);
+ mutex_unlock(&xe->persitent_engines.lock);
+}
+
+void xe_device_remove_persitent_engines(struct xe_device *xe,
+ struct xe_engine *e)
+{
+ mutex_lock(&xe->persitent_engines.lock);
+ if (!list_empty(&e->persitent.link))
+ list_del(&e->persitent.link);
+ mutex_unlock(&xe->persitent_engines.lock);
+}
+
+static void device_kill_persitent_engines(struct xe_device *xe,
+ struct xe_file *xef)
+{
+ struct xe_engine *e, *next;
+
+ mutex_lock(&xe->persitent_engines.lock);
+ list_for_each_entry_safe(e, next, &xe->persitent_engines.list,
+ persitent.link)
+ if (e->persitent.xef == xef) {
+ xe_engine_kill(e);
+ list_del_init(&e->persitent.link);
+ }
+ mutex_unlock(&xe->persitent_engines.lock);
+}
+
+#define SOFTWARE_FLAGS_SPR33 _MMIO(0x4F084)
+
+void xe_device_wmb(struct xe_device *xe)
+{
+ struct xe_gt *gt = xe_device_get_gt(xe, 0);
+
+ wmb();
+ if (IS_DGFX(xe))
+ xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33.reg, 0);
+}
+
+u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
+{
+ return xe_device_has_flat_ccs(xe) ?
+ DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE) : 0;
+}
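
As a rough worked example of the helper above, assuming NUM_BYTES_PER_CCS_BYTE is 256 (one byte of CCS metadata per 256 bytes of main surface, the usual flat-CCS ratio; check the actual define before relying on this):

static u32 example_ccs_for_64k(struct xe_device *xe)
{
	/*
	 * On a flat-CCS device this returns DIV_ROUND_UP(65536, 256) = 256
	 * bytes of CCS metadata; without flat CCS it returns 0.
	 */
	return xe_device_ccs_bytes(xe, SZ_64K);
}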
+
+void xe_device_mem_access_get(struct xe_device *xe)
+{
+ bool resumed = xe_pm_runtime_resume_if_suspended(xe);
+
+ mutex_lock(&xe->mem_access.lock);
+ if (xe->mem_access.ref++ == 0)
+ xe->mem_access.hold_rpm = xe_pm_runtime_get_if_active(xe);
+ mutex_unlock(&xe->mem_access.lock);
+
+ /* The usage counter was increased if the device was immediately resumed */
+ if (resumed)
+ xe_pm_runtime_put(xe);
+
+ XE_WARN_ON(xe->mem_access.ref == U32_MAX);
+}
+
+void xe_device_mem_access_put(struct xe_device *xe)
+{
+ mutex_lock(&xe->mem_access.lock);
+ if (--xe->mem_access.ref == 0 && xe->mem_access.hold_rpm)
+ xe_pm_runtime_put(xe);
+ mutex_unlock(&xe->mem_access.lock);
+
+ XE_WARN_ON(xe->mem_access.ref < 0);
+}
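
A short usage sketch of the reference counting above, assuming a hypothetical helper that needs the device awake while it touches VRAM or MMIO (only the xe_device_mem_access_* calls come from this patch):

static void example_touch_hw(struct xe_device *xe)
{
	/* Take a memory-access reference; the first one may grab runtime PM. */
	xe_device_mem_access_get(xe);

	/* ... access VRAM / MMIO here; xe_device_assert_mem_access(xe) holds ... */

	/* Drop the reference; runtime PM is released when the count reaches 0. */
	xe_device_mem_access_put(xe);
}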
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
new file mode 100644
index 000000000000..88d55671b068
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_DEVICE_H_
+#define _XE_DEVICE_H_
+
+struct xe_engine;
+struct xe_file;
+
+#include <drm/drm_util.h>
+
+#include "xe_device_types.h"
+#include "xe_macros.h"
+#include "xe_force_wake.h"
+
+#include "gt/intel_gpu_commands.h"
+
+static inline struct xe_device *to_xe_device(const struct drm_device *dev)
+{
+ return container_of(dev, struct xe_device, drm);
+}
+
+static inline struct xe_device *pdev_to_xe_device(struct pci_dev *pdev)
+{
+ return pci_get_drvdata(pdev);
+}
+
+static inline struct xe_device *ttm_to_xe_device(struct ttm_device *ttm)
+{
+ return container_of(ttm, struct xe_device, ttm);
+}
+
+struct xe_device *xe_device_create(struct pci_dev *pdev,
+ const struct pci_device_id *ent);
+int xe_device_probe(struct xe_device *xe);
+void xe_device_remove(struct xe_device *xe);
+void xe_device_shutdown(struct xe_device *xe);
+
+void xe_device_add_persitent_engines(struct xe_device *xe, struct xe_engine *e);
+void xe_device_remove_persitent_engines(struct xe_device *xe,
+ struct xe_engine *e);
+
+void xe_device_wmb(struct xe_device *xe);
+
+static inline struct xe_file *to_xe_file(const struct drm_file *file)
+{
+ return file->driver_priv;
+}
+
+static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id)
+{
+ struct xe_gt *gt;
+
+ XE_BUG_ON(gt_id > XE_MAX_GT);
+ gt = xe->gt + gt_id;
+ XE_BUG_ON(gt->info.id != gt_id);
+ XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED);
+
+ return gt;
+}
+
+/*
+ * FIXME: Placeholder until multi-gt lands. Once that lands, kill this function.
+ */
+static inline struct xe_gt *to_gt(struct xe_device *xe)
+{
+ return xe->gt;
+}
+
+static inline bool xe_device_guc_submission_enabled(struct xe_device *xe)
+{
+ return xe->info.enable_guc;
+}
+
+static inline void xe_device_guc_submission_disable(struct xe_device *xe)
+{
+ xe->info.enable_guc = false;
+}
+
+#define for_each_gt(gt__, xe__, id__) \
+ for ((id__) = 0; (id__) < (xe__)->info.tile_count; (id__++)) \
+ for_each_if ((gt__) = xe_device_get_gt((xe__), (id__)))
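
A brief, illustrative use of the iterator above (the helper name and loop body are just an example):

static void example_log_gts(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;

	/* Visit each GT on the device, in tile order. */
	for_each_gt(gt, xe, id)
		drm_dbg(&xe->drm, "gt%u engine_mask 0x%llx\n",
			id, gt->info.engine_mask);
}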
+
+static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt)
+{
+ return &gt->mmio.fw;
+}
+
+void xe_device_mem_access_get(struct xe_device *xe);
+void xe_device_mem_access_put(struct xe_device *xe);
+
+static inline void xe_device_assert_mem_access(struct xe_device *xe)
+{
+ XE_WARN_ON(!xe->mem_access.ref);
+}
+
+static inline bool xe_device_mem_access_ongoing(struct xe_device *xe)
+{
+ bool ret;
+
+ mutex_lock(&xe->mem_access.lock);
+ ret = xe->mem_access.ref;
+ mutex_unlock(&xe->mem_access.lock);
+
+ return ret;
+}
+
+static inline bool xe_device_in_fault_mode(struct xe_device *xe)
+{
+ return xe->usm.num_vm_in_fault_mode != 0;
+}
+
+static inline bool xe_device_in_non_fault_mode(struct xe_device *xe)
+{
+ return xe->usm.num_vm_in_non_fault_mode != 0;
+}
+
+static inline bool xe_device_has_flat_ccs(struct xe_device *xe)
+{
+ return xe->info.has_flat_ccs;
+}
+
+u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
new file mode 100644
index 000000000000..d62ee85bfcbe
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_DEVICE_TYPES_H_
+#define _XE_DEVICE_TYPES_H_
+
+#include <linux/pci.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/ttm/ttm_device.h>
+
+#include "xe_gt_types.h"
+#include "xe_platform_types.h"
+#include "xe_step_types.h"
+
+#define XE_BO_INVALID_OFFSET LONG_MAX
+
+#define GRAPHICS_VER(xe) ((xe)->info.graphics_verx100 / 100)
+#define MEDIA_VER(xe) ((xe)->info.media_verx100 / 100)
+#define GRAPHICS_VERx100(xe) ((xe)->info.graphics_verx100)
+#define MEDIA_VERx100(xe) ((xe)->info.media_verx100)
+#define IS_DGFX(xe) ((xe)->info.is_dgfx)
+
+#define XE_VRAM_FLAGS_NEED64K BIT(0)
+
+#define XE_GT0 0
+#define XE_GT1 1
+#define XE_MAX_GT (XE_GT1 + 1)
+
+#define XE_MAX_ASID (BIT(20))
+
+#define IS_PLATFORM_STEP(_xe, _platform, min_step, max_step) \
+ ((_xe)->info.platform == (_platform) && \
+ (_xe)->info.step.graphics >= (min_step) && \
+ (_xe)->info.step.graphics < (max_step))
+#define IS_SUBPLATFORM_STEP(_xe, _platform, sub, min_step, max_step) \
+ ((_xe)->info.platform == (_platform) && \
+ (_xe)->info.subplatform == (sub) && \
+ (_xe)->info.step.graphics >= (min_step) && \
+ (_xe)->info.step.graphics < (max_step))
+
+/**
+ * struct xe_device - Top level struct of XE device
+ */
+struct xe_device {
+ /** @drm: drm device */
+ struct drm_device drm;
+
+ /** @info: device info */
+ struct intel_device_info {
+ /** @graphics_verx100: graphics IP version */
+ u32 graphics_verx100;
+ /** @media_verx100: media IP version */
+ u32 media_verx100;
+ /** @mem_region_mask: mask of valid memory regions */
+ u32 mem_region_mask;
+ /** @is_dgfx: is discrete device */
+ bool is_dgfx;
+ /** @platform: XE platform enum */
+ enum xe_platform platform;
+ /** @subplatform: XE subplatform enum */
+ enum xe_subplatform subplatform;
+ /** @devid: device ID */
+ u16 devid;
+ /** @revid: device revision */
+ u8 revid;
+ /** @step: stepping information for each IP */
+ struct xe_step_info step;
+ /** @dma_mask_size: DMA address bits */
+ u8 dma_mask_size;
+ /** @vram_flags: Vram flags */
+ u8 vram_flags;
+ /** @tile_count: Number of tiles */
+ u8 tile_count;
+ /** @vm_max_level: Max VM level */
+ u8 vm_max_level;
+ /** @media_ver: Media version */
+ u8 media_ver;
+ /** @supports_usm: Supports unified shared memory */
+ bool supports_usm;
+ /** @enable_guc: GuC submission enabled */
+ bool enable_guc;
+ /** @has_flat_ccs: Whether flat CCS metadata is used */
+ bool has_flat_ccs;
+ /** @has_4tile: Whether tile-4 tiling is supported */
+ bool has_4tile;
+ } info;
+
+ /** @irq: device interrupt state */
+ struct {
+ /** @lock: lock for processing irq's on this device */
+ spinlock_t lock;
+
+ /** @enabled: interrupts enabled on this device */
+ bool enabled;
+ } irq;
+
+ /** @ttm: ttm device */
+ struct ttm_device ttm;
+
+ /** @mmio: mmio info for device */
+ struct {
+ /** @size: size of MMIO space for device */
+ size_t size;
+ /** @regs: pointer to MMIO space for device */
+ void *regs;
+ } mmio;
+
+ /** @mem: memory info for device */
+ struct {
+ /** @vram: VRAM info for device */
+ struct {
+ /** @io_start: start address of VRAM */
+ resource_size_t io_start;
+ /** @size: size of VRAM */
+ resource_size_t size;
+ /** @mapping: pointer to VRAM mappable space */
+ void *__iomem mapping;
+ } vram;
+ } mem;
+
+ /** @usm: unified memory state */
+ struct {
+ /** @asid_to_vm: look up a VM by its ASID */
+ struct xarray asid_to_vm;
+ /** @next_asid: next ASID, used to cyclically allocate ASIDs */
+ u32 next_asid;
+ /** @num_vm_in_fault_mode: number of VM in fault mode */
+ u32 num_vm_in_fault_mode;
+ /** @num_vm_in_non_fault_mode: number of VM in non-fault mode */
+ u32 num_vm_in_non_fault_mode;
+ /** @lock: protects USM state */
+ struct mutex lock;
+ } usm;
+
+ /** @persitent_engines: engines that are closed but still running */
+ struct {
+ /** @lock: protects persistent engines */
+ struct mutex lock;
+ /** @list: list of persistent engines */
+ struct list_head list;
+ } persitent_engines;
+
+ /** @pinned: pinned BO state */
+ struct {
+ /** @lock: protects pinned BO list state */
+ spinlock_t lock;
+ /** @kernel_bo_present: pinned kernel BOs that are present */
+ struct list_head kernel_bo_present;
+ /** @evicted: pinned BOs that have been evicted */
+ struct list_head evicted;
+ /** @external_vram: pinned external BOs in VRAM */
+ struct list_head external_vram;
+ } pinned;
+
+ /** @ufence_wq: user fence wait queue */
+ wait_queue_head_t ufence_wq;
+
+ /** @ordered_wq: used to serialize compute mode resume */
+ struct workqueue_struct *ordered_wq;
+
+ /** @gt: graphics tile */
+ struct xe_gt gt[XE_MAX_GT];
+
+ /**
+ * @mem_access: keep track of memory accesses in the device, possibly
+ * triggering additional actions when they occur.
+ */
+ struct {
+ /** @lock: protect the ref count */
+ struct mutex lock;
+ /** @ref: ref count of memory accesses */
+ u32 ref;
+ /** @hold_rpm: need to put rpm ref back at the end */
+ bool hold_rpm;
+ } mem_access;
+
+ /** @d3cold_allowed: Indicates if d3cold is a valid device state */
+ bool d3cold_allowed;
+
+ /* For pcode */
+ struct mutex sb_lock;
+
+ u32 enabled_irq_mask;
+};
+
+/**
+ * struct xe_file - file handle for XE driver
+ */
+struct xe_file {
+ /** @drm: base DRM file */
+ struct drm_file *drm;
+
+ /** @vm: VM state for file */
+ struct {
+ /** @xa: xarray to store VMs */
+ struct xarray xa;
+ /** @lock: protects file VM state */
+ struct mutex lock;
+ } vm;
+
+ /** @engine: Submission engine state for file */
+ struct {
+ /** @xa: xarray to store engines */
+ struct xarray xa;
+ /** @lock: protects file engine state */
+ struct mutex lock;
+ } engine;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
new file mode 100644
index 000000000000..d09ff25bd940
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/dma-buf.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_prime.h>
+
+#include <drm/ttm/ttm_tt.h>
+
+#include <kunit/test.h>
+#include <linux/pci-p2pdma.h>
+
+#include "tests/xe_test.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_dma_buf.h"
+#include "xe_ttm_vram_mgr.h"
+#include "xe_vm.h"
+
+MODULE_IMPORT_NS(DMA_BUF);
+
+static int xe_dma_buf_attach(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *attach)
+{
+ struct drm_gem_object *obj = attach->dmabuf->priv;
+
+ if (attach->peer2peer &&
+ pci_p2pdma_distance(to_pci_dev(obj->dev->dev), attach->dev, false) < 0)
+ attach->peer2peer = false;
+
+ if (!attach->peer2peer && !xe_bo_can_migrate(gem_to_xe_bo(obj), XE_PL_TT))
+ return -EOPNOTSUPP;
+
+ xe_device_mem_access_get(to_xe_device(obj->dev));
+ return 0;
+}
+
+static void xe_dma_buf_detach(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *attach)
+{
+ struct drm_gem_object *obj = attach->dmabuf->priv;
+
+ xe_device_mem_access_put(to_xe_device(obj->dev));
+}
+
+static int xe_dma_buf_pin(struct dma_buf_attachment *attach)
+{
+ struct drm_gem_object *obj = attach->dmabuf->priv;
+ struct xe_bo *bo = gem_to_xe_bo(obj);
+
+ /*
+ * Migrate to TT first to increase the chance that non-p2p clients
+ * can attach.
+ */
+ (void)xe_bo_migrate(bo, XE_PL_TT);
+ xe_bo_pin_external(bo);
+
+ return 0;
+}
+
+static void xe_dma_buf_unpin(struct dma_buf_attachment *attach)
+{
+ struct drm_gem_object *obj = attach->dmabuf->priv;
+ struct xe_bo *bo = gem_to_xe_bo(obj);
+
+ xe_bo_unpin_external(bo);
+}
+
+static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach,
+ enum dma_data_direction dir)
+{
+ struct dma_buf *dma_buf = attach->dmabuf;
+ struct drm_gem_object *obj = dma_buf->priv;
+ struct xe_bo *bo = gem_to_xe_bo(obj);
+ struct sg_table *sgt;
+ int r = 0;
+
+ if (!attach->peer2peer && !xe_bo_can_migrate(bo, XE_PL_TT))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (!xe_bo_is_pinned(bo)) {
+ if (!attach->peer2peer ||
+ bo->ttm.resource->mem_type == XE_PL_SYSTEM) {
+ if (xe_bo_can_migrate(bo, XE_PL_TT))
+ r = xe_bo_migrate(bo, XE_PL_TT);
+ else
+ r = xe_bo_validate(bo, NULL, false);
+ }
+ if (r)
+ return ERR_PTR(r);
+ }
+
+ switch (bo->ttm.resource->mem_type) {
+ case XE_PL_TT:
+ sgt = drm_prime_pages_to_sg(obj->dev,
+ bo->ttm.ttm->pages,
+ bo->ttm.ttm->num_pages);
+ if (IS_ERR(sgt))
+ return sgt;
+
+ if (dma_map_sgtable(attach->dev, sgt, dir,
+ DMA_ATTR_SKIP_CPU_SYNC))
+ goto error_free;
+ break;
+
+ case XE_PL_VRAM0:
+ case XE_PL_VRAM1:
+ r = xe_ttm_vram_mgr_alloc_sgt(xe_bo_device(bo),
+ bo->ttm.resource, 0,
+ bo->ttm.base.size, attach->dev,
+ dir, &sgt);
+ if (r)
+ return ERR_PTR(r);
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+
+ return sgt;
+
+error_free:
+ sg_free_table(sgt);
+ kfree(sgt);
+ return ERR_PTR(-EBUSY);
+}
+
+static void xe_dma_buf_unmap(struct dma_buf_attachment *attach,
+ struct sg_table *sgt,
+ enum dma_data_direction dir)
+{
+ struct dma_buf *dma_buf = attach->dmabuf;
+ struct xe_bo *bo = gem_to_xe_bo(dma_buf->priv);
+
+ if (!xe_bo_is_vram(bo)) {
+ dma_unmap_sgtable(attach->dev, sgt, dir, 0);
+ sg_free_table(sgt);
+ kfree(sgt);
+ } else {
+ xe_ttm_vram_mgr_free_sgt(attach->dev, dir, sgt);
+ }
+}
+
+static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
+ enum dma_data_direction direction)
+{
+ struct drm_gem_object *obj = dma_buf->priv;
+ struct xe_bo *bo = gem_to_xe_bo(obj);
+ bool reads = (direction == DMA_BIDIRECTIONAL ||
+ direction == DMA_FROM_DEVICE);
+
+ if (!reads)
+ return 0;
+
+ xe_bo_lock_no_vm(bo, NULL);
+ (void)xe_bo_migrate(bo, XE_PL_TT);
+ xe_bo_unlock_no_vm(bo);
+
+ return 0;
+}
+
+const struct dma_buf_ops xe_dmabuf_ops = {
+ .attach = xe_dma_buf_attach,
+ .detach = xe_dma_buf_detach,
+ .pin = xe_dma_buf_pin,
+ .unpin = xe_dma_buf_unpin,
+ .map_dma_buf = xe_dma_buf_map,
+ .unmap_dma_buf = xe_dma_buf_unmap,
+ .release = drm_gem_dmabuf_release,
+ .begin_cpu_access = xe_dma_buf_begin_cpu_access,
+ .mmap = drm_gem_dmabuf_mmap,
+ .vmap = drm_gem_dmabuf_vmap,
+ .vunmap = drm_gem_dmabuf_vunmap,
+};
+
+struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags)
+{
+ struct xe_bo *bo = gem_to_xe_bo(obj);
+ struct dma_buf *buf;
+
+ if (bo->vm)
+ return ERR_PTR(-EPERM);
+
+ buf = drm_gem_prime_export(obj, flags);
+ if (!IS_ERR(buf))
+ buf->ops = &xe_dmabuf_ops;
+
+ return buf;
+}
+
+static struct drm_gem_object *
+xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
+ struct dma_buf *dma_buf)
+{
+ struct dma_resv *resv = dma_buf->resv;
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_bo *bo;
+ int ret;
+
+ dma_resv_lock(resv, NULL);
+ bo = __xe_bo_create_locked(xe, storage, NULL, resv, dma_buf->size,
+ ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT);
+ if (IS_ERR(bo)) {
+ ret = PTR_ERR(bo);
+ goto error;
+ }
+ dma_resv_unlock(resv);
+
+ return &bo->ttm.base;
+
+error:
+ dma_resv_unlock(resv);
+ return ERR_PTR(ret);
+}
+
+static void xe_dma_buf_move_notify(struct dma_buf_attachment *attach)
+{
+ struct drm_gem_object *obj = attach->importer_priv;
+ struct xe_bo *bo = gem_to_xe_bo(obj);
+
+ XE_WARN_ON(xe_bo_evict(bo, false));
+}
+
+static const struct dma_buf_attach_ops xe_dma_buf_attach_ops = {
+ .allow_peer2peer = true,
+ .move_notify = xe_dma_buf_move_notify
+};
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+
+struct dma_buf_test_params {
+ struct xe_test_priv base;
+ const struct dma_buf_attach_ops *attach_ops;
+ bool force_different_devices;
+ u32 mem_mask;
+};
+
+#define to_dma_buf_test_params(_priv) \
+ container_of(_priv, struct dma_buf_test_params, base)
+#endif
+
+struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *dma_buf)
+{
+ XE_TEST_DECLARE(struct dma_buf_test_params *test =
+ to_dma_buf_test_params
+ (xe_cur_kunit_priv(XE_TEST_LIVE_DMA_BUF));)
+ const struct dma_buf_attach_ops *attach_ops;
+ struct dma_buf_attachment *attach;
+ struct drm_gem_object *obj;
+ struct xe_bo *bo;
+
+ if (dma_buf->ops == &xe_dmabuf_ops) {
+ obj = dma_buf->priv;
+ if (obj->dev == dev &&
+ !XE_TEST_ONLY(test && test->force_different_devices)) {
+ /*
+ * Importing a dmabuf exported from our own gem increases the
+ * refcount on the gem itself instead of the f_count of the dmabuf.
+ */
+ drm_gem_object_get(obj);
+ return obj;
+ }
+ }
+
+ /*
+ * Don't publish the bo until we have a valid attachment, and a
+ * valid attachment needs the bo address. So pre-create the bo before
+ * creating the attachment, and only publish it afterwards.
+ */
+ bo = xe_bo_alloc();
+ if (IS_ERR(bo))
+ return ERR_CAST(bo);
+
+ attach_ops = &xe_dma_buf_attach_ops;
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+ if (test)
+ attach_ops = test->attach_ops;
+#endif
+
+ attach = dma_buf_dynamic_attach(dma_buf, dev->dev, attach_ops, &bo->ttm.base);
+ if (IS_ERR(attach)) {
+ obj = ERR_CAST(attach);
+ goto out_err;
+ }
+
+ /* Errors here will take care of freeing the bo. */
+ obj = xe_dma_buf_init_obj(dev, bo, dma_buf);
+ if (IS_ERR(obj))
+ return obj;
+
+ get_dma_buf(dma_buf);
+ obj->import_attach = attach;
+ return obj;
+
+out_err:
+ xe_bo_free(bo);
+
+ return obj;
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_dma_buf.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.h b/drivers/gpu/drm/xe/xe_dma_buf.h
new file mode 100644
index 000000000000..861dd28a862c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_dma_buf.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_DMA_BUF_H_
+#define _XE_DMA_BUF_H_
+
+#include <drm/drm_gem.h>
+
+struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags);
+struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *dma_buf);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h
new file mode 100644
index 000000000000..0377e5e4e35f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_drv.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_DRV_H_
+#define _XE_DRV_H_
+
+#include <drm/drm_drv.h>
+
+#define DRIVER_NAME "xe"
+#define DRIVER_DESC "Intel Xe Graphics"
+#define DRIVER_DATE "20201103"
+#define DRIVER_TIMESTAMP 1604406085
+
+/* Interface history:
+ *
+ * 1.1: Original.
+ */
+#define DRIVER_MAJOR 1
+#define DRIVER_MINOR 1
+#define DRIVER_PATCHLEVEL 0
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
new file mode 100644
index 000000000000..63219bd98be7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -0,0 +1,734 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_engine.h"
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/xe_drm.h>
+#include <linux/nospec.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_migrate.h"
+#include "xe_pm.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+static struct xe_engine *__xe_engine_create(struct xe_device *xe,
+ struct xe_vm *vm,
+ u32 logical_mask,
+ u16 width, struct xe_hw_engine *hwe,
+ u32 flags)
+{
+ struct xe_engine *e;
+ struct xe_gt *gt = hwe->gt;
+ int err;
+ int i;
+
+ e = kzalloc(sizeof(*e) + sizeof(struct xe_lrc) * width, GFP_KERNEL);
+ if (!e)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&e->refcount);
+ e->flags = flags;
+ e->hwe = hwe;
+ e->gt = gt;
+ if (vm)
+ e->vm = xe_vm_get(vm);
+ e->class = hwe->class;
+ e->width = width;
+ e->logical_mask = logical_mask;
+ e->fence_irq = &gt->fence_irq[hwe->class];
+ e->ring_ops = gt->ring_ops[hwe->class];
+ e->ops = gt->engine_ops;
+ INIT_LIST_HEAD(&e->persitent.link);
+ INIT_LIST_HEAD(&e->compute.link);
+ INIT_LIST_HEAD(&e->multi_gt_link);
+
+ /* FIXME: Wire up to configurable default value */
+ e->sched_props.timeslice_us = 1 * 1000;
+ e->sched_props.preempt_timeout_us = 640 * 1000;
+
+ if (xe_engine_is_parallel(e)) {
+ e->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
+ e->parallel.composite_fence_seqno = 1;
+ }
+ if (e->flags & ENGINE_FLAG_VM) {
+ e->bind.fence_ctx = dma_fence_context_alloc(1);
+ e->bind.fence_seqno = 1;
+ }
+
+ for (i = 0; i < width; ++i) {
+ err = xe_lrc_init(e->lrc + i, hwe, e, vm, SZ_16K);
+ if (err)
+ goto err_lrc;
+ }
+
+ err = e->ops->init(e);
+ if (err)
+ goto err_lrc;
+
+ return e;
+
+err_lrc:
+ for (i = i - 1; i >= 0; --i)
+ xe_lrc_finish(e->lrc + i);
+ kfree(e);
+ return ERR_PTR(err);
+}
+
+struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm,
+ u32 logical_mask, u16 width,
+ struct xe_hw_engine *hwe, u32 flags)
+{
+ struct ww_acquire_ctx ww;
+ struct xe_engine *e;
+ int err;
+
+ if (vm) {
+ err = xe_vm_lock(vm, &ww, 0, true);
+ if (err)
+ return ERR_PTR(err);
+ }
+ e = __xe_engine_create(xe, vm, logical_mask, width, hwe, flags);
+ if (vm)
+ xe_vm_unlock(vm, &ww);
+
+ return e;
+}
+
+struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt,
+ struct xe_vm *vm,
+ enum xe_engine_class class, u32 flags)
+{
+ struct xe_hw_engine *hwe, *hwe0 = NULL;
+ enum xe_hw_engine_id id;
+ u32 logical_mask = 0;
+
+ for_each_hw_engine(hwe, gt, id) {
+ if (xe_hw_engine_is_reserved(hwe))
+ continue;
+
+ if (hwe->class == class) {
+ logical_mask |= BIT(hwe->logical_instance);
+ if (!hwe0)
+ hwe0 = hwe;
+ }
+ }
+
+ if (!logical_mask)
+ return ERR_PTR(-ENODEV);
+
+ return xe_engine_create(xe, vm, logical_mask, 1, hwe0, flags);
+}
+
+void xe_engine_destroy(struct kref *ref)
+{
+ struct xe_engine *e = container_of(ref, struct xe_engine, refcount);
+ struct xe_engine *engine, *next;
+
+ if (!(e->flags & ENGINE_FLAG_BIND_ENGINE_CHILD)) {
+ list_for_each_entry_safe(engine, next, &e->multi_gt_list,
+ multi_gt_link)
+ xe_engine_put(engine);
+ }
+
+ e->ops->fini(e);
+}
+
+void xe_engine_fini(struct xe_engine *e)
+{
+ int i;
+
+ for (i = 0; i < e->width; ++i)
+ xe_lrc_finish(e->lrc + i);
+ if (e->vm)
+ xe_vm_put(e->vm);
+
+ kfree(e);
+}
+
+struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id)
+{
+ struct xe_engine *e;
+
+ mutex_lock(&xef->engine.lock);
+ e = xa_load(&xef->engine.xa, id);
+ mutex_unlock(&xef->engine.lock);
+
+ if (e)
+ xe_engine_get(e);
+
+ return e;
+}
+
+static int engine_set_priority(struct xe_device *xe, struct xe_engine *e,
+ u64 value, bool create)
+{
+ if (XE_IOCTL_ERR(xe, value > XE_ENGINE_PRIORITY_HIGH))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, value == XE_ENGINE_PRIORITY_HIGH &&
+ !capable(CAP_SYS_NICE)))
+ return -EPERM;
+
+ return e->ops->set_priority(e, value);
+}
+
+static int engine_set_timeslice(struct xe_device *xe, struct xe_engine *e,
+ u64 value, bool create)
+{
+ if (!capable(CAP_SYS_NICE))
+ return -EPERM;
+
+ return e->ops->set_timeslice(e, value);
+}
+
+static int engine_set_preemption_timeout(struct xe_device *xe,
+ struct xe_engine *e, u64 value,
+ bool create)
+{
+ if (!capable(CAP_SYS_NICE))
+ return -EPERM;
+
+ return e->ops->set_preempt_timeout(e, value);
+}
+
+static int engine_set_compute_mode(struct xe_device *xe, struct xe_engine *e,
+ u64 value, bool create)
+{
+ if (XE_IOCTL_ERR(xe, !create))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_VM))
+ return -EINVAL;
+
+ if (value) {
+ struct xe_vm *vm = e->vm;
+ int err;
+
+ if (XE_IOCTL_ERR(xe, xe_vm_in_fault_mode(vm)))
+ return -EOPNOTSUPP;
+
+ if (XE_IOCTL_ERR(xe, !xe_vm_in_compute_mode(vm)))
+ return -EOPNOTSUPP;
+
+ if (XE_IOCTL_ERR(xe, e->width != 1))
+ return -EINVAL;
+
+ e->compute.context = dma_fence_context_alloc(1);
+ spin_lock_init(&e->compute.lock);
+
+ err = xe_vm_add_compute_engine(vm, e);
+ if (XE_IOCTL_ERR(xe, err))
+ return err;
+
+ e->flags |= ENGINE_FLAG_COMPUTE_MODE;
+ e->flags &= ~ENGINE_FLAG_PERSISTENT;
+ }
+
+ return 0;
+}
+
+static int engine_set_persistence(struct xe_device *xe, struct xe_engine *e,
+ u64 value, bool create)
+{
+ if (XE_IOCTL_ERR(xe, !create))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE))
+ return -EINVAL;
+
+ if (value)
+ e->flags |= ENGINE_FLAG_PERSISTENT;
+ else
+ e->flags &= ~ENGINE_FLAG_PERSISTENT;
+
+ return 0;
+}
+
+static int engine_set_job_timeout(struct xe_device *xe, struct xe_engine *e,
+ u64 value, bool create)
+{
+ if (XE_IOCTL_ERR(xe, !create))
+ return -EINVAL;
+
+ if (!capable(CAP_SYS_NICE))
+ return -EPERM;
+
+ return e->ops->set_job_timeout(e, value);
+}
+
+static int engine_set_acc_trigger(struct xe_device *xe, struct xe_engine *e,
+ u64 value, bool create)
+{
+ if (XE_IOCTL_ERR(xe, !create))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, !xe->info.supports_usm))
+ return -EINVAL;
+
+ e->usm.acc_trigger = value;
+
+ return 0;
+}
+
+static int engine_set_acc_notify(struct xe_device *xe, struct xe_engine *e,
+ u64 value, bool create)
+{
+ if (XE_IOCTL_ERR(xe, !create))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, !xe->info.supports_usm))
+ return -EINVAL;
+
+ e->usm.acc_notify = value;
+
+ return 0;
+}
+
+static int engine_set_acc_granularity(struct xe_device *xe, struct xe_engine *e,
+ u64 value, bool create)
+{
+ if (XE_IOCTL_ERR(xe, !create))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, !xe->info.supports_usm))
+ return -EINVAL;
+
+ e->usm.acc_granularity = value;
+
+ return 0;
+}
+
+typedef int (*xe_engine_set_property_fn)(struct xe_device *xe,
+ struct xe_engine *e,
+ u64 value, bool create);
+
+static const xe_engine_set_property_fn engine_set_property_funcs[] = {
+ [XE_ENGINE_PROPERTY_PRIORITY] = engine_set_priority,
+ [XE_ENGINE_PROPERTY_TIMESLICE] = engine_set_timeslice,
+ [XE_ENGINE_PROPERTY_PREEMPTION_TIMEOUT] = engine_set_preemption_timeout,
+ [XE_ENGINE_PROPERTY_COMPUTE_MODE] = engine_set_compute_mode,
+ [XE_ENGINE_PROPERTY_PERSISTENCE] = engine_set_persistence,
+ [XE_ENGINE_PROPERTY_JOB_TIMEOUT] = engine_set_job_timeout,
+ [XE_ENGINE_PROPERTY_ACC_TRIGGER] = engine_set_acc_trigger,
+ [XE_ENGINE_PROPERTY_ACC_NOTIFY] = engine_set_acc_notify,
+ [XE_ENGINE_PROPERTY_ACC_GRANULARITY] = engine_set_acc_granularity,
+};
+
+static int engine_user_ext_set_property(struct xe_device *xe,
+ struct xe_engine *e,
+ u64 extension,
+ bool create)
+{
+ u64 __user *address = u64_to_user_ptr(extension);
+ struct drm_xe_ext_engine_set_property ext;
+ int err;
+ u32 idx;
+
+ err = __copy_from_user(&ext, address, sizeof(ext));
+ if (XE_IOCTL_ERR(xe, err))
+ return -EFAULT;
+
+ if (XE_IOCTL_ERR(xe, ext.property >=
+ ARRAY_SIZE(engine_set_property_funcs)))
+ return -EINVAL;
+
+ idx = array_index_nospec(ext.property, ARRAY_SIZE(engine_set_property_funcs));
+ return engine_set_property_funcs[idx](xe, e, ext.value, create);
+}
+
+typedef int (*xe_engine_user_extension_fn)(struct xe_device *xe,
+ struct xe_engine *e,
+ u64 extension,
+ bool create);
+
+static const xe_engine_set_property_fn engine_user_extension_funcs[] = {
+ [XE_ENGINE_EXTENSION_SET_PROPERTY] = engine_user_ext_set_property,
+};
+
+#define MAX_USER_EXTENSIONS 16
+static int engine_user_extensions(struct xe_device *xe, struct xe_engine *e,
+ u64 extensions, int ext_number, bool create)
+{
+ u64 __user *address = u64_to_user_ptr(extensions);
+ struct xe_user_extension ext;
+ int err;
+ u32 idx;
+
+ if (XE_IOCTL_ERR(xe, ext_number >= MAX_USER_EXTENSIONS))
+ return -E2BIG;
+
+ err = __copy_from_user(&ext, address, sizeof(ext));
+ if (XE_IOCTL_ERR(xe, err))
+ return -EFAULT;
+
+ if (XE_IOCTL_ERR(xe, ext.name >=
+ ARRAY_SIZE(engine_user_extension_funcs)))
+ return -EINVAL;
+
+ idx = array_index_nospec(ext.name,
+ ARRAY_SIZE(engine_user_extension_funcs));
+ err = engine_user_extension_funcs[idx](xe, e, extensions, create);
+ if (XE_IOCTL_ERR(xe, err))
+ return err;
+
+ if (ext.next_extension)
+ return engine_user_extensions(xe, e, ext.next_extension,
+ ++ext_number, create);
+
+ return 0;
+}
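
For illustration, a hedged userspace-side sketch of the chained extension layout this walker expects (as it would appear inside a userspace function): each extension begins with a struct xe_user_extension header whose next_extension points at the next entry. The .base embedding, the priority value, and the ioctl field initialization style are assumptions here; only the name/next_extension/property/value fields are taken from the code above.

	struct drm_xe_ext_engine_set_property prio_ext = {
		.base.name = XE_ENGINE_EXTENSION_SET_PROPERTY,
		.base.next_extension = 0,	/* end of the chain */
		.property = XE_ENGINE_PROPERTY_PRIORITY,
		.value = 0,			/* low priority; numeric meaning per uAPI */
	};
	struct drm_xe_engine_create create = {
		/* ... width, num_placements, instances, vm_id ... */
		.extensions = (__u64)(uintptr_t)&prio_ext,
	};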
+
+static const enum xe_engine_class user_to_xe_engine_class[] = {
+ [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
+ [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
+ [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
+ [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+ [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
+};
+
+static struct xe_hw_engine *
+find_hw_engine(struct xe_device *xe,
+ struct drm_xe_engine_class_instance eci)
+{
+ u32 idx;
+
+ if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class))
+ return NULL;
+
+ if (eci.gt_id >= xe->info.tile_count)
+ return NULL;
+
+ idx = array_index_nospec(eci.engine_class,
+ ARRAY_SIZE(user_to_xe_engine_class));
+
+ return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
+ user_to_xe_engine_class[idx],
+ eci.engine_instance, true);
+}
+
+static u32 bind_engine_logical_mask(struct xe_device *xe, struct xe_gt *gt,
+ struct drm_xe_engine_class_instance *eci,
+ u16 width, u16 num_placements)
+{
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ u32 logical_mask = 0;
+
+ if (XE_IOCTL_ERR(xe, width != 1))
+ return 0;
+ if (XE_IOCTL_ERR(xe, num_placements != 1))
+ return 0;
+ if (XE_IOCTL_ERR(xe, eci[0].engine_instance != 0))
+ return 0;
+
+ eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY;
+
+ for_each_hw_engine(hwe, gt, id) {
+ if (xe_hw_engine_is_reserved(hwe))
+ continue;
+
+ if (hwe->class ==
+ user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY])
+ logical_mask |= BIT(hwe->logical_instance);
+ }
+
+ return logical_mask;
+}
+
+static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
+ struct drm_xe_engine_class_instance *eci,
+ u16 width, u16 num_placements)
+{
+ int len = width * num_placements;
+ int i, j, n;
+ u16 class;
+ u16 gt_id;
+ u32 return_mask = 0, prev_mask;
+
+ if (XE_IOCTL_ERR(xe, !xe_device_guc_submission_enabled(xe) &&
+ len > 1))
+ return 0;
+
+ for (i = 0; i < width; ++i) {
+ u32 current_mask = 0;
+
+ for (j = 0; j < num_placements; ++j) {
+ struct xe_hw_engine *hwe;
+
+ n = j * width + i;
+
+ hwe = find_hw_engine(xe, eci[n]);
+ if (XE_IOCTL_ERR(xe, !hwe))
+ return 0;
+
+ if (XE_IOCTL_ERR(xe, xe_hw_engine_is_reserved(hwe)))
+ return 0;
+
+ if (XE_IOCTL_ERR(xe, n && eci[n].gt_id != gt_id) ||
+ XE_IOCTL_ERR(xe, n && eci[n].engine_class != class))
+ return 0;
+
+ class = eci[n].engine_class;
+ gt_id = eci[n].gt_id;
+
+ if (width == 1 || !i)
+ return_mask |= BIT(eci[n].engine_instance);
+ current_mask |= BIT(eci[n].engine_instance);
+ }
+
+ /* Parallel submissions must be logically contiguous */
+ if (i && XE_IOCTL_ERR(xe, current_mask != prev_mask << 1))
+ return 0;
+
+ prev_mask = current_mask;
+ }
+
+ return return_mask;
+}
+
+int xe_engine_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = to_xe_file(file);
+ struct drm_xe_engine_create *args = data;
+ struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
+ struct drm_xe_engine_class_instance __user *user_eci =
+ u64_to_user_ptr(args->instances);
+ struct xe_hw_engine *hwe;
+ struct xe_vm *vm, *migrate_vm;
+ struct xe_gt *gt;
+ struct xe_engine *e = NULL;
+ u32 logical_mask;
+ u32 id;
+ int len;
+ int err;
+
+ if (XE_IOCTL_ERR(xe, args->flags))
+ return -EINVAL;
+
+ len = args->width * args->num_placements;
+ if (XE_IOCTL_ERR(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
+ return -EINVAL;
+
+ err = __copy_from_user(eci, user_eci,
+ sizeof(struct drm_xe_engine_class_instance) *
+ len);
+ if (XE_IOCTL_ERR(xe, err))
+ return -EFAULT;
+
+ if (XE_IOCTL_ERR(xe, eci[0].gt_id >= xe->info.tile_count))
+ return -EINVAL;
+
+ xe_pm_runtime_get(xe);
+
+ if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
+ for_each_gt(gt, xe, id) {
+ struct xe_engine *new;
+
+ if (xe_gt_is_media_type(gt))
+ continue;
+
+ eci[0].gt_id = gt->info.id;
+ logical_mask = bind_engine_logical_mask(xe, gt, eci,
+ args->width,
+ args->num_placements);
+ if (XE_IOCTL_ERR(xe, !logical_mask)) {
+ err = -EINVAL;
+ goto put_rpm;
+ }
+
+ hwe = find_hw_engine(xe, eci[0]);
+ if (XE_IOCTL_ERR(xe, !hwe)) {
+ err = -EINVAL;
+ goto put_rpm;
+ }
+
+ migrate_vm = xe_migrate_get_vm(gt->migrate);
+ new = xe_engine_create(xe, migrate_vm, logical_mask,
+ args->width, hwe,
+ ENGINE_FLAG_PERSISTENT |
+ ENGINE_FLAG_VM |
+ (id ?
+ ENGINE_FLAG_BIND_ENGINE_CHILD :
+ 0));
+ xe_vm_put(migrate_vm);
+ if (IS_ERR(new)) {
+ err = PTR_ERR(new);
+ if (e)
+ goto put_engine;
+ goto put_rpm;
+ }
+ if (id == 0)
+ e = new;
+ else
+ list_add_tail(&new->multi_gt_list,
+ &e->multi_gt_link);
+ }
+ } else {
+ gt = xe_device_get_gt(xe, eci[0].gt_id);
+ logical_mask = calc_validate_logical_mask(xe, gt, eci,
+ args->width,
+ args->num_placements);
+ if (XE_IOCTL_ERR(xe, !logical_mask)) {
+ err = -EINVAL;
+ goto put_rpm;
+ }
+
+ hwe = find_hw_engine(xe, eci[0]);
+ if (XE_IOCTL_ERR(xe, !hwe)) {
+ err = -EINVAL;
+ goto put_rpm;
+ }
+
+ vm = xe_vm_lookup(xef, args->vm_id);
+ if (XE_IOCTL_ERR(xe, !vm)) {
+ err = -ENOENT;
+ goto put_rpm;
+ }
+
+ e = xe_engine_create(xe, vm, logical_mask,
+ args->width, hwe, ENGINE_FLAG_PERSISTENT);
+ xe_vm_put(vm);
+ if (IS_ERR(e)) {
+ err = PTR_ERR(e);
+ goto put_rpm;
+ }
+ }
+
+ if (args->extensions) {
+ err = engine_user_extensions(xe, e, args->extensions, 0, true);
+ if (XE_IOCTL_ERR(xe, err))
+ goto put_engine;
+ }
+
+ if (XE_IOCTL_ERR(xe, e->vm && xe_vm_in_compute_mode(e->vm) !=
+ !!(e->flags & ENGINE_FLAG_COMPUTE_MODE))) {
+ err = -ENOTSUPP;
+ goto put_engine;
+ }
+
+ e->persitent.xef = xef;
+
+ mutex_lock(&xef->engine.lock);
+ err = xa_alloc(&xef->engine.xa, &id, e, xa_limit_32b, GFP_KERNEL);
+ mutex_unlock(&xef->engine.lock);
+ if (err)
+ goto put_engine;
+
+ args->engine_id = id;
+
+ return 0;
+
+put_engine:
+ xe_engine_kill(e);
+ xe_engine_put(e);
+put_rpm:
+ xe_pm_runtime_put(xe);
+ return err;
+}
+
+static void engine_kill_compute(struct xe_engine *e)
+{
+ if (!xe_vm_in_compute_mode(e->vm))
+ return;
+
+ down_write(&e->vm->lock);
+ list_del(&e->compute.link);
+ --e->vm->preempt.num_engines;
+ if (e->compute.pfence) {
+ dma_fence_enable_sw_signaling(e->compute.pfence);
+ dma_fence_put(e->compute.pfence);
+ e->compute.pfence = NULL;
+ }
+ up_write(&e->vm->lock);
+}
+
+void xe_engine_kill(struct xe_engine *e)
+{
+ struct xe_engine *engine = e, *next;
+
+ list_for_each_entry_safe(engine, next, &engine->multi_gt_list,
+ multi_gt_link) {
+ e->ops->kill(engine);
+ engine_kill_compute(engine);
+ }
+
+ e->ops->kill(e);
+ engine_kill_compute(e);
+}
+
+int xe_engine_destroy_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = to_xe_file(file);
+ struct drm_xe_engine_destroy *args = data;
+ struct xe_engine *e;
+
+ if (XE_IOCTL_ERR(xe, args->pad))
+ return -EINVAL;
+
+ mutex_lock(&xef->engine.lock);
+ e = xa_erase(&xef->engine.xa, args->engine_id);
+ mutex_unlock(&xef->engine.lock);
+ if (XE_IOCTL_ERR(xe, !e))
+ return -ENOENT;
+
+ if (!(e->flags & ENGINE_FLAG_PERSISTENT))
+ xe_engine_kill(e);
+ else
+ xe_device_add_persitent_engines(xe, e);
+
+ trace_xe_engine_close(e);
+ xe_engine_put(e);
+ xe_pm_runtime_put(xe);
+
+ return 0;
+}
+
+int xe_engine_set_property_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = to_xe_file(file);
+ struct drm_xe_engine_set_property *args = data;
+ struct xe_engine *e;
+ int ret;
+ u32 idx;
+
+ e = xe_engine_lookup(xef, args->engine_id);
+ if (XE_IOCTL_ERR(xe, !e))
+ return -ENOENT;
+
+ if (XE_IOCTL_ERR(xe, args->property >=
+ ARRAY_SIZE(engine_set_property_funcs))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ idx = array_index_nospec(args->property,
+ ARRAY_SIZE(engine_set_property_funcs));
+ ret = engine_set_property_funcs[idx](xe, e, args->value, false);
+ if (XE_IOCTL_ERR(xe, ret))
+ goto out;
+
+ if (args->extensions)
+ ret = engine_user_extensions(xe, e, args->extensions, 0,
+ false);
+out:
+ xe_engine_put(e);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_engine.h b/drivers/gpu/drm/xe/xe_engine.h
new file mode 100644
index 000000000000..4d1b609fea7e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_engine.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_ENGINE_H_
+#define _XE_ENGINE_H_
+
+#include "xe_engine_types.h"
+#include "xe_vm_types.h"
+
+struct drm_device;
+struct drm_file;
+struct xe_device;
+struct xe_file;
+
+struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm,
+ u32 logical_mask, u16 width,
+ struct xe_hw_engine *hw_engine, u32 flags);
+struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt,
+ struct xe_vm *vm,
+ enum xe_engine_class class, u32 flags);
+
+void xe_engine_fini(struct xe_engine *e);
+void xe_engine_destroy(struct kref *ref);
+
+struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id);
+
+static inline struct xe_engine *xe_engine_get(struct xe_engine *engine)
+{
+ kref_get(&engine->refcount);
+ return engine;
+}
+
+static inline void xe_engine_put(struct xe_engine *engine)
+{
+ kref_put(&engine->refcount, xe_engine_destroy);
+}
+
+static inline bool xe_engine_is_parallel(struct xe_engine *engine)
+{
+ return engine->width > 1;
+}
+
+void xe_engine_kill(struct xe_engine *e);
+
+int xe_engine_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int xe_engine_destroy_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int xe_engine_set_property_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_engine_types.h b/drivers/gpu/drm/xe/xe_engine_types.h
new file mode 100644
index 000000000000..3dfa1c14e181
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_engine_types.h
@@ -0,0 +1,208 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_ENGINE_TYPES_H_
+#define _XE_ENGINE_TYPES_H_
+
+#include <linux/kref.h>
+
+#include <drm/gpu_scheduler.h>
+
+#include "xe_gpu_scheduler_types.h"
+#include "xe_hw_engine_types.h"
+#include "xe_hw_fence_types.h"
+#include "xe_lrc_types.h"
+
+struct xe_execlist_engine;
+struct xe_gt;
+struct xe_guc_engine;
+struct xe_hw_engine;
+struct xe_vm;
+
+enum xe_engine_priority {
+ XE_ENGINE_PRIORITY_UNSET = -2, /* For execlist usage only */
+ XE_ENGINE_PRIORITY_LOW = 0,
+ XE_ENGINE_PRIORITY_NORMAL,
+ XE_ENGINE_PRIORITY_HIGH,
+ XE_ENGINE_PRIORITY_KERNEL,
+
+ XE_ENGINE_PRIORITY_COUNT
+};
+
+/**
+ * struct xe_engine - Submission engine
+ *
+ * Contains all state necessary for submissions. Can either be a user object or
+ * a kernel object.
+ */
+struct xe_engine {
+ /** @gt: graphics tile this engine can submit to */
+ struct xe_gt *gt;
+ /**
+ * @hwe: A hardware engine of the same class. May (physical engine) or
+ * may not (virtual engine) be where jobs actually end up running.
+ * Should never really be used for submissions.
+ */
+ struct xe_hw_engine *hwe;
+ /** @refcount: ref count of this engine */
+ struct kref refcount;
+ /** @vm: VM (address space) for this engine */
+ struct xe_vm *vm;
+ /** @class: class of this engine */
+ enum xe_engine_class class;
+ /** @priority: priority of this engine */
+ enum xe_engine_priority priority;
+ /**
+ * @logical_mask: logical mask of where jobs submitted to this engine
+ * can run
+ */
+ u32 logical_mask;
+ /** @name: name of this engine */
+ char name[MAX_FENCE_NAME_LEN];
+ /** @width: width (number of BBs submitted per exec) of this engine */
+ u16 width;
+ /** @fence_irq: fence IRQ used to signal job completion */
+ struct xe_hw_fence_irq *fence_irq;
+
+#define ENGINE_FLAG_BANNED BIT(0)
+#define ENGINE_FLAG_KERNEL BIT(1)
+#define ENGINE_FLAG_PERSISTENT BIT(2)
+#define ENGINE_FLAG_COMPUTE_MODE BIT(3)
+#define ENGINE_FLAG_VM BIT(4)
+#define ENGINE_FLAG_BIND_ENGINE_CHILD BIT(5)
+#define ENGINE_FLAG_WA BIT(6)
+
+ /**
+ * @flags: flags for this engine, should be statically set up aside
+ * from the ban bit
+ */
+ unsigned long flags;
+
+ union {
+ /** @multi_gt_list: list head for VM bind engines if multi-GT */
+ struct list_head multi_gt_list;
+ /** @multi_gt_link: link for VM bind engines if multi-GT */
+ struct list_head multi_gt_link;
+ };
+
+ union {
+ /** @execlist: execlist backend specific state for engine */
+ struct xe_execlist_engine *execlist;
+ /** @guc: GuC backend specific state for engine */
+ struct xe_guc_engine *guc;
+ };
+
+ /**
+ * @persitent: persistent engine state
+ */
+ struct {
+ /** @xef: file which this engine belongs to */
+ struct xe_file *xef;
+ /** @link: link in the list of persistent engines */
+ struct list_head link;
+ } persitent;
+
+ union {
+ /**
+ * @parallel: parallel submission state
+ */
+ struct {
+ /** @composite_fence_ctx: context for the composite fence */
+ u64 composite_fence_ctx;
+ /** @composite_fence_seqno: seqno for composite fence */
+ u32 composite_fence_seqno;
+ } parallel;
+ /**
+ * @bind: bind submission state
+ */
+ struct {
+ /** @fence_ctx: context for the bind fence */
+ u64 fence_ctx;
+ /** @fence_seqno: seqno for bind fence */
+ u32 fence_seqno;
+ } bind;
+ };
+
+ /** @sched_props: scheduling properties */
+ struct {
+ /** @timeslice_us: timeslice period in microseconds */
+ u32 timeslice_us;
+ /** @preempt_timeout_us: preemption timeout in microseconds */
+ u32 preempt_timeout_us;
+ } sched_props;
+
+ /** @compute: compute engine state */
+ struct {
+ /** @pfence: preemption fence */
+ struct dma_fence *pfence;
+ /** @context: preemption fence context */
+ u64 context;
+ /** @seqno: preemption fence seqno */
+ u32 seqno;
+ /** @link: link into VM's list of engines */
+ struct list_head link;
+ /** @lock: preemption fences lock */
+ spinlock_t lock;
+ } compute;
+
+ /** @usm: unified shared memory state */
+ struct {
+ /** @acc_trigger: access counter trigger */
+ u32 acc_trigger;
+ /** @acc_notify: access counter notify */
+ u32 acc_notify;
+ /** @acc_granularity: access counter granularity */
+ u32 acc_granularity;
+ } usm;
+
+ /** @ops: submission backend engine operations */
+ const struct xe_engine_ops *ops;
+
+ /** @ring_ops: ring operations for this engine */
+ const struct xe_ring_ops *ring_ops;
+ /** @entity: DRM sched entity for this engine (1 to 1 relationship) */
+ struct drm_sched_entity *entity;
+ /** @lrc: logical ring context for this engine */
+ struct xe_lrc lrc[0];
+};
+
+/**
+ * struct xe_engine_ops - Submission backend engine operations
+ */
+struct xe_engine_ops {
+ /** @init: Initialize engine for submission backend */
+ int (*init)(struct xe_engine *e);
+ /** @kill: Kill inflight submissions for backend */
+ void (*kill)(struct xe_engine *e);
+ /** @fini: Fini engine for submission backend */
+ void (*fini)(struct xe_engine *e);
+ /** @set_priority: Set priority for engine */
+ int (*set_priority)(struct xe_engine *e,
+ enum xe_engine_priority priority);
+ /** @set_timeslice: Set timeslice for engine */
+ int (*set_timeslice)(struct xe_engine *e, u32 timeslice_us);
+ /** @set_preempt_timeout: Set preemption timeout for engine */
+ int (*set_preempt_timeout)(struct xe_engine *e, u32 preempt_timeout_us);
+ /** @set_job_timeout: Set job timeout for engine */
+ int (*set_job_timeout)(struct xe_engine *e, u32 job_timeout_ms);
+ /**
+ * @suspend: Suspend engine from executing, may be called multiple
+ * times in a row before resume, with the caveat that suspend_wait
+ * must return before suspend is called again.
+ */
+ int (*suspend)(struct xe_engine *e);
+ /**
+ * @suspend_wait: Wait for an engine to suspend executing, should be
+ * called after suspend.
+ */
+ void (*suspend_wait)(struct xe_engine *e);
+ /**
+ * @resume: Resume engine execution, the engine must be in a suspended
+ * state and the dma-fence returned from the most recent suspend call
+ * must be signalled when this function is called.
+ */
+ void (*resume)(struct xe_engine *e);
+};
+
+#endif
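The xe_engine struct above ends in a flexible array of LRCs sized by @width. The allocation helper lives in xe_engine.c and is not part of this hunk, so the following is only a minimal sketch, assuming the usual flexible-array allocation pattern and the standard slab/err includes; the helper name and the fields initialized here are illustrative.

    /* Sketch only: size the trailing lrc[] array by the engine width. */
    static struct xe_engine *sketch_engine_alloc(struct xe_vm *vm, u16 width)
    {
            struct xe_engine *e;

            e = kzalloc(sizeof(*e) + width * sizeof(struct xe_lrc), GFP_KERNEL);
            if (!e)
                    return ERR_PTR(-ENOMEM);

            kref_init(&e->refcount);
            e->vm = vm;
            e->width = width;       /* e->lrc[0..width - 1] are now addressable */

            return e;
    }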
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
new file mode 100644
index 000000000000..00f298acc436
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -0,0 +1,390 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/xe_drm.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_engine.h"
+#include "xe_exec.h"
+#include "xe_macros.h"
+#include "xe_sched_job.h"
+#include "xe_sync.h"
+#include "xe_vm.h"
+
+/**
+ * DOC: Execbuf (User GPU command submission)
+ *
+ * Execs have historically been rather complicated in DRM drivers (at least in
+ * the i915) because of a few things:
+ *
+ * - Passing in a list of BOs which are read / written to, creating implicit syncs
+ * - Binding at exec time
+ * - Flow controlling the ring at exec time
+ *
+ * In XE we avoid all of this complication by not allowing a BO list to be
+ * passed into an exec, using the dma-buf implicit sync uAPI, having binds as
+ * separate operations, and using the DRM scheduler to flow control the ring.
+ * Let's deep dive into each of these.
+ *
+ * We can get away from a BO list by forcing the user to use in / out fences on
+ * every exec rather than the kernel tracking dependencies of BOs (e.g. if the
+ * user knows an exec writes to a BO and reads from the BO in the next exec, it
+ * is the user's responsibility to pass an in / out fence between the two execs).
+ *
+ * Implicit dependencies for external BOs are handled by using the dma-buf
+ * implicit dependency uAPI (TODO: add link). To make this work, each exec must
+ * install the job's fence into the DMA_RESV_USAGE_WRITE slot of every external
+ * BO mapped in the VM.
+ *
+ * We do not allow a user to trigger a bind at exec time; rather, we have a VM
+ * bind IOCTL which uses the same in / out fence interface as exec. In that
+ * sense, a VM bind is basically the same operation as an exec from the user
+ * perspective, e.g. if an exec depends on a VM bind, use the in / out fence
+ * interface (struct drm_xe_sync) to synchronize, just like syncing between two
+ * dependent execs.
+ *
+ * Although a user cannot trigger a bind, we still have to rebind userptrs in
+ * the VM that have been invalidated since the last exec; likewise, we also have
+ * to rebind BOs that have been evicted by the kernel. We schedule these rebinds
+ * behind any pending kernel operations on any external BOs in the VM or any BOs
+ * private to the VM. This is accomplished by the rebinds waiting on the BOs'
+ * DMA_RESV_USAGE_KERNEL slot (kernel ops) and kernel ops waiting on all BO
+ * slots (inflight execs are in the DMA_RESV_USAGE_BOOKKEEP slot for private BOs
+ * and in DMA_RESV_USAGE_WRITE for external BOs).
+ *
+ * Rebinds / dma-resv usage applies to non-compute mode VMs only as for compute
+ * mode VMs we use preempt fences and a rebind worker (TODO: add link).
+ *
+ * There is no need to flow control the ring in the exec as we write the ring at
+ * submission time and set the DRM scheduler max job limit to SIZE_OF_RING /
+ * MAX_JOB_SIZE. The DRM scheduler will then hold all jobs until space in the
+ * ring is available.
+ *
+ * All of this results in a rather simple exec implementation.
+ *
+ * Flow
+ * ~~~~
+ *
+ * .. code-block::
+ *
+ * Parse input arguments
+ * Wait for any async VM bind passed as in-fences to start
+ * <----------------------------------------------------------------------|
+ * Lock global VM lock in read mode |
+ * Pin userptrs (also finds userptr invalidated since last exec) |
+ * Lock exec (VM dma-resv lock, external BOs dma-resv locks) |
+ * Validate BOs that have been evicted |
+ * Create job |
+ * Rebind invalidated userptrs + evicted BOs (non-compute-mode) |
+ * Add rebind fence dependency to job |
+ * Add job VM dma-resv bookkeeping slot (non-compute mode) |
+ * Add job to external BOs dma-resv write slots (non-compute mode) |
+ * Check if any userptrs invalidated since pin ------ Drop locks ---------|
+ * Install in / out fences for job
+ * Submit job
+ * Unlock all
+ */
+
+static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
+ struct ttm_validate_buffer tv_onstack[],
+ struct ttm_validate_buffer **tv,
+ struct list_head *objs)
+{
+ struct xe_vm *vm = e->vm;
+ struct xe_vma *vma;
+ LIST_HEAD(dups);
+ int err;
+
+ *tv = NULL;
+ if (xe_vm_no_dma_fences(e->vm))
+ return 0;
+
+ err = xe_vm_lock_dma_resv(vm, ww, tv_onstack, tv, objs, true, 1);
+ if (err)
+ return err;
+
+ /*
+ * Validate BOs that have been evicted (i.e. make sure the
+ * BOs have valid placements possibly moving an evicted BO back
+ * to a location where the GPU can access it).
+ */
+ list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
+ if (xe_vma_is_userptr(vma))
+ continue;
+
+ err = xe_bo_validate(vma->bo, vm, false);
+ if (err) {
+ xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs);
+ *tv = NULL;
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static void xe_exec_end(struct xe_engine *e,
+ struct ttm_validate_buffer *tv_onstack,
+ struct ttm_validate_buffer *tv,
+ struct ww_acquire_ctx *ww,
+ struct list_head *objs)
+{
+ if (!xe_vm_no_dma_fences(e->vm))
+ xe_vm_unlock_dma_resv(e->vm, tv_onstack, tv, ww, objs);
+}
+
+int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = to_xe_file(file);
+ struct drm_xe_exec *args = data;
+ struct drm_xe_sync __user *syncs_user = u64_to_user_ptr(args->syncs);
+ u64 __user *addresses_user = u64_to_user_ptr(args->address);
+ struct xe_engine *engine;
+ struct xe_sync_entry *syncs = NULL;
+ u64 addresses[XE_HW_ENGINE_MAX_INSTANCE];
+ struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
+ struct ttm_validate_buffer *tv = NULL;
+ u32 i, num_syncs = 0;
+ struct xe_sched_job *job;
+ struct dma_fence *rebind_fence;
+ struct xe_vm *vm;
+ struct ww_acquire_ctx ww;
+ struct list_head objs;
+ bool write_locked;
+ int err = 0;
+
+ if (XE_IOCTL_ERR(xe, args->extensions))
+ return -EINVAL;
+
+ engine = xe_engine_lookup(xef, args->engine_id);
+ if (XE_IOCTL_ERR(xe, !engine))
+ return -ENOENT;
+
+ if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_VM)) {
+ err = -EINVAL;
+ goto err_engine;
+ }
+
+ if (XE_IOCTL_ERR(xe, engine->width != args->num_batch_buffer)) {
+ err = -EINVAL;
+ goto err_engine;
+ }
+
+ if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_BANNED)) {
+ err = -ECANCELED;
+ goto err_engine;
+ }
+
+ if (args->num_syncs) {
+ syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
+ if (!syncs) {
+ err = -ENOMEM;
+ goto err_engine;
+ }
+ }
+
+ vm = engine->vm;
+
+ for (i = 0; i < args->num_syncs; i++) {
+ err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++],
+ &syncs_user[i], true,
+ xe_vm_no_dma_fences(vm));
+ if (err)
+ goto err_syncs;
+ }
+
+ if (xe_engine_is_parallel(engine)) {
+ err = __copy_from_user(addresses, addresses_user, sizeof(u64) *
+ engine->width);
+ if (err) {
+ err = -EFAULT;
+ goto err_syncs;
+ }
+ }
+
+ /*
+ * We can't install a job into the VM dma-resv shared slot before an
+ * async VM bind passed in as a fence without the risk of deadlocking as
+ * the bind can trigger an eviction which in turn depends on anything in
+ * the VM dma-resv shared slots. Not an ideal solution, but we wait for
+ * all dependent async VM binds to start (install correct fences into
+ * dma-resv slots) before moving forward.
+ */
+ if (!xe_vm_no_dma_fences(vm) &&
+ vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS) {
+ for (i = 0; i < args->num_syncs; i++) {
+ struct dma_fence *fence = syncs[i].fence;
+
+ if (fence) {
+ err = xe_vm_async_fence_wait_start(fence);
+ if (err)
+ goto err_syncs;
+ }
+ }
+ }
+
+retry:
+ if (!xe_vm_no_dma_fences(vm) && xe_vm_userptr_check_repin(vm)) {
+ err = down_write_killable(&vm->lock);
+ write_locked = true;
+ } else {
+ /* We don't allow execs while the VM is in error state */
+ err = down_read_interruptible(&vm->lock);
+ write_locked = false;
+ }
+ if (err)
+ goto err_syncs;
+
+ /* We don't allow execs while the VM is in error state */
+ if (vm->async_ops.error) {
+ err = vm->async_ops.error;
+ goto err_unlock_list;
+ }
+
+ /*
+ * Extreme corner where we exit a VM error state with a munmap style VM
+ * unbind inflight which requires a rebind. In this case the rebind
+ * needs to install some fences into the dma-resv slots. The worker to
+ * do this is queued; let that worker make progress by dropping vm->lock,
+ * flushing the worker, and retrying the exec.
+ */
+ if (vm->async_ops.munmap_rebind_inflight) {
+ if (write_locked)
+ up_write(&vm->lock);
+ else
+ up_read(&vm->lock);
+ flush_work(&vm->async_ops.work);
+ goto retry;
+ }
+
+ if (write_locked) {
+ err = xe_vm_userptr_pin(vm);
+ downgrade_write(&vm->lock);
+ write_locked = false;
+ if (err)
+ goto err_unlock_list;
+ }
+
+ err = xe_exec_begin(engine, &ww, tv_onstack, &tv, &objs);
+ if (err)
+ goto err_unlock_list;
+
+ if (xe_vm_is_closed(engine->vm)) {
+ drm_warn(&xe->drm, "Trying to schedule after vm is closed\n");
+ err = -EIO;
+ goto err_engine_end;
+ }
+
+ job = xe_sched_job_create(engine, xe_engine_is_parallel(engine) ?
+ addresses : &args->address);
+ if (IS_ERR(job)) {
+ err = PTR_ERR(job);
+ goto err_engine_end;
+ }
+
+ /*
+ * Rebind any invalidated userptr or evicted BOs in the VM, non-compute
+ * VM mode only.
+ */
+ rebind_fence = xe_vm_rebind(vm, false);
+ if (IS_ERR(rebind_fence)) {
+ err = PTR_ERR(rebind_fence);
+ goto err_put_job;
+ }
+
+ /*
+ * We store the rebind_fence in the VM so subsequent execs don't get
+ * scheduled before the rebinds of userptrs / evicted BOs are complete.
+ */
+ if (rebind_fence) {
+ dma_fence_put(vm->rebind_fence);
+ vm->rebind_fence = rebind_fence;
+ }
+ if (vm->rebind_fence) {
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+ &vm->rebind_fence->flags)) {
+ dma_fence_put(vm->rebind_fence);
+ vm->rebind_fence = NULL;
+ } else {
+ dma_fence_get(vm->rebind_fence);
+ err = drm_sched_job_add_dependency(&job->drm,
+ vm->rebind_fence);
+ if (err)
+ goto err_put_job;
+ }
+ }
+
+ /* Wait behind munmap style rebinds */
+ if (!xe_vm_no_dma_fences(vm)) {
+ err = drm_sched_job_add_resv_dependencies(&job->drm,
+ &vm->resv,
+ DMA_RESV_USAGE_KERNEL);
+ if (err)
+ goto err_put_job;
+ }
+
+ for (i = 0; i < num_syncs && !err; i++)
+ err = xe_sync_entry_add_deps(&syncs[i], job);
+ if (err)
+ goto err_put_job;
+
+ if (!xe_vm_no_dma_fences(vm)) {
+ err = down_read_interruptible(&vm->userptr.notifier_lock);
+ if (err)
+ goto err_put_job;
+
+ err = __xe_vm_userptr_needs_repin(vm);
+ if (err)
+ goto err_repin;
+ }
+
+ /*
+ * Point of no return, if we error after this point just set an error on
+ * the job and let the DRM scheduler / backend clean up the job.
+ */
+ xe_sched_job_arm(job);
+ if (!xe_vm_no_dma_fences(vm)) {
+ /* Block userptr invalidations / BO eviction */
+ dma_resv_add_fence(&vm->resv,
+ &job->drm.s_fence->finished,
+ DMA_RESV_USAGE_BOOKKEEP);
+
+ /*
+ * Make implicit sync work across drivers, assuming all external
+ * BOs are written as we don't pass in a read / write list.
+ */
+ xe_vm_fence_all_extobjs(vm, &job->drm.s_fence->finished,
+ DMA_RESV_USAGE_WRITE);
+ }
+
+ for (i = 0; i < num_syncs; i++)
+ xe_sync_entry_signal(&syncs[i], job,
+ &job->drm.s_fence->finished);
+
+ xe_sched_job_push(job);
+
+err_repin:
+ if (!xe_vm_no_dma_fences(vm))
+ up_read(&vm->userptr.notifier_lock);
+err_put_job:
+ if (err)
+ xe_sched_job_put(job);
+err_engine_end:
+ xe_exec_end(engine, tv_onstack, tv, &ww, &objs);
+err_unlock_list:
+ if (write_locked)
+ up_write(&vm->lock);
+ else
+ up_read(&vm->lock);
+ if (err == -EAGAIN)
+ goto retry;
+err_syncs:
+ for (i = 0; i < num_syncs; i++)
+ xe_sync_entry_cleanup(&syncs[i]);
+ kfree(syncs);
+err_engine:
+ xe_engine_put(engine);
+
+ return err;
+}
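The DOC block in xe_exec.c above notes that the ring is flow controlled by capping the DRM scheduler's job limit at SIZE_OF_RING / MAX_JOB_SIZE rather than at exec time; the execlist backend later in this patch computes exactly that (e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES). A worked example with purely hypothetical numbers:

    /*
     * Hypothetical numbers, for illustration only: a 16 KiB ring with a
     * worst-case job footprint of 256 bytes gives the scheduler a
     * hw_submission limit of 64, so at most 64 jobs are written into the
     * ring before further jobs are held back by the DRM scheduler.
     */
    u32 ring_size = SZ_16K;                         /* assumed ring size */
    u32 max_job_size = 256;                         /* assumed MAX_JOB_SIZE_BYTES */
    u32 hw_submission = ring_size / max_job_size;   /* 64 in-flight jobs */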
diff --git a/drivers/gpu/drm/xe/xe_exec.h b/drivers/gpu/drm/xe/xe_exec.h
new file mode 100644
index 000000000000..e4932494cea3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_exec.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_EXEC_H_
+#define _XE_EXEC_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
new file mode 100644
index 000000000000..47587571123a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_execlist.h"
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_engine.h"
+#include "xe_hw_fence.h"
+#include "xe_gt.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+#include "xe_mocs.h"
+#include "xe_ring_ops_types.h"
+#include "xe_sched_job.h"
+
+#include "i915_reg.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_lrc_reg.h"
+#include "gt/intel_engine_regs.h"
+
+#define XE_EXECLIST_HANG_LIMIT 1
+
+#define GEN11_SW_CTX_ID_SHIFT 37
+#define GEN11_SW_CTX_ID_WIDTH 11
+#define XEHP_SW_CTX_ID_SHIFT 39
+#define XEHP_SW_CTX_ID_WIDTH 16
+
+#define GEN11_SW_CTX_ID \
+ GENMASK_ULL(GEN11_SW_CTX_ID_WIDTH + GEN11_SW_CTX_ID_SHIFT - 1, \
+ GEN11_SW_CTX_ID_SHIFT)
+
+#define XEHP_SW_CTX_ID \
+ GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \
+ XEHP_SW_CTX_ID_SHIFT)
+
+
+static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
+ u32 ctx_id)
+{
+ struct xe_gt *gt = hwe->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+ u64 lrc_desc;
+
+ printk(KERN_INFO "__start_lrc(%s, 0x%p, %u)\n", hwe->name, lrc, ctx_id);
+
+ lrc_desc = xe_lrc_descriptor(lrc);
+
+ if (GRAPHICS_VERx100(xe) >= 1250) {
+ XE_BUG_ON(!FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
+ lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
+ } else {
+ XE_BUG_ON(!FIELD_FIT(GEN11_SW_CTX_ID, ctx_id));
+ lrc_desc |= FIELD_PREP(GEN11_SW_CTX_ID, ctx_id);
+ }
+
+ if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
+ xe_mmio_write32(hwe->gt, GEN12_RCU_MODE.reg,
+ _MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE));
+
+ xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
+ lrc->ring.old_tail = lrc->ring.tail;
+
+ /*
+ * Make sure the context image is complete before we submit it to HW.
+ *
+ * Ostensibly, writes (including the WCB) should be flushed prior to
+ * an uncached write such as our mmio register access, but the empirical
+ * evidence (esp. on Braswell) suggests that the WC write into memory
+ * may not be visible to the HW prior to the completion of the UC
+ * register write and that we may begin execution from the context
+ * before its image is complete, leading to invalid PD chasing.
+ */
+ wmb();
+
+ xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base).reg,
+ xe_bo_ggtt_addr(hwe->hwsp));
+ xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base).reg);
+ xe_mmio_write32(gt, RING_MODE_GEN7(hwe->mmio_base).reg,
+ _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
+
+ xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS(hwe->mmio_base).reg + 0,
+ lower_32_bits(lrc_desc));
+ xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS(hwe->mmio_base).reg + 4,
+ upper_32_bits(lrc_desc));
+ xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base).reg,
+ EL_CTRL_LOAD);
+}
+
+static void __xe_execlist_port_start(struct xe_execlist_port *port,
+ struct xe_execlist_engine *exl)
+{
+ struct xe_device *xe = gt_to_xe(port->hwe->gt);
+ int max_ctx = FIELD_MAX(GEN11_SW_CTX_ID);
+
+ if (GRAPHICS_VERx100(xe) >= 1250)
+ max_ctx = FIELD_MAX(XEHP_SW_CTX_ID);
+
+ xe_execlist_port_assert_held(port);
+
+ if (port->running_exl != exl || !exl->has_run) {
+ port->last_ctx_id++;
+
+ /* 0 is reserved for the kernel context */
+ if (port->last_ctx_id > max_ctx)
+ port->last_ctx_id = 1;
+ }
+
+ __start_lrc(port->hwe, exl->engine->lrc, port->last_ctx_id);
+ port->running_exl = exl;
+ exl->has_run = true;
+}
+
+static void __xe_execlist_port_idle(struct xe_execlist_port *port)
+{
+ u32 noop[2] = { MI_NOOP, MI_NOOP };
+
+ xe_execlist_port_assert_held(port);
+
+ if (!port->running_exl)
+ return;
+
+ printk(KERN_INFO "__xe_execlist_port_idle(%d:%d)\n", port->hwe->class,
+ port->hwe->instance);
+
+ xe_lrc_write_ring(&port->hwe->kernel_lrc, noop, sizeof(noop));
+ __start_lrc(port->hwe, &port->hwe->kernel_lrc, 0);
+ port->running_exl = NULL;
+}
+
+static bool xe_execlist_is_idle(struct xe_execlist_engine *exl)
+{
+ struct xe_lrc *lrc = exl->engine->lrc;
+
+ return lrc->ring.tail == lrc->ring.old_tail;
+}
+
+static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
+{
+ struct xe_execlist_engine *exl = NULL;
+ int i;
+
+ xe_execlist_port_assert_held(port);
+
+ for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) {
+ while (!list_empty(&port->active[i])) {
+ exl = list_first_entry(&port->active[i],
+ struct xe_execlist_engine,
+ active_link);
+ list_del(&exl->active_link);
+
+ if (xe_execlist_is_idle(exl)) {
+ exl->active_priority = XE_ENGINE_PRIORITY_UNSET;
+ continue;
+ }
+
+ list_add_tail(&exl->active_link, &port->active[i]);
+ __xe_execlist_port_start(port, exl);
+ return;
+ }
+ }
+
+ __xe_execlist_port_idle(port);
+}
+
+static u64 read_execlist_status(struct xe_hw_engine *hwe)
+{
+ struct xe_gt *gt = hwe->gt;
+ u32 hi, lo;
+
+ lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base).reg);
+ hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base).reg);
+
+ printk(KERN_INFO "EXECLIST_STATUS %d:%d = 0x%08x %08x\n", hwe->class,
+ hwe->instance, hi, lo);
+
+ return lo | (u64)hi << 32;
+}
+
+static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
+{
+ u64 status;
+
+ xe_execlist_port_assert_held(port);
+
+ status = read_execlist_status(port->hwe);
+ if (status & BIT(7))
+ return;
+
+ __xe_execlist_port_start_next_active(port);
+}
+
+static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
+ u16 intr_vec)
+{
+ struct xe_execlist_port *port = hwe->exl_port;
+
+ spin_lock(&port->lock);
+ xe_execlist_port_irq_handler_locked(port);
+ spin_unlock(&port->lock);
+}
+
+static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
+ enum xe_engine_priority priority)
+{
+ xe_execlist_port_assert_held(port);
+
+ if (port->running_exl && port->running_exl->active_priority >= priority)
+ return;
+
+ __xe_execlist_port_start_next_active(port);
+}
+
+static void xe_execlist_make_active(struct xe_execlist_engine *exl)
+{
+ struct xe_execlist_port *port = exl->port;
+ enum xe_engine_priority priority = exl->active_priority;
+
+ XE_BUG_ON(priority == XE_ENGINE_PRIORITY_UNSET);
+ XE_BUG_ON(priority < 0);
+ XE_BUG_ON(priority >= ARRAY_SIZE(exl->port->active));
+
+ spin_lock_irq(&port->lock);
+
+ if (exl->active_priority != priority &&
+ exl->active_priority != XE_ENGINE_PRIORITY_UNSET) {
+ /* Priority changed, move it to the right list */
+ list_del(&exl->active_link);
+ exl->active_priority = XE_ENGINE_PRIORITY_UNSET;
+ }
+
+ if (exl->active_priority == XE_ENGINE_PRIORITY_UNSET) {
+ exl->active_priority = priority;
+ list_add_tail(&exl->active_link, &port->active[priority]);
+ }
+
+ xe_execlist_port_wake_locked(exl->port, priority);
+
+ spin_unlock_irq(&port->lock);
+}
+
+static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
+{
+ struct xe_execlist_port *port =
+ container_of(timer, struct xe_execlist_port, irq_fail);
+
+ spin_lock_irq(&port->lock);
+ xe_execlist_port_irq_handler_locked(port);
+ spin_unlock_irq(&port->lock);
+
+ port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
+ add_timer(&port->irq_fail);
+}
+
+struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
+ struct xe_hw_engine *hwe)
+{
+ struct drm_device *drm = &xe->drm;
+ struct xe_execlist_port *port;
+ int i;
+
+ port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL);
+ if (!port)
+ return ERR_PTR(-ENOMEM);
+
+ port->hwe = hwe;
+
+ spin_lock_init(&port->lock);
+ for (i = 0; i < ARRAY_SIZE(port->active); i++)
+ INIT_LIST_HEAD(&port->active[i]);
+
+ port->last_ctx_id = 1;
+ port->running_exl = NULL;
+
+ hwe->irq_handler = xe_execlist_port_irq_handler;
+
+ /* TODO: Fix the interrupt code so it doesn't race like mad */
+ timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
+ port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
+ add_timer(&port->irq_fail);
+
+ return port;
+}
+
+void xe_execlist_port_destroy(struct xe_execlist_port *port)
+{
+ del_timer(&port->irq_fail);
+
+ /* Prevent an interrupt while we're destroying */
+ spin_lock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
+ port->hwe->irq_handler = NULL;
+ spin_unlock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
+}
+
+static struct dma_fence *
+execlist_run_job(struct drm_sched_job *drm_job)
+{
+ struct xe_sched_job *job = to_xe_sched_job(drm_job);
+ struct xe_engine *e = job->engine;
+ struct xe_execlist_engine *exl = job->engine->execlist;
+
+ e->ring_ops->emit_job(job);
+ xe_execlist_make_active(exl);
+
+ return dma_fence_get(job->fence);
+}
+
+static void execlist_job_free(struct drm_sched_job *drm_job)
+{
+ struct xe_sched_job *job = to_xe_sched_job(drm_job);
+
+ xe_sched_job_put(job);
+}
+
+static const struct drm_sched_backend_ops drm_sched_ops = {
+ .run_job = execlist_run_job,
+ .free_job = execlist_job_free,
+};
+
+static int execlist_engine_init(struct xe_engine *e)
+{
+ struct drm_gpu_scheduler *sched;
+ struct xe_execlist_engine *exl;
+ int err;
+
+ XE_BUG_ON(xe_device_guc_submission_enabled(gt_to_xe(e->gt)));
+
+ exl = kzalloc(sizeof(*exl), GFP_KERNEL);
+ if (!exl)
+ return -ENOMEM;
+
+ exl->engine = e;
+
+ err = drm_sched_init(&exl->sched, &drm_sched_ops, NULL, 1,
+ e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
+ XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT,
+ NULL, NULL, e->hwe->name,
+ gt_to_xe(e->gt)->drm.dev);
+ if (err)
+ goto err_free;
+
+ sched = &exl->sched;
+ err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL);
+ if (err)
+ goto err_sched;
+
+ exl->port = e->hwe->exl_port;
+ exl->has_run = false;
+ exl->active_priority = XE_ENGINE_PRIORITY_UNSET;
+ e->execlist = exl;
+ e->entity = &exl->entity;
+
+ switch (e->class) {
+ case XE_ENGINE_CLASS_RENDER:
+ sprintf(e->name, "rcs%d", ffs(e->logical_mask) - 1);
+ break;
+ case XE_ENGINE_CLASS_VIDEO_DECODE:
+ sprintf(e->name, "vcs%d", ffs(e->logical_mask) - 1);
+ break;
+ case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+ sprintf(e->name, "vecs%d", ffs(e->logical_mask) - 1);
+ break;
+ case XE_ENGINE_CLASS_COPY:
+ sprintf(e->name, "bcs%d", ffs(e->logical_mask) - 1);
+ break;
+ case XE_ENGINE_CLASS_COMPUTE:
+ sprintf(e->name, "ccs%d", ffs(e->logical_mask) - 1);
+ break;
+ default:
+ XE_WARN_ON(e->class);
+ }
+
+ return 0;
+
+err_sched:
+ drm_sched_fini(&exl->sched);
+err_free:
+ kfree(exl);
+ return err;
+}
+
+static void execlist_engine_fini_async(struct work_struct *w)
+{
+ struct xe_execlist_engine *ee =
+ container_of(w, struct xe_execlist_engine, fini_async);
+ struct xe_engine *e = ee->engine;
+ struct xe_execlist_engine *exl = e->execlist;
+ unsigned long flags;
+
+ XE_BUG_ON(xe_device_guc_submission_enabled(gt_to_xe(e->gt)));
+
+ spin_lock_irqsave(&exl->port->lock, flags);
+ if (WARN_ON(exl->active_priority != XE_ENGINE_PRIORITY_UNSET))
+ list_del(&exl->active_link);
+ spin_unlock_irqrestore(&exl->port->lock, flags);
+
+ if (e->flags & ENGINE_FLAG_PERSISTENT)
+ xe_device_remove_persitent_engines(gt_to_xe(e->gt), e);
+ drm_sched_entity_fini(&exl->entity);
+ drm_sched_fini(&exl->sched);
+ kfree(exl);
+
+ xe_engine_fini(e);
+}
+
+static void execlist_engine_kill(struct xe_engine *e)
+{
+ /* NIY */
+}
+
+static void execlist_engine_fini(struct xe_engine *e)
+{
+ INIT_WORK(&e->execlist->fini_async, execlist_engine_fini_async);
+ queue_work(system_unbound_wq, &e->execlist->fini_async);
+}
+
+static int execlist_engine_set_priority(struct xe_engine *e,
+ enum xe_engine_priority priority)
+{
+ /* NIY */
+ return 0;
+}
+
+static int execlist_engine_set_timeslice(struct xe_engine *e, u32 timeslice_us)
+{
+ /* NIY */
+ return 0;
+}
+
+static int execlist_engine_set_preempt_timeout(struct xe_engine *e,
+ u32 preempt_timeout_us)
+{
+ /* NIY */
+ return 0;
+}
+
+static int execlist_engine_set_job_timeout(struct xe_engine *e,
+ u32 job_timeout_ms)
+{
+ /* NIY */
+ return 0;
+}
+
+static int execlist_engine_suspend(struct xe_engine *e)
+{
+ /* NIY */
+ return 0;
+}
+
+static void execlist_engine_suspend_wait(struct xe_engine *e)
+{
+ /* NIY */
+}
+
+static void execlist_engine_resume(struct xe_engine *e)
+{
+ xe_mocs_init_engine(e);
+}
+
+static const struct xe_engine_ops execlist_engine_ops = {
+ .init = execlist_engine_init,
+ .kill = execlist_engine_kill,
+ .fini = execlist_engine_fini,
+ .set_priority = execlist_engine_set_priority,
+ .set_timeslice = execlist_engine_set_timeslice,
+ .set_preempt_timeout = execlist_engine_set_preempt_timeout,
+ .set_job_timeout = execlist_engine_set_job_timeout,
+ .suspend = execlist_engine_suspend,
+ .suspend_wait = execlist_engine_suspend_wait,
+ .resume = execlist_engine_resume,
+};
+
+int xe_execlist_init(struct xe_gt *gt)
+{
+ /* GuC submission enabled, nothing to do */
+ if (xe_device_guc_submission_enabled(gt_to_xe(gt)))
+ return 0;
+
+ gt->engine_ops = &execlist_engine_ops;
+
+ return 0;
+}
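__start_lrc() above packs the software context ID into the upper bits of the LRC descriptor with FIELD_FIT()/FIELD_PREP(). A self-contained sketch of the same bitfield math for the pre-Xe_HP layout (shift 37, width 11, i.e. bits [47:37]); the macro and function names are illustrative:

    #include <linux/bitfield.h>
    #include <linux/bits.h>
    #include <linux/types.h>

    /* Bits [47:37] of the descriptor hold the SW context ID pre-Xe_HP. */
    #define SKETCH_SW_CTX_ID        GENMASK_ULL(11 + 37 - 1, 37)

    static u64 sketch_pack_ctx_id(u64 lrc_desc, u32 ctx_id)
    {
            /* FIELD_MAX(SKETCH_SW_CTX_ID) == 2047, so IDs 0..2047 fit. */
            if (!FIELD_FIT(SKETCH_SW_CTX_ID, ctx_id))
                    return lrc_desc;        /* ID too large for this layout */

            return lrc_desc | FIELD_PREP(SKETCH_SW_CTX_ID, ctx_id);
    }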
diff --git a/drivers/gpu/drm/xe/xe_execlist.h b/drivers/gpu/drm/xe/xe_execlist.h
new file mode 100644
index 000000000000..6a0442a6eff6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_execlist.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_EXECLIST_H_
+#define _XE_EXECLIST_H_
+
+#include "xe_execlist_types.h"
+
+struct xe_device;
+struct xe_gt;
+
+#define xe_execlist_port_assert_held(port) lockdep_assert_held(&(port)->lock)
+
+int xe_execlist_init(struct xe_gt *gt);
+struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
+ struct xe_hw_engine *hwe);
+void xe_execlist_port_destroy(struct xe_execlist_port *port);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_execlist_types.h b/drivers/gpu/drm/xe/xe_execlist_types.h
new file mode 100644
index 000000000000..9b1239b47292
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_execlist_types.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_EXECLIST_TYPES_H_
+#define _XE_EXECLIST_TYPES_H_
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#include "xe_engine_types.h"
+
+struct xe_hw_engine;
+struct xe_execlist_engine;
+
+struct xe_execlist_port {
+ struct xe_hw_engine *hwe;
+
+ spinlock_t lock;
+
+ struct list_head active[XE_ENGINE_PRIORITY_COUNT];
+
+ u32 last_ctx_id;
+
+ struct xe_execlist_engine *running_exl;
+
+ struct timer_list irq_fail;
+};
+
+struct xe_execlist_engine {
+ struct xe_engine *engine;
+
+ struct drm_gpu_scheduler sched;
+
+ struct drm_sched_entity entity;
+
+ struct xe_execlist_port *port;
+
+ bool has_run;
+
+ struct work_struct fini_async;
+
+ enum xe_engine_priority active_priority;
+ struct list_head active_link;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
new file mode 100644
index 000000000000..0320ce7ba3d1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_util.h>
+
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_mmio.h"
+#include "gt/intel_gt_regs.h"
+
+#define XE_FORCE_WAKE_ACK_TIMEOUT_MS 50
+
+static struct xe_gt *
+fw_to_gt(struct xe_force_wake *fw)
+{
+ return fw->gt;
+}
+
+static struct xe_device *
+fw_to_xe(struct xe_force_wake *fw)
+{
+ return gt_to_xe(fw_to_gt(fw));
+}
+
+static void domain_init(struct xe_force_wake_domain *domain,
+ enum xe_force_wake_domain_id id,
+ u32 reg, u32 ack, u32 val, u32 mask)
+{
+ domain->id = id;
+ domain->reg_ctl = reg;
+ domain->reg_ack = ack;
+ domain->val = val;
+ domain->mask = mask;
+}
+
+#define FORCEWAKE_ACK_GT_MTL _MMIO(0xdfc)
+
+void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ fw->gt = gt;
+ mutex_init(&fw->lock);
+
+ /* Assuming gen11+ so assert this assumption is correct */
+ XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11);
+
+ if (xe->info.platform == XE_METEORLAKE) {
+ domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
+ XE_FW_DOMAIN_ID_GT,
+ FORCEWAKE_GT_GEN9.reg,
+ FORCEWAKE_ACK_GT_MTL.reg,
+ BIT(0), BIT(16));
+ } else {
+ domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT],
+ XE_FW_DOMAIN_ID_GT,
+ FORCEWAKE_GT_GEN9.reg,
+ FORCEWAKE_ACK_GT_GEN9.reg,
+ BIT(0), BIT(16));
+ }
+}
+
+void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
+{
+ int i, j;
+
+ /* Assuming gen11+ so assert this assumption is correct */
+ XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11);
+
+ if (!xe_gt_is_media_type(gt))
+ domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER],
+ XE_FW_DOMAIN_ID_RENDER,
+ FORCEWAKE_RENDER_GEN9.reg,
+ FORCEWAKE_ACK_RENDER_GEN9.reg,
+ BIT(0), BIT(16));
+
+ for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
+ if (!(gt->info.engine_mask & BIT(i)))
+ continue;
+
+ domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j],
+ XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j,
+ FORCEWAKE_MEDIA_VDBOX_GEN11(j).reg,
+ FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(j).reg,
+ BIT(0), BIT(16));
+ }
+
+ for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
+ if (!(gt->info.engine_mask & BIT(i)))
+ continue;
+
+ domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j],
+ XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j,
+ FORCEWAKE_MEDIA_VEBOX_GEN11(j).reg,
+ FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(j).reg,
+ BIT(0), BIT(16));
+ }
+}
+
+void xe_force_wake_prune(struct xe_gt *gt, struct xe_force_wake *fw)
+{
+ int i, j;
+
+ /* Call after fuses have been read, prune domains that are fused off */
+
+ for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j)
+ if (!(gt->info.engine_mask & BIT(i)))
+ fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j].reg_ctl = 0;
+
+ for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j)
+ if (!(gt->info.engine_mask & BIT(i)))
+ fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j].reg_ctl = 0;
+}
+
+static void domain_wake(struct xe_gt *gt, struct xe_force_wake_domain *domain)
+{
+ xe_mmio_write32(gt, domain->reg_ctl, domain->mask | domain->val);
+}
+
+static int domain_wake_wait(struct xe_gt *gt,
+ struct xe_force_wake_domain *domain)
+{
+ return xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val,
+ XE_FORCE_WAKE_ACK_TIMEOUT_MS);
+}
+
+static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain)
+{
+ xe_mmio_write32(gt, domain->reg_ctl, domain->mask);
+}
+
+static int domain_sleep_wait(struct xe_gt *gt,
+ struct xe_force_wake_domain *domain)
+{
+ return xe_mmio_wait32(gt, domain->reg_ack, 0, domain->val,
+ XE_FORCE_WAKE_ACK_TIMEOUT_MS);
+}
+
+#define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \
+ for (tmp__ = (mask__); tmp__ ;) \
+ for_each_if((domain__ = ((fw__)->domains + \
+ __mask_next_bit(tmp__))) && \
+ domain__->reg_ctl)
+
+int xe_force_wake_get(struct xe_force_wake *fw,
+ enum xe_force_wake_domains domains)
+{
+ struct xe_device *xe = fw_to_xe(fw);
+ struct xe_gt *gt = fw_to_gt(fw);
+ struct xe_force_wake_domain *domain;
+ enum xe_force_wake_domains tmp, woken = 0;
+ int ret, ret2 = 0;
+
+ mutex_lock(&fw->lock);
+ for_each_fw_domain_masked(domain, domains, fw, tmp) {
+ if (!domain->ref++) {
+ woken |= BIT(domain->id);
+ domain_wake(gt, domain);
+ }
+ }
+ for_each_fw_domain_masked(domain, woken, fw, tmp) {
+ ret = domain_wake_wait(gt, domain);
+ ret2 |= ret;
+ if (ret)
+ drm_notice(&xe->drm, "Force wake domain (%d) failed to ack wake, ret=%d\n",
+ domain->id, ret);
+ }
+ fw->awake_domains |= woken;
+ mutex_unlock(&fw->lock);
+
+ return ret2;
+}
+
+int xe_force_wake_put(struct xe_force_wake *fw,
+ enum xe_force_wake_domains domains)
+{
+ struct xe_device *xe = fw_to_xe(fw);
+ struct xe_gt *gt = fw_to_gt(fw);
+ struct xe_force_wake_domain *domain;
+ enum xe_force_wake_domains tmp, sleep = 0;
+ int ret, ret2 = 0;
+
+ mutex_lock(&fw->lock);
+ for_each_fw_domain_masked(domain, domains, fw, tmp) {
+ if (!--domain->ref) {
+ sleep |= BIT(domain->id);
+ domain_sleep(gt, domain);
+ }
+ }
+ for_each_fw_domain_masked(domain, sleep, fw, tmp) {
+ ret = domain_sleep_wait(gt, domain);
+ ret2 |= ret;
+ if (ret)
+ drm_notice(&xe->drm, "Force wake domain (%d) failed to ack sleep, ret=%d\n",
+ domain->id, ret);
+ }
+ fw->awake_domains &= ~sleep;
+ mutex_unlock(&fw->lock);
+
+ return ret2;
+}
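xe_force_wake_get()/xe_force_wake_put() above are reference counted per domain: the wake/ack handshake with the hardware only happens on a domain's 0 -> 1 transition, and the sleep request is only written once the last reference is dropped. A minimal usage sketch, assuming a caller that already has the GT and its force wake struct; the wrapper name and register parameter are illustrative:

    /* Sketch only: bracket an MMIO read with a GT forcewake reference. */
    static int sketch_read_with_fw(struct xe_gt *gt, struct xe_force_wake *fw,
                                   u32 reg, u32 *val)
    {
            int err;

            err = xe_force_wake_get(fw, XE_FW_GT);
            if (err)
                    return err;

            *val = xe_mmio_read32(gt, reg);

            /* Only the final put on XE_FW_GT lets the domain sleep again. */
            return xe_force_wake_put(fw, XE_FW_GT);
    }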
diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h
new file mode 100644
index 000000000000..5adb8daa3b71
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_force_wake.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_FORCE_WAKE_H_
+#define _XE_FORCE_WAKE_H_
+
+#include "xe_force_wake_types.h"
+#include "xe_macros.h"
+
+struct xe_gt;
+
+void xe_force_wake_init_gt(struct xe_gt *gt,
+ struct xe_force_wake *fw);
+void xe_force_wake_init_engines(struct xe_gt *gt,
+ struct xe_force_wake *fw);
+void xe_force_wake_prune(struct xe_gt *gt,
+ struct xe_force_wake *fw);
+int xe_force_wake_get(struct xe_force_wake *fw,
+ enum xe_force_wake_domains domains);
+int xe_force_wake_put(struct xe_force_wake *fw,
+ enum xe_force_wake_domains domains);
+
+static inline int
+xe_force_wake_ref(struct xe_force_wake *fw,
+ enum xe_force_wake_domains domain)
+{
+ XE_BUG_ON(!domain);
+ return fw->domains[ffs(domain) - 1].ref;
+}
+
+static inline void
+xe_force_wake_assert_held(struct xe_force_wake *fw,
+ enum xe_force_wake_domains domain)
+{
+ XE_BUG_ON(!(fw->awake_domains & domain));
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h
new file mode 100644
index 000000000000..208dd629d7b1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_force_wake_types.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_FORCE_WAKE_TYPES_H_
+#define _XE_FORCE_WAKE_TYPES_H_
+
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+enum xe_force_wake_domain_id {
+ XE_FW_DOMAIN_ID_GT = 0,
+ XE_FW_DOMAIN_ID_RENDER,
+ XE_FW_DOMAIN_ID_MEDIA,
+ XE_FW_DOMAIN_ID_MEDIA_VDBOX0,
+ XE_FW_DOMAIN_ID_MEDIA_VDBOX1,
+ XE_FW_DOMAIN_ID_MEDIA_VDBOX2,
+ XE_FW_DOMAIN_ID_MEDIA_VDBOX3,
+ XE_FW_DOMAIN_ID_MEDIA_VDBOX4,
+ XE_FW_DOMAIN_ID_MEDIA_VDBOX5,
+ XE_FW_DOMAIN_ID_MEDIA_VDBOX6,
+ XE_FW_DOMAIN_ID_MEDIA_VDBOX7,
+ XE_FW_DOMAIN_ID_MEDIA_VEBOX0,
+ XE_FW_DOMAIN_ID_MEDIA_VEBOX1,
+ XE_FW_DOMAIN_ID_MEDIA_VEBOX2,
+ XE_FW_DOMAIN_ID_MEDIA_VEBOX3,
+ XE_FW_DOMAIN_ID_GSC,
+ XE_FW_DOMAIN_ID_COUNT
+};
+
+enum xe_force_wake_domains {
+ XE_FW_GT = BIT(XE_FW_DOMAIN_ID_GT),
+ XE_FW_RENDER = BIT(XE_FW_DOMAIN_ID_RENDER),
+ XE_FW_MEDIA = BIT(XE_FW_DOMAIN_ID_MEDIA),
+ XE_FW_MEDIA_VDBOX0 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX0),
+ XE_FW_MEDIA_VDBOX1 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX1),
+ XE_FW_MEDIA_VDBOX2 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX2),
+ XE_FW_MEDIA_VDBOX3 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX3),
+ XE_FW_MEDIA_VDBOX4 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX4),
+ XE_FW_MEDIA_VDBOX5 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX5),
+ XE_FW_MEDIA_VDBOX6 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX6),
+ XE_FW_MEDIA_VDBOX7 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX7),
+ XE_FW_MEDIA_VEBOX0 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX0),
+ XE_FW_MEDIA_VEBOX1 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX1),
+ XE_FW_MEDIA_VEBOX2 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX2),
+ XE_FW_MEDIA_VEBOX3 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX3),
+ XE_FW_GSC = BIT(XE_FW_DOMAIN_ID_GSC),
+ XE_FORCEWAKE_ALL = BIT(XE_FW_DOMAIN_ID_COUNT) - 1
+};
+
+/**
+ * struct xe_force_wake_domain - XE force wake domains
+ */
+struct xe_force_wake_domain {
+ /** @id: domain force wake id */
+ enum xe_force_wake_domain_id id;
+ /** @reg_ctl: domain wake control register address */
+ u32 reg_ctl;
+ /** @reg_ack: domain ack register address */
+ u32 reg_ack;
+ /** @val: domain wake write value */
+ u32 val;
+ /** @mask: domain mask */
+ u32 mask;
+ /** @ref: domain reference count */
+ u32 ref;
+};
+
+/**
+ * struct xe_force_wake - XE force wake
+ */
+struct xe_force_wake {
+ /** @gt: back pointer to GT */
+ struct xe_gt *gt;
+ /** @lock: protects everything in the force wake struct */
+ struct mutex lock;
+ /** @awake_domains: mask of all domains awake */
+ enum xe_force_wake_domains awake_domains;
+ /** @domains: force wake domains */
+ struct xe_force_wake_domain domains[XE_FW_DOMAIN_ID_COUNT];
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
new file mode 100644
index 000000000000..eab74a509f68
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_ggtt.h"
+
+#include <linux/sizes.h>
+#include <drm/i915_drm.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_mmio.h"
+#include "xe_wopcm.h"
+
+#include "i915_reg.h"
+#include "gt/intel_gt_regs.h"
+
+/* FIXME: Common file, preferably auto-gen */
+#define MTL_GGTT_PTE_PAT0 BIT(52)
+#define MTL_GGTT_PTE_PAT1 BIT(53)
+
+u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset)
+{
+ struct xe_device *xe = xe_bo_device(bo);
+ u64 pte;
+ bool is_lmem;
+
+ pte = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_lmem);
+ pte |= GEN8_PAGE_PRESENT;
+
+ if (is_lmem)
+ pte |= GEN12_GGTT_PTE_LM;
+
+ /* FIXME: vfunc + pass in caching rules */
+ if (xe->info.platform == XE_METEORLAKE) {
+ pte |= MTL_GGTT_PTE_PAT0;
+ pte |= MTL_GGTT_PTE_PAT1;
+ }
+
+ return pte;
+}
+
+static unsigned int probe_gsm_size(struct pci_dev *pdev)
+{
+ u16 gmch_ctl, ggms;
+
+ pci_read_config_word(pdev, SNB_GMCH_CTRL, &gmch_ctl);
+ ggms = (gmch_ctl >> BDW_GMCH_GGMS_SHIFT) & BDW_GMCH_GGMS_MASK;
+ return ggms ? SZ_1M << ggms : 0;
+}
+
+void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
+{
+ XE_BUG_ON(addr & GEN8_PTE_MASK);
+ XE_BUG_ON(addr >= ggtt->size);
+
+ writeq(pte, &ggtt->gsm[addr >> GEN8_PTE_SHIFT]);
+}
+
+static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
+{
+ u64 end = start + size - 1;
+ u64 scratch_pte;
+
+ XE_BUG_ON(start >= end);
+
+ if (ggtt->scratch)
+ scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0);
+ else
+ scratch_pte = 0;
+
+ while (start < end) {
+ xe_ggtt_set_pte(ggtt, start, scratch_pte);
+ start += GEN8_PAGE_SIZE;
+ }
+}
+
+static void ggtt_fini_noalloc(struct drm_device *drm, void *arg)
+{
+ struct xe_ggtt *ggtt = arg;
+
+ mutex_destroy(&ggtt->lock);
+ drm_mm_takedown(&ggtt->mm);
+
+ xe_bo_unpin_map_no_vm(ggtt->scratch);
+}
+
+int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ unsigned int gsm_size;
+
+ XE_BUG_ON(xe_gt_is_media_type(gt));
+
+ ggtt->gt = gt;
+
+ gsm_size = probe_gsm_size(pdev);
+ if (gsm_size == 0) {
+ drm_err(&xe->drm, "Hardware reported no preallocated GSM\n");
+ return -ENOMEM;
+ }
+
+ ggtt->gsm = gt->mmio.regs + SZ_8M;
+ ggtt->size = (gsm_size / 8) * (u64)GEN8_PAGE_SIZE;
+
+ /*
+ * 8B per entry, each points to a 4KB page.
+ *
+ * The GuC owns the WOPCM space, thus we can't allocate GGTT addresses in
+ * this area. Even though we likely configure the WOPCM to less than the
+ * maximum value, to simplify the driver load (no need to fetch HuC +
+ * GuC firmwares and determine their sizes before initializing the GGTT)
+ * just start the GGTT allocation above the max WOPCM size. This might
+ * waste space in the GGTT (WOPCM is 2MB on modern platforms) but we can
+ * live with this.
+ *
+ * Another benefit of this is that the GuC bootrom can't access anything
+ * below the WOPCM max size, so anything the bootrom needs to access (e.g.
+ * an RSA key) needs to be placed in the GGTT above the WOPCM max size.
+ * Starting the GGTT allocations above the WOPCM max gives us the correct
+ * placement for free.
+ */
+ drm_mm_init(&ggtt->mm, xe_wopcm_size(xe),
+ ggtt->size - xe_wopcm_size(xe));
+ mutex_init(&ggtt->lock);
+
+ return drmm_add_action_or_reset(&xe->drm, ggtt_fini_noalloc, ggtt);
+}
+
+static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
+{
+ struct drm_mm_node *hole;
+ u64 start, end;
+
+ /* Display may have allocated inside ggtt, so be careful with clearing here */
+ mutex_lock(&ggtt->lock);
+ drm_mm_for_each_hole(hole, &ggtt->mm, start, end)
+ xe_ggtt_clear(ggtt, start, end - start);
+
+ xe_ggtt_invalidate(ggtt->gt);
+ mutex_unlock(&ggtt->lock);
+}
+
+int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int err;
+
+ ggtt->scratch = xe_bo_create_locked(xe, gt, NULL, GEN8_PAGE_SIZE,
+ ttm_bo_type_kernel,
+ XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+ XE_BO_CREATE_PINNED_BIT);
+ if (IS_ERR(ggtt->scratch)) {
+ err = PTR_ERR(ggtt->scratch);
+ goto err;
+ }
+
+ err = xe_bo_pin(ggtt->scratch);
+ xe_bo_unlock_no_vm(ggtt->scratch);
+ if (err) {
+ xe_bo_put(ggtt->scratch);
+ goto err;
+ }
+
+ xe_ggtt_initial_clear(ggtt);
+ return 0;
+err:
+ ggtt->scratch = NULL;
+ return err;
+}
+
+#define GEN12_GUC_TLB_INV_CR _MMIO(0xcee8)
+#define GEN12_GUC_TLB_INV_CR_INVALIDATE (1 << 0)
+#define PVC_GUC_TLB_INV_DESC0 _MMIO(0xcf7c)
+#define PVC_GUC_TLB_INV_DESC0_VALID (1 << 0)
+#define PVC_GUC_TLB_INV_DESC1 _MMIO(0xcf80)
+#define PVC_GUC_TLB_INV_DESC1_INVALIDATE (1 << 6)
+
+void xe_ggtt_invalidate(struct xe_gt *gt)
+{
+ /* TODO: vfunc for GuC vs. non-GuC */
+
+ /* TODO: i915 makes comments about this being uncached and
+ * therefore flushing WC buffers. Is that really true here?
+ */
+ xe_mmio_write32(gt, GFX_FLSH_CNTL_GEN6.reg, GFX_FLSH_CNTL_EN);
+ if (xe_device_guc_submission_enabled(gt_to_xe(gt))) {
+ struct xe_device *xe = gt_to_xe(gt);
+
+ /* TODO: also use vfunc here */
+ if (xe->info.platform == XE_PVC) {
+ xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1.reg,
+ PVC_GUC_TLB_INV_DESC1_INVALIDATE);
+ xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0.reg,
+ PVC_GUC_TLB_INV_DESC0_VALID);
+ } else {
+ xe_mmio_write32(gt, GEN12_GUC_TLB_INV_CR.reg,
+ GEN12_GUC_TLB_INV_CR_INVALIDATE);
+ }
+ }
+}
+
+void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix)
+{
+ u64 addr, scratch_pte;
+
+ scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0);
+
+ printk("%sGlobal GTT:", prefix);
+ for (addr = 0; addr < ggtt->size; addr += GEN8_PAGE_SIZE) {
+ unsigned int i = addr / GEN8_PAGE_SIZE;
+
+ XE_BUG_ON(addr > U32_MAX);
+ if (ggtt->gsm[i] == scratch_pte)
+ continue;
+
+ printk("%s ggtt[0x%08x] = 0x%016llx",
+ prefix, (u32)addr, ggtt->gsm[i]);
+ }
+}
+
+int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+ u32 size, u32 align, u32 mm_flags)
+{
+ return drm_mm_insert_node_generic(&ggtt->mm, node, size, align, 0,
+ mm_flags);
+}
+
+int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+ u32 size, u32 align)
+{
+ int ret;
+
+ mutex_lock(&ggtt->lock);
+ ret = xe_ggtt_insert_special_node_locked(ggtt, node, size,
+ align, DRM_MM_INSERT_HIGH);
+ mutex_unlock(&ggtt->lock);
+
+ return ret;
+}
+
+void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+{
+ u64 start = bo->ggtt_node.start;
+ u64 offset, pte;
+
+ for (offset = 0; offset < bo->size; offset += GEN8_PAGE_SIZE) {
+ pte = xe_ggtt_pte_encode(bo, offset);
+ xe_ggtt_set_pte(ggtt, start + offset, pte);
+ }
+
+ xe_ggtt_invalidate(ggtt->gt);
+}
+
+int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+{
+ int err;
+
+ if (XE_WARN_ON(bo->ggtt_node.size)) {
+ /* Someone's already inserted this BO in the GGTT */
+ XE_BUG_ON(bo->ggtt_node.size != bo->size);
+ return 0;
+ }
+
+ err = xe_bo_validate(bo, NULL, false);
+ if (err)
+ return err;
+
+ mutex_lock(&ggtt->lock);
+ err = drm_mm_insert_node(&ggtt->mm, &bo->ggtt_node, bo->size);
+ if (!err)
+ xe_ggtt_map_bo(ggtt, bo);
+ mutex_unlock(&ggtt->lock);
+
+ return err;
+}
+
+void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node)
+{
+ mutex_lock(&ggtt->lock);
+
+ xe_ggtt_clear(ggtt, node->start, node->size);
+ drm_mm_remove_node(node);
+ node->size = 0;
+
+ xe_ggtt_invalidate(ggtt->gt);
+
+ mutex_unlock(&ggtt->lock);
+}
+
+void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+{
+ if (XE_WARN_ON(!bo->ggtt_node.size))
+ return;
+
+ /* This BO is not currently in the GGTT */
+ XE_BUG_ON(bo->ggtt_node.size != bo->size);
+
+ xe_ggtt_remove_node(ggtt, &bo->ggtt_node);
+}
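probe_gsm_size() above decodes the GGMS fuse field into the size of the GTT stolen memory, and xe_ggtt_init_noalloc() derives the addressable GGTT range from it: 8 bytes per PTE, each PTE mapping a 4 KiB page. A worked example with a hypothetical fuse value:

    /*
     * Hypothetical GGMS value, for illustration only: GGMS == 3 decodes to
     * SZ_1M << 3 == 8 MiB of GSM. At 8 bytes per PTE that is 1M entries,
     * and with 4 KiB mapped per entry the GGTT covers 1M * 4 KiB == 4 GiB.
     */
    unsigned int gsm_size = SZ_1M << 3;             /* 8 MiB of PTEs */
    u64 ggtt_size = (gsm_size / 8) * (u64)SZ_4K;    /* 4 GiB of GGTT space */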
diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
new file mode 100644
index 000000000000..289c6852ad1a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ggtt.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_GGTT_H_
+#define _XE_GGTT_H_
+
+#include "xe_ggtt_types.h"
+
+u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset);
+void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte);
+void xe_ggtt_invalidate(struct xe_gt *gt);
+int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt);
+int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt);
+void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix);
+
+int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+ u32 size, u32 align);
+int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt,
+ struct drm_mm_node *node,
+ u32 size, u32 align, u32 mm_flags);
+void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node);
+void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
+int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
+void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h
new file mode 100644
index 000000000000..e04193001763
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GGTT_TYPES_H_
+#define _XE_GGTT_TYPES_H_
+
+#include <drm/drm_mm.h>
+
+struct xe_bo;
+struct xe_gt;
+
+struct xe_ggtt {
+ struct xe_gt *gt;
+
+ u64 size;
+
+ struct xe_bo *scratch;
+
+ struct mutex lock;
+
+ u64 __iomem *gsm;
+
+ struct drm_mm mm;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
new file mode 100644
index 000000000000..e4ad1d6ce1d5
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_gpu_scheduler.h"
+
+static void xe_sched_process_msg_queue(struct xe_gpu_scheduler *sched)
+{
+ if (!READ_ONCE(sched->base.pause_submit))
+ queue_work(sched->base.submit_wq, &sched->work_process_msg);
+}
+
+static void xe_sched_process_msg_queue_if_ready(struct xe_gpu_scheduler *sched)
+{
+ struct xe_sched_msg *msg;
+
+ spin_lock(&sched->base.job_list_lock);
+ msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link);
+ if (msg)
+ xe_sched_process_msg_queue(sched);
+ spin_unlock(&sched->base.job_list_lock);
+}
+
+static struct xe_sched_msg *
+xe_sched_get_msg(struct xe_gpu_scheduler *sched)
+{
+ struct xe_sched_msg *msg;
+
+ spin_lock(&sched->base.job_list_lock);
+ msg = list_first_entry_or_null(&sched->msgs,
+ struct xe_sched_msg, link);
+ if (msg)
+ list_del(&msg->link);
+ spin_unlock(&sched->base.job_list_lock);
+
+ return msg;
+}
+
+static void xe_sched_process_msg_work(struct work_struct *w)
+{
+ struct xe_gpu_scheduler *sched =
+ container_of(w, struct xe_gpu_scheduler, work_process_msg);
+ struct xe_sched_msg *msg;
+
+ if (READ_ONCE(sched->base.pause_submit))
+ return;
+
+ msg = xe_sched_get_msg(sched);
+ if (msg) {
+ sched->ops->process_msg(msg);
+
+ xe_sched_process_msg_queue_if_ready(sched);
+ }
+}
+
+int xe_sched_init(struct xe_gpu_scheduler *sched,
+ const struct drm_sched_backend_ops *ops,
+ const struct xe_sched_backend_ops *xe_ops,
+ struct workqueue_struct *submit_wq,
+ uint32_t hw_submission, unsigned hang_limit,
+ long timeout, struct workqueue_struct *timeout_wq,
+ atomic_t *score, const char *name,
+ struct device *dev)
+{
+ sched->ops = xe_ops;
+ INIT_LIST_HEAD(&sched->msgs);
+ INIT_WORK(&sched->work_process_msg, xe_sched_process_msg_work);
+
+ return drm_sched_init(&sched->base, ops, submit_wq, 1, hw_submission,
+ hang_limit, timeout, timeout_wq, score, name,
+ dev);
+}
+
+void xe_sched_fini(struct xe_gpu_scheduler *sched)
+{
+ xe_sched_submission_stop(sched);
+ drm_sched_fini(&sched->base);
+}
+
+void xe_sched_submission_start(struct xe_gpu_scheduler *sched)
+{
+ drm_sched_wqueue_start(&sched->base);
+ queue_work(sched->base.submit_wq, &sched->work_process_msg);
+}
+
+void xe_sched_submission_stop(struct xe_gpu_scheduler *sched)
+{
+ drm_sched_wqueue_stop(&sched->base);
+ cancel_work_sync(&sched->work_process_msg);
+}
+
+void xe_sched_add_msg(struct xe_gpu_scheduler *sched,
+ struct xe_sched_msg *msg)
+{
+ spin_lock(&sched->base.job_list_lock);
+ list_add_tail(&msg->link, &sched->msgs);
+ spin_unlock(&sched->base.job_list_lock);
+
+ xe_sched_process_msg_queue(sched);
+}
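xe_sched_add_msg() above queues backend-defined messages that are processed from the scheduler's submit workqueue through the backend's process_msg() hook, which owns (and must free) dynamically allocated messages. A minimal sketch of a hypothetical backend posting such a message; the opcode, names, and payload handling are illustrative, and the ops table would be passed to xe_sched_init() as the xe_ops argument:

    /* Hypothetical backend opcode, for illustration only. */
    #define SKETCH_OP_CLEANUP       0

    static void sketch_process_msg(struct xe_sched_msg *msg)
    {
            switch (msg->opcode) {
            case SKETCH_OP_CLEANUP:
                    /* act on msg->private_data here */
                    break;
            }

            kfree(msg);     /* process_msg owns dynamically allocated messages */
    }

    static const struct xe_sched_backend_ops sketch_sched_ops = {
            .process_msg = sketch_process_msg,
    };

    static int sketch_post_cleanup(struct xe_gpu_scheduler *sched, void *data)
    {
            struct xe_sched_msg *msg = kmalloc(sizeof(*msg), GFP_KERNEL);

            if (!msg)
                    return -ENOMEM;

            msg->private_data = data;
            msg->opcode = SKETCH_OP_CLEANUP;
            xe_sched_add_msg(sched, msg);   /* runs later from the submit wq */

            return 0;
    }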
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
new file mode 100644
index 000000000000..10c6bb9c9386
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GPU_SCHEDULER_H_
+#define _XE_GPU_SCHEDULER_H_
+
+#include "xe_gpu_scheduler_types.h"
+#include "xe_sched_job_types.h"
+
+int xe_sched_init(struct xe_gpu_scheduler *sched,
+ const struct drm_sched_backend_ops *ops,
+ const struct xe_sched_backend_ops *xe_ops,
+ struct workqueue_struct *submit_wq,
+ uint32_t hw_submission, unsigned hang_limit,
+ long timeout, struct workqueue_struct *timeout_wq,
+ atomic_t *score, const char *name,
+ struct device *dev);
+void xe_sched_fini(struct xe_gpu_scheduler *sched);
+
+void xe_sched_submission_start(struct xe_gpu_scheduler *sched);
+void xe_sched_submission_stop(struct xe_gpu_scheduler *sched);
+
+void xe_sched_add_msg(struct xe_gpu_scheduler *sched,
+ struct xe_sched_msg *msg);
+
+static inline void xe_sched_stop(struct xe_gpu_scheduler *sched)
+{
+ drm_sched_stop(&sched->base, NULL);
+}
+
+static inline void xe_sched_tdr_queue_imm(struct xe_gpu_scheduler *sched)
+{
+ drm_sched_tdr_queue_imm(&sched->base);
+}
+
+static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched)
+{
+ drm_sched_resubmit_jobs(&sched->base);
+}
+
+static inline bool
+xe_sched_invalidate_job(struct xe_sched_job *job, int threshold)
+{
+ return drm_sched_invalidate_job(&job->drm, threshold);
+}
+
+static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched,
+ struct xe_sched_job *job)
+{
+ list_add(&job->drm.list, &sched->base.pending_list);
+}
+
+static inline
+struct xe_sched_job *xe_sched_first_pending_job(struct xe_gpu_scheduler *sched)
+{
+ return list_first_entry_or_null(&sched->base.pending_list,
+ struct xe_sched_job, drm.list);
+}
+
+static inline int
+xe_sched_entity_init(struct xe_sched_entity *entity,
+ struct xe_gpu_scheduler *sched)
+{
+ return drm_sched_entity_init(entity, 0,
+ (struct drm_gpu_scheduler **)&sched,
+ 1, NULL);
+}
+
+#define xe_sched_entity_fini drm_sched_entity_fini
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h b/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h
new file mode 100644
index 000000000000..6731b13da8bb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GPU_SCHEDULER_TYPES_H_
+#define _XE_GPU_SCHEDULER_TYPES_H_
+
+#include <drm/gpu_scheduler.h>
+
+/**
+ * struct xe_sched_msg - an in-band (relative to GPU scheduler run queue)
+ * message
+ *
+ * Generic enough for backend defined messages, backend can expand if needed.
+ */
+struct xe_sched_msg {
+ /** @link: list link into the gpu scheduler list of messages */
+ struct list_head link;
+ /**
+ * @private_data: opaque pointer to message private data (backend defined)
+ */
+ void *private_data;
+ /** @opcode: opcode of message (backend defined) */
+ unsigned int opcode;
+};
+
+/**
+ * struct xe_sched_backend_ops - Define the backend operations called by the
+ * scheduler
+ */
+struct xe_sched_backend_ops {
+ /**
+ * @process_msg: Process a message. Allowed to block, it is this
+ * function's responsibility to free message if dynamically allocated.
+ */
+ void (*process_msg)(struct xe_sched_msg *msg);
+};
+
+/**
+ * struct xe_gpu_scheduler - Xe GPU scheduler
+ */
+struct xe_gpu_scheduler {
+ /** @base: DRM GPU scheduler */
+ struct drm_gpu_scheduler base;
+ /** @ops: Xe scheduler ops */
+ const struct xe_sched_backend_ops *ops;
+ /** @msgs: list of messages to be processed in @work_process_msg */
+ struct list_head msgs;
+ /** @work_process_msg: processes messages */
+ struct work_struct work_process_msg;
+};
+
+#define xe_sched_entity drm_sched_entity
+#define xe_sched_policy drm_sched_policy
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
new file mode 100644
index 000000000000..5f8fa9d98d5a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -0,0 +1,830 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/minmax.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_bb.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_engine.h"
+#include "xe_execlist.h"
+#include "xe_force_wake.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_gt_clock.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_pagefault.h"
+#include "xe_gt_sysfs.h"
+#include "xe_gt_topology.h"
+#include "xe_hw_fence.h"
+#include "xe_irq.h"
+#include "xe_lrc.h"
+#include "xe_map.h"
+#include "xe_migrate.h"
+#include "xe_mmio.h"
+#include "xe_mocs.h"
+#include "xe_reg_sr.h"
+#include "xe_ring_ops.h"
+#include "xe_sa.h"
+#include "xe_sched_job.h"
+#include "xe_ttm_gtt_mgr.h"
+#include "xe_ttm_vram_mgr.h"
+#include "xe_tuning.h"
+#include "xe_uc.h"
+#include "xe_vm.h"
+#include "xe_wa.h"
+#include "xe_wopcm.h"
+
+#include "gt/intel_gt_regs.h"
+
+struct xe_gt *xe_find_full_gt(struct xe_gt *gt)
+{
+ struct xe_gt *search;
+ u8 id;
+
+ XE_BUG_ON(!xe_gt_is_media_type(gt));
+
+ for_each_gt(search, gt_to_xe(gt), id) {
+ if (search->info.vram_id == gt->info.vram_id)
+ return search;
+ }
+
+ XE_BUG_ON("NOT POSSIBLE");
+ return NULL;
+}
+
+int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt)
+{
+ struct drm_device *drm = &xe->drm;
+
+ XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED);
+
+ if (!xe_gt_is_media_type(gt)) {
+ gt->mem.ggtt = drmm_kzalloc(drm, sizeof(*gt->mem.ggtt),
+ GFP_KERNEL);
+ if (!gt->mem.ggtt)
+ return -ENOMEM;
+
+ gt->mem.vram_mgr = drmm_kzalloc(drm, sizeof(*gt->mem.vram_mgr),
+ GFP_KERNEL);
+ if (!gt->mem.vram_mgr)
+ return -ENOMEM;
+
+ gt->mem.gtt_mgr = drmm_kzalloc(drm, sizeof(*gt->mem.gtt_mgr),
+ GFP_KERNEL);
+ if (!gt->mem.gtt_mgr)
+ return -ENOMEM;
+ } else {
+ struct xe_gt *full_gt = xe_find_full_gt(gt);
+
+ gt->mem.ggtt = full_gt->mem.ggtt;
+ gt->mem.vram_mgr = full_gt->mem.vram_mgr;
+ gt->mem.gtt_mgr = full_gt->mem.gtt_mgr;
+ }
+
+ gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0);
+
+ return 0;
+}
+
+/* FIXME: These should be in a common file */
+#define CHV_PPAT_SNOOP REG_BIT(6)
+#define GEN8_PPAT_AGE(x) ((x)<<4)
+#define GEN8_PPAT_LLCeLLC (3<<2)
+#define GEN8_PPAT_LLCELLC (2<<2)
+#define GEN8_PPAT_LLC (1<<2)
+#define GEN8_PPAT_WB (3<<0)
+#define GEN8_PPAT_WT (2<<0)
+#define GEN8_PPAT_WC (1<<0)
+#define GEN8_PPAT_UC (0<<0)
+#define GEN8_PPAT_ELLC_OVERRIDE (0<<2)
+#define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8))
+#define GEN12_PPAT_CLOS(x) ((x)<<2)
+
+static void tgl_setup_private_ppat(struct xe_gt *gt)
+{
+ /* TGL doesn't support LLC or AGE settings */
+ xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_WB);
+ xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC);
+ xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT);
+ xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_UC);
+ xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, GEN8_PPAT_WB);
+ xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg, GEN8_PPAT_WB);
+ xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg, GEN8_PPAT_WB);
+ xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg, GEN8_PPAT_WB);
+}
+
+static void pvc_setup_private_ppat(struct xe_gt *gt)
+{
+ xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_UC);
+ xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC);
+ xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT);
+ xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_WB);
+ xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg,
+ GEN12_PPAT_CLOS(1) | GEN8_PPAT_WT);
+ xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg,
+ GEN12_PPAT_CLOS(1) | GEN8_PPAT_WB);
+ xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg,
+ GEN12_PPAT_CLOS(2) | GEN8_PPAT_WT);
+ xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg,
+ GEN12_PPAT_CLOS(2) | GEN8_PPAT_WB);
+}
+
+#define MTL_PPAT_L4_CACHE_POLICY_MASK REG_GENMASK(3, 2)
+#define MTL_PAT_INDEX_COH_MODE_MASK REG_GENMASK(1, 0)
+#define MTL_PPAT_3_UC REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
+#define MTL_PPAT_1_WT REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
+#define MTL_PPAT_0_WB REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
+#define MTL_3_COH_2W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3)
+#define MTL_2_COH_1W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2)
+#define MTL_0_COH_NON REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0)
+
+static void mtl_setup_private_ppat(struct xe_gt *gt)
+{
+ xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, MTL_PPAT_0_WB);
+ xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg,
+ MTL_PPAT_1_WT | MTL_2_COH_1W);
+ xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg,
+ MTL_PPAT_3_UC | MTL_2_COH_1W);
+ xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg,
+ MTL_PPAT_0_WB | MTL_2_COH_1W);
+ xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg,
+ MTL_PPAT_0_WB | MTL_3_COH_2W);
+}
+
+static void setup_private_ppat(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ if (xe->info.platform == XE_METEORLAKE)
+ mtl_setup_private_ppat(gt);
+ else if (xe->info.platform == XE_PVC)
+ pvc_setup_private_ppat(gt);
+ else
+ tgl_setup_private_ppat(gt);
+}
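
To make the register encodings above concrete, here is a small stand-alone sketch of how REG_GENMASK()/REG_FIELD_PREP()-style field packing composes a PAT index value such as MTL_PPAT_1_WT | MTL_2_COH_1W. The helper macros are simplified stand-ins for the kernel ones, and __builtin_ctz() is a GCC/Clang builtin; this is not the driver's code.

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified stand-ins for the kernel's REG_GENMASK()/REG_FIELD_PREP(). */
    #define GENMASK32(h, l)    (((~0u) << (l)) & (~0u >> (31 - (h))))
    #define FIELD_PREP32(m, v) (((uint32_t)(v) << __builtin_ctz(m)) & (m))

    #define L4_CACHE_POLICY_MASK GENMASK32(3, 2) /* mirrors MTL_PPAT_L4_CACHE_POLICY_MASK */
    #define COH_MODE_MASK        GENMASK32(1, 0) /* mirrors MTL_PAT_INDEX_COH_MODE_MASK */

    int main(void)
    {
            /* PAT index 1 above: write-through L4 policy + 1-way coherency */
            uint32_t val = FIELD_PREP32(L4_CACHE_POLICY_MASK, 1) |
                           FIELD_PREP32(COH_MODE_MASK, 2);

            printf("PAT index value: 0x%x\n", val);	/* prints 0x6 */
            return 0;
    }
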
+
+static int gt_ttm_mgr_init(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int err;
+ struct sysinfo si;
+ u64 gtt_size;
+
+ si_meminfo(&si);
+ gtt_size = (u64)si.totalram * si.mem_unit * 3/4;
+
+ if (gt->mem.vram.size) {
+ err = xe_ttm_vram_mgr_init(gt, gt->mem.vram_mgr);
+ if (err)
+ return err;
+ gtt_size = min(max((XE_DEFAULT_GTT_SIZE_MB << 20),
+ gt->mem.vram.size),
+ gtt_size);
+ xe->info.mem_region_mask |= BIT(gt->info.vram_id) << 1;
+ }
+
+ err = xe_ttm_gtt_mgr_init(gt, gt->mem.gtt_mgr, gtt_size);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static void gt_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_gt *gt = arg;
+ int i;
+
+ destroy_workqueue(gt->ordered_wq);
+
+ for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
+ xe_hw_fence_irq_finish(&gt->fence_irq[i]);
+}
+
+static void gt_reset_worker(struct work_struct *w);
+
+int emit_nop_job(struct xe_gt *gt, struct xe_engine *e)
+{
+ struct xe_sched_job *job;
+ struct xe_bb *bb;
+ struct dma_fence *fence;
+ u64 batch_ofs;
+ long timeout;
+
+ bb = xe_bb_new(gt, 4, false);
+ if (IS_ERR(bb))
+ return PTR_ERR(bb);
+
+ batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool.bo);
+ job = xe_bb_create_wa_job(e, bb, batch_ofs);
+ if (IS_ERR(job)) {
+ xe_bb_free(bb, NULL);
+ return PTR_ERR(job);
+ }
+
+ xe_sched_job_arm(job);
+ fence = dma_fence_get(&job->drm.s_fence->finished);
+ xe_sched_job_push(job);
+
+ timeout = dma_fence_wait_timeout(fence, false, HZ);
+ dma_fence_put(fence);
+ xe_bb_free(bb, NULL);
+ if (timeout < 0)
+ return timeout;
+ else if (!timeout)
+ return -ETIME;
+
+ return 0;
+}
+
+int emit_wa_job(struct xe_gt *gt, struct xe_engine *e)
+{
+ struct xe_reg_sr *sr = &e->hwe->reg_lrc;
+ struct xe_reg_sr_entry *entry;
+ unsigned long reg;
+ struct xe_sched_job *job;
+ struct xe_bb *bb;
+ struct dma_fence *fence;
+ u64 batch_ofs;
+ long timeout;
+ int count = 0;
+
+ bb = xe_bb_new(gt, SZ_4K, false); /* Just pick a large BB size */
+ if (IS_ERR(bb))
+ return PTR_ERR(bb);
+
+ xa_for_each(&sr->xa, reg, entry)
+ ++count;
+
+ if (count) {
+ bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM(count);
+ xa_for_each(&sr->xa, reg, entry) {
+ bb->cs[bb->len++] = reg;
+ bb->cs[bb->len++] = entry->set_bits;
+ }
+ }
+ bb->cs[bb->len++] = MI_NOOP;
+ bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+
+ batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool.bo);
+ job = xe_bb_create_wa_job(e, bb, batch_ofs);
+ if (IS_ERR(job)) {
+ xe_bb_free(bb, NULL);
+ return PTR_ERR(job);
+ }
+
+ xe_sched_job_arm(job);
+ fence = dma_fence_get(&job->drm.s_fence->finished);
+ xe_sched_job_push(job);
+
+ timeout = dma_fence_wait_timeout(fence, false, HZ);
+ dma_fence_put(fence);
+ xe_bb_free(bb, NULL);
+ if (timeout < 0)
+ return timeout;
+ else if (!timeout)
+ return -ETIME;
+
+ return 0;
+}
+
+int xe_gt_record_default_lrcs(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ int err = 0;
+
+ for_each_hw_engine(hwe, gt, id) {
+ struct xe_engine *e, *nop_e;
+ struct xe_vm *vm;
+ void *default_lrc;
+
+ if (gt->default_lrc[hwe->class])
+ continue;
+
+ xe_reg_sr_init(&hwe->reg_lrc, "LRC", xe);
+ xe_wa_process_lrc(hwe);
+
+ default_lrc = drmm_kzalloc(&xe->drm,
+ xe_lrc_size(xe, hwe->class),
+ GFP_KERNEL);
+ if (!default_lrc)
+ return -ENOMEM;
+
+ vm = xe_migrate_get_vm(gt->migrate);
+ e = xe_engine_create(xe, vm, BIT(hwe->logical_instance), 1,
+ hwe, ENGINE_FLAG_WA);
+ if (IS_ERR(e)) {
+ err = PTR_ERR(e);
+ goto put_vm;
+ }
+
+ /* Prime golden LRC with known good state */
+ err = emit_wa_job(gt, e);
+ if (err)
+ goto put_engine;
+
+ nop_e = xe_engine_create(xe, vm, BIT(hwe->logical_instance),
+ 1, hwe, ENGINE_FLAG_WA);
+ if (IS_ERR(nop_e)) {
+ err = PTR_ERR(nop_e);
+ goto put_engine;
+ }
+
+ /* Switch to different LRC */
+ err = emit_nop_job(gt, nop_e);
+ if (err)
+ goto put_nop_e;
+
+ /* Reload golden LRC to record the effect of any indirect W/A */
+ err = emit_nop_job(gt, e);
+ if (err)
+ goto put_nop_e;
+
+ xe_map_memcpy_from(xe, default_lrc,
+ &e->lrc[0].bo->vmap,
+ xe_lrc_pphwsp_offset(&e->lrc[0]),
+ xe_lrc_size(xe, hwe->class));
+
+ gt->default_lrc[hwe->class] = default_lrc;
+put_nop_e:
+ xe_engine_put(nop_e);
+put_engine:
+ xe_engine_put(e);
+put_vm:
+ xe_vm_put(vm);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+int xe_gt_init_early(struct xe_gt *gt)
+{
+ int err;
+
+ xe_force_wake_init_gt(gt, gt_to_fw(gt));
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return err;
+
+ xe_gt_topology_init(gt);
+ xe_gt_mcr_init(gt);
+
+ err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return err;
+
+ xe_reg_sr_init(&gt->reg_sr, "GT", gt_to_xe(gt));
+ xe_wa_process_gt(gt);
+ xe_tuning_process_gt(gt);
+
+ return 0;
+}
+
+/**
+ * xe_gt_init_noalloc - Init GT up to the point where allocations can happen.
+ * @gt: The GT to initialize.
+ *
+ * This function prepares the GT to allow memory allocations to VRAM, but is not
+ * allowed to allocate memory itself. This state is useful for display readout,
+ * because the inherited display framebuffer will otherwise be overwritten as it
+ * is usually put at the start of VRAM.
+ *
+ * Returns: 0 on success, negative error code on error.
+ */
+int xe_gt_init_noalloc(struct xe_gt *gt)
+{
+ int err, err2;
+
+ if (xe_gt_is_media_type(gt))
+ return 0;
+
+ xe_device_mem_access_get(gt_to_xe(gt));
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ goto err;
+
+ err = gt_ttm_mgr_init(gt);
+ if (err)
+ goto err_force_wake;
+
+ err = xe_ggtt_init_noalloc(gt, gt->mem.ggtt);
+
+err_force_wake:
+ err2 = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ XE_WARN_ON(err2);
+ xe_device_mem_access_put(gt_to_xe(gt));
+err:
+ return err;
+}
+
+static int gt_fw_domain_init(struct xe_gt *gt)
+{
+ int err, i;
+
+ xe_device_mem_access_get(gt_to_xe(gt));
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ goto err_hw_fence_irq;
+
+ if (!xe_gt_is_media_type(gt)) {
+ err = xe_ggtt_init(gt, gt->mem.ggtt);
+ if (err)
+ goto err_force_wake;
+ }
+
+ /* Allow driver to load if uC init fails (likely missing firmware) */
+ err = xe_uc_init(&gt->uc);
+ XE_WARN_ON(err);
+
+ err = xe_uc_init_hwconfig(&gt->uc);
+ if (err)
+ goto err_force_wake;
+
+ /* Enables per hw engine IRQs */
+ xe_gt_irq_postinstall(gt);
+
+ /* Rerun MCR init as we now have hw engine list */
+ xe_gt_mcr_init(gt);
+
+ err = xe_hw_engines_init_early(gt);
+ if (err)
+ goto err_force_wake;
+
+ err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ XE_WARN_ON(err);
+ xe_device_mem_access_put(gt_to_xe(gt));
+
+ return 0;
+
+err_force_wake:
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+err_hw_fence_irq:
+ for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
+ xe_hw_fence_irq_finish(&gt->fence_irq[i]);
+ xe_device_mem_access_put(gt_to_xe(gt));
+
+ return err;
+}
+
+static int all_fw_domain_init(struct xe_gt *gt)
+{
+ int err, i;
+
+ xe_device_mem_access_get(gt_to_xe(gt));
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (err)
+ goto err_hw_fence_irq;
+
+ setup_private_ppat(gt);
+
+ xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
+
+ err = xe_gt_clock_init(gt);
+ if (err)
+ goto err_force_wake;
+
+ xe_mocs_init(gt);
+ err = xe_execlist_init(gt);
+ if (err)
+ goto err_force_wake;
+
+ err = xe_hw_engines_init(gt);
+ if (err)
+ goto err_force_wake;
+
+ err = xe_uc_init_post_hwconfig(&gt->uc);
+ if (err)
+ goto err_force_wake;
+
+ /*
+ * FIXME: This should be ok as SA should only be used by gt->migrate and
+ * vm->gt->migrate, both of which should point to a non-media GT. But to be
+ * really safe, convert gt->kernel_bb_pool to a pointer and point a media
+ * GT to the kernel_bb_pool on a real tile.
+ */
+ if (!xe_gt_is_media_type(gt)) {
+ err = xe_sa_bo_manager_init(gt, &gt->kernel_bb_pool, SZ_1M, 16);
+ if (err)
+ goto err_force_wake;
+
+ /*
+ * USM has its own SA pool so that it does not block behind user operations
+ */
+ if (gt_to_xe(gt)->info.supports_usm) {
+ err = xe_sa_bo_manager_init(gt, &gt->usm.bb_pool,
+ SZ_1M, 16);
+ if (err)
+ goto err_force_wake;
+ }
+ }
+
+ if (!xe_gt_is_media_type(gt)) {
+ gt->migrate = xe_migrate_init(gt);
+ if (IS_ERR(gt->migrate)) {
+ err = PTR_ERR(gt->migrate);
+ goto err_force_wake;
+ }
+ } else {
+ gt->migrate = xe_find_full_gt(gt)->migrate;
+ }
+
+ err = xe_uc_init_hw(&gt->uc);
+ if (err)
+ goto err_force_wake;
+
+ err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ XE_WARN_ON(err);
+ xe_device_mem_access_put(gt_to_xe(gt));
+
+ return 0;
+
+err_force_wake:
+ xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+err_hw_fence_irq:
+ for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
+ xe_hw_fence_irq_finish(&gt->fence_irq[i]);
+ xe_device_mem_access_put(gt_to_xe(gt));
+
+ return err;
+}
+
+int xe_gt_init(struct xe_gt *gt)
+{
+ int err;
+ int i;
+
+ INIT_WORK(&gt->reset.worker, gt_reset_worker);
+
+ for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) {
+ gt->ring_ops[i] = xe_ring_ops_get(gt, i);
+ xe_hw_fence_irq_init(&gt->fence_irq[i]);
+ }
+
+ err = xe_gt_pagefault_init(gt);
+ if (err)
+ return err;
+
+ xe_gt_sysfs_init(gt);
+
+ err = gt_fw_domain_init(gt);
+ if (err)
+ return err;
+
+ xe_force_wake_init_engines(gt, gt_to_fw(gt));
+
+ err = all_fw_domain_init(gt);
+ if (err)
+ return err;
+
+ xe_force_wake_prune(gt, gt_to_fw(gt));
+
+ err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_fini, gt);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+int do_gt_reset(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int err;
+
+ xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_FULL);
+ err = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_FULL, 5);
+ if (err)
+ drm_err(&xe->drm,
+ "GT reset failed to clear GEN11_GRDOM_FULL\n");
+
+ return err;
+}
+
+static int do_gt_restart(struct xe_gt *gt)
+{
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ int err;
+
+ setup_private_ppat(gt);
+
+ xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
+
+ err = xe_wopcm_init(&gt->uc.wopcm);
+ if (err)
+ return err;
+
+ for_each_hw_engine(hwe, gt, id)
+ xe_hw_engine_enable_ring(hwe);
+
+ err = xe_uc_init_hw(&gt->uc);
+ if (err)
+ return err;
+
+ xe_mocs_init(gt);
+ err = xe_uc_start(&gt->uc);
+ if (err)
+ return err;
+
+ for_each_hw_engine(hwe, gt, id) {
+ xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
+ xe_reg_sr_apply_whitelist(&hwe->reg_whitelist,
+ hwe->mmio_base, gt);
+ }
+
+ return 0;
+}
+
+static int gt_reset(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int err;
+
+ /* We only support GT resets with GuC submission */
+ if (!xe_device_guc_submission_enabled(gt_to_xe(gt)))
+ return -ENODEV;
+
+ drm_info(&xe->drm, "GT reset started\n");
+
+ xe_device_mem_access_get(gt_to_xe(gt));
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (err)
+ goto err_msg;
+
+ xe_uc_stop_prepare(&gt->uc);
+ xe_gt_pagefault_reset(gt);
+
+ err = xe_uc_stop(&gt->uc);
+ if (err)
+ goto err_out;
+
+ err = do_gt_reset(gt);
+ if (err)
+ goto err_out;
+
+ err = do_gt_restart(gt);
+ if (err)
+ goto err_out;
+
+ xe_device_mem_access_put(gt_to_xe(gt));
+ err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ XE_WARN_ON(err);
+
+ drm_info(&xe->drm, "GT reset done\n");
+
+ return 0;
+
+err_out:
+ XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+err_msg:
+ XE_WARN_ON(xe_uc_start(&gt->uc));
+ xe_device_mem_access_put(gt_to_xe(gt));
+ drm_err(&xe->drm, "GT reset failed, err=%d\n", err);
+
+ return err;
+}
+
+static void gt_reset_worker(struct work_struct *w)
+{
+ struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker);
+
+ gt_reset(gt);
+}
+
+void xe_gt_reset_async(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ drm_info(&xe->drm, "Try GT reset\n");
+
+ /* Don't do a reset while one is already in flight */
+ if (xe_uc_reset_prepare(&gt->uc))
+ return;
+
+ drm_info(&xe->drm, "Doing GT reset\n");
+ queue_work(gt->ordered_wq, &gt->reset.worker);
+}
+
+void xe_gt_suspend_prepare(struct xe_gt *gt)
+{
+ xe_device_mem_access_get(gt_to_xe(gt));
+ XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+
+ xe_uc_stop_prepare(&gt->uc);
+
+ xe_device_mem_access_put(gt_to_xe(gt));
+ XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+}
+
+int xe_gt_suspend(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int err;
+
+ /* For now suspend/resume is only allowed with GuC */
+ if (!xe_device_guc_submission_enabled(gt_to_xe(gt)))
+ return -ENODEV;
+
+ xe_device_mem_access_get(gt_to_xe(gt));
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (err)
+ goto err_msg;
+
+ err = xe_uc_suspend(&gt->uc);
+ if (err)
+ goto err_force_wake;
+
+ xe_device_mem_access_put(gt_to_xe(gt));
+ XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ drm_info(&xe->drm, "GT suspended\n");
+
+ return 0;
+
+err_force_wake:
+ XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+err_msg:
+ xe_device_mem_access_put(gt_to_xe(gt));
+ drm_err(&xe->drm, "GT suspend failed: %d\n", err);
+
+ return err;
+}
+
+int xe_gt_resume(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int err;
+
+ xe_device_mem_access_get(gt_to_xe(gt));
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (err)
+ goto err_msg;
+
+ err = do_gt_restart(gt);
+ if (err)
+ goto err_force_wake;
+
+ xe_device_mem_access_put(gt_to_xe(gt));
+ XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ drm_info(&xe->drm, "GT resumed\n");
+
+ return 0;
+
+err_force_wake:
+ XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+err_msg:
+ xe_device_mem_access_put(gt_to_xe(gt));
+ drm_err(&xe->drm, "GT resume failed: %d\n", err);
+
+ return err;
+}
+
+void xe_gt_migrate_wait(struct xe_gt *gt)
+{
+ xe_migrate_wait(gt->migrate);
+}
+
+struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
+ enum xe_engine_class class,
+ u16 instance, bool logical)
+{
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+
+ for_each_hw_engine(hwe, gt, id)
+ if (hwe->class == class &&
+ ((!logical && hwe->instance == instance) ||
+ (logical && hwe->logical_instance == instance)))
+ return hwe;
+
+ return NULL;
+}
+
+struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt,
+ enum xe_engine_class class)
+{
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+
+ for_each_hw_engine(hwe, gt, id) {
+ switch (class) {
+ case XE_ENGINE_CLASS_RENDER:
+ case XE_ENGINE_CLASS_COMPUTE:
+ if (hwe->class == XE_ENGINE_CLASS_RENDER ||
+ hwe->class == XE_ENGINE_CLASS_COMPUTE)
+ return hwe;
+ break;
+ default:
+ if (hwe->class == class)
+ return hwe;
+ }
+ }
+
+ return NULL;
+}
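
As a side note on gt_ttm_mgr_init() above: the GTT is sized to the larger of the default GTT size and the GT's VRAM size, capped at three quarters of system RAM. A stand-alone sketch of that arithmetic, with made-up sizes (the real XE_DEFAULT_GTT_SIZE_MB value is defined elsewhere and is only a placeholder here):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t min_u64(uint64_t a, uint64_t b) { return a < b ? a : b; }
    static uint64_t max_u64(uint64_t a, uint64_t b) { return a > b ? a : b; }

    int main(void)
    {
            uint64_t sysram      = 32ull << 30;	/* 32 GiB system RAM (example) */
            uint64_t vram        = 8ull << 30;	/* 8 GiB VRAM (example) */
            uint64_t default_gtt = 256ull << 20;	/* placeholder default GTT size */

            uint64_t gtt = sysram * 3 / 4;			/* cap at 3/4 of RAM */
            gtt = min_u64(max_u64(default_gtt, vram), gtt);	/* at least default/VRAM size */

            printf("GTT size: %llu MiB\n", (unsigned long long)(gtt >> 20));
            return 0;
    }
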
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
new file mode 100644
index 000000000000..5dc08a993cfe
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_H_
+#define _XE_GT_H_
+
+#include <drm/drm_util.h>
+
+#include "xe_device_types.h"
+#include "xe_hw_engine.h"
+
+#define for_each_hw_engine(hwe__, gt__, id__) \
+ for ((id__) = 0; (id__) < ARRAY_SIZE((gt__)->hw_engines); (id__)++) \
+ for_each_if (((hwe__) = (gt__)->hw_engines + (id__)) && \
+ xe_hw_engine_is_valid((hwe__)))
+
+int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt);
+int xe_gt_init_early(struct xe_gt *gt);
+int xe_gt_init_noalloc(struct xe_gt *gt);
+int xe_gt_init(struct xe_gt *gt);
+int xe_gt_record_default_lrcs(struct xe_gt *gt);
+void xe_gt_suspend_prepare(struct xe_gt *gt);
+int xe_gt_suspend(struct xe_gt *gt);
+int xe_gt_resume(struct xe_gt *gt);
+void xe_gt_reset_async(struct xe_gt *gt);
+void xe_gt_migrate_wait(struct xe_gt *gt);
+
+struct xe_gt *xe_find_full_gt(struct xe_gt *gt);
+
+/**
+ * xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the
+ * first that matches the same reset domain as @class
+ * @gt: GT structure
+ * @class: hw engine class to lookup
+ */
+struct xe_hw_engine *
+xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt, enum xe_engine_class class);
+
+struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
+ enum xe_engine_class class,
+ u16 instance,
+ bool logical);
+
+static inline bool xe_gt_is_media_type(struct xe_gt *gt)
+{
+ return gt->info.type == XE_GT_TYPE_MEDIA;
+}
+
+static inline struct xe_device *gt_to_xe(struct xe_gt *gt)
+{
+ return gt->xe;
+}
+
+static inline bool xe_gt_is_usm_hwe(struct xe_gt *gt, struct xe_hw_engine *hwe)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ return xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
+ hwe->instance == gt->usm.reserved_bcs_instance;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
new file mode 100644
index 000000000000..575433e9718a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "i915_reg.h"
+#include "gt/intel_gt_regs.h"
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_clock.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+
+static u32 read_reference_ts_freq(struct xe_gt *gt)
+{
+ u32 ts_override = xe_mmio_read32(gt, GEN9_TIMESTAMP_OVERRIDE.reg);
+ u32 base_freq, frac_freq;
+
+ base_freq = ((ts_override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >>
+ GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1;
+ base_freq *= 1000000;
+
+ frac_freq = ((ts_override &
+ GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >>
+ GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT);
+ frac_freq = 1000000 / (frac_freq + 1);
+
+ return base_freq + frac_freq;
+}
+
+static u32 get_crystal_clock_freq(u32 rpm_config_reg)
+{
+ const u32 f19_2_mhz = 19200000;
+ const u32 f24_mhz = 24000000;
+ const u32 f25_mhz = 25000000;
+ const u32 f38_4_mhz = 38400000;
+ u32 crystal_clock =
+ (rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
+ GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
+
+ switch (crystal_clock) {
+ case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
+ return f24_mhz;
+ case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
+ return f19_2_mhz;
+ case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ:
+ return f38_4_mhz;
+ case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ:
+ return f25_mhz;
+ default:
+ XE_BUG_ON("NOT_POSSIBLE");
+ return 0;
+ }
+}
+
+int xe_gt_clock_init(struct xe_gt *gt)
+{
+ u32 ctc_reg = xe_mmio_read32(gt, CTC_MODE.reg);
+ u32 freq = 0;
+
+ /* Assuming gen11+ so assert this assumption is correct */
+ XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11);
+
+ if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) {
+ freq = read_reference_ts_freq(gt);
+ } else {
+ u32 c0 = xe_mmio_read32(gt, RPM_CONFIG0.reg);
+
+ freq = get_crystal_clock_freq(c0);
+
+ /*
+ * Now figure out how the command stream's timestamp
+ * register increments from this frequency (it might
+ * increment only every few clock cycles).
+ */
+ freq >>= 3 - ((c0 & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
+ GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
+ }
+
+ gt->info.clock_freq = freq;
+ return 0;
+}
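
A stand-alone sketch of the crystal-clock path in xe_gt_clock_init() above: decode the crystal frequency, then right-shift it by (3 - shift parameter) because the timestamp register may tick only every few clock cycles. The selector encoding and example values below are illustrative and do not reflect the real register layout.

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative selector values; not the hardware encoding. */
    static uint32_t crystal_hz(uint32_t sel)
    {
            switch (sel) {
            case 0: return 24000000;
            case 1: return 19200000;
            case 2: return 38400000;
            case 3: return 25000000;
            default: return 0;
            }
    }

    int main(void)
    {
            uint32_t sel = 1;		/* example: 19.2 MHz crystal */
            uint32_t shift_param = 1;	/* example CTC shift parameter */
            uint32_t freq = crystal_hz(sel);

            /* The timestamp register may tick only every few clock cycles. */
            freq >>= 3 - shift_param;

            printf("timestamp frequency: %u Hz\n", (unsigned int)freq);
            return 0;
    }
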
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.h b/drivers/gpu/drm/xe/xe_gt_clock.h
new file mode 100644
index 000000000000..511923afd224
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_clock.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_CLOCK_H_
+#define _XE_GT_CLOCK_H_
+
+struct xe_gt;
+
+int xe_gt_clock_init(struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
new file mode 100644
index 000000000000..cd1888784141
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_gt_debugfs.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_pagefault.h"
+#include "xe_gt_topology.h"
+#include "xe_hw_engine.h"
+#include "xe_macros.h"
+#include "xe_uc_debugfs.h"
+
+static struct xe_gt *node_to_gt(struct drm_info_node *node)
+{
+ return node->info_ent->data;
+}
+
+static int hw_engines(struct seq_file *m, void *data)
+{
+ struct xe_gt *gt = node_to_gt(m->private);
+ struct xe_device *xe = gt_to_xe(gt);
+ struct drm_printer p = drm_seq_file_printer(m);
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ int err;
+
+ xe_device_mem_access_get(xe);
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (err) {
+ xe_device_mem_access_put(xe);
+ return err;
+ }
+
+ for_each_hw_engine(hwe, gt, id)
+ xe_hw_engine_print_state(hwe, &p);
+
+ xe_device_mem_access_put(xe);
+ err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int force_reset(struct seq_file *m, void *data)
+{
+ struct xe_gt *gt = node_to_gt(m->private);
+
+ xe_gt_reset_async(gt);
+
+ return 0;
+}
+
+static int sa_info(struct seq_file *m, void *data)
+{
+ struct xe_gt *gt = node_to_gt(m->private);
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ drm_suballoc_dump_debug_info(&gt->kernel_bb_pool.base, &p,
+ gt->kernel_bb_pool.gpu_addr);
+
+ return 0;
+}
+
+static int topology(struct seq_file *m, void *data)
+{
+ struct xe_gt *gt = node_to_gt(m->private);
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ xe_gt_topology_dump(gt, &p);
+
+ return 0;
+}
+
+static int steering(struct seq_file *m, void *data)
+{
+ struct xe_gt *gt = node_to_gt(m->private);
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ xe_gt_mcr_steering_dump(gt, &p);
+
+ return 0;
+}
+
+#ifdef CONFIG_DRM_XE_DEBUG
+static int invalidate_tlb(struct seq_file *m, void *data)
+{
+ struct xe_gt *gt = node_to_gt(m->private);
+ int seqno;
+ int ret = 0;
+
+ seqno = xe_gt_tlb_invalidation(gt);
+ XE_WARN_ON(seqno < 0);
+ if (seqno > 0)
+ ret = xe_gt_tlb_invalidation_wait(gt, seqno);
+ XE_WARN_ON(ret < 0);
+
+ return 0;
+}
+#endif
+
+static const struct drm_info_list debugfs_list[] = {
+ {"hw_engines", hw_engines, 0},
+ {"force_reset", force_reset, 0},
+ {"sa_info", sa_info, 0},
+ {"topology", topology, 0},
+ {"steering", steering, 0},
+#ifdef CONFIG_DRM_XE_DEBUG
+ {"invalidate_tlb", invalidate_tlb, 0},
+#endif
+};
+
+void xe_gt_debugfs_register(struct xe_gt *gt)
+{
+ struct drm_minor *minor = gt_to_xe(gt)->drm.primary;
+ struct dentry *root;
+ struct drm_info_list *local;
+ char name[8];
+ int i;
+
+ XE_BUG_ON(!minor->debugfs_root);
+
+ sprintf(name, "gt%d", gt->info.id);
+ root = debugfs_create_dir(name, minor->debugfs_root);
+ if (IS_ERR(root)) {
+ XE_WARN_ON("Create GT directory failed");
+ return;
+ }
+
+ /*
+ * Allocate local copy as we need to pass in the GT to the debugfs
+ * entry and drm_debugfs_create_files just references the drm_info_list
+ * passed in (i.e., it can't be defined on the stack).
+ */
+#define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
+ local = drmm_kmalloc(&gt_to_xe(gt)->drm, DEBUGFS_SIZE, GFP_KERNEL);
+ if (!local) {
+ XE_WARN_ON("Couldn't allocate memory");
+ return;
+ }
+
+ memcpy(local, debugfs_list, DEBUGFS_SIZE);
+#undef DEBUGFS_SIZE
+
+ for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i)
+ local[i].data = gt;
+
+ drm_debugfs_create_files(local,
+ ARRAY_SIZE(debugfs_list),
+ root, minor);
+
+ xe_uc_debugfs_register(&gt->uc, root);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.h b/drivers/gpu/drm/xe/xe_gt_debugfs.h
new file mode 100644
index 000000000000..5a329f118a57
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_DEBUGFS_H_
+#define _XE_GT_DEBUGFS_H_
+
+struct xe_gt;
+
+void xe_gt_debugfs_register(struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
new file mode 100644
index 000000000000..b69c0d6c6b2f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -0,0 +1,552 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_topology.h"
+#include "xe_gt_types.h"
+#include "xe_mmio.h"
+
+#include "gt/intel_gt_regs.h"
+
+/**
+ * DOC: GT Multicast/Replicated (MCR) Register Support
+ *
+ * Some GT registers are designed as "multicast" or "replicated" registers:
+ * multiple instances of the same register share a single MMIO offset. MCR
+ * registers are generally used when the hardware needs to potentially track
+ * independent values of a register per hardware unit (e.g., per-subslice,
+ * per-L3bank, etc.). The specific types of replication that exist vary
+ * per-platform.
+ *
+ * MMIO accesses to MCR registers are controlled according to the settings
+ * programmed in the platform's MCR_SELECTOR register(s). MMIO writes to MCR
+ * registers can be done in either multicast mode (a single write updates all
+ * instances of the register to the same value) or unicast mode (a write
+ * updates only one specific instance). Reads of MCR registers always operate in a unicast
+ * manner regardless of how the multicast/unicast bit is set in MCR_SELECTOR.
+ * Selection of a specific MCR instance for unicast operations is referred to
+ * as "steering."
+ *
+ * If MCR register operations are steered toward a hardware unit that is
+ * fused off or currently powered down due to power gating, the MMIO operation
+ * is "terminated" by the hardware. Terminated read operations will return a
+ * value of zero and terminated unicast write operations will be silently
+ * ignored.
+ */
+
+enum {
+ MCR_OP_READ,
+ MCR_OP_WRITE
+};
+
+static const struct xe_mmio_range xelp_l3bank_steering_table[] = {
+ { 0x00B100, 0x00B3FF },
+ {},
+};
+
+/*
+ * Although the bspec lists more "MSLICE" ranges than shown here, some of those
+ * are of a "GAM" subclass that has special rules and doesn't need to be
+ * included here.
+ */
+static const struct xe_mmio_range xehp_mslice_steering_table[] = {
+ { 0x00DD00, 0x00DDFF },
+ { 0x00E900, 0x00FFFF }, /* 0xEA00 - 0xEFFF is unused */
+ {},
+};
+
+static const struct xe_mmio_range xehp_lncf_steering_table[] = {
+ { 0x00B000, 0x00B0FF },
+ { 0x00D880, 0x00D8FF },
+ {},
+};
+
+/*
+ * We have several types of MCR registers where steering to (0,0) will always
+ * provide us with a non-terminated value. We'll stick them all in the same
+ * table for simplicity.
+ */
+static const struct xe_mmio_range xehpc_instance0_steering_table[] = {
+ { 0x004000, 0x004AFF }, /* HALF-BSLICE */
+ { 0x008800, 0x00887F }, /* CC */
+ { 0x008A80, 0x008AFF }, /* TILEPSMI */
+ { 0x00B000, 0x00B0FF }, /* HALF-BSLICE */
+ { 0x00B100, 0x00B3FF }, /* L3BANK */
+ { 0x00C800, 0x00CFFF }, /* HALF-BSLICE */
+ { 0x00D800, 0x00D8FF }, /* HALF-BSLICE */
+ { 0x00DD00, 0x00DDFF }, /* BSLICE */
+ { 0x00E900, 0x00E9FF }, /* HALF-BSLICE */
+ { 0x00EC00, 0x00EEFF }, /* HALF-BSLICE */
+ { 0x00F000, 0x00FFFF }, /* HALF-BSLICE */
+ { 0x024180, 0x0241FF }, /* HALF-BSLICE */
+ {},
+};
+
+static const struct xe_mmio_range xelpg_instance0_steering_table[] = {
+ { 0x000B00, 0x000BFF }, /* SQIDI */
+ { 0x001000, 0x001FFF }, /* SQIDI */
+ { 0x004000, 0x0048FF }, /* GAM */
+ { 0x008700, 0x0087FF }, /* SQIDI */
+ { 0x00B000, 0x00B0FF }, /* NODE */
+ { 0x00C800, 0x00CFFF }, /* GAM */
+ { 0x00D880, 0x00D8FF }, /* NODE */
+ { 0x00DD00, 0x00DDFF }, /* OAAL2 */
+ {},
+};
+
+static const struct xe_mmio_range xelpg_l3bank_steering_table[] = {
+ { 0x00B100, 0x00B3FF },
+ {},
+};
+
+static const struct xe_mmio_range xelp_dss_steering_table[] = {
+ { 0x008150, 0x00815F },
+ { 0x009520, 0x00955F },
+ { 0x00DE80, 0x00E8FF },
+ { 0x024A00, 0x024A7F },
+ {},
+};
+
+/* DSS steering is used for GSLICE ranges as well */
+static const struct xe_mmio_range xehp_dss_steering_table[] = {
+ { 0x005200, 0x0052FF }, /* GSLICE */
+ { 0x005400, 0x007FFF }, /* GSLICE */
+ { 0x008140, 0x00815F }, /* GSLICE (0x8140-0x814F), DSS (0x8150-0x815F) */
+ { 0x008D00, 0x008DFF }, /* DSS */
+ { 0x0094D0, 0x00955F }, /* GSLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */
+ { 0x009680, 0x0096FF }, /* DSS */
+ { 0x00D800, 0x00D87F }, /* GSLICE */
+ { 0x00DC00, 0x00DCFF }, /* GSLICE */
+ { 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */
+ { 0x017000, 0x017FFF }, /* GSLICE */
+ { 0x024A00, 0x024A7F }, /* DSS */
+ {},
+};
+
+/* DSS steering is used for COMPUTE ranges as well */
+static const struct xe_mmio_range xehpc_dss_steering_table[] = {
+ { 0x008140, 0x00817F }, /* COMPUTE (0x8140-0x814F & 0x8160-0x817F), DSS (0x8150-0x815F) */
+ { 0x0094D0, 0x00955F }, /* COMPUTE (0x94D0-0x951F), DSS (0x9520-0x955F) */
+ { 0x009680, 0x0096FF }, /* DSS */
+ { 0x00DC00, 0x00DCFF }, /* COMPUTE */
+ { 0x00DE80, 0x00E7FF }, /* DSS (0xDF00-0xE1FF reserved) */
+ {},
+};
+
+/* DSS steering is used for SLICE ranges as well */
+static const struct xe_mmio_range xelpg_dss_steering_table[] = {
+ { 0x005200, 0x0052FF }, /* SLICE */
+ { 0x005500, 0x007FFF }, /* SLICE */
+ { 0x008140, 0x00815F }, /* SLICE (0x8140-0x814F), DSS (0x8150-0x815F) */
+ { 0x0094D0, 0x00955F }, /* SLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */
+ { 0x009680, 0x0096FF }, /* DSS */
+ { 0x00D800, 0x00D87F }, /* SLICE */
+ { 0x00DC00, 0x00DCFF }, /* SLICE */
+ { 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */
+ {},
+};
+
+static const struct xe_mmio_range xelpmp_oaddrm_steering_table[] = {
+ { 0x393200, 0x39323F },
+ { 0x393400, 0x3934FF },
+ {},
+};
+
+/*
+ * DG2 GAM registers are a special case; this table is checked directly in
+ * xe_gt_mcr_get_nonterminated_steering and is not hooked up via
+ * gt->steering[].
+ */
+static const struct xe_mmio_range dg2_gam_ranges[] = {
+ { 0x004000, 0x004AFF },
+ { 0x00C800, 0x00CFFF },
+ { 0x00F000, 0x00FFFF },
+ {},
+};
+
+static void init_steering_l3bank(struct xe_gt *gt)
+{
+ if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
+ u32 mslice_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK,
+ xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg));
+ u32 bank_mask = REG_FIELD_GET(GT_L3_EXC_MASK,
+ xe_mmio_read32(gt, XEHP_FUSE4.reg));
+
+ /*
+ * Group selects mslice, instance selects bank within mslice.
+ * Bank 0 is always valid _except_ when the bank mask is 010b.
+ */
+ gt->steering[L3BANK].group_target = __ffs(mslice_mask);
+ gt->steering[L3BANK].instance_target =
+ bank_mask & BIT(0) ? 0 : 2;
+ } else {
+ u32 fuse = REG_FIELD_GET(GEN10_L3BANK_MASK,
+ ~xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg));
+
+ gt->steering[L3BANK].group_target = 0; /* unused */
+ gt->steering[L3BANK].instance_target = __ffs(fuse);
+ }
+}
+
+static void init_steering_mslice(struct xe_gt *gt)
+{
+ u32 mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK,
+ xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg));
+
+ /*
+ * mslice registers are valid (not terminated) if either the meml3
+ * associated with the mslice is present, or at least one DSS associated
+ * with the mslice is present. There will always be at least one meml3
+ * so we can just use that to find a non-terminated mslice and ignore
+ * the DSS fusing.
+ */
+ gt->steering[MSLICE].group_target = __ffs(mask);
+ gt->steering[MSLICE].instance_target = 0; /* unused */
+
+ /*
+ * LNCF termination is also based on mslice presence, so we'll set
+ * it up here. Either LNCF within a non-terminated mslice will work,
+ * so we just always pick LNCF 0 here.
+ */
+ gt->steering[LNCF].group_target = __ffs(mask) << 1;
+ gt->steering[LNCF].instance_target = 0; /* unused */
+}
+
+static void init_steering_dss(struct xe_gt *gt)
+{
+ unsigned int dss = min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0),
+ xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0));
+ unsigned int dss_per_grp = gt_to_xe(gt)->info.platform == XE_PVC ? 8 : 4;
+
+ gt->steering[DSS].group_target = dss / dss_per_grp;
+ gt->steering[DSS].instance_target = dss % dss_per_grp;
+}
+
+static void init_steering_oaddrm(struct xe_gt *gt)
+{
+ /*
+ * First instance is only terminated if the entire first media slice
+ * is absent (i.e., no VCS0 or VECS0).
+ */
+ if (gt->info.engine_mask & (XE_HW_ENGINE_VCS0 | XE_HW_ENGINE_VECS0))
+ gt->steering[OADDRM].group_target = 0;
+ else
+ gt->steering[OADDRM].group_target = 1;
+
+ gt->steering[OADDRM].instance_target = 0; /* unused */
+}
+
+static void init_steering_inst0(struct xe_gt *gt)
+{
+ gt->steering[INSTANCE0].group_target = 0; /* unused */
+ gt->steering[INSTANCE0].instance_target = 0; /* unused */
+}
+
+static const struct {
+ const char *name;
+ void (*init)(struct xe_gt *);
+} xe_steering_types[] = {
+ { "L3BANK", init_steering_l3bank },
+ { "MSLICE", init_steering_mslice },
+ { "LNCF", NULL }, /* initialized by mslice init */
+ { "DSS", init_steering_dss },
+ { "OADDRM", init_steering_oaddrm },
+ { "INSTANCE 0", init_steering_inst0 },
+};
+
+void xe_gt_mcr_init(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES);
+
+ spin_lock_init(&gt->mcr_lock);
+
+ if (gt->info.type == XE_GT_TYPE_MEDIA) {
+ drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13);
+
+ gt->steering[OADDRM].ranges = xelpmp_oaddrm_steering_table;
+ } else if (GRAPHICS_VERx100(xe) >= 1270) {
+ gt->steering[INSTANCE0].ranges = xelpg_instance0_steering_table;
+ gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table;
+ gt->steering[DSS].ranges = xelpg_dss_steering_table;
+ } else if (xe->info.platform == XE_PVC) {
+ gt->steering[INSTANCE0].ranges = xehpc_instance0_steering_table;
+ gt->steering[DSS].ranges = xehpc_dss_steering_table;
+ } else if (xe->info.platform == XE_DG2) {
+ gt->steering[MSLICE].ranges = xehp_mslice_steering_table;
+ gt->steering[LNCF].ranges = xehp_lncf_steering_table;
+ gt->steering[DSS].ranges = xehp_dss_steering_table;
+ } else {
+ gt->steering[L3BANK].ranges = xelp_l3bank_steering_table;
+ gt->steering[DSS].ranges = xelp_dss_steering_table;
+ }
+
+ /* Select non-terminated steering target for each type */
+ for (int i = 0; i < NUM_STEERING_TYPES; i++)
+ if (gt->steering[i].ranges && xe_steering_types[i].init)
+ xe_steering_types[i].init(gt);
+}
+
+/*
+ * xe_gt_mcr_get_nonterminated_steering - find group/instance values that
+ * will steer a register to a non-terminated instance
+ * @gt: GT structure
+ * @reg: register for which the steering is required
+ * @group: return variable for group steering
+ * @instance: return variable for instance steering
+ *
+ * This function returns a group/instance pair that is guaranteed to work for
+ * read steering of the given register. Note that a value will be returned even
+ * if the register is not replicated and therefore does not actually require
+ * steering.
+ *
+ * Returns true if the caller should steer to the @group/@instance values
+ * returned. Returns false if the caller need not perform any steering (i.e.,
+ * the DG2 GAM range special case).
+ */
+static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
+ i915_mcr_reg_t reg,
+ u8 *group, u8 *instance)
+{
+ for (int type = 0; type < NUM_STEERING_TYPES; type++) {
+ if (!gt->steering[type].ranges)
+ continue;
+
+ for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) {
+ if (xe_mmio_in_range(&gt->steering[type].ranges[i], reg.reg)) {
+ *group = gt->steering[type].group_target;
+ *instance = gt->steering[type].instance_target;
+ return true;
+ }
+ }
+ }
+
+ /*
+ * All MCR registers should usually be part of one of the steering
+ * ranges we're tracking. However there's one special case: DG2
+ * GAM registers are technically multicast registers, but are special
+ * in a number of ways:
+ * - they have their own dedicated steering control register (they
+ * don't share 0xFDC with other MCR classes)
+ * - all reads should be directed to instance 1 (unicast reads against
+ * other instances are not allowed), and instance 1 is already the
+ * hardware's default steering target, which we never change
+ *
+ * Ultimately this means that we can just treat them as if they were
+ * unicast registers and all operations will work properly.
+ */
+ for (int i = 0; dg2_gam_ranges[i].end > 0; i++)
+ if (xe_mmio_in_range(&dg2_gam_ranges[i], reg.reg))
+ return false;
+
+ /*
+ * Not found in a steering table and not a DG2 GAM register? We'll
+ * just steer to 0/0 as a guess and raise a warning.
+ */
+ drm_WARN(&gt_to_xe(gt)->drm, true,
+ "Did not find MCR register %#x in any MCR steering table\n",
+ reg.reg);
+ *group = 0;
+ *instance = 0;
+
+ return true;
+}
+
+#define STEER_SEMAPHORE 0xFD0
+
+/*
+ * Obtain exclusive access to MCR steering. On MTL and beyond we also need
+ * to synchronize with external clients (e.g., firmware), so a semaphore
+ * register will also need to be taken.
+ */
+static void mcr_lock(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int ret = 0;
+
+ spin_lock(&gt->mcr_lock);
+
+ /*
+ * Starting with MTL we also need to grab a semaphore register
+ * to synchronize with external agents (e.g., firmware) that now
+ * shares the same steering control register.
+ */
+ if (GRAPHICS_VERx100(xe) >= 1270)
+ ret = wait_for_us(xe_mmio_read32(gt, STEER_SEMAPHORE) == 0x1, 10);
+
+ drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT);
+}
+
+static void mcr_unlock(struct xe_gt *gt)
+{
+ /* Release hardware semaphore */
+ if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270)
+ xe_mmio_write32(gt, STEER_SEMAPHORE, 0x1);
+
+ spin_unlock(&gt->mcr_lock);
+}
+
+/*
+ * Access a register with specific MCR steering
+ *
+ * Caller needs to make sure the relevant forcewake wells are up.
+ */
+static u32 rw_with_mcr_steering(struct xe_gt *gt, i915_mcr_reg_t reg, u8 rw_flag,
+ int group, int instance, u32 value)
+{
+ u32 steer_reg, steer_val, val = 0;
+
+ lockdep_assert_held(&gt->mcr_lock);
+
+ if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
+ steer_reg = MTL_MCR_SELECTOR.reg;
+ steer_val = REG_FIELD_PREP(MTL_MCR_GROUPID, group) |
+ REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance);
+ } else {
+ steer_reg = GEN8_MCR_SELECTOR.reg;
+ steer_val = REG_FIELD_PREP(GEN11_MCR_SLICE_MASK, group) |
+ REG_FIELD_PREP(GEN11_MCR_SUBSLICE_MASK, instance);
+ }
+
+ /*
+ * Always leave the hardware in multicast mode when doing reads
+ * (see comment about Wa_22013088509 below) and only change it
+ * to unicast mode when doing writes of a specific instance.
+ *
+ * No need to save old steering reg value.
+ */
+ if (rw_flag == MCR_OP_READ)
+ steer_val |= GEN11_MCR_MULTICAST;
+
+ xe_mmio_write32(gt, steer_reg, steer_val);
+
+ if (rw_flag == MCR_OP_READ)
+ val = xe_mmio_read32(gt, reg.reg);
+ else
+ xe_mmio_write32(gt, reg.reg, value);
+
+ /*
+ * If we turned off the multicast bit (during a write) we're required
+ * to turn it back on before finishing. The group and instance values
+ * don't matter since they'll be re-programmed on the next MCR
+ * operation.
+ */
+ if (rw_flag == MCR_OP_WRITE)
+ xe_mmio_write32(gt, steer_reg, GEN11_MCR_MULTICAST);
+
+ return val;
+}
+
+/**
+ * xe_gt_mcr_unicast_read_any - reads a non-terminated instance of an MCR register
+ * @gt: GT structure
+ * @reg: register to read
+ *
+ * Reads a GT MCR register. The read will be steered to a non-terminated
+ * instance (i.e., one that isn't fused off or powered down by power gating).
+ * This function assumes the caller is already holding any necessary forcewake
+ * domains.
+ *
+ * Returns the value from a non-terminated instance of @reg.
+ */
+u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg)
+{
+ u8 group, instance;
+ u32 val;
+ bool steer;
+
+ steer = xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance);
+
+ if (steer) {
+ mcr_lock(gt);
+ val = rw_with_mcr_steering(gt, reg, MCR_OP_READ,
+ group, instance, 0);
+ mcr_unlock(gt);
+ } else {
+ /* DG2 GAM special case rules; treat as if unicast */
+ val = xe_mmio_read32(gt, reg.reg);
+ }
+
+ return val;
+}
+
+/**
+ * xe_gt_mcr_unicast_read - read a specific instance of an MCR register
+ * @gt: GT structure
+ * @reg: the MCR register to read
+ * @group: the MCR group
+ * @instance: the MCR instance
+ *
+ * Returns the value read from an MCR register after steering toward a specific
+ * group/instance.
+ */
+u32 xe_gt_mcr_unicast_read(struct xe_gt *gt,
+ i915_mcr_reg_t reg,
+ int group, int instance)
+{
+ u32 val;
+
+ mcr_lock(gt);
+ val = rw_with_mcr_steering(gt, reg, MCR_OP_READ, group, instance, 0);
+ mcr_unlock(gt);
+
+ return val;
+}
+
+/**
+ * xe_gt_mcr_unicast_write - write a specific instance of an MCR register
+ * @gt: GT structure
+ * @reg: the MCR register to write
+ * @value: value to write
+ * @group: the MCR group
+ * @instance: the MCR instance
+ *
+ * Write an MCR register in unicast mode after steering toward a specific
+ * group/instance.
+ */
+void xe_gt_mcr_unicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value,
+ int group, int instance)
+{
+ mcr_lock(gt);
+ rw_with_mcr_steering(gt, reg, MCR_OP_WRITE, group, instance, value);
+ mcr_unlock(gt);
+}
+
+/**
+ * xe_gt_mcr_multicast_write - write a value to all instances of an MCR register
+ * @gt: GT structure
+ * @reg: the MCR register to write
+ * @value: value to write
+ *
+ * Write an MCR register in multicast mode to update all instances.
+ */
+void xe_gt_mcr_multicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value)
+{
+ /*
+ * Synchronize with any unicast operations. Once we have exclusive
+ * access, the MULTICAST bit should already be set, so there's no need
+ * to touch the steering register.
+ */
+ mcr_lock(gt);
+ xe_mmio_write32(gt, reg.reg, value);
+ mcr_unlock(gt);
+}
+
+void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+ for (int i = 0; i < NUM_STEERING_TYPES; i++) {
+ if (gt->steering[i].ranges) {
+ drm_printf(p, "%s steering: group=%#x, instance=%#x\n",
+ xe_steering_types[i].name,
+ gt->steering[i].group_target,
+ gt->steering[i].instance_target);
+ for (int j = 0; gt->steering[i].ranges[j].end; j++)
+ drm_printf(p, "\t0x%06x - 0x%06x\n",
+ gt->steering[i].ranges[j].start,
+ gt->steering[i].ranges[j].end);
+ }
+ }
+}
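
To make the steering-table lookup above concrete, here is a trimmed stand-alone sketch of the same idea: walk each steering type's ranges and, when a register offset falls inside one, return that type's group/instance pair; otherwise the caller falls back to its special-case handling. Table contents and steering targets below are purely illustrative.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct range { uint32_t start, end; };	/* end == 0 terminates the table */
    struct steer { const struct range *ranges; uint8_t group, instance; };

    static const struct range l3bank_ranges[] = { { 0x00B100, 0x00B3FF }, { 0, 0 } };
    static const struct range dss_ranges[]    = { { 0x008150, 0x00815F }, { 0, 0 } };

    static const struct steer steering[] = {
            { l3bank_ranges, 0, 2 },	/* example non-terminated targets */
            { dss_ranges,    1, 3 },
    };

    static bool find_steering(uint32_t reg, uint8_t *group, uint8_t *instance)
    {
            for (unsigned int t = 0; t < sizeof(steering) / sizeof(steering[0]); t++)
                    for (int i = 0; steering[t].ranges[i].end > 0; i++)
                            if (reg >= steering[t].ranges[i].start &&
                                reg <= steering[t].ranges[i].end) {
                                    *group = steering[t].group;
                                    *instance = steering[t].instance;
                                    return true;
                            }
            return false;	/* caller falls back to 0/0 or unicast handling */
    }

    int main(void)
    {
            uint8_t g, i;

            if (find_steering(0x00B200, &g, &i))
                    printf("steer 0x%X to group %d, instance %d\n", 0x00B200, g, i);
            return 0;
    }
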
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
new file mode 100644
index 000000000000..62ec6eb654a0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_MCR_H_
+#define _XE_GT_MCR_H_
+
+#include "i915_reg_defs.h"
+
+struct drm_printer;
+struct xe_gt;
+
+void xe_gt_mcr_init(struct xe_gt *gt);
+
+u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, i915_mcr_reg_t reg,
+ int group, int instance);
+u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg);
+
+void xe_gt_mcr_unicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value,
+ int group, int instance);
+void xe_gt_mcr_multicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value);
+
+void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p);
+
+#endif /* _XE_GT_MCR_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
new file mode 100644
index 000000000000..7125113b7390
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -0,0 +1,750 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/circ_buf.h>
+
+#include <drm/drm_managed.h>
+#include <drm/ttm/ttm_execbuf_util.h>
+
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_gt_pagefault.h"
+#include "xe_migrate.h"
+#include "xe_pt.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+struct pagefault {
+ u64 page_addr;
+ u32 asid;
+ u16 pdata;
+ u8 vfid;
+ u8 access_type;
+ u8 fault_type;
+ u8 fault_level;
+ u8 engine_class;
+ u8 engine_instance;
+ u8 fault_unsuccessful;
+};
+
+enum access_type {
+ ACCESS_TYPE_READ = 0,
+ ACCESS_TYPE_WRITE = 1,
+ ACCESS_TYPE_ATOMIC = 2,
+ ACCESS_TYPE_RESERVED = 3,
+};
+
+enum fault_type {
+ NOT_PRESENT = 0,
+ WRITE_ACCESS_VIOLATION = 1,
+ ATOMIC_ACCESS_VIOLATION = 2,
+};
+
+struct acc {
+ u64 va_range_base;
+ u32 asid;
+ u32 sub_granularity;
+ u8 granularity;
+ u8 vfid;
+ u8 access_type;
+ u8 engine_class;
+ u8 engine_instance;
+};
+
+static struct xe_gt *
+guc_to_gt(struct xe_guc *guc)
+{
+ return container_of(guc, struct xe_gt, uc.guc);
+}
+
+static int send_tlb_invalidation(struct xe_guc *guc)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ u32 action[] = {
+ XE_GUC_ACTION_TLB_INVALIDATION,
+ 0,
+ XE_GUC_TLB_INVAL_FULL << XE_GUC_TLB_INVAL_TYPE_SHIFT |
+ XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT |
+ XE_GUC_TLB_INVAL_FLUSH_CACHE,
+ };
+ int seqno;
+ int ret;
+
+ /*
+ * XXX: The seqno algorithm relies on TLB invalidations being processed
+ * in the order in which they are issued (which is currently the case); if
+ * that changes, the algorithm will need to be updated.
+ */
+ mutex_lock(&guc->ct.lock);
+ seqno = gt->usm.tlb_invalidation_seqno;
+ action[1] = seqno;
+ gt->usm.tlb_invalidation_seqno = (gt->usm.tlb_invalidation_seqno + 1) %
+ TLB_INVALIDATION_SEQNO_MAX;
+ if (!gt->usm.tlb_invalidation_seqno)
+ gt->usm.tlb_invalidation_seqno = 1;
+ ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action),
+ G2H_LEN_DW_TLB_INVALIDATE, 1);
+ if (!ret)
+ ret = seqno;
+ mutex_unlock(&guc->ct.lock);
+
+ return ret;
+}
+
+static bool access_is_atomic(enum access_type access_type)
+{
+ return access_type == ACCESS_TYPE_ATOMIC;
+}
+
+static bool vma_is_valid(struct xe_gt *gt, struct xe_vma *vma)
+{
+ return BIT(gt->info.id) & vma->gt_present &&
+ !(BIT(gt->info.id) & vma->usm.gt_invalidated);
+}
+
+static bool vma_matches(struct xe_vma *vma, struct xe_vma *lookup)
+{
+ if (lookup->start > vma->end || lookup->end < vma->start)
+ return false;
+
+ return true;
+}
+
+static bool only_needs_bo_lock(struct xe_bo *bo)
+{
+ return bo && bo->vm;
+}
+
+static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr)
+{
+ struct xe_vma *vma = NULL, lookup;
+
+ lookup.start = page_addr;
+ lookup.end = lookup.start + SZ_4K - 1;
+ if (vm->usm.last_fault_vma) { /* Fast lookup */
+ if (vma_matches(vm->usm.last_fault_vma, &lookup))
+ vma = vm->usm.last_fault_vma;
+ }
+ if (!vma)
+ vma = xe_vm_find_overlapping_vma(vm, &lookup);
+
+ return vma;
+}
+
+static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_vm *vm;
+ struct xe_vma *vma = NULL;
+ struct xe_bo *bo;
+ LIST_HEAD(objs);
+ LIST_HEAD(dups);
+ struct ttm_validate_buffer tv_bo, tv_vm;
+ struct ww_acquire_ctx ww;
+ struct dma_fence *fence;
+ bool write_locked;
+ int ret = 0;
+ bool atomic;
+
+ /* ASID to VM */
+ mutex_lock(&xe->usm.lock);
+ vm = xa_load(&xe->usm.asid_to_vm, pf->asid);
+ if (vm)
+ xe_vm_get(vm);
+ mutex_unlock(&xe->usm.lock);
+ if (!vm || !xe_vm_in_fault_mode(vm))
+ return -EINVAL;
+
+retry_userptr:
+ /*
+ * TODO: Avoid exclusive lock if VM doesn't have userptrs, or
+ * start out read-locked?
+ */
+ down_write(&vm->lock);
+ write_locked = true;
+ vma = lookup_vma(vm, pf->page_addr);
+ if (!vma) {
+ ret = -EINVAL;
+ goto unlock_vm;
+ }
+
+ if (!xe_vma_is_userptr(vma) || !xe_vma_userptr_check_repin(vma)) {
+ downgrade_write(&vm->lock);
+ write_locked = false;
+ }
+
+ trace_xe_vma_pagefault(vma);
+
+ atomic = access_is_atomic(pf->access_type);
+
+ /* Check if VMA is valid */
+ if (vma_is_valid(gt, vma) && !atomic)
+ goto unlock_vm;
+
+ /* TODO: Validate fault */
+
+ if (xe_vma_is_userptr(vma) && write_locked) {
+ spin_lock(&vm->userptr.invalidated_lock);
+ list_del_init(&vma->userptr.invalidate_link);
+ spin_unlock(&vm->userptr.invalidated_lock);
+
+ ret = xe_vma_userptr_pin_pages(vma);
+ if (ret)
+ goto unlock_vm;
+
+ downgrade_write(&vm->lock);
+ write_locked = false;
+ }
+
+ /* Lock VM and BOs dma-resv */
+ bo = vma->bo;
+ if (only_needs_bo_lock(bo)) {
+ /* This path ensures the BO's LRU is updated */
+ ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false);
+ } else {
+ tv_vm.num_shared = xe->info.tile_count;
+ tv_vm.bo = xe_vm_ttm_bo(vm);
+ list_add(&tv_vm.head, &objs);
+ if (bo) {
+ tv_bo.bo = &bo->ttm;
+ tv_bo.num_shared = xe->info.tile_count;
+ list_add(&tv_bo.head, &objs);
+ }
+ ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
+ }
+ if (ret)
+ goto unlock_vm;
+
+ if (atomic) {
+ if (xe_vma_is_userptr(vma)) {
+ ret = -EACCES;
+ goto unlock_dma_resv;
+ }
+
+ /* Migrate to VRAM, move should invalidate the VMA first */
+ ret = xe_bo_migrate(bo, XE_PL_VRAM0 + gt->info.vram_id);
+ if (ret)
+ goto unlock_dma_resv;
+ } else if (bo) {
+ /* Create backing store if needed */
+ ret = xe_bo_validate(bo, vm, true);
+ if (ret)
+ goto unlock_dma_resv;
+ }
+
+ /* Bind VMA only to the GT that has faulted */
+ trace_xe_vma_pf_bind(vma);
+ fence = __xe_pt_bind_vma(gt, vma, xe_gt_migrate_engine(gt), NULL, 0,
+ vma->gt_present & BIT(gt->info.id));
+ if (IS_ERR(fence)) {
+ ret = PTR_ERR(fence);
+ goto unlock_dma_resv;
+ }
+
+ /*
+ * XXX: Should we drop the lock before waiting? This only helps when
+ * doing GPU binds, which currently only happens if we have to wait
+ * more than 10ms on a move.
+ */
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+
+ if (xe_vma_is_userptr(vma))
+ ret = xe_vma_userptr_check_repin(vma);
+ vma->usm.gt_invalidated &= ~BIT(gt->info.id);
+
+unlock_dma_resv:
+ if (only_needs_bo_lock(bo))
+ xe_bo_unlock(bo, &ww);
+ else
+ ttm_eu_backoff_reservation(&ww, &objs);
+unlock_vm:
+ if (!ret)
+ vm->usm.last_fault_vma = vma;
+ if (write_locked)
+ up_write(&vm->lock);
+ else
+ up_read(&vm->lock);
+ if (ret == -EAGAIN)
+ goto retry_userptr;
+
+ if (!ret) {
+ /*
+ * FIXME: Doing a full TLB invalidation for now; this could likely be
+ * deferred, along with the fault response, to a fence callback
+ * instead.
+ */
+ ret = send_tlb_invalidation(&gt->uc.guc);
+ if (ret >= 0)
+ ret = 0;
+ }
+ xe_vm_put(vm);
+
+ return ret;
+}
+
+static int send_pagefault_reply(struct xe_guc *guc,
+ struct xe_guc_pagefault_reply *reply)
+{
+ u32 action[] = {
+ XE_GUC_ACTION_PAGE_FAULT_RES_DESC,
+ reply->dw0,
+ reply->dw1,
+ };
+
+ return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
+}
+
+static void print_pagefault(struct xe_device *xe, struct pagefault *pf)
+{
+ drm_warn(&xe->drm, "\n\tASID: %d\n"
+ "\tVFID: %d\n"
+ "\tPDATA: 0x%04x\n"
+ "\tFaulted Address: 0x%08x%08x\n"
+ "\tFaultType: %d\n"
+ "\tAccessType: %d\n"
+ "\tFaultLevel: %d\n"
+ "\tEngineClass: %d\n"
+ "\tEngineInstance: %d\n",
+ pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr),
+ lower_32_bits(pf->page_addr),
+ pf->fault_type, pf->access_type, pf->fault_level,
+ pf->engine_class, pf->engine_instance);
+}
+
+#define PF_MSG_LEN_DW 4
+
+static int get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
+{
+ const struct xe_guc_pagefault_desc *desc;
+ int ret = 0;
+
+ spin_lock_irq(&pf_queue->lock);
+ if (pf_queue->head != pf_queue->tail) {
+ desc = (const struct xe_guc_pagefault_desc *)
+ (pf_queue->data + pf_queue->head);
+
+ pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0);
+ pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0);
+ pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0);
+ pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) <<
+ PFD_PDATA_HI_SHIFT;
+ pf->pdata |= FIELD_GET(PFD_PDATA_LO, desc->dw0);
+ pf->asid = FIELD_GET(PFD_ASID, desc->dw1);
+ pf->vfid = FIELD_GET(PFD_VFID, desc->dw2);
+ pf->access_type = FIELD_GET(PFD_ACCESS_TYPE, desc->dw2);
+ pf->fault_type = FIELD_GET(PFD_FAULT_TYPE, desc->dw2);
+ pf->page_addr = (u64)(FIELD_GET(PFD_VIRTUAL_ADDR_HI, desc->dw3)) <<
+ PFD_VIRTUAL_ADDR_HI_SHIFT;
+ pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) <<
+ PFD_VIRTUAL_ADDR_LO_SHIFT;
+
+ pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) %
+ PF_QUEUE_NUM_DW;
+ } else {
+ ret = -1;
+ }
+ spin_unlock_irq(&pf_queue->lock);
+
+ return ret;
+}
+
+static bool pf_queue_full(struct pf_queue *pf_queue)
+{
+ lockdep_assert_held(&pf_queue->lock);
+
+ return CIRC_SPACE(pf_queue->tail, pf_queue->head, PF_QUEUE_NUM_DW) <=
+ PF_MSG_LEN_DW;
+}
+
+int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct pf_queue *pf_queue;
+ unsigned long flags;
+ u32 asid;
+ bool full;
+
+ if (unlikely(len != PF_MSG_LEN_DW))
+ return -EPROTO;
+
+ asid = FIELD_GET(PFD_ASID, msg[1]);
+ pf_queue = &gt->usm.pf_queue[asid % NUM_PF_QUEUE];
+
+ spin_lock_irqsave(&pf_queue->lock, flags);
+ full = pf_queue_full(pf_queue);
+ if (!full) {
+ memcpy(pf_queue->data + pf_queue->tail, msg, len * sizeof(u32));
+ pf_queue->tail = (pf_queue->tail + len) % PF_QUEUE_NUM_DW;
+ queue_work(gt->usm.pf_wq, &pf_queue->worker);
+ } else {
+ XE_WARN_ON("PF Queue full, shouldn't be possible");
+ }
+ spin_unlock_irqrestore(&pf_queue->lock, flags);
+
+ return full ? -ENOSPC : 0;
+}
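
For reference, a simplified stand-alone model of the page-fault ring used by the handler above: the G2H handler produces PF_MSG_LEN_DW dwords at the tail, the worker consumes them at the head, and the ring reports full once no more than one message's worth of space remains. Plain modular arithmetic is used here instead of the kernel's CIRC_SPACE() helper, and the ring size is an illustrative constant, not PF_QUEUE_NUM_DW.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MSG_LEN_DW   4	/* mirrors PF_MSG_LEN_DW */
    #define QUEUE_NUM_DW 64	/* illustrative ring size in dwords */

    struct ring {
            uint32_t data[QUEUE_NUM_DW];
            unsigned int head;	/* consumer (worker) index, in dwords */
            unsigned int tail;	/* producer (G2H handler) index, in dwords */
    };

    /* Dwords the producer may still write without catching the consumer. */
    static unsigned int ring_space(const struct ring *r)
    {
            return (r->head + QUEUE_NUM_DW - r->tail - 1) % QUEUE_NUM_DW;
    }

    static bool ring_produce(struct ring *r, const uint32_t *msg)
    {
            if (ring_space(r) <= MSG_LEN_DW)
                    return false;	/* full: the driver warns and drops */
            for (int i = 0; i < MSG_LEN_DW; i++)
                    r->data[(r->tail + i) % QUEUE_NUM_DW] = msg[i];
            r->tail = (r->tail + MSG_LEN_DW) % QUEUE_NUM_DW;
            return true;
    }

    int main(void)
    {
            struct ring r = { .head = 0, .tail = 0 };
            const uint32_t msg[MSG_LEN_DW] = { 0, 1, 2, 3 };
            int queued = 0;

            while (ring_produce(&r, msg))
                    queued++;
            printf("queued %d messages before the ring filled\n", queued);
            return 0;
    }
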
+
+static void pf_queue_work_func(struct work_struct *w)
+{
+ struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker);
+ struct xe_gt *gt = pf_queue->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_guc_pagefault_reply reply = {};
+ struct pagefault pf = {};
+ int ret;
+
+ ret = get_pagefault(pf_queue, &pf);
+ if (ret)
+ return;
+
+ ret = handle_pagefault(gt, &pf);
+ if (unlikely(ret)) {
+ print_pagefault(xe, &pf);
+ pf.fault_unsuccessful = 1;
+ drm_warn(&xe->drm, "Fault response: Unsuccessful %d\n", ret);
+ }
+
+ reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
+ FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
+ FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
+ FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
+ FIELD_PREP(PFR_ASID, pf.asid);
+
+ reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
+ FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) |
+ FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
+ FIELD_PREP(PFR_PDATA, pf.pdata);
+
+ send_pagefault_reply(&gt->uc.guc, &reply);
+}
+
+static void acc_queue_work_func(struct work_struct *w);
+
+int xe_gt_pagefault_init(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int i;
+
+ if (!xe->info.supports_usm)
+ return 0;
+
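+ /*
+ * Start seqnos at 1, presumably so that a zero-initialized
+ * tlb_invalidation_seqno_recv is never mistaken for an already
+ * completed invalidation.
+ */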
+ gt->usm.tlb_invalidation_seqno = 1;
+ for (i = 0; i < NUM_PF_QUEUE; ++i) {
+ gt->usm.pf_queue[i].gt = gt;
+ spin_lock_init(&gt->usm.pf_queue[i].lock);
+ INIT_WORK(&gt->usm.pf_queue[i].worker, pf_queue_work_func);
+ }
+ for (i = 0; i < NUM_ACC_QUEUE; ++i) {
+ gt->usm.acc_queue[i].gt = gt;
+ spin_lock_init(&gt->usm.acc_queue[i].lock);
+ INIT_WORK(&gt->usm.acc_queue[i].worker, acc_queue_work_func);
+ }
+
+ gt->usm.pf_wq = alloc_workqueue("xe_gt_page_fault_work_queue",
+ WQ_UNBOUND | WQ_HIGHPRI, NUM_PF_QUEUE);
+ if (!gt->usm.pf_wq)
+ return -ENOMEM;
+
+ gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue",
+ WQ_UNBOUND | WQ_HIGHPRI,
+ NUM_ACC_QUEUE);
+ if (!gt->usm.acc_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void xe_gt_pagefault_reset(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int i;
+
+ if (!xe->info.supports_usm)
+ return;
+
+ for (i = 0; i < NUM_PF_QUEUE; ++i) {
+ spin_lock_irq(&gt->usm.pf_queue[i].lock);
+ gt->usm.pf_queue[i].head = 0;
+ gt->usm.pf_queue[i].tail = 0;
+ spin_unlock_irq(&gt->usm.pf_queue[i].lock);
+ }
+
+ for (i = 0; i < NUM_ACC_QUEUE; ++i) {
+ spin_lock(&gt->usm.acc_queue[i].lock);
+ gt->usm.acc_queue[i].head = 0;
+ gt->usm.acc_queue[i].tail = 0;
+ spin_unlock(&gt->usm.acc_queue[i].lock);
+ }
+}
+
+int xe_gt_tlb_invalidation(struct xe_gt *gt)
+{
+ return send_tlb_invalidation(&gt->uc.guc);
+}
+
+static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
+{
+ if (gt->usm.tlb_invalidation_seqno_recv >= seqno)
+ return true;
+
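+ /*
+ * Handle wrap: once the received seqno has wrapped around, a pending
+ * seqno that is numerically far ahead of it is actually in the past.
+ */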
+ if (seqno - gt->usm.tlb_invalidation_seqno_recv >
+ (TLB_INVALIDATION_SEQNO_MAX / 2))
+ return true;
+
+ return false;
+}
+
+int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_guc *guc = &gt->uc.guc;
+ int ret;
+
+ /*
+ * XXX: See above, this algorithm only works if seqno are always in
+ * order
+ */
+ ret = wait_event_timeout(guc->ct.wq,
+ tlb_invalidation_seqno_past(gt, seqno),
+ HZ / 5);
+ if (!ret) {
+ drm_err(&xe->drm, "TLB invalidation time'd out, seqno=%d, recv=%d\n",
+ seqno, gt->usm.tlb_invalidation_seqno_recv);
+ return -ETIME;
+ }
+
+ return 0;
+}
+
+int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ int expected_seqno;
+
+ if (unlikely(len != 1))
+ return -EPROTO;
+
+ /* Sanity check on seqno */
+ expected_seqno = (gt->usm.tlb_invalidation_seqno_recv + 1) %
+ TLB_INVALIDATION_SEQNO_MAX;
+ XE_WARN_ON(expected_seqno != msg[0]);
+
+ gt->usm.tlb_invalidation_seqno_recv = msg[0];
+ smp_wmb();
+ wake_up_all(&guc->ct.wq);
+
+ return 0;
+}
+
+static int granularity_in_byte(int val)
+{
+ switch (val) {
+ case 0:
+ return SZ_128K;
+ case 1:
+ return SZ_2M;
+ case 2:
+ return SZ_16M;
+ case 3:
+ return SZ_64M;
+ default:
+ return 0;
+ }
+}
+
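+ /* The sub-granularity vector is 32 bits wide, so one bit covers 1/32 of the region */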
+static int sub_granularity_in_byte(int val)
+{
+ return (granularity_in_byte(val) / 32);
+}
+
+static void print_acc(struct xe_device *xe, struct acc *acc)
+{
+ drm_warn(&xe->drm, "Access counter request:\n"
+ "\tType: %s\n"
+ "\tASID: %d\n"
+ "\tVFID: %d\n"
+ "\tEngine: %d:%d\n"
+ "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n"
+ "\tSub_Granularity Vector: 0x%08x\n"
+ "\tVA Range base: 0x%016llx\n",
+ acc->access_type ? "AC_NTFY_VAL" : "AC_TRIG_VAL",
+ acc->asid, acc->vfid, acc->engine_class, acc->engine_instance,
+ granularity_in_byte(acc->granularity) / SZ_1K,
+ sub_granularity_in_byte(acc->granularity) / SZ_1K,
+ acc->sub_granularity, acc->va_range_base);
+}
+
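+ /*
+ * The lowest set bit in the sub-granularity vector identifies the page
+ * within the VA range that tripped the access counter.
+ */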
+static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc)
+{
+ u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) *
+ sub_granularity_in_byte(acc->granularity);
+ struct xe_vma lookup;
+
+ lookup.start = page_va;
+ lookup.end = lookup.start + SZ_4K - 1;
+
+ return xe_vm_find_overlapping_vma(vm, &lookup);
+}
+
+static int handle_acc(struct xe_gt *gt, struct acc *acc)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_vm *vm;
+ struct xe_vma *vma;
+ struct xe_bo *bo;
+ LIST_HEAD(objs);
+ LIST_HEAD(dups);
+ struct ttm_validate_buffer tv_bo, tv_vm;
+ struct ww_acquire_ctx ww;
+ int ret = 0;
+
+ /* We only support ACC_TRIGGER at the moment */
+ if (acc->access_type != ACC_TRIGGER)
+ return -EINVAL;
+
+ /* ASID to VM */
+ mutex_lock(&xe->usm.lock);
+ vm = xa_load(&xe->usm.asid_to_vm, acc->asid);
+ if (vm)
+ xe_vm_get(vm);
+ mutex_unlock(&xe->usm.lock);
+ if (!vm || !xe_vm_in_fault_mode(vm))
+ return -EINVAL;
+
+ down_read(&vm->lock);
+
+ /* Lookup VMA */
+ vma = get_acc_vma(vm, acc);
+ if (!vma) {
+ ret = -EINVAL;
+ goto unlock_vm;
+ }
+
+ trace_xe_vma_acc(vma);
+
+ /* Userptr can't be migrated, nothing to do */
+ if (xe_vma_is_userptr(vma))
+ goto unlock_vm;
+
+ /* Lock VM and BOs dma-resv */
+ bo = vma->bo;
+ if (only_needs_bo_lock(bo)) {
+ /* This path ensures the BO's LRU is updated */
+ ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false);
+ } else {
+ tv_vm.num_shared = xe->info.tile_count;
+ tv_vm.bo = xe_vm_ttm_bo(vm);
+ list_add(&tv_vm.head, &objs);
+ tv_bo.bo = &bo->ttm;
+ tv_bo.num_shared = xe->info.tile_count;
+ list_add(&tv_bo.head, &objs);
+ ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
+ }
+ if (ret)
+ goto unlock_vm;
+
+ /* Migrate to VRAM, move should invalidate the VMA first */
+ ret = xe_bo_migrate(bo, XE_PL_VRAM0 + gt->info.vram_id);
+
+ if (only_needs_bo_lock(bo))
+ xe_bo_unlock(bo, &ww);
+ else
+ ttm_eu_backoff_reservation(&ww, &objs);
+unlock_vm:
+ up_read(&vm->lock);
+ xe_vm_put(vm);
+
+ return ret;
+}
+
+#define make_u64(hi__, low__) ((u64)(hi__) << 32 | (u64)(low__))
+
+static int get_acc(struct acc_queue *acc_queue, struct acc *acc)
+{
+ const struct xe_guc_acc_desc *desc;
+ int ret = 0;
+
+ spin_lock(&acc_queue->lock);
+ if (acc_queue->head != acc_queue->tail) {
+ desc = (const struct xe_guc_acc_desc *)
+ (acc_queue->data + acc_queue->head);
+
+ acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2);
+ acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 |
+ FIELD_GET(ACC_SUBG_LO, desc->dw0);
+ acc->engine_class = FIELD_GET(ACC_ENG_CLASS, desc->dw1);
+ acc->engine_instance = FIELD_GET(ACC_ENG_INSTANCE, desc->dw1);
+ acc->asid = FIELD_GET(ACC_ASID, desc->dw1);
+ acc->vfid = FIELD_GET(ACC_VFID, desc->dw2);
+ acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0);
+ acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI,
+ desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO);
+ } else {
+ ret = -1;
+ }
+ spin_unlock(&acc_queue->lock);
+
+ return ret;
+}
+
+static void acc_queue_work_func(struct work_struct *w)
+{
+ struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker);
+ struct xe_gt *gt = acc_queue->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+ struct acc acc = {};
+ int ret;
+
+ ret = get_acc(acc_queue, &acc);
+ if (ret)
+ return;
+
+ ret = handle_acc(gt, &acc);
+ if (unlikely(ret)) {
+ print_acc(xe, &acc);
+ drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret);
+ }
+}
+
+#define ACC_MSG_LEN_DW 4
+
+static bool acc_queue_full(struct acc_queue *acc_queue)
+{
+ lockdep_assert_held(&acc_queue->lock);
+
+ return CIRC_SPACE(acc_queue->tail, acc_queue->head, ACC_QUEUE_NUM_DW) <=
+ ACC_MSG_LEN_DW;
+}
+
+int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct acc_queue *acc_queue;
+ u32 asid;
+ bool full;
+
+ if (unlikely(len != ACC_MSG_LEN_DW))
+ return -EPROTO;
+
+ asid = FIELD_GET(ACC_ASID, msg[1]);
+ acc_queue = &gt->usm.acc_queue[asid % NUM_ACC_QUEUE];
+
+ spin_lock(&acc_queue->lock);
+ full = acc_queue_full(acc_queue);
+ if (!full) {
+ memcpy(acc_queue->data + acc_queue->tail, msg,
+ len * sizeof(u32));
+ acc_queue->tail = (acc_queue->tail + len) % ACC_QUEUE_NUM_DW;
+ queue_work(gt->usm.acc_wq, &acc_queue->worker);
+ } else {
+ drm_warn(&gt_to_xe(gt)->drm, "ACC Queue full, dropping ACC");
+ }
+ spin_unlock(&acc_queue->lock);
+
+ return full ? -ENOSPC : 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.h b/drivers/gpu/drm/xe/xe_gt_pagefault.h
new file mode 100644
index 000000000000..35f68027cc9c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_PAGEFAULT_H_
+#define _XE_GT_PAGEFAULT_H_
+
+#include <linux/types.h>
+
+struct xe_gt;
+struct xe_guc;
+
+int xe_gt_pagefault_init(struct xe_gt *gt);
+void xe_gt_pagefault_reset(struct xe_gt *gt);
+int xe_gt_tlb_invalidation(struct xe_gt *gt);
+int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
+int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len);
+
+#endif /* _XE_GT_PAGEFAULT_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c
new file mode 100644
index 000000000000..2d966d935b8e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <drm/drm_managed.h>
+#include "xe_gt.h"
+#include "xe_gt_sysfs.h"
+
+static void xe_gt_sysfs_kobj_release(struct kobject *kobj)
+{
+ kfree(kobj);
+}
+
+static struct kobj_type xe_gt_sysfs_kobj_type = {
+ .release = xe_gt_sysfs_kobj_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+static void gt_sysfs_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_gt *gt = arg;
+
+ kobject_put(gt->sysfs);
+}
+
+int xe_gt_sysfs_init(struct xe_gt *gt)
+{
+ struct device *dev = gt_to_xe(gt)->drm.dev;
+ struct kobj_gt *kg;
+ int err;
+
+ kg = kzalloc(sizeof(*kg), GFP_KERNEL);
+ if (!kg)
+ return -ENOMEM;
+
+ kobject_init(&kg->base, &xe_gt_sysfs_kobj_type);
+ kg->gt = gt;
+
+ err = kobject_add(&kg->base, &dev->kobj, "gt%d", gt->info.id);
+ if (err) {
+ kobject_put(&kg->base);
+ return err;
+ }
+
+ gt->sysfs = &kg->base;
+
+ err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_sysfs_fini, gt);
+ if (err)
+ return err;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.h b/drivers/gpu/drm/xe/xe_gt_sysfs.h
new file mode 100644
index 000000000000..ecbfcc5c7d42
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_SYSFS_H_
+#define _XE_GT_SYSFS_H_
+
+#include "xe_gt_sysfs_types.h"
+
+int xe_gt_sysfs_init(struct xe_gt *gt);
+
+static inline struct xe_gt *
+kobj_to_gt(struct kobject *kobj)
+{
+ return container_of(kobj, struct kobj_gt, base)->gt;
+}
+
+#endif /* _XE_GT_SYSFS_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs_types.h b/drivers/gpu/drm/xe/xe_gt_sysfs_types.h
new file mode 100644
index 000000000000..d3bc6b83360f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs_types.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_SYSFS_TYPES_H_
+#define _XE_GT_SYSFS_TYPES_H_
+
+#include <linux/kobject.h>
+
+struct xe_gt;
+
+/**
+ * struct kobj_gt - A GT's kobject struct that connects the kobject and the GT
+ *
+ * When dealing with multiple GTs, this struct helps to understand which GT
+ * needs to be addressed on a given sysfs call.
+ */
+struct kobj_gt {
+ /** @base: The actual kobject */
+ struct kobject base;
+ /** @gt: A pointer to the GT itself */
+ struct xe_gt *gt;
+};
+
+#endif /* _XE_GT_SYSFS_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
new file mode 100644
index 000000000000..8e02e362ba27
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/bitmap.h>
+
+#include "xe_gt.h"
+#include "xe_gt_topology.h"
+#include "xe_mmio.h"
+
+#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS)
+#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS)
+
+#define XELP_EU_ENABLE 0x9134 /* "_DISABLE" on Xe_LP */
+#define XELP_EU_MASK REG_GENMASK(7, 0)
+#define XELP_GT_GEOMETRY_DSS_ENABLE 0x913c
+#define XEHP_GT_COMPUTE_DSS_ENABLE 0x9144
+#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT 0x9148
+
+static void
+load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
+{
+ va_list argp;
+ u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {};
+ int i;
+
+ if (drm_WARN_ON(&gt_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS))
+ numregs = XE_MAX_DSS_FUSE_REGS;
+
+ va_start(argp, numregs);
+ for (i = 0; i < numregs; i++)
+ fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, u32));
+ va_end(argp);
+
+ bitmap_from_arr32(mask, fuse_val, numregs * 32);
+}
+
+static void
+load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ u32 reg = xe_mmio_read32(gt, XELP_EU_ENABLE);
+ u32 val = 0;
+ int i;
+
+ BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1);
+
+ /*
+ * Pre-Xe_HP platforms inverted the bit meaning (disable instead
+ * of enable).
+ */
+ if (GRAPHICS_VERx100(xe) < 1250)
+ reg = ~reg & XELP_EU_MASK;
+
+ /* On PVC, one bit = one EU */
+ if (GRAPHICS_VERx100(xe) == 1260) {
+ val = reg;
+ } else {
+ /* All other platforms, one bit = 2 EU */
+ for (i = 0; i < fls(reg); i++)
+ if (reg & BIT(i))
+ val |= 0x3 << 2 * i;
+ }
+
+ bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
+}
+
+void
+xe_gt_topology_init(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ struct drm_printer p = drm_debug_printer("GT topology");
+ int num_geometry_regs, num_compute_regs;
+
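+ /*
+ * The set of DSS fuse registers depends on the platform: PVC has two
+ * compute-DSS registers and no geometry register, other Xe_HP+ parts
+ * have one of each, and older platforms only a geometry register.
+ */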
+ if (GRAPHICS_VERx100(xe) == 1260) {
+ num_geometry_regs = 0;
+ num_compute_regs = 2;
+ } else if (GRAPHICS_VERx100(xe) >= 1250) {
+ num_geometry_regs = 1;
+ num_compute_regs = 1;
+ } else {
+ num_geometry_regs = 1;
+ num_compute_regs = 0;
+ }
+
+ load_dss_mask(gt, gt->fuse_topo.g_dss_mask, num_geometry_regs,
+ XELP_GT_GEOMETRY_DSS_ENABLE);
+ load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
+ XEHP_GT_COMPUTE_DSS_ENABLE,
+ XEHPC_GT_COMPUTE_DSS_ENABLE_EXT);
+ load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
+
+ xe_gt_topology_dump(gt, &p);
+}
+
+unsigned int
+xe_gt_topology_count_dss(xe_dss_mask_t mask)
+{
+ return bitmap_weight(mask, XE_MAX_DSS_FUSE_BITS);
+}
+
+u64
+xe_gt_topology_dss_group_mask(xe_dss_mask_t mask, int grpsize)
+{
+ xe_dss_mask_t per_dss_mask = {};
+ u64 grpmask = 0;
+
+ WARN_ON(DIV_ROUND_UP(XE_MAX_DSS_FUSE_BITS, grpsize) > BITS_PER_TYPE(grpmask));
+
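+ /*
+ * Set bit i in the group mask if any DSS within group i is enabled,
+ * shifting the DSS mask down one group at a time.
+ */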
+ bitmap_fill(per_dss_mask, grpsize);
+ for (int i = 0; !bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS); i++) {
+ if (bitmap_intersects(mask, per_dss_mask, grpsize))
+ grpmask |= BIT(i);
+
+ bitmap_shift_right(mask, mask, grpsize, XE_MAX_DSS_FUSE_BITS);
+ }
+
+ return grpmask;
+}
+
+void
+xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+ drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS,
+ gt->fuse_topo.g_dss_mask);
+ drm_printf(p, "dss mask (compute): %*pb\n", XE_MAX_DSS_FUSE_BITS,
+ gt->fuse_topo.c_dss_mask);
+
+ drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS,
+ gt->fuse_topo.eu_mask_per_dss);
+}
+
+/*
+ * Used to obtain the index of the first DSS. Can start searching from the
+ * beginning of a specific dss group (e.g., gslice, cslice, etc.) if
+ * groupsize and groupnum are non-zero.
+ */
+unsigned int
+xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum)
+{
+ return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h
new file mode 100644
index 000000000000..7a0abc64084f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_topology.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __XE_GT_TOPOLOGY_H__
+#define __XE_GT_TOPOLOGY_H__
+
+#include "xe_gt_types.h"
+
+struct drm_printer;
+
+void xe_gt_topology_init(struct xe_gt *gt);
+
+void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p);
+
+unsigned int
+xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum);
+
+#endif /* __XE_GT_TOPOLOGY_H__ */
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
new file mode 100644
index 000000000000..c80a9215098d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -0,0 +1,320 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GT_TYPES_H_
+#define _XE_GT_TYPES_H_
+
+#include "xe_force_wake_types.h"
+#include "xe_hw_engine_types.h"
+#include "xe_hw_fence_types.h"
+#include "xe_reg_sr_types.h"
+#include "xe_sa_types.h"
+#include "xe_uc_types.h"
+
+struct xe_engine_ops;
+struct xe_ggtt;
+struct xe_migrate;
+struct xe_ring_ops;
+struct xe_ttm_gtt_mgr;
+struct xe_ttm_vram_mgr;
+
+enum xe_gt_type {
+ XE_GT_TYPE_UNINITIALIZED,
+ XE_GT_TYPE_MAIN,
+ XE_GT_TYPE_REMOTE,
+ XE_GT_TYPE_MEDIA,
+};
+
+#define XE_MAX_DSS_FUSE_REGS 2
+#define XE_MAX_EU_FUSE_REGS 1
+
+typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)];
+typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(32 * XE_MAX_EU_FUSE_REGS)];
+
+struct xe_mmio_range {
+ u32 start;
+ u32 end;
+};
+
+/*
+ * The hardware has multiple kinds of multicast register ranges that need
+ * special register steering (and future platforms are expected to add
+ * additional types).
+ *
+ * During driver startup, we initialize the steering control register to
+ * direct reads to a slice/subslice that are valid for the 'subslice' class
+ * of multicast registers. If another type of steering does not have any
+ * overlap in valid steering targets with 'subslice' style registers, we will
+ * need to explicitly re-steer reads of registers of the other type.
+ *
+ * Only the replication types that may need additional non-default steering
+ * are listed here.
+ */
+enum xe_steering_type {
+ L3BANK,
+ MSLICE,
+ LNCF,
+ DSS,
+ OADDRM,
+
+ /*
+ * On some platforms there are multiple types of MCR registers that
+ * will always return a non-terminated value at instance (0, 0). We'll
+ * lump those all into a single category to keep things simple.
+ */
+ INSTANCE0,
+
+ NUM_STEERING_TYPES
+};
+
+/**
+ * struct xe_gt - Top level struct of a graphics tile
+ *
+ * A graphics tile may be a physical split (duplicate pieces of silicon,
+ * different GGTT + VRAM) or a virtual split (shared GGTT + VRAM). Either way
+ * this structure encapsulates everything a GT is (MMIO, VRAM, memory
+ * management, microcontrollers, and a set of hardware engines).
+ */
+struct xe_gt {
+ /** @xe: backpointer to XE device */
+ struct xe_device *xe;
+
+ /** @info: GT info */
+ struct {
+ /** @type: type of GT */
+ enum xe_gt_type type;
+ /** @id: id of GT */
+ u8 id;
+ /** @vram_id: id of the VRAM for this GT */
+ u8 vram_id;
+ /** @clock_freq: clock frequency */
+ u32 clock_freq;
+ /** @engine_mask: mask of engines present on GT */
+ u64 engine_mask;
+ } info;
+
+ /**
+ * @mmio: mmio info for GT, can be subset of the global device mmio
+ * space
+ */
+ struct {
+ /** @size: size of MMIO space on GT */
+ size_t size;
+ /** @regs: pointer to MMIO space on GT */
+ void *regs;
+ /** @fw: force wake for GT */
+ struct xe_force_wake fw;
+ /**
+ * @adj_limit: adjust MMIO address if address is below this
+ * value
+ */
+ u32 adj_limit;
+ /** @adj_offset: offset to add to MMIO address when adjusting */
+ u32 adj_offset;
+ } mmio;
+
+ /**
+ * @reg_sr: table with registers to be restored on GT init/resume/reset
+ */
+ struct xe_reg_sr reg_sr;
+
+ /**
+ * @mem: memory management info for GT, multiple GTs can point to same
+ * objects (virtual split)
+ */
+ struct {
+ /**
+ * @vram: VRAM info for GT, multiple GTs can point to same info
+ * (virtual split), can be subset of global device VRAM
+ */
+ struct {
+ /** @io_start: start address of VRAM */
+ resource_size_t io_start;
+ /** @size: size of VRAM */
+ resource_size_t size;
+ /** @mapping: pointer to VRAM mappable space */
+ void __iomem *mapping;
+ } vram;
+ /** @vram_mgr: VRAM TTM manager */
+ struct xe_ttm_vram_mgr *vram_mgr;
+ /** @gtt_mgr: GTT TTM manager */
+ struct xe_ttm_gtt_mgr *gtt_mgr;
+ /** @ggtt: Global graphics translation table */
+ struct xe_ggtt *ggtt;
+ } mem;
+
+ /** @reset: state for GT resets */
+ struct {
+ /**
+ * @worker: work so GT resets can be done asynchronously, allowing
+ * the reset code to safely flush all code paths
+ */
+ struct work_struct worker;
+ } reset;
+
+ /** @usm: unified shared memory state */
+ struct {
+ /**
+ * @bb_pool: Pool from which batchbuffers, for USM operations
+ * (e.g. migrations, fixing page tables), are allocated.
+ * A dedicated pool is needed so USM operations do not get
+ * blocked behind user operations which may have resulted in
+ * a fault.
+ */
+ struct xe_sa_manager bb_pool;
+ /**
+ * @reserved_bcs_instance: reserved BCS instance used for USM
+ * operations (e.g. migrations, fixing page tables)
+ */
+ u16 reserved_bcs_instance;
+ /**
+ * @tlb_invalidation_seqno: TLB invalidation seqno, protected by
+ * CT lock
+ */
+#define TLB_INVALIDATION_SEQNO_MAX 0x100000
+ int tlb_invalidation_seqno;
+ /**
+ * @tlb_invalidation_seqno_recv: last received TLB invalidation
+ * seqno, protected by CT lock
+ */
+ int tlb_invalidation_seqno_recv;
+ /** @pf_wq: page fault work queue, unbound, high priority */
+ struct workqueue_struct *pf_wq;
+ /** @acc_wq: access counter work queue, unbound, high priority */
+ struct workqueue_struct *acc_wq;
+ /**
+ * @pf_queue: Page fault queues used to buffer faults so they
+ * can be processed outside of the GuC CT lock. Each queue is
+ * sized so it can hold all possible faults (1 per physical
+ * engine). Multiple queues exist so page faults from different
+ * VMs can be processed in parallel.
+ */
+ struct pf_queue {
+ /** @gt: back pointer to GT */
+ struct xe_gt *gt;
+#define PF_QUEUE_NUM_DW 128
+ /** @data: data in the page fault queue */
+ u32 data[PF_QUEUE_NUM_DW];
+ /**
+ * @head: head pointer in DWs for page fault queue,
+ * moved by worker which processes faults.
+ */
+ u16 head;
+ /**
+ * @tail: tail pointer in DWs for page fault queue,
+ * moved by G2H handler.
+ */
+ u16 tail;
+ /** @lock: protects page fault queue */
+ spinlock_t lock;
+ /** @worker: to process page faults */
+ struct work_struct worker;
+#define NUM_PF_QUEUE 4
+ } pf_queue[NUM_PF_QUEUE];
+ /**
+ * @acc_queue: Same idea as the page fault queue; access
+ * counters cannot be processed under the GuC CT lock either.
+ */
+ struct acc_queue {
+ /** @gt: back pointer to GT */
+ struct xe_gt *gt;
+#define ACC_QUEUE_NUM_DW 128
+ /** @data: data in the access counter queue */
+ u32 data[ACC_QUEUE_NUM_DW];
+ /**
+ * @head: head pointer in DWs for the access counter queue,
+ * moved by the worker which processes access counters.
+ */
+ u16 head;
+ /**
+ * @tail: tail pointer in DWs for the access counter queue,
+ * moved by the G2H handler.
+ */
+ u16 tail;
+ /** @lock: protects the access counter queue */
+ spinlock_t lock;
+ /** @worker: to process access counters */
+ struct work_struct worker;
+#define NUM_ACC_QUEUE 4
+ } acc_queue[NUM_ACC_QUEUE];
+ } usm;
+
+ /** @ordered_wq: used to serialize GT resets and TDRs */
+ struct workqueue_struct *ordered_wq;
+
+ /** @uc: micro controllers on the GT */
+ struct xe_uc uc;
+
+ /** @engine_ops: submission backend engine operations */
+ const struct xe_engine_ops *engine_ops;
+
+ /**
+ * @ring_ops: ring operations for this hw engine (1 per engine class)
+ */
+ const struct xe_ring_ops *ring_ops[XE_ENGINE_CLASS_MAX];
+
+ /** @fence_irq: fence IRQs (1 per engine class) */
+ struct xe_hw_fence_irq fence_irq[XE_ENGINE_CLASS_MAX];
+
+ /** @default_lrc: default LRC state */
+ void *default_lrc[XE_ENGINE_CLASS_MAX];
+
+ /** @hw_engines: hardware engines on the GT */
+ struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES];
+
+ /** @kernel_bb_pool: Pool from which batchbuffers are allocated */
+ struct xe_sa_manager kernel_bb_pool;
+
+ /** @migrate: Migration helper for vram blits and clearing */
+ struct xe_migrate *migrate;
+
+ /** @pcode: GT's PCODE */
+ struct {
+ /** @lock: protecting GT's PCODE mailbox data */
+ struct mutex lock;
+ } pcode;
+
+ /** @sysfs: sysfs' kobj used by xe_gt_sysfs */
+ struct kobject *sysfs;
+
+ /** @mocs: MOCS settings for this GT */
+ struct {
+ /** @uc_index: UC index */
+ u8 uc_index;
+ /** @wb_index: WB index, only used on L3_CCS platforms */
+ u8 wb_index;
+ } mocs;
+
+ /** @fuse_topo: GT topology reported by fuse registers */
+ struct {
+ /** @g_dss_mask: dual-subslices usable by geometry */
+ xe_dss_mask_t g_dss_mask;
+
+ /** @c_dss_mask: dual-subslices usable by compute */
+ xe_dss_mask_t c_dss_mask;
+
+ /** @eu_mask_per_dss: EU mask per DSS */
+ xe_eu_mask_t eu_mask_per_dss;
+ } fuse_topo;
+
+ /** @steering: register steering for individual HW units */
+ struct {
+ /** @ranges: register ranges used for this steering type */
+ const struct xe_mmio_range *ranges;
+
+ /** @group_target: target to steer accesses to */
+ u16 group_target;
+ /** @instance_target: instance to steer accesses to */
+ u16 instance_target;
+ } steering[NUM_STEERING_TYPES];
+
+ /**
+ * @mcr_lock: protects the MCR_SELECTOR register for the duration
+ * of a steered operation
+ */
+ spinlock_t mcr_lock;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
new file mode 100644
index 000000000000..3c285d849ef6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -0,0 +1,875 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_guc.h"
+#include "xe_guc_ads.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_hwconfig.h"
+#include "xe_guc_log.h"
+#include "xe_guc_reg.h"
+#include "xe_guc_pc.h"
+#include "xe_guc_submit.h"
+#include "xe_gt.h"
+#include "xe_platform_types.h"
+#include "xe_uc_fw.h"
+#include "xe_wopcm.h"
+#include "xe_mmio.h"
+#include "xe_force_wake.h"
+#include "i915_reg_defs.h"
+#include "gt/intel_gt_regs.h"
+
+/* TODO: move to common file */
+#define GUC_PVC_MOCS_INDEX_MASK REG_GENMASK(25, 24)
+#define PVC_MOCS_UC_INDEX 1
+#define PVC_GUC_MOCS_INDEX(index) REG_FIELD_PREP(GUC_PVC_MOCS_INDEX_MASK,\
+ index)
+
+static struct xe_gt *
+guc_to_gt(struct xe_guc *guc)
+{
+ return container_of(guc, struct xe_gt, uc.guc);
+}
+
+static struct xe_device *
+guc_to_xe(struct xe_guc *guc)
+{
+ return gt_to_xe(guc_to_gt(guc));
+}
+
+/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
+#define GUC_GGTT_TOP 0xFEE00000
+static u32 guc_bo_ggtt_addr(struct xe_guc *guc,
+ struct xe_bo *bo)
+{
+ u32 addr = xe_bo_ggtt_addr(bo);
+
+ XE_BUG_ON(addr < xe_wopcm_size(guc_to_xe(guc)));
+ XE_BUG_ON(range_overflows_t(u32, addr, bo->size, GUC_GGTT_TOP));
+
+ return addr;
+}
+
+static u32 guc_ctl_debug_flags(struct xe_guc *guc)
+{
+ u32 level = xe_guc_log_get_level(&guc->log);
+ u32 flags = 0;
+
+ if (!GUC_LOG_LEVEL_IS_VERBOSE(level))
+ flags |= GUC_LOG_DISABLED;
+ else
+ flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) <<
+ GUC_LOG_VERBOSITY_SHIFT;
+
+ return flags;
+}
+
+static u32 guc_ctl_feature_flags(struct xe_guc *guc)
+{
+ return GUC_CTL_ENABLE_SLPC;
+}
+
+static u32 guc_ctl_log_params_flags(struct xe_guc *guc)
+{
+ u32 offset = guc_bo_ggtt_addr(guc, guc->log.bo) >> PAGE_SHIFT;
+ u32 flags;
+
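+ /*
+ * Buffer sizes are programmed in units of 1M when 1M-aligned, else in
+ * 4K pages; the corresponding ALLOC_UNITS flag is set when the larger
+ * unit is used so the GuC knows how to interpret the value.
+ */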
+ #if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0)
+ #define LOG_UNIT SZ_1M
+ #define LOG_FLAG GUC_LOG_LOG_ALLOC_UNITS
+ #else
+ #define LOG_UNIT SZ_4K
+ #define LOG_FLAG 0
+ #endif
+
+ #if (((CAPTURE_BUFFER_SIZE) % SZ_1M) == 0)
+ #define CAPTURE_UNIT SZ_1M
+ #define CAPTURE_FLAG GUC_LOG_CAPTURE_ALLOC_UNITS
+ #else
+ #define CAPTURE_UNIT SZ_4K
+ #define CAPTURE_FLAG 0
+ #endif
+
+ BUILD_BUG_ON(!CRASH_BUFFER_SIZE);
+ BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, LOG_UNIT));
+ BUILD_BUG_ON(!DEBUG_BUFFER_SIZE);
+ BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, LOG_UNIT));
+ BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE);
+ BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT));
+
+ BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) >
+ (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT));
+ BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) >
+ (GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT));
+ BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) >
+ (GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT));
+
+ flags = GUC_LOG_VALID |
+ GUC_LOG_NOTIFY_ON_HALF_FULL |
+ CAPTURE_FLAG |
+ LOG_FLAG |
+ ((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) |
+ ((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) |
+ ((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) <<
+ GUC_LOG_CAPTURE_SHIFT) |
+ (offset << GUC_LOG_BUF_ADDR_SHIFT);
+
+ #undef LOG_UNIT
+ #undef LOG_FLAG
+ #undef CAPTURE_UNIT
+ #undef CAPTURE_FLAG
+
+ return flags;
+}
+
+static u32 guc_ctl_ads_flags(struct xe_guc *guc)
+{
+ u32 ads = guc_bo_ggtt_addr(guc, guc->ads.bo) >> PAGE_SHIFT;
+ u32 flags = ads << GUC_ADS_ADDR_SHIFT;
+
+ return flags;
+}
+
+static u32 guc_ctl_wa_flags(struct xe_guc *guc)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
+ u32 flags = 0;
+
+ /* Wa_22012773006:gen11,gen12 < XeHP */
+ if (GRAPHICS_VER(xe) >= 11 &&
+ GRAPHICS_VERx100(xe) < 1250)
+ flags |= GUC_WA_POLLCS;
+
+ /* Wa_16011759253 */
+ /* Wa_22011383443 */
+ if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_B0) ||
+ IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_B0))
+ flags |= GUC_WA_GAM_CREDITS;
+
+ /* Wa_14014475959 */
+ if (IS_PLATFORM_STEP(xe, XE_METEORLAKE, STEP_A0, STEP_B0) ||
+ xe->info.platform == XE_DG2)
+ flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
+
+ /*
+ * Wa_14012197797
+ * Wa_22011391025
+ *
+ * The same WA bit is used for both and 22011391025 is applicable to
+ * all DG2.
+ */
+ if (xe->info.platform == XE_DG2)
+ flags |= GUC_WA_DUAL_QUEUE;
+
+ /*
+ * Wa_2201180203
+ * GUC_WA_PRE_PARSER causes media workload hang for PVC A0 and PCIe
+ * errors. Disable this for PVC A0 steppings.
+ */
+ if (GRAPHICS_VER(xe) <= 12 &&
+ !IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_B0))
+ flags |= GUC_WA_PRE_PARSER;
+
+ /* Wa_16011777198 */
+ if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_C0) ||
+ IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G11, STEP_A0,
+ STEP_B0))
+ flags |= GUC_WA_RCS_RESET_BEFORE_RC6;
+
+ /*
+ * Wa_22012727170
+ * Wa_22012727685
+ *
+ * This WA is applicable to PVC CT A0, but causes media regressions.
+ * Drop the WA for PVC.
+ */
+ if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_C0) ||
+ IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G11, STEP_A0,
+ STEP_FOREVER))
+ flags |= GUC_WA_CONTEXT_ISOLATION;
+
+ /* Wa_16015675438, Wa_18020744125 */
+ if (!xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER))
+ flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST;
+
+ /* Wa_1509372804 */
+ if (IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_C0))
+ flags |= GUC_WA_RENDER_RST_RC6_EXIT;
+
+ return flags;
+}
+
+static u32 guc_ctl_devid(struct xe_guc *guc)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+
+ return (((u32)xe->info.devid) << 16) | xe->info.revid;
+}
+
+static void guc_init_params(struct xe_guc *guc)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ u32 *params = guc->params;
+ int i;
+
+ BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32));
+ BUILD_BUG_ON(SOFT_SCRATCH_COUNT != GUC_CTL_MAX_DWORDS + 2);
+
+ params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc);
+ params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc);
+ params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc);
+ params[GUC_CTL_ADS] = guc_ctl_ads_flags(guc);
+ params[GUC_CTL_WA] = guc_ctl_wa_flags(guc);
+ params[GUC_CTL_DEVID] = guc_ctl_devid(guc);
+
+ for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
+ drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]);
+}
+
+/*
+ * Initialise the GuC parameter block before starting the firmware
+ * transfer. These parameters are read by the firmware on startup
+ * and cannot be changed thereafter.
+ */
+void guc_write_params(struct xe_guc *guc)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ int i;
+
+ xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+ xe_mmio_write32(gt, SOFT_SCRATCH(0).reg, 0);
+
+ for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
+ xe_mmio_write32(gt, SOFT_SCRATCH(1 + i).reg, guc->params[i]);
+}
+
+#define MEDIA_GUC_HOST_INTERRUPT _MMIO(0x190304)
+
+int xe_guc_init(struct xe_guc *guc)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
+ int ret;
+
+ guc->fw.type = XE_UC_FW_TYPE_GUC;
+ ret = xe_uc_fw_init(&guc->fw);
+ if (ret)
+ goto out;
+
+ ret = xe_guc_log_init(&guc->log);
+ if (ret)
+ goto out;
+
+ ret = xe_guc_ads_init(&guc->ads);
+ if (ret)
+ goto out;
+
+ ret = xe_guc_ct_init(&guc->ct);
+ if (ret)
+ goto out;
+
+ ret = xe_guc_pc_init(&guc->pc);
+ if (ret)
+ goto out;
+
+ guc_init_params(guc);
+
+ if (xe_gt_is_media_type(gt))
+ guc->notify_reg = MEDIA_GUC_HOST_INTERRUPT.reg;
+ else
+ guc->notify_reg = GEN11_GUC_HOST_INTERRUPT.reg;
+
+ xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE);
+
+ return 0;
+
+out:
+ drm_err(&xe->drm, "GuC init failed with %d", ret);
+ return ret;
+}
+
+/**
+ * xe_guc_init_post_hwconfig - initialize GuC post hwconfig load
+ * @guc: The GuC object
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_init_post_hwconfig(struct xe_guc *guc)
+{
+ return xe_guc_ads_init_post_hwconfig(&guc->ads);
+}
+
+int xe_guc_post_load_init(struct xe_guc *guc)
+{
+ xe_guc_ads_populate_post_load(&guc->ads);
+
+ return 0;
+}
+
+int xe_guc_reset(struct xe_guc *guc)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
+ u32 guc_status;
+ int ret;
+
+ xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+ xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_GUC);
+
+ ret = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_GUC, 5);
+ if (ret) {
+ drm_err(&xe->drm, "GuC reset timed out, GEN6_GDRST=0x%8x\n",
+ xe_mmio_read32(gt, GEN6_GDRST.reg));
+ goto err_out;
+ }
+
+ guc_status = xe_mmio_read32(gt, GUC_STATUS.reg);
+ if (!(guc_status & GS_MIA_IN_RESET)) {
+ drm_err(&xe->drm,
+ "GuC status: 0x%x, MIA core expected to be in reset\n",
+ guc_status);
+ ret = -EIO;
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+
+ return ret;
+}
+
+static void guc_prepare_xfer(struct xe_guc *guc)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_device *xe = guc_to_xe(guc);
+ u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC |
+ GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA |
+ GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA |
+ GUC_ENABLE_MIA_CLOCK_GATING;
+
+ if (GRAPHICS_VERx100(xe) < 1250)
+ shim_flags |= GUC_DISABLE_SRAM_INIT_TO_ZEROES |
+ GUC_ENABLE_MIA_CACHING;
+
+ if (xe->info.platform == XE_PVC)
+ shim_flags |= PVC_GUC_MOCS_INDEX(PVC_MOCS_UC_INDEX);
+
+ /* Must program this register before loading the ucode with DMA */
+ xe_mmio_write32(gt, GUC_SHIM_CONTROL.reg, shim_flags);
+
+ xe_mmio_write32(gt, GEN9_GT_PM_CONFIG.reg, GT_DOORBELL_ENABLE);
+}
+
+/*
+ * Supporting MMIO & in memory RSA
+ */
+static int guc_xfer_rsa(struct xe_guc *guc)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ u32 rsa[UOS_RSA_SCRATCH_COUNT];
+ size_t copied;
+ int i;
+
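+ /*
+ * Keys up to 256 bytes are copied directly into the UOS_RSA_SCRATCH
+ * registers; larger keys are passed by GGTT address instead.
+ */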
+ if (guc->fw.rsa_size > 256) {
+ u32 rsa_ggtt_addr = xe_bo_ggtt_addr(guc->fw.bo) +
+ xe_uc_fw_rsa_offset(&guc->fw);
+ xe_mmio_write32(gt, UOS_RSA_SCRATCH(0).reg, rsa_ggtt_addr);
+ return 0;
+ }
+
+ copied = xe_uc_fw_copy_rsa(&guc->fw, rsa, sizeof(rsa));
+ if (copied < sizeof(rsa))
+ return -ENOMEM;
+
+ for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++)
+ xe_mmio_write32(gt, UOS_RSA_SCRATCH(i).reg, rsa[i]);
+
+ return 0;
+}
+
+/*
+ * Read the GuC status register (GUC_STATUS) and store it in the
+ * specified location; then return a boolean indicating whether
+ * the value matches either of two values representing completion
+ * of the GuC boot process.
+ *
+ * This is used for polling the GuC status in a wait_for()
+ * loop below.
+ */
+static bool guc_ready(struct xe_guc *guc, u32 *status)
+{
+ u32 val = xe_mmio_read32(guc_to_gt(guc), GUC_STATUS.reg);
+ u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, val);
+
+ *status = val;
+ return uk_val == XE_GUC_LOAD_STATUS_READY;
+}
+
+static int guc_wait_ucode(struct xe_guc *guc)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ u32 status;
+ int ret;
+
+ /*
+ * Wait for the GuC to start up.
+ * NB: Docs recommend not using the interrupt for completion.
+ * Measurements indicate this should take no more than 20ms
+ * (assuming the GT clock is at maximum frequency). So, a
+ * timeout here indicates that the GuC has failed and is unusable.
+ * (Higher levels of the driver may decide to reset the GuC and
+ * attempt the ucode load again if this happens.)
+ *
+ * FIXME: There is a known (but exceedingly unlikely) race condition
+ * where the asynchronous frequency management code could reduce
+ * the GT clock while a GuC reload is in progress (during a full
+ * GT reset). A fix is in progress but there are complex locking
+ * issues to be resolved. In the meantime bump the timeout to
+ * 200ms. Even at slowest clock, this should be sufficient. And
+ * in the working case, a larger timeout makes no difference.
+ */
+ ret = wait_for(guc_ready(guc, &status), 200);
+ if (ret) {
+ struct drm_device *drm = &xe->drm;
+ struct drm_printer p = drm_info_printer(drm->dev);
+
+ drm_info(drm, "GuC load failed: status = 0x%08X\n", status);
+ drm_info(drm, "GuC load failed: status: Reset = %d, "
+ "BootROM = 0x%02X, UKernel = 0x%02X, "
+ "MIA = 0x%02X, Auth = 0x%02X\n",
+ REG_FIELD_GET(GS_MIA_IN_RESET, status),
+ REG_FIELD_GET(GS_BOOTROM_MASK, status),
+ REG_FIELD_GET(GS_UKERNEL_MASK, status),
+ REG_FIELD_GET(GS_MIA_MASK, status),
+ REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
+
+ if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
+ drm_info(drm, "GuC firmware signature verification failed\n");
+ ret = -ENOEXEC;
+ }
+
+ if (REG_FIELD_GET(GS_UKERNEL_MASK, status) ==
+ XE_GUC_LOAD_STATUS_EXCEPTION) {
+ drm_info(drm, "GuC firmware exception. EIP: %#x\n",
+ xe_mmio_read32(guc_to_gt(guc),
+ SOFT_SCRATCH(13).reg));
+ ret = -ENXIO;
+ }
+
+ xe_guc_log_print(&guc->log, &p);
+ } else {
+ drm_dbg(&xe->drm, "GuC successfully loaded");
+ }
+
+ return ret;
+}
+
+static int __xe_guc_upload(struct xe_guc *guc)
+{
+ int ret;
+
+ guc_write_params(guc);
+ guc_prepare_xfer(guc);
+
+ /*
+ * Note that GuC needs the CSS header plus uKernel code to be copied
+ * by the DMA engine in one operation, whereas the RSA signature is
+ * loaded separately, either by copying it to the UOS_RSA_SCRATCH
+ * register (if key size <= 256) or through a ggtt-pinned vma (if key
+ * size > 256). The RSA size and therefore the way we provide it to the
+ * HW is fixed for each platform and hard-coded in the bootrom.
+ */
+ ret = guc_xfer_rsa(guc);
+ if (ret)
+ goto out;
+ /*
+ * Current uCode expects the code to be loaded at 8k; locations below
+ * this are used for the stack.
+ */
+ ret = xe_uc_fw_upload(&guc->fw, 0x2000, UOS_MOVE);
+ if (ret)
+ goto out;
+
+ /* Wait for authentication */
+ ret = guc_wait_ucode(guc);
+ if (ret)
+ goto out;
+
+ xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_RUNNING);
+ return 0;
+
+out:
+ xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
+ return 0 /* FIXME: ret, don't want to stop load currently */;
+}
+
+/**
+ * xe_guc_min_load_for_hwconfig - load minimal GuC and read hwconfig table
+ * @guc: The GuC object
+ *
+ * This function uploads a minimal GuC that does not support submissions but
+ * is in a state where the hwconfig table can be read. Next, it reads and parses
+ * the hwconfig table so it can be used for subsequent steps in the driver load.
+ * Lastly, it enables CT communication (XXX: this is needed for PFs/VFs only).
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_min_load_for_hwconfig(struct xe_guc *guc)
+{
+ int ret;
+
+ xe_guc_ads_populate_minimal(&guc->ads);
+
+ ret = __xe_guc_upload(guc);
+ if (ret)
+ return ret;
+
+ ret = xe_guc_hwconfig_init(guc);
+ if (ret)
+ return ret;
+
+ ret = xe_guc_enable_communication(guc);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+int xe_guc_upload(struct xe_guc *guc)
+{
+ xe_guc_ads_populate(&guc->ads);
+
+ return __xe_guc_upload(guc);
+}
+
+static void guc_handle_mmio_msg(struct xe_guc *guc)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ u32 msg;
+
+ xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+ msg = xe_mmio_read32(gt, SOFT_SCRATCH(15).reg);
+ msg &= XE_GUC_RECV_MSG_EXCEPTION |
+ XE_GUC_RECV_MSG_CRASH_DUMP_POSTED;
+ xe_mmio_write32(gt, SOFT_SCRATCH(15).reg, 0);
+
+ if (msg & XE_GUC_RECV_MSG_CRASH_DUMP_POSTED)
+ drm_err(&guc_to_xe(guc)->drm,
+ "Received early GuC crash dump notification!\n");
+
+ if (msg & XE_GUC_RECV_MSG_EXCEPTION)
+ drm_err(&guc_to_xe(guc)->drm,
+ "Received early GuC exception notification!\n");
+}
+
+void guc_enable_irq(struct xe_guc *guc)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ u32 events = xe_gt_is_media_type(gt) ?
+ REG_FIELD_PREP(ENGINE0_MASK, GUC_INTR_GUC2HOST) :
+ REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST);
+
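+ /*
+ * Unmask GuC-to-host interrupts. The media GT uses an RMW so that only
+ * its own bits in the interrupt mask register are cleared.
+ */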
+ xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg,
+ REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST));
+ if (xe_gt_is_media_type(gt))
+ xe_mmio_rmw32(gt, GEN11_GUC_SG_INTR_MASK.reg, events, 0);
+ else
+ xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg, ~events);
+}
+
+int xe_guc_enable_communication(struct xe_guc *guc)
+{
+ int err;
+
+ guc_enable_irq(guc);
+
+ xe_mmio_rmw32(guc_to_gt(guc), GEN6_PMINTRMSK.reg,
+ ARAT_EXPIRED_INTRMSK, 0);
+
+ err = xe_guc_ct_enable(&guc->ct);
+ if (err)
+ return err;
+
+ guc_handle_mmio_msg(guc);
+
+ return 0;
+}
+
+int xe_guc_suspend(struct xe_guc *guc)
+{
+ int ret;
+ u32 action[] = {
+ XE_GUC_ACTION_CLIENT_SOFT_RESET,
+ };
+
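+ /*
+ * Quiesce the GuC with a client soft reset; xe_guc_sanitize() below
+ * then marks the firmware as merely loadable so it is reloaded before
+ * the next use.
+ */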
+ ret = xe_guc_send_mmio(guc, action, ARRAY_SIZE(action));
+ if (ret) {
+ drm_err(&guc_to_xe(guc)->drm,
+ "GuC suspend: CLIENT_SOFT_RESET fail: %d!\n", ret);
+ return ret;
+ }
+
+ xe_guc_sanitize(guc);
+ return 0;
+}
+
+void xe_guc_notify(struct xe_guc *guc)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+
+ xe_mmio_write32(gt, guc->notify_reg, GUC_SEND_TRIGGER);
+}
+
+int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr)
+{
+ u32 action[] = {
+ XE_GUC_ACTION_AUTHENTICATE_HUC,
+ rsa_addr
+ };
+
+ return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action));
+}
+
+#define MEDIA_SOFT_SCRATCH(n) _MMIO(0x190310 + (n) * 4)
+#define MEDIA_SOFT_SCRATCH_COUNT 4
+
+int xe_guc_send_mmio(struct xe_guc *guc, const u32 *request, u32 len)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
+ u32 header;
+ u32 reply_reg = xe_gt_is_media_type(gt) ?
+ MEDIA_SOFT_SCRATCH(0).reg : GEN11_SOFT_SCRATCH(0).reg;
+ int ret;
+ int i;
+
+ XE_BUG_ON(guc->ct.enabled);
+ XE_BUG_ON(!len);
+ XE_BUG_ON(len > GEN11_SOFT_SCRATCH_COUNT);
+ XE_BUG_ON(len > MEDIA_SOFT_SCRATCH_COUNT);
+ XE_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) !=
+ GUC_HXG_ORIGIN_HOST);
+ XE_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) !=
+ GUC_HXG_TYPE_REQUEST);
+
+retry:
+ /* Not in critical data-path, just do if else for GT type */
+ if (xe_gt_is_media_type(gt)) {
+ for (i = 0; i < len; ++i)
+ xe_mmio_write32(gt, MEDIA_SOFT_SCRATCH(i).reg,
+ request[i]);
+#define LAST_INDEX MEDIA_SOFT_SCRATCH_COUNT - 1
+ xe_mmio_read32(gt, MEDIA_SOFT_SCRATCH(LAST_INDEX).reg);
+ } else {
+ for (i = 0; i < len; ++i)
+ xe_mmio_write32(gt, GEN11_SOFT_SCRATCH(i).reg,
+ request[i]);
+#undef LAST_INDEX
+#define LAST_INDEX GEN11_SOFT_SCRATCH_COUNT - 1
+ xe_mmio_read32(gt, GEN11_SOFT_SCRATCH(LAST_INDEX).reg);
+ }
+
+ xe_guc_notify(guc);
+
+ ret = xe_mmio_wait32(gt, reply_reg,
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN,
+ GUC_HXG_ORIGIN_GUC),
+ GUC_HXG_MSG_0_ORIGIN,
+ 50);
+ if (ret) {
+timeout:
+ drm_err(&xe->drm, "mmio request 0x%08x: no reply 0x%08x\n",
+ request[0], xe_mmio_read32(gt, reply_reg));
+ return ret;
+ }
+
+ header = xe_mmio_read32(gt, reply_reg);
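+ /*
+ * A BUSY reply means the GuC has accepted the request but has not yet
+ * produced the final response; keep polling the same register.
+ */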
+ if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
+ GUC_HXG_TYPE_NO_RESPONSE_BUSY) {
+#define done ({ header = xe_mmio_read32(gt, reply_reg); \
+ FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != \
+ GUC_HXG_ORIGIN_GUC || \
+ FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != \
+ GUC_HXG_TYPE_NO_RESPONSE_BUSY; })
+
+ ret = wait_for(done, 1000);
+ if (unlikely(ret))
+ goto timeout;
+ if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) !=
+ GUC_HXG_ORIGIN_GUC))
+ goto proto;
+#undef done
+ }
+
+ if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
+ GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
+ u32 reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, header);
+
+ drm_dbg(&xe->drm, "mmio request %#x: retrying, reason %u\n",
+ request[0], reason);
+ goto retry;
+ }
+
+ if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
+ GUC_HXG_TYPE_RESPONSE_FAILURE) {
+ u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header);
+ u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header);
+
+ drm_err(&xe->drm, "mmio request %#x: failure %x/%u\n",
+ request[0], error, hint);
+ return -ENXIO;
+ }
+
+ if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) !=
+ GUC_HXG_TYPE_RESPONSE_SUCCESS) {
+proto:
+ drm_err(&xe->drm, "mmio request %#x: unexpected reply %#x\n",
+ request[0], header);
+ return -EPROTO;
+ }
+
+ /* Use data from the GuC response as our return value */
+ return FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, header);
+}
+
+static int guc_self_cfg(struct xe_guc *guc, u16 key, u16 len, u64 val)
+{
+ u32 request[HOST2GUC_SELF_CFG_REQUEST_MSG_LEN] = {
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
+ GUC_ACTION_HOST2GUC_SELF_CFG),
+ FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY, key) |
+ FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN, len),
+ FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32,
+ lower_32_bits(val)),
+ FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64,
+ upper_32_bits(val)),
+ };
+ int ret;
+
+ XE_BUG_ON(len > 2);
+ XE_BUG_ON(len == 1 && upper_32_bits(val));
+
+ /* Self config must go over MMIO */
+ ret = xe_guc_send_mmio(guc, request, ARRAY_SIZE(request));
+
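+ /*
+ * The MMIO reply's DATA0 is returned directly: 1 means the KLV was
+ * applied, 0 means the key was not accepted.
+ */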
+ if (unlikely(ret < 0))
+ return ret;
+ if (unlikely(ret > 1))
+ return -EPROTO;
+ if (unlikely(!ret))
+ return -ENOKEY;
+
+ return 0;
+}
+
+int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val)
+{
+ return guc_self_cfg(guc, key, 1, val);
+}
+
+int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val)
+{
+ return guc_self_cfg(guc, key, 2, val);
+}
+
+void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir)
+{
+ if (iir & GUC_INTR_GUC2HOST)
+ xe_guc_ct_irq_handler(&guc->ct);
+}
+
+void xe_guc_sanitize(struct xe_guc *guc)
+{
+ xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE);
+ xe_guc_ct_disable(&guc->ct);
+}
+
+int xe_guc_reset_prepare(struct xe_guc *guc)
+{
+ return xe_guc_submit_reset_prepare(guc);
+}
+
+void xe_guc_reset_wait(struct xe_guc *guc)
+{
+ xe_guc_submit_reset_wait(guc);
+}
+
+void xe_guc_stop_prepare(struct xe_guc *guc)
+{
+ XE_WARN_ON(xe_guc_pc_stop(&guc->pc));
+}
+
+int xe_guc_stop(struct xe_guc *guc)
+{
+ int ret;
+
+ xe_guc_ct_disable(&guc->ct);
+
+ ret = xe_guc_submit_stop(guc);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+int xe_guc_start(struct xe_guc *guc)
+{
+ int ret;
+
+ ret = xe_guc_submit_start(guc);
+ if (ret)
+ return ret;
+
+ ret = xe_guc_pc_start(&guc->pc);
+ XE_WARN_ON(ret);
+
+ return 0;
+}
+
+void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ u32 status;
+ int err;
+ int i;
+
+ xe_uc_fw_print(&guc->fw, p);
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return;
+
+ status = xe_mmio_read32(gt, GUC_STATUS.reg);
+
+ drm_printf(p, "\nGuC status 0x%08x:\n", status);
+ drm_printf(p, "\tBootrom status = 0x%x\n",
+ (status & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT);
+ drm_printf(p, "\tuKernel status = 0x%x\n",
+ (status & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT);
+ drm_printf(p, "\tMIA Core status = 0x%x\n",
+ (status & GS_MIA_MASK) >> GS_MIA_SHIFT);
+ drm_printf(p, "\tLog level = %d\n",
+ xe_guc_log_get_level(&guc->log));
+
+ drm_puts(p, "\nScratch registers:\n");
+ for (i = 0; i < SOFT_SCRATCH_COUNT; i++) {
+ drm_printf(p, "\t%2d: \t0x%x\n",
+ i, xe_mmio_read32(gt, SOFT_SCRATCH(i).reg));
+ }
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+
+ xe_guc_ct_print(&guc->ct, p);
+ xe_guc_submit_print(guc, p);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
new file mode 100644
index 000000000000..72b71d75566c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_H_
+#define _XE_GUC_H_
+
+#include "xe_hw_engine_types.h"
+#include "xe_guc_types.h"
+#include "xe_macros.h"
+
+struct drm_printer;
+
+int xe_guc_init(struct xe_guc *guc);
+int xe_guc_init_post_hwconfig(struct xe_guc *guc);
+int xe_guc_post_load_init(struct xe_guc *guc);
+int xe_guc_reset(struct xe_guc *guc);
+int xe_guc_upload(struct xe_guc *guc);
+int xe_guc_min_load_for_hwconfig(struct xe_guc *guc);
+int xe_guc_enable_communication(struct xe_guc *guc);
+int xe_guc_suspend(struct xe_guc *guc);
+void xe_guc_notify(struct xe_guc *guc);
+int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr);
+int xe_guc_send_mmio(struct xe_guc *guc, const u32 *request, u32 len);
+int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val);
+int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val);
+void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir);
+void xe_guc_sanitize(struct xe_guc *guc);
+void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p);
+int xe_guc_reset_prepare(struct xe_guc *guc);
+void xe_guc_reset_wait(struct xe_guc *guc);
+void xe_guc_stop_prepare(struct xe_guc *guc);
+int xe_guc_stop(struct xe_guc *guc);
+int xe_guc_start(struct xe_guc *guc);
+
+static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class)
+{
+ switch (class) {
+ case XE_ENGINE_CLASS_RENDER:
+ return GUC_RENDER_CLASS;
+ case XE_ENGINE_CLASS_VIDEO_DECODE:
+ return GUC_VIDEO_CLASS;
+ case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+ return GUC_VIDEOENHANCE_CLASS;
+ case XE_ENGINE_CLASS_COPY:
+ return GUC_BLITTER_CLASS;
+ case XE_ENGINE_CLASS_COMPUTE:
+ return GUC_COMPUTE_CLASS;
+ case XE_ENGINE_CLASS_OTHER:
+ default:
+ XE_WARN_ON(class);
+ return -1;
+ }
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
new file mode 100644
index 000000000000..0c08cecaca40
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -0,0 +1,676 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_ads.h"
+#include "xe_guc_reg.h"
+#include "xe_hw_engine.h"
+#include "xe_lrc.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_platform_types.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_engine_regs.h"
+
+/* Slack of a few additional entries per engine */
+#define ADS_REGSET_EXTRA_MAX 8
+
+static struct xe_guc *
+ads_to_guc(struct xe_guc_ads *ads)
+{
+ return container_of(ads, struct xe_guc, ads);
+}
+
+static struct xe_gt *
+ads_to_gt(struct xe_guc_ads *ads)
+{
+ return container_of(ads, struct xe_gt, uc.guc.ads);
+}
+
+static struct xe_device *
+ads_to_xe(struct xe_guc_ads *ads)
+{
+ return gt_to_xe(ads_to_gt(ads));
+}
+
+static struct iosys_map *
+ads_to_map(struct xe_guc_ads *ads)
+{
+ return &ads->bo->vmap;
+}
+
+/* UM Queue parameters: */
+#define GUC_UM_QUEUE_SIZE (SZ_64K)
+#define GUC_PAGE_RES_TIMEOUT_US (-1)
+
+/*
+ * The Additional Data Struct (ADS) has pointers for different buffers used by
+ * the GuC. One single gem object contains the ADS struct itself (guc_ads) and
+ * all the extra buffers indirectly linked via the ADS struct's entries.
+ *
+ * Layout of the ADS blob allocated for the GuC:
+ *
+ * +---------------------------------------+ <== base
+ * | guc_ads |
+ * +---------------------------------------+
+ * | guc_policies |
+ * +---------------------------------------+
+ * | guc_gt_system_info |
+ * +---------------------------------------+
+ * | guc_engine_usage |
+ * +---------------------------------------+
+ * | guc_um_init_params |
+ * +---------------------------------------+ <== static
+ * | guc_mmio_reg[countA] (engine 0.0) |
+ * | guc_mmio_reg[countB] (engine 0.1) |
+ * | guc_mmio_reg[countC] (engine 1.0) |
+ * | ... |
+ * +---------------------------------------+ <== dynamic
+ * | padding |
+ * +---------------------------------------+ <== 4K aligned
+ * | golden contexts |
+ * +---------------------------------------+
+ * | padding |
+ * +---------------------------------------+ <== 4K aligned
+ * | capture lists |
+ * +---------------------------------------+
+ * | padding |
+ * +---------------------------------------+ <== 4K aligned
+ * | UM queues |
+ * +---------------------------------------+
+ * | padding |
+ * +---------------------------------------+ <== 4K aligned
+ * | private data |
+ * +---------------------------------------+
+ * | padding |
+ * +---------------------------------------+ <== 4K aligned
+ */
+struct __guc_ads_blob {
+ struct guc_ads ads;
+ struct guc_policies policies;
+ struct guc_gt_system_info system_info;
+ struct guc_engine_usage engine_usage;
+ struct guc_um_init_params um_init_params;
+ /* From here on, location is dynamic! Refer to above diagram. */
+ struct guc_mmio_reg regset[0];
+} __packed;
+
+#define ads_blob_read(ads_, field_) \
+ xe_map_rd_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \
+ struct __guc_ads_blob, field_)
+
+#define ads_blob_write(ads_, field_, val_) \
+ xe_map_wr_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \
+ struct __guc_ads_blob, field_, val_)
+
+#define info_map_write(xe_, map_, field_, val_) \
+ xe_map_wr_field(xe_, map_, 0, struct guc_gt_system_info, field_, val_)
+
+#define info_map_read(xe_, map_, field_) \
+ xe_map_rd_field(xe_, map_, 0, struct guc_gt_system_info, field_)
+
+static size_t guc_ads_regset_size(struct xe_guc_ads *ads)
+{
+ XE_BUG_ON(!ads->regset_size);
+
+ return ads->regset_size;
+}
+
+static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads)
+{
+ return PAGE_ALIGN(ads->golden_lrc_size);
+}
+
+static size_t guc_ads_capture_size(struct xe_guc_ads *ads)
+{
+ /* FIXME: Allocate a proper capture list */
+ return PAGE_ALIGN(PAGE_SIZE);
+}
+
+static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads)
+{
+ struct xe_device *xe = ads_to_xe(ads);
+
+ if (!xe->info.supports_usm)
+ return 0;
+
+ return GUC_UM_QUEUE_SIZE * GUC_UM_HW_QUEUE_MAX;
+}
+
+static size_t guc_ads_private_data_size(struct xe_guc_ads *ads)
+{
+ return PAGE_ALIGN(ads_to_guc(ads)->fw.private_data_size);
+}
+
+static size_t guc_ads_regset_offset(struct xe_guc_ads *ads)
+{
+ return offsetof(struct __guc_ads_blob, regset);
+}
+
+static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads)
+{
+ size_t offset;
+
+ offset = guc_ads_regset_offset(ads) +
+ guc_ads_regset_size(ads);
+
+ return PAGE_ALIGN(offset);
+}
+
+static size_t guc_ads_capture_offset(struct xe_guc_ads *ads)
+{
+ size_t offset;
+
+ offset = guc_ads_golden_lrc_offset(ads) +
+ guc_ads_golden_lrc_size(ads);
+
+ return PAGE_ALIGN(offset);
+}
+
+static size_t guc_ads_um_queues_offset(struct xe_guc_ads *ads)
+{
+ u32 offset;
+
+ offset = guc_ads_capture_offset(ads) +
+ guc_ads_capture_size(ads);
+
+ return PAGE_ALIGN(offset);
+}
+
+static size_t guc_ads_private_data_offset(struct xe_guc_ads *ads)
+{
+ size_t offset;
+
+ offset = guc_ads_um_queues_offset(ads) +
+ guc_ads_um_queues_size(ads);
+
+ return PAGE_ALIGN(offset);
+}
+
+static size_t guc_ads_size(struct xe_guc_ads *ads)
+{
+ return guc_ads_private_data_offset(ads) +
+ guc_ads_private_data_size(ads);
+}
+
+static void guc_ads_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_guc_ads *ads = arg;
+
+ xe_bo_unpin_map_no_vm(ads->bo);
+}
+
+static size_t calculate_regset_size(struct xe_gt *gt)
+{
+ struct xe_reg_sr_entry *sr_entry;
+ unsigned long sr_idx;
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ unsigned int count = 0;
+
+ for_each_hw_engine(hwe, gt, id)
+ xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry)
+ count++;
+
+ count += (ADS_REGSET_EXTRA_MAX + LNCFCMOCS_REG_COUNT) * XE_NUM_HW_ENGINES;
+
+ return count * sizeof(struct guc_mmio_reg);
+}
+
+static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class)
+{
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ u32 mask = 0;
+
+ for_each_hw_engine(hwe, gt, id)
+ if (hwe->class == class)
+ mask |= BIT(hwe->instance);
+
+ return mask;
+}
+
+static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads)
+{
+ struct xe_device *xe = ads_to_xe(ads);
+ struct xe_gt *gt = ads_to_gt(ads);
+ size_t total_size = 0, alloc_size, real_size;
+ int class;
+
+ for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
+ if (class == XE_ENGINE_CLASS_OTHER)
+ continue;
+
+ if (!engine_enable_mask(gt, class))
+ continue;
+
+ real_size = xe_lrc_size(xe, class);
+ alloc_size = PAGE_ALIGN(real_size);
+ total_size += alloc_size;
+ }
+
+ return total_size;
+}
+
+#define MAX_GOLDEN_LRC_SIZE (SZ_4K * 64)
+
+int xe_guc_ads_init(struct xe_guc_ads *ads)
+{
+ struct xe_device *xe = ads_to_xe(ads);
+ struct xe_gt *gt = ads_to_gt(ads);
+ struct xe_bo *bo;
+ int err;
+
+ ads->golden_lrc_size = calculate_golden_lrc_size(ads);
+ ads->regset_size = calculate_regset_size(gt);
+
+ bo = xe_bo_create_pin_map(xe, gt, NULL, guc_ads_size(ads) +
+ MAX_GOLDEN_LRC_SIZE,
+ ttm_bo_type_kernel,
+ XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+ XE_BO_CREATE_GGTT_BIT);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ ads->bo = bo;
+
+ err = drmm_add_action_or_reset(&xe->drm, guc_ads_fini, ads);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+/**
+ * xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load
+ * @ads: Additional data structures object
+ *
+ * Recalculate golden_lrc_size & regset_size as the number of hardware engines may
+ * have changed after the hwconfig was loaded. Also verify the new sizes fit in
+ * the already allocated ADS buffer object.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads)
+{
+ struct xe_gt *gt = ads_to_gt(ads);
+ u32 prev_regset_size = ads->regset_size;
+
+ XE_BUG_ON(!ads->bo);
+
+ ads->golden_lrc_size = calculate_golden_lrc_size(ads);
+ ads->regset_size = calculate_regset_size(gt);
+
+ XE_WARN_ON(ads->golden_lrc_size +
+ (ads->regset_size - prev_regset_size) >
+ MAX_GOLDEN_LRC_SIZE);
+
+ return 0;
+}
+
+static void guc_policies_init(struct xe_guc_ads *ads)
+{
+ ads_blob_write(ads, policies.dpc_promote_time,
+ GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US);
+ ads_blob_write(ads, policies.max_num_work_items,
+ GLOBAL_POLICY_MAX_NUM_WI);
+ ads_blob_write(ads, policies.global_flags, 0);
+ ads_blob_write(ads, policies.is_valid, 1);
+}
+
+static void fill_engine_enable_masks(struct xe_gt *gt,
+ struct iosys_map *info_map)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ info_map_write(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS],
+ engine_enable_mask(gt, XE_ENGINE_CLASS_RENDER));
+ info_map_write(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS],
+ engine_enable_mask(gt, XE_ENGINE_CLASS_COPY));
+ info_map_write(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS],
+ engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_DECODE));
+ info_map_write(xe, info_map,
+ engine_enabled_masks[GUC_VIDEOENHANCE_CLASS],
+ engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE));
+ info_map_write(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS],
+ engine_enable_mask(gt, XE_ENGINE_CLASS_COMPUTE));
+}
+
+static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads)
+{
+ struct xe_device *xe = ads_to_xe(ads);
+ struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+ offsetof(struct __guc_ads_blob, system_info));
+ u8 guc_class;
+
+ for (guc_class = 0; guc_class < GUC_MAX_ENGINE_CLASSES; ++guc_class) {
+ if (!info_map_read(xe, &info_map,
+ engine_enabled_masks[guc_class]))
+ continue;
+
+ ads_blob_write(ads, ads.eng_state_size[guc_class],
+ guc_ads_golden_lrc_size(ads) -
+ xe_lrc_skip_size(xe));
+ ads_blob_write(ads, ads.golden_context_lrca[guc_class],
+ xe_bo_ggtt_addr(ads->bo) +
+ guc_ads_golden_lrc_offset(ads));
+ }
+}
+
+static void guc_mapping_table_init_invalid(struct xe_gt *gt,
+ struct iosys_map *info_map)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ unsigned int i, j;
+
+ /* Table must be set to invalid values for entries not used */
+ for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i)
+ for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j)
+ info_map_write(xe, info_map, mapping_table[i][j],
+ GUC_MAX_INSTANCES_PER_CLASS);
+}
+
+static void guc_mapping_table_init(struct xe_gt *gt,
+ struct iosys_map *info_map)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+
+ guc_mapping_table_init_invalid(gt, info_map);
+
+ for_each_hw_engine(hwe, gt, id) {
+ u8 guc_class;
+
+ guc_class = xe_engine_class_to_guc_class(hwe->class);
+ info_map_write(xe, info_map,
+ mapping_table[guc_class][hwe->logical_instance],
+ hwe->instance);
+ }
+}
+
+static void guc_capture_list_init(struct xe_guc_ads *ads)
+{
+ int i, j;
+ u32 addr = xe_bo_ggtt_addr(ads->bo) + guc_ads_capture_offset(ads);
+
+ /* FIXME: Populate a proper capture list */
+ for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
+ for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) {
+ ads_blob_write(ads, ads.capture_instance[i][j], addr);
+ ads_blob_write(ads, ads.capture_class[i][j], addr);
+ }
+
+ ads_blob_write(ads, ads.capture_global[i], addr);
+ }
+}
+
+static void guc_mmio_regset_write_one(struct xe_guc_ads *ads,
+ struct iosys_map *regset_map,
+ u32 reg, u32 flags,
+ unsigned int n_entry)
+{
+ struct guc_mmio_reg entry = {
+ .offset = reg,
+ .flags = flags,
+ /* TODO: steering */
+ };
+
+ xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry),
+ &entry, sizeof(entry));
+}
+
+static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
+ struct iosys_map *regset_map,
+ struct xe_hw_engine *hwe)
+{
+ struct xe_hw_engine *hwe_rcs_reset_domain =
+ xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER);
+ struct xe_reg_sr_entry *entry;
+ unsigned long idx;
+ unsigned count = 0;
+ const struct {
+ u32 reg;
+ u32 flags;
+ bool skip;
+ } *e, extra_regs[] = {
+ { .reg = RING_MODE_GEN7(hwe->mmio_base).reg, },
+ { .reg = RING_HWS_PGA(hwe->mmio_base).reg, },
+ { .reg = RING_IMR(hwe->mmio_base).reg, },
+ { .reg = GEN12_RCU_MODE.reg, .flags = 0x3,
+ .skip = hwe != hwe_rcs_reset_domain },
+ };
+ u32 i;
+
+ BUILD_BUG_ON(ARRAY_SIZE(extra_regs) > ADS_REGSET_EXTRA_MAX);
+
+ xa_for_each(&hwe->reg_sr.xa, idx, entry) {
+ u32 flags = entry->masked_reg ? GUC_REGSET_MASKED : 0;
+
+ guc_mmio_regset_write_one(ads, regset_map, idx, flags, count++);
+ }
+
+ for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) {
+ if (e->skip)
+ continue;
+
+ guc_mmio_regset_write_one(ads, regset_map,
+ e->reg, e->flags, count++);
+ }
+
+ for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) {
+ guc_mmio_regset_write_one(ads, regset_map,
+ GEN9_LNCFCMOCS(i).reg, 0, count++);
+ }
+
+ XE_BUG_ON(ads->regset_size < (count * sizeof(struct guc_mmio_reg)));
+
+ return count;
+}
+
+static void guc_mmio_reg_state_init(struct xe_guc_ads *ads)
+{
+ size_t regset_offset = guc_ads_regset_offset(ads);
+ struct xe_gt *gt = ads_to_gt(ads);
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ u32 addr = xe_bo_ggtt_addr(ads->bo) + regset_offset;
+ struct iosys_map regset_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+ regset_offset);
+
+ for_each_hw_engine(hwe, gt, id) {
+ unsigned int count;
+ u8 gc;
+
+ /*
+ * 1. Write all MMIO entries for this engine to the table. No
+ * need to worry about fused-off engines or engines with no
+ * entries in the regset: the reg_state_list has already been
+ * zeroed by xe_guc_ads_populate()
+ */
+ count = guc_mmio_regset_write(ads, &regset_map, hwe);
+ if (!count)
+ continue;
+
+ /*
+ * 2. Record in the header (ads.reg_state_list) the address
+ * location and number of entries
+ */
+ gc = xe_engine_class_to_guc_class(hwe->class);
+ ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].address, addr);
+ ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].count, count);
+
+ addr += count * sizeof(struct guc_mmio_reg);
+ iosys_map_incr(&regset_map, count * sizeof(struct guc_mmio_reg));
+ }
+}
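To make the bookkeeping in guc_mmio_reg_state_init() concrete, the following stand-alone C sketch (not driver code) packs a few per-engine register lists back to back and records an (address, count) pair for each, mirroring how addr and regset_map advance above; the engine count, per-engine entry counts and the GGTT base address are invented for the example:

/*
 * Illustrative only: each engine's save/restore entries are packed back to
 * back and the header records where each engine's slice starts and how many
 * entries it holds.
 */
#include <stdio.h>

struct ex_reg_list { unsigned long address; unsigned int count; };

int main(void)
{
	const unsigned int counts[] = { 12, 7, 9 };	/* assumed per-engine entry counts */
	const unsigned int entry_sz = 16;		/* sizeof(struct guc_mmio_reg): four u32 fields */
	struct ex_reg_list hdr[3];
	unsigned long addr = 0x100000;			/* assumed GGTT address of the regset */
	unsigned int i;

	for (i = 0; i < 3; i++) {
		hdr[i].address = addr;
		hdr[i].count = counts[i];
		addr += counts[i] * entry_sz;		/* next engine starts right after */
	}

	for (i = 0; i < 3; i++)
		printf("engine %u: %u entries @0x%lx\n", i, hdr[i].count, hdr[i].address);
	return 0;
}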
+
+static void guc_um_init_params(struct xe_guc_ads *ads)
+{
+ u32 um_queue_offset = guc_ads_um_queues_offset(ads);
+ u64 base_dpa;
+ u32 base_ggtt;
+ int i;
+
+ base_ggtt = xe_bo_ggtt_addr(ads->bo) + um_queue_offset;
+ base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset;
+
+ for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) {
+ ads_blob_write(ads, um_init_params.queue_params[i].base_dpa,
+ base_dpa + (i * GUC_UM_QUEUE_SIZE));
+ ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address,
+ base_ggtt + (i * GUC_UM_QUEUE_SIZE));
+ ads_blob_write(ads, um_init_params.queue_params[i].size_in_bytes,
+ GUC_UM_QUEUE_SIZE);
+ }
+
+ ads_blob_write(ads, um_init_params.page_response_timeout_in_us,
+ GUC_PAGE_RES_TIMEOUT_US);
+}
+
+static void guc_doorbell_init(struct xe_guc_ads *ads)
+{
+ struct xe_device *xe = ads_to_xe(ads);
+ struct xe_gt *gt = ads_to_gt(ads);
+
+ if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) {
+ u32 distdbreg =
+ xe_mmio_read32(gt, GEN12_DIST_DBS_POPULATED.reg);
+
+ ads_blob_write(ads,
+ system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI],
+ ((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT)
+ & GEN12_DOORBELLS_PER_SQIDI) + 1);
+ }
+}
+
+/**
+ * xe_guc_ads_populate_minimal - populate minimal ADS
+ * @ads: Additional data structures object
+ *
+ * This function populates a minimal ADS that does not support submissions but
+ * is enough for the GuC to load and for the hwconfig table to be read.
+ */
+void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads)
+{
+ struct xe_gt *gt = ads_to_gt(ads);
+ struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+ offsetof(struct __guc_ads_blob, system_info));
+ u32 base = xe_bo_ggtt_addr(ads->bo);
+
+ XE_BUG_ON(!ads->bo);
+
+ xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
+ guc_policies_init(ads);
+ guc_prep_golden_lrc_null(ads);
+ guc_mapping_table_init_invalid(gt, &info_map);
+ guc_doorbell_init(ads);
+
+ ads_blob_write(ads, ads.scheduler_policies, base +
+ offsetof(struct __guc_ads_blob, policies));
+ ads_blob_write(ads, ads.gt_system_info, base +
+ offsetof(struct __guc_ads_blob, system_info));
+ ads_blob_write(ads, ads.private_data, base +
+ guc_ads_private_data_offset(ads));
+}
+
+void xe_guc_ads_populate(struct xe_guc_ads *ads)
+{
+ struct xe_device *xe = ads_to_xe(ads);
+ struct xe_gt *gt = ads_to_gt(ads);
+ struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+ offsetof(struct __guc_ads_blob, system_info));
+ u32 base = xe_bo_ggtt_addr(ads->bo);
+
+ XE_BUG_ON(!ads->bo);
+
+ xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
+ guc_policies_init(ads);
+ fill_engine_enable_masks(gt, &info_map);
+ guc_mmio_reg_state_init(ads);
+ guc_prep_golden_lrc_null(ads);
+ guc_mapping_table_init(gt, &info_map);
+ guc_capture_list_init(ads);
+ guc_doorbell_init(ads);
+
+ if (xe->info.supports_usm) {
+ guc_um_init_params(ads);
+ ads_blob_write(ads, ads.um_init_data, base +
+ offsetof(struct __guc_ads_blob, um_init_params));
+ }
+
+ ads_blob_write(ads, ads.scheduler_policies, base +
+ offsetof(struct __guc_ads_blob, policies));
+ ads_blob_write(ads, ads.gt_system_info, base +
+ offsetof(struct __guc_ads_blob, system_info));
+ ads_blob_write(ads, ads.private_data, base +
+ guc_ads_private_data_offset(ads));
+}
+
+static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
+{
+ struct xe_device *xe = ads_to_xe(ads);
+ struct xe_gt *gt = ads_to_gt(ads);
+ struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
+ offsetof(struct __guc_ads_blob, system_info));
+ size_t total_size = 0, alloc_size, real_size;
+ u32 addr_ggtt, offset;
+ int class;
+
+ offset = guc_ads_golden_lrc_offset(ads);
+ addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
+
+ for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
+ u8 guc_class;
+
+ if (class == XE_ENGINE_CLASS_OTHER)
+ continue;
+
+ guc_class = xe_engine_class_to_guc_class(class);
+
+ if (!info_map_read(xe, &info_map,
+ engine_enabled_masks[guc_class]))
+ continue;
+
+ XE_BUG_ON(!gt->default_lrc[class]);
+
+ real_size = xe_lrc_size(xe, class);
+ alloc_size = PAGE_ALIGN(real_size);
+ total_size += alloc_size;
+
+ /*
+ * This interface is slightly confusing. We need to pass the
+ * base address of the full golden context and the size of just
+ * the engine state, which is the section of the context image
+ * that starts after the execlists LRC registers. This is
+ * required to allow the GuC to restore just the engine state
+ * when a watchdog reset occurs.
+ * We calculate the engine state size by removing the size of
+ * what comes before it in the context image (which is identical
+ * on all engines).
+ */
+ ads_blob_write(ads, ads.eng_state_size[guc_class],
+ real_size - xe_lrc_skip_size(xe));
+ ads_blob_write(ads, ads.golden_context_lrca[guc_class],
+ addr_ggtt);
+
+ xe_map_memcpy_to(xe, ads_to_map(ads), offset,
+ gt->default_lrc[class], real_size);
+
+ addr_ggtt += alloc_size;
+ offset += alloc_size;
+ }
+
+ XE_BUG_ON(total_size != ads->golden_lrc_size);
+}
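The size split described in the comment above can be illustrated with a trivial stand-alone sketch (not driver code); the LRC sizes below are assumed values rather than what xe_lrc_size()/xe_lrc_skip_size() actually return:

/*
 * Illustrative only: the GuC gets the GGTT address of the full golden
 * context, but the reported engine-state size excludes everything up to the
 * end of the execlists LRC registers.
 */
#include <stdio.h>

int main(void)
{
	unsigned long lrc_total = 0x11000;	/* assumed xe_lrc_size() for a class */
	unsigned long lrc_skip  = 0x2000;	/* assumed xe_lrc_skip_size()        */

	printf("eng_state_size = 0x%lx\n", lrc_total - lrc_skip);
	return 0;
}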
+
+void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads)
+{
+ guc_populate_golden_lrc(ads);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.h b/drivers/gpu/drm/xe/xe_guc_ads.h
new file mode 100644
index 000000000000..138ef6267671
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ads.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_ADS_H_
+#define _XE_GUC_ADS_H_
+
+#include "xe_guc_ads_types.h"
+
+int xe_guc_ads_init(struct xe_guc_ads *ads);
+int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads);
+void xe_guc_ads_populate(struct xe_guc_ads *ads);
+void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads);
+void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ads_types.h b/drivers/gpu/drm/xe/xe_guc_ads_types.h
new file mode 100644
index 000000000000..4afe44bece4b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ads_types.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_ADS_TYPES_H_
+#define _XE_GUC_ADS_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+
+/**
+ * struct xe_guc_ads - GuC additional data structures (ADS)
+ */
+struct xe_guc_ads {
+ /** @bo: XE BO for GuC ads blob */
+ struct xe_bo *bo;
+ /** @golden_lrc_size: golden LRC size */
+ size_t golden_lrc_size;
+ /** @regset_size: size of register set passed to GuC for save/restore */
+ u32 regset_size;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
new file mode 100644
index 000000000000..61a424c41779
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -0,0 +1,1196 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_gt_pagefault.h"
+#include "xe_guc_submit.h"
+#include "xe_map.h"
+#include "xe_trace.h"
+
+/* Used when a CT send wants to block and / or receive data */
+struct g2h_fence {
+ wait_queue_head_t wq;
+ u32 *response_buffer;
+ u32 seqno;
+ u16 response_len;
+ u16 error;
+ u16 hint;
+ u16 reason;
+ bool retry;
+ bool fail;
+ bool done;
+};
+
+static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer)
+{
+ g2h_fence->response_buffer = response_buffer;
+ g2h_fence->response_len = 0;
+ g2h_fence->fail = false;
+ g2h_fence->retry = false;
+ g2h_fence->done = false;
+ g2h_fence->seqno = ~0x0;
+}
+
+static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence)
+{
+ return g2h_fence->seqno == ~0x0;
+}
+
+static struct xe_guc *
+ct_to_guc(struct xe_guc_ct *ct)
+{
+ return container_of(ct, struct xe_guc, ct);
+}
+
+static struct xe_gt *
+ct_to_gt(struct xe_guc_ct *ct)
+{
+ return container_of(ct, struct xe_gt, uc.guc.ct);
+}
+
+static struct xe_device *
+ct_to_xe(struct xe_guc_ct *ct)
+{
+ return gt_to_xe(ct_to_gt(ct));
+}
+
+/**
+ * DOC: GuC CTB Blob
+ *
+ * We allocate single blob to hold both CTB descriptors and buffers:
+ *
+ * +--------+-----------------------------------------------+------+
+ * | offset | contents | size |
+ * +========+===============================================+======+
+ * | 0x0000 | H2G CTB Descriptor (send) | |
+ * +--------+-----------------------------------------------+ 4K |
+ * | 0x0800 | G2H CTB Descriptor (g2h) | |
+ * +--------+-----------------------------------------------+------+
+ * | 0x1000 | H2G CT Buffer (send) | n*4K |
+ * | | | |
+ * +--------+-----------------------------------------------+------+
+ * | 0x1000 | G2H CT Buffer (g2h) | m*4K |
+ * | + n*4K | | |
+ * +--------+-----------------------------------------------+------+
+ *
+ * The size of each ``CT Buffer`` must be a multiple of 4K.
+ * We don't expect too many messages in flight at any time, unless we are
+ * using GuC submission. In that case each request requires a minimum of
+ * 2 dwords, which gives us a maximum of 256 queued requests. Hopefully this
+ * is enough space to avoid backpressure on the driver. We increase the size
+ * of the receive buffer (relative to the send buffer) to ensure a G2H
+ * response always has a landing spot in the CTB.
+ */
+
+#define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K)
+#define CTB_H2G_BUFFER_SIZE (SZ_4K)
+#define CTB_G2H_BUFFER_SIZE (4 * CTB_H2G_BUFFER_SIZE)
+#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 4)
+
+static size_t guc_ct_size(void)
+{
+ return 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE +
+ CTB_G2H_BUFFER_SIZE;
+}
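As a cross-check of the DOC diagram above, this stand-alone C sketch (not driver code) recomputes the blob offsets from the same CTB_* sizes; the resulting offsets are what the later guc_ct_ctb_h2g_register()/guc_ct_ctb_g2h_register() helpers hand to the GuC, relative to the start of the BO:

/*
 * Illustrative only: the two 2K-aligned descriptors come first, followed by
 * the H2G command buffer and then the G2H command buffer.
 */
#include <stdio.h>

#define EX_CTB_DESC_SIZE	2048UL	/* ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) */
#define EX_CTB_H2G_BUFFER_SIZE	4096UL
#define EX_CTB_G2H_BUFFER_SIZE	(4 * EX_CTB_H2G_BUFFER_SIZE)

int main(void)
{
	unsigned long h2g_desc = 0;
	unsigned long g2h_desc = EX_CTB_DESC_SIZE;
	unsigned long h2g_cmds = 2 * EX_CTB_DESC_SIZE;
	unsigned long g2h_cmds = h2g_cmds + EX_CTB_H2G_BUFFER_SIZE;
	unsigned long total = g2h_cmds + EX_CTB_G2H_BUFFER_SIZE;

	printf("H2G desc @0x%04lx, G2H desc @0x%04lx, H2G cmds @0x%04lx, G2H cmds @0x%04lx, total 0x%lx\n",
	       h2g_desc, g2h_desc, h2g_cmds, g2h_cmds, total);
	return 0;
}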
+
+static void guc_ct_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_guc_ct *ct = arg;
+
+ xa_destroy(&ct->fence_lookup);
+ xe_bo_unpin_map_no_vm(ct->bo);
+}
+
+static void g2h_worker_func(struct work_struct *w);
+
+static void primelockdep(struct xe_guc_ct *ct)
+{
+ if (!IS_ENABLED(CONFIG_LOCKDEP))
+ return;
+
+ fs_reclaim_acquire(GFP_KERNEL);
+ might_lock(&ct->lock);
+ fs_reclaim_release(GFP_KERNEL);
+}
+
+int xe_guc_ct_init(struct xe_guc_ct *ct)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct xe_bo *bo;
+ int err;
+
+ XE_BUG_ON(guc_ct_size() % PAGE_SIZE);
+
+ mutex_init(&ct->lock);
+ spin_lock_init(&ct->fast_lock);
+ xa_init(&ct->fence_lookup);
+ ct->fence_context = dma_fence_context_alloc(1);
+ INIT_WORK(&ct->g2h_worker, g2h_worker_func);
+ init_waitqueue_head(&ct->wq);
+
+ primelockdep(ct);
+
+ bo = xe_bo_create_pin_map(xe, gt, NULL, guc_ct_size(),
+ ttm_bo_type_kernel,
+ XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+ XE_BO_CREATE_GGTT_BIT);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ ct->bo = bo;
+
+ err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+#define desc_read(xe_, guc_ctb__, field_) \
+ xe_map_rd_field(xe_, &guc_ctb__->desc, 0, \
+ struct guc_ct_buffer_desc, field_)
+
+#define desc_write(xe_, guc_ctb__, field_, val_) \
+ xe_map_wr_field(xe_, &guc_ctb__->desc, 0, \
+ struct guc_ct_buffer_desc, field_, val_)
+
+static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g,
+ struct iosys_map *map)
+{
+ h2g->size = CTB_H2G_BUFFER_SIZE / sizeof(u32);
+ h2g->resv_space = 0;
+ h2g->tail = 0;
+ h2g->head = 0;
+ h2g->space = CIRC_SPACE(h2g->tail, h2g->head, h2g->size) -
+ h2g->resv_space;
+ h2g->broken = false;
+
+ h2g->desc = *map;
+ xe_map_memset(xe, &h2g->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
+
+ h2g->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2);
+}
+
+static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h,
+ struct iosys_map *map)
+{
+ g2h->size = CTB_G2H_BUFFER_SIZE / sizeof(u32);
+ g2h->resv_space = G2H_ROOM_BUFFER_SIZE / sizeof(u32);
+ g2h->head = 0;
+ g2h->tail = 0;
+ g2h->space = CIRC_SPACE(g2h->tail, g2h->head, g2h->size) -
+ g2h->resv_space;
+ g2h->broken = false;
+
+ g2h->desc = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE);
+ xe_map_memset(xe, &g2h->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
+
+ g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2 +
+ CTB_H2G_BUFFER_SIZE);
+}
+
+static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct)
+{
+ struct xe_guc *guc = ct_to_guc(ct);
+ u32 desc_addr, ctb_addr, size;
+ int err;
+
+ desc_addr = xe_bo_ggtt_addr(ct->bo);
+ ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2;
+ size = ct->ctbs.h2g.size * sizeof(u32);
+
+ err = xe_guc_self_cfg64(guc,
+ GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY,
+ desc_addr);
+ if (err)
+ return err;
+
+ err = xe_guc_self_cfg64(guc,
+ GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY,
+ ctb_addr);
+ if (err)
+ return err;
+
+ return xe_guc_self_cfg32(guc,
+ GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY,
+ size);
+}
+
+static int guc_ct_ctb_g2h_register(struct xe_guc_ct *ct)
+{
+ struct xe_guc *guc = ct_to_guc(ct);
+ u32 desc_addr, ctb_addr, size;
+ int err;
+
+ desc_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE;
+ ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2 +
+ CTB_H2G_BUFFER_SIZE;
+ size = ct->ctbs.g2h.size * sizeof(u32);
+
+ err = xe_guc_self_cfg64(guc,
+ GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY,
+ desc_addr);
+ if (err)
+ return err;
+
+ err = xe_guc_self_cfg64(guc,
+ GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY,
+ ctb_addr);
+ if (err)
+ return err;
+
+ return xe_guc_self_cfg32(guc,
+ GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY,
+ size);
+}
+
+static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable)
+{
+ u32 request[HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN] = {
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
+ GUC_ACTION_HOST2GUC_CONTROL_CTB),
+ FIELD_PREP(HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL,
+ enable ? GUC_CTB_CONTROL_ENABLE :
+ GUC_CTB_CONTROL_DISABLE),
+ };
+ int ret = xe_guc_send_mmio(ct_to_guc(ct), request, ARRAY_SIZE(request));
+
+ return ret > 0 ? -EPROTO : ret;
+}
+
+int xe_guc_ct_enable(struct xe_guc_ct *ct)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ int err;
+
+ XE_BUG_ON(ct->enabled);
+
+ guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
+ guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
+
+ err = guc_ct_ctb_h2g_register(ct);
+ if (err)
+ goto err_out;
+
+ err = guc_ct_ctb_g2h_register(ct);
+ if (err)
+ goto err_out;
+
+ err = guc_ct_control_toggle(ct, true);
+ if (err)
+ goto err_out;
+
+ mutex_lock(&ct->lock);
+ ct->g2h_outstanding = 0;
+ ct->enabled = true;
+ mutex_unlock(&ct->lock);
+
+ smp_mb();
+ wake_up_all(&ct->wq);
+ drm_dbg(&xe->drm, "GuC CT communication channel enabled\n");
+
+ return 0;
+
+err_out:
+ drm_err(&xe->drm, "Failed to enable CT (%d)\n", err);
+
+ return err;
+}
+
+void xe_guc_ct_disable(struct xe_guc_ct *ct)
+{
+ mutex_lock(&ct->lock);
+ ct->enabled = false;
+ mutex_unlock(&ct->lock);
+
+ xa_destroy(&ct->fence_lookup);
+}
+
+static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len)
+{
+ struct guc_ctb *h2g = &ct->ctbs.h2g;
+
+ lockdep_assert_held(&ct->lock);
+
+ if (cmd_len > h2g->space) {
+ h2g->head = desc_read(ct_to_xe(ct), h2g, head);
+ h2g->space = CIRC_SPACE(h2g->tail, h2g->head, h2g->size) -
+ h2g->resv_space;
+ if (cmd_len > h2g->space)
+ return false;
+ }
+
+ return true;
+}
+
+static bool g2h_has_room(struct xe_guc_ct *ct, u32 g2h_len)
+{
+ lockdep_assert_held(&ct->lock);
+
+ return ct->ctbs.g2h.space > g2h_len;
+}
+
+static int has_room(struct xe_guc_ct *ct, u32 cmd_len, u32 g2h_len)
+{
+ lockdep_assert_held(&ct->lock);
+
+ if (!g2h_has_room(ct, g2h_len) || !h2g_has_room(ct, cmd_len))
+ return -EBUSY;
+
+ return 0;
+}
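The credit checks above lean on the usual circular-buffer space accounting. The stand-alone sketch below (not driver code) mirrors CIRC_SPACE() for a power-of-two size and subtracts the reserved space, using dword counts similar to the G2H defines above:

/*
 * Illustrative only: CIRC_SPACE-style accounting leaves one slot unused so
 * that head == tail always means "empty"; the reserved space is then
 * subtracted so a full response always has a landing spot.
 */
#include <stdio.h>

#define EX_CIRC_SPACE(tail, head, size) \
	(((head) - ((tail) + 1)) & ((size) - 1))

int main(void)
{
	unsigned int size = 4096;	/* ring size in dwords, power of two */
	unsigned int resv = size / 4;	/* reserved space, as for the G2H ring */
	unsigned int head = 0, tail = 0;

	unsigned int space = EX_CIRC_SPACE(tail, head, size) - resv;
	printf("empty ring: usable space %u of %u dwords\n", space, size);

	tail = 100;			/* pretend 100 dwords were written */
	space = EX_CIRC_SPACE(tail, head, size) - resv;
	printf("after 100 dword write: usable space %u dwords\n", space);
	return 0;
}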
+
+static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len)
+{
+ lockdep_assert_held(&ct->lock);
+ ct->ctbs.h2g.space -= cmd_len;
+}
+
+static void g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
+{
+ XE_BUG_ON(g2h_len > ct->ctbs.g2h.space);
+
+ if (g2h_len) {
+ spin_lock_irq(&ct->fast_lock);
+ ct->ctbs.g2h.space -= g2h_len;
+ ct->g2h_outstanding += num_g2h;
+ spin_unlock_irq(&ct->fast_lock);
+ }
+}
+
+static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
+{
+ lockdep_assert_held(&ct->fast_lock);
+ XE_WARN_ON(ct->ctbs.g2h.space + g2h_len >
+ ct->ctbs.g2h.size - ct->ctbs.g2h.resv_space);
+
+ ct->ctbs.g2h.space += g2h_len;
+ --ct->g2h_outstanding;
+}
+
+static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
+{
+ spin_lock_irq(&ct->fast_lock);
+ __g2h_release_space(ct, g2h_len);
+ spin_unlock_irq(&ct->fast_lock);
+}
+
+static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 ct_fence_value, bool want_response)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct guc_ctb *h2g = &ct->ctbs.h2g;
+ u32 cmd[GUC_CTB_MSG_MAX_LEN / sizeof(u32)];
+ u32 cmd_len = len + GUC_CTB_HDR_LEN;
+ u32 cmd_idx = 0, i;
+ u32 tail = h2g->tail;
+ struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds,
+ tail * sizeof(u32));
+
+ lockdep_assert_held(&ct->lock);
+ XE_BUG_ON(len * sizeof(u32) > GUC_CTB_MSG_MAX_LEN);
+ XE_BUG_ON(tail > h2g->size);
+
+ /* Command will wrap, zero fill (NOPs), return and check credits again */
+ if (tail + cmd_len > h2g->size) {
+ xe_map_memset(xe, &map, 0, 0, (h2g->size - tail) * sizeof(u32));
+ h2g_reserve_space(ct, (h2g->size - tail));
+ h2g->tail = 0;
+ desc_write(xe, h2g, tail, h2g->tail);
+
+ return -EAGAIN;
+ }
+
+ /*
+ * dw0: CT header (including fence)
+ * dw1: HXG header (including action code)
+ * dw2+: action data
+ */
+ cmd[cmd_idx++] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) |
+ FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
+ FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value);
+ if (want_response) {
+ cmd[cmd_idx++] =
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
+ GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+ } else {
+ cmd[cmd_idx++] =
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
+ FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
+ GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+ }
+ for (i = 1; i < len; ++i)
+ cmd[cmd_idx++] = action[i];
+
+ /* Write the H2G, ensuring it is visible before the descriptor update */
+ xe_map_memcpy_to(xe, &map, 0, cmd, cmd_len * sizeof(u32));
+ xe_device_wmb(ct_to_xe(ct));
+
+ /* Update local copies */
+ h2g->tail = (tail + cmd_len) % h2g->size;
+ h2g_reserve_space(ct, cmd_len);
+
+ /* Update descriptor */
+ desc_write(xe, h2g, tail, h2g->tail);
+
+ return 0;
+}
+
+static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
+ u32 len, u32 g2h_len, u32 num_g2h,
+ struct g2h_fence *g2h_fence)
+{
+ int ret;
+
+ XE_BUG_ON(g2h_len && g2h_fence);
+ XE_BUG_ON(num_g2h && g2h_fence);
+ XE_BUG_ON(g2h_len && !num_g2h);
+ XE_BUG_ON(!g2h_len && num_g2h);
+ lockdep_assert_held(&ct->lock);
+
+ if (unlikely(ct->ctbs.h2g.broken)) {
+ ret = -EPIPE;
+ goto out;
+ }
+
+ if (unlikely(!ct->enabled)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (g2h_fence) {
+ g2h_len = GUC_CTB_HXG_MSG_MAX_LEN;
+ num_g2h = 1;
+
+ if (g2h_fence_needs_alloc(g2h_fence)) {
+ void *ptr;
+
+ g2h_fence->seqno = (ct->fence_seqno++ & 0xffff);
+ init_waitqueue_head(&g2h_fence->wq);
+ ptr = xa_store(&ct->fence_lookup,
+ g2h_fence->seqno,
+ g2h_fence, GFP_ATOMIC);
+ if (IS_ERR(ptr)) {
+ ret = PTR_ERR(ptr);
+ goto out;
+ }
+ }
+ }
+
+ xe_device_mem_access_get(ct_to_xe(ct));
+retry:
+ ret = has_room(ct, len + GUC_CTB_HDR_LEN, g2h_len);
+ if (unlikely(ret))
+ goto put_wa;
+
+ ret = h2g_write(ct, action, len, g2h_fence ? g2h_fence->seqno : 0,
+ !!g2h_fence);
+ if (unlikely(ret)) {
+ if (ret == -EAGAIN)
+ goto retry;
+ goto put_wa;
+ }
+
+ g2h_reserve_space(ct, g2h_len, num_g2h);
+ xe_guc_notify(ct_to_guc(ct));
+put_wa:
+ xe_device_mem_access_put(ct_to_xe(ct));
+out:
+
+ return ret;
+}
+
+static void kick_reset(struct xe_guc_ct *ct)
+{
+ xe_gt_reset_async(ct_to_gt(ct));
+}
+
+static int dequeue_one_g2h(struct xe_guc_ct *ct);
+
+static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h,
+ struct g2h_fence *g2h_fence)
+{
+ struct drm_device *drm = &ct_to_xe(ct)->drm;
+ struct drm_printer p = drm_info_printer(drm->dev);
+ unsigned int sleep_period_ms = 1;
+ int ret;
+
+ XE_BUG_ON(g2h_len && g2h_fence);
+ lockdep_assert_held(&ct->lock);
+
+try_again:
+ ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h,
+ g2h_fence);
+
+ /*
+ * We wait up to about 1 second for credits to be restored before bailing.
+ * In the case of H2G credits we have no choice but to wait for the GuC to
+ * consume H2Gs in the channel, so we use a wait / sleep loop. In the case
+ * of G2H credits we process any G2H already in the channel, hopefully
+ * freeing credits as we consume the G2H messages.
+ */
+ if (unlikely(ret == -EBUSY &&
+ !h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) {
+ struct guc_ctb *h2g = &ct->ctbs.h2g;
+
+ if (sleep_period_ms == 1024)
+ goto broken;
+
+ trace_xe_guc_ct_h2g_flow_control(h2g->head, h2g->tail,
+ h2g->size, h2g->space,
+ len + GUC_CTB_HDR_LEN);
+ msleep(sleep_period_ms);
+ sleep_period_ms <<= 1;
+
+ goto try_again;
+ } else if (unlikely(ret == -EBUSY)) {
+ struct xe_device *xe = ct_to_xe(ct);
+ struct guc_ctb *g2h = &ct->ctbs.g2h;
+
+ trace_xe_guc_ct_g2h_flow_control(g2h->head,
+ desc_read(xe, g2h, tail),
+ g2h->size, g2h->space,
+ g2h_fence ?
+ GUC_CTB_HXG_MSG_MAX_LEN :
+ g2h_len);
+
+#define g2h_avail(ct) \
+ (desc_read(ct_to_xe(ct), (&ct->ctbs.g2h), tail) != ct->ctbs.g2h.head)
+ if (!wait_event_timeout(ct->wq, !ct->g2h_outstanding ||
+ g2h_avail(ct), HZ))
+ goto broken;
+#undef g2h_avail
+
+ if (dequeue_one_g2h(ct) < 0)
+ goto broken;
+
+ goto try_again;
+ }
+
+ return ret;
+
+broken:
+ drm_err(drm, "No forward progress on H2G, reset required");
+ xe_guc_ct_print(ct, &p);
+ ct->ctbs.h2g.broken = true;
+
+ return -EDEADLK;
+}
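The H2G flow-control branch above is a simple exponential backoff: sleep 1 ms, then 2 ms, and so on, and bail once the period would reach 1024 ms. A stand-alone sketch (not driver code) of the worst-case accumulated wait:

/* Illustrative only: total sleep before giving up on restored H2G credits. */
#include <stdio.h>

int main(void)
{
	unsigned int sleep_ms = 1, total_ms = 0, tries = 0;

	while (sleep_ms < 1024) {
		total_ms += sleep_ms;	/* the driver does msleep(sleep_ms) here */
		sleep_ms <<= 1;
		tries++;
	}
	printf("worst case: ~%u ms of sleep over %u retries\n", total_ms, tries);
	return 0;
}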
+
+static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence)
+{
+ int ret;
+
+ XE_BUG_ON(g2h_len && g2h_fence);
+
+ mutex_lock(&ct->lock);
+ ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence);
+ mutex_unlock(&ct->lock);
+
+ return ret;
+}
+
+int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h)
+{
+ int ret;
+
+ ret = guc_ct_send(ct, action, len, g2h_len, num_g2h, NULL);
+ if (ret == -EDEADLK)
+ kick_reset(ct);
+
+ return ret;
+}
+
+int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h)
+{
+ int ret;
+
+ ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, NULL);
+ if (ret == -EDEADLK)
+ kick_reset(ct);
+
+ return ret;
+}
+
+int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action, u32 len)
+{
+ int ret;
+
+ lockdep_assert_held(&ct->lock);
+
+ ret = guc_ct_send_locked(ct, action, len, 0, 0, NULL);
+ if (ret == -EDEADLK)
+ kick_reset(ct);
+
+ return ret;
+}
+
+/*
+ * Check if a GT reset is in progress or will occur and, if so, whether the GT
+ * reset brought the CT back up. 5 seconds is an arbitrary upper limit for a GT
+ * reset to complete.
+ */
+static bool retry_failure(struct xe_guc_ct *ct, int ret)
+{
+ if (!(ret == -EDEADLK || ret == -EPIPE || ret == -ENODEV))
+ return false;
+
+#define ct_alive(ct) \
+ (ct->enabled && !ct->ctbs.h2g.broken && !ct->ctbs.g2h.broken)
+ if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5))
+ return false;
+#undef ct_alive
+
+ return true;
+}
+
+static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 *response_buffer, bool no_fail)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct g2h_fence g2h_fence;
+ int ret = 0;
+
+ /*
+ * We use a fence to implement blocking sends / receiving response data.
+ * The seqno of the fence is sent in the H2G, returned in the G2H, and
+ * an xarray is used as the storage medium with the seqno being the key.
+ * Fields in the fence hold the success, failure and retry status and the
+ * response data. It is safe to allocate the fence on the stack as the
+ * xarray holds the only other reference and that reference is removed
+ * before this function exits.
+ */
+retry:
+ g2h_fence_init(&g2h_fence, response_buffer);
+retry_same_fence:
+ ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence);
+ if (unlikely(ret == -ENOMEM)) {
+ void *ptr;
+
+ /* Retry the allocation with GFP_KERNEL */
+ ptr = xa_store(&ct->fence_lookup,
+ g2h_fence.seqno,
+ &g2h_fence, GFP_KERNEL);
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+
+ goto retry_same_fence;
+ } else if (unlikely(ret)) {
+ if (ret == -EDEADLK)
+ kick_reset(ct);
+
+ if (no_fail && retry_failure(ct, ret))
+ goto retry_same_fence;
+
+ if (!g2h_fence_needs_alloc(&g2h_fence))
+ xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+
+ return ret;
+ }
+
+ ret = wait_event_timeout(g2h_fence.wq, g2h_fence.done, HZ);
+ if (!ret) {
+ drm_err(&xe->drm, "Timed out waiting for G2H, fence %u, action %04x",
+ g2h_fence.seqno, action[0]);
+ xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+ return -ETIME;
+ }
+
+ if (g2h_fence.retry) {
+ drm_warn(&xe->drm, "Send retry, action 0x%04x, reason %d",
+ action[0], g2h_fence.reason);
+ goto retry;
+ }
+ if (g2h_fence.fail) {
+ drm_err(&xe->drm, "Send failed, action 0x%04x, error %d, hint %d",
+ action[0], g2h_fence.error, g2h_fence.hint);
+ ret = -EIO;
+ }
+
+ return ret > 0 ? 0 : ret;
+}
+
+int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 *response_buffer)
+{
+ return guc_ct_send_recv(ct, action, len, response_buffer, false);
+}
+
+int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action,
+ u32 len, u32 *response_buffer)
+{
+ return guc_ct_send_recv(ct, action, len, response_buffer, true);
+}
+
+static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
+
+ lockdep_assert_held(&ct->lock);
+
+ switch (action) {
+ case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
+ case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
+ case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
+ case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ g2h_release_space(ct, len);
+ }
+
+ return 0;
+}
+
+static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ u32 response_len = len - GUC_CTB_MSG_MIN_LEN;
+ u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, msg[0]);
+ u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]);
+ struct g2h_fence *g2h_fence;
+
+ lockdep_assert_held(&ct->lock);
+
+ g2h_fence = xa_erase(&ct->fence_lookup, fence);
+ if (unlikely(!g2h_fence)) {
+ /* Don't tear down channel, as send could've timed out */
+ drm_warn(&xe->drm, "G2H fence (%u) not found!\n", fence);
+ g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
+ return 0;
+ }
+
+ XE_WARN_ON(fence != g2h_fence->seqno);
+
+ if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) {
+ g2h_fence->fail = true;
+ g2h_fence->error =
+ FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[0]);
+ g2h_fence->hint =
+ FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[0]);
+ } else if (type == GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
+ g2h_fence->retry = true;
+ g2h_fence->reason =
+ FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, msg[0]);
+ } else if (g2h_fence->response_buffer) {
+ g2h_fence->response_len = response_len;
+ memcpy(g2h_fence->response_buffer, msg + GUC_CTB_MSG_MIN_LEN,
+ response_len * sizeof(u32));
+ }
+
+ g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
+
+ g2h_fence->done = true;
+ smp_mb();
+
+ wake_up(&g2h_fence->wq);
+
+ return 0;
+}
+
+static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ u32 header, hxg, origin, type;
+ int ret;
+
+ lockdep_assert_held(&ct->lock);
+
+ header = msg[0];
+ hxg = msg[1];
+
+ origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg);
+ if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) {
+ drm_err(&xe->drm,
+ "G2H channel broken on read, origin=%d, reset required\n",
+ origin);
+ ct->ctbs.g2h.broken = true;
+
+ return -EPROTO;
+ }
+
+ type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg);
+ switch (type) {
+ case GUC_HXG_TYPE_EVENT:
+ ret = parse_g2h_event(ct, msg, len);
+ break;
+ case GUC_HXG_TYPE_RESPONSE_SUCCESS:
+ case GUC_HXG_TYPE_RESPONSE_FAILURE:
+ case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
+ ret = parse_g2h_response(ct, msg, len);
+ break;
+ default:
+ drm_err(&xe->drm,
+ "G2H channel broken on read, type=%d, reset required\n",
+ type);
+ ct->ctbs.g2h.broken = true;
+
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_guc *guc = ct_to_guc(ct);
+ u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
+ u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN;
+ u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN;
+ int ret = 0;
+
+ if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT)
+ return 0;
+
+ switch (action) {
+ case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
+ ret = xe_guc_sched_done_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
+ ret = xe_guc_deregister_done_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION:
+ ret = xe_guc_engine_reset_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION:
+ ret = xe_guc_engine_reset_failure_handler(guc, payload,
+ adj_len);
+ break;
+ case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
+ /* Selftest only at the moment */
+ break;
+ case XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
+ case XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE:
+ /* FIXME: Handle this */
+ break;
+ case XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR:
+ ret = xe_guc_engine_memory_cat_error_handler(guc, payload,
+ adj_len);
+ break;
+ case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+ ret = xe_guc_pagefault_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ ret = xe_guc_tlb_invalidation_done_handler(guc, payload,
+ adj_len);
+ break;
+ case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY:
+ ret = xe_guc_access_counter_notify_handler(guc, payload,
+ adj_len);
+ break;
+ default:
+ drm_err(&xe->drm, "unexpected action 0x%04x\n", action);
+ }
+
+ if (ret)
+ drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n",
+ action, ret);
+
+ return 0;
+}
+
+static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct guc_ctb *g2h = &ct->ctbs.g2h;
+ u32 tail, head, len;
+ s32 avail;
+
+ lockdep_assert_held(&ct->fast_lock);
+
+ if (!ct->enabled)
+ return -ENODEV;
+
+ if (g2h->broken)
+ return -EPIPE;
+
+ /* Calculate DW available to read */
+ tail = desc_read(xe, g2h, tail);
+ avail = tail - g2h->head;
+ if (unlikely(avail == 0))
+ return 0;
+
+ if (avail < 0)
+ avail += g2h->size;
+
+ /* Read header */
+ xe_map_memcpy_from(xe, msg, &g2h->cmds, sizeof(u32) * g2h->head, sizeof(u32));
+ len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN;
+ if (len > avail) {
+ drm_err(&xe->drm,
+ "G2H channel broken on read, avail=%d, len=%d, reset required\n",
+ avail, len);
+ g2h->broken = true;
+
+ return -EPROTO;
+ }
+
+ head = (g2h->head + 1) % g2h->size;
+ avail = len - 1;
+
+ /* Read G2H message */
+ if (avail + head > g2h->size) {
+ u32 avail_til_wrap = g2h->size - head;
+
+ xe_map_memcpy_from(xe, msg + 1,
+ &g2h->cmds, sizeof(u32) * head,
+ avail_til_wrap * sizeof(u32));
+ xe_map_memcpy_from(xe, msg + 1 + avail_til_wrap,
+ &g2h->cmds, 0,
+ (avail - avail_til_wrap) * sizeof(u32));
+ } else {
+ xe_map_memcpy_from(xe, msg + 1,
+ &g2h->cmds, sizeof(u32) * head,
+ avail * sizeof(u32));
+ }
+
+ if (fast_path) {
+ if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT)
+ return 0;
+
+ switch (FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1])) {
+ case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+ break; /* Process these in fast-path */
+ default:
+ return 0;
+ }
+ }
+
+ /* Update local / descriptor header */
+ g2h->head = (head + avail) % g2h->size;
+ desc_write(xe, g2h, head, g2h->head);
+
+ return len;
+}
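The wrap handling in g2h_read() copies a message that crosses the end of the ring in two pieces. A stand-alone sketch (not driver code) of that copy, with an invented 8-dword ring and message:

/*
 * Illustrative only: a 4-dword message starting at dword 6 of an 8-dword
 * ring is read as two memcpy()s, first to the end of the ring, then from
 * the start.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>

#define RING_DW 8

int main(void)
{
	uint32_t ring[RING_DW] = { 0xc, 0xd, 0, 0, 0, 0, 0xa, 0xb };
	uint32_t msg[4];
	unsigned int head = 6, len = 4;		/* 4-dword message starting at dword 6 */
	unsigned int until_wrap = RING_DW - head;

	if (len > until_wrap) {
		memcpy(msg, &ring[head], until_wrap * sizeof(uint32_t));
		memcpy(msg + until_wrap, &ring[0],
		       (len - until_wrap) * sizeof(uint32_t));
	} else {
		memcpy(msg, &ring[head], len * sizeof(uint32_t));
	}

	printf("msg: %" PRIx32 " %" PRIx32 " %" PRIx32 " %" PRIx32 "\n",
	       msg[0], msg[1], msg[2], msg[3]);
	return 0;
}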
+
+static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_guc *guc = ct_to_guc(ct);
+ u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
+ u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN;
+ u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN;
+ int ret = 0;
+
+ switch (action) {
+ case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
+ ret = xe_guc_pagefault_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ __g2h_release_space(ct, len);
+ ret = xe_guc_tlb_invalidation_done_handler(guc, payload,
+ adj_len);
+ break;
+ default:
+ XE_WARN_ON("NOT_POSSIBLE");
+ }
+
+ if (ret)
+ drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n",
+ action, ret);
+}
+
+/**
+ * xe_guc_ct_fast_path - process critical G2H in the IRQ handler
+ * @ct: GuC CT object
+ *
+ * Anything related to page faults is critical for performance; process these
+ * critical G2H messages in the IRQ handler. This is safe as these handlers
+ * either just wake up waiters or queue another worker.
+ */
+void xe_guc_ct_fast_path(struct xe_guc_ct *ct)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ int len;
+
+ if (!xe_device_in_fault_mode(xe) || !xe_device_mem_access_ongoing(xe))
+ return;
+
+ spin_lock(&ct->fast_lock);
+ do {
+ len = g2h_read(ct, ct->fast_msg, true);
+ if (len > 0)
+ g2h_fast_path(ct, ct->fast_msg, len);
+ } while (len > 0);
+ spin_unlock(&ct->fast_lock);
+}
+
+/* Returns less than zero on error, 0 on done, 1 on more available */
+static int dequeue_one_g2h(struct xe_guc_ct *ct)
+{
+ int len;
+ int ret;
+
+ lockdep_assert_held(&ct->lock);
+
+ spin_lock_irq(&ct->fast_lock);
+ len = g2h_read(ct, ct->msg, false);
+ spin_unlock_irq(&ct->fast_lock);
+ if (len <= 0)
+ return len;
+
+ ret = parse_g2h_msg(ct, ct->msg, len);
+ if (unlikely(ret < 0))
+ return ret;
+
+ ret = process_g2h_msg(ct, ct->msg, len);
+ if (unlikely(ret < 0))
+ return ret;
+
+ return 1;
+}
+
+static void g2h_worker_func(struct work_struct *w)
+{
+ struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker);
+ int ret;
+
+ xe_device_mem_access_get(ct_to_xe(ct));
+ do {
+ mutex_lock(&ct->lock);
+ ret = dequeue_one_g2h(ct);
+ mutex_unlock(&ct->lock);
+
+ if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) {
+ struct drm_device *drm = &ct_to_xe(ct)->drm;
+ struct drm_printer p = drm_info_printer(drm->dev);
+
+ xe_guc_ct_print(ct, &p);
+ kick_reset(ct);
+ }
+ } while (ret == 1);
+ xe_device_mem_access_put(ct_to_xe(ct));
+}
+
+static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb,
+ struct drm_printer *p)
+{
+ u32 head, tail;
+
+ drm_printf(p, "\tsize: %d\n", ctb->size);
+ drm_printf(p, "\tresv_space: %d\n", ctb->resv_space);
+ drm_printf(p, "\thead: %d\n", ctb->head);
+ drm_printf(p, "\ttail: %d\n", ctb->tail);
+ drm_printf(p, "\tspace: %d\n", ctb->space);
+ drm_printf(p, "\tbroken: %d\n", ctb->broken);
+
+ head = desc_read(xe, ctb, head);
+ tail = desc_read(xe, ctb, tail);
+ drm_printf(p, "\thead (memory): %d\n", head);
+ drm_printf(p, "\ttail (memory): %d\n", tail);
+ drm_printf(p, "\tstatus (memory): 0x%x\n", desc_read(xe, ctb, status));
+
+ if (head != tail) {
+ struct iosys_map map =
+ IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32));
+
+ while (head != tail) {
+ drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
+ xe_map_rd(xe, &map, 0, u32));
+ ++head;
+ if (head == ctb->size) {
+ head = 0;
+ map = ctb->cmds;
+ } else {
+ iosys_map_incr(&map, sizeof(u32));
+ }
+ }
+ }
+}
+
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p)
+{
+ if (ct->enabled) {
+ drm_puts(p, "\nH2G CTB (all sizes in DW):\n");
+ guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.h2g, p);
+
+ drm_puts(p, "\nG2H CTB (all sizes in DW):\n");
+ guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.g2h, p);
+ drm_printf(p, "\tg2h outstanding: %d\n", ct->g2h_outstanding);
+ } else {
+ drm_puts(p, "\nCT disabled\n");
+ }
+}
+
+#ifdef XE_GUC_CT_SELFTEST
+/*
+ * Disable G2H processing in the IRQ handler to force xe_guc_ct_send into flow
+ * control once enough messages have been sent (8k sends is enough). Verify
+ * forward progress and that the credits have the expected values on exit.
+ */
+void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p)
+{
+ struct guc_ctb *g2h = &ct->ctbs.g2h;
+ u32 action[] = { XE_GUC_ACTION_SCHED_ENGINE_MODE_SET, 0, 0, 1, };
+ u32 bad_action[] = { XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 0, 0, };
+ int ret;
+ int i;
+
+ ct->suppress_irq_handler = true;
+ drm_puts(p, "Starting GuC CT selftest\n");
+
+ for (i = 0; i < 8192; ++i) {
+ ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 4, 1);
+ if (ret) {
+ drm_printf(p, "Aborted pass %d, ret %d\n", i, ret);
+ xe_guc_ct_print(ct, p);
+ break;
+ }
+ }
+
+ ct->suppress_irq_handler = false;
+ if (!ret) {
+ xe_guc_ct_irq_handler(ct);
+ msleep(200);
+ if (g2h->space !=
+ CIRC_SPACE(0, 0, g2h->size) - g2h->resv_space) {
+ drm_printf(p, "Mismatch on space %d, %d\n",
+ g2h->space,
+ CIRC_SPACE(0, 0, g2h->size) -
+ g2h->resv_space);
+ ret = -EIO;
+ }
+ if (ct->g2h_outstanding) {
+ drm_printf(p, "Outstanding G2H, %d\n",
+ ct->g2h_outstanding);
+ ret = -EIO;
+ }
+ }
+
+ /* Check failure path for blocking CTs too */
+ xe_guc_ct_send_block(ct, bad_action, ARRAY_SIZE(bad_action));
+ if (g2h->space !=
+ CIRC_SPACE(0, 0, g2h->size) - g2h->resv_space) {
+ drm_printf(p, "Mismatch on space %d, %d\n",
+ g2h->space,
+ CIRC_SPACE(0, 0, g2h->size) -
+ g2h->resv_space);
+ ret = -EIO;
+ }
+ if (ct->g2h_outstanding) {
+ drm_printf(p, "Outstanding G2H, %d\n",
+ ct->g2h_outstanding);
+ ret = -EIO;
+ }
+
+ drm_printf(p, "GuC CT selftest done - %s\n", ret ? "FAIL" : "PASS");
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
new file mode 100644
index 000000000000..49fb74f91e4d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_CT_H_
+#define _XE_GUC_CT_H_
+
+#include "xe_guc_ct_types.h"
+
+struct drm_printer;
+
+int xe_guc_ct_init(struct xe_guc_ct *ct);
+int xe_guc_ct_enable(struct xe_guc_ct *ct);
+void xe_guc_ct_disable(struct xe_guc_ct *ct);
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p);
+void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
+
+static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct)
+{
+ wake_up_all(&ct->wq);
+#ifdef XE_GUC_CT_SELFTEST
+ if (!ct->suppress_irq_handler && ct->enabled)
+ queue_work(system_unbound_wq, &ct->g2h_worker);
+#else
+ if (ct->enabled)
+ queue_work(system_unbound_wq, &ct->g2h_worker);
+#endif
+ xe_guc_ct_fast_path(ct);
+}
+
+/* Basic CT send / receives */
+int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h);
+int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h);
+int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 *response_buffer);
+static inline int
+xe_guc_ct_send_block(struct xe_guc_ct *ct, const u32 *action, u32 len)
+{
+ return xe_guc_ct_send_recv(ct, action, len, NULL);
+}
+
+/* This is the only version of CT send that may be called from a G2H handler */
+int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action,
+ u32 len);
+
+/* Can't fail because a GT reset is in progress */
+int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action,
+ u32 len, u32 *response_buffer);
+static inline int
+xe_guc_ct_send_block_no_fail(struct xe_guc_ct *ct, const u32 *action, u32 len)
+{
+ return xe_guc_ct_send_recv_no_fail(ct, action, len, NULL);
+}
+
+#ifdef XE_GUC_CT_SELFTEST
+void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p);
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
new file mode 100644
index 000000000000..17b148bf3735
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_CT_TYPES_H_
+#define _XE_GUC_CT_TYPES_H_
+
+#include <linux/iosys-map.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock_types.h>
+#include <linux/wait.h>
+#include <linux/xarray.h>
+
+#include "abi/guc_communication_ctb_abi.h"
+
+#define XE_GUC_CT_SELFTEST
+
+struct xe_bo;
+
+/**
+ * struct guc_ctb - GuC command transport buffer (CTB)
+ */
+struct guc_ctb {
+ /** @desc: dma buffer map for CTB descriptor */
+ struct iosys_map desc;
+ /** @cmds: dma buffer map for CTB commands */
+ struct iosys_map cmds;
+ /** @size: size of CTB commands (DW) */
+ u32 size;
+ /** @resv_space: reserved space of CTB commands (DW) */
+ u32 resv_space;
+ /** @head: head of CTB commands (DW) */
+ u32 head;
+ /** @tail: tail of CTB commands (DW) */
+ u32 tail;
+ /** @space: space in CTB commands (DW) */
+ u32 space;
+ /** @broken: channel broken */
+ bool broken;
+};
+
+/**
+ * struct xe_guc_ct - GuC command transport (CT) layer
+ *
+ * Includes a pair of CT buffers for bi-directional communication and tracking
+ * for the H2G and G2H requests sent and received through the buffers.
+ */
+struct xe_guc_ct {
+ /** @bo: XE BO for CT */
+ struct xe_bo *bo;
+ /** @lock: protects everything in CT layer */
+ struct mutex lock;
+ /** @fast_lock: protects G2H channel and credits */
+ spinlock_t fast_lock;
+ /** @ctbs: buffers for sending and receiving commands */
+ struct {
+ /** @h2g: Host to GuC (H2G, send) channel */
+ struct guc_ctb h2g;
+ /** @g2h: GuC to Host (G2H, receive) channel */
+ struct guc_ctb g2h;
+ } ctbs;
+ /** @g2h_outstanding: number of outstanding G2H */
+ u32 g2h_outstanding;
+ /** @g2h_worker: worker to process G2H messages */
+ struct work_struct g2h_worker;
+ /** @enabled: CT enabled */
+ bool enabled;
+ /** @fence_seqno: G2H fence seqno - 16 bits used by CT */
+ u32 fence_seqno;
+ /** @fence_context: context for G2H fence */
+ u64 fence_context;
+ /** @fence_lookup: G2H fence lookup */
+ struct xarray fence_lookup;
+ /** @wq: wait queue used for reliable CT sends and freeing G2H credits */
+ wait_queue_head_t wq;
+#ifdef XE_GUC_CT_SELFTEST
+ /** @suppress_irq_handler: force flow control to sender */
+ bool suppress_irq_handler;
+#endif
+ /** @msg: Message buffer used when processing G2H under @lock */
+ u32 msg[GUC_CTB_MSG_MAX_LEN];
+ /** @fast_msg: Message buffer used by the G2H fast path (IRQ context) */
+ u32 fast_msg[GUC_CTB_MSG_MAX_LEN];
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c
new file mode 100644
index 000000000000..916e9633b322
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_debugfs.h"
+#include "xe_guc_log.h"
+#include "xe_macros.h"
+
+static struct xe_gt *
+guc_to_gt(struct xe_guc *guc)
+{
+ return container_of(guc, struct xe_gt, uc.guc);
+}
+
+static struct xe_device *
+guc_to_xe(struct xe_guc *guc)
+{
+ return gt_to_xe(guc_to_gt(guc));
+}
+
+static struct xe_guc *node_to_guc(struct drm_info_node *node)
+{
+ return node->info_ent->data;
+}
+
+static int guc_info(struct seq_file *m, void *data)
+{
+ struct xe_guc *guc = node_to_guc(m->private);
+ struct xe_device *xe = guc_to_xe(guc);
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ xe_device_mem_access_get(xe);
+ xe_guc_print_info(guc, &p);
+ xe_device_mem_access_put(xe);
+
+ return 0;
+}
+
+static int guc_log(struct seq_file *m, void *data)
+{
+ struct xe_guc *guc = node_to_guc(m->private);
+ struct xe_device *xe = guc_to_xe(guc);
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ xe_device_mem_access_get(xe);
+ xe_guc_log_print(&guc->log, &p);
+ xe_device_mem_access_put(xe);
+
+ return 0;
+}
+
+#ifdef XE_GUC_CT_SELFTEST
+static int guc_ct_selftest(struct seq_file *m, void *data)
+{
+ struct xe_guc *guc = node_to_guc(m->private);
+ struct xe_device *xe = guc_to_xe(guc);
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ xe_device_mem_access_get(xe);
+ xe_guc_ct_selftest(&guc->ct, &p);
+ xe_device_mem_access_put(xe);
+
+ return 0;
+}
+#endif
+
+static const struct drm_info_list debugfs_list[] = {
+ {"guc_info", guc_info, 0},
+ {"guc_log", guc_log, 0},
+#ifdef XE_GUC_CT_SELFTEST
+ {"guc_ct_selftest", guc_ct_selftest, 0},
+#endif
+};
+
+void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent)
+{
+ struct drm_minor *minor = guc_to_xe(guc)->drm.primary;
+ struct drm_info_list *local;
+ int i;
+
+#define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
+ local = drmm_kmalloc(&guc_to_xe(guc)->drm, DEBUGFS_SIZE, GFP_KERNEL);
+ if (!local) {
+ XE_WARN_ON("Couldn't allocate memory");
+ return;
+ }
+
+ memcpy(local, debugfs_list, DEBUGFS_SIZE);
+#undef DEBUGFS_SIZE
+
+ for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i)
+ local[i].data = guc;
+
+ drm_debugfs_create_files(local,
+ ARRAY_SIZE(debugfs_list),
+ parent, minor);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.h b/drivers/gpu/drm/xe/xe_guc_debugfs.h
new file mode 100644
index 000000000000..4756dff26fca
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_DEBUGFS_H_
+#define _XE_GUC_DEBUGFS_H_
+
+struct dentry;
+struct xe_guc;
+
+void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_types.h b/drivers/gpu/drm/xe/xe_guc_engine_types.h
new file mode 100644
index 000000000000..512615d1ce8c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_engine_types.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_ENGINE_TYPES_H_
+#define _XE_GUC_ENGINE_TYPES_H_
+
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#include "xe_gpu_scheduler_types.h"
+
+struct dma_fence;
+struct xe_engine;
+
+/**
+ * struct xe_guc_engine - GuC specific state for an xe_engine
+ */
+struct xe_guc_engine {
+ /** @engine: Backpointer to parent xe_engine */
+ struct xe_engine *engine;
+ /** @sched: GPU scheduler for this xe_engine */
+ struct xe_gpu_scheduler sched;
+ /** @entity: Scheduler entity for this xe_engine */
+ struct xe_sched_entity entity;
+ /**
+ * @static_msgs: Static messages for this xe_engine, used when a message
+ * needs to be sent through the GPU scheduler but memory allocations are
+ * not allowed.
+ */
+#define MAX_STATIC_MSG_TYPE 3
+ struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE];
+ /** @fini_async: do final fini async from this worker */
+ struct work_struct fini_async;
+ /** @resume_time: time of last resume */
+ u64 resume_time;
+ /** @state: GuC specific state for this xe_engine */
+ atomic_t state;
+ /** @wqi_head: work queue item head */
+ u32 wqi_head;
+ /** @wqi_tail: work queue item tail */
+ u32 wqi_tail;
+ /** @id: GuC id for this xe_engine */
+ u16 id;
+ /** @suspend_wait: wait queue used to wait on pending suspends */
+ wait_queue_head_t suspend_wait;
+ /** @suspend_pending: a suspend of the engine is pending */
+ bool suspend_pending;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
new file mode 100644
index 000000000000..f562404a6cf7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -0,0 +1,392 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_FWIF_H
+#define _XE_GUC_FWIF_H
+
+#include <linux/bits.h>
+
+#include "abi/guc_actions_abi.h"
+#include "abi/guc_actions_slpc_abi.h"
+#include "abi/guc_errors_abi.h"
+#include "abi/guc_communication_mmio_abi.h"
+#include "abi/guc_communication_ctb_abi.h"
+#include "abi/guc_klvs_abi.h"
+#include "abi/guc_messages_abi.h"
+
+#define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 4
+#define G2H_LEN_DW_DEREGISTER_CONTEXT 3
+#define G2H_LEN_DW_TLB_INVALIDATE 3
+
+#define GUC_CONTEXT_DISABLE 0
+#define GUC_CONTEXT_ENABLE 1
+
+#define GUC_CLIENT_PRIORITY_KMD_HIGH 0
+#define GUC_CLIENT_PRIORITY_HIGH 1
+#define GUC_CLIENT_PRIORITY_KMD_NORMAL 2
+#define GUC_CLIENT_PRIORITY_NORMAL 3
+#define GUC_CLIENT_PRIORITY_NUM 4
+
+#define GUC_RENDER_ENGINE 0
+#define GUC_VIDEO_ENGINE 1
+#define GUC_BLITTER_ENGINE 2
+#define GUC_VIDEOENHANCE_ENGINE 3
+#define GUC_VIDEO_ENGINE2 4
+#define GUC_MAX_ENGINES_NUM (GUC_VIDEO_ENGINE2 + 1)
+
+#define GUC_RENDER_CLASS 0
+#define GUC_VIDEO_CLASS 1
+#define GUC_VIDEOENHANCE_CLASS 2
+#define GUC_BLITTER_CLASS 3
+#define GUC_COMPUTE_CLASS 4
+#define GUC_GSC_OTHER_CLASS 5
+#define GUC_LAST_ENGINE_CLASS GUC_GSC_OTHER_CLASS
+#define GUC_MAX_ENGINE_CLASSES 16
+#define GUC_MAX_INSTANCES_PER_CLASS 32
+
+/* Work item for submitting workloads into work queue of GuC. */
+#define WQ_STATUS_ACTIVE 1
+#define WQ_STATUS_SUSPENDED 2
+#define WQ_STATUS_CMD_ERROR 3
+#define WQ_STATUS_ENGINE_ID_NOT_USED 4
+#define WQ_STATUS_SUSPENDED_FROM_RESET 5
+#define WQ_TYPE_NOOP 0x4
+#define WQ_TYPE_MULTI_LRC 0x5
+#define WQ_TYPE_MASK GENMASK(7, 0)
+#define WQ_LEN_MASK GENMASK(26, 16)
+
+#define WQ_GUC_ID_MASK GENMASK(15, 0)
+#define WQ_RING_TAIL_MASK GENMASK(28, 18)
+
+struct guc_wq_item {
+ u32 header;
+ u32 context_desc;
+ u32 submit_element_info;
+ u32 fence_id;
+} __packed;
+
+struct guc_sched_wq_desc {
+ u32 head;
+ u32 tail;
+ u32 error_offset;
+ u32 wq_status;
+ u32 reserved[28];
+} __packed;
+
+/* Helper for context registration H2G */
+struct guc_ctxt_registration_info {
+ u32 flags;
+ u32 context_idx;
+ u32 engine_class;
+ u32 engine_submit_mask;
+ u32 wq_desc_lo;
+ u32 wq_desc_hi;
+ u32 wq_base_lo;
+ u32 wq_base_hi;
+ u32 wq_size;
+ u32 hwlrca_lo;
+ u32 hwlrca_hi;
+};
+#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0)
+
+/* 32-bit KLV structure as used by policy updates and others */
+struct guc_klv_generic_dw_t {
+ u32 kl;
+ u32 value;
+} __packed;
+
+/* Format of the UPDATE_CONTEXT_POLICIES H2G data packet */
+struct guc_update_engine_policy_header {
+ u32 action;
+ u32 guc_id;
+} __packed;
+
+struct guc_update_engine_policy {
+ struct guc_update_engine_policy_header header;
+ struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS];
+} __packed;
+
+/* GUC_CTL_* - Parameters for loading the GuC */
+#define GUC_CTL_LOG_PARAMS 0
+#define GUC_LOG_VALID BIT(0)
+#define GUC_LOG_NOTIFY_ON_HALF_FULL BIT(1)
+#define GUC_LOG_CAPTURE_ALLOC_UNITS BIT(2)
+#define GUC_LOG_LOG_ALLOC_UNITS BIT(3)
+#define GUC_LOG_CRASH_SHIFT 4
+#define GUC_LOG_CRASH_MASK (0x3 << GUC_LOG_CRASH_SHIFT)
+#define GUC_LOG_DEBUG_SHIFT 6
+#define GUC_LOG_DEBUG_MASK (0xF << GUC_LOG_DEBUG_SHIFT)
+#define GUC_LOG_CAPTURE_SHIFT 10
+#define GUC_LOG_CAPTURE_MASK (0x3 << GUC_LOG_CAPTURE_SHIFT)
+#define GUC_LOG_BUF_ADDR_SHIFT 12
+
+#define GUC_CTL_WA 1
+#define GUC_WA_GAM_CREDITS BIT(10)
+#define GUC_WA_DUAL_QUEUE BIT(11)
+#define GUC_WA_RCS_RESET_BEFORE_RC6 BIT(13)
+#define GUC_WA_CONTEXT_ISOLATION BIT(15)
+#define GUC_WA_PRE_PARSER BIT(14)
+#define GUC_WA_HOLD_CCS_SWITCHOUT BIT(17)
+#define GUC_WA_POLLCS BIT(18)
+#define GUC_WA_RENDER_RST_RC6_EXIT BIT(19)
+#define GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21)
+
+#define GUC_CTL_FEATURE 2
+#define GUC_CTL_ENABLE_SLPC BIT(2)
+#define GUC_CTL_DISABLE_SCHEDULER BIT(14)
+
+#define GUC_CTL_DEBUG 3
+#define GUC_LOG_VERBOSITY_SHIFT 0
+#define GUC_LOG_VERBOSITY_LOW (0 << GUC_LOG_VERBOSITY_SHIFT)
+#define GUC_LOG_VERBOSITY_MED (1 << GUC_LOG_VERBOSITY_SHIFT)
+#define GUC_LOG_VERBOSITY_HIGH (2 << GUC_LOG_VERBOSITY_SHIFT)
+#define GUC_LOG_VERBOSITY_ULTRA (3 << GUC_LOG_VERBOSITY_SHIFT)
+#define GUC_LOG_VERBOSITY_MIN 0
+#define GUC_LOG_VERBOSITY_MAX 3
+#define GUC_LOG_VERBOSITY_MASK 0x0000000f
+#define GUC_LOG_DESTINATION_MASK (3 << 4)
+#define GUC_LOG_DISABLED (1 << 6)
+#define GUC_PROFILE_ENABLED (1 << 7)
+
+#define GUC_CTL_ADS 4
+#define GUC_ADS_ADDR_SHIFT 1
+#define GUC_ADS_ADDR_MASK (0xFFFFF << GUC_ADS_ADDR_SHIFT)
+
+#define GUC_CTL_DEVID 5
+
+#define GUC_CTL_MAX_DWORDS 14
+
+/* Scheduling policy settings */
+
+#define GLOBAL_POLICY_MAX_NUM_WI 15
+
+/* Don't reset an engine upon preemption failure */
+#define GLOBAL_POLICY_DISABLE_ENGINE_RESET BIT(0)
+
+#define GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000
+
+struct guc_policies {
+ u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES];
+ /* In micro seconds. How much time to allow before DPC processing is
+ * called back via interrupt (to prevent DPC queue drain starving).
+ * Typically 1000s of micro seconds (example only, not granularity). */
+ u32 dpc_promote_time;
+
+ /* Must be set to take these new values. */
+ u32 is_valid;
+
+ /* Max number of WIs to process per call. A large value may keep CS
+ * idle. */
+ u32 max_num_work_items;
+
+ u32 global_flags;
+ u32 reserved[4];
+} __packed;
+
+/* GuC MMIO reg state struct */
+struct guc_mmio_reg {
+ u32 offset;
+ u32 value;
+ u32 flags;
+ u32 mask;
+#define GUC_REGSET_MASKED BIT(0)
+#define GUC_REGSET_MASKED_WITH_VALUE BIT(2)
+#define GUC_REGSET_RESTORE_ONLY BIT(3)
+} __packed;
+
+/* GuC register sets */
+struct guc_mmio_reg_set {
+ u32 address;
+ u16 count;
+ u16 reserved;
+} __packed;
+
+/* Generic GT SysInfo data types */
+#define GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED 0
+#define GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK 1
+#define GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI 2
+#define GUC_GENERIC_GT_SYSINFO_MAX 16
+
+/* HW info */
+struct guc_gt_system_info {
+ u8 mapping_table[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+ u32 engine_enabled_masks[GUC_MAX_ENGINE_CLASSES];
+ u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX];
+} __packed;
+
+enum {
+ GUC_CAPTURE_LIST_INDEX_PF = 0,
+ GUC_CAPTURE_LIST_INDEX_VF = 1,
+ GUC_CAPTURE_LIST_INDEX_MAX = 2,
+};
+
+/* GuC Additional Data Struct */
+struct guc_ads {
+ struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+ u32 reserved0;
+ u32 scheduler_policies;
+ u32 gt_system_info;
+ u32 reserved1;
+ u32 control_data;
+ u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES];
+ u32 eng_state_size[GUC_MAX_ENGINE_CLASSES];
+ u32 private_data;
+ u32 um_init_data;
+ u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
+ u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
+ u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX];
+ u32 reserved[14];
+} __packed;
+
+/* Engine usage stats */
+struct guc_engine_usage_record {
+ u32 current_context_index;
+ u32 last_switch_in_stamp;
+ u32 reserved0;
+ u32 total_runtime;
+ u32 reserved1[4];
+} __packed;
+
+struct guc_engine_usage {
+ struct guc_engine_usage_record engines[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+} __packed;
+
+/* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */
+enum xe_guc_recv_message {
+ XE_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1),
+ XE_GUC_RECV_MSG_EXCEPTION = BIT(30),
+};
+
+/* Page fault structures */
+struct access_counter_desc {
+ u32 dw0;
+#define ACCESS_COUNTER_TYPE BIT(0)
+#define ACCESS_COUNTER_SUBG_LO GENMASK(31, 1)
+
+ u32 dw1;
+#define ACCESS_COUNTER_SUBG_HI BIT(0)
+#define ACCESS_COUNTER_RSVD0 GENMASK(2, 1)
+#define ACCESS_COUNTER_ENG_INSTANCE GENMASK(8, 3)
+#define ACCESS_COUNTER_ENG_CLASS GENMASK(11, 9)
+#define ACCESS_COUNTER_ASID GENMASK(31, 12)
+
+ u32 dw2;
+#define ACCESS_COUNTER_VFID GENMASK(5, 0)
+#define ACCESS_COUNTER_RSVD1 GENMASK(7, 6)
+#define ACCESS_COUNTER_GRANULARITY GENMASK(10, 8)
+#define ACCESS_COUNTER_RSVD2 GENMASK(16, 11)
+#define ACCESS_COUNTER_VIRTUAL_ADDR_RANGE_LO GENMASK(31, 17)
+
+ u32 dw3;
+#define ACCESS_COUNTER_VIRTUAL_ADDR_RANGE_HI GENMASK(31, 0)
+} __packed;
+
+enum guc_um_queue_type {
+ GUC_UM_HW_QUEUE_PAGE_FAULT = 0,
+ GUC_UM_HW_QUEUE_PAGE_FAULT_RESPONSE,
+ GUC_UM_HW_QUEUE_ACCESS_COUNTER,
+ GUC_UM_HW_QUEUE_MAX
+};
+
+struct guc_um_queue_params {
+ u64 base_dpa;
+ u32 base_ggtt_address;
+ u32 size_in_bytes;
+ u32 rsvd[4];
+} __packed;
+
+struct guc_um_init_params {
+ u64 page_response_timeout_in_us;
+ u32 rsvd[6];
+ struct guc_um_queue_params queue_params[GUC_UM_HW_QUEUE_MAX];
+} __packed;
+
+enum xe_guc_fault_reply_type {
+ PFR_ACCESS = 0,
+ PFR_ENGINE,
+ PFR_VFID,
+ PFR_ALL,
+ PFR_INVALID
+};
+
+enum xe_guc_response_desc_type {
+ TLB_INVALIDATION_DESC = 0,
+ FAULT_RESPONSE_DESC
+};
+
+struct xe_guc_pagefault_desc {
+ u32 dw0;
+#define PFD_FAULT_LEVEL GENMASK(2, 0)
+#define PFD_SRC_ID GENMASK(10, 3)
+#define PFD_RSVD_0 GENMASK(17, 11)
+#define XE2_PFD_TRVA_FAULT BIT(18)
+#define PFD_ENG_INSTANCE GENMASK(24, 19)
+#define PFD_ENG_CLASS GENMASK(27, 25)
+#define PFD_PDATA_LO GENMASK(31, 28)
+
+ u32 dw1;
+#define PFD_PDATA_HI GENMASK(11, 0)
+#define PFD_PDATA_HI_SHIFT 4
+#define PFD_ASID GENMASK(31, 12)
+
+ u32 dw2;
+#define PFD_ACCESS_TYPE GENMASK(1, 0)
+#define PFD_FAULT_TYPE GENMASK(3, 2)
+#define PFD_VFID GENMASK(9, 4)
+#define PFD_RSVD_1 GENMASK(11, 10)
+#define PFD_VIRTUAL_ADDR_LO GENMASK(31, 12)
+#define PFD_VIRTUAL_ADDR_LO_SHIFT 12
+
+ u32 dw3;
+#define PFD_VIRTUAL_ADDR_HI GENMASK(31, 0)
+#define PFD_VIRTUAL_ADDR_HI_SHIFT 32
+} __packed;
+
+struct xe_guc_pagefault_reply {
+ u32 dw0;
+#define PFR_VALID BIT(0)
+#define PFR_SUCCESS BIT(1)
+#define PFR_REPLY GENMASK(4, 2)
+#define PFR_RSVD_0 GENMASK(9, 5)
+#define PFR_DESC_TYPE GENMASK(11, 10)
+#define PFR_ASID GENMASK(31, 12)
+
+ u32 dw1;
+#define PFR_VFID GENMASK(5, 0)
+#define PFR_RSVD_1 BIT(6)
+#define PFR_ENG_INSTANCE GENMASK(12, 7)
+#define PFR_ENG_CLASS GENMASK(15, 13)
+#define PFR_PDATA GENMASK(31, 16)
+
+ u32 dw2;
+#define PFR_RSVD_2 GENMASK(31, 0)
+} __packed;
+
+struct xe_guc_acc_desc {
+ u32 dw0;
+#define ACC_TYPE BIT(0)
+#define ACC_TRIGGER 0
+#define ACC_NOTIFY 1
+#define ACC_SUBG_LO GENMASK(31, 1)
+
+ u32 dw1;
+#define ACC_SUBG_HI BIT(0)
+#define ACC_RSVD0 GENMASK(2, 1)
+#define ACC_ENG_INSTANCE GENMASK(8, 3)
+#define ACC_ENG_CLASS GENMASK(11, 9)
+#define ACC_ASID GENMASK(31, 12)
+
+ u32 dw2;
+#define ACC_VFID GENMASK(5, 0)
+#define ACC_RSVD1 GENMASK(7, 6)
+#define ACC_GRANULARITY GENMASK(10, 8)
+#define ACC_RSVD2 GENMASK(16, 11)
+#define ACC_VIRTUAL_ADDR_RANGE_LO GENMASK(31, 17)
+
+ u32 dw3;
+#define ACC_VIRTUAL_ADDR_RANGE_HI GENMASK(31, 0)
+} __packed;
+
+#endif
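The xe_guc_pagefault_desc layout above packs the faulting address, ASID and engine identity into four dwords using GENMASK fields. The following is a minimal, hedged sketch (not part of the patch) of how a consumer could decode one of these descriptors with FIELD_GET(); the helper name and the pr_debug() output are illustrative only.

/* Sketch only: assumes <linux/bitfield.h> and <linux/printk.h> */
static void example_decode_pagefault(const struct xe_guc_pagefault_desc *desc)
{
	u32 asid = FIELD_GET(PFD_ASID, desc->dw1);
	u8 eng_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0);
	u8 eng_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0);
	/* The faulting address is split across dw2/dw3; reassemble it here */
	u64 addr = ((u64)FIELD_GET(PFD_VIRTUAL_ADDR_HI, desc->dw3) <<
		    PFD_VIRTUAL_ADDR_HI_SHIFT) |
		   ((u64)FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) <<
		    PFD_VIRTUAL_ADDR_LO_SHIFT);

	pr_debug("pagefault: asid %u class %u instance %u addr 0x%llx\n",
		 asid, eng_class, eng_instance, addr);
}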
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
new file mode 100644
index 000000000000..8dfd48f71a7c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_hwconfig.h"
+#include "xe_map.h"
+
+static struct xe_gt *
+guc_to_gt(struct xe_guc *guc)
+{
+ return container_of(guc, struct xe_gt, uc.guc);
+}
+
+static struct xe_device *
+guc_to_xe(struct xe_guc *guc)
+{
+ return gt_to_xe(guc_to_gt(guc));
+}
+
+static int send_get_hwconfig(struct xe_guc *guc, u32 ggtt_addr, u32 size)
+{
+ u32 action[] = {
+ XE_GUC_ACTION_GET_HWCONFIG,
+ lower_32_bits(ggtt_addr),
+ upper_32_bits(ggtt_addr),
+ size,
+ };
+
+ return xe_guc_send_mmio(guc, action, ARRAY_SIZE(action));
+}
+
+static int guc_hwconfig_size(struct xe_guc *guc, u32 *size)
+{
+ int ret = send_get_hwconfig(guc, 0, 0);
+
+ if (ret < 0)
+ return ret;
+
+ *size = ret;
+ return 0;
+}
+
+static int guc_hwconfig_copy(struct xe_guc *guc)
+{
+ int ret = send_get_hwconfig(guc, xe_bo_ggtt_addr(guc->hwconfig.bo),
+ guc->hwconfig.size);
+
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static void guc_hwconfig_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_guc *guc = arg;
+
+ xe_bo_unpin_map_no_vm(guc->hwconfig.bo);
+}
+
+int xe_guc_hwconfig_init(struct xe_guc *guc)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_bo *bo;
+ u32 size;
+ int err;
+
+ /* Initialization already done */
+ if (guc->hwconfig.bo)
+ return 0;
+
+ /*
+ * The hwconfig is the same across all GTs, so only GT0 needs to be
+ * configured
+ */
+ if (gt->info.id != XE_GT0)
+ return 0;
+
+ /* ADL_P and DG2+ support the hwconfig table */
+ if (GRAPHICS_VERx100(xe) < 1255 && xe->info.platform != XE_ALDERLAKE_P)
+ return 0;
+
+ err = guc_hwconfig_size(guc, &size);
+ if (err)
+ return err;
+ if (!size)
+ return -EINVAL;
+
+ bo = xe_bo_create_pin_map(xe, gt, NULL, PAGE_ALIGN(size),
+ ttm_bo_type_kernel,
+ XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+ XE_BO_CREATE_GGTT_BIT);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+ guc->hwconfig.bo = bo;
+ guc->hwconfig.size = size;
+
+ err = drmm_add_action_or_reset(&xe->drm, guc_hwconfig_fini, guc);
+ if (err)
+ return err;
+
+ return guc_hwconfig_copy(guc);
+}
+
+u32 xe_guc_hwconfig_size(struct xe_guc *guc)
+{
+ return !guc->hwconfig.bo ? 0 : guc->hwconfig.size;
+}
+
+void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+
+ XE_BUG_ON(!guc->hwconfig.bo);
+
+ xe_map_memcpy_from(xe, dst, &guc->hwconfig.bo->vmap, 0,
+ guc->hwconfig.size);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.h b/drivers/gpu/drm/xe/xe_guc_hwconfig.h
new file mode 100644
index 000000000000..b5794d641900
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_HWCONFIG_H_
+#define _XE_GUC_HWCONFIG_H_
+
+#include <linux/types.h>
+
+struct xe_guc;
+
+int xe_guc_hwconfig_init(struct xe_guc *guc);
+u32 xe_guc_hwconfig_size(struct xe_guc *guc);
+void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst);
+
+#endif
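xe_guc_hwconfig_init() above follows a two-step pattern: a zero-sized XE_GUC_ACTION_GET_HWCONFIG request returns the table size, and a second request copies the table into the pinned BO. Below is a hedged sketch of how a caller might consume the cached copy through this header's API; the function name, the kzalloc() buffer and the error handling are illustrative and not part of the patch.

/* Sketch only: assumes <linux/slab.h> and <linux/err.h> */
static void *example_read_hwconfig(struct xe_guc *guc, u32 *size_out)
{
	u32 size = xe_guc_hwconfig_size(guc);
	void *buf;

	if (!size)
		return ERR_PTR(-ENODEV); /* no hwconfig table on this platform */

	buf = kzalloc(size, GFP_KERNEL);
	if (!buf)
		return ERR_PTR(-ENOMEM);

	/* Copies the GuC-provided table out of the pinned BO */
	xe_guc_hwconfig_copy(guc, buf);
	*size_out = size;
	return buf;
}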
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
new file mode 100644
index 000000000000..7ec1b2bb1f8e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_guc_log.h"
+#include "xe_map.h"
+#include "xe_module.h"
+
+static struct xe_gt *
+log_to_gt(struct xe_guc_log *log)
+{
+ return container_of(log, struct xe_gt, uc.guc.log);
+}
+
+static struct xe_device *
+log_to_xe(struct xe_guc_log *log)
+{
+ return gt_to_xe(log_to_gt(log));
+}
+
+static size_t guc_log_size(void)
+{
+ /*
+ * GuC Log buffer Layout
+ *
+ * +===============================+ 00B
+ * | Crash dump state header |
+ * +-------------------------------+ 32B
+ * | Debug state header |
+ * +-------------------------------+ 64B
+ * | Capture state header |
+ * +-------------------------------+ 96B
+ * | |
+ * +===============================+ PAGE_SIZE (4KB)
+ * | Crash Dump logs |
+ * +===============================+ + CRASH_SIZE
+ * | Debug logs |
+ * +===============================+ + DEBUG_SIZE
+ * | Capture logs |
+ * +===============================+ + CAPTURE_SIZE
+ */
+ return PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE +
+ CAPTURE_BUFFER_SIZE;
+}
+
+void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p)
+{
+ struct xe_device *xe = log_to_xe(log);
+ size_t size;
+ int i, j;
+
+ XE_BUG_ON(!log->bo);
+
+ size = log->bo->size;
+
+#define DW_PER_READ 128
+ XE_BUG_ON(size % (DW_PER_READ * sizeof(u32)));
+ for (i = 0; i < size / sizeof(u32); i += DW_PER_READ) {
+ u32 read[DW_PER_READ];
+
+ xe_map_memcpy_from(xe, read, &log->bo->vmap, i * sizeof(u32),
+ DW_PER_READ * sizeof(u32));
+#define DW_PER_PRINT 4
+ for (j = 0; j < DW_PER_READ / DW_PER_PRINT; ++j) {
+ u32 *print = read + j * DW_PER_PRINT;
+
+ drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n",
+ *(print + 0), *(print + 1),
+ *(print + 2), *(print + 3));
+ }
+ }
+}
+
+static void guc_log_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_guc_log *log = arg;
+
+ xe_bo_unpin_map_no_vm(log->bo);
+}
+
+int xe_guc_log_init(struct xe_guc_log *log)
+{
+ struct xe_device *xe = log_to_xe(log);
+ struct xe_gt *gt = log_to_gt(log);
+ struct xe_bo *bo;
+ int err;
+
+ bo = xe_bo_create_pin_map(xe, gt, NULL, guc_log_size(),
+ ttm_bo_type_kernel,
+ XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+ XE_BO_CREATE_GGTT_BIT);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ xe_map_memset(xe, &bo->vmap, 0, 0, guc_log_size());
+ log->bo = bo;
+ log->level = xe_guc_log_level;
+
+ err = drmm_add_action_or_reset(&xe->drm, guc_log_fini, log);
+ if (err)
+ return err;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h
new file mode 100644
index 000000000000..2d25ab28b4b3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_log.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_LOG_H_
+#define _XE_GUC_LOG_H_
+
+#include "xe_guc_log_types.h"
+
+struct drm_printer;
+
+#if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER)
+#define CRASH_BUFFER_SIZE SZ_1M
+#define DEBUG_BUFFER_SIZE SZ_8M
+#define CAPTURE_BUFFER_SIZE SZ_2M
+#else
+#define CRASH_BUFFER_SIZE SZ_8K
+#define DEBUG_BUFFER_SIZE SZ_64K
+#define CAPTURE_BUFFER_SIZE SZ_16K
+#endif
+/*
+ * While we're using a plain log level in i915, the GuC controls are much
+ * more... "elaborate"? We have a couple of bits for verbosity, a separate
+ * bit for actual log enabling, and a separate bit for default logging -
+ * which "conveniently" ignores the enable bit.
+ */
+#define GUC_LOG_LEVEL_DISABLED 0
+#define GUC_LOG_LEVEL_NON_VERBOSE 1
+#define GUC_LOG_LEVEL_IS_ENABLED(x) ((x) > GUC_LOG_LEVEL_DISABLED)
+#define GUC_LOG_LEVEL_IS_VERBOSE(x) ((x) > GUC_LOG_LEVEL_NON_VERBOSE)
+#define GUC_LOG_LEVEL_TO_VERBOSITY(x) ({ \
+ typeof(x) _x = (x); \
+ GUC_LOG_LEVEL_IS_VERBOSE(_x) ? _x - 2 : 0; \
+})
+#define GUC_VERBOSITY_TO_LOG_LEVEL(x) ((x) + 2)
+#define GUC_LOG_LEVEL_MAX GUC_VERBOSITY_TO_LOG_LEVEL(GUC_LOG_VERBOSITY_MAX)
+
+int xe_guc_log_init(struct xe_guc_log *log);
+void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p);
+
+static inline u32
+xe_guc_log_get_level(struct xe_guc_log *log)
+{
+ return log->level;
+}
+
+#endif
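The level/verbosity macros above exist because the GUC_CTL_DEBUG init parameter (see xe_guc_fwif.h) encodes "disabled", "non-verbose" and verbosity as separate bit fields. Below is a hedged sketch of how a caller would typically translate the stored log level into those bits; the helper name is illustrative and the real wiring into the GuC init parameters lives outside this file.

/* Sketch only: GUC_LOG_DISABLED and GUC_LOG_VERBOSITY_SHIFT come from
 * xe_guc_fwif.h, which is assumed to be included by the caller.
 */
static u32 example_log_level_to_ctl_debug(struct xe_guc_log *log)
{
	u32 level = xe_guc_log_get_level(log);
	u32 flags = 0;

	if (!GUC_LOG_LEVEL_IS_VERBOSE(level))
		flags |= GUC_LOG_DISABLED;
	else
		flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) <<
			 GUC_LOG_VERBOSITY_SHIFT;

	return flags;
}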
diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h
new file mode 100644
index 000000000000..125080d138a7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_log_types.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_LOG_TYPES_H_
+#define _XE_GUC_LOG_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+
+/**
+ * struct xe_guc_log - GuC log
+ */
+struct xe_guc_log {
+ /** @level: GuC log level */
+ u32 level;
+ /** @bo: XE BO for GuC log */
+ struct xe_bo *bo;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
new file mode 100644
index 000000000000..227e30a482e3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -0,0 +1,843 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_types.h"
+#include "xe_gt_sysfs.h"
+#include "xe_guc_ct.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_pcode.h"
+#include "i915_reg_defs.h"
+#include "i915_reg.h"
+
+#include "intel_mchbar_regs.h"
+
+/* For GEN6_RP_STATE_CAP.reg to be merged when the definition moves to Xe */
+#define RP0_MASK REG_GENMASK(7, 0)
+#define RP1_MASK REG_GENMASK(15, 8)
+#define RPN_MASK REG_GENMASK(23, 16)
+
+#define GEN10_FREQ_INFO_REC _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5ef0)
+#define RPE_MASK REG_GENMASK(15, 8)
+
+#include "gt/intel_gt_regs.h"
+/* For GEN6_RPNSWREQ.reg to be merged when the definition moves to Xe */
+#define REQ_RATIO_MASK REG_GENMASK(31, 23)
+
+/* For GEN6_GT_CORE_STATUS.reg to be merged when the definition moves to Xe */
+#define RCN_MASK REG_GENMASK(2, 0)
+
+#define GEN12_RPSTAT1 _MMIO(0x1381b4)
+#define GEN12_CAGF_MASK REG_GENMASK(19, 11)
+
+#define GT_FREQUENCY_MULTIPLIER 50
+#define GEN9_FREQ_SCALER 3
+
+/**
+ * DOC: GuC Power Conservation (PC)
+ *
+ * GuC Power Conservation (PC) supports multiple features for the most
+ * efficient and performing use of the GT when GuC submission is enabled,
+ * including frequency management, Render-C states management, and various
+ * algorithms for power balancing.
+ *
+ * Single Loop Power Conservation (SLPC) is the name given to the suite of
+ * connected power conservation features in the GuC firmware. The firmware
+ * exposes a programming interface to the host for the control of SLPC.
+ *
+ * Frequency management:
+ * =====================
+ *
+ * The Xe driver enables SLPC with all of its default features and frequency
+ * selection, which varies per platform.
+ * Xe's GuC PC provides a sysfs API for frequency management:
+ *
+ * device/gt#/freq_* *read-only* files:
+ * - freq_act: The actual resolved frequency decided by PCODE.
+ * - freq_cur: The current one requested by GuC PC to the Hardware.
+ * - freq_rpn: The Render Performance (RP) N level, which is the minimal one.
+ * - freq_rpe: The Render Performance (RP) E level, which is the efficient one.
+ * - freq_rp0: The Render Performance (RP) 0 level, which is the maximum one.
+ *
+ * device/gt#/freq_* *read-write* files:
+ * - freq_min: GuC PC min request.
+ * - freq_max: GuC PC max request.
+ * If max <= min, then freq_min becomes a fixed frequency request.
+ *
+ * Render-C States:
+ * ================
+ *
+ * Render-C states are also a GuC PC feature that is now enabled in Xe for
+ * all platforms.
+ * Xe's GuC PC provides a sysfs API for Render-C States:
+ *
+ * device/gt#/rc* *read-only* files:
+ * - rc_status: Provides the actual immediate status of Render-C (rc0 or rc6).
+ * - rc6_residency: Provides the rc6_residency counter in units of 1.28 uSec.
+ * Prone to overflows.
+ */
+
+static struct xe_guc *
+pc_to_guc(struct xe_guc_pc *pc)
+{
+ return container_of(pc, struct xe_guc, pc);
+}
+
+static struct xe_device *
+pc_to_xe(struct xe_guc_pc *pc)
+{
+ struct xe_guc *guc = pc_to_guc(pc);
+ struct xe_gt *gt = container_of(guc, struct xe_gt, uc.guc);
+
+ return gt_to_xe(gt);
+}
+
+static struct xe_gt *
+pc_to_gt(struct xe_guc_pc *pc)
+{
+ return container_of(pc, struct xe_gt, uc.guc.pc);
+}
+
+static struct xe_guc_pc *
+dev_to_pc(struct device *dev)
+{
+ return &kobj_to_gt(&dev->kobj)->uc.guc.pc;
+}
+
+static struct iosys_map *
+pc_to_maps(struct xe_guc_pc *pc)
+{
+ return &pc->bo->vmap;
+}
+
+#define slpc_shared_data_read(pc_, field_) \
+ xe_map_rd_field(pc_to_xe(pc_), pc_to_maps(pc_), 0, \
+ struct slpc_shared_data, field_)
+
+#define slpc_shared_data_write(pc_, field_, val_) \
+ xe_map_wr_field(pc_to_xe(pc_), pc_to_maps(pc_), 0, \
+ struct slpc_shared_data, field_, val_)
+
+#define SLPC_EVENT(id, count) \
+ (FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID, id) | \
+ FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count))
+
+static bool pc_is_in_state(struct xe_guc_pc *pc, enum slpc_global_state state)
+{
+ xe_device_assert_mem_access(pc_to_xe(pc));
+ return slpc_shared_data_read(pc, header.global_state) == state;
+}
+
+static int pc_action_reset(struct xe_guc_pc *pc)
+{
+ struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+ int ret;
+ u32 action[] = {
+ GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+ SLPC_EVENT(SLPC_EVENT_RESET, 2),
+ xe_bo_ggtt_addr(pc->bo),
+ 0,
+ };
+
+ ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
+ if (ret)
+ drm_err(&pc_to_xe(pc)->drm, "GuC PC reset: %pe", ERR_PTR(ret));
+
+ return ret;
+}
+
+static int pc_action_shutdown(struct xe_guc_pc *pc)
+{
+ struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+ int ret;
+ u32 action[] = {
+ GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+ SLPC_EVENT(SLPC_EVENT_SHUTDOWN, 2),
+ xe_bo_ggtt_addr(pc->bo),
+ 0,
+ };
+
+ ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
+ if (ret)
+ drm_err(&pc_to_xe(pc)->drm, "GuC PC shutdown %pe",
+ ERR_PTR(ret));
+
+ return ret;
+}
+
+static int pc_action_query_task_state(struct xe_guc_pc *pc)
+{
+ struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+ int ret;
+ u32 action[] = {
+ GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+ SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2),
+ xe_bo_ggtt_addr(pc->bo),
+ 0,
+ };
+
+ if (!pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING))
+ return -EAGAIN;
+
+ /* Blocking here to ensure the results are ready before reading them */
+ ret = xe_guc_ct_send_block(ct, action, ARRAY_SIZE(action));
+ if (ret)
+ drm_err(&pc_to_xe(pc)->drm,
+ "GuC PC query task state failed: %pe", ERR_PTR(ret));
+
+ return ret;
+}
+
+static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value)
+{
+ struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+ int ret;
+ u32 action[] = {
+ GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+ SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
+ id,
+ value,
+ };
+
+ if (!pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING))
+ return -EAGAIN;
+
+ ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
+ if (ret)
+ drm_err(&pc_to_xe(pc)->drm, "GuC PC set param failed: %pe",
+ ERR_PTR(ret));
+
+ return ret;
+}
+
+static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode)
+{
+ struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
+ u32 action[] = {
+ XE_GUC_ACTION_SETUP_PC_GUCRC,
+ mode,
+ };
+ int ret;
+
+ ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
+ if (ret)
+ drm_err(&pc_to_xe(pc)->drm, "GuC RC enable failed: %pe",
+ ERR_PTR(ret));
+ return ret;
+}
+
+static u32 decode_freq(u32 raw)
+{
+ return DIV_ROUND_CLOSEST(raw * GT_FREQUENCY_MULTIPLIER,
+ GEN9_FREQ_SCALER);
+}
+
+static u32 pc_get_min_freq(struct xe_guc_pc *pc)
+{
+ u32 freq;
+
+ freq = FIELD_GET(SLPC_MIN_UNSLICE_FREQ_MASK,
+ slpc_shared_data_read(pc, task_state_data.freq));
+
+ return decode_freq(freq);
+}
+
+static int pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
+{
+ /*
+ * Let's only check for the rpn-rp0 range. If max < min,
+ * min becomes a fixed request.
+ */
+ if (freq < pc->rpn_freq || freq > pc->rp0_freq)
+ return -EINVAL;
+
+ /*
+ * GuC policy is to elevate the minimum frequency to the efficient levels.
+ * Our goal is to have the admin's choices respected.
+ */
+ pc_action_set_param(pc, SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
+ freq < pc->rpe_freq);
+
+ return pc_action_set_param(pc,
+ SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+ freq);
+}
+
+static int pc_get_max_freq(struct xe_guc_pc *pc)
+{
+ u32 freq;
+
+ freq = FIELD_GET(SLPC_MAX_UNSLICE_FREQ_MASK,
+ slpc_shared_data_read(pc, task_state_data.freq));
+
+ return decode_freq(freq);
+}
+
+static int pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
+{
+ /*
+ * Let's only check for the rpn-rp0 range. If max < min,
+ * min becomes a fixed request.
+ * Also, overclocking is not supported.
+ */
+ if (freq < pc->rpn_freq || freq > pc->rp0_freq)
+ return -EINVAL;
+
+ return pc_action_set_param(pc,
+ SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ,
+ freq);
+}
+
+static void pc_update_rp_values(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+ struct xe_device *xe = gt_to_xe(gt);
+ u32 reg;
+
+ /*
+ * For PVC we still need to use the fused RP1 as the approximation for RPe.
+ * For platforms other than PVC we get the resolved RPe directly from
+ * PCODE at a different register.
+ */
+ if (xe->info.platform == XE_PVC)
+ reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP.reg);
+ else
+ reg = xe_mmio_read32(gt, GEN10_FREQ_INFO_REC.reg);
+
+ pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
+
+ /*
+ * RPe is decided at runtime by PCODE. In the rare case where that's
+ * smaller than the fused min, we will trust the PCODE and use that
+ * as our minimum one.
+ */
+ pc->rpn_freq = min(pc->rpn_freq, pc->rpe_freq);
+}
+
+static ssize_t freq_act_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct kobject *kobj = &dev->kobj;
+ struct xe_gt *gt = kobj_to_gt(kobj);
+ u32 freq;
+ ssize_t ret;
+
+ /*
+ * When in RC6, actual frequency is 0. Let's block RC6 so we are able
+ * to verify that our freq requests are really happening.
+ */
+ ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (ret)
+ return ret;
+
+ xe_device_mem_access_get(gt_to_xe(gt));
+ freq = xe_mmio_read32(gt, GEN12_RPSTAT1.reg);
+ xe_device_mem_access_put(gt_to_xe(gt));
+
+ freq = REG_FIELD_GET(GEN12_CAGF_MASK, freq);
+ ret = sysfs_emit(buf, "%d\n", decode_freq(freq));
+
+ XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ return ret;
+}
+static DEVICE_ATTR_RO(freq_act);
+
+static ssize_t freq_cur_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct kobject *kobj = &dev->kobj;
+ struct xe_gt *gt = kobj_to_gt(kobj);
+ u32 freq;
+ ssize_t ret;
+
+ /*
+ * GuC SLPC plays with the cur freq request when GuCRC is enabled.
+ * Block RC6 for a more reliable read.
+ */
+ ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (ret)
+ return ret;
+
+ xe_device_mem_access_get(gt_to_xe(gt));
+ freq = xe_mmio_read32(gt, GEN6_RPNSWREQ.reg);
+ xe_device_mem_access_put(gt_to_xe(gt));
+
+ freq = REG_FIELD_GET(REQ_RATIO_MASK, freq);
+ ret = sysfs_emit(buf, "%d\n", decode_freq(freq));
+
+ XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ return ret;
+}
+static DEVICE_ATTR_RO(freq_cur);
+
+static ssize_t freq_rp0_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct xe_guc_pc *pc = dev_to_pc(dev);
+
+ return sysfs_emit(buf, "%d\n", pc->rp0_freq);
+}
+static DEVICE_ATTR_RO(freq_rp0);
+
+static ssize_t freq_rpe_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct xe_guc_pc *pc = dev_to_pc(dev);
+
+ pc_update_rp_values(pc);
+ return sysfs_emit(buf, "%d\n", pc->rpe_freq);
+}
+static DEVICE_ATTR_RO(freq_rpe);
+
+static ssize_t freq_rpn_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct xe_guc_pc *pc = dev_to_pc(dev);
+
+ return sysfs_emit(buf, "%d\n", pc->rpn_freq);
+}
+static DEVICE_ATTR_RO(freq_rpn);
+
+static ssize_t freq_min_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct xe_guc_pc *pc = dev_to_pc(dev);
+ struct xe_gt *gt = pc_to_gt(pc);
+ ssize_t ret;
+
+ xe_device_mem_access_get(pc_to_xe(pc));
+ mutex_lock(&pc->freq_lock);
+ if (!pc->freq_ready) {
+ /* Might be in the middle of a gt reset */
+ ret = -EAGAIN;
+ goto out;
+ }
+
+ /*
+ * GuC SLPC plays with the min freq request when GuCRC is enabled.
+ * Block RC6 for a more reliable read.
+ */
+ ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (ret)
+ goto out;
+
+ ret = pc_action_query_task_state(pc);
+ if (ret)
+ goto fw;
+
+ ret = sysfs_emit(buf, "%d\n", pc_get_min_freq(pc));
+
+fw:
+ XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+out:
+ mutex_unlock(&pc->freq_lock);
+ xe_device_mem_access_put(pc_to_xe(pc));
+ return ret;
+}
+
+static ssize_t freq_min_store(struct device *dev, struct device_attribute *attr,
+ const char *buff, size_t count)
+{
+ struct xe_guc_pc *pc = dev_to_pc(dev);
+ u32 freq;
+ ssize_t ret;
+
+ ret = kstrtou32(buff, 0, &freq);
+ if (ret)
+ return ret;
+
+ xe_device_mem_access_get(pc_to_xe(pc));
+ mutex_lock(&pc->freq_lock);
+ if (!pc->freq_ready) {
+ /* Might be in the middle of a gt reset */
+ ret = -EAGAIN;
+ goto out;
+ }
+
+ ret = pc_set_min_freq(pc, freq);
+ if (ret)
+ goto out;
+
+ pc->user_requested_min = freq;
+
+out:
+ mutex_unlock(&pc->freq_lock);
+ xe_device_mem_access_put(pc_to_xe(pc));
+ return ret ?: count;
+}
+static DEVICE_ATTR_RW(freq_min);
+
+static ssize_t freq_max_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct xe_guc_pc *pc = dev_to_pc(dev);
+ ssize_t ret;
+
+ xe_device_mem_access_get(pc_to_xe(pc));
+ mutex_lock(&pc->freq_lock);
+ if (!pc->freq_ready) {
+ /* Might be in the middle of a gt reset */
+ ret = -EAGAIN;
+ goto out;
+ }
+
+ ret = pc_action_query_task_state(pc);
+ if (ret)
+ goto out;
+
+ ret = sysfs_emit(buf, "%d\n", pc_get_max_freq(pc));
+
+out:
+ mutex_unlock(&pc->freq_lock);
+ xe_device_mem_access_put(pc_to_xe(pc));
+ return ret;
+}
+
+static ssize_t freq_max_store(struct device *dev, struct device_attribute *attr,
+ const char *buff, size_t count)
+{
+ struct xe_guc_pc *pc = dev_to_pc(dev);
+ u32 freq;
+ ssize_t ret;
+
+ ret = kstrtou32(buff, 0, &freq);
+ if (ret)
+ return ret;
+
+ xe_device_mem_access_get(pc_to_xe(pc));
+ mutex_lock(&pc->freq_lock);
+ if (!pc->freq_ready) {
+ /* Might be in the middle of a gt reset */
+ ret = -EAGAIN;
+ goto out;
+ }
+
+ ret = pc_set_max_freq(pc, freq);
+ if (ret)
+ goto out;
+
+ pc->user_requested_max = freq;
+
+out:
+ mutex_unlock(&pc->freq_lock);
+ xe_device_mem_access_put(pc_to_xe(pc));
+ return ret ?: count;
+}
+static DEVICE_ATTR_RW(freq_max);
+
+static ssize_t rc_status_show(struct device *dev,
+ struct device_attribute *attr, char *buff)
+{
+ struct xe_guc_pc *pc = dev_to_pc(dev);
+ struct xe_gt *gt = pc_to_gt(pc);
+ u32 reg;
+
+ xe_device_mem_access_get(gt_to_xe(gt));
+ reg = xe_mmio_read32(gt, GEN6_GT_CORE_STATUS.reg);
+ xe_device_mem_access_put(gt_to_xe(gt));
+
+ switch (REG_FIELD_GET(RCN_MASK, reg)) {
+ case GEN6_RC6:
+ return sysfs_emit(buff, "rc6\n");
+ case GEN6_RC0:
+ return sysfs_emit(buff, "rc0\n");
+ default:
+ return -ENOENT;
+ }
+}
+static DEVICE_ATTR_RO(rc_status);
+
+static ssize_t rc6_residency_show(struct device *dev,
+ struct device_attribute *attr, char *buff)
+{
+ struct xe_guc_pc *pc = dev_to_pc(dev);
+ struct xe_gt *gt = pc_to_gt(pc);
+ u32 reg;
+ ssize_t ret;
+
+ ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (ret)
+ return ret;
+
+ xe_device_mem_access_get(pc_to_xe(pc));
+ reg = xe_mmio_read32(gt, GEN6_GT_GFX_RC6.reg);
+ xe_device_mem_access_put(pc_to_xe(pc));
+
+ ret = sysfs_emit(buff, "%u\n", reg);
+
+ XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ return ret;
+}
+static DEVICE_ATTR_RO(rc6_residency);
+
+static const struct attribute *pc_attrs[] = {
+ &dev_attr_freq_act.attr,
+ &dev_attr_freq_cur.attr,
+ &dev_attr_freq_rp0.attr,
+ &dev_attr_freq_rpe.attr,
+ &dev_attr_freq_rpn.attr,
+ &dev_attr_freq_min.attr,
+ &dev_attr_freq_max.attr,
+ &dev_attr_rc_status.attr,
+ &dev_attr_rc6_residency.attr,
+ NULL
+};
+
+static void pc_init_fused_rp_values(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+ struct xe_device *xe = gt_to_xe(gt);
+ u32 reg;
+
+ xe_device_assert_mem_access(pc_to_xe(pc));
+
+ if (xe->info.platform == XE_PVC)
+ reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP.reg);
+ else
+ reg = xe_mmio_read32(gt, GEN6_RP_STATE_CAP.reg);
+ pc->rp0_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
+ pc->rpn_freq = REG_FIELD_GET(RPN_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
+}
+
+static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
+{
+ int ret;
+
+ lockdep_assert_held(&pc->freq_lock);
+
+ ret = pc_action_query_task_state(pc);
+ if (ret)
+ return ret;
+
+ /*
+ * GuC defaults to some RPmax that is not actually achievable without
+ * overclocking. Let's adjust it to the Hardware RP0, which is the
+ * regular maximum.
+ */
+ if (pc_get_max_freq(pc) > pc->rp0_freq)
+ pc_set_max_freq(pc, pc->rp0_freq);
+
+ /*
+ * The same thing happens for server platforms where min is listed as
+ * RPMax.
+ */
+ if (pc_get_min_freq(pc) > pc->rp0_freq)
+ pc_set_min_freq(pc, pc->rp0_freq);
+
+ return 0;
+}
+
+static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
+{
+ int ret = 0;
+
+ lockdep_assert_held(&pc->freq_lock);
+
+ if (pc->user_requested_min != 0) {
+ ret = pc_set_min_freq(pc, pc->user_requested_min);
+ if (ret)
+ return ret;
+ }
+
+ if (pc->user_requested_max != 0) {
+ ret = pc_set_max_freq(pc, pc->user_requested_max);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+static int pc_gucrc_disable(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+ int ret;
+
+ xe_device_assert_mem_access(pc_to_xe(pc));
+
+ ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL);
+ if (ret)
+ return ret;
+
+ ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (ret)
+ return ret;
+
+ xe_mmio_write32(gt, GEN9_PG_ENABLE.reg, 0);
+ xe_mmio_write32(gt, GEN6_RC_CONTROL.reg, 0);
+ xe_mmio_write32(gt, GEN6_RC_STATE.reg, 0);
+
+ XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ return 0;
+}
+
+static void pc_init_pcode_freq(struct xe_guc_pc *pc)
+{
+ u32 min = DIV_ROUND_CLOSEST(pc->rpn_freq, GT_FREQUENCY_MULTIPLIER);
+ u32 max = DIV_ROUND_CLOSEST(pc->rp0_freq, GT_FREQUENCY_MULTIPLIER);
+
+ XE_WARN_ON(xe_pcode_init_min_freq_table(pc_to_gt(pc), min, max));
+}
+
+static int pc_init_freqs(struct xe_guc_pc *pc)
+{
+ int ret;
+
+ mutex_lock(&pc->freq_lock);
+
+ ret = pc_adjust_freq_bounds(pc);
+ if (ret)
+ goto out;
+
+ ret = pc_adjust_requested_freq(pc);
+ if (ret)
+ goto out;
+
+ pc_update_rp_values(pc);
+
+ pc_init_pcode_freq(pc);
+
+ /*
+ * The frequencies are really ready for use only after the
+ * user-requested ones have been restored.
+ */
+ pc->freq_ready = true;
+
+out:
+ mutex_unlock(&pc->freq_lock);
+ return ret;
+}
+
+/**
+ * xe_guc_pc_start - Start GuC's Power Conservation component
+ * @pc: Xe_GuC_PC instance
+ */
+int xe_guc_pc_start(struct xe_guc_pc *pc)
+{
+ struct xe_device *xe = pc_to_xe(pc);
+ struct xe_gt *gt = pc_to_gt(pc);
+ u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
+ int ret;
+
+ XE_WARN_ON(!xe_device_guc_submission_enabled(xe));
+
+ xe_device_mem_access_get(pc_to_xe(pc));
+
+ memset(pc->bo->vmap.vaddr, 0, size);
+ slpc_shared_data_write(pc, header.size, size);
+
+ ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (ret)
+ return ret;
+
+ ret = pc_action_reset(pc);
+ if (ret)
+ goto out;
+
+ if (wait_for(pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING), 5)) {
+ drm_err(&pc_to_xe(pc)->drm, "GuC PC Start failed\n");
+ ret = -EIO;
+ goto out;
+ }
+
+ ret = pc_init_freqs(pc);
+ if (ret)
+ goto out;
+
+ if (xe->info.platform == XE_PVC) {
+ pc_gucrc_disable(pc);
+ ret = 0;
+ goto out;
+ }
+
+ ret = pc_action_setup_gucrc(pc, XE_GUCRC_FIRMWARE_CONTROL);
+
+out:
+ xe_device_mem_access_put(pc_to_xe(pc));
+ XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ return ret;
+}
+
+/**
+ * xe_guc_pc_stop - Stop GuC's Power Conservation component
+ * @pc: Xe_GuC_PC instance
+ */
+int xe_guc_pc_stop(struct xe_guc_pc *pc)
+{
+ int ret;
+
+ xe_device_mem_access_get(pc_to_xe(pc));
+
+ ret = pc_gucrc_disable(pc);
+ if (ret)
+ goto out;
+
+ mutex_lock(&pc->freq_lock);
+ pc->freq_ready = false;
+ mutex_unlock(&pc->freq_lock);
+
+ ret = pc_action_shutdown(pc);
+ if (ret)
+ goto out;
+
+ if (wait_for(pc_is_in_state(pc, SLPC_GLOBAL_STATE_NOT_RUNNING), 5)) {
+ drm_err(&pc_to_xe(pc)->drm, "GuC PC Shutdown failed\n");
+ ret = -EIO;
+ }
+
+out:
+ xe_device_mem_access_put(pc_to_xe(pc));
+ return ret;
+}
+
+static void pc_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_guc_pc *pc = arg;
+
+ XE_WARN_ON(xe_guc_pc_stop(pc));
+ sysfs_remove_files(pc_to_gt(pc)->sysfs, pc_attrs);
+ xe_bo_unpin_map_no_vm(pc->bo);
+}
+
+/**
+ * xe_guc_pc_init - Initialize GuC's Power Conservation component
+ * @pc: Xe_GuC_PC instance
+ */
+int xe_guc_pc_init(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_bo *bo;
+ u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
+ int err;
+
+ mutex_init(&pc->freq_lock);
+
+ bo = xe_bo_create_pin_map(xe, gt, NULL, size,
+ ttm_bo_type_kernel,
+ XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+ XE_BO_CREATE_GGTT_BIT);
+
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ pc->bo = bo;
+
+ pc_init_fused_rp_values(pc);
+
+ err = sysfs_create_files(gt->sysfs, pc_attrs);
+ if (err)
+ return err;
+
+ err = drmm_add_action_or_reset(&xe->drm, pc_fini, pc);
+ if (err)
+ return err;
+
+ return 0;
+}
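The DOC: GuC Power Conservation section above notes that writing the same value to freq_min and freq_max turns the minimum into a fixed frequency request. A hedged user-space sketch of that pattern follows; the sysfs base path is an assumption that depends on the card and gt indices of a given system, and a real tool would also order the two writes according to where the new value falls relative to the current range.

/* Sketch only: the sysfs path below is an assumed example, not a stable ABI */
#include <stdio.h>

static int example_set_fixed_freq(unsigned int mhz)
{
	static const char *files[] = { "freq_max", "freq_min" };
	char path[256];
	unsigned int i;

	for (i = 0; i < 2; i++) {
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/class/drm/card0/device/gt0/%s", files[i]);
		f = fopen(path, "w");
		if (!f)
			return -1;
		/* Same value to both files pins the GT to a fixed request */
		fprintf(f, "%u\n", mhz);
		fclose(f);
	}
	return 0;
}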
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
new file mode 100644
index 000000000000..da29e4934868
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_PC_H_
+#define _XE_GUC_PC_H_
+
+#include "xe_guc_pc_types.h"
+
+int xe_guc_pc_init(struct xe_guc_pc *pc);
+int xe_guc_pc_start(struct xe_guc_pc *pc);
+int xe_guc_pc_stop(struct xe_guc_pc *pc);
+
+#endif /* _XE_GUC_PC_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
new file mode 100644
index 000000000000..39548e03acf4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_PC_TYPES_H_
+#define _XE_GUC_PC_TYPES_H_
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+
+/**
+ * struct xe_guc_pc - GuC Power Conservation (PC)
+ */
+struct xe_guc_pc {
+ /** @bo: GGTT buffer object that is shared with GuC PC */
+ struct xe_bo *bo;
+ /** @rp0_freq: HW RP0 frequency - The Maximum one */
+ u32 rp0_freq;
+ /** @rpe_freq: HW RPe frequency - The Efficient one */
+ u32 rpe_freq;
+ /** @rpn_freq: HW RPN frequency - The Minimum one */
+ u32 rpn_freq;
+ /** @user_requested_min: Stash the minimum requested freq by user */
+ u32 user_requested_min;
+ /** @user_requested_max: Stash the maximum requested freq by user */
+ u32 user_requested_max;
+ /** @freq_lock: Let's protect the frequencies */
+ struct mutex freq_lock;
+ /** @freq_ready: Only handle freq changes if they are really ready */
+ bool freq_ready;
+};
+
+#endif /* _XE_GUC_PC_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_reg.h b/drivers/gpu/drm/xe/xe_guc_reg.h
new file mode 100644
index 000000000000..1e16a9b76ddc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_reg.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_REG_H_
+#define _XE_GUC_REG_H_
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#include "i915_reg.h"
+
+/* Definitions of GuC H/W registers, bits, etc */
+
+#define GUC_STATUS _MMIO(0xc000)
+#define GS_RESET_SHIFT 0
+#define GS_MIA_IN_RESET (0x01 << GS_RESET_SHIFT)
+#define GS_BOOTROM_SHIFT 1
+#define GS_BOOTROM_MASK (0x7F << GS_BOOTROM_SHIFT)
+#define GS_BOOTROM_RSA_FAILED (0x50 << GS_BOOTROM_SHIFT)
+#define GS_BOOTROM_JUMP_PASSED (0x76 << GS_BOOTROM_SHIFT)
+#define GS_UKERNEL_SHIFT 8
+#define GS_UKERNEL_MASK (0xFF << GS_UKERNEL_SHIFT)
+#define GS_MIA_SHIFT 16
+#define GS_MIA_MASK (0x07 << GS_MIA_SHIFT)
+#define GS_MIA_CORE_STATE (0x01 << GS_MIA_SHIFT)
+#define GS_MIA_HALT_REQUESTED (0x02 << GS_MIA_SHIFT)
+#define GS_MIA_ISR_ENTRY (0x04 << GS_MIA_SHIFT)
+#define GS_AUTH_STATUS_SHIFT 30
+#define GS_AUTH_STATUS_MASK (0x03 << GS_AUTH_STATUS_SHIFT)
+#define GS_AUTH_STATUS_BAD (0x01 << GS_AUTH_STATUS_SHIFT)
+#define GS_AUTH_STATUS_GOOD (0x02 << GS_AUTH_STATUS_SHIFT)
+
+#define SOFT_SCRATCH(n) _MMIO(0xc180 + (n) * 4)
+#define SOFT_SCRATCH_COUNT 16
+
+#define GEN11_SOFT_SCRATCH(n) _MMIO(0x190240 + (n) * 4)
+#define GEN11_SOFT_SCRATCH_COUNT 4
+
+#define UOS_RSA_SCRATCH(i) _MMIO(0xc200 + (i) * 4)
+#define UOS_RSA_SCRATCH_COUNT 64
+
+#define DMA_ADDR_0_LOW _MMIO(0xc300)
+#define DMA_ADDR_0_HIGH _MMIO(0xc304)
+#define DMA_ADDR_1_LOW _MMIO(0xc308)
+#define DMA_ADDR_1_HIGH _MMIO(0xc30c)
+#define DMA_ADDRESS_SPACE_WOPCM (7 << 16)
+#define DMA_ADDRESS_SPACE_GTT (8 << 16)
+#define DMA_COPY_SIZE _MMIO(0xc310)
+#define DMA_CTRL _MMIO(0xc314)
+#define HUC_UKERNEL (1<<9)
+#define UOS_MOVE (1<<4)
+#define START_DMA (1<<0)
+#define DMA_GUC_WOPCM_OFFSET _MMIO(0xc340)
+#define GUC_WOPCM_OFFSET_VALID (1<<0)
+#define HUC_LOADING_AGENT_VCR (0<<1)
+#define HUC_LOADING_AGENT_GUC (1<<1)
+#define GUC_WOPCM_OFFSET_SHIFT 14
+#define GUC_WOPCM_OFFSET_MASK (0x3ffff << GUC_WOPCM_OFFSET_SHIFT)
+#define GUC_MAX_IDLE_COUNT _MMIO(0xC3E4)
+
+#define HUC_STATUS2 _MMIO(0xD3B0)
+#define HUC_FW_VERIFIED (1<<7)
+
+#define GEN11_HUC_KERNEL_LOAD_INFO _MMIO(0xC1DC)
+#define HUC_LOAD_SUCCESSFUL (1 << 0)
+
+#define GUC_WOPCM_SIZE _MMIO(0xc050)
+#define GUC_WOPCM_SIZE_LOCKED (1<<0)
+#define GUC_WOPCM_SIZE_SHIFT 12
+#define GUC_WOPCM_SIZE_MASK (0xfffff << GUC_WOPCM_SIZE_SHIFT)
+
+#define GEN8_GT_PM_CONFIG _MMIO(0x138140)
+#define GEN9LP_GT_PM_CONFIG _MMIO(0x138140)
+#define GEN9_GT_PM_CONFIG _MMIO(0x13816c)
+#define GT_DOORBELL_ENABLE (1<<0)
+
+#define GEN8_GTCR _MMIO(0x4274)
+#define GEN8_GTCR_INVALIDATE (1<<0)
+
+#define GEN12_GUC_TLB_INV_CR _MMIO(0xcee8)
+#define GEN12_GUC_TLB_INV_CR_INVALIDATE (1 << 0)
+
+#define GUC_ARAT_C6DIS _MMIO(0xA178)
+
+#define GUC_SHIM_CONTROL _MMIO(0xc064)
+#define GUC_DISABLE_SRAM_INIT_TO_ZEROES (1<<0)
+#define GUC_ENABLE_READ_CACHE_LOGIC (1<<1)
+#define GUC_ENABLE_MIA_CACHING (1<<2)
+#define GUC_GEN10_MSGCH_ENABLE (1<<4)
+#define GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA (1<<9)
+#define GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA (1<<10)
+#define GUC_ENABLE_MIA_CLOCK_GATING (1<<15)
+#define GUC_GEN10_SHIM_WC_ENABLE (1<<21)
+
+#define GUC_SEND_INTERRUPT _MMIO(0xc4c8)
+#define GUC_SEND_TRIGGER (1<<0)
+#define GEN11_GUC_HOST_INTERRUPT _MMIO(0x1901f0)
+
+#define GUC_NUM_DOORBELLS 256
+
+/* format of the HW-monitored doorbell cacheline */
+struct guc_doorbell_info {
+ u32 db_status;
+#define GUC_DOORBELL_DISABLED 0
+#define GUC_DOORBELL_ENABLED 1
+
+ u32 cookie;
+ u32 reserved[14];
+} __packed;
+
+#define GEN8_DRBREGL(x) _MMIO(0x1000 + (x) * 8)
+#define GEN8_DRB_VALID (1<<0)
+#define GEN8_DRBREGU(x) _MMIO(0x1000 + (x) * 8 + 4)
+
+#define GEN12_DIST_DBS_POPULATED _MMIO(0xd08)
+#define GEN12_DOORBELLS_PER_SQIDI_SHIFT 16
+#define GEN12_DOORBELLS_PER_SQIDI (0xff)
+#define GEN12_SQIDIS_DOORBELL_EXIST (0xffff)
+
+#define DE_GUCRMR _MMIO(0x44054)
+
+#define GUC_BCS_RCS_IER _MMIO(0xC550)
+#define GUC_VCS2_VCS1_IER _MMIO(0xC554)
+#define GUC_WD_VECS_IER _MMIO(0xC558)
+#define GUC_PM_P24C_IER _MMIO(0xC55C)
+
+/* GuC Interrupt Vector */
+#define GUC_INTR_GUC2HOST BIT(15)
+#define GUC_INTR_EXEC_ERROR BIT(14)
+#define GUC_INTR_DISPLAY_EVENT BIT(13)
+#define GUC_INTR_SEM_SIG BIT(12)
+#define GUC_INTR_IOMMU2GUC BIT(11)
+#define GUC_INTR_DOORBELL_RANG BIT(10)
+#define GUC_INTR_DMA_DONE BIT(9)
+#define GUC_INTR_FATAL_ERROR BIT(8)
+#define GUC_INTR_NOTIF_ERROR BIT(7)
+#define GUC_INTR_SW_INT_6 BIT(6)
+#define GUC_INTR_SW_INT_5 BIT(5)
+#define GUC_INTR_SW_INT_4 BIT(4)
+#define GUC_INTR_SW_INT_3 BIT(3)
+#define GUC_INTR_SW_INT_2 BIT(2)
+#define GUC_INTR_SW_INT_1 BIT(1)
+#define GUC_INTR_SW_INT_0 BIT(0)
+
+#endif
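GUC_STATUS above aggregates the bootrom, uKernel, MIA and authentication state into a single register. Below is a hedged sketch of how firmware-load code could interpret it using the shift/mask pairs defined here; the helper name is illustrative, and the xe_mmio_read32() call mirrors the convention used elsewhere in this series rather than a definitive API.

/* Sketch only: success/failure interpretation of GUC_STATUS during fw load */
static int example_guc_load_status(struct xe_gt *gt)
{
	u32 status = xe_mmio_read32(gt, GUC_STATUS.reg);
	u32 bootrom = status & GS_BOOTROM_MASK;
	u32 auth = status & GS_AUTH_STATUS_MASK;

	if (bootrom == GS_BOOTROM_RSA_FAILED || auth == GS_AUTH_STATUS_BAD)
		return -ENOEXEC;	/* firmware rejected */

	if (auth == GS_AUTH_STATUS_GOOD)
		return 0;		/* load and authentication complete */

	return -EAGAIN;			/* still booting, caller should poll */
}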
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
new file mode 100644
index 000000000000..e0d424c2b78c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -0,0 +1,1695 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bitmap.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+#include <linux/dma-fence-array.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_engine.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_engine_types.h"
+#include "xe_guc_submit.h"
+#include "xe_gt.h"
+#include "xe_force_wake.h"
+#include "xe_gpu_scheduler.h"
+#include "xe_hw_engine.h"
+#include "xe_hw_fence.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_map.h"
+#include "xe_mocs.h"
+#include "xe_ring_ops_types.h"
+#include "xe_sched_job.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+#include "gt/intel_lrc_reg.h"
+
+static struct xe_gt *
+guc_to_gt(struct xe_guc *guc)
+{
+ return container_of(guc, struct xe_gt, uc.guc);
+}
+
+static struct xe_device *
+guc_to_xe(struct xe_guc *guc)
+{
+ return gt_to_xe(guc_to_gt(guc));
+}
+
+static struct xe_guc *
+engine_to_guc(struct xe_engine *e)
+{
+ return &e->gt->uc.guc;
+}
+
+/*
+ * Helpers for engine state, using an atomic as some of the bits can transition
+ * at the same time (e.g. a suspend can be happening at the same time as a
+ * schedule engine done message is being processed).
+ */
+#define ENGINE_STATE_REGISTERED (1 << 0)
+#define ENGINE_STATE_ENABLED (1 << 1)
+#define ENGINE_STATE_PENDING_ENABLE (1 << 2)
+#define ENGINE_STATE_PENDING_DISABLE (1 << 3)
+#define ENGINE_STATE_DESTROYED (1 << 4)
+#define ENGINE_STATE_SUSPENDED (1 << 5)
+#define ENGINE_STATE_RESET (1 << 6)
+#define ENGINE_STATE_KILLED (1 << 7)
+
+static bool engine_registered(struct xe_engine *e)
+{
+ return atomic_read(&e->guc->state) & ENGINE_STATE_REGISTERED;
+}
+
+static void set_engine_registered(struct xe_engine *e)
+{
+ atomic_or(ENGINE_STATE_REGISTERED, &e->guc->state);
+}
+
+static void clear_engine_registered(struct xe_engine *e)
+{
+ atomic_and(~ENGINE_STATE_REGISTERED, &e->guc->state);
+}
+
+static bool engine_enabled(struct xe_engine *e)
+{
+ return atomic_read(&e->guc->state) & ENGINE_STATE_ENABLED;
+}
+
+static void set_engine_enabled(struct xe_engine *e)
+{
+ atomic_or(ENGINE_STATE_ENABLED, &e->guc->state);
+}
+
+static void clear_engine_enabled(struct xe_engine *e)
+{
+ atomic_and(~ENGINE_STATE_ENABLED, &e->guc->state);
+}
+
+static bool engine_pending_enable(struct xe_engine *e)
+{
+ return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_ENABLE;
+}
+
+static void set_engine_pending_enable(struct xe_engine *e)
+{
+ atomic_or(ENGINE_STATE_PENDING_ENABLE, &e->guc->state);
+}
+
+static void clear_engine_pending_enable(struct xe_engine *e)
+{
+ atomic_and(~ENGINE_STATE_PENDING_ENABLE, &e->guc->state);
+}
+
+static bool engine_pending_disable(struct xe_engine *e)
+{
+ return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_DISABLE;
+}
+
+static void set_engine_pending_disable(struct xe_engine *e)
+{
+ atomic_or(ENGINE_STATE_PENDING_DISABLE, &e->guc->state);
+}
+
+static void clear_engine_pending_disable(struct xe_engine *e)
+{
+ atomic_and(~ENGINE_STATE_PENDING_DISABLE, &e->guc->state);
+}
+
+static bool engine_destroyed(struct xe_engine *e)
+{
+ return atomic_read(&e->guc->state) & ENGINE_STATE_DESTROYED;
+}
+
+static void set_engine_destroyed(struct xe_engine *e)
+{
+ atomic_or(ENGINE_STATE_DESTROYED, &e->guc->state);
+}
+
+static bool engine_banned(struct xe_engine *e)
+{
+ return (e->flags & ENGINE_FLAG_BANNED);
+}
+
+static void set_engine_banned(struct xe_engine *e)
+{
+ e->flags |= ENGINE_FLAG_BANNED;
+}
+
+static bool engine_suspended(struct xe_engine *e)
+{
+ return atomic_read(&e->guc->state) & ENGINE_STATE_SUSPENDED;
+}
+
+static void set_engine_suspended(struct xe_engine *e)
+{
+ atomic_or(ENGINE_STATE_SUSPENDED, &e->guc->state);
+}
+
+static void clear_engine_suspended(struct xe_engine *e)
+{
+ atomic_and(~ENGINE_STATE_SUSPENDED, &e->guc->state);
+}
+
+static bool engine_reset(struct xe_engine *e)
+{
+ return atomic_read(&e->guc->state) & ENGINE_STATE_RESET;
+}
+
+static void set_engine_reset(struct xe_engine *e)
+{
+ atomic_or(ENGINE_STATE_RESET, &e->guc->state);
+}
+
+static bool engine_killed(struct xe_engine *e)
+{
+ return atomic_read(&e->guc->state) & ENGINE_STATE_KILLED;
+}
+
+static void set_engine_killed(struct xe_engine *e)
+{
+ atomic_or(ENGINE_STATE_KILLED, &e->guc->state);
+}
+
+static bool engine_killed_or_banned(struct xe_engine *e)
+{
+ return engine_killed(e) || engine_banned(e);
+}
+
+static void guc_submit_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_guc *guc = arg;
+
+ xa_destroy(&guc->submission_state.engine_lookup);
+ ida_destroy(&guc->submission_state.guc_ids);
+ bitmap_free(guc->submission_state.guc_ids_bitmap);
+}
+
+#define GUC_ID_MAX 65535
+#define GUC_ID_NUMBER_MLRC 4096
+#define GUC_ID_NUMBER_SLRC (GUC_ID_MAX - GUC_ID_NUMBER_MLRC)
+#define GUC_ID_START_MLRC GUC_ID_NUMBER_SLRC
+
+static const struct xe_engine_ops guc_engine_ops;
+
+static void primelockdep(struct xe_guc *guc)
+{
+ if (!IS_ENABLED(CONFIG_LOCKDEP))
+ return;
+
+ fs_reclaim_acquire(GFP_KERNEL);
+
+ mutex_lock(&guc->submission_state.lock);
+ might_lock(&guc->submission_state.suspend.lock);
+ mutex_unlock(&guc->submission_state.lock);
+
+ fs_reclaim_release(GFP_KERNEL);
+}
+
+int xe_guc_submit_init(struct xe_guc *guc)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
+ int err;
+
+ guc->submission_state.guc_ids_bitmap =
+ bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL);
+ if (!guc->submission_state.guc_ids_bitmap)
+ return -ENOMEM;
+
+ gt->engine_ops = &guc_engine_ops;
+
+ mutex_init(&guc->submission_state.lock);
+ xa_init(&guc->submission_state.engine_lookup);
+ ida_init(&guc->submission_state.guc_ids);
+
+ spin_lock_init(&guc->submission_state.suspend.lock);
+ guc->submission_state.suspend.context = dma_fence_context_alloc(1);
+
+ primelockdep(guc);
+
+ err = drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int alloc_guc_id(struct xe_guc *guc, struct xe_engine *e)
+{
+ int ret;
+ void *ptr;
+
+ /*
+ * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
+ * worst case the user gets -ENOMEM on engine create and has to try again.
+ *
+ * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent
+ * failure.
+ */
+ lockdep_assert_held(&guc->submission_state.lock);
+
+ if (xe_engine_is_parallel(e)) {
+ void *bitmap = guc->submission_state.guc_ids_bitmap;
+
+ ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC,
+ order_base_2(e->width));
+ } else {
+ ret = ida_simple_get(&guc->submission_state.guc_ids, 0,
+ GUC_ID_NUMBER_SLRC, GFP_NOWAIT);
+ }
+ if (ret < 0)
+ return ret;
+
+ e->guc->id = ret;
+ if (xe_engine_is_parallel(e))
+ e->guc->id += GUC_ID_START_MLRC;
+
+ ptr = xa_store(&guc->submission_state.engine_lookup,
+ e->guc->id, e, GFP_NOWAIT);
+ if (IS_ERR(ptr)) {
+ ret = PTR_ERR(ptr);
+ goto err_release;
+ }
+
+ return 0;
+
+err_release:
+ ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id);
+ return ret;
+}
+
+static void release_guc_id(struct xe_guc *guc, struct xe_engine *e)
+{
+ mutex_lock(&guc->submission_state.lock);
+ xa_erase(&guc->submission_state.engine_lookup, e->guc->id);
+ if (xe_engine_is_parallel(e))
+ bitmap_release_region(guc->submission_state.guc_ids_bitmap,
+ e->guc->id - GUC_ID_START_MLRC,
+ order_base_2(e->width));
+ else
+ ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id);
+ mutex_unlock(&guc->submission_state.lock);
+}
+
+struct engine_policy {
+ u32 count;
+ struct guc_update_engine_policy h2g;
+};
+
+static u32 __guc_engine_policy_action_size(struct engine_policy *policy)
+{
+ size_t bytes = sizeof(policy->h2g.header) +
+ (sizeof(policy->h2g.klv[0]) * policy->count);
+
+ return bytes / sizeof(u32);
+}
+
+static void __guc_engine_policy_start_klv(struct engine_policy *policy,
+ u16 guc_id)
+{
+ policy->h2g.header.action =
+ XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
+ policy->h2g.header.guc_id = guc_id;
+ policy->count = 0;
+}
+
+#define MAKE_ENGINE_POLICY_ADD(func, id) \
+static void __guc_engine_policy_add_##func(struct engine_policy *policy, \
+ u32 data) \
+{ \
+ XE_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
+ \
+ policy->h2g.klv[policy->count].kl = \
+ FIELD_PREP(GUC_KLV_0_KEY, \
+ GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
+ FIELD_PREP(GUC_KLV_0_LEN, 1); \
+ policy->h2g.klv[policy->count].value = data; \
+ policy->count++; \
+}
+
+MAKE_ENGINE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
+MAKE_ENGINE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
+MAKE_ENGINE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
+#undef MAKE_ENGINE_POLICY_ADD
+
+static const int xe_engine_prio_to_guc[] = {
+ [XE_ENGINE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
+ [XE_ENGINE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
+ [XE_ENGINE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
+ [XE_ENGINE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
+};
+
+static void init_policies(struct xe_guc *guc, struct xe_engine *e)
+{
+ struct engine_policy policy;
+ enum xe_engine_priority prio = e->priority;
+ u32 timeslice_us = e->sched_props.timeslice_us;
+ u32 preempt_timeout_us = e->sched_props.preempt_timeout_us;
+
+ XE_BUG_ON(!engine_registered(e));
+
+ __guc_engine_policy_start_klv(&policy, e->guc->id);
+ __guc_engine_policy_add_priority(&policy, xe_engine_prio_to_guc[prio]);
+ __guc_engine_policy_add_execution_quantum(&policy, timeslice_us);
+ __guc_engine_policy_add_preemption_timeout(&policy, preempt_timeout_us);
+
+ xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
+ __guc_engine_policy_action_size(&policy), 0, 0);
+}
+
+static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e)
+{
+ struct engine_policy policy;
+
+ __guc_engine_policy_start_klv(&policy, e->guc->id);
+ __guc_engine_policy_add_preemption_timeout(&policy, 1);
+
+ xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
+ __guc_engine_policy_action_size(&policy), 0, 0);
+}
+
+#define PARALLEL_SCRATCH_SIZE 2048
+#define WQ_SIZE (PARALLEL_SCRATCH_SIZE / 2)
+#define WQ_OFFSET (PARALLEL_SCRATCH_SIZE - WQ_SIZE)
+#define CACHELINE_BYTES 64
+
+struct sync_semaphore {
+ u32 semaphore;
+ u8 unused[CACHELINE_BYTES - sizeof(u32)];
+};
+
+struct parallel_scratch {
+ struct guc_sched_wq_desc wq_desc;
+
+ struct sync_semaphore go;
+ struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE];
+
+ u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
+ sizeof(struct sync_semaphore) * (XE_HW_ENGINE_MAX_INSTANCE + 1)];
+
+ u32 wq[WQ_SIZE / sizeof(u32)];
+};
+
+#define parallel_read(xe_, map_, field_) \
+ xe_map_rd_field(xe_, &map_, 0, struct parallel_scratch, field_)
+#define parallel_write(xe_, map_, field_, val_) \
+ xe_map_wr_field(xe_, &map_, 0, struct parallel_scratch, field_, val_)
+
+static void __register_mlrc_engine(struct xe_guc *guc,
+ struct xe_engine *e,
+ struct guc_ctxt_registration_info *info)
+{
+#define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
+ u32 action[MAX_MLRC_REG_SIZE];
+ int len = 0;
+ int i;
+
+ XE_BUG_ON(!xe_engine_is_parallel(e));
+
+ action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
+ action[len++] = info->flags;
+ action[len++] = info->context_idx;
+ action[len++] = info->engine_class;
+ action[len++] = info->engine_submit_mask;
+ action[len++] = info->wq_desc_lo;
+ action[len++] = info->wq_desc_hi;
+ action[len++] = info->wq_base_lo;
+ action[len++] = info->wq_base_hi;
+ action[len++] = info->wq_size;
+ action[len++] = e->width;
+ action[len++] = info->hwlrca_lo;
+ action[len++] = info->hwlrca_hi;
+
+ for (i = 1; i < e->width; ++i) {
+ struct xe_lrc *lrc = e->lrc + i;
+
+ action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
+ action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
+ }
+
+ XE_BUG_ON(len > MAX_MLRC_REG_SIZE);
+#undef MAX_MLRC_REG_SIZE
+
+ xe_guc_ct_send(&guc->ct, action, len, 0, 0);
+}
+
+static void __register_engine(struct xe_guc *guc,
+ struct guc_ctxt_registration_info *info)
+{
+ u32 action[] = {
+ XE_GUC_ACTION_REGISTER_CONTEXT,
+ info->flags,
+ info->context_idx,
+ info->engine_class,
+ info->engine_submit_mask,
+ info->wq_desc_lo,
+ info->wq_desc_hi,
+ info->wq_base_lo,
+ info->wq_base_hi,
+ info->wq_size,
+ info->hwlrca_lo,
+ info->hwlrca_hi,
+ };
+
+ xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
+}
+
+static void register_engine(struct xe_engine *e)
+{
+ struct xe_guc *guc = engine_to_guc(e);
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_lrc *lrc = e->lrc;
+ struct guc_ctxt_registration_info info;
+
+ XE_BUG_ON(engine_registered(e));
+
+ memset(&info, 0, sizeof(info));
+ info.context_idx = e->guc->id;
+ info.engine_class = xe_engine_class_to_guc_class(e->class);
+ info.engine_submit_mask = e->logical_mask;
+ info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
+ info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
+ info.flags = CONTEXT_REGISTRATION_FLAG_KMD;
+
+ if (xe_engine_is_parallel(e)) {
+ u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
+ struct iosys_map map = xe_lrc_parallel_map(lrc);
+
+ info.wq_desc_lo = lower_32_bits(ggtt_addr +
+ offsetof(struct parallel_scratch, wq_desc));
+ info.wq_desc_hi = upper_32_bits(ggtt_addr +
+ offsetof(struct parallel_scratch, wq_desc));
+ info.wq_base_lo = lower_32_bits(ggtt_addr +
+ offsetof(struct parallel_scratch, wq[0]));
+ info.wq_base_hi = upper_32_bits(ggtt_addr +
+ offsetof(struct parallel_scratch, wq[0]));
+ info.wq_size = WQ_SIZE;
+
+ e->guc->wqi_head = 0;
+ e->guc->wqi_tail = 0;
+ xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
+ parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
+ }
+
+ set_engine_registered(e);
+ trace_xe_engine_register(e);
+ if (xe_engine_is_parallel(e))
+ __register_mlrc_engine(guc, e, &info);
+ else
+ __register_engine(guc, &info);
+ init_policies(guc, e);
+}
+
+static u32 wq_space_until_wrap(struct xe_engine *e)
+{
+ return (WQ_SIZE - e->guc->wqi_tail);
+}
+
+static int wq_wait_for_space(struct xe_engine *e, u32 wqi_size)
+{
+ struct xe_guc *guc = engine_to_guc(e);
+ struct xe_device *xe = guc_to_xe(guc);
+ struct iosys_map map = xe_lrc_parallel_map(e->lrc);
+ unsigned int sleep_period_ms = 1;
+
+#define AVAILABLE_SPACE \
+ CIRC_SPACE(e->guc->wqi_tail, e->guc->wqi_head, WQ_SIZE)
+ if (wqi_size > AVAILABLE_SPACE) {
+try_again:
+ e->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
+ if (wqi_size > AVAILABLE_SPACE) {
+ if (sleep_period_ms == 1024) {
+ xe_gt_reset_async(e->gt);
+ return -ENODEV;
+ }
+
+ msleep(sleep_period_ms);
+ sleep_period_ms <<= 1;
+ goto try_again;
+ }
+ }
+#undef AVAILABLE_SPACE
+
+ return 0;
+}
+
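+/*
+ * Pad the remainder of the work queue with a NOOP item so the next real item
+ * starts back at offset zero.
+ */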
+static int wq_noop_append(struct xe_engine *e)
+{
+ struct xe_guc *guc = engine_to_guc(e);
+ struct xe_device *xe = guc_to_xe(guc);
+ struct iosys_map map = xe_lrc_parallel_map(e->lrc);
+ u32 len_dw = wq_space_until_wrap(e) / sizeof(u32) - 1;
+
+ if (wq_wait_for_space(e, wq_space_until_wrap(e)))
+ return -ENODEV;
+
+ XE_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
+
+ parallel_write(xe, map, wq[e->guc->wqi_tail / sizeof(u32)],
+ FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
+ FIELD_PREP(WQ_LEN_MASK, len_dw));
+ e->guc->wqi_tail = 0;
+
+ return 0;
+}
+
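+/* Append a multi-LRC work queue item containing the ring tail of each LRC */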
+static void wq_item_append(struct xe_engine *e)
+{
+ struct xe_guc *guc = engine_to_guc(e);
+ struct xe_device *xe = guc_to_xe(guc);
+ struct iosys_map map = xe_lrc_parallel_map(e->lrc);
+ u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + 3];
+ u32 wqi_size = (e->width + 3) * sizeof(u32);
+ u32 len_dw = (wqi_size / sizeof(u32)) - 1;
+ int i = 0, j;
+
+ if (wqi_size > wq_space_until_wrap(e)) {
+ if (wq_noop_append(e))
+ return;
+ }
+ if (wq_wait_for_space(e, wqi_size))
+ return;
+
+ wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
+ FIELD_PREP(WQ_LEN_MASK, len_dw);
+ wqi[i++] = xe_lrc_descriptor(e->lrc);
+ wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, e->guc->id) |
+ FIELD_PREP(WQ_RING_TAIL_MASK, e->lrc->ring.tail / sizeof(u64));
+ wqi[i++] = 0;
+ for (j = 1; j < e->width; ++j) {
+ struct xe_lrc *lrc = e->lrc + j;
+
+ wqi[i++] = lrc->ring.tail / sizeof(u64);
+ }
+
+ XE_BUG_ON(i != wqi_size / sizeof(u32));
+
+ iosys_map_incr(&map, offsetof(struct parallel_scratch,
+ wq[e->guc->wqi_tail / sizeof(u32)]));
+ xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
+ e->guc->wqi_tail += wqi_size;
+ XE_BUG_ON(e->guc->wqi_tail > WQ_SIZE);
+
+ xe_device_wmb(xe);
+
+ map = xe_lrc_parallel_map(e->lrc);
+ parallel_write(xe, map, wq_desc.tail, e->guc->wqi_tail);
+}
+
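+/* resume_time is set to RESUME_PENDING while a scheduling enable is in flight */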
+#define RESUME_PENDING ~0x0ull
+static void submit_engine(struct xe_engine *e)
+{
+ struct xe_guc *guc = engine_to_guc(e);
+ struct xe_lrc *lrc = e->lrc;
+ u32 action[3];
+ u32 g2h_len = 0;
+ u32 num_g2h = 0;
+ int len = 0;
+ bool extra_submit = false;
+
+ XE_BUG_ON(!engine_registered(e));
+
+ if (xe_engine_is_parallel(e))
+ wq_item_append(e);
+ else
+ xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
+
+ if (engine_suspended(e) && !xe_engine_is_parallel(e))
+ return;
+
+ if (!engine_enabled(e) && !engine_suspended(e)) {
+ action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
+ action[len++] = e->guc->id;
+ action[len++] = GUC_CONTEXT_ENABLE;
+ g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
+ num_g2h = 1;
+ if (xe_engine_is_parallel(e))
+ extra_submit = true;
+
+ e->guc->resume_time = RESUME_PENDING;
+ set_engine_pending_enable(e);
+ set_engine_enabled(e);
+ trace_xe_engine_scheduling_enable(e);
+ } else {
+ action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
+ action[len++] = e->guc->id;
+ trace_xe_engine_submit(e);
+ }
+
+ xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);
+
+ if (extra_submit) {
+ len = 0;
+ action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
+ action[len++] = e->guc->id;
+ trace_xe_engine_submit(e);
+
+ xe_guc_ct_send(&guc->ct, action, len, 0, 0);
+ }
+}
+
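+/*
+ * DRM scheduler run_job vfunc: register the engine with the GuC on first use,
+ * emit the job's ring commands and submit it to the GuC.
+ */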
+static struct dma_fence *
+guc_engine_run_job(struct drm_sched_job *drm_job)
+{
+ struct xe_sched_job *job = to_xe_sched_job(drm_job);
+ struct xe_engine *e = job->engine;
+
+ XE_BUG_ON((engine_destroyed(e) || engine_pending_disable(e)) &&
+ !engine_banned(e) && !engine_suspended(e));
+
+ trace_xe_sched_job_run(job);
+
+ if (!engine_killed_or_banned(e) && !xe_sched_job_is_error(job)) {
+ if (!engine_registered(e))
+ register_engine(e);
+ e->ring_ops->emit_job(job);
+ submit_engine(e);
+ }
+
+ if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags))
+ return job->fence;
+ else
+ return dma_fence_get(job->fence);
+}
+
+static void guc_engine_free_job(struct drm_sched_job *drm_job)
+{
+ struct xe_sched_job *job = to_xe_sched_job(drm_job);
+
+ trace_xe_sched_job_free(job);
+ xe_sched_job_put(job);
+}
+
+static int guc_read_stopped(struct xe_guc *guc)
+{
+ return atomic_read(&guc->submission_state.stopped);
+}
+
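+/* Build the H2G action used to enable or disable scheduling of a context */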
+#define MAKE_SCHED_CONTEXT_ACTION(e, enable_disable) \
+ u32 action[] = { \
+ XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \
+ e->guc->id, \
+ GUC_CONTEXT_##enable_disable, \
+ }
+
+static void disable_scheduling_deregister(struct xe_guc *guc,
+ struct xe_engine *e)
+{
+ MAKE_SCHED_CONTEXT_ACTION(e, DISABLE);
+ int ret;
+
+ set_min_preemption_timeout(guc, e);
+ smp_rmb();
+ ret = wait_event_timeout(guc->ct.wq, !engine_pending_enable(e) ||
+ guc_read_stopped(guc), HZ * 5);
+ if (!ret) {
+ struct xe_gpu_scheduler *sched = &e->guc->sched;
+
+ XE_WARN_ON("Pending enable failed to respond");
+ xe_sched_submission_start(sched);
+ xe_gt_reset_async(e->gt);
+ xe_sched_tdr_queue_imm(sched);
+ return;
+ }
+
+ clear_engine_enabled(e);
+ set_engine_pending_disable(e);
+ set_engine_destroyed(e);
+ trace_xe_engine_scheduling_disable(e);
+
+ /*
+ * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
+ * handler and we are not allowed to reserve G2H space in handlers.
+ */
+ xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+ G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
+ G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
+}
+
+static void guc_engine_print(struct xe_engine *e, struct drm_printer *p);
+
+#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
+static void simple_error_capture(struct xe_engine *e)
+{
+ struct xe_guc *guc = engine_to_guc(e);
+ struct drm_printer p = drm_err_printer("");
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ u32 adj_logical_mask = e->logical_mask;
+ u32 width_mask = (0x1 << e->width) - 1;
+ int i;
+ bool cookie;
+
+ if (e->vm && !e->vm->error_capture.capture_once) {
+ e->vm->error_capture.capture_once = true;
+ cookie = dma_fence_begin_signalling();
+ for (i = 0; e->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
+ if (adj_logical_mask & BIT(i)) {
+ adj_logical_mask |= width_mask << i;
+ i += e->width;
+ } else {
+ ++i;
+ }
+ }
+
+ xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+ xe_guc_ct_print(&guc->ct, &p);
+ guc_engine_print(e, &p);
+ for_each_hw_engine(hwe, guc_to_gt(guc), id) {
+ if (hwe->class != e->hwe->class ||
+ !(BIT(hwe->logical_instance) & adj_logical_mask))
+ continue;
+ xe_hw_engine_print_state(hwe, &p);
+ }
+ xe_analyze_vm(&p, e->vm, e->gt->info.id);
+ xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+ dma_fence_end_signalling(cookie);
+ }
+}
+#else
+static void simple_error_capture(struct xe_engine *e)
+{
+}
+#endif
+
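+/*
+ * TDR entry point: disable scheduling on the offending engine, mark
+ * outstanding jobs as errored and restart the scheduler; a GT reset is
+ * triggered if the GuC fails to respond or a kernel / VM job times out.
+ */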
+static enum drm_gpu_sched_stat
+guc_engine_timedout_job(struct drm_sched_job *drm_job)
+{
+ struct xe_sched_job *job = to_xe_sched_job(drm_job);
+ struct xe_sched_job *tmp_job;
+ struct xe_engine *e = job->engine;
+ struct xe_gpu_scheduler *sched = &e->guc->sched;
+ struct xe_device *xe = guc_to_xe(engine_to_guc(e));
+ int err = -ETIME;
+ int i = 0;
+
+ if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
+ XE_WARN_ON(e->flags & ENGINE_FLAG_KERNEL);
+ XE_WARN_ON(e->flags & ENGINE_FLAG_VM && !engine_killed(e));
+
+ drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
+ xe_sched_job_seqno(job), e->guc->id, e->flags);
+ simple_error_capture(e);
+ } else {
+ drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
+ xe_sched_job_seqno(job), e->guc->id, e->flags);
+ }
+ trace_xe_sched_job_timedout(job);
+
+ /* Kill the run_job entry point */
+ xe_sched_submission_stop(sched);
+
+ /*
+ * Kernel jobs should never fail, nor should VM jobs; if either does,
+ * something has gone wrong and the GT needs a reset
+ */
+ if (e->flags & ENGINE_FLAG_KERNEL ||
+ (e->flags & ENGINE_FLAG_VM && !engine_killed(e))) {
+ if (!xe_sched_invalidate_job(job, 2)) {
+ xe_sched_add_pending_job(sched, job);
+ xe_sched_submission_start(sched);
+ xe_gt_reset_async(e->gt);
+ goto out;
+ }
+ }
+
+ /* Engine state now stable, disable scheduling if needed */
+ if (engine_enabled(e)) {
+ struct xe_guc *guc = engine_to_guc(e);
+ int ret;
+
+ if (engine_reset(e))
+ err = -EIO;
+ set_engine_banned(e);
+ xe_engine_get(e);
+ disable_scheduling_deregister(engine_to_guc(e), e);
+
+ /*
+ * Must wait for scheduling to be disabled before signalling
+ * any fences; if the GT is broken, the GT reset code should
+ * signal us.
+ *
+ * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
+ * error) messages which can cause the schedule disable to get
+ * lost. If this occurs, trigger a GT reset to recover.
+ */
+ smp_rmb();
+ ret = wait_event_timeout(guc->ct.wq,
+ !engine_pending_disable(e) ||
+ guc_read_stopped(guc), HZ * 5);
+ if (!ret) {
+ XE_WARN_ON("Schedule disable failed to respond");
+ xe_sched_add_pending_job(sched, job);
+ xe_sched_submission_start(sched);
+ xe_gt_reset_async(e->gt);
+ xe_sched_tdr_queue_imm(sched);
+ goto out;
+ }
+ }
+
+ /* Stop fence signaling */
+ xe_hw_fence_irq_stop(e->fence_irq);
+
+ /*
+ * Fence state now stable, stop / start scheduler which cleans up any
+ * fences that are complete
+ */
+ xe_sched_add_pending_job(sched, job);
+ xe_sched_submission_start(sched);
+ xe_sched_tdr_queue_imm(&e->guc->sched);
+
+ /* Mark all outstanding jobs as bad, thus completing them */
+ spin_lock(&sched->base.job_list_lock);
+ list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
+ xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
+ spin_unlock(&sched->base.job_list_lock);
+
+ /* Start fence signaling */
+ xe_hw_fence_irq_start(e->fence_irq);
+
+out:
+ return DRM_GPU_SCHED_STAT_NOMINAL;
+}
+
+static void __guc_engine_fini_async(struct work_struct *w)
+{
+ struct xe_guc_engine *ge =
+ container_of(w, struct xe_guc_engine, fini_async);
+ struct xe_engine *e = ge->engine;
+ struct xe_guc *guc = engine_to_guc(e);
+
+ trace_xe_engine_destroy(e);
+
+ if (e->flags & ENGINE_FLAG_PERSISTENT)
+ xe_device_remove_persitent_engines(gt_to_xe(e->gt), e);
+ release_guc_id(guc, e);
+ xe_sched_entity_fini(&ge->entity);
+ xe_sched_fini(&ge->sched);
+
+ if (!(e->flags & ENGINE_FLAG_KERNEL)) {
+ kfree(ge);
+ xe_engine_fini(e);
+ }
+}
+
+static void guc_engine_fini_async(struct xe_engine *e)
+{
+ bool kernel = e->flags & ENGINE_FLAG_KERNEL;
+
+ INIT_WORK(&e->guc->fini_async, __guc_engine_fini_async);
+ queue_work(system_unbound_wq, &e->guc->fini_async);
+
+ /* We must block on kernel engines so slabs are empty on driver unload */
+ if (kernel) {
+ struct xe_guc_engine *ge = e->guc;
+
+ flush_work(&ge->fini_async);
+ kfree(ge);
+ xe_engine_fini(e);
+ }
+}
+
+static void __guc_engine_fini(struct xe_guc *guc, struct xe_engine *e)
+{
+ /*
+ * Might be done from within the GPU scheduler, need to do async as we
+ * fini the scheduler when the engine is fini'd and the scheduler can't
+ * complete fini within itself (circular dependency). Async resolves
+ * this and we don't really care when everything is fini'd, just that
+ * it is.
+ */
+ guc_engine_fini_async(e);
+}
+
+static void __guc_engine_process_msg_cleanup(struct xe_sched_msg *msg)
+{
+ struct xe_engine *e = msg->private_data;
+ struct xe_guc *guc = engine_to_guc(e);
+
+ XE_BUG_ON(e->flags & ENGINE_FLAG_KERNEL);
+ trace_xe_engine_cleanup_entity(e);
+
+ if (engine_registered(e))
+ disable_scheduling_deregister(guc, e);
+ else
+ __guc_engine_fini(guc, e);
+}
+
+static bool guc_engine_allowed_to_change_state(struct xe_engine *e)
+{
+ return !engine_killed_or_banned(e) && engine_registered(e);
+}
+
+static void __guc_engine_process_msg_set_sched_props(struct xe_sched_msg *msg)
+{
+ struct xe_engine *e = msg->private_data;
+ struct xe_guc *guc = engine_to_guc(e);
+
+ if (guc_engine_allowed_to_change_state(e))
+ init_policies(guc, e);
+ kfree(msg);
+}
+
+static void suspend_fence_signal(struct xe_engine *e)
+{
+ struct xe_guc *guc = engine_to_guc(e);
+
+ XE_BUG_ON(!engine_suspended(e) && !engine_killed(e) &&
+ !guc_read_stopped(guc));
+ XE_BUG_ON(!e->guc->suspend_pending);
+
+ e->guc->suspend_pending = false;
+ smp_wmb();
+ wake_up(&e->guc->suspend_wait);
+}
+
+static void __guc_engine_process_msg_suspend(struct xe_sched_msg *msg)
+{
+ struct xe_engine *e = msg->private_data;
+ struct xe_guc *guc = engine_to_guc(e);
+
+ if (guc_engine_allowed_to_change_state(e) && !engine_suspended(e) &&
+ engine_enabled(e)) {
+ wait_event(guc->ct.wq, e->guc->resume_time != RESUME_PENDING ||
+ guc_read_stopped(guc));
+
+ if (!guc_read_stopped(guc)) {
+ MAKE_SCHED_CONTEXT_ACTION(e, DISABLE);
+ s64 since_resume_ms =
+ ktime_ms_delta(ktime_get(),
+ e->guc->resume_time);
+ s64 wait_ms = e->vm->preempt.min_run_period_ms -
+ since_resume_ms;
+
+ if (wait_ms > 0 && e->guc->resume_time)
+ msleep(wait_ms);
+
+ set_engine_suspended(e);
+ clear_engine_enabled(e);
+ set_engine_pending_disable(e);
+ trace_xe_engine_scheduling_disable(e);
+
+ xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+ G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
+ }
+ } else if (e->guc->suspend_pending) {
+ set_engine_suspended(e);
+ suspend_fence_signal(e);
+ }
+}
+
+static void __guc_engine_process_msg_resume(struct xe_sched_msg *msg)
+{
+ struct xe_engine *e = msg->private_data;
+ struct xe_guc *guc = engine_to_guc(e);
+
+ if (guc_engine_allowed_to_change_state(e)) {
+ MAKE_SCHED_CONTEXT_ACTION(e, ENABLE);
+
+ e->guc->resume_time = RESUME_PENDING;
+ clear_engine_suspended(e);
+ set_engine_pending_enable(e);
+ set_engine_enabled(e);
+ trace_xe_engine_scheduling_enable(e);
+
+ xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+ G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
+ } else {
+ clear_engine_suspended(e);
+ }
+}
+
+#define CLEANUP 1 /* Non-zero values to catch uninitialized msg */
+#define SET_SCHED_PROPS 2
+#define SUSPEND 3
+#define RESUME 4
+
+static void guc_engine_process_msg(struct xe_sched_msg *msg)
+{
+ trace_xe_sched_msg_recv(msg);
+
+ switch (msg->opcode) {
+ case CLEANUP:
+ __guc_engine_process_msg_cleanup(msg);
+ break;
+ case SET_SCHED_PROPS:
+ __guc_engine_process_msg_set_sched_props(msg);
+ break;
+ case SUSPEND:
+ __guc_engine_process_msg_suspend(msg);
+ break;
+ case RESUME:
+ __guc_engine_process_msg_resume(msg);
+ break;
+ default:
+ XE_BUG_ON("Unknown message type");
+ }
+}
+
+static const struct drm_sched_backend_ops drm_sched_ops = {
+ .run_job = guc_engine_run_job,
+ .free_job = guc_engine_free_job,
+ .timedout_job = guc_engine_timedout_job,
+};
+
+static const struct xe_sched_backend_ops xe_sched_ops = {
+ .process_msg = guc_engine_process_msg,
+};
+
+static int guc_engine_init(struct xe_engine *e)
+{
+ struct xe_gpu_scheduler *sched;
+ struct xe_guc *guc = engine_to_guc(e);
+ struct xe_guc_engine *ge;
+ long timeout;
+ int err;
+
+ XE_BUG_ON(!xe_device_guc_submission_enabled(guc_to_xe(guc)));
+
+ ge = kzalloc(sizeof(*ge), GFP_KERNEL);
+ if (!ge)
+ return -ENOMEM;
+
+ e->guc = ge;
+ ge->engine = e;
+ init_waitqueue_head(&ge->suspend_wait);
+
+ timeout = xe_vm_no_dma_fences(e->vm) ? MAX_SCHEDULE_TIMEOUT : HZ * 5;
+ err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, NULL,
+ e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
+ 64, timeout, guc_to_gt(guc)->ordered_wq, NULL,
+ e->name, gt_to_xe(e->gt)->drm.dev);
+ if (err)
+ goto err_free;
+
+ sched = &ge->sched;
+ err = xe_sched_entity_init(&ge->entity, sched);
+ if (err)
+ goto err_sched;
+ e->priority = XE_ENGINE_PRIORITY_NORMAL;
+
+ mutex_lock(&guc->submission_state.lock);
+
+ err = alloc_guc_id(guc, e);
+ if (err)
+ goto err_entity;
+
+ e->entity = &ge->entity;
+
+ if (guc_read_stopped(guc))
+ xe_sched_stop(sched);
+
+ mutex_unlock(&guc->submission_state.lock);
+
+ switch (e->class) {
+ case XE_ENGINE_CLASS_RENDER:
+ sprintf(e->name, "rcs%d", e->guc->id);
+ break;
+ case XE_ENGINE_CLASS_VIDEO_DECODE:
+ sprintf(e->name, "vcs%d", e->guc->id);
+ break;
+ case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+ sprintf(e->name, "vecs%d", e->guc->id);
+ break;
+ case XE_ENGINE_CLASS_COPY:
+ sprintf(e->name, "bcs%d", e->guc->id);
+ break;
+ case XE_ENGINE_CLASS_COMPUTE:
+ sprintf(e->name, "ccs%d", e->guc->id);
+ break;
+ default:
+ XE_WARN_ON(e->class);
+ }
+
+ trace_xe_engine_create(e);
+
+ return 0;
+
+err_entity:
+ xe_sched_entity_fini(&ge->entity);
+err_sched:
+ xe_sched_fini(&ge->sched);
+err_free:
+ kfree(ge);
+
+ return err;
+}
+
+static void guc_engine_kill(struct xe_engine *e)
+{
+ trace_xe_engine_kill(e);
+ set_engine_killed(e);
+ xe_sched_tdr_queue_imm(&e->guc->sched);
+}
+
+static void guc_engine_add_msg(struct xe_engine *e, struct xe_sched_msg *msg,
+ u32 opcode)
+{
+ INIT_LIST_HEAD(&msg->link);
+ msg->opcode = opcode;
+ msg->private_data = e;
+
+ trace_xe_sched_msg_add(msg);
+ xe_sched_add_msg(&e->guc->sched, msg);
+}
+
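+/*
+ * Cleanup, suspend and resume messages are statically allocated within the
+ * engine so they can be queued without a memory allocation.
+ */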
+#define STATIC_MSG_CLEANUP 0
+#define STATIC_MSG_SUSPEND 1
+#define STATIC_MSG_RESUME 2
+static void guc_engine_fini(struct xe_engine *e)
+{
+ struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_CLEANUP;
+
+ if (!(e->flags & ENGINE_FLAG_KERNEL))
+ guc_engine_add_msg(e, msg, CLEANUP);
+ else
+ __guc_engine_fini(engine_to_guc(e), e);
+}
+
+static int guc_engine_set_priority(struct xe_engine *e,
+ enum xe_engine_priority priority)
+{
+ struct xe_sched_msg *msg;
+
+ if (e->priority == priority || engine_killed_or_banned(e))
+ return 0;
+
+ msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ guc_engine_add_msg(e, msg, SET_SCHED_PROPS);
+ e->priority = priority;
+
+ return 0;
+}
+
+static int guc_engine_set_timeslice(struct xe_engine *e, u32 timeslice_us)
+{
+ struct xe_sched_msg *msg;
+
+ if (e->sched_props.timeslice_us == timeslice_us ||
+ engine_killed_or_banned(e))
+ return 0;
+
+ msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ e->sched_props.timeslice_us = timeslice_us;
+ guc_engine_add_msg(e, msg, SET_SCHED_PROPS);
+
+ return 0;
+}
+
+static int guc_engine_set_preempt_timeout(struct xe_engine *e,
+ u32 preempt_timeout_us)
+{
+ struct xe_sched_msg *msg;
+
+ if (e->sched_props.preempt_timeout_us == preempt_timeout_us ||
+ engine_killed_or_banned(e))
+ return 0;
+
+ msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ e->sched_props.preempt_timeout_us = preempt_timeout_us;
+ guc_engine_add_msg(e, msg, SET_SCHED_PROPS);
+
+ return 0;
+}
+
+static int guc_engine_set_job_timeout(struct xe_engine *e, u32 job_timeout_ms)
+{
+ struct xe_gpu_scheduler *sched = &e->guc->sched;
+
+ XE_BUG_ON(engine_registered(e));
+ XE_BUG_ON(engine_banned(e));
+ XE_BUG_ON(engine_killed(e));
+
+ sched->base.timeout = job_timeout_ms;
+
+ return 0;
+}
+
+static int guc_engine_suspend(struct xe_engine *e)
+{
+ struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_SUSPEND;
+
+ if (engine_killed_or_banned(e) || e->guc->suspend_pending)
+ return -EINVAL;
+
+ e->guc->suspend_pending = true;
+ guc_engine_add_msg(e, msg, SUSPEND);
+
+ return 0;
+}
+
+static void guc_engine_suspend_wait(struct xe_engine *e)
+{
+ struct xe_guc *guc = engine_to_guc(e);
+
+ wait_event(e->guc->suspend_wait, !e->guc->suspend_pending ||
+ guc_read_stopped(guc));
+}
+
+static void guc_engine_resume(struct xe_engine *e)
+{
+ struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_RESUME;
+
+ XE_BUG_ON(e->guc->suspend_pending);
+
+ xe_mocs_init_engine(e);
+ guc_engine_add_msg(e, msg, RESUME);
+}
+
+/*
+ * These functions form an abstraction layer which other parts of XE can
+ * use to trap into the GuC backend. Aside from init, they really
+ * shouldn't do much other than trap into the DRM scheduler, which
+ * synchronizes these operations.
+ */
+static const struct xe_engine_ops guc_engine_ops = {
+ .init = guc_engine_init,
+ .kill = guc_engine_kill,
+ .fini = guc_engine_fini,
+ .set_priority = guc_engine_set_priority,
+ .set_timeslice = guc_engine_set_timeslice,
+ .set_preempt_timeout = guc_engine_set_preempt_timeout,
+ .set_job_timeout = guc_engine_set_job_timeout,
+ .suspend = guc_engine_suspend,
+ .suspend_wait = guc_engine_suspend_wait,
+ .resume = guc_engine_resume,
+};
+
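+/*
+ * Stop an engine for a GT reset: quiesce its scheduler, clean up state tied
+ * to lost G2H messages and ban any non-kernel / non-VM engine caught with an
+ * unfinished job or one that has been reset too many times.
+ */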
+static void guc_engine_stop(struct xe_guc *guc, struct xe_engine *e)
+{
+ struct xe_gpu_scheduler *sched = &e->guc->sched;
+
+ /* Stop scheduling + flush any DRM scheduler operations */
+ xe_sched_submission_stop(sched);
+
+ /* Clean up lost G2H + reset engine state */
+ if (engine_destroyed(e) && engine_registered(e)) {
+ if (engine_banned(e))
+ xe_engine_put(e);
+ else
+ __guc_engine_fini(guc, e);
+ }
+ if (e->guc->suspend_pending) {
+ set_engine_suspended(e);
+ suspend_fence_signal(e);
+ }
+ atomic_and(ENGINE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED,
+ &e->guc->state);
+ e->guc->resume_time = 0;
+ trace_xe_engine_stop(e);
+
+ /*
+ * Ban any engine (aside from kernel engines and engines used for VM
+ * ops) that has a started but not completed job, or whose job has gone
+ * through a GT reset more than twice.
+ */
+ if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM))) {
+ struct xe_sched_job *job = xe_sched_first_pending_job(sched);
+
+ if (job) {
+ if ((xe_sched_job_started(job) &&
+ !xe_sched_job_completed(job)) ||
+ xe_sched_invalidate_job(job, 2)) {
+ trace_xe_sched_job_ban(job);
+ xe_sched_tdr_queue_imm(&e->guc->sched);
+ set_engine_banned(e);
+ }
+ }
+ }
+}
+
+int xe_guc_submit_reset_prepare(struct xe_guc *guc)
+{
+ int ret;
+
+ /*
+ * Using an atomic here rather than submission_state.lock as this
+ * function can be called while holding the CT lock (engine reset
+ * failure). submission_state.lock needs the CT lock to resubmit jobs.
+ * Atomic is not ideal, but it works to protect against concurrent
+ * resets and to release any TDRs waiting on
+ * guc->submission_state.stopped.
+ */
+ ret = atomic_fetch_or(1, &guc->submission_state.stopped);
+ smp_wmb();
+ wake_up_all(&guc->ct.wq);
+
+ return ret;
+}
+
+void xe_guc_submit_reset_wait(struct xe_guc *guc)
+{
+ wait_event(guc->ct.wq, !guc_read_stopped(guc));
+}
+
+int xe_guc_submit_stop(struct xe_guc *guc)
+{
+ struct xe_engine *e;
+ unsigned long index;
+
+ XE_BUG_ON(guc_read_stopped(guc) != 1);
+
+ mutex_lock(&guc->submission_state.lock);
+
+ xa_for_each(&guc->submission_state.engine_lookup, index, e)
+ guc_engine_stop(guc, e);
+
+ mutex_unlock(&guc->submission_state.lock);
+
+ /*
+ * No one can enter the backend at this point, aside from new engine
+ * creation, which is protected by guc->submission_state.lock.
+ */
+
+ return 0;
+}
+
+static void guc_engine_start(struct xe_engine *e)
+{
+ struct xe_gpu_scheduler *sched = &e->guc->sched;
+
+ if (!engine_killed_or_banned(e)) {
+ int i;
+
+ trace_xe_engine_resubmit(e);
+ for (i = 0; i < e->width; ++i)
+ xe_lrc_set_ring_head(e->lrc + i, e->lrc[i].ring.tail);
+ xe_sched_resubmit_jobs(sched);
+ }
+
+ xe_sched_submission_start(sched);
+}
+
+int xe_guc_submit_start(struct xe_guc *guc)
+{
+ struct xe_engine *e;
+ unsigned long index;
+
+ XE_BUG_ON(guc_read_stopped(guc) != 1);
+
+ mutex_lock(&guc->submission_state.lock);
+ atomic_dec(&guc->submission_state.stopped);
+ xa_for_each(&guc->submission_state.engine_lookup, index, e)
+ guc_engine_start(e);
+ mutex_unlock(&guc->submission_state.lock);
+
+ wake_up_all(&guc->ct.wq);
+
+ return 0;
+}
+
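+/* Map a guc_id from a G2H message back to its xe_engine, validating the id */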
+static struct xe_engine *
+g2h_engine_lookup(struct xe_guc *guc, u32 guc_id)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_engine *e;
+
+ if (unlikely(guc_id >= GUC_ID_MAX)) {
+ drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
+ return NULL;
+ }
+
+ e = xa_load(&guc->submission_state.engine_lookup, guc_id);
+ if (unlikely(!e)) {
+ drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id);
+ return NULL;
+ }
+
+ XE_BUG_ON(e->guc->id != guc_id);
+
+ return e;
+}
+
+static void deregister_engine(struct xe_guc *guc, struct xe_engine *e)
+{
+ u32 action[] = {
+ XE_GUC_ACTION_DEREGISTER_CONTEXT,
+ e->guc->id,
+ };
+
+ trace_xe_engine_deregister(e);
+
+ xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
+}
+
+int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_engine *e;
+ u32 guc_id = msg[0];
+
+ if (unlikely(len < 2)) {
+ drm_err(&xe->drm, "Invalid length %u", len);
+ return -EPROTO;
+ }
+
+ e = g2h_engine_lookup(guc, guc_id);
+ if (unlikely(!e))
+ return -EPROTO;
+
+ if (unlikely(!engine_pending_enable(e) &&
+ !engine_pending_disable(e))) {
+ drm_err(&xe->drm, "Unexpected engine state 0x%04x",
+ atomic_read(&e->guc->state));
+ return -EPROTO;
+ }
+
+ trace_xe_engine_scheduling_done(e);
+
+ if (engine_pending_enable(e)) {
+ e->guc->resume_time = ktime_get();
+ clear_engine_pending_enable(e);
+ smp_wmb();
+ wake_up_all(&guc->ct.wq);
+ } else {
+ clear_engine_pending_disable(e);
+ if (e->guc->suspend_pending) {
+ suspend_fence_signal(e);
+ } else {
+ if (engine_banned(e)) {
+ smp_wmb();
+ wake_up_all(&guc->ct.wq);
+ }
+ deregister_engine(guc, e);
+ }
+ }
+
+ return 0;
+}
+
+int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_engine *e;
+ u32 guc_id = msg[0];
+
+ if (unlikely(len < 1)) {
+ drm_err(&xe->drm, "Invalid length %u", len);
+ return -EPROTO;
+ }
+
+ e = g2h_engine_lookup(guc, guc_id);
+ if (unlikely(!e))
+ return -EPROTO;
+
+ if (!engine_destroyed(e) || engine_pending_disable(e) ||
+ engine_pending_enable(e) || engine_enabled(e)) {
+ drm_err(&xe->drm, "Unexpected engine state 0x%04x",
+ atomic_read(&e->guc->state));
+ return -EPROTO;
+ }
+
+ trace_xe_engine_deregister_done(e);
+
+ clear_engine_registered(e);
+ if (engine_banned(e))
+ xe_engine_put(e);
+ else
+ __guc_engine_fini(guc, e);
+
+ return 0;
+}
+
+int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_engine *e;
+ u32 guc_id = msg[0];
+
+ if (unlikely(len < 1)) {
+ drm_err(&xe->drm, "Invalid length %u", len);
+ return -EPROTO;
+ }
+
+ e = g2h_engine_lookup(guc, guc_id);
+ if (unlikely(!e))
+ return -EPROTO;
+
+ drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id);
+
+ /* FIXME: Do error capture, most likely async */
+
+ trace_xe_engine_reset(e);
+
+ /*
+ * A banned engine is a NOP at this point (it came from
+ * guc_engine_timedout_job). Otherwise, kick the DRM scheduler to
+ * cancel the jobs by immediately queuing the TDR, which invokes
+ * guc_engine_timedout_job.
+ */
+ set_engine_reset(e);
+ if (!engine_banned(e))
+ xe_sched_tdr_queue_imm(&e->guc->sched);
+
+ return 0;
+}
+
+int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
+ u32 len)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_engine *e;
+ u32 guc_id = msg[0];
+
+ if (unlikely(len < 1)) {
+ drm_err(&xe->drm, "Invalid length %u", len);
+ return -EPROTO;
+ }
+
+ e = g2h_engine_lookup(guc, guc_id);
+ if (unlikely(!e))
+ return -EPROTO;
+
+ drm_warn(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id);
+ trace_xe_engine_memory_cat_error(e);
+
+ /* Treat the same as engine reset */
+ set_engine_reset(e);
+ if (!engine_banned(e))
+ xe_sched_tdr_queue_imm(&e->guc->sched);
+
+ return 0;
+}
+
+int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ u8 guc_class, instance;
+ u32 reason;
+
+ if (unlikely(len != 3)) {
+ drm_err(&xe->drm, "Invalid length %u", len);
+ return -EPROTO;
+ }
+
+ guc_class = msg[0];
+ instance = msg[1];
+ reason = msg[2];
+
+ /* Unexpected failure of a hardware feature, log an actual error */
+ drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X",
+ guc_class, instance, reason);
+
+ xe_gt_reset_async(guc_to_gt(guc));
+
+ return 0;
+}
+
+static void guc_engine_wq_print(struct xe_engine *e, struct drm_printer *p)
+{
+ struct xe_guc *guc = engine_to_guc(e);
+ struct xe_device *xe = guc_to_xe(guc);
+ struct iosys_map map = xe_lrc_parallel_map(e->lrc);
+ int i;
+
+ drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
+ e->guc->wqi_head, parallel_read(xe, map, wq_desc.head));
+ drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
+ e->guc->wqi_tail, parallel_read(xe, map, wq_desc.tail));
+ drm_printf(p, "\tWQ status: %u\n",
+ parallel_read(xe, map, wq_desc.wq_status));
+ if (parallel_read(xe, map, wq_desc.head) !=
+ parallel_read(xe, map, wq_desc.tail)) {
+ for (i = parallel_read(xe, map, wq_desc.head);
+ i != parallel_read(xe, map, wq_desc.tail);
+ i = (i + sizeof(u32)) % WQ_SIZE)
+ drm_printf(p, "\tWQ[%ld]: 0x%08x\n", i / sizeof(u32),
+ parallel_read(xe, map, wq[i / sizeof(u32)]));
+ }
+}
+
+static void guc_engine_print(struct xe_engine *e, struct drm_printer *p)
+{
+ struct xe_gpu_scheduler *sched = &e->guc->sched;
+ struct xe_sched_job *job;
+ int i;
+
+ drm_printf(p, "\nGuC ID: %d\n", e->guc->id);
+ drm_printf(p, "\tName: %s\n", e->name);
+ drm_printf(p, "\tClass: %d\n", e->class);
+ drm_printf(p, "\tLogical mask: 0x%x\n", e->logical_mask);
+ drm_printf(p, "\tWidth: %d\n", e->width);
+ drm_printf(p, "\tRef: %d\n", kref_read(&e->refcount));
+ drm_printf(p, "\tTimeout: %ld (ms)\n", sched->base.timeout);
+ drm_printf(p, "\tTimeslice: %u (us)\n", e->sched_props.timeslice_us);
+ drm_printf(p, "\tPreempt timeout: %u (us)\n",
+ e->sched_props.preempt_timeout_us);
+ for (i = 0; i < e->width; ++i) {
+ struct xe_lrc *lrc = e->lrc + i;
+
+ drm_printf(p, "\tHW Context Desc: 0x%08x\n",
+ lower_32_bits(xe_lrc_ggtt_addr(lrc)));
+ drm_printf(p, "\tLRC Head: (memory) %u\n",
+ xe_lrc_ring_head(lrc));
+ drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
+ lrc->ring.tail,
+ xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL));
+ drm_printf(p, "\tStart seqno: (memory) %d\n",
+ xe_lrc_start_seqno(lrc));
+ drm_printf(p, "\tSeqno: (memory) %d\n", xe_lrc_seqno(lrc));
+ }
+ drm_printf(p, "\tSchedule State: 0x%x\n", atomic_read(&e->guc->state));
+ drm_printf(p, "\tFlags: 0x%lx\n", e->flags);
+ if (xe_engine_is_parallel(e))
+ guc_engine_wq_print(e, p);
+
+ spin_lock(&sched->base.job_list_lock);
+ list_for_each_entry(job, &sched->base.pending_list, drm.list)
+ drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
+ xe_sched_job_seqno(job),
+ dma_fence_is_signaled(job->fence) ? 1 : 0,
+ dma_fence_is_signaled(&job->drm.s_fence->finished) ?
+ 1 : 0);
+ spin_unlock(&sched->base.job_list_lock);
+}
+
+void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
+{
+ struct xe_engine *e;
+ unsigned long index;
+
+ if (!xe_device_guc_submission_enabled(guc_to_xe(guc)))
+ return;
+
+ mutex_lock(&guc->submission_state.lock);
+ xa_for_each(&guc->submission_state.engine_lookup, index, e)
+ guc_engine_print(e, p);
+ mutex_unlock(&guc->submission_state.lock);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
new file mode 100644
index 000000000000..8002734d6f24
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_SUBMIT_H_
+#define _XE_GUC_SUBMIT_H_
+
+#include <linux/types.h>
+
+struct drm_printer;
+struct xe_engine;
+struct xe_guc;
+
+int xe_guc_submit_init(struct xe_guc *guc);
+void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p);
+
+int xe_guc_submit_reset_prepare(struct xe_guc *guc);
+void xe_guc_submit_reset_wait(struct xe_guc *guc);
+int xe_guc_submit_stop(struct xe_guc *guc);
+int xe_guc_submit_start(struct xe_guc *guc);
+
+int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
+ u32 len);
+int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
new file mode 100644
index 000000000000..ca177853cc12
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_GUC_TYPES_H_
+#define _XE_GUC_TYPES_H_
+
+#include <linux/idr.h>
+#include <linux/xarray.h>
+
+#include "xe_guc_ads_types.h"
+#include "xe_guc_ct_types.h"
+#include "xe_guc_fwif.h"
+#include "xe_guc_log_types.h"
+#include "xe_guc_pc_types.h"
+#include "xe_uc_fw_types.h"
+
+/**
+ * struct xe_guc - Graphic micro controller
+ */
+struct xe_guc {
+ /** @fw: Generic uC firmware management */
+ struct xe_uc_fw fw;
+ /** @log: GuC log */
+ struct xe_guc_log log;
+ /** @ads: GuC ads */
+ struct xe_guc_ads ads;
+ /** @ct: GuC ct */
+ struct xe_guc_ct ct;
+ /** @pc: GuC Power Conservation */
+ struct xe_guc_pc pc;
+ /** @submission_state: GuC submission state */
+ struct {
+ /** @engine_lookup: Lookup an xe_engine from guc_id */
+ struct xarray engine_lookup;
+ /** @guc_ids: used to allocate new guc_ids, single-lrc */
+ struct ida guc_ids;
+ /** @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc */
+ unsigned long *guc_ids_bitmap;
+ /** @stopped: submissions are stopped */
+ atomic_t stopped;
+ /** @lock: protects submission state */
+ struct mutex lock;
+ /** @suspend: suspend fence state */
+ struct {
+ /** @lock: suspend fences lock */
+ spinlock_t lock;
+ /** @context: suspend fences context */
+ u64 context;
+ /** @seqno: suspend fences seqno */
+ u32 seqno;
+ } suspend;
+ } submission_state;
+ /** @hwconfig: Hardware config state */
+ struct {
+ /** @bo: buffer object of the hardware config */
+ struct xe_bo *bo;
+ /** @size: size of the hardware config */
+ u32 size;
+ } hwconfig;
+
+ /**
+ * @notify_reg: Register which is written to notify GuC of H2G messages
+ */
+ u32 notify_reg;
+ /** @params: Control params for fw initialization */
+ u32 params[GUC_CTL_MAX_DWORDS];
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
new file mode 100644
index 000000000000..93b22fac6e14
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_reg.h"
+#include "xe_huc.h"
+#include "xe_mmio.h"
+#include "xe_uc_fw.h"
+
+static struct xe_gt *
+huc_to_gt(struct xe_huc *huc)
+{
+ return container_of(huc, struct xe_gt, uc.huc);
+}
+
+static struct xe_device *
+huc_to_xe(struct xe_huc *huc)
+{
+ return gt_to_xe(huc_to_gt(huc));
+}
+
+static struct xe_guc *
+huc_to_guc(struct xe_huc *huc)
+{
+ return &container_of(huc, struct xe_uc, huc)->guc;
+}
+
+int xe_huc_init(struct xe_huc *huc)
+{
+ struct xe_device *xe = huc_to_xe(huc);
+ int ret;
+
+ huc->fw.type = XE_UC_FW_TYPE_HUC;
+ ret = xe_uc_fw_init(&huc->fw);
+ if (ret)
+ goto out;
+
+ xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE);
+
+ return 0;
+
+out:
+ if (xe_uc_fw_is_disabled(&huc->fw)) {
+ drm_info(&xe->drm, "HuC disabled\n");
+ return 0;
+ }
+ drm_err(&xe->drm, "HuC init failed with %d", ret);
+ return ret;
+}
+
+int xe_huc_upload(struct xe_huc *huc)
+{
+ if (xe_uc_fw_is_disabled(&huc->fw))
+ return 0;
+ return xe_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL);
+}
+
+int xe_huc_auth(struct xe_huc *huc)
+{
+ struct xe_device *xe = huc_to_xe(huc);
+ struct xe_gt *gt = huc_to_gt(huc);
+ struct xe_guc *guc = huc_to_guc(huc);
+ int ret;
+ if (xe_uc_fw_is_disabled(&huc->fw))
+ return 0;
+
+ XE_BUG_ON(xe_uc_fw_is_running(&huc->fw));
+
+ if (!xe_uc_fw_is_loaded(&huc->fw))
+ return -ENOEXEC;
+
+ ret = xe_guc_auth_huc(guc, xe_bo_ggtt_addr(huc->fw.bo) +
+ xe_uc_fw_rsa_offset(&huc->fw));
+ if (ret) {
+ drm_err(&xe->drm, "HuC: GuC did not ack Auth request %d\n",
+ ret);
+ goto fail;
+ }
+
+ ret = xe_mmio_wait32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg,
+ HUC_LOAD_SUCCESSFUL,
+ HUC_LOAD_SUCCESSFUL, 100);
+ if (ret) {
+ drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret);
+ goto fail;
+ }
+
+ xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING);
+ drm_dbg(&xe->drm, "HuC authenticated\n");
+
+ return 0;
+
+fail:
+ drm_err(&xe->drm, "HuC authentication failed %d\n", ret);
+ xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
+
+ return ret;
+}
+
+void xe_huc_sanitize(struct xe_huc *huc)
+{
+ if (xe_uc_fw_is_disabled(&huc->fw))
+ return;
+ xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE);
+}
+
+void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p)
+{
+ struct xe_gt *gt = huc_to_gt(huc);
+ int err;
+
+ xe_uc_fw_print(&huc->fw, p);
+
+ if (xe_uc_fw_is_disabled(&huc->fw))
+ return;
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return;
+
+ drm_printf(p, "\nHuC status: 0x%08x\n",
+ xe_mmio_read32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg));
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+}
diff --git a/drivers/gpu/drm/xe/xe_huc.h b/drivers/gpu/drm/xe/xe_huc.h
new file mode 100644
index 000000000000..5802c43b6ce2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HUC_H_
+#define _XE_HUC_H_
+
+#include "xe_huc_types.h"
+
+struct drm_printer;
+
+int xe_huc_init(struct xe_huc *huc);
+int xe_huc_upload(struct xe_huc *huc);
+int xe_huc_auth(struct xe_huc *huc);
+void xe_huc_sanitize(struct xe_huc *huc);
+void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.c b/drivers/gpu/drm/xe/xe_huc_debugfs.c
new file mode 100644
index 000000000000..268bac36336a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc_debugfs.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_huc.h"
+#include "xe_huc_debugfs.h"
+#include "xe_macros.h"
+
+static struct xe_gt *
+huc_to_gt(struct xe_huc *huc)
+{
+ return container_of(huc, struct xe_gt, uc.huc);
+}
+
+static struct xe_device *
+huc_to_xe(struct xe_huc *huc)
+{
+ return gt_to_xe(huc_to_gt(huc));
+}
+
+static struct xe_huc *node_to_huc(struct drm_info_node *node)
+{
+ return node->info_ent->data;
+}
+
+static int huc_info(struct seq_file *m, void *data)
+{
+ struct xe_huc *huc = node_to_huc(m->private);
+ struct xe_device *xe = huc_to_xe(huc);
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ xe_device_mem_access_get(xe);
+ xe_huc_print_info(huc, &p);
+ xe_device_mem_access_put(xe);
+
+ return 0;
+}
+
+static const struct drm_info_list debugfs_list[] = {
+ {"huc_info", huc_info, 0},
+};
+
+void xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent)
+{
+ struct drm_minor *minor = huc_to_xe(huc)->drm.primary;
+ struct drm_info_list *local;
+ int i;
+
+#define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
+ local = drmm_kmalloc(&huc_to_xe(huc)->drm, DEBUGFS_SIZE, GFP_KERNEL);
+ if (!local) {
+ XE_WARN_ON("Couldn't allocate memory");
+ return;
+ }
+
+ memcpy(local, debugfs_list, DEBUGFS_SIZE);
+#undef DEBUGFS_SIZE
+
+ for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i)
+ local[i].data = huc;
+
+ drm_debugfs_create_files(local,
+ ARRAY_SIZE(debugfs_list),
+ parent, minor);
+}
diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.h b/drivers/gpu/drm/xe/xe_huc_debugfs.h
new file mode 100644
index 000000000000..ec58f1818804
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HUC_DEBUGFS_H_
+#define _XE_HUC_DEBUGFS_H_
+
+struct dentry;
+struct xe_huc;
+
+void xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_huc_types.h b/drivers/gpu/drm/xe/xe_huc_types.h
new file mode 100644
index 000000000000..cae6d19097df
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_huc_types.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HUC_TYPES_H_
+#define _XE_HUC_TYPES_H_
+
+#include "xe_uc_fw_types.h"
+
+/**
+ * struct xe_huc - HuC
+ */
+struct xe_huc {
+ /** @fw: Generic uC firmware management */
+ struct xe_uc_fw fw;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
new file mode 100644
index 000000000000..fd89dd90131c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -0,0 +1,658 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_hw_engine.h"
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_execlist.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_gt_topology.h"
+#include "xe_hw_fence.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+#include "xe_reg_sr.h"
+#include "xe_sched_job.h"
+#include "xe_wa.h"
+
+#include "gt/intel_engine_regs.h"
+#include "i915_reg.h"
+#include "gt/intel_gt_regs.h"
+
+#define MAX_MMIO_BASES 3
+struct engine_info {
+ const char *name;
+ unsigned int class : 8;
+ unsigned int instance : 8;
+ enum xe_force_wake_domains domain;
+ /* mmio bases table *must* be sorted in reverse graphics_ver order */
+ struct engine_mmio_base {
+ unsigned int graphics_ver : 8;
+ unsigned int base : 24;
+ } mmio_bases[MAX_MMIO_BASES];
+};
+
+static const struct engine_info engine_infos[] = {
+ [XE_HW_ENGINE_RCS0] = {
+ .name = "rcs0",
+ .class = XE_ENGINE_CLASS_RENDER,
+ .instance = 0,
+ .domain = XE_FW_RENDER,
+ .mmio_bases = {
+ { .graphics_ver = 1, .base = RENDER_RING_BASE }
+ },
+ },
+ [XE_HW_ENGINE_BCS0] = {
+ .name = "bcs0",
+ .class = XE_ENGINE_CLASS_COPY,
+ .instance = 0,
+ .domain = XE_FW_RENDER,
+ .mmio_bases = {
+ { .graphics_ver = 6, .base = BLT_RING_BASE }
+ },
+ },
+ [XE_HW_ENGINE_BCS1] = {
+ .name = "bcs1",
+ .class = XE_ENGINE_CLASS_COPY,
+ .instance = 1,
+ .domain = XE_FW_RENDER,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHPC_BCS1_RING_BASE }
+ },
+ },
+ [XE_HW_ENGINE_BCS2] = {
+ .name = "bcs2",
+ .class = XE_ENGINE_CLASS_COPY,
+ .instance = 2,
+ .domain = XE_FW_RENDER,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHPC_BCS2_RING_BASE }
+ },
+ },
+ [XE_HW_ENGINE_BCS3] = {
+ .name = "bcs3",
+ .class = XE_ENGINE_CLASS_COPY,
+ .instance = 3,
+ .domain = XE_FW_RENDER,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHPC_BCS3_RING_BASE }
+ },
+ },
+ [XE_HW_ENGINE_BCS4] = {
+ .name = "bcs4",
+ .class = XE_ENGINE_CLASS_COPY,
+ .instance = 4,
+ .domain = XE_FW_RENDER,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHPC_BCS4_RING_BASE }
+ },
+ },
+ [XE_HW_ENGINE_BCS5] = {
+ .name = "bcs5",
+ .class = XE_ENGINE_CLASS_COPY,
+ .instance = 5,
+ .domain = XE_FW_RENDER,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHPC_BCS5_RING_BASE }
+ },
+ },
+ [XE_HW_ENGINE_BCS6] = {
+ .name = "bcs6",
+ .class = XE_ENGINE_CLASS_COPY,
+ .instance = 6,
+ .domain = XE_FW_RENDER,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHPC_BCS6_RING_BASE }
+ },
+ },
+ [XE_HW_ENGINE_BCS7] = {
+ .name = "bcs7",
+ .class = XE_ENGINE_CLASS_COPY,
+ .instance = 7,
+ .domain = XE_FW_RENDER,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHPC_BCS7_RING_BASE }
+ },
+ },
+ [XE_HW_ENGINE_BCS8] = {
+ .name = "bcs8",
+ .class = XE_ENGINE_CLASS_COPY,
+ .instance = 8,
+ .domain = XE_FW_RENDER,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHPC_BCS8_RING_BASE }
+ },
+ },
+
+ [XE_HW_ENGINE_VCS0] = {
+ .name = "vcs0",
+ .class = XE_ENGINE_CLASS_VIDEO_DECODE,
+ .instance = 0,
+ .domain = XE_FW_MEDIA_VDBOX0,
+ .mmio_bases = {
+ { .graphics_ver = 11, .base = GEN11_BSD_RING_BASE },
+ { .graphics_ver = 6, .base = GEN6_BSD_RING_BASE },
+ { .graphics_ver = 4, .base = BSD_RING_BASE }
+ },
+ },
+ [XE_HW_ENGINE_VCS1] = {
+ .name = "vcs1",
+ .class = XE_ENGINE_CLASS_VIDEO_DECODE,
+ .instance = 1,
+ .domain = XE_FW_MEDIA_VDBOX1,
+ .mmio_bases = {
+ { .graphics_ver = 11, .base = GEN11_BSD2_RING_BASE },
+ { .graphics_ver = 8, .base = GEN8_BSD2_RING_BASE }
+ },
+ },
+ [XE_HW_ENGINE_VCS2] = {
+ .name = "vcs2",
+ .class = XE_ENGINE_CLASS_VIDEO_DECODE,
+ .instance = 2,
+ .domain = XE_FW_MEDIA_VDBOX2,
+ .mmio_bases = {
+ { .graphics_ver = 11, .base = GEN11_BSD3_RING_BASE }
+ },
+ },
+ [XE_HW_ENGINE_VCS3] = {
+ .name = "vcs3",
+ .class = XE_ENGINE_CLASS_VIDEO_DECODE,
+ .instance = 3,
+ .domain = XE_FW_MEDIA_VDBOX3,
+ .mmio_bases = {
+ { .graphics_ver = 11, .base = GEN11_BSD4_RING_BASE }
+ },
+ },
+ [XE_HW_ENGINE_VCS4] = {
+ .name = "vcs4",
+ .class = XE_ENGINE_CLASS_VIDEO_DECODE,
+ .instance = 4,
+ .domain = XE_FW_MEDIA_VDBOX4,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHP_BSD5_RING_BASE }
+ },
+ },
+ [XE_HW_ENGINE_VCS5] = {
+ .name = "vcs5",
+ .class = XE_ENGINE_CLASS_VIDEO_DECODE,
+ .instance = 5,
+ .domain = XE_FW_MEDIA_VDBOX5,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHP_BSD6_RING_BASE }
+ },
+ },
+ [XE_HW_ENGINE_VCS6] = {
+ .name = "vcs6",
+ .class = XE_ENGINE_CLASS_VIDEO_DECODE,
+ .instance = 6,
+ .domain = XE_FW_MEDIA_VDBOX6,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHP_BSD7_RING_BASE }
+ },
+ },
+ [XE_HW_ENGINE_VCS7] = {
+ .name = "vcs7",
+ .class = XE_ENGINE_CLASS_VIDEO_DECODE,
+ .instance = 7,
+ .domain = XE_FW_MEDIA_VDBOX7,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHP_BSD8_RING_BASE }
+ },
+ },
+ [XE_HW_ENGINE_VECS0] = {
+ .name = "vecs0",
+ .class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+ .instance = 0,
+ .domain = XE_FW_MEDIA_VEBOX0,
+ .mmio_bases = {
+ { .graphics_ver = 11, .base = GEN11_VEBOX_RING_BASE },
+ { .graphics_ver = 7, .base = VEBOX_RING_BASE }
+ },
+ },
+ [XE_HW_ENGINE_VECS1] = {
+ .name = "vecs1",
+ .class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+ .instance = 1,
+ .domain = XE_FW_MEDIA_VEBOX1,
+ .mmio_bases = {
+ { .graphics_ver = 11, .base = GEN11_VEBOX2_RING_BASE }
+ },
+ },
+ [XE_HW_ENGINE_VECS2] = {
+ .name = "vecs2",
+ .class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+ .instance = 2,
+ .domain = XE_FW_MEDIA_VEBOX2,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHP_VEBOX3_RING_BASE }
+ },
+ },
+ [XE_HW_ENGINE_VECS3] = {
+ .name = "vecs3",
+ .class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+ .instance = 3,
+ .domain = XE_FW_MEDIA_VEBOX3,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = XEHP_VEBOX4_RING_BASE }
+ },
+ },
+ [XE_HW_ENGINE_CCS0] = {
+ .name = "ccs0",
+ .class = XE_ENGINE_CLASS_COMPUTE,
+ .instance = 0,
+ .domain = XE_FW_RENDER,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = GEN12_COMPUTE0_RING_BASE },
+ },
+ },
+ [XE_HW_ENGINE_CCS1] = {
+ .name = "ccs1",
+ .class = XE_ENGINE_CLASS_COMPUTE,
+ .instance = 1,
+ .domain = XE_FW_RENDER,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = GEN12_COMPUTE1_RING_BASE },
+ },
+ },
+ [XE_HW_ENGINE_CCS2] = {
+ .name = "ccs2",
+ .class = XE_ENGINE_CLASS_COMPUTE,
+ .instance = 2,
+ .domain = XE_FW_RENDER,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = GEN12_COMPUTE2_RING_BASE },
+ },
+ },
+ [XE_HW_ENGINE_CCS3] = {
+ .name = "ccs3",
+ .class = XE_ENGINE_CLASS_COMPUTE,
+ .instance = 3,
+ .domain = XE_FW_RENDER,
+ .mmio_bases = {
+ { .graphics_ver = 12, .base = GEN12_COMPUTE3_RING_BASE },
+ },
+ },
+};
+
+static u32 engine_info_mmio_base(const struct engine_info *info,
+ unsigned int graphics_ver)
+{
+ int i;
+
+ for (i = 0; i < MAX_MMIO_BASES; i++)
+ if (graphics_ver >= info->mmio_bases[i].graphics_ver)
+ break;
+
+ XE_BUG_ON(i == MAX_MMIO_BASES);
+ XE_BUG_ON(!info->mmio_bases[i].base);
+
+ return info->mmio_bases[i].base;
+}
+
+static void hw_engine_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_hw_engine *hwe = arg;
+
+ if (hwe->exl_port)
+ xe_execlist_port_destroy(hwe->exl_port);
+ xe_lrc_finish(&hwe->kernel_lrc);
+
+ xe_bo_unpin_map_no_vm(hwe->hwsp);
+
+ hwe->gt = NULL;
+}
+
+static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, u32 reg, u32 val)
+{
+ XE_BUG_ON(reg & hwe->mmio_base);
+ xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
+
+ xe_mmio_write32(hwe->gt, reg + hwe->mmio_base, val);
+}
+
+static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, u32 reg)
+{
+ XE_BUG_ON(reg & hwe->mmio_base);
+ xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
+
+ return xe_mmio_read32(hwe->gt, reg + hwe->mmio_base);
+}
+
+void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
+{
+ u32 ccs_mask =
+ xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
+
+ if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask & BIT(0))
+ xe_mmio_write32(hwe->gt, GEN12_RCU_MODE.reg,
+ _MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE));
+
+ hw_engine_mmio_write32(hwe, RING_HWSTAM(0).reg, ~0x0);
+ hw_engine_mmio_write32(hwe, RING_HWS_PGA(0).reg,
+ xe_bo_ggtt_addr(hwe->hwsp));
+ hw_engine_mmio_write32(hwe, RING_MODE_GEN7(0).reg,
+ _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
+ hw_engine_mmio_write32(hwe, RING_MI_MODE(0).reg,
+ _MASKED_BIT_DISABLE(STOP_RING));
+ hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg);
+}
+
+static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
+ enum xe_hw_engine_id id)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ const struct engine_info *info;
+
+ if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
+ return;
+
+ if (!(gt->info.engine_mask & BIT(id)))
+ return;
+
+ info = &engine_infos[id];
+
+ XE_BUG_ON(hwe->gt);
+
+ hwe->gt = gt;
+ hwe->class = info->class;
+ hwe->instance = info->instance;
+ hwe->mmio_base = engine_info_mmio_base(info, GRAPHICS_VER(xe));
+ hwe->domain = info->domain;
+ hwe->name = info->name;
+ hwe->fence_irq = &gt->fence_irq[info->class];
+ hwe->engine_id = id;
+
+ xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
+ xe_wa_process_engine(hwe);
+
+ xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
+ xe_reg_whitelist_process_engine(hwe);
+}
+
+static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
+ enum xe_hw_engine_id id)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int err;
+
+ XE_BUG_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name);
+ XE_BUG_ON(!(gt->info.engine_mask & BIT(id)));
+
+ xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
+ xe_reg_sr_apply_whitelist(&hwe->reg_whitelist, hwe->mmio_base, gt);
+
+ hwe->hwsp = xe_bo_create_locked(xe, gt, NULL, SZ_4K, ttm_bo_type_kernel,
+ XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+ XE_BO_CREATE_GGTT_BIT);
+ if (IS_ERR(hwe->hwsp)) {
+ err = PTR_ERR(hwe->hwsp);
+ goto err_name;
+ }
+
+ err = xe_bo_pin(hwe->hwsp);
+ if (err)
+ goto err_unlock_put_hwsp;
+
+ err = xe_bo_vmap(hwe->hwsp);
+ if (err)
+ goto err_unpin_hwsp;
+
+ xe_bo_unlock_no_vm(hwe->hwsp);
+
+ err = xe_lrc_init(&hwe->kernel_lrc, hwe, NULL, NULL, SZ_16K);
+ if (err)
+ goto err_hwsp;
+
+ if (!xe_device_guc_submission_enabled(xe)) {
+ hwe->exl_port = xe_execlist_port_create(xe, hwe);
+ if (IS_ERR(hwe->exl_port)) {
+ err = PTR_ERR(hwe->exl_port);
+ goto err_kernel_lrc;
+ }
+ }
+
+ if (xe_device_guc_submission_enabled(xe))
+ xe_hw_engine_enable_ring(hwe);
+
+ /* We reserve the highest BCS instance for USM */
+ if (xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY)
+ gt->usm.reserved_bcs_instance = hwe->instance;
+
+ err = drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe);
+ if (err)
+ return err;
+
+ return 0;
+
+err_unpin_hwsp:
+ xe_bo_unpin(hwe->hwsp);
+err_unlock_put_hwsp:
+ xe_bo_unlock_no_vm(hwe->hwsp);
+ xe_bo_put(hwe->hwsp);
+err_kernel_lrc:
+ xe_lrc_finish(&hwe->kernel_lrc);
+err_hwsp:
+ xe_bo_put(hwe->hwsp);
+err_name:
+ hwe->name = NULL;
+
+ return err;
+}
+
+static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
+{
+ int class;
+
+ /* FIXME: Doing a simple logical mapping that works for most hardware */
+ for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ int logical_instance = 0;
+
+ for_each_hw_engine(hwe, gt, id)
+ if (hwe->class == class)
+ hwe->logical_instance = logical_instance++;
+ }
+}
+
+static void read_fuses(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ u32 media_fuse;
+ u16 vdbox_mask;
+ u16 vebox_mask;
+ u32 bcs_mask;
+ int i, j;
+
+ xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+ /*
+ * FIXME: Hack job; we should probably have a table of vfuncs for each
+ * class which picks the correct vfunc based on IP version.
+ */
+
+ media_fuse = xe_mmio_read32(gt, GEN11_GT_VEBOX_VDBOX_DISABLE.reg);
+ if (GRAPHICS_VERx100(xe) < 1250)
+ media_fuse = ~media_fuse;
+
+ vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK;
+ vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
+ GEN11_GT_VEBOX_DISABLE_SHIFT;
+
+ for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
+ if (!(gt->info.engine_mask & BIT(i)))
+ continue;
+
+ if (!(BIT(j) & vdbox_mask)) {
+ gt->info.engine_mask &= ~BIT(i);
+ drm_info(&xe->drm, "vcs%u fused off\n", j);
+ }
+ }
+
+ for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
+ if (!(gt->info.engine_mask & BIT(i)))
+ continue;
+
+ if (!(BIT(j) & vebox_mask)) {
+ gt->info.engine_mask &= ~BIT(i);
+ drm_info(&xe->drm, "vecs%u fused off\n", j);
+ }
+ }
+
+ bcs_mask = xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg);
+ bcs_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, bcs_mask);
+
+ for (i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
+ if (!(gt->info.engine_mask & BIT(i)))
+ continue;
+
+ if (!(BIT(j/2) & bcs_mask)) {
+ gt->info.engine_mask &= ~BIT(i);
+ drm_info(&xe->drm, "bcs%u fused off\n", j);
+ }
+ }
+
+ /* TODO: compute engines */
+}
+
+int xe_hw_engines_init_early(struct xe_gt *gt)
+{
+ int i;
+
+ read_fuses(gt);
+
+ for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
+ hw_engine_init_early(gt, &gt->hw_engines[i], i);
+
+ return 0;
+}
+
+int xe_hw_engines_init(struct xe_gt *gt)
+{
+ int err;
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+
+ for_each_hw_engine(hwe, gt, id) {
+ err = hw_engine_init(gt, hwe, id);
+ if (err)
+ return err;
+ }
+
+ hw_engine_setup_logical_mapping(gt);
+
+ return 0;
+}
+
+void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
+{
+ wake_up_all(&gt_to_xe(hwe->gt)->ufence_wq);
+
+ if (hwe->irq_handler)
+ hwe->irq_handler(hwe, intr_vec);
+
+ if (intr_vec & GT_RENDER_USER_INTERRUPT)
+ xe_hw_fence_irq_run(hwe->fence_irq);
+}
+
+void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p)
+{
+ if (!xe_hw_engine_is_valid(hwe))
+ return;
+
+ drm_printf(p, "%s (physical), logical instance=%d\n", hwe->name,
+ hwe->logical_instance);
+ drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
+ hwe->domain,
+ xe_force_wake_ref(gt_to_fw(hwe->gt), hwe->domain));
+ drm_printf(p, "\tMMIO base: 0x%08x\n", hwe->mmio_base);
+
+ drm_printf(p, "\tHWSTAM: 0x%08x\n",
+ hw_engine_mmio_read32(hwe, RING_HWSTAM(0).reg));
+ drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n",
+ hw_engine_mmio_read32(hwe, RING_HWS_PGA(0).reg));
+
+ drm_printf(p, "\tRING_EXECLIST_STATUS_LO: 0x%08x\n",
+ hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0).reg));
+ drm_printf(p, "\tRING_EXECLIST_STATUS_HI: 0x%08x\n",
+ hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0).reg));
+ drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n",
+ hw_engine_mmio_read32(hwe,
+ RING_EXECLIST_SQ_CONTENTS(0).reg));
+ drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n",
+ hw_engine_mmio_read32(hwe,
+ RING_EXECLIST_SQ_CONTENTS(0).reg) + 4);
+ drm_printf(p, "\tRING_EXECLIST_CONTROL: 0x%08x\n",
+ hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0).reg));
+
+ drm_printf(p, "\tRING_START: 0x%08x\n",
+ hw_engine_mmio_read32(hwe, RING_START(0).reg));
+ drm_printf(p, "\tRING_HEAD: 0x%08x\n",
+ hw_engine_mmio_read32(hwe, RING_HEAD(0).reg) & HEAD_ADDR);
+ drm_printf(p, "\tRING_TAIL: 0x%08x\n",
+ hw_engine_mmio_read32(hwe, RING_TAIL(0).reg) & TAIL_ADDR);
+ drm_printf(p, "\tRING_CTL: 0x%08x\n",
+ hw_engine_mmio_read32(hwe, RING_CTL(0).reg));
+ drm_printf(p, "\tRING_MODE: 0x%08x\n",
+ hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg));
+ drm_printf(p, "\tRING_MODE_GEN7: 0x%08x\n",
+ hw_engine_mmio_read32(hwe, RING_MODE_GEN7(0).reg));
+
+ drm_printf(p, "\tRING_IMR: 0x%08x\n",
+ hw_engine_mmio_read32(hwe, RING_IMR(0).reg));
+ drm_printf(p, "\tRING_ESR: 0x%08x\n",
+ hw_engine_mmio_read32(hwe, RING_ESR(0).reg));
+ drm_printf(p, "\tRING_EMR: 0x%08x\n",
+ hw_engine_mmio_read32(hwe, RING_EMR(0).reg));
+ drm_printf(p, "\tRING_EIR: 0x%08x\n",
+ hw_engine_mmio_read32(hwe, RING_EIR(0).reg));
+
+ drm_printf(p, "\tACTHD: 0x%08x_%08x\n",
+ hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0).reg),
+ hw_engine_mmio_read32(hwe, RING_ACTHD(0).reg));
+ drm_printf(p, "\tBBADDR: 0x%08x_%08x\n",
+ hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0).reg),
+ hw_engine_mmio_read32(hwe, RING_BBADDR(0).reg));
+ drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n",
+ hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0).reg),
+ hw_engine_mmio_read32(hwe, RING_DMA_FADD(0).reg));
+
+ drm_printf(p, "\tIPEIR: 0x%08x\n",
+ hw_engine_mmio_read32(hwe, IPEIR(0).reg));
+ drm_printf(p, "\tIPEHR: 0x%08x\n\n",
+ hw_engine_mmio_read32(hwe, IPEHR(0).reg));
+
+ if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
+ drm_printf(p, "\tGEN12_RCU_MODE: 0x%08x\n",
+ xe_mmio_read32(hwe->gt, GEN12_RCU_MODE.reg));
+}
+
+u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
+ enum xe_engine_class engine_class)
+{
+ u32 mask = 0;
+ enum xe_hw_engine_id id;
+
+ for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
+ if (engine_infos[id].class == engine_class &&
+ gt->info.engine_mask & BIT(id))
+ mask |= BIT(engine_infos[id].instance);
+ }
+ return mask;
+}
+
+bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
+{
+ struct xe_gt *gt = hwe->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+
+ return xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
+ hwe->instance == gt->usm.reserved_bcs_instance;
+}
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h
new file mode 100644
index 000000000000..ceab65397256
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_HW_ENGINE_H_
+#define _XE_HW_ENGINE_H_
+
+#include "xe_hw_engine_types.h"
+
+struct drm_printer;
+
+int xe_hw_engines_init_early(struct xe_gt *gt);
+int xe_hw_engines_init(struct xe_gt *gt);
+void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec);
+void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe);
+void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p);
+u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
+ enum xe_engine_class engine_class);
+
+bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe);
+static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe)
+{
+ return hwe->name;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
new file mode 100644
index 000000000000..05a2fdc381d7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HW_ENGINE_TYPES_H_
+#define _XE_HW_ENGINE_TYPES_H_
+
+#include "xe_force_wake_types.h"
+#include "xe_lrc_types.h"
+#include "xe_reg_sr_types.h"
+
+/* See "Engine ID Definition" struct in the Icelake PRM */
+enum xe_engine_class {
+ XE_ENGINE_CLASS_RENDER = 0,
+ XE_ENGINE_CLASS_VIDEO_DECODE = 1,
+ XE_ENGINE_CLASS_VIDEO_ENHANCE = 2,
+ XE_ENGINE_CLASS_COPY = 3,
+ XE_ENGINE_CLASS_OTHER = 4,
+ XE_ENGINE_CLASS_COMPUTE = 5,
+ XE_ENGINE_CLASS_MAX = 6,
+};
+
+enum xe_hw_engine_id {
+ XE_HW_ENGINE_RCS0,
+ XE_HW_ENGINE_BCS0,
+ XE_HW_ENGINE_BCS1,
+ XE_HW_ENGINE_BCS2,
+ XE_HW_ENGINE_BCS3,
+ XE_HW_ENGINE_BCS4,
+ XE_HW_ENGINE_BCS5,
+ XE_HW_ENGINE_BCS6,
+ XE_HW_ENGINE_BCS7,
+ XE_HW_ENGINE_BCS8,
+ XE_HW_ENGINE_VCS0,
+ XE_HW_ENGINE_VCS1,
+ XE_HW_ENGINE_VCS2,
+ XE_HW_ENGINE_VCS3,
+ XE_HW_ENGINE_VCS4,
+ XE_HW_ENGINE_VCS5,
+ XE_HW_ENGINE_VCS6,
+ XE_HW_ENGINE_VCS7,
+ XE_HW_ENGINE_VECS0,
+ XE_HW_ENGINE_VECS1,
+ XE_HW_ENGINE_VECS2,
+ XE_HW_ENGINE_VECS3,
+ XE_HW_ENGINE_CCS0,
+ XE_HW_ENGINE_CCS1,
+ XE_HW_ENGINE_CCS2,
+ XE_HW_ENGINE_CCS3,
+ XE_NUM_HW_ENGINES,
+};
+
+/* FIXME: s/XE_HW_ENGINE_MAX_INSTANCE/XE_HW_ENGINE_MAX_COUNT */
+#define XE_HW_ENGINE_MAX_INSTANCE 9
+
+struct xe_bo;
+struct xe_execlist_port;
+struct xe_gt;
+
+/**
+ * struct xe_hw_engine - Hardware engine
+ *
+ * Contains all the hardware engine state for physical instances.
+ */
+struct xe_hw_engine {
+ /** @gt: graphics tile this hw engine belongs to */
+ struct xe_gt *gt;
+ /** @name: name of this hw engine */
+ const char *name;
+ /** @class: class of this hw engine */
+ enum xe_engine_class class;
+ /** @instance: physical instance of this hw engine */
+ u16 instance;
+ /** @logical_instance: logical instance of this hw engine */
+ u16 logical_instance;
+	/** @mmio_base: MMIO base address of this hw engine */
+ u32 mmio_base;
+ /**
+ * @reg_sr: table with registers to be restored on GT init/resume/reset
+ */
+ struct xe_reg_sr reg_sr;
+ /**
+ * @reg_whitelist: table with registers to be whitelisted
+ */
+ struct xe_reg_sr reg_whitelist;
+ /**
+ * @reg_lrc: LRC workaround registers
+ */
+ struct xe_reg_sr reg_lrc;
+ /** @domain: force wake domain of this hw engine */
+ enum xe_force_wake_domains domain;
+ /** @hwsp: hardware status page buffer object */
+ struct xe_bo *hwsp;
+ /** @kernel_lrc: Kernel LRC (should be replaced /w an xe_engine) */
+ struct xe_lrc kernel_lrc;
+ /** @exl_port: execlists port */
+ struct xe_execlist_port *exl_port;
+ /** @fence_irq: fence IRQ to run when a hw engine IRQ is received */
+ struct xe_hw_fence_irq *fence_irq;
+ /** @irq_handler: IRQ handler to run when hw engine IRQ is received */
+ void (*irq_handler)(struct xe_hw_engine *, u16);
+ /** @engine_id: id for this hw engine */
+ enum xe_hw_engine_id engine_id;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c
new file mode 100644
index 000000000000..e56ca2867545
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_fence.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_hw_fence.h"
+
+#include <linux/device.h>
+#include <linux/slab.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_hw_engine.h"
+#include "xe_macros.h"
+#include "xe_map.h"
+#include "xe_trace.h"
+
+static struct kmem_cache *xe_hw_fence_slab;
+
+int __init xe_hw_fence_module_init(void)
+{
+ xe_hw_fence_slab = kmem_cache_create("xe_hw_fence",
+ sizeof(struct xe_hw_fence), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!xe_hw_fence_slab)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void xe_hw_fence_module_exit(void)
+{
+ rcu_barrier();
+ kmem_cache_destroy(xe_hw_fence_slab);
+}
+
+static struct xe_hw_fence *fence_alloc(void)
+{
+ return kmem_cache_zalloc(xe_hw_fence_slab, GFP_KERNEL);
+}
+
+static void fence_free(struct rcu_head *rcu)
+{
+ struct xe_hw_fence *fence =
+ container_of(rcu, struct xe_hw_fence, dma.rcu);
+
+ if (!WARN_ON_ONCE(!fence))
+ kmem_cache_free(xe_hw_fence_slab, fence);
+}
+
+static void hw_fence_irq_run_cb(struct irq_work *work)
+{
+ struct xe_hw_fence_irq *irq = container_of(work, typeof(*irq), work);
+ struct xe_hw_fence *fence, *next;
+ bool tmp;
+
+ tmp = dma_fence_begin_signalling();
+ spin_lock(&irq->lock);
+ if (irq->enabled) {
+ list_for_each_entry_safe(fence, next, &irq->pending, irq_link) {
+ struct dma_fence *dma_fence = &fence->dma;
+
+ trace_xe_hw_fence_try_signal(fence);
+ if (dma_fence_is_signaled_locked(dma_fence)) {
+ trace_xe_hw_fence_signal(fence);
+ list_del_init(&fence->irq_link);
+ dma_fence_put(dma_fence);
+ }
+ }
+ }
+ spin_unlock(&irq->lock);
+ dma_fence_end_signalling(tmp);
+}
+
+void xe_hw_fence_irq_init(struct xe_hw_fence_irq *irq)
+{
+ spin_lock_init(&irq->lock);
+ init_irq_work(&irq->work, hw_fence_irq_run_cb);
+ INIT_LIST_HEAD(&irq->pending);
+ irq->enabled = true;
+}
+
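+/*
+ * Safeguard against leaking fences at teardown: warn about and force-signal
+ * anything still on the pending list.
+ */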
+void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq)
+{
+ struct xe_hw_fence *fence, *next;
+ unsigned long flags;
+ int err;
+ bool tmp;
+
+ if (XE_WARN_ON(!list_empty(&irq->pending))) {
+ tmp = dma_fence_begin_signalling();
+ spin_lock_irqsave(&irq->lock, flags);
+ list_for_each_entry_safe(fence, next, &irq->pending, irq_link) {
+ list_del_init(&fence->irq_link);
+ err = dma_fence_signal_locked(&fence->dma);
+ dma_fence_put(&fence->dma);
+ XE_WARN_ON(err);
+ }
+ spin_unlock_irqrestore(&irq->lock, flags);
+ dma_fence_end_signalling(tmp);
+ }
+}
+
+void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq)
+{
+ irq_work_queue(&irq->work);
+}
+
+void xe_hw_fence_irq_stop(struct xe_hw_fence_irq *irq)
+{
+ spin_lock_irq(&irq->lock);
+ irq->enabled = false;
+ spin_unlock_irq(&irq->lock);
+}
+
+void xe_hw_fence_irq_start(struct xe_hw_fence_irq *irq)
+{
+ spin_lock_irq(&irq->lock);
+ irq->enabled = true;
+ spin_unlock_irq(&irq->lock);
+
+ irq_work_queue(&irq->work);
+}
+
+void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt,
+ struct xe_hw_fence_irq *irq, const char *name)
+{
+ ctx->gt = gt;
+ ctx->irq = irq;
+ ctx->dma_fence_ctx = dma_fence_context_alloc(1);
+ ctx->next_seqno = 1;
+	snprintf(ctx->name, sizeof(ctx->name), "%s", name);
+}
+
+void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx)
+{
+}
+
+static struct xe_hw_fence *to_xe_hw_fence(struct dma_fence *fence);
+
+static struct xe_hw_fence_irq *xe_hw_fence_irq(struct xe_hw_fence *fence)
+{
+ return container_of(fence->dma.lock, struct xe_hw_fence_irq, lock);
+}
+
+static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence)
+{
+ struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
+
+ return dev_name(gt_to_xe(fence->ctx->gt)->drm.dev);
+}
+
+static const char *xe_hw_fence_get_timeline_name(struct dma_fence *dma_fence)
+{
+ struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
+
+ return fence->ctx->name;
+}
+
+static bool xe_hw_fence_signaled(struct dma_fence *dma_fence)
+{
+ struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
+ struct xe_device *xe = gt_to_xe(fence->ctx->gt);
+ u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32);
+
+ return dma_fence->error ||
+ (s32)fence->dma.seqno <= (s32)seqno;
+}
+
+static bool xe_hw_fence_enable_signaling(struct dma_fence *dma_fence)
+{
+ struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
+ struct xe_hw_fence_irq *irq = xe_hw_fence_irq(fence);
+
+ dma_fence_get(dma_fence);
+ list_add_tail(&fence->irq_link, &irq->pending);
+
+ /* SW completed (no HW IRQ) so kick handler to signal fence */
+ if (xe_hw_fence_signaled(dma_fence))
+ xe_hw_fence_irq_run(irq);
+
+ return true;
+}
+
+static void xe_hw_fence_release(struct dma_fence *dma_fence)
+{
+ struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
+
+ trace_xe_hw_fence_free(fence);
+ XE_BUG_ON(!list_empty(&fence->irq_link));
+ call_rcu(&dma_fence->rcu, fence_free);
+}
+
+static const struct dma_fence_ops xe_hw_fence_ops = {
+ .get_driver_name = xe_hw_fence_get_driver_name,
+ .get_timeline_name = xe_hw_fence_get_timeline_name,
+ .enable_signaling = xe_hw_fence_enable_signaling,
+ .signaled = xe_hw_fence_signaled,
+ .release = xe_hw_fence_release,
+};
+
+static struct xe_hw_fence *to_xe_hw_fence(struct dma_fence *fence)
+{
+ if (XE_WARN_ON(fence->ops != &xe_hw_fence_ops))
+ return NULL;
+
+ return container_of(fence, struct xe_hw_fence, dma);
+}
+
+struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx,
+ struct iosys_map seqno_map)
+{
+ struct xe_hw_fence *fence;
+
+ fence = fence_alloc();
+ if (!fence)
+ return ERR_PTR(-ENOMEM);
+
+ dma_fence_init(&fence->dma, &xe_hw_fence_ops, &ctx->irq->lock,
+ ctx->dma_fence_ctx, ctx->next_seqno++);
+
+ fence->ctx = ctx;
+ fence->seqno_map = seqno_map;
+ INIT_LIST_HEAD(&fence->irq_link);
+
+ trace_xe_hw_fence_create(fence);
+
+ return fence;
+}
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.h b/drivers/gpu/drm/xe/xe_hw_fence.h
new file mode 100644
index 000000000000..07f202db6526
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_fence.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_HW_FENCE_H_
+#define _XE_HW_FENCE_H_
+
+#include "xe_hw_fence_types.h"
+
+int xe_hw_fence_module_init(void);
+void xe_hw_fence_module_exit(void);
+
+void xe_hw_fence_irq_init(struct xe_hw_fence_irq *irq);
+void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq);
+void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq);
+void xe_hw_fence_irq_stop(struct xe_hw_fence_irq *irq);
+void xe_hw_fence_irq_start(struct xe_hw_fence_irq *irq);
+
+void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt,
+ struct xe_hw_fence_irq *irq, const char *name);
+void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx);
+
+struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx,
+ struct iosys_map seqno_map);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h
new file mode 100644
index 000000000000..a78e50eb3cb8
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_HW_FENCE_TYPES_H_
+#define _XE_HW_FENCE_TYPES_H_
+
+#include <linux/iosys-map.h>
+#include <linux/dma-fence.h>
+#include <linux/irq_work.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+struct xe_gt;
+
+/**
+ * struct xe_hw_fence_irq - hardware fence IRQ handler
+ *
+ * One per engine class; signals completed xe_hw_fences when the hw engine
+ * interrupt fires. On each trigger, the pending list is walked and completed
+ * fences are signaled.
+ */
+struct xe_hw_fence_irq {
+ /** @lock: protects all xe_hw_fences + pending list */
+ spinlock_t lock;
+ /** @work: IRQ worker run to signal the fences */
+ struct irq_work work;
+ /** @pending: list of pending xe_hw_fences */
+ struct list_head pending;
+ /** @enabled: fence signaling enabled */
+ bool enabled;
+};
+
+#define MAX_FENCE_NAME_LEN 16
+
+/**
+ * struct xe_hw_fence_ctx - hardware fence context
+ *
+ * The context for a hardware fence. One-to-one relationship with xe_engine.
+ * Points to an xe_hw_fence_irq and maintains a serial seqno.
+ */
+struct xe_hw_fence_ctx {
+ /** @gt: graphics tile of hardware fence context */
+ struct xe_gt *gt;
+ /** @irq: fence irq handler */
+ struct xe_hw_fence_irq *irq;
+ /** @dma_fence_ctx: dma fence context for hardware fence */
+ u64 dma_fence_ctx;
+ /** @next_seqno: next seqno for hardware fence */
+ u32 next_seqno;
+ /** @name: name of hardware fence context */
+ char name[MAX_FENCE_NAME_LEN];
+};
+
+/**
+ * struct xe_hw_fence - hardware fence
+ *
+ * Used to indicate that an xe_sched_job is complete, via a seqno written to
+ * memory. Signals on error or once the seqno has been passed.
+ */
+struct xe_hw_fence {
+ /** @dma: base dma fence for hardware fence context */
+ struct dma_fence dma;
+ /** @ctx: hardware fence context */
+ struct xe_hw_fence_ctx *ctx;
+ /** @seqno_map: I/O map for seqno */
+ struct iosys_map seqno_map;
+ /** @irq_link: Link in struct xe_hw_fence_irq.pending */
+ struct list_head irq_link;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
new file mode 100644
index 000000000000..df2e3573201d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -0,0 +1,565 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <linux/sched/clock.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_drv.h"
+#include "xe_guc.h"
+#include "xe_gt.h"
+#include "xe_hw_engine.h"
+#include "xe_mmio.h"
+
+#include "i915_reg.h"
+#include "gt/intel_gt_regs.h"
+
+static void gen3_assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg)
+{
+ u32 val = xe_mmio_read32(gt, reg.reg);
+
+ if (val == 0)
+ return;
+
+ drm_WARN(&gt_to_xe(gt)->drm, 1,
+ "Interrupt register 0x%x is not zero: 0x%08x\n",
+ reg.reg, val);
+ xe_mmio_write32(gt, reg.reg, 0xffffffff);
+ xe_mmio_read32(gt, reg.reg);
+ xe_mmio_write32(gt, reg.reg, 0xffffffff);
+ xe_mmio_read32(gt, reg.reg);
+}
+
+static void gen3_irq_init(struct xe_gt *gt,
+ i915_reg_t imr, u32 imr_val,
+ i915_reg_t ier, u32 ier_val,
+ i915_reg_t iir)
+{
+ gen3_assert_iir_is_zero(gt, iir);
+
+ xe_mmio_write32(gt, ier.reg, ier_val);
+ xe_mmio_write32(gt, imr.reg, imr_val);
+ xe_mmio_read32(gt, imr.reg);
+}
+#define GEN3_IRQ_INIT(gt, type, imr_val, ier_val) \
+ gen3_irq_init((gt), \
+ type##IMR, imr_val, \
+ type##IER, ier_val, \
+ type##IIR)
+
+static void gen3_irq_reset(struct xe_gt *gt, i915_reg_t imr, i915_reg_t iir,
+ i915_reg_t ier)
+{
+ xe_mmio_write32(gt, imr.reg, 0xffffffff);
+ xe_mmio_read32(gt, imr.reg);
+
+ xe_mmio_write32(gt, ier.reg, 0);
+
+ /* IIR can theoretically queue up two events. Be paranoid. */
+ xe_mmio_write32(gt, iir.reg, 0xffffffff);
+ xe_mmio_read32(gt, iir.reg);
+ xe_mmio_write32(gt, iir.reg, 0xffffffff);
+ xe_mmio_read32(gt, iir.reg);
+}
+#define GEN3_IRQ_RESET(gt, type) \
+ gen3_irq_reset((gt), type##IMR, type##IIR, type##IER)
+
+static u32 gen11_intr_disable(struct xe_gt *gt)
+{
+ xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, 0);
+
+ /*
+ * Now with master disabled, get a sample of level indications
+ * for this interrupt. Indications will be cleared on related acks.
+ * New indications can and will light up during processing,
+ * and will generate new interrupt after enabling master.
+ */
+ return xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg);
+}
+
+static u32
+gen11_gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl)
+{
+ u32 iir;
+
+ if (!(master_ctl & GEN11_GU_MISC_IRQ))
+ return 0;
+
+ iir = xe_mmio_read32(gt, GEN11_GU_MISC_IIR.reg);
+ if (likely(iir))
+ xe_mmio_write32(gt, GEN11_GU_MISC_IIR.reg, iir);
+
+ return iir;
+}
+
+static inline void gen11_intr_enable(struct xe_gt *gt, bool stall)
+{
+ xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, GEN11_MASTER_IRQ);
+ if (stall)
+ xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg);
+}
+
+static void gen11_gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
+{
+ u32 irqs, dmask, smask;
+ u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE);
+ u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY);
+
+ if (xe_device_guc_submission_enabled(xe)) {
+ irqs = GT_RENDER_USER_INTERRUPT |
+ GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
+ } else {
+ irqs = GT_RENDER_USER_INTERRUPT |
+ GT_CS_MASTER_ERROR_INTERRUPT |
+ GT_CONTEXT_SWITCH_INTERRUPT |
+ GT_WAIT_SEMAPHORE_INTERRUPT;
+ }
+
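+	/*
+	 * Each 32-bit enable/mask register serves two units, one per 16-bit
+	 * half: dmask programs both halves, smask only the upper one.
+	 */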
+ dmask = irqs << 16 | irqs;
+ smask = irqs << 16;
+
+ /* Enable RCS, BCS, VCS and VECS class interrupts. */
+ xe_mmio_write32(gt, GEN11_RENDER_COPY_INTR_ENABLE.reg, dmask);
+ xe_mmio_write32(gt, GEN11_VCS_VECS_INTR_ENABLE.reg, dmask);
+ if (ccs_mask)
+ xe_mmio_write32(gt, GEN12_CCS_RSVD_INTR_ENABLE.reg, smask);
+
+ /* Unmask irqs on RCS, BCS, VCS and VECS engines. */
+ xe_mmio_write32(gt, GEN11_RCS0_RSVD_INTR_MASK.reg, ~smask);
+ xe_mmio_write32(gt, GEN11_BCS_RSVD_INTR_MASK.reg, ~smask);
+ if (bcs_mask & (BIT(1)|BIT(2)))
+ xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK.reg, ~dmask);
+ if (bcs_mask & (BIT(3)|BIT(4)))
+ xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK.reg, ~dmask);
+ if (bcs_mask & (BIT(5)|BIT(6)))
+ xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK.reg, ~dmask);
+ if (bcs_mask & (BIT(7)|BIT(8)))
+ xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~dmask);
+ xe_mmio_write32(gt, GEN11_VCS0_VCS1_INTR_MASK.reg, ~dmask);
+ xe_mmio_write32(gt, GEN11_VCS2_VCS3_INTR_MASK.reg, ~dmask);
+ //if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5))
+ // intel_uncore_write(uncore, GEN12_VCS4_VCS5_INTR_MASK, ~dmask);
+ //if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7))
+ // intel_uncore_write(uncore, GEN12_VCS6_VCS7_INTR_MASK, ~dmask);
+ xe_mmio_write32(gt, GEN11_VECS0_VECS1_INTR_MASK.reg, ~dmask);
+ //if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3))
+ // intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~dmask);
+ if (ccs_mask & (BIT(0)|BIT(1)))
+ xe_mmio_write32(gt, GEN12_CCS0_CCS1_INTR_MASK.reg, ~dmask);
+ if (ccs_mask & (BIT(2)|BIT(3)))
+ xe_mmio_write32(gt, GEN12_CCS2_CCS3_INTR_MASK.reg, ~dmask);
+
+ /*
+ * RPS interrupts will get enabled/disabled on demand when RPS itself
+ * is enabled/disabled.
+ */
+ /* TODO: gt->pm_ier, gt->pm_imr */
+ xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_ENABLE.reg, 0);
+ xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_MASK.reg, ~0);
+
+ /* Same thing for GuC interrupts */
+ xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg, 0);
+ xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg, ~0);
+}
+
+static void gen11_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
+{
+ /* TODO: PCH */
+
+ gen11_gt_irq_postinstall(xe, gt);
+
+ GEN3_IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE,
+ GEN11_GU_MISC_GSE);
+
+ gen11_intr_enable(gt, true);
+}
+
+static u32
+gen11_gt_engine_identity(struct xe_device *xe,
+ struct xe_gt *gt,
+ const unsigned int bank,
+ const unsigned int bit)
+{
+ u32 timeout_ts;
+ u32 ident;
+
+ lockdep_assert_held(&xe->irq.lock);
+
+ xe_mmio_write32(gt, GEN11_IIR_REG_SELECTOR(bank).reg, BIT(bit));
+
+ /*
+ * NB: Specs do not specify how long to spin wait,
+ * so we do ~100us as an educated guess.
+ */
+ timeout_ts = (local_clock() >> 10) + 100;
+ do {
+ ident = xe_mmio_read32(gt, GEN11_INTR_IDENTITY_REG(bank).reg);
+ } while (!(ident & GEN11_INTR_DATA_VALID) &&
+ !time_after32(local_clock() >> 10, timeout_ts));
+
+ if (unlikely(!(ident & GEN11_INTR_DATA_VALID))) {
+ drm_err(&xe->drm, "INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n",
+ bank, bit, ident);
+ return 0;
+ }
+
+ xe_mmio_write32(gt, GEN11_INTR_IDENTITY_REG(bank).reg,
+ GEN11_INTR_DATA_VALID);
+
+ return ident;
+}
+
+#define OTHER_MEDIA_GUC_INSTANCE 16
+
+static void
+gen11_gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir)
+{
+ if (instance == OTHER_GUC_INSTANCE && !xe_gt_is_media_type(gt))
+ return xe_guc_irq_handler(&gt->uc.guc, iir);
+ if (instance == OTHER_MEDIA_GUC_INSTANCE && xe_gt_is_media_type(gt))
+ return xe_guc_irq_handler(&gt->uc.guc, iir);
+
+ if (instance != OTHER_GUC_INSTANCE &&
+ instance != OTHER_MEDIA_GUC_INSTANCE) {
+ WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n",
+ instance, iir);
+ }
+}
+
+static void gen11_gt_irq_handler(struct xe_device *xe, struct xe_gt *gt,
+				 u32 master_ctl, unsigned long *intr_dw,
+ u32 *identity)
+{
+ unsigned int bank, bit;
+ u16 instance, intr_vec;
+ enum xe_engine_class class;
+ struct xe_hw_engine *hwe;
+
+ spin_lock(&xe->irq.lock);
+
+ for (bank = 0; bank < 2; bank++) {
+ if (!(master_ctl & GEN11_GT_DW_IRQ(bank)))
+ continue;
+
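+		/*
+		 * The primary and media GT share these identity registers:
+		 * read and ack them only once, on the primary GT, and reuse
+		 * the cached intr_dw/identity values for the media GT.
+		 */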
+ if (!xe_gt_is_media_type(gt)) {
+ intr_dw[bank] =
+ xe_mmio_read32(gt, GEN11_GT_INTR_DW(bank).reg);
+ for_each_set_bit(bit, intr_dw + bank, 32)
+ identity[bit] = gen11_gt_engine_identity(xe, gt,
+ bank,
+ bit);
+ xe_mmio_write32(gt, GEN11_GT_INTR_DW(bank).reg,
+ intr_dw[bank]);
+ }
+
+ for_each_set_bit(bit, intr_dw + bank, 32) {
+ class = GEN11_INTR_ENGINE_CLASS(identity[bit]);
+ instance = GEN11_INTR_ENGINE_INSTANCE(identity[bit]);
+ intr_vec = GEN11_INTR_ENGINE_INTR(identity[bit]);
+
+ if (class == XE_ENGINE_CLASS_OTHER) {
+ gen11_gt_other_irq_handler(gt, instance,
+ intr_vec);
+ continue;
+ }
+
+ hwe = xe_gt_hw_engine(gt, class, instance, false);
+ if (!hwe)
+ continue;
+
+ xe_hw_engine_handle_irq(hwe, intr_vec);
+ }
+ }
+
+ spin_unlock(&xe->irq.lock);
+}
+
+static irqreturn_t gen11_irq_handler(int irq, void *arg)
+{
+ struct xe_device *xe = arg;
+ struct xe_gt *gt = xe_device_get_gt(xe, 0); /* Only 1 GT here */
+ u32 master_ctl, gu_misc_iir;
+	unsigned long intr_dw[2];
+ u32 identity[32];
+
+ master_ctl = gen11_intr_disable(gt);
+ if (!master_ctl) {
+ gen11_intr_enable(gt, false);
+ return IRQ_NONE;
+ }
+
+ gen11_gt_irq_handler(xe, gt, master_ctl, intr_dw, identity);
+
+ gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl);
+
+ gen11_intr_enable(gt, false);
+
+ return IRQ_HANDLED;
+}
+
+static u32 dg1_intr_disable(struct xe_device *xe)
+{
+ struct xe_gt *gt = xe_device_get_gt(xe, 0);
+ u32 val;
+
+ /* First disable interrupts */
+ xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, 0);
+
+ /* Get the indication levels and ack the master unit */
+ val = xe_mmio_read32(gt, DG1_MSTR_TILE_INTR.reg);
+ if (unlikely(!val))
+ return 0;
+
+ xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, val);
+
+ return val;
+}
+
+static void dg1_intr_enable(struct xe_device *xe, bool stall)
+{
+ struct xe_gt *gt = xe_device_get_gt(xe, 0);
+
+ xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, DG1_MSTR_IRQ);
+ if (stall)
+ xe_mmio_read32(gt, DG1_MSTR_TILE_INTR.reg);
+}
+
+static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
+{
+ gen11_gt_irq_postinstall(xe, gt);
+
+ GEN3_IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE,
+ GEN11_GU_MISC_GSE);
+
+ if (gt->info.id + 1 == xe->info.tile_count)
+ dg1_intr_enable(xe, true);
+}
+
+static irqreturn_t dg1_irq_handler(int irq, void *arg)
+{
+ struct xe_device *xe = arg;
+ struct xe_gt *gt;
+ u32 master_tile_ctl, master_ctl = 0, gu_misc_iir;
+	unsigned long intr_dw[2];
+ u32 identity[32];
+ u8 id;
+
+ /* TODO: This really shouldn't be copied+pasted */
+
+ master_tile_ctl = dg1_intr_disable(xe);
+ if (!master_tile_ctl) {
+ dg1_intr_enable(xe, false);
+ return IRQ_NONE;
+ }
+
+ for_each_gt(gt, xe, id) {
+ if ((master_tile_ctl & DG1_MSTR_TILE(gt->info.vram_id)) == 0)
+ continue;
+
+ if (!xe_gt_is_media_type(gt))
+ master_ctl = xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg);
+
+ /*
+ * We might be in irq handler just when PCIe DPC is initiated
+ * and all MMIO reads will be returned with all 1's. Ignore this
+ * irq as device is inaccessible.
+ */
+ if (master_ctl == REG_GENMASK(31, 0)) {
+ dev_dbg(gt_to_xe(gt)->drm.dev,
+ "Ignore this IRQ as device might be in DPC containment.\n");
+ return IRQ_HANDLED;
+ }
+
+ if (!xe_gt_is_media_type(gt))
+ xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, master_ctl);
+ gen11_gt_irq_handler(xe, gt, master_ctl, intr_dw, identity);
+ }
+
+ gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl);
+
+ dg1_intr_enable(xe, false);
+
+ return IRQ_HANDLED;
+}
+
+static void gen11_gt_irq_reset(struct xe_gt *gt)
+{
+ u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE);
+ u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY);
+
+ /* Disable RCS, BCS, VCS and VECS class engines. */
+ xe_mmio_write32(gt, GEN11_RENDER_COPY_INTR_ENABLE.reg, 0);
+ xe_mmio_write32(gt, GEN11_VCS_VECS_INTR_ENABLE.reg, 0);
+ if (ccs_mask)
+ xe_mmio_write32(gt, GEN12_CCS_RSVD_INTR_ENABLE.reg, 0);
+
+ /* Restore masks irqs on RCS, BCS, VCS and VECS engines. */
+ xe_mmio_write32(gt, GEN11_RCS0_RSVD_INTR_MASK.reg, ~0);
+ xe_mmio_write32(gt, GEN11_BCS_RSVD_INTR_MASK.reg, ~0);
+ if (bcs_mask & (BIT(1)|BIT(2)))
+ xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK.reg, ~0);
+ if (bcs_mask & (BIT(3)|BIT(4)))
+ xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK.reg, ~0);
+ if (bcs_mask & (BIT(5)|BIT(6)))
+ xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK.reg, ~0);
+ if (bcs_mask & (BIT(7)|BIT(8)))
+ xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~0);
+ xe_mmio_write32(gt, GEN11_VCS0_VCS1_INTR_MASK.reg, ~0);
+ xe_mmio_write32(gt, GEN11_VCS2_VCS3_INTR_MASK.reg, ~0);
+// if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5))
+// xe_mmio_write32(xe, GEN12_VCS4_VCS5_INTR_MASK.reg, ~0);
+// if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7))
+// xe_mmio_write32(xe, GEN12_VCS6_VCS7_INTR_MASK.reg, ~0);
+ xe_mmio_write32(gt, GEN11_VECS0_VECS1_INTR_MASK.reg, ~0);
+// if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3))
+// xe_mmio_write32(xe, GEN12_VECS2_VECS3_INTR_MASK.reg, ~0);
+ if (ccs_mask & (BIT(0)|BIT(1)))
+ xe_mmio_write32(gt, GEN12_CCS0_CCS1_INTR_MASK.reg, ~0);
+ if (ccs_mask & (BIT(2)|BIT(3)))
+ xe_mmio_write32(gt, GEN12_CCS2_CCS3_INTR_MASK.reg, ~0);
+
+ xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_ENABLE.reg, 0);
+ xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_MASK.reg, ~0);
+ xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg, 0);
+ xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg, ~0);
+}
+
+static void gen11_irq_reset(struct xe_gt *gt)
+{
+ gen11_intr_disable(gt);
+
+ gen11_gt_irq_reset(gt);
+
+ GEN3_IRQ_RESET(gt, GEN11_GU_MISC_);
+ GEN3_IRQ_RESET(gt, GEN8_PCU_);
+}
+
+static void dg1_irq_reset(struct xe_gt *gt)
+{
+ if (gt->info.id == 0)
+ dg1_intr_disable(gt_to_xe(gt));
+
+ gen11_gt_irq_reset(gt);
+
+ GEN3_IRQ_RESET(gt, GEN11_GU_MISC_);
+ GEN3_IRQ_RESET(gt, GEN8_PCU_);
+}
+
+void xe_irq_reset(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ for_each_gt(gt, xe, id) {
+ if (GRAPHICS_VERx100(xe) >= 1210) {
+ dg1_irq_reset(gt);
+ } else if (GRAPHICS_VER(xe) >= 11) {
+ gen11_irq_reset(gt);
+ } else {
+			drm_err(&xe->drm, "No interrupt reset hook\n");
+ }
+ }
+}
+
+void xe_gt_irq_postinstall(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ if (GRAPHICS_VERx100(xe) >= 1210)
+ dg1_irq_postinstall(xe, gt);
+ else if (GRAPHICS_VER(xe) >= 11)
+ gen11_irq_postinstall(xe, gt);
+ else
+		drm_err(&xe->drm, "No interrupt postinstall hook\n");
+}
+
+static void xe_irq_postinstall(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ for_each_gt(gt, xe, id)
+ xe_gt_irq_postinstall(gt);
+}
+
+static irq_handler_t xe_irq_handler(struct xe_device *xe)
+{
+ if (GRAPHICS_VERx100(xe) >= 1210) {
+ return dg1_irq_handler;
+ } else if (GRAPHICS_VER(xe) >= 11) {
+ return gen11_irq_handler;
+ } else {
+ return NULL;
+ }
+}
+
+static void irq_uninstall(struct drm_device *drm, void *arg)
+{
+ struct xe_device *xe = arg;
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ int irq = pdev->irq;
+
+ if (!xe->irq.enabled)
+ return;
+
+ xe->irq.enabled = false;
+ xe_irq_reset(xe);
+ free_irq(irq, xe);
+ if (pdev->msi_enabled)
+ pci_disable_msi(pdev);
+}
+
+int xe_irq_install(struct xe_device *xe)
+{
+ int irq = to_pci_dev(xe->drm.dev)->irq;
+	irq_handler_t irq_handler;
+ int err;
+
+ irq_handler = xe_irq_handler(xe);
+ if (!irq_handler) {
+		drm_err(&xe->drm, "No supported interrupt handler\n");
+ return -EINVAL;
+ }
+
+ xe->irq.enabled = true;
+
+ xe_irq_reset(xe);
+
+ err = request_irq(irq, irq_handler,
+ IRQF_SHARED, DRIVER_NAME, xe);
+ if (err < 0) {
+ xe->irq.enabled = false;
+ return err;
+ }
+
+ err = drmm_add_action_or_reset(&xe->drm, irq_uninstall, xe);
+ if (err)
+ return err;
+
+ return err;
+}
+
+void xe_irq_shutdown(struct xe_device *xe)
+{
+ irq_uninstall(&xe->drm, xe);
+}
+
+void xe_irq_suspend(struct xe_device *xe)
+{
+ spin_lock_irq(&xe->irq.lock);
+ xe->irq.enabled = false;
+ xe_irq_reset(xe);
+ spin_unlock_irq(&xe->irq.lock);
+}
+
+void xe_irq_resume(struct xe_device *xe)
+{
+ spin_lock_irq(&xe->irq.lock);
+ xe->irq.enabled = true;
+ xe_irq_reset(xe);
+ xe_irq_postinstall(xe);
+ spin_unlock_irq(&xe->irq.lock);
+}
diff --git a/drivers/gpu/drm/xe/xe_irq.h b/drivers/gpu/drm/xe/xe_irq.h
new file mode 100644
index 000000000000..34ecf22b32d3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_irq.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_IRQ_H_
+#define _XE_IRQ_H_
+
+struct xe_device;
+struct xe_gt;
+
+int xe_irq_install(struct xe_device *xe);
+void xe_gt_irq_postinstall(struct xe_gt *gt);
+void xe_irq_shutdown(struct xe_device *xe);
+void xe_irq_suspend(struct xe_device *xe);
+void xe_irq_resume(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
new file mode 100644
index 000000000000..056c2c5a0b81
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -0,0 +1,841 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_lrc.h"
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_engine_types.h"
+#include "xe_gt.h"
+#include "xe_map.h"
+#include "xe_hw_fence.h"
+#include "xe_vm.h"
+
+#include "i915_reg.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_lrc_reg.h"
+#include "gt/intel_engine_regs.h"
+
+#define GEN8_CTX_VALID (1 << 0)
+#define GEN8_CTX_L3LLC_COHERENT (1 << 5)
+#define GEN8_CTX_PRIVILEGE (1 << 8)
+#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3
+#define INTEL_LEGACY_64B_CONTEXT 3
+
+#define GEN11_ENGINE_CLASS_SHIFT 61
+#define GEN11_ENGINE_INSTANCE_SHIFT 48
+
+static struct xe_device *
+lrc_to_xe(struct xe_lrc *lrc)
+{
+ return gt_to_xe(lrc->fence_ctx.gt);
+}
+
+size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
+{
+ switch (class) {
+ case XE_ENGINE_CLASS_RENDER:
+ case XE_ENGINE_CLASS_COMPUTE:
+ /* 14 pages since graphics_ver == 11 */
+ return 14 * SZ_4K;
+ default:
+ WARN(1, "Unknown engine class: %d", class);
+ fallthrough;
+ case XE_ENGINE_CLASS_COPY:
+ case XE_ENGINE_CLASS_VIDEO_DECODE:
+ case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+ return 2 * SZ_4K;
+ }
+}
+
+/*
+ * The per-platform tables are u8-encoded in @data. Decode @data and set the
+ * register offsets and commands in @regs. The following encoding is used for
+ * each byte; decoding happens in two steps: commands first, then addresses.
+ *
+ * Commands:
+ * [7]: create NOPs - number of NOPs are set in lower bits
+ * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
+ * MI_LRI_FORCE_POSTED
+ * [5:0]: Number of NOPs or registers to set values to in case of
+ * MI_LOAD_REGISTER_IMM
+ *
+ * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
+ * number of registers. They are set by using the REG/REG16 macros: the former
+ * is used for offsets smaller than 0x200 while the latter is for values bigger
+ * than that. Those macros already set all the bits documented below correctly:
+ *
+ * [7]: When a register offset needs more than 6 bits, use additional bytes, to
+ * follow, for the lower bits
+ * [6:0]: Register offset, without considering the engine base.
+ *
+ * This function only tweaks the commands and register offsets. Values are not
+ * filled out.
+ */
+static void set_offsets(u32 *regs,
+ const u8 *data,
+ const struct xe_hw_engine *hwe)
+#define NOP(x) (BIT(7) | (x))
+#define LRI(count, flags) ((flags) << 6 | (count) | \
+ BUILD_BUG_ON_ZERO(count >= BIT(6)))
+#define POSTED BIT(0)
+#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
+#define REG16(x) \
+ (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
+ (((x) >> 2) & 0x7f)
+#define END 0
+{
+ const u32 base = hwe->mmio_base;
+
+ while (*data) {
+ u8 count, flags;
+
+ if (*data & BIT(7)) { /* skip */
+ count = *data++ & ~BIT(7);
+ regs += count;
+ continue;
+ }
+
+ count = *data & 0x3f;
+ flags = *data >> 6;
+ data++;
+
+ *regs = MI_LOAD_REGISTER_IMM(count);
+ if (flags & POSTED)
+ *regs |= MI_LRI_FORCE_POSTED;
+ *regs |= MI_LRI_LRM_CS_MMIO;
+ regs++;
+
+ XE_BUG_ON(!count);
+ do {
+ u32 offset = 0;
+ u8 v;
+
+ do {
+ v = *data++;
+ offset <<= 7;
+ offset |= v & ~BIT(7);
+ } while (v & BIT(7));
+
+ regs[0] = base + (offset << 2);
+ regs += 2;
+ } while (--count);
+ }
+
+ *regs = MI_BATCH_BUFFER_END | BIT(0);
+}
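+
+/*
+ * Illustrative example (not one of the per-platform tables below): the
+ * sequence
+ *	NOP(1), LRI(2, POSTED), REG(0x034), REG16(0x2b4), END
+ * decodes to: skip regs[0]; emit MI_LOAD_REGISTER_IMM(2) with
+ * MI_LRI_FORCE_POSTED and MI_LRI_LRM_CS_MMIO at regs[1]; store the offsets
+ * base + 0x34 at regs[2] and base + 0x2b4 at regs[4] (the values at
+ * regs[3]/regs[5] are filled in later); and terminate with
+ * MI_BATCH_BUFFER_END at regs[6].
+ */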
+
+static const u8 gen12_xcs_offsets[] = {
+ NOP(1),
+ LRI(13, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+ REG16(0x2b4),
+
+ NOP(5),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ END
+};
+
+static const u8 dg2_xcs_offsets[] = {
+ NOP(1),
+ LRI(15, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+ REG16(0x2b4),
+ REG(0x120),
+ REG(0x124),
+
+ NOP(1),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ END
+};
+
+static const u8 gen12_rcs_offsets[] = {
+ NOP(1),
+ LRI(13, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+ REG16(0x2b4),
+
+ NOP(5),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ LRI(3, POSTED),
+ REG(0x1b0),
+ REG16(0x5a8),
+ REG16(0x5ac),
+
+ NOP(6),
+ LRI(1, 0),
+ REG(0x0c8),
+ NOP(3 + 9 + 1),
+
+ LRI(51, POSTED),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG(0x028),
+ REG(0x09c),
+ REG(0x0c0),
+ REG(0x178),
+ REG(0x17c),
+ REG16(0x358),
+ REG(0x170),
+ REG(0x150),
+ REG(0x154),
+ REG(0x158),
+ REG16(0x41c),
+ REG16(0x600),
+ REG16(0x604),
+ REG16(0x608),
+ REG16(0x60c),
+ REG16(0x610),
+ REG16(0x614),
+ REG16(0x618),
+ REG16(0x61c),
+ REG16(0x620),
+ REG16(0x624),
+ REG16(0x628),
+ REG16(0x62c),
+ REG16(0x630),
+ REG16(0x634),
+ REG16(0x638),
+ REG16(0x63c),
+ REG16(0x640),
+ REG16(0x644),
+ REG16(0x648),
+ REG16(0x64c),
+ REG16(0x650),
+ REG16(0x654),
+ REG16(0x658),
+ REG16(0x65c),
+ REG16(0x660),
+ REG16(0x664),
+ REG16(0x668),
+ REG16(0x66c),
+ REG16(0x670),
+ REG16(0x674),
+ REG16(0x678),
+ REG16(0x67c),
+ REG(0x068),
+ REG(0x084),
+ NOP(1),
+
+ END
+};
+
+static const u8 xehp_rcs_offsets[] = {
+ NOP(1),
+ LRI(13, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+ REG16(0x2b4),
+
+ NOP(5),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ LRI(3, POSTED),
+ REG(0x1b0),
+ REG16(0x5a8),
+ REG16(0x5ac),
+
+ NOP(6),
+ LRI(1, 0),
+ REG(0x0c8),
+
+ END
+};
+
+static const u8 dg2_rcs_offsets[] = {
+ NOP(1),
+ LRI(15, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+ REG16(0x2b4),
+ REG(0x120),
+ REG(0x124),
+
+ NOP(1),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ LRI(3, POSTED),
+ REG(0x1b0),
+ REG16(0x5a8),
+ REG16(0x5ac),
+
+ NOP(6),
+ LRI(1, 0),
+ REG(0x0c8),
+
+ END
+};
+
+static const u8 mtl_rcs_offsets[] = {
+ NOP(1),
+ LRI(15, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+ REG16(0x2b4),
+ REG(0x120),
+ REG(0x124),
+
+ NOP(1),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ NOP(2),
+ LRI(2, POSTED),
+ REG16(0x5a8),
+ REG16(0x5ac),
+
+ NOP(6),
+ LRI(1, 0),
+ REG(0x0c8),
+
+ END
+};
+
+#undef END
+#undef REG16
+#undef REG
+#undef LRI
+#undef NOP
+
+static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
+{
+ if (class == XE_ENGINE_CLASS_RENDER) {
+ if (GRAPHICS_VERx100(xe) >= 1270)
+ return mtl_rcs_offsets;
+ else if (GRAPHICS_VERx100(xe) >= 1255)
+ return dg2_rcs_offsets;
+ else if (GRAPHICS_VERx100(xe) >= 1250)
+ return xehp_rcs_offsets;
+ else
+ return gen12_rcs_offsets;
+ } else {
+ if (GRAPHICS_VERx100(xe) >= 1255)
+ return dg2_xcs_offsets;
+ else
+ return gen12_xcs_offsets;
+ }
+}
+
+static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
+{
+ regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
+ _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
+ CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
+
+ /* TODO: Timestamp */
+}
+
+static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
+{
+ struct xe_device *xe = gt_to_xe(hwe->gt);
+
+ if (GRAPHICS_VERx100(xe) >= 1250)
+ return 0x70;
+ else
+ return 0x60;
+}
+
+static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
+{
+ int x;
+
+ x = lrc_ring_mi_mode(hwe);
+ regs[x + 1] &= ~STOP_RING;
+ regs[x + 1] |= STOP_RING << 16;
+}
+
+static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
+{
+ return 0;
+}
+
+u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
+{
+ return lrc->ring.size;
+}
+
+/* Make the magic macros work */
+#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
+
+#define LRC_SEQNO_PPHWSP_OFFSET 512
+#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
+#define LRC_PARALLEL_PPHWSP_OFFSET 2048
+#define LRC_PPHWSP_SIZE SZ_4K
+
+static size_t lrc_reg_size(struct xe_device *xe)
+{
+ if (GRAPHICS_VERx100(xe) >= 1250)
+ return 96 * sizeof(u32);
+ else
+ return 80 * sizeof(u32);
+}
+
+size_t xe_lrc_skip_size(struct xe_device *xe)
+{
+ return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
+}
+
+static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
+{
+ /* The seqno is stored in the driver-defined portion of PPHWSP */
+ return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
+}
+
+static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
+{
+ /* The start seqno is stored in the driver-defined portion of PPHWSP */
+ return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
+}
+
+static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
+{
+ /* The parallel is stored in the driver-defined portion of PPHWSP */
+ return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
+}
+
+static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
+{
+ return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
+}
+
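+/*
+ * For each LRC sub-object <elem> with a __xe_lrc_<elem>_offset() helper,
+ * generate __xe_lrc_<elem>_map() (an iosys_map into the backing BO at that
+ * offset) and __xe_lrc_<elem>_ggtt_addr() (its GGTT address).
+ */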
+#define DECL_MAP_ADDR_HELPERS(elem) \
+static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
+{ \
+ struct iosys_map map = lrc->bo->vmap; \
+\
+ XE_BUG_ON(iosys_map_is_null(&map)); \
+ iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
+ return map; \
+} \
+static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
+{ \
+ return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
+} \
+
+DECL_MAP_ADDR_HELPERS(ring)
+DECL_MAP_ADDR_HELPERS(pphwsp)
+DECL_MAP_ADDR_HELPERS(seqno)
+DECL_MAP_ADDR_HELPERS(regs)
+DECL_MAP_ADDR_HELPERS(start_seqno)
+DECL_MAP_ADDR_HELPERS(parallel)
+
+#undef DECL_MAP_ADDR_HELPERS
+
+u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
+{
+ return __xe_lrc_pphwsp_ggtt_addr(lrc);
+}
+
+u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
+{
+ struct xe_device *xe = lrc_to_xe(lrc);
+ struct iosys_map map;
+
+ map = __xe_lrc_regs_map(lrc);
+ iosys_map_incr(&map, reg_nr * sizeof(u32));
+ return xe_map_read32(xe, &map);
+}
+
+void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
+{
+ struct xe_device *xe = lrc_to_xe(lrc);
+ struct iosys_map map;
+
+ map = __xe_lrc_regs_map(lrc);
+ iosys_map_incr(&map, reg_nr * sizeof(u32));
+ xe_map_write32(xe, &map, val);
+}
+
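+/*
+ * Build an initial register-state image for an engine class, used as the
+ * template when no default LRC has been recorded for that class yet.
+ */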
+static void *empty_lrc_data(struct xe_hw_engine *hwe)
+{
+ struct xe_device *xe = gt_to_xe(hwe->gt);
+ void *data;
+ u32 *regs;
+
+ data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL);
+ if (!data)
+ return NULL;
+
+ /* 1st page: Per-Process of HW status Page */
+ regs = data + LRC_PPHWSP_SIZE;
+ set_offsets(regs, reg_offsets(xe, hwe->class), hwe);
+ set_context_control(regs, hwe);
+ reset_stop_ring(regs, hwe);
+
+ return data;
+}
+
+static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
+{
+ u64 desc = xe_vm_pdp4_descriptor(vm, lrc->full_gt);
+
+ xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
+ xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
+}
+
+#define PVC_CTX_ASID (0x2e + 1)
+#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1)
+#define ACC_GRANULARITY_S 20
+#define ACC_NOTIFY_S 16
+
+int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
+ struct xe_engine *e, struct xe_vm *vm, u32 ring_size)
+{
+ struct xe_gt *gt = hwe->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+ struct iosys_map map;
+ void *init_data = NULL;
+ u32 arb_enable;
+ int err;
+
+ lrc->flags = 0;
+
+ lrc->bo = xe_bo_create_locked(xe, hwe->gt, vm,
+ ring_size + xe_lrc_size(xe, hwe->class),
+ ttm_bo_type_kernel,
+ XE_BO_CREATE_VRAM_IF_DGFX(hwe->gt) |
+ XE_BO_CREATE_GGTT_BIT);
+ if (IS_ERR(lrc->bo))
+ return PTR_ERR(lrc->bo);
+
+ if (xe_gt_is_media_type(hwe->gt))
+ lrc->full_gt = xe_find_full_gt(hwe->gt);
+ else
+ lrc->full_gt = hwe->gt;
+
+ /*
+ * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
+ * via VM bind calls.
+ */
+ err = xe_bo_pin(lrc->bo);
+ if (err)
+ goto err_unlock_put_bo;
+ lrc->flags |= XE_LRC_PINNED;
+
+ err = xe_bo_vmap(lrc->bo);
+ if (err)
+ goto err_unpin_bo;
+
+ xe_bo_unlock_vm_held(lrc->bo);
+
+ lrc->ring.size = ring_size;
+ lrc->ring.tail = 0;
+
+ xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
+ hwe->fence_irq, hwe->name);
+
+ if (!gt->default_lrc[hwe->class]) {
+ init_data = empty_lrc_data(hwe);
+ if (!init_data) {
+ xe_lrc_finish(lrc);
+ return -ENOMEM;
+ }
+ }
+
+ /*
+ * Init Per-Process of HW status Page, LRC / context state to known
+ * values
+ */
+ map = __xe_lrc_pphwsp_map(lrc);
+ if (!init_data) {
+ xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */
+ xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
+ gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
+ xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE);
+ } else {
+ xe_map_memcpy_to(xe, &map, 0, init_data,
+ xe_lrc_size(xe, hwe->class));
+ kfree(init_data);
+ }
+
+ if (vm)
+ xe_lrc_set_ppgtt(lrc, vm);
+
+ xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
+ xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
+ xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
+ xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
+ RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
+ if (xe->info.supports_usm && vm) {
+ xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID,
+ (e->usm.acc_granularity <<
+ ACC_GRANULARITY_S) | vm->usm.asid);
+ xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD,
+ (e->usm.acc_notify << ACC_NOTIFY_S) |
+ e->usm.acc_trigger);
+ }
+
+ lrc->desc = GEN8_CTX_VALID;
+ lrc->desc |= INTEL_LEGACY_64B_CONTEXT << GEN8_CTX_ADDRESSING_MODE_SHIFT;
+ /* TODO: Priority */
+
+ /* While this appears to have something about privileged batches or
+ * some such, it really just means PPGTT mode.
+ */
+ if (vm)
+ lrc->desc |= GEN8_CTX_PRIVILEGE;
+
+ if (GRAPHICS_VERx100(xe) < 1250) {
+ lrc->desc |= (u64)hwe->instance << GEN11_ENGINE_INSTANCE_SHIFT;
+ lrc->desc |= (u64)hwe->class << GEN11_ENGINE_CLASS_SHIFT;
+ }
+
+ arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));
+
+ return 0;
+
+err_unpin_bo:
+ if (lrc->flags & XE_LRC_PINNED)
+ xe_bo_unpin(lrc->bo);
+err_unlock_put_bo:
+ xe_bo_unlock_vm_held(lrc->bo);
+ xe_bo_put(lrc->bo);
+ return err;
+}
+
+void xe_lrc_finish(struct xe_lrc *lrc)
+{
+ struct ww_acquire_ctx ww;
+
+ xe_hw_fence_ctx_finish(&lrc->fence_ctx);
+ if (lrc->flags & XE_LRC_PINNED) {
+ if (lrc->bo->vm)
+ xe_vm_lock(lrc->bo->vm, &ww, 0, false);
+ else
+ xe_bo_lock_no_vm(lrc->bo, NULL);
+ xe_bo_unpin(lrc->bo);
+ if (lrc->bo->vm)
+ xe_vm_unlock(lrc->bo->vm, &ww);
+ else
+ xe_bo_unlock_no_vm(lrc->bo);
+ }
+ xe_bo_put(lrc->bo);
+}
+
+void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
+{
+ xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
+}
+
+u32 xe_lrc_ring_head(struct xe_lrc *lrc)
+{
+ return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
+}
+
+u32 xe_lrc_ring_space(struct xe_lrc *lrc)
+{
+ const u32 head = xe_lrc_ring_head(lrc);
+ const u32 tail = lrc->ring.tail;
+ const u32 size = lrc->ring.size;
+
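+	/*
+	 * Free bytes between the software tail and the hardware head, modulo
+	 * the power-of-two ring size; an empty ring (head == tail) reports
+	 * the full size.
+	 */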
+ return ((head - tail - 1) & (size - 1)) + 1;
+}
+
+static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
+ const void *data, size_t size)
+{
+ struct xe_device *xe = lrc_to_xe(lrc);
+
+ iosys_map_incr(&ring, lrc->ring.tail);
+ xe_map_memcpy_to(xe, &ring, 0, data, size);
+ lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
+}
+
+void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
+{
+ struct iosys_map ring;
+ u32 rhs;
+ size_t aligned_size;
+
+ XE_BUG_ON(!IS_ALIGNED(size, 4));
+ aligned_size = ALIGN(size, 8);
+
+ ring = __xe_lrc_ring_map(lrc);
+
+ XE_BUG_ON(lrc->ring.tail >= lrc->ring.size);
+ rhs = lrc->ring.size - lrc->ring.tail;
+ if (size > rhs) {
+ __xe_lrc_write_ring(lrc, ring, data, rhs);
+ __xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
+ } else {
+ __xe_lrc_write_ring(lrc, ring, data, size);
+ }
+
+ if (aligned_size > size) {
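+	/*
+	 * Writes are dword-granular; pad odd dword counts with one MI_NOOP so
+	 * the ring tail stays qword-aligned.
+	 */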
+ u32 noop = MI_NOOP;
+
+ __xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
+ }
+}
+
+u64 xe_lrc_descriptor(struct xe_lrc *lrc)
+{
+ return lrc->desc | xe_lrc_ggtt_addr(lrc);
+}
+
+u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
+{
+ return __xe_lrc_seqno_ggtt_addr(lrc);
+}
+
+struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc)
+{
+ return &xe_hw_fence_create(&lrc->fence_ctx,
+ __xe_lrc_seqno_map(lrc))->dma;
+}
+
+s32 xe_lrc_seqno(struct xe_lrc *lrc)
+{
+ struct iosys_map map = __xe_lrc_seqno_map(lrc);
+
+ return xe_map_read32(lrc_to_xe(lrc), &map);
+}
+
+s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
+{
+ struct iosys_map map = __xe_lrc_start_seqno_map(lrc);
+
+ return xe_map_read32(lrc_to_xe(lrc), &map);
+}
+
+u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
+{
+ return __xe_lrc_start_seqno_ggtt_addr(lrc);
+}
+
+u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
+{
+ return __xe_lrc_parallel_ggtt_addr(lrc);
+}
+
+struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
+{
+ return __xe_lrc_parallel_map(lrc);
+}
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
new file mode 100644
index 000000000000..e37f89e75ef8
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+#ifndef _XE_LRC_H_
+#define _XE_LRC_H_
+
+#include "xe_lrc_types.h"
+
+struct xe_device;
+struct xe_engine;
+enum xe_engine_class;
+struct xe_hw_engine;
+struct xe_vm;
+
+#define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4)
+
+int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
+ struct xe_engine *e, struct xe_vm *vm, u32 ring_size);
+void xe_lrc_finish(struct xe_lrc *lrc);
+
+size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class);
+u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc);
+
+void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head);
+u32 xe_lrc_ring_head(struct xe_lrc *lrc);
+u32 xe_lrc_ring_space(struct xe_lrc *lrc);
+void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size);
+
+u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc);
+u32 *xe_lrc_regs(struct xe_lrc *lrc);
+
+u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr);
+void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val);
+
+u64 xe_lrc_descriptor(struct xe_lrc *lrc);
+
+u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc);
+struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc);
+s32 xe_lrc_seqno(struct xe_lrc *lrc);
+
+u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc);
+s32 xe_lrc_start_seqno(struct xe_lrc *lrc);
+
+u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc);
+struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc);
+
+size_t xe_lrc_skip_size(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h
new file mode 100644
index 000000000000..2827efa2091d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_LRC_TYPES_H_
+#define _XE_LRC_TYPES_H_
+
+#include "xe_hw_fence_types.h"
+
+struct xe_bo;
+
+/**
+ * struct xe_lrc - Logical ring context (LRC) and submission ring object
+ */
+struct xe_lrc {
+ /**
+ * @bo: buffer object (memory) for logical ring context, per process HW
+ * status page, and submission ring.
+ */
+ struct xe_bo *bo;
+
+ /** @full_gt: full GT which this LRC belongs to */
+ struct xe_gt *full_gt;
+
+ /** @flags: LRC flags */
+ u32 flags;
+#define XE_LRC_PINNED BIT(1)
+
+ /** @ring: submission ring state */
+ struct {
+ /** @size: size of submission ring */
+ u32 size;
+ /** @tail: tail of submission ring */
+ u32 tail;
+ /** @old_tail: shadow of tail */
+ u32 old_tail;
+ } ring;
+
+ /** @desc: LRC descriptor */
+ u64 desc;
+
+ /** @fence_ctx: context for hw fence */
+ struct xe_hw_fence_ctx fence_ctx;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h
new file mode 100644
index 000000000000..0d24c124d202
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_macros.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_MACROS_H_
+#define _XE_MACROS_H_
+
+#include <linux/bug.h>
+
+#define XE_EXTRA_DEBUG 1
+#define XE_WARN_ON WARN_ON
+#define XE_BUG_ON BUG_ON
+
+#define XE_IOCTL_ERR(xe, cond) \
+ ((cond) && (drm_info(&(xe)->drm, \
+ "Ioctl argument check failed at %s:%d: %s", \
+ __FILE__, __LINE__, #cond), 1))
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_map.h b/drivers/gpu/drm/xe/xe_map.h
new file mode 100644
index 000000000000..0bac1f73a80d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_map.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __XE_MAP_H__
+#define __XE_MAP_H__
+
+#include <linux/iosys-map.h>
+
+#include "xe_device.h"
+
+/**
+ * DOC: Map layer
+ *
+ * All access to any memory shared with a device (both sysmem and vram) in the
+ * XE driver should go through this layer (xe_map). This layer is built on top
+ * of :ref:`driver-api/device-io:Generalizing Access to System and I/O Memory`
+ * and with extra hooks into the XE driver that allows adding asserts to memory
+ * accesses (e.g. for blocking runtime_pm D3Cold on Discrete Graphics).
+ */
+
+static inline void xe_map_memcpy_to(struct xe_device *xe, struct iosys_map *dst,
+ size_t dst_offset, const void *src,
+ size_t len)
+{
+ xe_device_assert_mem_access(xe);
+ iosys_map_memcpy_to(dst, dst_offset, src, len);
+}
+
+static inline void xe_map_memcpy_from(struct xe_device *xe, void *dst,
+ const struct iosys_map *src,
+ size_t src_offset, size_t len)
+{
+ xe_device_assert_mem_access(xe);
+ iosys_map_memcpy_from(dst, src, src_offset, len);
+}
+
+static inline void xe_map_memset(struct xe_device *xe,
+ struct iosys_map *dst, size_t offset,
+ int value, size_t len)
+{
+ xe_device_assert_mem_access(xe);
+ iosys_map_memset(dst, offset, value, len);
+}
+
+/* FIXME: We likely should kill these two functions sooner or later */
+static inline u32 xe_map_read32(struct xe_device *xe, struct iosys_map *map)
+{
+ xe_device_assert_mem_access(xe);
+
+ if (map->is_iomem)
+ return readl(map->vaddr_iomem);
+ else
+ return READ_ONCE(*(u32 *)map->vaddr);
+}
+
+static inline void xe_map_write32(struct xe_device *xe, struct iosys_map *map,
+ u32 val)
+{
+ xe_device_assert_mem_access(xe);
+
+ if (map->is_iomem)
+ writel(val, map->vaddr_iomem);
+ else
+ *(u32 *)map->vaddr = val;
+}
+
+#define xe_map_rd(xe__, map__, offset__, type__) ({ \
+ struct xe_device *__xe = xe__; \
+ xe_device_assert_mem_access(__xe); \
+ iosys_map_rd(map__, offset__, type__); \
+})
+
+#define xe_map_wr(xe__, map__, offset__, type__, val__) ({ \
+ struct xe_device *__xe = xe__; \
+ xe_device_assert_mem_access(__xe); \
+ iosys_map_wr(map__, offset__, type__, val__); \
+})
+
+#define xe_map_rd_field(xe__, map__, struct_offset__, struct_type__, field__) ({ \
+ struct xe_device *__xe = xe__; \
+ xe_device_assert_mem_access(__xe); \
+ iosys_map_rd_field(map__, struct_offset__, struct_type__, field__); \
+})
+
+#define xe_map_wr_field(xe__, map__, struct_offset__, struct_type__, field__, val__) ({ \
+ struct xe_device *__xe = xe__; \
+ xe_device_assert_mem_access(__xe); \
+ iosys_map_wr_field(map__, struct_offset__, struct_type__, field__, val__); \
+})
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
new file mode 100644
index 000000000000..7fc40e8009c3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -0,0 +1,1168 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+#include "xe_migrate.h"
+
+#include "xe_bb.h"
+#include "xe_bo.h"
+#include "xe_engine.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_hw_engine.h"
+#include "xe_lrc.h"
+#include "xe_map.h"
+#include "xe_mocs.h"
+#include "xe_pt.h"
+#include "xe_res_cursor.h"
+#include "xe_sched_job.h"
+#include "xe_sync.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+#include <linux/sizes.h>
+#include <drm/drm_managed.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/xe_drm.h>
+
+#include "gt/intel_gpu_commands.h"
+
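+/*
+ * Migration context for a GT: a kernel VM with pre-pinned page tables (and,
+ * on dGFX, an identity map of VRAM), plus a dedicated engine used for
+ * clears, copies and page-table updates.
+ */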
+struct xe_migrate {
+ struct xe_engine *eng;
+ struct xe_gt *gt;
+ struct mutex job_mutex;
+ struct xe_bo *pt_bo;
+ struct xe_bo *cleared_bo;
+ u64 batch_base_ofs;
+ u64 usm_batch_base_ofs;
+ u64 cleared_vram_ofs;
+ struct dma_fence *fence;
+ struct drm_suballoc_manager vm_update_sa;
+};
+
+#define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. */
+#define NUM_KERNEL_PDE 17
+#define NUM_PT_SLOTS 32
+#define NUM_PT_PER_BLIT (MAX_PREEMPTDISABLE_TRANSFER / SZ_2M)
+
+struct xe_engine *xe_gt_migrate_engine(struct xe_gt *gt)
+{
+ return gt->migrate->eng;
+}
+
+static void xe_migrate_fini(struct drm_device *dev, void *arg)
+{
+ struct xe_migrate *m = arg;
+ struct ww_acquire_ctx ww;
+
+ xe_vm_lock(m->eng->vm, &ww, 0, false);
+ xe_bo_unpin(m->pt_bo);
+ if (m->cleared_bo)
+ xe_bo_unpin(m->cleared_bo);
+ xe_vm_unlock(m->eng->vm, &ww);
+
+ dma_fence_put(m->fence);
+ if (m->cleared_bo)
+ xe_bo_put(m->cleared_bo);
+ xe_bo_put(m->pt_bo);
+ drm_suballoc_manager_fini(&m->vm_update_sa);
+ mutex_destroy(&m->job_mutex);
+ xe_vm_close_and_put(m->eng->vm);
+ xe_engine_put(m->eng);
+}
+
+static u64 xe_migrate_vm_addr(u64 slot, u32 level)
+{
+ XE_BUG_ON(slot >= NUM_PT_SLOTS);
+
+ /* First slot is reserved for mapping of PT bo and bb, start from 1 */
+ return (slot + 1ULL) << xe_pt_shift(level + 1);
+}
+
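+/*
+ * The identity mapping of VRAM set up in xe_migrate_prepare_vm() lives at a
+ * 256 GiB offset in the migrate VM, so translating a VRAM address into a
+ * migrate-VM address is just a matter of adding that offset.
+ */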
+static u64 xe_migrate_vram_ofs(u64 addr)
+{
+ return addr + (256ULL << xe_pt_shift(2));
+}
+
+/*
+ * For flat CCS clearing we need a cleared chunk of memory to copy from,
+ * since the CCS clearing mode of XY_FAST_COLOR_BLT appears to be buggy
+ * (it clears only 14 bytes in each chunk of 16).
+ * If clearing the main surface one can use the part of the main surface
+ * already cleared, but for clearing as part of copying non-compressed
+ * data out of system memory, we don't readily have a cleared part of
+ * VRAM to copy from, so create one to use for that case.
+ */
+static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm)
+{
+ struct xe_gt *gt = m->gt;
+ struct xe_device *xe = vm->xe;
+ size_t cleared_size;
+ u64 vram_addr;
+ bool is_vram;
+
+ if (!xe_device_has_flat_ccs(xe))
+ return 0;
+
+ cleared_size = xe_device_ccs_bytes(xe, MAX_PREEMPTDISABLE_TRANSFER);
+ cleared_size = PAGE_ALIGN(cleared_size);
+ m->cleared_bo = xe_bo_create_pin_map(xe, gt, vm, cleared_size,
+ ttm_bo_type_kernel,
+ XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+ XE_BO_CREATE_PINNED_BIT);
+ if (IS_ERR(m->cleared_bo))
+ return PTR_ERR(m->cleared_bo);
+
+ xe_map_memset(xe, &m->cleared_bo->vmap, 0, 0x00, cleared_size);
+ vram_addr = xe_bo_addr(m->cleared_bo, 0, GEN8_PAGE_SIZE, &is_vram);
+ XE_BUG_ON(!is_vram);
+ m->cleared_vram_ofs = xe_migrate_vram_ofs(vram_addr);
+
+ return 0;
+}
+
+static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m,
+ struct xe_vm *vm)
+{
+ u8 id = gt->info.id;
+ u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level;
+ u32 map_ofs, level, i;
+ struct xe_device *xe = gt_to_xe(m->gt);
+ struct xe_bo *bo, *batch = gt->kernel_bb_pool.bo;
+ u64 entry;
+ int ret;
+
+ /* Can't bump NUM_PT_SLOTS too high */
+ BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/GEN8_PAGE_SIZE);
+ /* Must be a multiple of 64K to support all platforms */
+ BUILD_BUG_ON(NUM_PT_SLOTS * GEN8_PAGE_SIZE % SZ_64K);
+ /* And one slot reserved for the 4KiB page table updates */
+ BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1));
+
+ /* Need to be sure everything fits in the first PT, or create more */
+ XE_BUG_ON(m->batch_base_ofs + batch->size >= SZ_2M);
+
+ bo = xe_bo_create_pin_map(vm->xe, m->gt, vm,
+ num_entries * GEN8_PAGE_SIZE,
+ ttm_bo_type_kernel,
+ XE_BO_CREATE_VRAM_IF_DGFX(m->gt) |
+ XE_BO_CREATE_PINNED_BIT);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ ret = xe_migrate_create_cleared_bo(m, vm);
+ if (ret) {
+ xe_bo_put(bo);
+ return ret;
+ }
+
+ entry = gen8_pde_encode(bo, bo->size - GEN8_PAGE_SIZE, XE_CACHE_WB);
+ xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry);
+
+ map_ofs = (num_entries - num_level) * GEN8_PAGE_SIZE;
+
+ /* Map the entire BO in our level 0 pt */
+ for (i = 0, level = 0; i < num_entries; level++) {
+ entry = gen8_pte_encode(NULL, bo, i * GEN8_PAGE_SIZE,
+ XE_CACHE_WB, 0, 0);
+
+ xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry);
+
+ if (vm->flags & XE_VM_FLAGS_64K)
+ i += 16;
+ else
+ i += 1;
+ }
+
+ if (!IS_DGFX(xe)) {
+ XE_BUG_ON(xe->info.supports_usm);
+
+ /* Write out batch too */
+ m->batch_base_ofs = NUM_PT_SLOTS * GEN8_PAGE_SIZE;
+ for (i = 0; i < batch->size;
+ i += vm->flags & XE_VM_FLAGS_64K ? GEN8_64K_PAGE_SIZE :
+ GEN8_PAGE_SIZE) {
+ entry = gen8_pte_encode(NULL, batch, i,
+ XE_CACHE_WB, 0, 0);
+
+ xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64,
+ entry);
+ level++;
+ }
+ } else {
+ bool is_lmem;
+ u64 batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE, &is_lmem);
+
+ m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
+
+ if (xe->info.supports_usm) {
+ batch = gt->usm.bb_pool.bo;
+ batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE,
+ &is_lmem);
+ m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
+ }
+ }
+
+ for (level = 1; level < num_level; level++) {
+ u32 flags = 0;
+
+ if (vm->flags & XE_VM_FLAGS_64K && level == 1)
+ flags = GEN12_PDE_64K;
+
+ entry = gen8_pde_encode(bo, map_ofs + (level - 1) *
+ GEN8_PAGE_SIZE, XE_CACHE_WB);
+ xe_map_wr(xe, &bo->vmap, map_ofs + GEN8_PAGE_SIZE * level, u64,
+ entry | flags);
+ }
+
+ /* Write PDE's that point to our BO. */
+ for (i = 0; i < num_entries - num_level; i++) {
+ entry = gen8_pde_encode(bo, i * GEN8_PAGE_SIZE,
+ XE_CACHE_WB);
+
+ xe_map_wr(xe, &bo->vmap, map_ofs + GEN8_PAGE_SIZE +
+ (i + 1) * 8, u64, entry);
+ }
+
+ /* Identity map the entire vram at 256GiB offset */
+ if (IS_DGFX(xe)) {
+ u64 pos, ofs, flags;
+
+ level = 2;
+ ofs = map_ofs + GEN8_PAGE_SIZE * level + 256 * 8;
+ flags = GEN8_PAGE_RW | GEN8_PAGE_PRESENT | PPAT_CACHED |
+ GEN12_PPGTT_PTE_LM | GEN8_PDPE_PS_1G;
+
+ /*
+ * Use 1GB pages; it shouldn't matter that the physical amount of
+ * vram is smaller, as long as we don't access it.
+ */
+ for (pos = 0; pos < xe->mem.vram.size; pos += SZ_1G, ofs += 8)
+ xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags);
+ }
+
+ /*
+ * Example layout created above, with root level = 3:
+ * [PT0...PT7]: kernel PTs for copy/clear; 64 KiB or 4 KiB PTEs
+ * [PT8]: Kernel PT for VM_BIND, 4 KiB PTEs
+ * [PT9...PT28]: Userspace PTs for VM_BIND, 4 KiB PTEs
+ * [PT29 = PDE 0] [PT30 = PDE 1] [PT31 = PDE 2]
+ *
+ * This makes the lowest part of the VM point to the pagetables.
+ * Hence the lowest 2M in the VM points to itself; with a few writes
+ * and flushes, other parts of the VM can be used for either copying
+ * or clearing.
+ *
+ * For performance, the kernel reserves PDEs, so about 20 are left
+ * for async VM updates.
+ *
+ * To make this easier to work with, each scratch PT is put in slot
+ * (1 + PT #) everywhere; this allows lockless updates of scratch
+ * pages by using the different addresses in the VM.
+ */
+#define NUM_VMUSA_UNIT_PER_PAGE 32
+#define VM_SA_UPDATE_UNIT_SIZE (GEN8_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE)
+#define NUM_VMUSA_WRITES_PER_UNIT (VM_SA_UPDATE_UNIT_SIZE / sizeof(u64))
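+	/*
+	 * Illustrative arithmetic (assuming GEN8_PAGE_SIZE is 4 KiB): each
+	 * suballocation unit is 4096 / 32 == 128 bytes, i.e. 16 qword writes
+	 * per unit.
+	 */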
+ drm_suballoc_manager_init(&m->vm_update_sa,
+ (map_ofs / GEN8_PAGE_SIZE - NUM_KERNEL_PDE) *
+ NUM_VMUSA_UNIT_PER_PAGE, 0);
+
+ m->pt_bo = bo;
+ return 0;
+}
+
+struct xe_migrate *xe_migrate_init(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_migrate *m;
+ struct xe_vm *vm;
+ struct ww_acquire_ctx ww;
+ int err;
+
+ XE_BUG_ON(xe_gt_is_media_type(gt));
+
+ m = drmm_kzalloc(&xe->drm, sizeof(*m), GFP_KERNEL);
+ if (!m)
+ return ERR_PTR(-ENOMEM);
+
+ m->gt = gt;
+
+ /* Special layout, prepared below. */
+ vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION |
+ XE_VM_FLAG_SET_GT_ID(gt));
+ if (IS_ERR(vm))
+ return ERR_CAST(vm);
+
+ xe_vm_lock(vm, &ww, 0, false);
+ err = xe_migrate_prepare_vm(gt, m, vm);
+ xe_vm_unlock(vm, &ww);
+ if (err) {
+ xe_vm_close_and_put(vm);
+ return ERR_PTR(err);
+ }
+
+ if (xe->info.supports_usm) {
+ struct xe_hw_engine *hwe = xe_gt_hw_engine(gt,
+ XE_ENGINE_CLASS_COPY,
+ gt->usm.reserved_bcs_instance,
+ false);
+ if (!hwe)
+ return ERR_PTR(-EINVAL);
+
+ m->eng = xe_engine_create(xe, vm,
+ BIT(hwe->logical_instance), 1,
+ hwe, ENGINE_FLAG_KERNEL);
+ } else {
+ m->eng = xe_engine_create_class(xe, gt, vm,
+ XE_ENGINE_CLASS_COPY,
+ ENGINE_FLAG_KERNEL);
+ }
+ if (IS_ERR(m->eng)) {
+ xe_vm_close_and_put(vm);
+ return ERR_CAST(m->eng);
+ }
+
+ mutex_init(&m->job_mutex);
+
+ err = drmm_add_action_or_reset(&xe->drm, xe_migrate_fini, m);
+ if (err)
+ return ERR_PTR(err);
+
+ return m;
+}
+
+static void emit_arb_clear(struct xe_bb *bb)
+{
+ /* 1 dword */
+ bb->cs[bb->len++] = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+}
+
+static u64 xe_migrate_res_sizes(struct xe_res_cursor *cur)
+{
+ /*
+ * For VRAM we use identity mapped pages so we are limited to current
+ * cursor size. For system we program the pages ourselves so we have no
+ * such limitation.
+ */
+ return min_t(u64, MAX_PREEMPTDISABLE_TRANSFER,
+ mem_type_is_vram(cur->mem_type) ? cur->size :
+ cur->remaining);
+}
+
+static u32 pte_update_size(struct xe_migrate *m,
+ bool is_vram,
+ struct xe_res_cursor *cur,
+ u64 *L0, u64 *L0_ofs, u32 *L0_pt,
+ u32 cmd_size, u32 pt_ofs, u32 avail_pts)
+{
+ u32 cmds = 0;
+
+ *L0_pt = pt_ofs;
+ if (!is_vram) {
+ /* Clip L0 to available size */
+ u64 size = min(*L0, (u64)avail_pts * SZ_2M);
+ u64 num_4k_pages = DIV_ROUND_UP(size, GEN8_PAGE_SIZE);
+
+ *L0 = size;
+ *L0_ofs = xe_migrate_vm_addr(pt_ofs, 0);
+
+ /* MI_STORE_DATA_IMM */
+ cmds += 3 * DIV_ROUND_UP(num_4k_pages, 0x1ff);
+
+ /* PDE qwords */
+ cmds += num_4k_pages * 2;
+
+ /* Each chunk has a single blit command */
+ cmds += cmd_size;
+ } else {
+ /* Offset into identity map. */
+ *L0_ofs = xe_migrate_vram_ofs(cur->start);
+ cmds += cmd_size;
+ }
+
+ return cmds;
+}
+
+static void emit_pte(struct xe_migrate *m,
+ struct xe_bb *bb, u32 at_pt,
+ bool is_vram,
+ struct xe_res_cursor *cur,
+ u32 size, struct xe_bo *bo)
+{
+ u32 ptes;
+ u64 ofs = at_pt * GEN8_PAGE_SIZE;
+ u64 cur_ofs;
+
+ /*
+ * FIXME: Emitting VRAM PTEs to L0 PTs is forbidden. Currently
+ * we're only emitting VRAM PTEs during sanity tests, so when
+ * that's moved to a Kunit test, we should condition VRAM PTEs
+ * on running tests.
+ */
+
+ ptes = DIV_ROUND_UP(size, GEN8_PAGE_SIZE);
+
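+	/* Emit at most 0x1ff PTEs (qwords) per MI_STORE_DATA_IMM, chunking as needed */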
+ while (ptes) {
+ u32 chunk = min(0x1ffU, ptes);
+
+ bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) |
+ (chunk * 2 + 1);
+ bb->cs[bb->len++] = ofs;
+ bb->cs[bb->len++] = 0;
+
+ cur_ofs = ofs;
+ ofs += chunk * 8;
+ ptes -= chunk;
+
+ while (chunk--) {
+ u64 addr;
+
+ XE_BUG_ON(cur->start & (PAGE_SIZE - 1));
+
+ if (is_vram) {
+ addr = cur->start;
+
+ /* Is this a 64K PTE entry? */
+ if ((m->eng->vm->flags & XE_VM_FLAGS_64K) &&
+ !(cur_ofs & (16 * 8 - 1))) {
+ XE_WARN_ON(!IS_ALIGNED(addr, SZ_64K));
+ addr |= GEN12_PTE_PS64;
+ }
+
+ addr |= GEN12_PPGTT_PTE_LM;
+ } else {
+ addr = xe_res_dma(cur);
+ }
+ addr |= PPAT_CACHED | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+ bb->cs[bb->len++] = lower_32_bits(addr);
+ bb->cs[bb->len++] = upper_32_bits(addr);
+
+ xe_res_next(cur, PAGE_SIZE);
+ cur_ofs += 8;
+ }
+ }
+}
+
+#define EMIT_COPY_CCS_DW 5
+static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
+ u64 dst_ofs, bool dst_is_indirect,
+ u64 src_ofs, bool src_is_indirect,
+ u32 size)
+{
+ u32 *cs = bb->cs + bb->len;
+ u32 num_ccs_blks;
+ u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
+
+ num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size),
+ NUM_CCS_BYTES_PER_BLOCK);
+ XE_BUG_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER);
+ *cs++ = XY_CTRL_SURF_COPY_BLT |
+ (src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT |
+ (dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT |
+ ((num_ccs_blks - 1) & CCS_SIZE_MASK) << CCS_SIZE_SHIFT;
+ *cs++ = lower_32_bits(src_ofs);
+ *cs++ = upper_32_bits(src_ofs) |
+ FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
+ *cs++ = lower_32_bits(dst_ofs);
+ *cs++ = upper_32_bits(dst_ofs) |
+ FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
+
+ bb->len = cs - bb->cs;
+}
+
+#define EMIT_COPY_DW 10
+static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
+ u64 src_ofs, u64 dst_ofs, unsigned int size,
+ unsigned pitch)
+{
+ XE_BUG_ON(size / pitch > S16_MAX);
+ XE_BUG_ON(pitch / 4 > S16_MAX);
+ XE_BUG_ON(pitch > U16_MAX);
+
+ bb->cs[bb->len++] = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
+ bb->cs[bb->len++] = BLT_DEPTH_32 | pitch;
+ bb->cs[bb->len++] = 0;
+ bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4;
+ bb->cs[bb->len++] = lower_32_bits(dst_ofs);
+ bb->cs[bb->len++] = upper_32_bits(dst_ofs);
+ bb->cs[bb->len++] = 0;
+ bb->cs[bb->len++] = pitch;
+ bb->cs[bb->len++] = lower_32_bits(src_ofs);
+ bb->cs[bb->len++] = upper_32_bits(src_ofs);
+}
+
+static int job_add_deps(struct xe_sched_job *job, struct dma_resv *resv,
+ enum dma_resv_usage usage)
+{
+ return drm_sched_job_add_resv_dependencies(&job->drm, resv, usage);
+}
+
+static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm)
+{
+ return usm ? m->usm_batch_base_ofs : m->batch_base_ofs;
+}
+
+static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
+ struct xe_bb *bb,
+ u64 src_ofs, bool src_is_vram,
+ u64 dst_ofs, bool dst_is_vram, u32 dst_size,
+ u64 ccs_ofs, bool copy_ccs)
+{
+ struct xe_gt *gt = m->gt;
+ u32 flush_flags = 0;
+
+ if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_vram) {
+ /*
+ * If the bo doesn't have any CCS metadata attached, we still
+ * need to clear it for security reasons.
+ */
+ emit_copy_ccs(gt, bb, dst_ofs, true, m->cleared_vram_ofs, false,
+ dst_size);
+ flush_flags = MI_FLUSH_DW_CCS;
+ } else if (copy_ccs) {
+ if (!src_is_vram)
+ src_ofs = ccs_ofs;
+ else if (!dst_is_vram)
+ dst_ofs = ccs_ofs;
+
+ /*
+ * At the moment, we don't support copying CCS metadata from
+ * system to system.
+ */
+ XE_BUG_ON(!src_is_vram && !dst_is_vram);
+
+ emit_copy_ccs(gt, bb, dst_ofs, dst_is_vram, src_ofs,
+ src_is_vram, dst_size);
+ if (dst_is_vram)
+ flush_flags = MI_FLUSH_DW_CCS;
+ }
+
+ return flush_flags;
+}
+
+struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
+ struct xe_bo *bo,
+ struct ttm_resource *src,
+ struct ttm_resource *dst)
+{
+ struct xe_gt *gt = m->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+ struct dma_fence *fence = NULL;
+ u64 size = bo->size;
+ struct xe_res_cursor src_it, dst_it, ccs_it;
+ u64 src_L0_ofs, dst_L0_ofs;
+ u32 src_L0_pt, dst_L0_pt;
+ u64 src_L0, dst_L0;
+ int pass = 0;
+ int err;
+ bool src_is_vram = mem_type_is_vram(src->mem_type);
+ bool dst_is_vram = mem_type_is_vram(dst->mem_type);
+ bool copy_ccs = xe_device_has_flat_ccs(xe) && xe_bo_needs_ccs_pages(bo);
+ bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram);
+
+ if (!src_is_vram)
+ xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &src_it);
+ else
+ xe_res_first(src, 0, bo->size, &src_it);
+ if (!dst_is_vram)
+ xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &dst_it);
+ else
+ xe_res_first(dst, 0, bo->size, &dst_it);
+
+ if (copy_system_ccs)
+ xe_res_first_sg(xe_bo_get_sg(bo), xe_bo_ccs_pages_start(bo),
+ PAGE_ALIGN(xe_device_ccs_bytes(xe, size)),
+ &ccs_it);
+
+ while (size) {
+ u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
+ struct xe_sched_job *job;
+ struct xe_bb *bb;
+ u32 flush_flags;
+ u32 update_idx;
+ u64 ccs_ofs, ccs_size;
+ u32 ccs_pt;
+ bool usm = xe->info.supports_usm;
+
+ src_L0 = xe_migrate_res_sizes(&src_it);
+ dst_L0 = xe_migrate_res_sizes(&dst_it);
+
+ drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n",
+ pass++, src_L0, dst_L0);
+
+ src_L0 = min(src_L0, dst_L0);
+
+ batch_size += pte_update_size(m, src_is_vram, &src_it, &src_L0,
+ &src_L0_ofs, &src_L0_pt, 0, 0,
+ NUM_PT_PER_BLIT);
+
+ batch_size += pte_update_size(m, dst_is_vram, &dst_it, &src_L0,
+ &dst_L0_ofs, &dst_L0_pt, 0,
+ NUM_PT_PER_BLIT, NUM_PT_PER_BLIT);
+
+ if (copy_system_ccs) {
+ ccs_size = xe_device_ccs_bytes(xe, src_L0);
+ batch_size += pte_update_size(m, false, &ccs_it, &ccs_size,
+ &ccs_ofs, &ccs_pt, 0,
+ 2 * NUM_PT_PER_BLIT,
+ NUM_PT_PER_BLIT);
+ }
+
+ /* Add copy commands size here */
+ batch_size += EMIT_COPY_DW +
+ (xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0);
+
+ bb = xe_bb_new(gt, batch_size, usm);
+ if (IS_ERR(bb)) {
+ err = PTR_ERR(bb);
+ goto err_sync;
+ }
+
+ /* Preemption is enabled again by the ring ops. */
+ if (!src_is_vram || !dst_is_vram)
+ emit_arb_clear(bb);
+
+ if (!src_is_vram)
+ emit_pte(m, bb, src_L0_pt, src_is_vram, &src_it, src_L0,
+ bo);
+ else
+ xe_res_next(&src_it, src_L0);
+
+ if (!dst_is_vram)
+ emit_pte(m, bb, dst_L0_pt, dst_is_vram, &dst_it, src_L0,
+ bo);
+ else
+ xe_res_next(&dst_it, src_L0);
+
+ if (copy_system_ccs)
+ emit_pte(m, bb, ccs_pt, false, &ccs_it, ccs_size, bo);
+
+ bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+ update_idx = bb->len;
+
+ emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, GEN8_PAGE_SIZE);
+ flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_vram,
+ dst_L0_ofs, dst_is_vram,
+ src_L0, ccs_ofs, copy_ccs);
+
+ mutex_lock(&m->job_mutex);
+ job = xe_bb_create_migration_job(m->eng, bb,
+ xe_migrate_batch_base(m, usm),
+ update_idx);
+ if (IS_ERR(job)) {
+ err = PTR_ERR(job);
+ goto err;
+ }
+
+ xe_sched_job_add_migrate_flush(job, flush_flags);
+ if (!fence) {
+ err = job_add_deps(job, bo->ttm.base.resv,
+ DMA_RESV_USAGE_BOOKKEEP);
+ if (err)
+ goto err_job;
+ }
+
+ xe_sched_job_arm(job);
+ dma_fence_put(fence);
+ fence = dma_fence_get(&job->drm.s_fence->finished);
+ xe_sched_job_push(job);
+
+ dma_fence_put(m->fence);
+ m->fence = dma_fence_get(fence);
+
+ mutex_unlock(&m->job_mutex);
+
+ xe_bb_free(bb, fence);
+ size -= src_L0;
+ continue;
+
+err_job:
+ xe_sched_job_put(job);
+err:
+ mutex_unlock(&m->job_mutex);
+ xe_bb_free(bb, NULL);
+
+err_sync:
+ /* Sync partial copy if any. */
+ if (fence) {
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ }
+
+ return ERR_PTR(err);
+ }
+
+ return fence;
+}
+
+static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+ u32 size, u32 pitch, u32 value, bool is_vram)
+{
+ u32 *cs = bb->cs + bb->len;
+ u32 len = XY_FAST_COLOR_BLT_DW;
+ u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
+
+ if (GRAPHICS_VERx100(gt->xe) < 1250)
+ len = 11;
+
+ *cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
+ (len - 2);
+ *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
+ (pitch - 1);
+ *cs++ = 0;
+ *cs++ = (size / pitch) << 16 | pitch / 4;
+ *cs++ = lower_32_bits(src_ofs);
+ *cs++ = upper_32_bits(src_ofs);
+ *cs++ = (is_vram ? 0x0 : 0x1) << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT;
+ *cs++ = value;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+
+ if (len > 11) {
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ }
+
+ XE_BUG_ON(cs - bb->cs != len + bb->len);
+ bb->len += len;
+
+ return 0;
+}
+
+struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
+ struct xe_bo *bo,
+ struct ttm_resource *dst,
+ u32 value)
+{
+ bool clear_vram = mem_type_is_vram(dst->mem_type);
+ struct xe_gt *gt = m->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+ struct dma_fence *fence = NULL;
+ u64 size = bo->size;
+ struct xe_res_cursor src_it;
+ struct ttm_resource *src = dst;
+ int err;
+ int pass = 0;
+
+ if (!clear_vram)
+ xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &src_it);
+ else
+ xe_res_first(src, 0, bo->size, &src_it);
+
+ while (size) {
+ u64 clear_L0_ofs;
+ u32 clear_L0_pt;
+ u32 flush_flags = 0;
+ u64 clear_L0;
+ struct xe_sched_job *job;
+ struct xe_bb *bb;
+ u32 batch_size, update_idx;
+ bool usm = xe->info.supports_usm;
+
+ clear_L0 = xe_migrate_res_sizes(&src_it);
+ drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0);
+
+ /* Calculate final sizes and batch size.. */
+ batch_size = 2 +
+ pte_update_size(m, clear_vram, &src_it,
+ &clear_L0, &clear_L0_ofs, &clear_L0_pt,
+ XY_FAST_COLOR_BLT_DW, 0, NUM_PT_PER_BLIT);
+ if (xe_device_has_flat_ccs(xe) && clear_vram)
+ batch_size += EMIT_COPY_CCS_DW;
+
+ /* Clear commands */
+
+ if (WARN_ON_ONCE(!clear_L0))
+ break;
+
+ bb = xe_bb_new(gt, batch_size, usm);
+ if (IS_ERR(bb)) {
+ err = PTR_ERR(bb);
+ goto err_sync;
+ }
+
+ size -= clear_L0;
+
+ /* TODO: Add dependencies here */
+
+ /* Preemption is enabled again by the ring ops. */
+ if (!clear_vram) {
+ emit_arb_clear(bb);
+ emit_pte(m, bb, clear_L0_pt, clear_vram, &src_it, clear_L0,
+ bo);
+ } else {
+ xe_res_next(&src_it, clear_L0);
+ }
+ bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+ update_idx = bb->len;
+
+ emit_clear(gt, bb, clear_L0_ofs, clear_L0, GEN8_PAGE_SIZE,
+ value, clear_vram);
+ if (xe_device_has_flat_ccs(xe) && clear_vram) {
+ emit_copy_ccs(gt, bb, clear_L0_ofs, true,
+ m->cleared_vram_ofs, false, clear_L0);
+ flush_flags = MI_FLUSH_DW_CCS;
+ }
+
+ mutex_lock(&m->job_mutex);
+ job = xe_bb_create_migration_job(m->eng, bb,
+ xe_migrate_batch_base(m, usm),
+ update_idx);
+ if (IS_ERR(job)) {
+ err = PTR_ERR(job);
+ goto err;
+ }
+
+ xe_sched_job_add_migrate_flush(job, flush_flags);
+
+ xe_sched_job_arm(job);
+ dma_fence_put(fence);
+ fence = dma_fence_get(&job->drm.s_fence->finished);
+ xe_sched_job_push(job);
+
+ dma_fence_put(m->fence);
+ m->fence = dma_fence_get(fence);
+
+ mutex_unlock(&m->job_mutex);
+
+ xe_bb_free(bb, fence);
+ continue;
+
+err:
+ mutex_unlock(&m->job_mutex);
+ xe_bb_free(bb, NULL);
+err_sync:
+ /* Sync partial clears if any. */
+ if (fence) {
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ }
+
+ return ERR_PTR(err);
+ }
+
+ return fence;
+}
+
+static void write_pgtable(struct xe_gt *gt, struct xe_bb *bb, u64 ppgtt_ofs,
+ const struct xe_vm_pgtable_update *update,
+ struct xe_migrate_pt_update *pt_update)
+{
+ const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
+ u32 chunk;
+ u32 ofs = update->ofs, size = update->qwords;
+
+ /*
+ * If we have 512 entries (max), we would populate it ourselves,
+ * and update the PDE above it to the new pointer.
+ * The only time this can happen is if we have to update the top
+ * PDE. This requires a BO that is almost vm->size big.
+ *
+ * This shouldn't be possible in practice... it might change when
+ * 16K pages are used. Hence the BUG_ON.
+ */
+ XE_BUG_ON(update->qwords > 0x1ff);
+ if (!ppgtt_ofs) {
+ bool is_lmem;
+
+ ppgtt_ofs = xe_migrate_vram_ofs(xe_bo_addr(update->pt_bo, 0,
+ GEN8_PAGE_SIZE,
+ &is_lmem));
+ XE_BUG_ON(!is_lmem);
+ }
+
+ do {
+ u64 addr = ppgtt_ofs + ofs * 8;
+ chunk = min(update->qwords, 0x1ffU);
+
+ /* Ensure populatefn can do memset64 by aligning bb->cs */
+ if (!(bb->len & 1))
+ bb->cs[bb->len++] = MI_NOOP;
+
+ bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) |
+ (chunk * 2 + 1);
+ bb->cs[bb->len++] = lower_32_bits(addr);
+ bb->cs[bb->len++] = upper_32_bits(addr);
+ ops->populate(pt_update, gt, NULL, bb->cs + bb->len, ofs, chunk,
+ update);
+
+ bb->len += chunk * 2;
+ ofs += chunk;
+ size -= chunk;
+ } while (size);
+}
+
+struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m)
+{
+ return xe_vm_get(m->eng->vm);
+}
+
+static struct dma_fence *
+xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
+ struct xe_vm *vm, struct xe_bo *bo,
+ const struct xe_vm_pgtable_update *updates,
+ u32 num_updates, bool wait_vm,
+ struct xe_migrate_pt_update *pt_update)
+{
+ const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
+ struct dma_fence *fence;
+ int err;
+ u32 i;
+
+ /* Wait on BO moves for 10 ms, then fall back to GPU job */
+ if (bo) {
+ long wait;
+
+ wait = dma_resv_wait_timeout(bo->ttm.base.resv,
+ DMA_RESV_USAGE_KERNEL,
+ true, HZ / 100);
+ if (wait <= 0)
+ return ERR_PTR(-ETIME);
+ }
+ if (wait_vm) {
+ long wait;
+
+ wait = dma_resv_wait_timeout(&vm->resv,
+ DMA_RESV_USAGE_BOOKKEEP,
+ true, HZ / 100);
+ if (wait <= 0)
+ return ERR_PTR(-ETIME);
+ }
+
+ if (ops->pre_commit) {
+ err = ops->pre_commit(pt_update);
+ if (err)
+ return ERR_PTR(err);
+ }
+ for (i = 0; i < num_updates; i++) {
+ const struct xe_vm_pgtable_update *update = &updates[i];
+
+ ops->populate(pt_update, m->gt, &update->pt_bo->vmap, NULL,
+ update->ofs, update->qwords, update);
+ }
+
+ trace_xe_vm_cpu_bind(vm);
+ xe_device_wmb(vm->xe);
+
+ fence = dma_fence_get_stub();
+
+ return fence;
+}
+
+static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs)
+{
+ int i;
+
+ for (i = 0; i < num_syncs; i++) {
+ struct dma_fence *fence = syncs[i].fence;
+
+ if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+ &fence->flags))
+ return false;
+ }
+
+ return true;
+}
+
+static bool engine_is_idle(struct xe_engine *e)
+{
+ return !e || e->lrc[0].fence_ctx.next_seqno == 1 ||
+ xe_lrc_seqno(&e->lrc[0]) == e->lrc[0].fence_ctx.next_seqno;
+}
+
+struct dma_fence *
+xe_migrate_update_pgtables(struct xe_migrate *m,
+ struct xe_vm *vm,
+ struct xe_bo *bo,
+ struct xe_engine *eng,
+ const struct xe_vm_pgtable_update *updates,
+ u32 num_updates,
+ struct xe_sync_entry *syncs, u32 num_syncs,
+ struct xe_migrate_pt_update *pt_update)
+{
+ const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
+ struct xe_gt *gt = m->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_sched_job *job;
+ struct dma_fence *fence;
+ struct drm_suballoc *sa_bo = NULL;
+ struct xe_vma *vma = pt_update->vma;
+ struct xe_bb *bb;
+ u32 i, batch_size, ppgtt_ofs, update_idx, page_ofs = 0;
+ u64 addr;
+ int err = 0;
+ bool usm = !eng && xe->info.supports_usm;
+ bool first_munmap_rebind = vma && vma->first_munmap_rebind;
+
+ /* Use the CPU if no in syncs and engine is idle */
+ if (no_in_syncs(syncs, num_syncs) && engine_is_idle(eng)) {
+ fence = xe_migrate_update_pgtables_cpu(m, vm, bo, updates,
+ num_updates,
+ first_munmap_rebind,
+ pt_update);
+ if (!IS_ERR(fence) || fence == ERR_PTR(-EAGAIN))
+ return fence;
+ }
+
+ /* fixed + PTE entries */
+ if (IS_DGFX(xe))
+ batch_size = 2;
+ else
+ batch_size = 6 + num_updates * 2;
+
+ for (i = 0; i < num_updates; i++) {
+ u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, 0x1ff);
+
+ /* align noop + MI_STORE_DATA_IMM cmd prefix */
+ batch_size += 4 * num_cmds + updates[i].qwords * 2;
+ }
+
+ /*
+ * XXX: Create temp bo to copy from, if batch_size becomes too big?
+ *
+ * Worst case: Sum(2 * (each lower level page size) + (top level page size))
+ * This should be reasonably bounded.
+ */
+ XE_BUG_ON(batch_size >= SZ_128K);
+
+ bb = xe_bb_new(gt, batch_size, !eng && xe->info.supports_usm);
+ if (IS_ERR(bb))
+ return ERR_CAST(bb);
+
+ /* For sysmem PTEs, we need to map them in our hole. */
+ if (!IS_DGFX(xe)) {
+ ppgtt_ofs = NUM_KERNEL_PDE - 1;
+ if (eng) {
+ XE_BUG_ON(num_updates > NUM_VMUSA_WRITES_PER_UNIT);
+
+ sa_bo = drm_suballoc_new(&m->vm_update_sa, 1,
+ GFP_KERNEL, true, 0);
+ if (IS_ERR(sa_bo)) {
+ err = PTR_ERR(sa_bo);
+ goto err;
+ }
+
+ ppgtt_ofs = NUM_KERNEL_PDE +
+ (drm_suballoc_soffset(sa_bo) /
+ NUM_VMUSA_UNIT_PER_PAGE);
+ page_ofs = (drm_suballoc_soffset(sa_bo) %
+ NUM_VMUSA_UNIT_PER_PAGE) *
+ VM_SA_UPDATE_UNIT_SIZE;
+ }
+
+ /* Preemption is enabled again by the ring ops. */
+ emit_arb_clear(bb);
+
+ /* Map our PT's to gtt */
+ bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) |
+ (num_updates * 2 + 1);
+ bb->cs[bb->len++] = ppgtt_ofs * GEN8_PAGE_SIZE + page_ofs;
+ bb->cs[bb->len++] = 0; /* upper_32_bits */
+
+ for (i = 0; i < num_updates; i++) {
+ struct xe_bo *pt_bo = updates[i].pt_bo;
+
+ BUG_ON(pt_bo->size != SZ_4K);
+
+ addr = gen8_pte_encode(NULL, pt_bo, 0, XE_CACHE_WB,
+ 0, 0);
+ bb->cs[bb->len++] = lower_32_bits(addr);
+ bb->cs[bb->len++] = upper_32_bits(addr);
+ }
+
+ bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+ update_idx = bb->len;
+
+ addr = xe_migrate_vm_addr(ppgtt_ofs, 0) +
+ (page_ofs / sizeof(u64)) * GEN8_PAGE_SIZE;
+ for (i = 0; i < num_updates; i++)
+ write_pgtable(m->gt, bb, addr + i * GEN8_PAGE_SIZE,
+ &updates[i], pt_update);
+ } else {
+ /* phys pages, no preamble required */
+ bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+ update_idx = bb->len;
+
+ /* Preemption is enabled again by the ring ops. */
+ emit_arb_clear(bb);
+ for (i = 0; i < num_updates; i++)
+ write_pgtable(m->gt, bb, 0, &updates[i], pt_update);
+ }
+
+ if (!eng)
+ mutex_lock(&m->job_mutex);
+
+ job = xe_bb_create_migration_job(eng ?: m->eng, bb,
+ xe_migrate_batch_base(m, usm),
+ update_idx);
+ if (IS_ERR(job)) {
+ err = PTR_ERR(job);
+ goto err_bb;
+ }
+
+ /* Wait on BO move */
+ if (bo) {
+ err = job_add_deps(job, bo->ttm.base.resv,
+ DMA_RESV_USAGE_KERNEL);
+ if (err)
+ goto err_job;
+ }
+
+ /*
+ * Munmap style VM unbind, need to wait for all jobs to be complete /
+ * trigger preempts before moving forward
+ */
+ if (first_munmap_rebind) {
+ err = job_add_deps(job, &vm->resv,
+ DMA_RESV_USAGE_BOOKKEEP);
+ if (err)
+ goto err_job;
+ }
+
+ for (i = 0; !err && i < num_syncs; i++)
+ err = xe_sync_entry_add_deps(&syncs[i], job);
+
+ if (err)
+ goto err_job;
+
+ if (ops->pre_commit) {
+ err = ops->pre_commit(pt_update);
+ if (err)
+ goto err_job;
+ }
+ xe_sched_job_arm(job);
+ fence = dma_fence_get(&job->drm.s_fence->finished);
+ xe_sched_job_push(job);
+
+ if (!eng)
+ mutex_unlock(&m->job_mutex);
+
+ xe_bb_free(bb, fence);
+ drm_suballoc_free(sa_bo, fence);
+
+ return fence;
+
+err_job:
+ xe_sched_job_put(job);
+err_bb:
+ if (!eng)
+ mutex_unlock(&m->job_mutex);
+ xe_bb_free(bb, NULL);
+err:
+ drm_suballoc_free(sa_bo, NULL);
+ return ERR_PTR(err);
+}
+
+void xe_migrate_wait(struct xe_migrate *m)
+{
+ if (m->fence)
+ dma_fence_wait(m->fence, false);
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_migrate.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
new file mode 100644
index 000000000000..267057a3847f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef __XE_MIGRATE__
+#define __XE_MIGRATE__
+
+#include <drm/drm_mm.h>
+
+struct dma_fence;
+struct iosys_map;
+struct ttm_resource;
+
+struct xe_bo;
+struct xe_gt;
+struct xe_engine;
+struct xe_migrate;
+struct xe_migrate_pt_update;
+struct xe_sync_entry;
+struct xe_pt;
+struct xe_vm;
+struct xe_vm_pgtable_update;
+struct xe_vma;
+
+struct xe_migrate_pt_update_ops {
+ /**
+ * populate() - Populate a command buffer or page-table with ptes.
+ * @pt_update: Embeddable callback argument.
+ * @gt: The gt for the current operation.
+ * @map: struct iosys_map into the memory to be populated.
+ * @pos: If @map is NULL, a plain pointer into the memory to be populated.
+ * @ofs: qword offset into @map, unused if @map is NULL.
+ * @num_qwords: Number of qwords to write.
+ * @update: Information about the PTEs to be inserted.
+ *
+ * This interface is intended to be used as a callback into the
+ * page-table system to populate command buffers or shared
+ * page-tables with PTEs.
+ */
+ void (*populate)(struct xe_migrate_pt_update *pt_update,
+ struct xe_gt *gt, struct iosys_map *map,
+ void *pos, u32 ofs, u32 num_qwords,
+ const struct xe_vm_pgtable_update *update);
+
+ /**
+ * pre_commit(): Callback to be called just before arming the
+ * sched_job.
+ * @pt_update: Pointer to embeddable callback argument.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+ int (*pre_commit)(struct xe_migrate_pt_update *pt_update);
+};
+
+struct xe_migrate_pt_update {
+ const struct xe_migrate_pt_update_ops *ops;
+ struct xe_vma *vma;
+};
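+
+/*
+ * Illustrative only (names are placeholders, not part of this patch): a
+ * page-table backend supplies an ops instance and embeds the callback
+ * argument in its own update structure, e.g.:
+ *
+ *	static const struct xe_migrate_pt_update_ops foo_pt_ops = {
+ *		.populate = foo_populate,
+ *		.pre_commit = foo_pre_commit,
+ *	};
+ *
+ *	struct foo_pt_update {
+ *		struct xe_migrate_pt_update base;
+ *		...
+ *	};
+ *
+ * with base.ops set to &foo_pt_ops before calling into the migrate layer.
+ */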
+
+struct xe_migrate *xe_migrate_init(struct xe_gt *gt);
+
+struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
+ struct xe_bo *bo,
+ struct ttm_resource *src,
+ struct ttm_resource *dst);
+
+struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
+ struct xe_bo *bo,
+ struct ttm_resource *dst,
+ u32 value);
+
+struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m);
+
+struct dma_fence *
+xe_migrate_update_pgtables(struct xe_migrate *m,
+ struct xe_vm *vm,
+ struct xe_bo *bo,
+ struct xe_engine *eng,
+ const struct xe_vm_pgtable_update *updates,
+ u32 num_updates,
+ struct xe_sync_entry *syncs, u32 num_syncs,
+ struct xe_migrate_pt_update *pt_update);
+
+void xe_migrate_wait(struct xe_migrate *m);
+
+struct xe_engine *xe_gt_migrate_engine(struct xe_gt *gt);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate_doc.h b/drivers/gpu/drm/xe/xe_migrate_doc.h
new file mode 100644
index 000000000000..6a68fdff08dc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_migrate_doc.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_MIGRATE_DOC_H_
+#define _XE_MIGRATE_DOC_H_
+
+/**
+ * DOC: Migrate Layer
+ *
+ * The XE migrate layer is used to generate jobs which can copy memory
+ * (eviction), clear memory, or program tables (binds). This layer exists in
+ * every GT, has a migrate engine, and uses a special VM for all generated
+ * jobs.
+ *
+ * Special VM details
+ * ==================
+ *
+ * The special VM is configured with a page structure where we can dynamically
+ * map BOs which need to be copied and cleared, dynamically map other VM's page
+ * table BOs for updates, and identity map the entire device's VRAM with 1 GB
+ * pages.
+ *
+ * Currently the page structure consists of 48 physical pages: 16 are reserved
+ * for BO mapping during copies and clears, 1 is reserved for kernel binds,
+ * several pages are needed to set up the identity mappings (the exact number
+ * depends on how many bits of address space the device has), and the rest are
+ * reserved for user bind operations.
+ *
+ * TODO: Diagram of layout
+ *
+ * Bind jobs
+ * =========
+ *
+ * A bind job consists of two batches and runs either on the migrate engine
+ * (kernel binds) or the bind engine passed in (user binds). In both cases the
+ * VM of the engine is the migrate VM.
+ *
+ * The first batch is used to update the migration VM page structure to point to
+ * the bind VM page table BOs which need to be updated. A physical page is
+ * required for this. If it is a user bind, the page is allocated from the pool
+ * of pages reserved for user bind operations, with drm_suballoc managing this
+ * pool. If it is a kernel bind, the page reserved for kernel binds is used.
+ *
+ * The first batch is only required for devices without VRAM: when the device
+ * has VRAM, the bind VM page table BOs are in VRAM and the identity mapping can
+ * be used instead.
+ *
+ * The second batch is used to program the page table updates in the bind VM.
+ * Why not just one batch? The TLBs need to be invalidated between these two
+ * batches and that can only be done from the ring.
+ *
+ * When the bind job completes, the allocated page is returned to the pool of
+ * pages reserved for user bind operations, if it was a user bind. There is no
+ * need to do this for kernel binds as the reserved kernel page is used serially
+ * by each job.
+ *
+ * Copy / clear jobs
+ * =================
+ *
+ * A copy or clear job consists of two batches and runs on the migrate engine.
+ *
+ * Like binds, the first batch is used to update the migration VM page
+ * structure. In copy jobs, we need to map both the source and the destination
+ * of the BO into the page structure. In clear jobs, we just need to add one
+ * mapping of the BO into the page structure. We use the 16 reserved pages in
+ * the migration VM for these mappings; this gives us a maximum copy size of
+ * 16 MB and a maximum clear size of 32 MB.
+ *
+ * The second batch is used to do either the copy or the clear. Again, similar
+ * to binds, two batches are required as the TLBs need to be invalidated from
+ * the ring between the batches.
+ *
+ * More than one job will be generated if the BO is larger than maximum copy /
+ * clear size.
+ *
+ * Future work
+ * ===========
+ *
+ * Update copy and clear code to use identity mapped VRAM.
+ *
+ * Can we rework the use of the pages for async binds to use all the entries in
+ * each page?
+ *
+ * Using large pages for sysmem mappings.
+ *
+ * Is it possible to identity map the sysmem? We should explore this.
+ */
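+
+/*
+ * Illustrative usage sketch (not part of this patch; error handling elided,
+ * and 'new_mem' stands in for whatever destination resource the caller has
+ * allocated): a typical eviction path grabs the GT's migrate context, issues
+ * a copy and waits for it:
+ *
+ *	struct xe_migrate *m = gt->migrate;
+ *	struct dma_fence *fence;
+ *
+ *	fence = xe_migrate_copy(m, bo, bo->ttm.resource, new_mem);
+ *	if (!IS_ERR(fence)) {
+ *		dma_fence_wait(fence, false);
+ *		dma_fence_put(fence);
+ *	}
+ */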
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
new file mode 100644
index 000000000000..42e2405f2f48
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -0,0 +1,466 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_mmio.h"
+
+#include <drm/drm_managed.h>
+#include <drm/xe_drm.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_macros.h"
+#include "xe_module.h"
+
+#include "i915_reg.h"
+#include "gt/intel_engine_regs.h"
+#include "gt/intel_gt_regs.h"
+
+#define XEHP_MTCFG_ADDR _MMIO(0x101800)
+#define TILE_COUNT REG_GENMASK(15, 8)
+#define GEN12_LMEM_BAR 2
+
+static int xe_set_dma_info(struct xe_device *xe)
+{
+ unsigned int mask_size = xe->info.dma_mask_size;
+ int err;
+
+ /*
+ * We don't have a max segment size, so set it to the max so sg's
+ * debugging layer doesn't complain
+ */
+ dma_set_max_seg_size(xe->drm.dev, UINT_MAX);
+
+ err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+ if (err)
+ goto mask_err;
+
+ err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+ if (err)
+ goto mask_err;
+
+ return 0;
+
+mask_err:
+ drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err);
+ return err;
+}
+
+#ifdef CONFIG_64BIT
+static int
+_resize_bar(struct xe_device *xe, int resno, resource_size_t size)
+{
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ int bar_size = pci_rebar_bytes_to_size(size);
+ int ret;
+
+ if (pci_resource_len(pdev, resno))
+ pci_release_resource(pdev, resno);
+
+ ret = pci_resize_resource(pdev, resno, bar_size);
+ if (ret) {
+ drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe)\n",
+ resno, 1 << bar_size, ERR_PTR(ret));
+ return -1;
+ }
+
+ drm_info(&xe->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size);
+ return 1;
+}
+
+static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size)
+{
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ struct pci_bus *root = pdev->bus;
+ struct resource *root_res;
+ resource_size_t rebar_size;
+ resource_size_t current_size;
+ u32 pci_cmd;
+ int i;
+ int ret;
+ u64 force_lmem_bar_size = xe_force_lmem_bar_size;
+
+ current_size = roundup_pow_of_two(pci_resource_len(pdev, GEN12_LMEM_BAR));
+
+ if (force_lmem_bar_size) {
+ u32 bar_sizes;
+
+ rebar_size = force_lmem_bar_size * (resource_size_t)SZ_1M;
+ bar_sizes = pci_rebar_get_possible_sizes(pdev, GEN12_LMEM_BAR);
+
+ if (rebar_size == current_size)
+ return 0;
+
+ if (!(bar_sizes & BIT(pci_rebar_bytes_to_size(rebar_size))) ||
+ rebar_size >= roundup_pow_of_two(lmem_size)) {
+ rebar_size = lmem_size;
+ drm_info(&xe->drm,
+ "Given bar size is not within supported size, setting it to default: %llu\n",
+ (u64)lmem_size >> 20);
+ }
+ } else {
+ rebar_size = current_size;
+
+ if (rebar_size != roundup_pow_of_two(lmem_size))
+ rebar_size = lmem_size;
+ else
+ return 0;
+ }
+
+ while (root->parent)
+ root = root->parent;
+
+ pci_bus_for_each_resource(root, root_res, i) {
+ if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
+ root_res->start > 0x100000000ull)
+ break;
+ }
+
+ if (!root_res) {
+ drm_info(&xe->drm, "Can't resize LMEM BAR - platform support is missing\n");
+ return -1;
+ }
+
+ pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd);
+ pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY);
+
+ ret = _resize_bar(xe, GEN12_LMEM_BAR, rebar_size);
+
+ pci_assign_unassigned_bus_resources(pdev->bus);
+ pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
+ return ret;
+}
+#else
+static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size) { return 0; }
+#endif
+
+static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar)
+{
+ if (!pci_resource_flags(pdev, bar))
+ return false;
+
+ if (pci_resource_flags(pdev, bar) & IORESOURCE_UNSET)
+ return false;
+
+ if (!pci_resource_len(pdev, bar))
+ return false;
+
+ return true;
+}
+
+int xe_mmio_probe_vram(struct xe_device *xe)
+{
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ struct xe_gt *gt;
+ u8 id;
+ u64 lmem_size;
+ u64 original_size;
+ u64 current_size;
+ u64 flat_ccs_base;
+ int resize_result;
+
+ if (!IS_DGFX(xe)) {
+ xe->mem.vram.mapping = 0;
+ xe->mem.vram.size = 0;
+ xe->mem.vram.io_start = 0;
+
+ for_each_gt(gt, xe, id) {
+ gt->mem.vram.mapping = 0;
+ gt->mem.vram.size = 0;
+ gt->mem.vram.io_start = 0;
+ }
+ return 0;
+ }
+
+ if (!xe_pci_resource_valid(pdev, GEN12_LMEM_BAR)) {
+ drm_err(&xe->drm, "pci resource is not valid\n");
+ return -ENXIO;
+ }
+
+ gt = xe_device_get_gt(xe, 0);
+ lmem_size = xe_mmio_read64(gt, GEN12_GSMBASE.reg);
+
+ original_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
+
+ if (xe->info.has_flat_ccs) {
+ int err;
+ u32 reg;
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return err;
+ reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE0_ADDR_RANGE);
+ lmem_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G;
+ reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
+ flat_ccs_base = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
+
+ drm_info(&xe->drm, "lmem_size: 0x%llx flat_ccs_base: 0x%llx\n",
+ lmem_size, flat_ccs_base);
+
+ err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ return err;
+ } else {
+ flat_ccs_base = lmem_size;
+ }
+
+ resize_result = xe_resize_lmem_bar(xe, lmem_size);
+ current_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
+ xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR);
+
+ xe->mem.vram.size = min(current_size, lmem_size);
+
+ if (!xe->mem.vram.size)
+ return -EIO;
+
+ if (resize_result > 0)
+ drm_info(&xe->drm, "Successfully resize LMEM from %lluMiB to %lluMiB\n",
+ (u64)original_size >> 20,
+ (u64)current_size >> 20);
+ else if (xe->mem.vram.size < lmem_size && !xe_force_lmem_bar_size)
+ drm_info(&xe->drm, "Using a reduced BAR size of %lluMiB. Consider enabling 'Resizable BAR' support in your BIOS.\n",
+ (u64)xe->mem.vram.size >> 20);
+ if (xe->mem.vram.size < lmem_size)
+ drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n",
+ lmem_size, xe->mem.vram.size);
+
+#ifdef CONFIG_64BIT
+ xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.size);
+#endif
+
+ xe->mem.vram.size = min_t(u64, xe->mem.vram.size, flat_ccs_base);
+
+ drm_info(&xe->drm, "TOTAL VRAM: %pa, %pa\n", &xe->mem.vram.io_start, &xe->mem.vram.size);
+
+ /* FIXME: Assuming equally partitioned VRAM, incorrect */
+ if (xe->info.tile_count > 1) {
+ u8 adj_tile_count = xe->info.tile_count;
+ resource_size_t size, io_start;
+
+ for_each_gt(gt, xe, id)
+ if (xe_gt_is_media_type(gt))
+ --adj_tile_count;
+
+ XE_BUG_ON(!adj_tile_count);
+
+ size = xe->mem.vram.size / adj_tile_count;
+ io_start = xe->mem.vram.io_start;
+
+ for_each_gt(gt, xe, id) {
+ if (id && !xe_gt_is_media_type(gt))
+ io_start += size;
+
+ gt->mem.vram.size = size;
+ gt->mem.vram.io_start = io_start;
+ gt->mem.vram.mapping = xe->mem.vram.mapping +
+ (io_start - xe->mem.vram.io_start);
+
+ drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n",
+ id, gt->info.vram_id, &gt->mem.vram.io_start,
+ &gt->mem.vram.size);
+ }
+ } else {
+ gt->mem.vram.size = xe->mem.vram.size;
+ gt->mem.vram.io_start = xe->mem.vram.io_start;
+ gt->mem.vram.mapping = xe->mem.vram.mapping;
+
+ drm_info(&xe->drm, "VRAM: %pa\n", &gt->mem.vram.size);
+ }
+ return 0;
+}
+
+static void xe_mmio_probe_tiles(struct xe_device *xe)
+{
+ struct xe_gt *gt = xe_device_get_gt(xe, 0);
+ u32 mtcfg;
+ u8 adj_tile_count;
+ u8 id;
+
+ if (xe->info.tile_count == 1)
+ return;
+
+ mtcfg = xe_mmio_read64(gt, XEHP_MTCFG_ADDR.reg);
+ adj_tile_count = xe->info.tile_count =
+ REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
+ if (xe->info.media_ver >= 13)
+ xe->info.tile_count *= 2;
+
+ drm_info(&xe->drm, "tile_count: %d, adj_tile_count %d\n",
+ xe->info.tile_count, adj_tile_count);
+
+ if (xe->info.tile_count > 1) {
+ const int mmio_bar = 0;
+ size_t size;
+ void *regs;
+
+ if (adj_tile_count > 1) {
+ pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
+ xe->mmio.size = SZ_16M * adj_tile_count;
+ xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev),
+ mmio_bar, xe->mmio.size);
+ }
+
+ size = xe->mmio.size / adj_tile_count;
+ regs = xe->mmio.regs;
+
+ for_each_gt(gt, xe, id) {
+ if (id && !xe_gt_is_media_type(gt))
+ regs += size;
+ gt->mmio.size = size;
+ gt->mmio.regs = regs;
+ }
+ }
+}
+
+static void mmio_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_device *xe = arg;
+
+ pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
+ if (xe->mem.vram.mapping)
+ iounmap(xe->mem.vram.mapping);
+}
+
+int xe_mmio_init(struct xe_device *xe)
+{
+ struct xe_gt *gt = xe_device_get_gt(xe, 0);
+ const int mmio_bar = 0;
+ int err;
+
+ /*
+ * Map the entire BAR, which includes registers (0-4MB), reserved space
+ * (4MB-8MB), and GGTT (8MB-16MB). Other parts of the driver (GTs,
+ * GGTTs) will derive the pointers they need from the mapping in the
+ * device structure.
+ */
+ xe->mmio.size = SZ_16M;
+ xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), mmio_bar,
+ xe->mmio.size);
+ if (xe->mmio.regs == NULL) {
+ drm_err(&xe->drm, "failed to map registers\n");
+ return -EIO;
+ }
+
+ err = drmm_add_action_or_reset(&xe->drm, mmio_fini, xe);
+ if (err)
+ return err;
+
+ /* 1 GT for now, 1 to 1 mapping, may change on multi-GT devices */
+ gt->mmio.size = xe->mmio.size;
+ gt->mmio.regs = xe->mmio.regs;
+
+ /*
+ * The boot firmware initializes local memory and assesses its health.
+ * If memory training fails, the punit will have been instructed to
+ * keep the GT powered down; we won't be able to communicate with it
+ * and we should not continue with driver initialization.
+ */
+ if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL.reg) & LMEM_INIT)) {
+ drm_err(&xe->drm, "LMEM not initialized by firmware\n");
+ return -ENODEV;
+ }
+
+ err = xe_set_dma_info(xe);
+ if (err)
+ return err;
+
+ xe_mmio_probe_tiles(xe);
+
+ return 0;
+}
+
+#define VALID_MMIO_FLAGS (\
+ DRM_XE_MMIO_BITS_MASK |\
+ DRM_XE_MMIO_READ |\
+ DRM_XE_MMIO_WRITE)
+
+static const i915_reg_t mmio_read_whitelist[] = {
+ RING_TIMESTAMP(RENDER_RING_BASE),
+};
+
+int xe_mmio_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct drm_xe_mmio *args = data;
+ unsigned int bits_flag, bytes;
+ bool allowed;
+ int ret = 0;
+
+ if (XE_IOCTL_ERR(xe, args->extensions))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, args->flags & ~VALID_MMIO_FLAGS))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_MMIO_WRITE) && args->value))
+ return -EINVAL;
+
+ allowed = capable(CAP_SYS_ADMIN);
+ if (!allowed && ((args->flags & ~DRM_XE_MMIO_BITS_MASK) == DRM_XE_MMIO_READ)) {
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(mmio_read_whitelist); i++) {
+ if (mmio_read_whitelist[i].reg == args->addr) {
+ allowed = true;
+ break;
+ }
+ }
+ }
+
+ if (XE_IOCTL_ERR(xe, !allowed))
+ return -EPERM;
+
+ bits_flag = args->flags & DRM_XE_MMIO_BITS_MASK;
+ bytes = 1 << bits_flag;
+ if (XE_IOCTL_ERR(xe, args->addr + bytes > xe->mmio.size))
+ return -EINVAL;
+
+ xe_force_wake_get(gt_to_fw(&xe->gt[0]), XE_FORCEWAKE_ALL);
+
+ if (args->flags & DRM_XE_MMIO_WRITE) {
+ switch (bits_flag) {
+ case DRM_XE_MMIO_8BIT:
+ case DRM_XE_MMIO_16BIT:
+ ret = -EINVAL; /* TODO */
+ goto exit;
+ case DRM_XE_MMIO_32BIT:
+ if (XE_IOCTL_ERR(xe, args->value > U32_MAX)) {
+ ret = -EINVAL;
+ goto exit;
+ }
+ xe_mmio_write32(to_gt(xe), args->addr, args->value);
+ break;
+ case DRM_XE_MMIO_64BIT:
+ xe_mmio_write64(to_gt(xe), args->addr, args->value);
+ break;
+ default:
+ drm_WARN(&xe->drm, 1, "Invalid MMIO bit size");
+ ret = -EINVAL;
+ goto exit;
+ }
+ }
+
+ if (args->flags & DRM_XE_MMIO_READ) {
+ switch (bits_flag) {
+ case DRM_XE_MMIO_8BIT:
+ case DRM_XE_MMIO_16BIT:
+ ret = -EINVAL; /* TODO */
+ goto exit;
+ case DRM_XE_MMIO_32BIT:
+ args->value = xe_mmio_read32(to_gt(xe), args->addr);
+ break;
+ case DRM_XE_MMIO_64BIT:
+ args->value = xe_mmio_read64(to_gt(xe), args->addr);
+ break;
+ default:
+ drm_WARN(&xe->drm, 1, "Invalid MMIO bit size");
+ ret = -EINVAL;
+ }
+ }
+
+exit:
+ xe_force_wake_put(gt_to_fw(&xe->gt[0]), XE_FORCEWAKE_ALL);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
new file mode 100644
index 000000000000..09d24467096f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_MMIO_H_
+#define _XE_MMIO_H_
+
+#include <linux/delay.h>
+
+#include "xe_gt_types.h"
+
+/*
+ * FIXME: This header has been deemed evil and we need to kill it. Temporarily
+ * including it so we can use 'wait_for' and unblock initial development. A
+ * follow-up should replace 'wait_for' with a sane version and drop including
+ * this header.
+ */
+#include "i915_utils.h"
+
+struct drm_device;
+struct drm_file;
+struct xe_device;
+
+int xe_mmio_init(struct xe_device *xe);
+
+static inline u8 xe_mmio_read8(struct xe_gt *gt, u32 reg)
+{
+ if (reg < gt->mmio.adj_limit)
+ reg += gt->mmio.adj_offset;
+
+ return readb(gt->mmio.regs + reg);
+}
+
+static inline void xe_mmio_write32(struct xe_gt *gt,
+ u32 reg, u32 val)
+{
+ if (reg < gt->mmio.adj_limit)
+ reg += gt->mmio.adj_offset;
+
+ writel(val, gt->mmio.regs + reg);
+}
+
+static inline u32 xe_mmio_read32(struct xe_gt *gt, u32 reg)
+{
+ if (reg < gt->mmio.adj_limit)
+ reg += gt->mmio.adj_offset;
+
+ return readl(gt->mmio.regs + reg);
+}
+
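+/*
+ * Read-modify-write: the register is rewritten as (old & mask) | val and the
+ * previous value is returned.
+ */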
+static inline u32 xe_mmio_rmw32(struct xe_gt *gt, u32 reg, u32 mask,
+ u32 val)
+{
+ u32 old, reg_val;
+
+ old = xe_mmio_read32(gt, reg);
+ reg_val = (old & mask) | val;
+ xe_mmio_write32(gt, reg, reg_val);
+
+ return old;
+}
+
+static inline void xe_mmio_write64(struct xe_gt *gt,
+ u32 reg, u64 val)
+{
+ if (reg < gt->mmio.adj_limit)
+ reg += gt->mmio.adj_offset;
+
+ writeq(val, gt->mmio.regs + reg);
+}
+
+static inline u64 xe_mmio_read64(struct xe_gt *gt, u32 reg)
+{
+ if (reg < gt->mmio.adj_limit)
+ reg += gt->mmio.adj_offset;
+
+ return readq(gt->mmio.regs + reg);
+}
+
+static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
+ u32 reg, u32 val,
+ u32 mask, u32 eval)
+{
+ u32 reg_val;
+
+ xe_mmio_write32(gt, reg, val);
+ reg_val = xe_mmio_read32(gt, reg);
+
+ return (reg_val & mask) != eval ? -EINVAL : 0;
+}
+
+static inline int xe_mmio_wait32(struct xe_gt *gt,
+ u32 reg, u32 val,
+ u32 mask, u32 timeout_ms)
+{
+ return wait_for((xe_mmio_read32(gt, reg) & mask) == val,
+ timeout_ms);
+}
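+
+/*
+ * Example (illustrative only; STATUS_REG and READY_BIT are placeholders, not
+ * real register definitions): poll until a ready bit is set, giving up after
+ * 50 ms:
+ *
+ *	if (xe_mmio_wait32(gt, STATUS_REG, READY_BIT, READY_BIT, 50))
+ *		drm_err(&gt_to_xe(gt)->drm, "timed out waiting for ready\n");
+ */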
+
+int xe_mmio_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+
+static inline bool xe_mmio_in_range(const struct xe_mmio_range *range, u32 reg)
+{
+ return range && reg >= range->start && reg <= range->end;
+}
+
+int xe_mmio_probe_vram(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
new file mode 100644
index 000000000000..86b966fffbe5
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -0,0 +1,557 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_engine.h"
+#include "xe_gt.h"
+#include "xe_platform_types.h"
+#include "xe_mmio.h"
+#include "xe_mocs.h"
+#include "xe_step_types.h"
+
+#include "gt/intel_gt_regs.h"
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+#define mocs_dbg drm_dbg
+#else
+__printf(2, 3)
+static inline void mocs_dbg(const struct drm_device *dev,
+ const char *format, ...)
+{ /* noop */ }
+#endif
+
+/*
+ * MOCS indexes used for GPU surfaces, defining the cacheability of the
+ * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
+ */
+enum xe_mocs_info_index {
+ /*
+ * Not cached anywhere, coherency between CPU and GPU accesses is
+ * guaranteed.
+ */
+ XE_MOCS_UNCACHED,
+ /*
+ * Cacheability and coherency controlled by the kernel automatically
+ * based on the xxxx IOCTL setting and the current
+ * usage of the surface (used for display scanout or not).
+ */
+ XE_MOCS_PTE,
+ /*
+ * Cached in all GPU caches available on the platform.
+ * Coherency between CPU and GPU accesses to the surface is not
+ * guaranteed without extra synchronization.
+ */
+ XE_MOCS_CACHED,
+};
+
+enum {
+ HAS_GLOBAL_MOCS = BIT(0),
+ HAS_RENDER_L3CC = BIT(1),
+};
+
+struct xe_mocs_entry {
+ u32 control_value;
+ u16 l3cc_value;
+ u16 used;
+};
+
+struct xe_mocs_info {
+ unsigned int size;
+ unsigned int n_entries;
+ const struct xe_mocs_entry *table;
+ u8 uc_index;
+ u8 wb_index;
+ u8 unused_entries_index;
+};
+
+/* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
+#define _LE_CACHEABILITY(value) ((value) << 0)
+#define _LE_TGT_CACHE(value) ((value) << 2)
+#define LE_LRUM(value) ((value) << 4)
+#define LE_AOM(value) ((value) << 6)
+#define LE_RSC(value) ((value) << 7)
+#define LE_SCC(value) ((value) << 8)
+#define LE_PFM(value) ((value) << 11)
+#define LE_SCF(value) ((value) << 14)
+#define LE_COS(value) ((value) << 15)
+#define LE_SSE(value) ((value) << 17)
+
+/* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
+#define L3_ESC(value) ((value) << 0)
+#define L3_SCC(value) ((value) << 1)
+#define _L3_CACHEABILITY(value) ((value) << 4)
+#define L3_GLBGO(value) ((value) << 6)
+#define L3_LKUP(value) ((value) << 7)
+
+/* Helper defines */
+#define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */
+#define PVC_NUM_MOCS_ENTRIES 3
+#define MTL_NUM_MOCS_ENTRIES 16
+
+/* (e)LLC caching options */
+/*
+ * Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means
+ * the same as LE_UC
+ */
+#define LE_0_PAGETABLE _LE_CACHEABILITY(0)
+#define LE_1_UC _LE_CACHEABILITY(1)
+#define LE_2_WT _LE_CACHEABILITY(2)
+#define LE_3_WB _LE_CACHEABILITY(3)
+
+/* Target cache */
+#define LE_TC_0_PAGETABLE _LE_TGT_CACHE(0)
+#define LE_TC_1_LLC _LE_TGT_CACHE(1)
+#define LE_TC_2_LLC_ELLC _LE_TGT_CACHE(2)
+#define LE_TC_3_LLC_ELLC_ALT _LE_TGT_CACHE(3)
+
+/* L3 caching options */
+#define L3_0_DIRECT _L3_CACHEABILITY(0)
+#define L3_1_UC _L3_CACHEABILITY(1)
+#define L3_2_RESERVED _L3_CACHEABILITY(2)
+#define L3_3_WB _L3_CACHEABILITY(3)
+
+#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
+ [__idx] = { \
+ .control_value = __control_value, \
+ .l3cc_value = __l3cc_value, \
+ .used = 1, \
+ }
+
+/*
+ * MOCS tables
+ *
+ * These are the MOCS tables that are programmed across all the rings.
+ * The control value is programmed to all the rings that support the
+ * MOCS registers. While the l3cc_values are only programmed to the
+ * LNCFCMOCS0 - LNCFCMOCS32 registers.
+ *
+ * These tables are intended to be kept reasonably consistent across
+ * HW platforms, and for ICL+, be identical across OSes. To achieve
+ * that, for Icelake and above, the list of entries is published as
+ * part of the bspec.
+ *
+ * Entries not part of the following tables are undefined as far as
+ * userspace is concerned and shouldn't be relied upon. For Gen < 12
+ * they will be initialized to PTE. Gen >= 12 doesn't have a setting
+ * for PTE and those platforms, except TGL/RKL, will be initialized to
+ * L3 WB to catch accidental use of reserved and unused mocs indexes.
+ *
+ * The last few entries are reserved by the hardware. For ICL+ they
+ * should be initialized according to bspec and never used, for older
+ * platforms they should never be written to.
+ *
+ * NOTE1: These tables are part of bspec and defined as part of hardware
+ * interface for ICL+. For older platforms, they are part of kernel
+ * ABI. It is expected that, for specific hardware platform, existing
+ * entries will remain constant and the table will only be updated by
+ * adding new entries, filling unused positions.
+ *
+ * NOTE2: For GEN >= 12 except TGL and RKL, reserved and unspecified MOCS
+ * indices have been set to L3 WB. These reserved entries should never
+ * be used, they may be changed to low performant variants with better
+ * coherency in the future if more entries are needed.
+ * For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC.
+ */
+
+#define GEN11_MOCS_ENTRIES \
+ /* Entries 0 and 1 are defined per-platform */ \
+ /* Base - L3 + LLC */ \
+ MOCS_ENTRY(2, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+ L3_3_WB), \
+ /* Base - Uncached */ \
+ MOCS_ENTRY(3, \
+ LE_1_UC | LE_TC_1_LLC, \
+ L3_1_UC), \
+ /* Base - L3 */ \
+ MOCS_ENTRY(4, \
+ LE_1_UC | LE_TC_1_LLC, \
+ L3_3_WB), \
+ /* Base - LLC */ \
+ MOCS_ENTRY(5, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+ L3_1_UC), \
+ /* Age 0 - LLC */ \
+ MOCS_ENTRY(6, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
+ L3_1_UC), \
+ /* Age 0 - L3 + LLC */ \
+ MOCS_ENTRY(7, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
+ L3_3_WB), \
+ /* Age: Don't Chg. - LLC */ \
+ MOCS_ENTRY(8, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
+ L3_1_UC), \
+ /* Age: Don't Chg. - L3 + LLC */ \
+ MOCS_ENTRY(9, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
+ L3_3_WB), \
+ /* No AOM - LLC */ \
+ MOCS_ENTRY(10, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
+ L3_1_UC), \
+ /* No AOM - L3 + LLC */ \
+ MOCS_ENTRY(11, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
+ L3_3_WB), \
+ /* No AOM; Age 0 - LLC */ \
+ MOCS_ENTRY(12, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
+ L3_1_UC), \
+ /* No AOM; Age 0 - L3 + LLC */ \
+ MOCS_ENTRY(13, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
+ L3_3_WB), \
+ /* No AOM; Age:DC - LLC */ \
+ MOCS_ENTRY(14, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
+ L3_1_UC), \
+ /* No AOM; Age:DC - L3 + LLC */ \
+ MOCS_ENTRY(15, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
+ L3_3_WB), \
+ /* Self-Snoop - L3 + LLC */ \
+ MOCS_ENTRY(18, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
+ L3_3_WB), \
+ /* Skip Caching - L3 + LLC(12.5%) */ \
+ MOCS_ENTRY(19, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \
+ L3_3_WB), \
+ /* Skip Caching - L3 + LLC(25%) */ \
+ MOCS_ENTRY(20, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \
+ L3_3_WB), \
+ /* Skip Caching - L3 + LLC(50%) */ \
+ MOCS_ENTRY(21, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \
+ L3_3_WB), \
+ /* Skip Caching - L3 + LLC(75%) */ \
+ MOCS_ENTRY(22, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \
+ L3_3_WB), \
+ /* Skip Caching - L3 + LLC(87.5%) */ \
+ MOCS_ENTRY(23, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \
+ L3_3_WB), \
+ /* HW Reserved - SW program but never use */ \
+ MOCS_ENTRY(62, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+ L3_1_UC), \
+ /* HW Reserved - SW program but never use */ \
+ MOCS_ENTRY(63, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+ L3_1_UC)
+
+static const struct xe_mocs_entry tgl_mocs_desc[] = {
+ /*
+ * NOTE:
+ * Reserved and unspecified MOCS indices have been set to (L3 + LLC).
+ * These reserved entries should never be used; they may be changed
+ * to lower-performance variants with better coherency in the future
+ * if more entries are needed. We only program index XE_MOCS_PTE(1)
+ * here; __init_mocs_table() takes care of programming the unused
+ * indices with this entry.
+ */
+ MOCS_ENTRY(XE_MOCS_PTE,
+ LE_0_PAGETABLE | LE_TC_0_PAGETABLE,
+ L3_1_UC),
+ GEN11_MOCS_ENTRIES,
+
+ /* Implicitly enable L1 - HDC:L1 + L3 + LLC */
+ MOCS_ENTRY(48,
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+ L3_3_WB),
+ /* Implicitly enable L1 - HDC:L1 + L3 */
+ MOCS_ENTRY(49,
+ LE_1_UC | LE_TC_1_LLC,
+ L3_3_WB),
+ /* Implicitly enable L1 - HDC:L1 + LLC */
+ MOCS_ENTRY(50,
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+ L3_1_UC),
+ /* Implicitly enable L1 - HDC:L1 */
+ MOCS_ENTRY(51,
+ LE_1_UC | LE_TC_1_LLC,
+ L3_1_UC),
+ /* HW Special Case (CCS) */
+ MOCS_ENTRY(60,
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+ L3_1_UC),
+ /* HW Special Case (Displayable) */
+ MOCS_ENTRY(61,
+ LE_1_UC | LE_TC_1_LLC,
+ L3_3_WB),
+};
+
+static const struct xe_mocs_entry dg1_mocs_desc[] = {
+ /* UC */
+ MOCS_ENTRY(1, 0, L3_1_UC),
+ /* WB - L3 */
+ MOCS_ENTRY(5, 0, L3_3_WB),
+ /* WB - L3 50% */
+ MOCS_ENTRY(6, 0, L3_ESC(1) | L3_SCC(1) | L3_3_WB),
+ /* WB - L3 25% */
+ MOCS_ENTRY(7, 0, L3_ESC(1) | L3_SCC(3) | L3_3_WB),
+ /* WB - L3 12.5% */
+ MOCS_ENTRY(8, 0, L3_ESC(1) | L3_SCC(7) | L3_3_WB),
+
+ /* HDC:L1 + L3 */
+ MOCS_ENTRY(48, 0, L3_3_WB),
+ /* HDC:L1 */
+ MOCS_ENTRY(49, 0, L3_1_UC),
+
+ /* HW Reserved */
+ MOCS_ENTRY(60, 0, L3_1_UC),
+ MOCS_ENTRY(61, 0, L3_1_UC),
+ MOCS_ENTRY(62, 0, L3_1_UC),
+ MOCS_ENTRY(63, 0, L3_1_UC),
+};
+
+static const struct xe_mocs_entry gen12_mocs_desc[] = {
+ GEN11_MOCS_ENTRIES,
+ /* Implicitly enable L1 - HDC:L1 + L3 + LLC */
+ MOCS_ENTRY(48,
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+ L3_3_WB),
+ /* Implicitly enable L1 - HDC:L1 + L3 */
+ MOCS_ENTRY(49,
+ LE_1_UC | LE_TC_1_LLC,
+ L3_3_WB),
+ /* Implicitly enable L1 - HDC:L1 + LLC */
+ MOCS_ENTRY(50,
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+ L3_1_UC),
+ /* Implicitly enable L1 - HDC:L1 */
+ MOCS_ENTRY(51,
+ LE_1_UC | LE_TC_1_LLC,
+ L3_1_UC),
+ /* HW Special Case (CCS) */
+ MOCS_ENTRY(60,
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+ L3_1_UC),
+ /* HW Special Case (Displayable) */
+ MOCS_ENTRY(61,
+ LE_1_UC | LE_TC_1_LLC,
+ L3_3_WB),
+};
+
+static const struct xe_mocs_entry dg2_mocs_desc[] = {
+ /* UC - Coherent; GO:L3 */
+ MOCS_ENTRY(0, 0, L3_1_UC | L3_LKUP(1)),
+ /* UC - Coherent; GO:Memory */
+ MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+ /* UC - Non-Coherent; GO:Memory */
+ MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
+
+ /* WB - LC */
+ MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
+};
+
+static const struct xe_mocs_entry dg2_mocs_desc_g10_ax[] = {
+ /* Wa_14011441408: Set Go to Memory for MOCS#0 */
+ MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+ /* UC - Coherent; GO:Memory */
+ MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+ /* UC - Non-Coherent; GO:Memory */
+ MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
+
+ /* WB - LC */
+ MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
+};
+
+static const struct xe_mocs_entry pvc_mocs_desc[] = {
+ /* Error */
+ MOCS_ENTRY(0, 0, L3_3_WB),
+
+ /* UC */
+ MOCS_ENTRY(1, 0, L3_1_UC),
+
+ /* WB */
+ MOCS_ENTRY(2, 0, L3_3_WB),
+};
+
+static unsigned int get_mocs_settings(struct xe_device *xe,
+ struct xe_mocs_info *info)
+{
+ unsigned int flags;
+
+ memset(info, 0, sizeof(struct xe_mocs_info));
+
+ info->unused_entries_index = XE_MOCS_PTE;
+ switch (xe->info.platform) {
+ case XE_PVC:
+ info->size = ARRAY_SIZE(pvc_mocs_desc);
+ info->table = pvc_mocs_desc;
+ info->n_entries = PVC_NUM_MOCS_ENTRIES;
+ info->uc_index = 1;
+ info->wb_index = 2;
+ info->unused_entries_index = 2;
+ break;
+ case XE_METEORLAKE:
+ info->size = ARRAY_SIZE(dg2_mocs_desc);
+ info->table = dg2_mocs_desc;
+ info->n_entries = MTL_NUM_MOCS_ENTRIES;
+ info->uc_index = 1;
+ info->unused_entries_index = 3;
+ break;
+ case XE_DG2:
+ if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10 &&
+ xe->info.step.graphics >= STEP_A0 &&
+ xe->info.step.graphics <= STEP_B0) {
+ info->size = ARRAY_SIZE(dg2_mocs_desc_g10_ax);
+ info->table = dg2_mocs_desc_g10_ax;
+ } else {
+ info->size = ARRAY_SIZE(dg2_mocs_desc);
+ info->table = dg2_mocs_desc;
+ }
+ info->uc_index = 1;
+ info->n_entries = GEN9_NUM_MOCS_ENTRIES;
+ info->unused_entries_index = 3;
+ break;
+ case XE_DG1:
+ info->size = ARRAY_SIZE(dg1_mocs_desc);
+ info->table = dg1_mocs_desc;
+ info->uc_index = 1;
+ info->n_entries = GEN9_NUM_MOCS_ENTRIES;
+ info->unused_entries_index = 5;
+ break;
+ case XE_TIGERLAKE:
+ info->size = ARRAY_SIZE(tgl_mocs_desc);
+ info->table = tgl_mocs_desc;
+ info->n_entries = GEN9_NUM_MOCS_ENTRIES;
+ info->uc_index = 3;
+ break;
+ case XE_ALDERLAKE_S:
+ case XE_ALDERLAKE_P:
+ info->size = ARRAY_SIZE(gen12_mocs_desc);
+ info->table = gen12_mocs_desc;
+ info->n_entries = GEN9_NUM_MOCS_ENTRIES;
+ info->uc_index = 3;
+ info->unused_entries_index = 2;
+ break;
+ default:
+ drm_err(&xe->drm, "Platform that should have a MOCS table does not.\n");
+ return 0;
+ }
+
+ if (XE_WARN_ON(info->size > info->n_entries))
+ return 0;
+
+ flags = HAS_RENDER_L3CC;
+ if (!IS_DGFX(xe))
+ flags |= HAS_GLOBAL_MOCS;
+
+ return flags;
+}
+
+/*
+ * Get the control_value from a MOCS entry. If the entry at @index is unused,
+ * fall back to the entry at unused_entries_index, which defaults to
+ * XE_MOCS_PTE when a platform doesn't override it.
+ */
+static u32 get_entry_control(const struct xe_mocs_info *info,
+ unsigned int index)
+{
+ if (index < info->size && info->table[index].used)
+ return info->table[index].control_value;
+ return info->table[info->unused_entries_index].control_value;
+}
+
+static void __init_mocs_table(struct xe_gt *gt,
+ const struct xe_mocs_info *info,
+ u32 addr)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ unsigned int i;
+ u32 mocs;
+
+ mocs_dbg(&gt->xe->drm, "entries:%d\n", info->n_entries);
+ drm_WARN_ONCE(&xe->drm, !info->unused_entries_index,
+ "Unused entries index should have been defined\n");
+ for (i = 0; i < info->n_entries; i++) {
+ mocs = get_entry_control(info, i);
+ mocs_dbg(&gt->xe->drm, "%d 0x%x 0x%x\n", i, _MMIO(addr + i * 4).reg, mocs);
+ xe_mmio_write32(gt, _MMIO(addr + i * 4).reg, mocs);
+ }
+}
+
+/*
+ * Get the l3cc_value from a MOCS entry. If the entry at @index is unused,
+ * fall back to the entry at unused_entries_index, which defaults to
+ * XE_MOCS_PTE when a platform doesn't override it.
+ */
+static u16 get_entry_l3cc(const struct xe_mocs_info *info,
+ unsigned int index)
+{
+ if (index < info->size && info->table[index].used)
+ return info->table[index].l3cc_value;
+ return info->table[info->unused_entries_index].l3cc_value;
+}
+
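+/*
+ * Each LNCFCMOCS register holds two 16-bit L3CC entries: the even-indexed
+ * entry in bits 15:0 and the odd-indexed entry in bits 31:16, which is why
+ * init_l3cc_table() below walks the table two entries at a time.
+ */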
+static u32 l3cc_combine(u16 low, u16 high)
+{
+ return low | (u32)high << 16;
+}
+
+static void init_l3cc_table(struct xe_gt *gt,
+ const struct xe_mocs_info *info)
+{
+ unsigned int i;
+ u32 l3cc;
+
+ mocs_dbg(&gt->xe->drm, "entries:%d\n", info->n_entries);
+ for (i = 0; i < (info->n_entries + 1) / 2; i++) {
+ l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i),
+ get_entry_l3cc(info, 2 * i + 1));
+ mocs_dbg(&gt->xe->drm, "%d 0x%x 0x%x\n", i, GEN9_LNCFCMOCS(i).reg, l3cc);
+ xe_mmio_write32(gt, GEN9_LNCFCMOCS(i).reg, l3cc);
+ }
+}
+
+void xe_mocs_init_engine(const struct xe_engine *engine)
+{
+ struct xe_mocs_info table;
+ unsigned int flags;
+
+ flags = get_mocs_settings(engine->gt->xe, &table);
+ if (!flags)
+ return;
+
+ if (flags & HAS_RENDER_L3CC && engine->class == XE_ENGINE_CLASS_RENDER)
+ init_l3cc_table(engine->gt, &table);
+}
+
+void xe_mocs_init(struct xe_gt *gt)
+{
+ struct xe_mocs_info table;
+ unsigned int flags;
+
+ /*
+ * LLC and eDRAM control values are not applicable to dgfx
+ */
+ flags = get_mocs_settings(gt->xe, &table);
+ mocs_dbg(&gt->xe->drm, "flag:0x%x\n", flags);
+ gt->mocs.uc_index = table.uc_index;
+ gt->mocs.wb_index = table.wb_index;
+
+ if (flags & HAS_GLOBAL_MOCS)
+ __init_mocs_table(gt, &table, GEN12_GLOBAL_MOCS(0).reg);
+
+ /*
+ * Initialize the L3CC table as part of MOCS initialization to make
+ * sure the LNCFCMOCSx registers are programmed for the subsequent
+ * memory transactions, including GuC transactions.
+ */
+ if (flags & HAS_RENDER_L3CC)
+ init_l3cc_table(gt, &table);
+}
diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h
new file mode 100644
index 000000000000..aba1abe216ab
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mocs.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_MOCS_H_
+#define _XE_MOCS_H_
+
+#include <linux/types.h>
+
+struct xe_engine;
+struct xe_gt;
+
+void xe_mocs_init_engine(const struct xe_engine *engine);
+void xe_mocs_init(struct xe_gt *gt);
+
+/**
+ * xe_mocs_index_to_value - Translate a MOCS index to the MOCS value expected by
+ * most blitter commands.
+ * @mocs_index: index into the mocs tables
+ *
+ * Return: The corresponding mocs value to be programmed.
+ */
+static inline u32 xe_mocs_index_to_value(u32 mocs_index)
+{
+ return mocs_index << 1;
+}
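+
+/*
+ * Illustrative use (not part of this patch): a caller emitting a blitter
+ * instruction would typically pass
+ * xe_mocs_index_to_value(gt->mocs.uc_index) in the instruction's MOCS
+ * field, i.e. the chosen table index shifted left by one bit.
+ */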
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
new file mode 100644
index 000000000000..cc862553a252
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include "xe_drv.h"
+#include "xe_hw_fence.h"
+#include "xe_module.h"
+#include "xe_pci.h"
+#include "xe_sched_job.h"
+
+bool enable_guc = true;
+module_param_named_unsafe(enable_guc, enable_guc, bool, 0444);
+MODULE_PARM_DESC(enable_guc, "Enable GuC submission");
+
+u32 xe_force_lmem_bar_size;
+module_param_named(lmem_bar_size, xe_force_lmem_bar_size, uint, 0600);
+MODULE_PARM_DESC(lmem_bar_size, "Set the lmem bar size (in MiB)");
+
+int xe_guc_log_level = 5;
+module_param_named(guc_log_level, xe_guc_log_level, int, 0600);
+MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1..5=enable with verbosity min..max)");
+
+char *xe_param_force_probe = CONFIG_DRM_XE_FORCE_PROBE;
+module_param_named_unsafe(force_probe, xe_param_force_probe, charp, 0400);
+MODULE_PARM_DESC(force_probe,
+ "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details.");
+
+struct init_funcs {
+ int (*init)(void);
+ void (*exit)(void);
+};
+#define MAKE_INIT_EXIT_FUNCS(name) \
+ { .init = xe_##name##_module_init, \
+ .exit = xe_##name##_module_exit, }
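+
+/*
+ * MAKE_INIT_EXIT_FUNCS(hw_fence), for instance, expands to
+ * { .init = xe_hw_fence_module_init, .exit = xe_hw_fence_module_exit, },
+ * so each table entry below only needs the component name.
+ */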
+static const struct init_funcs init_funcs[] = {
+ MAKE_INIT_EXIT_FUNCS(hw_fence),
+ MAKE_INIT_EXIT_FUNCS(sched_job),
+};
+
+static int __init xe_init(void)
+{
+ int err, i;
+
+ for (i = 0; i < ARRAY_SIZE(init_funcs); i++) {
+ err = init_funcs[i].init();
+ if (err) {
+ while (i--)
+ init_funcs[i].exit();
+ return err;
+ }
+ }
+
+ return xe_register_pci_driver();
+}
+
+static void __exit xe_exit(void)
+{
+ int i;
+
+ xe_unregister_pci_driver();
+
+ for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--)
+ init_funcs[i].exit();
+}
+
+module_init(xe_init);
+module_exit(xe_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
new file mode 100644
index 000000000000..2c6ee46f5595
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_MODULE_H_
+#define _XE_MODULE_H_
+
+#include <linux/init.h>
+
+/* Module modprobe variables */
+extern bool enable_guc;
+extern bool enable_display;
+extern u32 xe_force_lmem_bar_size;
+extern int xe_guc_log_level;
+extern char *xe_param_force_probe;
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
new file mode 100644
index 000000000000..55d8a597a068
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -0,0 +1,651 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_pci.h"
+
+#include <linux/device/driver.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_color_mgmt.h>
+#include <drm/xe_pciids.h>
+
+#include "xe_drv.h"
+#include "xe_device.h"
+#include "xe_macros.h"
+#include "xe_module.h"
+#include "xe_pm.h"
+#include "xe_step.h"
+
+#include "i915_reg.h"
+
+#define DEV_INFO_FOR_EACH_FLAG(func) \
+ func(require_force_probe); \
+ func(is_dgfx); \
+ /* Keep has_* in alphabetical order */ \
+
+struct xe_subplatform_desc {
+ enum xe_subplatform subplatform;
+ const char *name;
+ const u16 *pciidlist;
+};
+
+struct xe_gt_desc {
+ enum xe_gt_type type;
+ u8 vram_id;
+ u64 engine_mask;
+ u32 mmio_adj_limit;
+ u32 mmio_adj_offset;
+};
+
+struct xe_device_desc {
+ u8 graphics_ver;
+ u8 graphics_rel;
+ u8 media_ver;
+ u8 media_rel;
+
+ u64 platform_engine_mask; /* Engines supported by the HW */
+
+ enum xe_platform platform;
+ const char *platform_name;
+ const struct xe_subplatform_desc *subplatforms;
+ const struct xe_gt_desc *extra_gts;
+
+ u8 dma_mask_size; /* available DMA address bits */
+
+ u8 gt; /* GT number, 0 if undefined */
+
+#define DEFINE_FLAG(name) u8 name:1
+ DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG);
+#undef DEFINE_FLAG
+
+ u8 vram_flags;
+ u8 max_tiles;
+ u8 vm_max_level;
+
+ bool supports_usm;
+ bool has_flat_ccs;
+ bool has_4tile;
+};
+
+#define PLATFORM(x) \
+ .platform = (x), \
+ .platform_name = #x
+
+#define NOP(x) x
+
+/* Keep in gen based order, and chronological order within a gen */
+#define GEN12_FEATURES \
+ .require_force_probe = true, \
+ .graphics_ver = 12, \
+ .media_ver = 12, \
+ .dma_mask_size = 39, \
+ .max_tiles = 1, \
+ .vm_max_level = 3, \
+ .vram_flags = 0
+
+static const struct xe_device_desc tgl_desc = {
+ GEN12_FEATURES,
+ PLATFORM(XE_TIGERLAKE),
+ .platform_engine_mask =
+ BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
+ BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) |
+ BIT(XE_HW_ENGINE_VCS2),
+};
+
+static const struct xe_device_desc adl_s_desc = {
+ GEN12_FEATURES,
+ PLATFORM(XE_ALDERLAKE_S),
+ .platform_engine_mask =
+ BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
+ BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) |
+ BIT(XE_HW_ENGINE_VCS2),
+};
+
+static const u16 adlp_rplu_ids[] = { XE_RPLU_IDS(NOP), 0 };
+
+static const struct xe_device_desc adl_p_desc = {
+ GEN12_FEATURES,
+ PLATFORM(XE_ALDERLAKE_P),
+ .platform_engine_mask =
+ BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
+ BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) |
+ BIT(XE_HW_ENGINE_VCS2),
+ .subplatforms = (const struct xe_subplatform_desc[]) {
+ { XE_SUBPLATFORM_ADLP_RPLU, "RPLU", adlp_rplu_ids },
+ {},
+ },
+};
+
+#define DGFX_FEATURES \
+ .is_dgfx = 1
+
+static const struct xe_device_desc dg1_desc = {
+ GEN12_FEATURES,
+ DGFX_FEATURES,
+ .graphics_rel = 10,
+ PLATFORM(XE_DG1),
+ .platform_engine_mask =
+ BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
+ BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) |
+ BIT(XE_HW_ENGINE_VCS2),
+};
+
+#define XE_HP_FEATURES \
+ .require_force_probe = true, \
+ .graphics_ver = 12, \
+ .graphics_rel = 50, \
+ .has_flat_ccs = true, \
+ .dma_mask_size = 46, \
+ .max_tiles = 1, \
+ .vm_max_level = 3
+
+#define XE_HPM_FEATURES \
+ .media_ver = 12, \
+ .media_rel = 50
+
+static const u16 dg2_g10_ids[] = { XE_DG2_G10_IDS(NOP), XE_ATS_M150_IDS(NOP), 0 };
+static const u16 dg2_g11_ids[] = { XE_DG2_G11_IDS(NOP), XE_ATS_M75_IDS(NOP), 0 };
+static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 };
+
+#define DG2_FEATURES \
+ DGFX_FEATURES, \
+ .graphics_rel = 55, \
+ .media_rel = 55, \
+ PLATFORM(XE_DG2), \
+ .subplatforms = (const struct xe_subplatform_desc[]) { \
+ { XE_SUBPLATFORM_DG2_G10, "G10", dg2_g10_ids }, \
+ { XE_SUBPLATFORM_DG2_G11, "G11", dg2_g11_ids }, \
+ { XE_SUBPLATFORM_DG2_G12, "G12", dg2_g12_ids }, \
+ { } \
+ }, \
+ .platform_engine_mask = \
+ BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | \
+ BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VECS1) | \
+ BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | \
+ BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | \
+ BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), \
+ .require_force_probe = true, \
+ .vram_flags = XE_VRAM_FLAGS_NEED64K, \
+ .has_4tile = 1
+
+static const struct xe_device_desc ats_m_desc = {
+ XE_HP_FEATURES,
+ XE_HPM_FEATURES,
+
+ DG2_FEATURES,
+};
+
+static const struct xe_device_desc dg2_desc = {
+ XE_HP_FEATURES,
+ XE_HPM_FEATURES,
+
+ DG2_FEATURES,
+};
+
+#define PVC_ENGINES \
+ BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_BCS1) | \
+ BIT(XE_HW_ENGINE_BCS2) | BIT(XE_HW_ENGINE_BCS3) | \
+ BIT(XE_HW_ENGINE_BCS4) | BIT(XE_HW_ENGINE_BCS5) | \
+ BIT(XE_HW_ENGINE_BCS6) | BIT(XE_HW_ENGINE_BCS7) | \
+ BIT(XE_HW_ENGINE_BCS8) | \
+ BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS1) | \
+ BIT(XE_HW_ENGINE_VCS2) | \
+ BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | \
+ BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3)
+
+static const struct xe_gt_desc pvc_gts[] = {
+ {
+ .type = XE_GT_TYPE_REMOTE,
+ .vram_id = 1,
+ .engine_mask = PVC_ENGINES,
+ .mmio_adj_limit = 0,
+ .mmio_adj_offset = 0,
+ },
+};
+
+static const __maybe_unused struct xe_device_desc pvc_desc = {
+ XE_HP_FEATURES,
+ XE_HPM_FEATURES,
+ DGFX_FEATURES,
+ PLATFORM(XE_PVC),
+ .extra_gts = pvc_gts,
+ .graphics_rel = 60,
+ .has_flat_ccs = 0,
+ .media_rel = 60,
+ .platform_engine_mask = PVC_ENGINES,
+ .vram_flags = XE_VRAM_FLAGS_NEED64K,
+ .dma_mask_size = 52,
+ .max_tiles = 2,
+ .vm_max_level = 4,
+ .supports_usm = true,
+};
+
+#define MTL_MEDIA_ENGINES \
+ BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | \
+ BIT(XE_HW_ENGINE_VECS0) /* TODO: GSC0 */
+
+static const struct xe_gt_desc xelpmp_gts[] = {
+ {
+ .type = XE_GT_TYPE_MEDIA,
+ .vram_id = 0,
+ .engine_mask = MTL_MEDIA_ENGINES,
+ .mmio_adj_limit = 0x40000,
+ .mmio_adj_offset = 0x380000,
+ },
+};
+
+#define MTL_MAIN_ENGINES \
+ BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | \
+ BIT(XE_HW_ENGINE_CCS0)
+
+static const struct xe_device_desc mtl_desc = {
+ /*
+ * Real graphics IP version will be obtained from hardware GMD_ID
+ * register. Value provided here is just for sanity checking.
+ */
+ .require_force_probe = true,
+ .graphics_ver = 12,
+ .graphics_rel = 70,
+ .dma_mask_size = 46,
+ .max_tiles = 2,
+ .vm_max_level = 3,
+ .media_ver = 13,
+ PLATFORM(XE_METEORLAKE),
+ .extra_gts = xelpmp_gts,
+ .platform_engine_mask = MTL_MAIN_ENGINES,
+};
+
+#undef PLATFORM
+
+#define INTEL_VGA_DEVICE(id, info) { \
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, id), \
+ PCI_BASE_CLASS_DISPLAY << 16, 0xff << 16, \
+ (unsigned long) info }
+
+/*
+ * Make sure any device matches here are from most specific to most
+ * general. Matches based on subsystem/subvendor IDs, for example, must
+ * come before the more general PCI ID matches, otherwise the wrong info
+ * struct above would be used.
+ */
+static const struct pci_device_id pciidlist[] = {
+ XE_TGL_GT2_IDS(INTEL_VGA_DEVICE, &tgl_desc),
+ XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc),
+ XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc),
+ XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc),
+ XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc),
+ XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc),
+ XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc),
+ { }
+};
+MODULE_DEVICE_TABLE(pci, pciidlist);
+
+#undef INTEL_VGA_DEVICE
+
+/* Check whether device_id is present in the comma-separated list of IDs */
+static bool device_id_in_list(u16 device_id, const char *devices, bool negative)
+{
+ char *s, *p, *tok;
+ bool ret;
+
+ if (!devices || !*devices)
+ return false;
+
+ /* match everything */
+ if (negative && strcmp(devices, "!*") == 0)
+ return true;
+ if (!negative && strcmp(devices, "*") == 0)
+ return true;
+
+ s = kstrdup(devices, GFP_KERNEL);
+ if (!s)
+ return false;
+
+ for (p = s, ret = false; (tok = strsep(&p, ",")) != NULL; ) {
+ u16 val;
+
+ if (negative && tok[0] == '!')
+ tok++;
+ else if ((negative && tok[0] != '!') ||
+ (!negative && tok[0] == '!'))
+ continue;
+
+ if (kstrtou16(tok, 16, &val) == 0 && val == device_id) {
+ ret = true;
+ break;
+ }
+ }
+
+ kfree(s);
+
+ return ret;
+}
+
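+/*
+ * The force_probe string is a comma-separated list of hex device IDs; as a
+ * hypothetical example, xe.force_probe=4c8a,!56a0 would force probing of
+ * device 0x4c8a while blocking 0x56a0. A value of exactly "*" forces every
+ * device and "!*" blocks every device.
+ */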
+static bool id_forced(u16 device_id)
+{
+ return device_id_in_list(device_id, xe_param_force_probe, false);
+}
+
+static bool id_blocked(u16 device_id)
+{
+ return device_id_in_list(device_id, xe_param_force_probe, true);
+}
+
+static const struct xe_subplatform_desc *
+subplatform_get(const struct xe_device *xe, const struct xe_device_desc *desc)
+{
+ const struct xe_subplatform_desc *sp;
+ const u16 *id;
+
+ for (sp = desc->subplatforms; sp && sp->subplatform; sp++)
+ for (id = sp->pciidlist; *id; id++)
+ if (*id == xe->info.devid)
+ return sp;
+
+ return NULL;
+}
+
+static void xe_pci_remove(struct pci_dev *pdev)
+{
+ struct xe_device *xe;
+
+ xe = pci_get_drvdata(pdev);
+ if (!xe) /* driver load aborted, nothing to cleanup */
+ return;
+
+ xe_device_remove(xe);
+ pci_set_drvdata(pdev, NULL);
+}
+
+static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ const struct xe_device_desc *desc = (void *)ent->driver_data;
+ const struct xe_subplatform_desc *spd;
+ struct xe_device *xe;
+ struct xe_gt *gt;
+ u8 id;
+ int err;
+
+ if (desc->require_force_probe && !id_forced(pdev->device)) {
+ dev_info(&pdev->dev,
+ "Your graphics device %04x is not officially supported\n"
+ "by xe driver in this kernel version. To force Xe probe,\n"
+ "use xe.force_probe='%04x' and i915.force_probe='!%04x'\n"
+ "module parameters or CONFIG_DRM_XE_FORCE_PROBE='%04x' and\n"
+ "CONFIG_DRM_I915_FORCE_PROBE='!%04x' configuration options.\n",
+ pdev->device, pdev->device, pdev->device,
+ pdev->device, pdev->device);
+ return -ENODEV;
+ }
+
+ if (id_blocked(pdev->device)) {
+ dev_info(&pdev->dev, "Probe blocked for device [%04x:%04x].\n",
+ pdev->vendor, pdev->device);
+ return -ENODEV;
+ }
+
+ xe = xe_device_create(pdev, ent);
+ if (IS_ERR(xe))
+ return PTR_ERR(xe);
+
+ xe->info.graphics_verx100 = desc->graphics_ver * 100 +
+ desc->graphics_rel;
+ xe->info.media_verx100 = desc->media_ver * 100 +
+ desc->media_rel;
+ xe->info.is_dgfx = desc->is_dgfx;
+ xe->info.platform = desc->platform;
+ xe->info.dma_mask_size = desc->dma_mask_size;
+ xe->info.vram_flags = desc->vram_flags;
+ xe->info.tile_count = desc->max_tiles;
+ xe->info.vm_max_level = desc->vm_max_level;
+ xe->info.media_ver = desc->media_ver;
+ xe->info.supports_usm = desc->supports_usm;
+ xe->info.has_flat_ccs = desc->has_flat_ccs;
+ xe->info.has_4tile = desc->has_4tile;
+
+ spd = subplatform_get(xe, desc);
+ xe->info.subplatform = spd ? spd->subplatform : XE_SUBPLATFORM_NONE;
+ xe->info.step = xe_step_get(xe);
+
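+ /*
+ * GT 0 is always the main GT of the first tile; any additional GTs are
+ * described by the platform's extra_gts[] array (for example PVC's
+ * remote tile or MTL's standalone media GT above).
+ */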
+ for (id = 0; id < xe->info.tile_count; ++id) {
+ gt = xe->gt + id;
+ gt->info.id = id;
+ gt->xe = xe;
+
+ if (id == 0) {
+ gt->info.type = XE_GT_TYPE_MAIN;
+ gt->info.vram_id = id;
+ gt->info.engine_mask = desc->platform_engine_mask;
+ gt->mmio.adj_limit = 0;
+ gt->mmio.adj_offset = 0;
+ } else {
+ gt->info.type = desc->extra_gts[id - 1].type;
+ gt->info.vram_id = desc->extra_gts[id - 1].vram_id;
+ gt->info.engine_mask =
+ desc->extra_gts[id - 1].engine_mask;
+ gt->mmio.adj_limit =
+ desc->extra_gts[id - 1].mmio_adj_limit;
+ gt->mmio.adj_offset =
+ desc->extra_gts[id - 1].mmio_adj_offset;
+ }
+ }
+
+ drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx100:%d media100:%d dma_m_s:%d tc:%d",
+ desc->platform_name, spd ? spd->name : "",
+ xe->info.devid, xe->info.revid,
+ xe->info.is_dgfx, xe->info.graphics_verx100,
+ xe->info.media_verx100,
+ xe->info.dma_mask_size, xe->info.tile_count);
+
+ drm_dbg(&xe->drm, "Stepping = (G:%s, M:%s, D:%s, B:%s)\n",
+ xe_step_name(xe->info.step.graphics),
+ xe_step_name(xe->info.step.media),
+ xe_step_name(xe->info.step.display),
+ xe_step_name(xe->info.step.basedie));
+
+ pci_set_drvdata(pdev, xe);
+ err = pci_enable_device(pdev);
+ if (err) {
+ drm_dev_put(&xe->drm);
+ return err;
+ }
+
+ pci_set_master(pdev);
+
+ if (pci_enable_msi(pdev) < 0)
+ drm_dbg(&xe->drm, "can't enable MSI");
+
+ err = xe_device_probe(xe);
+ if (err) {
+ pci_disable_device(pdev);
+ return err;
+ }
+
+ xe_pm_runtime_init(xe);
+
+ return 0;
+}
+
+static void xe_pci_shutdown(struct pci_dev *pdev)
+{
+ xe_device_shutdown(pdev_to_xe_device(pdev));
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int xe_pci_suspend(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ int err;
+
+ err = xe_pm_suspend(pdev_to_xe_device(pdev));
+ if (err)
+ return err;
+
+ pci_save_state(pdev);
+ pci_disable_device(pdev);
+
+ err = pci_set_power_state(pdev, PCI_D3hot);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int xe_pci_resume(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ int err;
+
+ err = pci_set_power_state(pdev, PCI_D0);
+ if (err)
+ return err;
+
+ pci_restore_state(pdev);
+
+ err = pci_enable_device(pdev);
+ if (err)
+ return err;
+
+ pci_set_master(pdev);
+
+ err = xe_pm_resume(pdev_to_xe_device(pdev));
+ if (err)
+ return err;
+
+ return 0;
+}
+#endif
+
+static int xe_pci_runtime_suspend(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct xe_device *xe = pdev_to_xe_device(pdev);
+ int err;
+
+ err = xe_pm_runtime_suspend(xe);
+ if (err)
+ return err;
+
+ pci_save_state(pdev);
+
+ if (xe->d3cold_allowed) {
+ pci_disable_device(pdev);
+ pci_ignore_hotplug(pdev);
+ pci_set_power_state(pdev, PCI_D3cold);
+ } else {
+ pci_set_power_state(pdev, PCI_D3hot);
+ }
+
+ return 0;
+}
+
+static int xe_pci_runtime_resume(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct xe_device *xe = pdev_to_xe_device(pdev);
+ int err;
+
+ err = pci_set_power_state(pdev, PCI_D0);
+ if (err)
+ return err;
+
+ pci_restore_state(pdev);
+
+ if (xe->d3cold_allowed) {
+ err = pci_enable_device(pdev);
+ if (err)
+ return err;
+
+ pci_set_master(pdev);
+ }
+
+ return xe_pm_runtime_resume(xe);
+}
+
+static int xe_pci_runtime_idle(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct xe_device *xe = pdev_to_xe_device(pdev);
+
+ /*
+ * FIXME: d3cold should be allowed (true) if
+ * (IS_DGFX(xe) && !xe_device_mem_access_ongoing(xe))
+ * however the change to the buddy allocator broke the
+ * xe_bo_restore_kernel when the pci device is disabled
+ */
+ xe->d3cold_allowed = false;
+
+ return 0;
+}
+
+static const struct dev_pm_ops xe_pm_ops = {
+ .suspend = xe_pci_suspend,
+ .resume = xe_pci_resume,
+ .freeze = xe_pci_suspend,
+ .thaw = xe_pci_resume,
+ .poweroff = xe_pci_suspend,
+ .restore = xe_pci_resume,
+ .runtime_suspend = xe_pci_runtime_suspend,
+ .runtime_resume = xe_pci_runtime_resume,
+ .runtime_idle = xe_pci_runtime_idle,
+};
+
+static struct pci_driver xe_pci_driver = {
+ .name = DRIVER_NAME,
+ .id_table = pciidlist,
+ .probe = xe_pci_probe,
+ .remove = xe_pci_remove,
+ .shutdown = xe_pci_shutdown,
+ .driver.pm = &xe_pm_ops,
+};
+
+int xe_register_pci_driver(void)
+{
+ return pci_register_driver(&xe_pci_driver);
+}
+
+void xe_unregister_pci_driver(void)
+{
+ pci_unregister_driver(&xe_pci_driver);
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+static int dev_to_xe_device_fn(struct device *dev, void *data)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ int (*xe_fn)(struct xe_device *xe) = data;
+ int ret = 0;
+ int idx;
+
+ if (drm_dev_enter(drm, &idx)) {
+ ret = xe_fn(to_xe_device(dev_get_drvdata(dev)));
+ drm_dev_exit(idx);
+ }
+
+ return ret;
+}
+
+/**
+ * xe_call_for_each_device - Iterate over all devices this driver binds to
+ * @xe_fn: Function to call for each device.
+ *
+ * This function iterates over all devices this driver binds to, and calls
+ * @xe_fn for each one of them. If the called function returns anything other
+ * than 0, iteration is stopped and that return value is returned by this
+ * function. Across each function call, drm_dev_enter() / drm_dev_exit() is
+ * called for the corresponding drm device.
+ *
+ * Return: Zero, or the first error code returned by a call to @xe_fn.
+ */
+int xe_call_for_each_device(xe_device_fn xe_fn)
+{
+ return driver_for_each_device(&xe_pci_driver.driver, NULL,
+ xe_fn, dev_to_xe_device_fn);
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pci.h b/drivers/gpu/drm/xe/xe_pci.h
new file mode 100644
index 000000000000..9e3089549d5f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pci.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_PCI_H_
+#define _XE_PCI_H_
+
+#include "tests/xe_test.h"
+
+int xe_register_pci_driver(void);
+void xe_unregister_pci_driver(void);
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+struct xe_device;
+
+typedef int (*xe_device_fn)(struct xe_device *);
+
+int xe_call_for_each_device(xe_device_fn xe_fn);
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
new file mode 100644
index 000000000000..236159c8a6c0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_pcode_api.h"
+#include "xe_pcode.h"
+
+#include "xe_gt.h"
+#include "xe_mmio.h"
+
+#include <linux/errno.h>
+
+/**
+ * DOC: PCODE
+ *
+ * Xe PCODE is the component responsible for interfacing with the PCODE
+ * firmware.
+ * It shall provide a very simple ABI to other Xe components, but be the
+ * single and consolidated place that will communicate with PCODE. All read
+ * and write operations to PCODE will be internal and private to this component.
+ *
+ * What's next:
+ * - PCODE hw metrics
+ * - PCODE for display operations
+ */
+
+static int pcode_mailbox_status(struct xe_gt *gt)
+{
+ u32 err;
+ static const struct pcode_err_decode err_decode[] = {
+ [PCODE_ILLEGAL_CMD] = {-ENXIO, "Illegal Command"},
+ [PCODE_TIMEOUT] = {-ETIMEDOUT, "Timed out"},
+ [PCODE_ILLEGAL_DATA] = {-EINVAL, "Illegal Data"},
+ [PCODE_ILLEGAL_SUBCOMMAND] = {-ENXIO, "Illegal Subcommand"},
+ [PCODE_LOCKED] = {-EBUSY, "PCODE Locked"},
+ [PCODE_GT_RATIO_OUT_OF_RANGE] = {-EOVERFLOW,
+ "GT ratio out of range"},
+ [PCODE_REJECTED] = {-EACCES, "PCODE Rejected"},
+ [PCODE_ERROR_MASK] = {-EPROTO, "Unknown"},
+ };
+
+ lockdep_assert_held(&gt->pcode.lock);
+
+ err = xe_mmio_read32(gt, PCODE_MAILBOX.reg) & PCODE_ERROR_MASK;
+ if (err) {
+ drm_err(&gt_to_xe(gt)->drm, "PCODE Mailbox failed: %d %s", err,
+ err_decode[err].str ?: "Unknown");
+ return err_decode[err].errno ?: -EPROTO;
+ }
+
+ return 0;
+}
+
+static bool pcode_mailbox_done(struct xe_gt *gt)
+{
+ lockdep_assert_held(&gt->pcode.lock);
+ return (xe_mmio_read32(gt, PCODE_MAILBOX.reg) & PCODE_READY) == 0;
+}
+
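+/*
+ * Mailbox handshake: write DATA0/DATA1, then write the command with
+ * PCODE_READY set. PCODE clears PCODE_READY once it has consumed the
+ * request, at which point DATA0/DATA1 hold any reply and the low byte of
+ * the mailbox register holds the error code, if any.
+ */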
+static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
+ unsigned int timeout, bool return_data, bool atomic)
+{
+ lockdep_assert_held(&gt->pcode.lock);
+
+ if (!pcode_mailbox_done(gt))
+ return -EAGAIN;
+
+ xe_mmio_write32(gt, PCODE_DATA0.reg, *data0);
+ xe_mmio_write32(gt, PCODE_DATA1.reg, data1 ? *data1 : 0);
+ xe_mmio_write32(gt, PCODE_MAILBOX.reg, PCODE_READY | mbox);
+
+ if (atomic)
+ _wait_for_atomic(pcode_mailbox_done(gt), timeout * 1000, 1);
+ else
+ wait_for(pcode_mailbox_done(gt), timeout);
+
+ if (return_data) {
+ *data0 = xe_mmio_read32(gt, PCODE_DATA0.reg);
+ if (data1)
+ *data1 = xe_mmio_read32(gt, PCODE_DATA1.reg);
+ }
+
+ return pcode_mailbox_status(gt);
+}
+
+int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 data, int timeout)
+{
+ int err;
+
+ mutex_lock(&gt->pcode.lock);
+ err = pcode_mailbox_rw(gt, mbox, &data, NULL, timeout, false, false);
+ mutex_unlock(&gt->pcode.lock);
+
+ return err;
+}
+
+int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1)
+{
+ int err;
+
+ mutex_lock(&gt->pcode.lock);
+ err = pcode_mailbox_rw(gt, mbox, val, val1, 1, true, false);
+ mutex_unlock(&gt->pcode.lock);
+
+ return err;
+}
+
+static bool xe_pcode_try_request(struct xe_gt *gt, u32 mbox,
+ u32 request, u32 reply_mask, u32 reply,
+ u32 *status, bool atomic)
+{
+ *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, atomic);
+
+ return (*status == 0) && ((request & reply_mask) == reply);
+}
+
+/**
+ * xe_pcode_request - send PCODE request until acknowledgment
+ * @gt: gt
+ * @mbox: PCODE mailbox ID the request is targeted for
+ * @request: request ID
+ * @reply_mask: mask used to check for request acknowledgment
+ * @reply: value used to check for request acknowledgment
+ * @timeout_base_ms: timeout for polling with preemption enabled
+ *
+ * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
+ * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
+ * The request is acknowledged once the PCODE reply dword equals @reply after
+ * applying @reply_mask. Polling is first attempted with preemption enabled
+ * for @timeout_base_ms; if this times out, polling continues for another
+ * 50 ms with preemption disabled.
+ *
+ * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
+ * other error as reported by PCODE.
+ */
+int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
+ u32 reply_mask, u32 reply, int timeout_base_ms)
+{
+ u32 status;
+ int ret;
+ bool atomic = false;
+
+ mutex_lock(&gt->pcode.lock);
+
+#define COND \
+ xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status, atomic)
+
+ /*
+ * Prime the PCODE by doing a request first. Normally it guarantees
+ * that a subsequent request, at most @timeout_base_ms later, succeeds.
+ * _wait_for() doesn't guarantee when its passed condition is evaluated
+ * first, so send the first request explicitly.
+ */
+ if (COND) {
+ ret = 0;
+ goto out;
+ }
+ ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10);
+ if (!ret)
+ goto out;
+
+ /*
+ * The above can time out if the number of requests was low (2 in the
+ * worst case) _and_ PCODE was busy for some reason even after a
+ * (queued) request and @timeout_base_ms delay. As a workaround retry
+ * the poll with preemption disabled to maximize the number of
+ * requests. Increase the timeout from @timeout_base_ms to 50ms to
+ * account for interrupts that could reduce the number of these
+ * requests, and for any quirks of the PCODE firmware that delays
+ * the request completion.
+ */
+ drm_err(&gt_to_xe(gt)->drm,
+ "PCODE timeout, retrying with preemption disabled\n");
+ drm_WARN_ON_ONCE(&gt_to_xe(gt)->drm, timeout_base_ms > 1);
+ preempt_disable();
+ atomic = true;
+ ret = wait_for_atomic(COND, 50);
+ atomic = false;
+ preempt_enable();
+
+out:
+ mutex_unlock(&gt->pcode.lock);
+ return status ? status : ret;
+#undef COND
+}
+
+/**
+ * xe_pcode_init_min_freq_table - Initialize PCODE's QOS frequency table
+ * @gt: gt instance
+ * @min_gt_freq: Minimal (RPn) GT frequency in units of 50MHz.
+ * @max_gt_freq: Maximal (RP0) GT frequency in units of 50MHz.
+ *
+ * This function initializes PCODE's QOS frequency table for a proper minimal
+ * frequency/power steering decision, depending on the current requested GT
+ * frequency. For older platforms this was a more complete table including
+ * the IA freq. However, for the latest platforms this table becomes a simple
+ * 1:1 ring vs GT frequency mapping. Even so, without setting it, PCODE might
+ * not take the right decisions for some memory frequencies and affect latency.
+ *
+ * It returns 0 on success, -EINVAL if the max frequency is not higher than
+ * the min frequency, and other negative error numbers directly translated
+ * from the PCODE error returns:
+ * - -ENXIO: "Illegal Command"
+ * - -ETIMEDOUT: "Timed out"
+ * - -EINVAL: "Illegal Data"
+ * - -ENXIO, "Illegal Subcommand"
+ * - -EBUSY: "PCODE Locked"
+ * - -EOVERFLOW, "GT ratio out of range"
+ * - -EACCES, "PCODE Rejected"
+ * - -EPROTO, "Unknown"
+ */
+int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq,
+ u32 max_gt_freq)
+{
+ int ret;
+ u32 freq;
+
+ if (IS_DGFX(gt_to_xe(gt)))
+ return 0;
+
+ if (max_gt_freq <= min_gt_freq)
+ return -EINVAL;
+
+ mutex_lock(&gt->pcode.lock);
+ for (freq = min_gt_freq; freq <= max_gt_freq; freq++) {
+ u32 data = freq << PCODE_FREQ_RING_RATIO_SHIFT | freq;
+
+ ret = pcode_mailbox_rw(gt, PCODE_WRITE_MIN_FREQ_TABLE,
+ &data, NULL, 1, false, false);
+ if (ret)
+ goto unlock;
+ }
+
+unlock:
+ mutex_unlock(&gt->pcode.lock);
+ return ret;
+}
+
+static bool pcode_dgfx_status_complete(struct xe_gt *gt)
+{
+ u32 data = DGFX_GET_INIT_STATUS;
+ int status = pcode_mailbox_rw(gt, DGFX_PCODE_STATUS,
+ &data, NULL, 1, true, false);
+
+ return status == 0 &&
+ (data & DGFX_INIT_STATUS_COMPLETE) == DGFX_INIT_STATUS_COMPLETE;
+}
+
+/**
+ * xe_pcode_init - Ensure PCODE is initialized
+ * @gt: gt instance
+ *
+ * This function ensures that PCODE is properly initialized. To be called during
+ * probe and resume paths.
+ *
+ * It returns 0 on success, and -error number on failure.
+ */
+int xe_pcode_init(struct xe_gt *gt)
+{
+ int timeout = 180000; /* 3 min */
+ int ret;
+
+ if (!IS_DGFX(gt_to_xe(gt)))
+ return 0;
+
+ mutex_lock(&gt->pcode.lock);
+ ret = wait_for(pcode_dgfx_status_complete(gt), timeout);
+ mutex_unlock(&gt->pcode.lock);
+
+ if (ret)
+ drm_err(&gt_to_xe(gt)->drm,
+ "PCODE initialization timedout after: %d min\n",
+ timeout / 60000);
+
+ return ret;
+}
+
+/**
+ * xe_pcode_probe - Prepare xe_pcode and also ensure PCODE is initialized.
+ * @gt: gt instance
+ *
+ * This function initializes the xe_pcode component, and when needed, it ensures
+ * that PCODE has properly performed its initialization and it is really ready
+ * to go. To be called once only during probe.
+ *
+ * It returns 0 on success, and -error number on failure.
+ */
+int xe_pcode_probe(struct xe_gt *gt)
+{
+ mutex_init(&gt->pcode.lock);
+
+ if (!IS_DGFX(gt_to_xe(gt)))
+ return 0;
+
+ return xe_pcode_init(gt);
+}
diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h
new file mode 100644
index 000000000000..3b4aa8c1a3ba
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pcode.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PCODE_H_
+#define _XE_PCODE_H_
+
+#include <linux/types.h>
+struct xe_gt;
+
+int xe_pcode_probe(struct xe_gt *gt);
+int xe_pcode_init(struct xe_gt *gt);
+int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq,
+ u32 max_gt_freq);
+int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1);
+int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 val,
+ int timeout_ms);
+#define xe_pcode_write(gt, mbox, val) \
+ xe_pcode_write_timeout(gt, mbox, val, 1)
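+
+/*
+ * Example (illustrative): xe_pcode_write(gt, PCODE_WRITE_MIN_FREQ_TABLE, data)
+ * issues a single mailbox write with a 1 ms timeout and discards any reply
+ * data.
+ */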
+
+int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
+ u32 reply_mask, u32 reply, int timeout_ms);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h
new file mode 100644
index 000000000000..0762c8a912c7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pcode_api.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+/* Internal to xe_pcode */
+
+#define PCODE_MAILBOX _MMIO(0x138124)
+#define PCODE_READY REG_BIT(31)
+#define PCODE_MB_PARAM2 REG_GENMASK(23, 16)
+#define PCODE_MB_PARAM1 REG_GENMASK(15, 8)
+#define PCODE_MB_COMMAND REG_GENMASK(7, 0)
+#define PCODE_ERROR_MASK 0xFF
+#define PCODE_SUCCESS 0x0
+#define PCODE_ILLEGAL_CMD 0x1
+#define PCODE_TIMEOUT 0x2
+#define PCODE_ILLEGAL_DATA 0x3
+#define PCODE_ILLEGAL_SUBCOMMAND 0x4
+#define PCODE_LOCKED 0x6
+#define PCODE_GT_RATIO_OUT_OF_RANGE 0x10
+#define PCODE_REJECTED 0x11
+
+#define PCODE_DATA0 _MMIO(0x138128)
+#define PCODE_DATA1 _MMIO(0x13812C)
+
+/* Min Freq QOS Table */
+#define PCODE_WRITE_MIN_FREQ_TABLE 0x8
+#define PCODE_READ_MIN_FREQ_TABLE 0x9
+#define PCODE_FREQ_RING_RATIO_SHIFT 16
+
+/* PCODE Init */
+#define DGFX_PCODE_STATUS 0x7E
+#define DGFX_GET_INIT_STATUS 0x0
+#define DGFX_INIT_STATUS_COMPLETE 0x1
+
+struct pcode_err_decode {
+ int errno;
+ const char *str;
+};
+
diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h
new file mode 100644
index 000000000000..72612c832e88
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_platform_types.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PLATFORM_INFO_TYPES_H_
+#define _XE_PLATFORM_INFO_TYPES_H_
+
+/* Keep in gen based order, and chronological order within a gen */
+enum xe_platform {
+ XE_PLATFORM_UNINITIALIZED = 0,
+ /* gen12 */
+ XE_TIGERLAKE,
+ XE_ROCKETLAKE,
+ XE_DG1,
+ XE_DG2,
+ XE_PVC,
+ XE_ALDERLAKE_S,
+ XE_ALDERLAKE_P,
+ XE_METEORLAKE,
+};
+
+enum xe_subplatform {
+ XE_SUBPLATFORM_UNINITIALIZED = 0,
+ XE_SUBPLATFORM_NONE,
+ XE_SUBPLATFORM_DG2_G10,
+ XE_SUBPLATFORM_DG2_G11,
+ XE_SUBPLATFORM_DG2_G12,
+ XE_SUBPLATFORM_ADLP_RPLU,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
new file mode 100644
index 000000000000..fb0355530e7b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/pm_runtime.h>
+
+#include <drm/ttm/ttm_placement.h>
+
+#include "xe_bo.h"
+#include "xe_bo_evict.h"
+#include "xe_device.h"
+#include "xe_pm.h"
+#include "xe_gt.h"
+#include "xe_ggtt.h"
+#include "xe_irq.h"
+#include "xe_pcode.h"
+
+/**
+ * DOC: Xe Power Management
+ *
+ * Xe PM shall be guided by simplicity:
+ * use the simplest hook options whenever possible,
+ * don't reinvent the runtime_pm references and hooks,
+ * and keep a clear separation of display and GT underneath this component.
+ *
+ * What's next:
+ *
+ * For now s2idle and s3 only work on integrated devices. The next step
+ * is to iterate through all VRAM BOs, backing them up into system memory,
+ * before allowing system suspend.
+ *
+ * Also runtime_pm needs to be here from the beginning.
+ *
+ * RC6/RPS are also critical PM features. Let's start with GuCRC and GuC SLPC
+ * and no wait boost. Frequency optimizations should come on a next stage.
+ */
+
+/**
+ * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
+ * @xe: xe device instance
+ *
+ * Return: 0 on success
+ */
+int xe_pm_suspend(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+ int err;
+
+ for_each_gt(gt, xe, id)
+ xe_gt_suspend_prepare(gt);
+
+ /* FIXME: Super racy... */
+ err = xe_bo_evict_all(xe);
+ if (err)
+ return err;
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_suspend(gt);
+ if (err)
+ return err;
+ }
+
+ xe_irq_suspend(xe);
+
+ return 0;
+}
+
+/**
+ * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0
+ * @xe: xe device instance
+ *
+ * Return: 0 on success
+ */
+int xe_pm_resume(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+ int err;
+
+ for_each_gt(gt, xe, id) {
+ err = xe_pcode_init(gt);
+ if (err)
+ return err;
+ }
+
+ /*
+ * This only restores pinned memory which is the memory required for the
+ * GT(s) to resume.
+ */
+ err = xe_bo_restore_kernel(xe);
+ if (err)
+ return err;
+
+ xe_irq_resume(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_gt_resume(gt);
+
+ err = xe_bo_restore_user(xe);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+void xe_pm_runtime_init(struct xe_device *xe)
+{
+ struct device *dev = xe->drm.dev;
+
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_set_autosuspend_delay(dev, 1000);
+ pm_runtime_set_active(dev);
+ pm_runtime_allow(dev);
+ pm_runtime_mark_last_busy(dev);
+ pm_runtime_put_autosuspend(dev);
+}
+
+int xe_pm_runtime_suspend(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+ int err;
+
+ if (xe->d3cold_allowed) {
+ if (xe_device_mem_access_ongoing(xe))
+ return -EBUSY;
+
+ err = xe_bo_evict_all(xe);
+ if (err)
+ return err;
+ }
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_suspend(gt);
+ if (err)
+ return err;
+ }
+
+ xe_irq_suspend(xe);
+
+ return 0;
+}
+
+int xe_pm_runtime_resume(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ u8 id;
+ int err;
+
+ if (xe->d3cold_allowed) {
+ for_each_gt(gt, xe, id) {
+ err = xe_pcode_init(gt);
+ if (err)
+ return err;
+ }
+
+ /*
+ * This only restores pinned memory which is the memory
+ * required for the GT(s) to resume.
+ */
+ err = xe_bo_restore_kernel(xe);
+ if (err)
+ return err;
+ }
+
+ xe_irq_resume(xe);
+
+ for_each_gt(gt, xe, id)
+ xe_gt_resume(gt);
+
+ if (xe->d3cold_allowed) {
+ err = xe_bo_restore_user(xe);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int xe_pm_runtime_get(struct xe_device *xe)
+{
+ return pm_runtime_get_sync(xe->drm.dev);
+}
+
+int xe_pm_runtime_put(struct xe_device *xe)
+{
+ pm_runtime_mark_last_busy(xe->drm.dev);
+ return pm_runtime_put_autosuspend(xe->drm.dev);
+}
+
+/* Return true if resume operation happened and usage count was increased */
+bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe)
+{
+ /* In case we are suspended we need to immediately wake up */
+ if (pm_runtime_suspended(xe->drm.dev))
+ return !pm_runtime_resume_and_get(xe->drm.dev);
+
+ return false;
+}
+
+int xe_pm_runtime_get_if_active(struct xe_device *xe)
+{
+ WARN_ON(pm_runtime_suspended(xe->drm.dev));
+ return pm_runtime_get_if_active(xe->drm.dev, true);
+}
diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
new file mode 100644
index 000000000000..b8c5f9558e26
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pm.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PM_H_
+#define _XE_PM_H_
+
+#include <linux/pm_runtime.h>
+
+struct xe_device;
+
+int xe_pm_suspend(struct xe_device *xe);
+int xe_pm_resume(struct xe_device *xe);
+
+void xe_pm_runtime_init(struct xe_device *xe);
+int xe_pm_runtime_suspend(struct xe_device *xe);
+int xe_pm_runtime_resume(struct xe_device *xe);
+int xe_pm_runtime_get(struct xe_device *xe);
+int xe_pm_runtime_put(struct xe_device *xe);
+bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe);
+int xe_pm_runtime_get_if_active(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c
new file mode 100644
index 000000000000..6ab9ff442766
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/slab.h>
+
+#include "xe_engine.h"
+#include "xe_preempt_fence.h"
+#include "xe_vm.h"
+
+static void preempt_fence_work_func(struct work_struct *w)
+{
+ bool cookie = dma_fence_begin_signalling();
+ struct xe_preempt_fence *pfence =
+ container_of(w, typeof(*pfence), preempt_work);
+ struct xe_engine *e = pfence->engine;
+
+ if (pfence->error)
+ dma_fence_set_error(&pfence->base, pfence->error);
+ else
+ e->ops->suspend_wait(e);
+
+ dma_fence_signal(&pfence->base);
+ dma_fence_end_signalling(cookie);
+
+ queue_work(system_unbound_wq, &e->vm->preempt.rebind_work);
+
+ xe_engine_put(e);
+}
+
+static const char *
+preempt_fence_get_driver_name(struct dma_fence *fence)
+{
+ return "xe";
+}
+
+static const char *
+preempt_fence_get_timeline_name(struct dma_fence *fence)
+{
+ return "preempt";
+}
+
+static bool preempt_fence_enable_signaling(struct dma_fence *fence)
+{
+ struct xe_preempt_fence *pfence =
+ container_of(fence, typeof(*pfence), base);
+ struct xe_engine *e = pfence->engine;
+
+ pfence->error = e->ops->suspend(e);
+ queue_work(system_unbound_wq, &pfence->preempt_work);
+ return true;
+}
+
+static const struct dma_fence_ops preempt_fence_ops = {
+ .get_driver_name = preempt_fence_get_driver_name,
+ .get_timeline_name = preempt_fence_get_timeline_name,
+ .enable_signaling = preempt_fence_enable_signaling,
+};
+
+/**
+ * xe_preempt_fence_alloc() - Allocate a preempt fence with minimal
+ * initialization
+ *
+ * Allocate a preempt fence, and initialize its list head.
+ * If the preempt_fence allocated has been armed with
+ * xe_preempt_fence_arm(), it must be freed using dma_fence_put(). If not,
+ * it must be freed using xe_preempt_fence_free().
+ *
+ * Return: A struct xe_preempt_fence pointer used for calling into
+ * xe_preempt_fence_arm() or xe_preempt_fence_free().
+ * An error pointer on error.
+ */
+struct xe_preempt_fence *xe_preempt_fence_alloc(void)
+{
+ struct xe_preempt_fence *pfence;
+
+ pfence = kmalloc(sizeof(*pfence), GFP_KERNEL);
+ if (!pfence)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&pfence->link);
+ INIT_WORK(&pfence->preempt_work, preempt_fence_work_func);
+
+ return pfence;
+}
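+
+/*
+ * Typical lifecycle (sketch, not prescribed by this patch): preallocate with
+ * xe_preempt_fence_alloc() while locking rules still allow it; later either
+ * arm the fence with xe_preempt_fence_arm() and drop it with dma_fence_put(),
+ * or release an unarmed fence with xe_preempt_fence_free().
+ */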
+
+/**
+ * xe_preempt_fence_free() - Free a preempt fence allocated using
+ * xe_preempt_fence_alloc().
+ * @pfence: pointer obtained from xe_preempt_fence_alloc();
+ *
+ * Free a preempt fence that has not yet been armed.
+ */
+void xe_preempt_fence_free(struct xe_preempt_fence *pfence)
+{
+ list_del(&pfence->link);
+ kfree(pfence);
+}
+
+/**
+ * xe_preempt_fence_arm() - Arm a preempt fence allocated using
+ * xe_preempt_fence_alloc().
+ * @pfence: The struct xe_preempt_fence pointer returned from
+ * xe_preempt_fence_alloc().
+ * @e: The struct xe_engine used for arming.
+ * @context: The dma-fence context used for arming.
+ * @seqno: The dma-fence seqno used for arming.
+ *
+ * Inserts the preempt fence into @context's timeline, takes @link off any
+ * list, and registers the struct xe_engine as the xe_engine to be preempted.
+ *
+ * Return: A pointer to a struct dma_fence embedded into the preempt fence.
+ * This function doesn't error.
+ */
+struct dma_fence *
+xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_engine *e,
+ u64 context, u32 seqno)
+{
+ list_del_init(&pfence->link);
+ pfence->engine = xe_engine_get(e);
+ dma_fence_init(&pfence->base, &preempt_fence_ops,
+ &e->compute.lock, context, seqno);
+
+ return &pfence->base;
+}
+
+/**
+ * xe_preempt_fence_create() - Helper to create and arm a preempt fence.
+ * @e: The struct xe_engine used for arming.
+ * @context: The dma-fence context used for arming.
+ * @seqno: The dma-fence seqno used for arming.
+ *
+ * Allocates and inserts the preempt fence into @context's timeline,
+ * and registers @e as the struct xe_engine to be preempted.
+ *
+ * Return: A pointer to the resulting struct dma_fence on success. An error
+ * pointer on error. In particular if allocation fails it returns
+ * ERR_PTR(-ENOMEM);
+ */
+struct dma_fence *
+xe_preempt_fence_create(struct xe_engine *e,
+ u64 context, u32 seqno)
+{
+ struct xe_preempt_fence *pfence;
+
+ pfence = xe_preempt_fence_alloc();
+ if (IS_ERR(pfence))
+ return ERR_CAST(pfence);
+
+ return xe_preempt_fence_arm(pfence, e, context, seqno);
+}
+
+bool xe_fence_is_xe_preempt(const struct dma_fence *fence)
+{
+ return fence->ops == &preempt_fence_ops;
+}
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.h b/drivers/gpu/drm/xe/xe_preempt_fence.h
new file mode 100644
index 000000000000..4f3966103203
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PREEMPT_FENCE_H_
+#define _XE_PREEMPT_FENCE_H_
+
+#include "xe_preempt_fence_types.h"
+
+struct list_head;
+
+struct dma_fence *
+xe_preempt_fence_create(struct xe_engine *e,
+ u64 context, u32 seqno);
+
+struct xe_preempt_fence *xe_preempt_fence_alloc(void);
+
+void xe_preempt_fence_free(struct xe_preempt_fence *pfence);
+
+struct dma_fence *
+xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_engine *e,
+ u64 context, u32 seqno);
+
+static inline struct xe_preempt_fence *
+to_preempt_fence(struct dma_fence *fence)
+{
+ return container_of(fence, struct xe_preempt_fence, base);
+}
+
+/**
+ * xe_preempt_fence_link() - Return a link used to keep unarmed preempt
+ * fences on a list.
+ * @pfence: Pointer to the preempt fence.
+ *
+ * The link is embedded in the struct xe_preempt_fence. Use
+ * link_to_preempt_fence() to convert back to the preempt fence.
+ *
+ * Return: A pointer to an embedded struct list_head.
+ */
+static inline struct list_head *
+xe_preempt_fence_link(struct xe_preempt_fence *pfence)
+{
+ return &pfence->link;
+}
+
+/**
+ * to_preempt_fence_from_link() - Convert back to a preempt fence pointer
+ * from a link obtained with xe_preempt_fence_link().
+ * @link: The struct list_head obtained from xe_preempt_fence_link().
+ *
+ * Return: A pointer to the embedding struct xe_preempt_fence.
+ */
+static inline struct xe_preempt_fence *
+to_preempt_fence_from_link(struct list_head *link)
+{
+ return container_of(link, struct xe_preempt_fence, link);
+}
+
+bool xe_fence_is_xe_preempt(const struct dma_fence *fence);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h
new file mode 100644
index 000000000000..9d9efd8ff0ed
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PREEMPT_FENCE_TYPES_H_
+#define _XE_PREEMPT_FENCE_TYPES_H_
+
+#include <linux/dma-fence.h>
+#include <linux/workqueue.h>
+
+struct xe_engine;
+
+/**
+ * struct xe_preempt_fence - XE preempt fence
+ *
+ * A preemption fence which suspends the execution of an xe_engine on the
+ * hardware and triggers a callback once the xe_engine is complete.
+ */
+struct xe_preempt_fence {
+ /** @base: dma fence base */
+ struct dma_fence base;
+ /** @link: link into list of pending preempt fences */
+ struct list_head link;
+ /** @engine: xe engine for this preempt fence */
+ struct xe_engine *engine;
+ /** @preempt_work: work struct which issues preemption */
+ struct work_struct preempt_work;
+ /** @error: preempt fence is in error state */
+ int error;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
new file mode 100644
index 000000000000..81193ddd0af7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -0,0 +1,1542 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_migrate.h"
+#include "xe_pt.h"
+#include "xe_pt_types.h"
+#include "xe_pt_walk.h"
+#include "xe_vm.h"
+#include "xe_res_cursor.h"
+
+struct xe_pt_dir {
+ struct xe_pt pt;
+ /** @dir: Directory structure for the xe_pt_walk functionality */
+ struct xe_ptw_dir dir;
+};
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr))
+#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr)
+#else
+#define xe_pt_set_addr(__xe_pt, __addr)
+#define xe_pt_addr(__xe_pt) 0ull
+#endif
+
+static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48};
+static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48};
+
+#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1)
+
+static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt)
+{
+ return container_of(pt, struct xe_pt_dir, pt);
+}
+
+static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index)
+{
+ return container_of(pt_dir->dir.entries[index], struct xe_pt, base);
+}
+
+/**
+ * gen8_pde_encode() - Encode a page-table directory entry pointing to
+ * another page-table.
+ * @bo: The page-table bo of the page-table to point to.
+ * @bo_offset: Offset in the page-table bo to point to.
+ * @level: The cache level indicating the caching of @bo.
+ *
+ * TODO: Rename.
+ *
+ * Return: An encoded page directory entry. No errors.
+ */
+u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset,
+ const enum xe_cache_level level)
+{
+ u64 pde;
+ bool is_lmem;
+
+ pde = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_lmem);
+ pde |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+
+ XE_WARN_ON(IS_DGFX(xe_bo_device(bo)) && !is_lmem);
+
+ /* FIXME: I don't think the PPAT handling is correct for MTL */
+
+ if (level != XE_CACHE_NONE)
+ pde |= PPAT_CACHED_PDE;
+ else
+ pde |= PPAT_UNCACHED;
+
+ return pde;
+}
+
+static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset,
+ size_t page_size, bool *is_lmem)
+{
+ if (xe_vma_is_userptr(vma)) {
+ struct xe_res_cursor cur;
+ u64 page;
+
+ *is_lmem = false;
+ page = offset >> PAGE_SHIFT;
+ offset &= (PAGE_SIZE - 1);
+
+ xe_res_first_sg(vma->userptr.sg, page << PAGE_SHIFT, page_size,
+ &cur);
+ return xe_res_dma(&cur) + offset;
+ } else {
+ return xe_bo_addr(vma->bo, offset, page_size, is_lmem);
+ }
+}
+
+static u64 __gen8_pte_encode(u64 pte, enum xe_cache_level cache, u32 flags,
+ u32 pt_level)
+{
+ pte |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+
+ if (unlikely(flags & PTE_READ_ONLY))
+ pte &= ~GEN8_PAGE_RW;
+
+ /* FIXME: I don't think the PPAT handling is correct for MTL */
+
+ switch (cache) {
+ case XE_CACHE_NONE:
+ pte |= PPAT_UNCACHED;
+ break;
+ case XE_CACHE_WT:
+ pte |= PPAT_DISPLAY_ELLC;
+ break;
+ default:
+ pte |= PPAT_CACHED;
+ break;
+ }
+
+ if (pt_level == 1)
+ pte |= GEN8_PDE_PS_2M;
+ else if (pt_level == 2)
+ pte |= GEN8_PDPE_PS_1G;
+
+ /* XXX: Does hw support 1 GiB pages? */
+ XE_BUG_ON(pt_level > 2);
+
+ return pte;
+}
+
+/**
+ * gen8_pte_encode() - Encode a page-table entry pointing to memory.
+ * @vma: The vma representing the memory to point to.
+ * @bo: If @vma is NULL, the bo representing the memory to point to.
+ * @offset: The offset into @vma or @bo.
+ * @cache: The cache level indicating the caching of the memory pointed to.
+ * @flags: Currently only supports PTE_READ_ONLY for read-only access.
+ * @pt_level: The page-table level of the page-table into which the entry
+ * is to be inserted.
+ *
+ * TODO: Rename.
+ *
+ * Return: An encoded page-table entry. No errors.
+ */
+u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
+ u64 offset, enum xe_cache_level cache,
+ u32 flags, u32 pt_level)
+{
+ u64 pte;
+ bool is_vram;
+
+ if (vma)
+ pte = vma_addr(vma, offset, GEN8_PAGE_SIZE, &is_vram);
+ else
+ pte = xe_bo_addr(bo, offset, GEN8_PAGE_SIZE, &is_vram);
+
+ if (is_vram) {
+ pte |= GEN12_PPGTT_PTE_LM;
+ if (vma && vma->use_atomic_access_pte_bit)
+ pte |= GEN12_USM_PPGTT_PTE_AE;
+ }
+
+ return __gen8_pte_encode(pte, cache, flags, pt_level);
+}
+
+static u64 __xe_pt_empty_pte(struct xe_gt *gt, struct xe_vm *vm,
+ unsigned int level)
+{
+ u8 id = gt->info.id;
+
+ XE_BUG_ON(xe_gt_is_media_type(gt));
+
+ if (!vm->scratch_bo[id])
+ return 0;
+
+ if (level == 0) {
+ u64 empty = gen8_pte_encode(NULL, vm->scratch_bo[id], 0,
+ XE_CACHE_WB, 0, 0);
+ if (vm->flags & XE_VM_FLAGS_64K)
+ empty |= GEN12_PTE_PS64;
+
+ return empty;
+ } else {
+ return gen8_pde_encode(vm->scratch_pt[id][level - 1]->bo, 0,
+ XE_CACHE_WB);
+ }
+}
+
+/**
+ * xe_pt_create() - Create a page-table.
+ * @vm: The vm to create for.
+ * @gt: The gt to create for.
+ * @level: The page-table level.
+ *
+ * Allocate and initialize a single struct xe_pt metadata structure. Also
+ * create the corresponding page-table bo, but don't initialize it. If the
+ * level is greater than zero, then it's assumed to be a directory page-
+ * table and the directory structure is also allocated and initialized to
+ * NULL pointers.
+ *
+ * Return: A valid struct xe_pt pointer on success, an error pointer on
+ * error.
+ */
+struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt,
+ unsigned int level)
+{
+ struct xe_pt *pt;
+ struct xe_bo *bo;
+ size_t size;
+ int err;
+
+ size = !level ? sizeof(struct xe_pt) : sizeof(struct xe_pt_dir) +
+ GEN8_PDES * sizeof(struct xe_ptw *);
+ pt = kzalloc(size, GFP_KERNEL);
+ if (!pt)
+ return ERR_PTR(-ENOMEM);
+
+ bo = xe_bo_create_pin_map(vm->xe, gt, vm, SZ_4K,
+ ttm_bo_type_kernel,
+ XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+ XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT |
+ XE_BO_CREATE_PINNED_BIT);
+ if (IS_ERR(bo)) {
+ err = PTR_ERR(bo);
+ goto err_kfree;
+ }
+ pt->bo = bo;
+ pt->level = level;
+ pt->base.dir = level ? &as_xe_pt_dir(pt)->dir : NULL;
+
+ XE_BUG_ON(level > XE_VM_MAX_LEVEL);
+
+ return pt;
+
+err_kfree:
+ kfree(pt);
+ return ERR_PTR(err);
+}
+
+/**
+ * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero
+ * entries.
+ * @gt: The gt whose scratch pagetable to use.
+ * @vm: The vm we populate for.
+ * @pt: The pagetable whose bo to initialize.
+ *
+ * Populate the page-table bo of @pt with entries pointing into the gt's
+ * scratch page-table tree if any. Otherwise populate with zeros.
+ */
+void xe_pt_populate_empty(struct xe_gt *gt, struct xe_vm *vm,
+ struct xe_pt *pt)
+{
+ struct iosys_map *map = &pt->bo->vmap;
+ u64 empty;
+ int i;
+
+ XE_BUG_ON(xe_gt_is_media_type(gt));
+
+ if (!vm->scratch_bo[gt->info.id]) {
+ /*
+ * FIXME: Some of this memory may already be zeroed on allocation.
+ * Find out which memory that is and avoid this memset...
+ */
+ xe_map_memset(vm->xe, map, 0, 0, SZ_4K);
+ } else {
+ empty = __xe_pt_empty_pte(gt, vm, pt->level);
+ for (i = 0; i < GEN8_PDES; i++)
+ xe_pt_write(vm->xe, map, i, empty);
+ }
+}
+
+/**
+ * xe_pt_shift() - Return the ilog2 value of the size of the address range
+ * covered by a single entry of a page-table at a certain level.
+ * @level: The level.
+ *
+ * Return: The ilog2 value of the size of the address range covered by a
+ * single entry of a page-table at level @level.
+ */
+unsigned int xe_pt_shift(unsigned int level)
+{
+ return GEN8_PTE_SHIFT + GEN8_PDE_SHIFT * level;
+}
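+
+/*
+ * Example (illustrative only, assuming GEN8_PTE_SHIFT == 12 and
+ * GEN8_PDE_SHIFT == 9, matching the xe_normal_pt_shifts table above):
+ *
+ *	xe_pt_shift(0) == 12	-> a level-0 entry maps 4 KiB
+ *	xe_pt_shift(1) == 21	-> a level-1 entry maps 2 MiB
+ *	xe_pt_shift(2) == 30	-> a level-2 entry maps 1 GiB
+ */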
+
+/**
+ * xe_pt_destroy() - Destroy a page-table tree.
+ * @pt: The root of the page-table tree to destroy.
+ * @flags: vm flags. Currently unused.
+ * @deferred: List head of lockless list for deferred putting. NULL for
+ * immediate putting.
+ *
+ * Puts the page-table bo, recursively calls xe_pt_destroy on all children
+ * and finally frees @pt. TODO: Can we remove the @flags argument?
+ */
+void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
+{
+ int i;
+
+ if (!pt)
+ return;
+
+ XE_BUG_ON(!list_empty(&pt->bo->vmas));
+ xe_bo_unpin(pt->bo);
+ xe_bo_put_deferred(pt->bo, deferred);
+
+ if (pt->level > 0 && pt->num_live) {
+ struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt);
+
+ for (i = 0; i < GEN8_PDES; i++) {
+ if (xe_pt_entry(pt_dir, i))
+ xe_pt_destroy(xe_pt_entry(pt_dir, i), flags,
+ deferred);
+ }
+ }
+ kfree(pt);
+}
+
+/**
+ * xe_pt_create_scratch() - Setup a scratch memory pagetable tree for the
+ * given gt and vm.
+ * @xe: xe device.
+ * @gt: gt to set up for.
+ * @vm: vm to set up for.
+ *
+ * Sets up a pagetable tree with one page-table per level and a single
+ * leaf bo. All pagetable entries point to the single page-table or,
+ * for L0, the single bo one level below.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt,
+ struct xe_vm *vm)
+{
+ u8 id = gt->info.id;
+ int i;
+
+ vm->scratch_bo[id] = xe_bo_create(xe, gt, vm, SZ_4K,
+ ttm_bo_type_kernel,
+ XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+ XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT |
+ XE_BO_CREATE_PINNED_BIT);
+ if (IS_ERR(vm->scratch_bo[id]))
+ return PTR_ERR(vm->scratch_bo[id]);
+ xe_bo_pin(vm->scratch_bo[id]);
+
+ for (i = 0; i < vm->pt_root[id]->level; i++) {
+ vm->scratch_pt[id][i] = xe_pt_create(vm, gt, i);
+ if (IS_ERR(vm->scratch_pt[id][i]))
+ return PTR_ERR(vm->scratch_pt[id][i]);
+
+ xe_pt_populate_empty(gt, vm, vm->scratch_pt[id][i]);
+ }
+
+ return 0;
+}
+
+/**
+ * DOC: Pagetable building
+ *
+ * Below we use the term "page-table" for both page-directories, containing
+ * pointers to lower level page-directories or page-tables, and level 0
+ * page-tables that contain only page-table-entries pointing to memory pages.
+ *
+ * When inserting an address range in an already existing page-table tree
+ * there will typically be a set of page-tables that are shared with other
+ * address ranges, and a set that are private to this address range.
+ * The set of shared page-tables can be at most two per level,
+ * and those can't be updated immediately because the entries of those
+ * page-tables may still be in use by the gpu for other mappings. Therefore
+ * when inserting entries into those, we instead stage those insertions by
+ * adding insertion data into struct xe_vm_pgtable_update structures. This
+ * data (subtrees for the cpu and page-table-entries for the gpu) is then
+ * added in a separate commit step. CPU-data is committed while still under the
+ * vm lock, the object lock and for userptr, the notifier lock in read mode.
+ * The GPU async data is committed either by the GPU or CPU after fulfilling
+ * relevant dependencies.
+ * For non-shared page-tables (and, in fact, for shared ones that aren't
+ * existing at the time of staging), we add the data in-place without the
+ * special update structures. This private part of the page-table tree will
+ * remain disconnected from the vm page-table tree until data is committed to
+ * the shared page tables of the vm tree in the commit phase.
+ */
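+
+/*
+ * As a sizing illustration of the above: with at most two shared
+ * page-tables per level and only a single page-table at the root level, a
+ * bind or unbind touches at most XE_VM_MAX_LEVEL * 2 + 1 struct
+ * xe_vm_pgtable_update entries, which is how __xe_pt_bind_vma() and
+ * __xe_pt_unbind_vma() size their on-stack entries[] arrays and the limit
+ * enforced by xe_pt_new_shared().
+ */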
+
+struct xe_pt_update {
+ /** @update: The update structure we're building for this parent. */
+ struct xe_vm_pgtable_update *update;
+ /** @parent: The parent. Used to detect a parent change. */
+ struct xe_pt *parent;
+ /** @preexisting: Whether the parent was pre-existing or allocated */
+ bool preexisting;
+};
+
+struct xe_pt_stage_bind_walk {
+ /** @base: The base class. */
+ struct xe_pt_walk base;
+
+ /* Input parameters for the walk */
+ /** @vm: The vm we're building for. */
+ struct xe_vm *vm;
+ /** @gt: The gt we're building for. */
+ struct xe_gt *gt;
+ /** @cache: Desired cache level for the ptes */
+ enum xe_cache_level cache;
+ /** @default_pte: PTE flag only template. No address is associated */
+ u64 default_pte;
+ /** @dma_offset: DMA offset to add to the PTE. */
+ u64 dma_offset;
+ /**
+ * @needs_64K: This address range enforces 64K alignment and
+ * granularity.
+ */
+ bool needs_64K;
+ /**
+ * @pte_flags: Flags determining PTE setup. These are not flags
+ * encoded directly in the PTE. See @default_pte for those.
+ */
+ u32 pte_flags;
+
+ /* Also input, but is updated during the walk */
+ /** @curs: The DMA address cursor. */
+ struct xe_res_cursor *curs;
+ /** @va_curs_start: The virtual address corresponding to @curs->start */
+ u64 va_curs_start;
+
+ /* Output */
+ struct xe_walk_update {
+ /** @wupd.entries: Caller provided storage. */
+ struct xe_vm_pgtable_update *entries;
+ /** @wupd.num_used_entries: Number of update @entries used. */
+ unsigned int num_used_entries;
+ /** @wupd.updates: Tracks the update entry at a given level */
+ struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1];
+ } wupd;
+
+ /* Walk state */
+ /**
+ * @l0_end_addr: The end address of the current l0 leaf. Used for
+ * 64K granularity detection.
+ */
+ u64 l0_end_addr;
+ /** @addr_64K: The start address of the current 64K chunk. */
+ u64 addr_64K;
+ /** @found_64K: Whether @addr_64K actually points to a 64K chunk. */
+ bool found_64K;
+};
+
+static int
+xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent,
+ pgoff_t offset, bool alloc_entries)
+{
+ struct xe_pt_update *upd = &wupd->updates[parent->level];
+ struct xe_vm_pgtable_update *entry;
+
+ /*
+ * For *each level*, we can only have one active
+ * struct xe_pt_update at any one time. Once we move on to a
+ * new parent and page-directory, the old one is complete, and
+ * updates are either already stored in the build tree or in
+ * @wupd->entries
+ */
+ if (likely(upd->parent == parent))
+ return 0;
+
+ upd->parent = parent;
+ upd->preexisting = true;
+
+ if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1)
+ return -EINVAL;
+
+ entry = wupd->entries + wupd->num_used_entries++;
+ upd->update = entry;
+ entry->ofs = offset;
+ entry->pt_bo = parent->bo;
+ entry->pt = parent;
+ entry->flags = 0;
+ entry->qwords = 0;
+
+ if (alloc_entries) {
+ entry->pt_entries = kmalloc_array(GEN8_PDES,
+ sizeof(*entry->pt_entries),
+ GFP_KERNEL);
+ if (!entry->pt_entries)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * NOTE: This is a very frequently called function so we allow ourselves
+ * to annotate (using branch prediction hints) the fastpath of updating a
+ * non-pre-existing pagetable with leaf ptes.
+ */
+static int
+xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent,
+ pgoff_t offset, struct xe_pt *xe_child, u64 pte)
+{
+ struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level];
+ struct xe_pt_update *child_upd = xe_child ?
+ &xe_walk->wupd.updates[xe_child->level] : NULL;
+ int ret;
+
+ ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true);
+ if (unlikely(ret))
+ return ret;
+
+ /*
+ * Register this new pagetable so that it won't be recognized as
+ * a shared pagetable by a subsequent insertion.
+ */
+ if (unlikely(child_upd)) {
+ child_upd->update = NULL;
+ child_upd->parent = xe_child;
+ child_upd->preexisting = false;
+ }
+
+ if (likely(!upd->preexisting)) {
+ /* Continue building a non-connected subtree. */
+ struct iosys_map *map = &parent->bo->vmap;
+
+ if (unlikely(xe_child))
+ parent->base.dir->entries[offset] = &xe_child->base;
+
+ xe_pt_write(xe_walk->vm->xe, map, offset, pte);
+ parent->num_live++;
+ } else {
+ /* Shared pt. Stage update. */
+ unsigned int idx;
+ struct xe_vm_pgtable_update *entry = upd->update;
+
+ idx = offset - entry->ofs;
+ entry->pt_entries[idx].pt = xe_child;
+ entry->pt_entries[idx].pte = pte;
+ entry->qwords++;
+ }
+
+ return 0;
+}
+
+static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level,
+ struct xe_pt_stage_bind_walk *xe_walk)
+{
+ u64 size, dma;
+
+ /* Does the virtual range requested cover a huge pte? */
+ if (!xe_pt_covers(addr, next, level, &xe_walk->base))
+ return false;
+
+ /* Does the DMA segment cover the whole pte? */
+ if (next - xe_walk->va_curs_start > xe_walk->curs->size)
+ return false;
+
+ /* Is the DMA address huge PTE size aligned? */
+ size = next - addr;
+ dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs);
+
+ return IS_ALIGNED(dma, size);
+}
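+
+/*
+ * Example (illustrative numbers only): a level-1 huge PTE maps 2 MiB, so
+ * the VA range [0x200000, 0x400000) qualifies only if the requested range
+ * covers that whole entry, the DMA cursor still holds at least 2 MiB, and
+ * the DMA address backing 0x200000 is itself 2 MiB aligned.
+ */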
+
+/*
+ * Scan the requested mapping to check whether it can be done entirely
+ * with 64K PTEs.
+ */
+static bool
+xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
+{
+ struct xe_res_cursor curs = *xe_walk->curs;
+
+ if (!IS_ALIGNED(addr, SZ_64K))
+ return false;
+
+ if (next > xe_walk->l0_end_addr)
+ return false;
+
+ xe_res_next(&curs, addr - xe_walk->va_curs_start);
+ for (; addr < next; addr += SZ_64K) {
+ if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K)
+ return false;
+
+ xe_res_next(&curs, SZ_64K);
+ }
+
+ return addr == next;
+}
+
+/*
+ * For non-compact "normal" 4K level-0 pagetables, we want to try to group
+ * addresses together in 64K-contiguous regions to add a 64K TLB hint for the
+ * device to the PTE.
+ * This function determines whether the address is part of such a
+ * segment. For VRAM in normal pagetables, this is strictly necessary on
+ * some devices.
+ */
+static bool
+xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
+{
+ /* Address is within an already found 64k region */
+ if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K)
+ return true;
+
+ xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk);
+ xe_walk->addr_64K = addr;
+
+ return xe_walk->found_64K;
+}
+
+static int
+xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
+ unsigned int level, u64 addr, u64 next,
+ struct xe_ptw **child,
+ enum page_walk_action *action,
+ struct xe_pt_walk *walk)
+{
+ struct xe_pt_stage_bind_walk *xe_walk =
+ container_of(walk, typeof(*xe_walk), base);
+ struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base);
+ struct xe_pt *xe_child;
+ bool covers;
+ int ret = 0;
+ u64 pte;
+
+ /* Is this a leaf entry? */
+ if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) {
+ struct xe_res_cursor *curs = xe_walk->curs;
+
+ XE_WARN_ON(xe_walk->va_curs_start != addr);
+
+ pte = __gen8_pte_encode(xe_res_dma(curs) + xe_walk->dma_offset,
+ xe_walk->cache, xe_walk->pte_flags,
+ level);
+ pte |= xe_walk->default_pte;
+
+ /*
+ * Set the GEN12_PTE_PS64 hint if possible, otherwise if
+ * this device *requires* 64K PTE size for VRAM, fail.
+ */
+ if (level == 0 && !xe_parent->is_compact) {
+ if (xe_pt_is_pte_ps64K(addr, next, xe_walk))
+ pte |= GEN12_PTE_PS64;
+ else if (XE_WARN_ON(xe_walk->needs_64K))
+ return -EINVAL;
+ }
+
+ ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte);
+ if (unlikely(ret))
+ return ret;
+
+ xe_res_next(curs, next - addr);
+ xe_walk->va_curs_start = next;
+ *action = ACTION_CONTINUE;
+
+ return ret;
+ }
+
+ /*
+ * Descending to lower level. Determine if we need to allocate a
+ * new page table or -directory, which we do if there is no
+ * previous one or there is one we can completely replace.
+ */
+ if (level == 1) {
+ walk->shifts = xe_normal_pt_shifts;
+ xe_walk->l0_end_addr = next;
+ }
+
+ covers = xe_pt_covers(addr, next, level, &xe_walk->base);
+ if (covers || !*child) {
+ u64 flags = 0;
+
+ xe_child = xe_pt_create(xe_walk->vm, xe_walk->gt, level - 1);
+ if (IS_ERR(xe_child))
+ return PTR_ERR(xe_child);
+
+ xe_pt_set_addr(xe_child,
+ round_down(addr, 1ull << walk->shifts[level]));
+
+ if (!covers)
+ xe_pt_populate_empty(xe_walk->gt, xe_walk->vm, xe_child);
+
+ *child = &xe_child->base;
+
+ /*
+ * Prefer the compact pagetable layout for L0 if possible.
+ * TODO: Suballocate the pt bo to avoid wasting a lot of
+ * memory.
+ */
+ if (GRAPHICS_VERx100(xe_walk->gt->xe) >= 1250 && level == 1 &&
+ covers && xe_pt_scan_64K(addr, next, xe_walk)) {
+ walk->shifts = xe_compact_pt_shifts;
+ flags |= GEN12_PDE_64K;
+ xe_child->is_compact = true;
+ }
+
+ pte = gen8_pde_encode(xe_child->bo, 0, xe_walk->cache) | flags;
+ ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child,
+ pte);
+ }
+
+ *action = ACTION_SUBTREE;
+ return ret;
+}
+
+static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
+ .pt_entry = xe_pt_stage_bind_entry,
+};
+
+/**
+ * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address
+ * range.
+ * @gt: The gt we're building for.
+ * @vma: The vma indicating the address range.
+ * @entries: Storage for the update entries used for connecting the tree to
+ * the main tree at commit time.
+ * @num_entries: On output contains the number of @entries used.
+ *
+ * This function builds a disconnected page-table tree for a given address
+ * range. The tree is connected to the main vm tree for the gpu using
+ * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind().
+ * The function builds xe_vm_pgtable_update structures for already existing
+ * shared page-tables, and non-existing shared and non-shared page-tables
+ * are built and populated directly.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+static int
+xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma,
+ struct xe_vm_pgtable_update *entries, u32 *num_entries)
+{
+ struct xe_bo *bo = vma->bo;
+ bool is_vram = !xe_vma_is_userptr(vma) && bo && xe_bo_is_vram(bo);
+ struct xe_res_cursor curs;
+ struct xe_pt_stage_bind_walk xe_walk = {
+ .base = {
+ .ops = &xe_pt_stage_bind_ops,
+ .shifts = xe_normal_pt_shifts,
+ .max_level = XE_PT_HIGHEST_LEVEL,
+ },
+ .vm = vma->vm,
+ .gt = gt,
+ .curs = &curs,
+ .va_curs_start = vma->start,
+ .pte_flags = vma->pte_flags,
+ .wupd.entries = entries,
+ .needs_64K = (vma->vm->flags & XE_VM_FLAGS_64K) && is_vram,
+ };
+ struct xe_pt *pt = vma->vm->pt_root[gt->info.id];
+ int ret;
+
+ if (is_vram) {
+ xe_walk.default_pte = GEN12_PPGTT_PTE_LM;
+ if (vma && vma->use_atomic_access_pte_bit)
+ xe_walk.default_pte |= GEN12_USM_PPGTT_PTE_AE;
+ xe_walk.dma_offset = gt->mem.vram.io_start -
+ gt_to_xe(gt)->mem.vram.io_start;
+ xe_walk.cache = XE_CACHE_WB;
+ } else {
+ if (!xe_vma_is_userptr(vma) && bo->flags & XE_BO_SCANOUT_BIT)
+ xe_walk.cache = XE_CACHE_WT;
+ else
+ xe_walk.cache = XE_CACHE_WB;
+ }
+
+ xe_bo_assert_held(bo);
+ if (xe_vma_is_userptr(vma))
+ xe_res_first_sg(vma->userptr.sg, 0, vma->end - vma->start + 1,
+ &curs);
+ else if (xe_bo_is_vram(bo))
+ xe_res_first(bo->ttm.resource, vma->bo_offset,
+ vma->end - vma->start + 1, &curs);
+ else
+ xe_res_first_sg(xe_bo_get_sg(bo), vma->bo_offset,
+ vma->end - vma->start + 1, &curs);
+
+ ret = xe_pt_walk_range(&pt->base, pt->level, vma->start, vma->end + 1,
+ &xe_walk.base);
+
+ *num_entries = xe_walk.wupd.num_used_entries;
+ return ret;
+}
+
+/**
+ * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a
+ * shared pagetable.
+ * @addr: The start address within the non-shared pagetable.
+ * @end: The end address within the non-shared pagetable.
+ * @level: The level of the non-shared pagetable.
+ * @walk: Walk info. The function adjusts the walk action.
+ * @action: next action to perform (see enum page_walk_action)
+ * @offset: Ignored on input, First non-shared entry on output.
+ * @end_offset: Ignored on input, Last non-shared entry + 1 on output.
+ *
+ * A shared page-table has some entries that belong to the address range
+ * and others that don't. This function determines the entries that belong
+ * fully to the address range. Depending on level, some entries may
+ * partially belong to the address range (that can't happen at level 0).
+ * The function detects that and adjusts those offsets to not include those
+ * partial entries. Iff it does detect partial entries, we know that there must
+ * be shared page tables also at lower levels, so it adjusts the walk action
+ * accordingly.
+ *
+ * Return: true if there were non-shared entries, false otherwise.
+ */
+static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level,
+ struct xe_pt_walk *walk,
+ enum page_walk_action *action,
+ pgoff_t *offset, pgoff_t *end_offset)
+{
+ u64 size = 1ull << walk->shifts[level];
+
+ *offset = xe_pt_offset(addr, level, walk);
+ *end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset;
+
+ if (!level)
+ return true;
+
+ /*
+ * If addr or next are not size aligned, there are shared pts at lower
+ * level, so in that case traverse down the subtree
+ */
+ *action = ACTION_CONTINUE;
+ if (!IS_ALIGNED(addr, size)) {
+ *action = ACTION_SUBTREE;
+ (*offset)++;
+ }
+
+ if (!IS_ALIGNED(end, size)) {
+ *action = ACTION_SUBTREE;
+ (*end_offset)--;
+ }
+
+ return *end_offset > *offset;
+}
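+
+/*
+ * Worked example (illustrative numbers only) for a level-1 page-table with
+ * 2 MiB entries and the range [0x300000, 0x800000): @offset starts at
+ * entry 1 and @end_offset at 4. Since 0x300000 is not 2 MiB aligned,
+ * entry 1 is only partially covered, so @offset is bumped to 2 and the
+ * walk action becomes ACTION_SUBTREE; entries 2..3, covering
+ * [0x400000, 0x800000), are fully non-shared.
+ */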
+
+struct xe_pt_zap_ptes_walk {
+ /** @base: The walk base-class */
+ struct xe_pt_walk base;
+
+ /* Input parameters for the walk */
+ /** @gt: The gt we're building for */
+ struct xe_gt *gt;
+
+ /* Output */
+ /** @needs_invalidate: Whether we need to invalidate TLB */
+ bool needs_invalidate;
+};
+
+static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset,
+ unsigned int level, u64 addr, u64 next,
+ struct xe_ptw **child,
+ enum page_walk_action *action,
+ struct xe_pt_walk *walk)
+{
+ struct xe_pt_zap_ptes_walk *xe_walk =
+ container_of(walk, typeof(*xe_walk), base);
+ struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+ pgoff_t end_offset;
+
+ XE_BUG_ON(!*child);
+ XE_BUG_ON(!level && xe_child->is_compact);
+
+ /*
+ * Note that we're called from an entry callback, and we're dealing
+ * with the child of that entry rather than the parent, so need to
+ * adjust level down.
+ */
+ if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset,
+ &end_offset)) {
+ xe_map_memset(gt_to_xe(xe_walk->gt), &xe_child->bo->vmap,
+ offset * sizeof(u64), 0,
+ (end_offset - offset) * sizeof(u64));
+ xe_walk->needs_invalidate = true;
+ }
+
+ return 0;
+}
+
+static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = {
+ .pt_entry = xe_pt_zap_ptes_entry,
+};
+
+/**
+ * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range
+ * @gt: The gt we're zapping for.
+ * @vma: GPU VMA detailing address range.
+ *
+ * Eviction and userptr invalidation need to be able to zap the
+ * gpu ptes of a given address range in pagefaulting mode.
+ * In order to be able to do that, this function needs access to the shared
+ * page-table entries so it can either clear the leaf PTEs or
+ * clear the pointers to lower-level page-tables. The caller is required
+ * to hold the necessary locks to ensure that neither the page-table
+ * connectivity nor the page-table entries of the range are updated from
+ * under us.
+ *
+ * Return: Whether ptes were actually updated and a TLB invalidation is
+ * required.
+ */
+bool xe_pt_zap_ptes(struct xe_gt *gt, struct xe_vma *vma)
+{
+ struct xe_pt_zap_ptes_walk xe_walk = {
+ .base = {
+ .ops = &xe_pt_zap_ptes_ops,
+ .shifts = xe_normal_pt_shifts,
+ .max_level = XE_PT_HIGHEST_LEVEL,
+ },
+ .gt = gt,
+ };
+ struct xe_pt *pt = vma->vm->pt_root[gt->info.id];
+
+ if (!(vma->gt_present & BIT(gt->info.id)))
+ return false;
+
+ (void)xe_pt_walk_shared(&pt->base, pt->level, vma->start, vma->end + 1,
+ &xe_walk.base);
+
+ return xe_walk.needs_invalidate;
+}
+
+static void
+xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_gt *gt,
+ struct iosys_map *map, void *data,
+ u32 qword_ofs, u32 num_qwords,
+ const struct xe_vm_pgtable_update *update)
+{
+ struct xe_pt_entry *ptes = update->pt_entries;
+ u64 *ptr = data;
+ u32 i;
+
+ XE_BUG_ON(xe_gt_is_media_type(gt));
+
+ for (i = 0; i < num_qwords; i++) {
+ if (map)
+ xe_map_wr(gt_to_xe(gt), map, (qword_ofs + i) *
+ sizeof(u64), u64, ptes[i].pte);
+ else
+ ptr[i] = ptes[i].pte;
+ }
+}
+
+static void xe_pt_abort_bind(struct xe_vma *vma,
+ struct xe_vm_pgtable_update *entries,
+ u32 num_entries)
+{
+ u32 i, j;
+
+ for (i = 0; i < num_entries; i++) {
+ if (!entries[i].pt_entries)
+ continue;
+
+ for (j = 0; j < entries[i].qwords; j++)
+ xe_pt_destroy(entries[i].pt_entries[j].pt, vma->vm->flags, NULL);
+ kfree(entries[i].pt_entries);
+ }
+}
+
+static void xe_pt_commit_locks_assert(struct xe_vma *vma)
+{
+ struct xe_vm *vm = vma->vm;
+
+ lockdep_assert_held(&vm->lock);
+
+ if (xe_vma_is_userptr(vma))
+ lockdep_assert_held_read(&vm->userptr.notifier_lock);
+ else
+ dma_resv_assert_held(vma->bo->ttm.base.resv);
+
+ dma_resv_assert_held(&vm->resv);
+}
+
+static void xe_pt_commit_bind(struct xe_vma *vma,
+ struct xe_vm_pgtable_update *entries,
+ u32 num_entries, bool rebind,
+ struct llist_head *deferred)
+{
+ u32 i, j;
+
+ xe_pt_commit_locks_assert(vma);
+
+ for (i = 0; i < num_entries; i++) {
+ struct xe_pt *pt = entries[i].pt;
+ struct xe_pt_dir *pt_dir;
+
+ if (!rebind)
+ pt->num_live += entries[i].qwords;
+
+ if (!pt->level) {
+ kfree(entries[i].pt_entries);
+ continue;
+ }
+
+ pt_dir = as_xe_pt_dir(pt);
+ for (j = 0; j < entries[i].qwords; j++) {
+ u32 j_ = j + entries[i].ofs;
+ struct xe_pt *newpte = entries[i].pt_entries[j].pt;
+
+ if (xe_pt_entry(pt_dir, j_))
+ xe_pt_destroy(xe_pt_entry(pt_dir, j_),
+ vma->vm->flags, deferred);
+
+ pt_dir->dir.entries[j_] = &newpte->base;
+ }
+ kfree(entries[i].pt_entries);
+ }
+}
+
+static int
+xe_pt_prepare_bind(struct xe_gt *gt, struct xe_vma *vma,
+ struct xe_vm_pgtable_update *entries, u32 *num_entries,
+ bool rebind)
+{
+ int err;
+
+ *num_entries = 0;
+ err = xe_pt_stage_bind(gt, vma, entries, num_entries);
+ if (!err)
+ BUG_ON(!*num_entries);
+ else /* abort! */
+ xe_pt_abort_bind(vma, entries, *num_entries);
+
+ return err;
+}
+
+static void xe_vm_dbg_print_entries(struct xe_device *xe,
+ const struct xe_vm_pgtable_update *entries,
+ unsigned int num_entries)
+#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM))
+{
+ unsigned int i;
+
+ vm_dbg(&xe->drm, "%u entries to update\n", num_entries);
+ for (i = 0; i < num_entries; i++) {
+ const struct xe_vm_pgtable_update *entry = &entries[i];
+ struct xe_pt *xe_pt = entry->pt;
+ u64 page_size = 1ull << xe_pt_shift(xe_pt->level);
+ u64 end;
+ u64 start;
+
+ XE_BUG_ON(entry->pt->is_compact);
+ start = entry->ofs * page_size;
+ end = start + page_size * entry->qwords;
+ vm_dbg(&xe->drm,
+ "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n",
+ i, xe_pt->level, entry->ofs, entry->qwords,
+ xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0);
+ }
+}
+#else
+{}
+#endif
+
+#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT
+
+static bool xe_pt_userptr_inject_eagain(struct xe_vma *vma)
+{
+ u32 divisor = vma->userptr.divisor ? vma->userptr.divisor : 2;
+ static u32 count;
+
+ if (count++ % divisor == divisor - 1) {
+ struct xe_vm *vm = vma->vm;
+
+ vma->userptr.divisor = divisor << 1;
+ spin_lock(&vm->userptr.invalidated_lock);
+ list_move_tail(&vma->userptr.invalidate_link,
+ &vm->userptr.invalidated);
+ spin_unlock(&vm->userptr.invalidated_lock);
+ return true;
+ }
+
+ return false;
+}
+
+#else
+
+static bool xe_pt_userptr_inject_eagain(struct xe_vma *vma)
+{
+ return false;
+}
+
+#endif
+
+/**
+ * struct xe_pt_migrate_pt_update - Callback argument for pre-commit callbacks
+ * @base: Base we derive from.
+ * @bind: Whether this is a bind or an unbind operation. A bind operation
+ * makes the pre-commit callback error with -EAGAIN if it detects a
+ * pending invalidation.
+ * @locked: Whether the pre-commit callback locked the userptr notifier lock
+ * and it needs unlocking.
+ */
+struct xe_pt_migrate_pt_update {
+ struct xe_migrate_pt_update base;
+ bool bind;
+ bool locked;
+};
+
+static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
+{
+ struct xe_pt_migrate_pt_update *userptr_update =
+ container_of(pt_update, typeof(*userptr_update), base);
+ struct xe_vma *vma = pt_update->vma;
+ unsigned long notifier_seq = vma->userptr.notifier_seq;
+ struct xe_vm *vm = vma->vm;
+
+ userptr_update->locked = false;
+
+ /*
+ * Wait until nobody is running the invalidation notifier, and
+ * since we're exiting the loop holding the notifier lock,
+ * nobody can proceed invalidating either.
+ *
+ * Note that we don't update the vma->userptr.notifier_seq since
+ * we don't update the userptr pages.
+ */
+ do {
+ down_read(&vm->userptr.notifier_lock);
+ if (!mmu_interval_read_retry(&vma->userptr.notifier,
+ notifier_seq))
+ break;
+
+ up_read(&vm->userptr.notifier_lock);
+
+ if (userptr_update->bind)
+ return -EAGAIN;
+
+ notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
+ } while (true);
+
+ /* Inject errors to test whether they are handled correctly */
+ if (userptr_update->bind && xe_pt_userptr_inject_eagain(vma)) {
+ up_read(&vm->userptr.notifier_lock);
+ return -EAGAIN;
+ }
+
+ userptr_update->locked = true;
+
+ return 0;
+}
+
+static const struct xe_migrate_pt_update_ops bind_ops = {
+ .populate = xe_vm_populate_pgtable,
+};
+
+static const struct xe_migrate_pt_update_ops userptr_bind_ops = {
+ .populate = xe_vm_populate_pgtable,
+ .pre_commit = xe_pt_userptr_pre_commit,
+};
+
+/**
+ * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma
+ * address range.
+ * @gt: The gt to bind for.
+ * @vma: The vma to bind.
+ * @e: The engine with which to do pipelined page-table updates.
+ * @syncs: Entries to sync on before binding the built tree to the live vm tree.
+ * @num_syncs: Number of @sync entries.
+ * @rebind: Whether we're rebinding this vma to the same address range without
+ * an unbind in-between.
+ *
+ * This function builds a page-table tree (see xe_pt_stage_bind() for more
+ * information on page-table building), and the xe_vm_pgtable_update entries
+ * abstracting the operations needed to attach it to the main vm tree. It
+ * then takes the relevant locks and updates the metadata side of the main
+ * vm tree and submits the operations for pipelined attachment of the
+ * gpu page-table to the vm main tree (which can be done either by the
+ * cpu or the gpu).
+ *
+ * Return: A valid dma-fence representing the pipelined attachment operation
+ * on success, an error pointer on error.
+ */
+struct dma_fence *
+__xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
+ struct xe_sync_entry *syncs, u32 num_syncs,
+ bool rebind)
+{
+ struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
+ struct xe_pt_migrate_pt_update bind_pt_update = {
+ .base = {
+ .ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops,
+ .vma = vma,
+ },
+ .bind = true,
+ };
+ struct xe_vm *vm = vma->vm;
+ u32 num_entries;
+ struct dma_fence *fence;
+ int err;
+
+ bind_pt_update.locked = false;
+ xe_bo_assert_held(vma->bo);
+ xe_vm_assert_held(vm);
+ XE_BUG_ON(xe_gt_is_media_type(gt));
+
+ vm_dbg(&vma->vm->xe->drm,
+ "Preparing bind, with range [%llx...%llx) engine %p.\n",
+ vma->start, vma->end, e);
+
+ err = xe_pt_prepare_bind(gt, vma, entries, &num_entries, rebind);
+ if (err)
+ goto err;
+ XE_BUG_ON(num_entries > ARRAY_SIZE(entries));
+
+ xe_vm_dbg_print_entries(gt_to_xe(gt), entries, num_entries);
+
+ fence = xe_migrate_update_pgtables(gt->migrate,
+ vm, vma->bo,
+ e ? e : vm->eng[gt->info.id],
+ entries, num_entries,
+ syncs, num_syncs,
+ &bind_pt_update.base);
+ if (!IS_ERR(fence)) {
+ LLIST_HEAD(deferred);
+
+ /* add shared fence now for pagetable delayed destroy */
+ dma_resv_add_fence(&vm->resv, fence, !rebind &&
+ vma->last_munmap_rebind ?
+ DMA_RESV_USAGE_KERNEL :
+ DMA_RESV_USAGE_BOOKKEEP);
+
+ if (!xe_vma_is_userptr(vma) && !vma->bo->vm)
+ dma_resv_add_fence(vma->bo->ttm.base.resv, fence,
+ DMA_RESV_USAGE_BOOKKEEP);
+ xe_pt_commit_bind(vma, entries, num_entries, rebind,
+ bind_pt_update.locked ? &deferred : NULL);
+
+ /* This vma is live (again?) now */
+ vma->gt_present |= BIT(gt->info.id);
+
+ if (bind_pt_update.locked) {
+ vma->userptr.initial_bind = true;
+ up_read(&vm->userptr.notifier_lock);
+ xe_bo_put_commit(&deferred);
+ }
+ if (!rebind && vma->last_munmap_rebind &&
+ xe_vm_in_compute_mode(vm))
+ queue_work(vm->xe->ordered_wq,
+ &vm->preempt.rebind_work);
+ } else {
+ if (bind_pt_update.locked)
+ up_read(&vm->userptr.notifier_lock);
+ xe_pt_abort_bind(vma, entries, num_entries);
+ }
+
+ return fence;
+
+err:
+ return ERR_PTR(err);
+}
+
+struct xe_pt_stage_unbind_walk {
+ /** @base: The pagewalk base-class. */
+ struct xe_pt_walk base;
+
+ /* Input parameters for the walk */
+ /** @gt: The gt we're unbinding from. */
+ struct xe_gt *gt;
+
+ /**
+ * @modified_start: Walk range start, modified to include any
+ * shared pagetables that we're the only user of and can thus
+ * treat as private.
+ */
+ u64 modified_start;
+ /** @modified_end: Walk range end, modified like @modified_start. */
+ u64 modified_end;
+
+ /* Output */
+ /** @wupd: Structure to track the page-table updates we're building */
+ struct xe_walk_update wupd;
+};
+
+/*
+ * Check whether this range is the only one populating this pagetable,
+ * and in that case, update the walk range checks so that higher levels don't
+ * view us as a shared pagetable.
+ */
+static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level,
+ const struct xe_pt *child,
+ enum page_walk_action *action,
+ struct xe_pt_walk *walk)
+{
+ struct xe_pt_stage_unbind_walk *xe_walk =
+ container_of(walk, typeof(*xe_walk), base);
+ unsigned int shift = walk->shifts[level];
+ u64 size = 1ull << shift;
+
+ if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) &&
+ ((next - addr) >> shift) == child->num_live) {
+ u64 size = 1ull << walk->shifts[level + 1];
+
+ *action = ACTION_CONTINUE;
+
+ if (xe_walk->modified_start >= addr)
+ xe_walk->modified_start = round_down(addr, size);
+ if (xe_walk->modified_end <= next)
+ xe_walk->modified_end = round_up(next, size);
+
+ return true;
+ }
+
+ return false;
+}
+
+static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
+ unsigned int level, u64 addr, u64 next,
+ struct xe_ptw **child,
+ enum page_walk_action *action,
+ struct xe_pt_walk *walk)
+{
+ struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+
+ XE_BUG_ON(!*child);
+ XE_BUG_ON(!level && xe_child->is_compact);
+
+ xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk);
+
+ return 0;
+}
+
+static int
+xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset,
+ unsigned int level, u64 addr, u64 next,
+ struct xe_ptw **child,
+ enum page_walk_action *action,
+ struct xe_pt_walk *walk)
+{
+ struct xe_pt_stage_unbind_walk *xe_walk =
+ container_of(walk, typeof(*xe_walk), base);
+ struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+ pgoff_t end_offset;
+ u64 size = 1ull << walk->shifts[--level];
+
+ if (!IS_ALIGNED(addr, size))
+ addr = xe_walk->modified_start;
+ if (!IS_ALIGNED(next, size))
+ next = xe_walk->modified_end;
+
+ /* Parent == *child is the root pt. Don't kill it. */
+ if (parent != *child &&
+ xe_pt_check_kill(addr, next, level, xe_child, action, walk))
+ return 0;
+
+ if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset,
+ &end_offset))
+ return 0;
+
+ (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false);
+ xe_walk->wupd.updates[level].update->qwords = end_offset - offset;
+
+ return 0;
+}
+
+static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = {
+ .pt_entry = xe_pt_stage_unbind_entry,
+ .pt_post_descend = xe_pt_stage_unbind_post_descend,
+};
+
+/**
+ * xe_pt_stage_unbind() - Build page-table update structures for an unbind
+ * operation
+ * @gt: The gt we're unbinding for.
+ * @vma: The vma we're unbinding.
+ * @entries: Caller-provided storage for the update structures.
+ *
+ * Builds page-table update structures for an unbind operation. The function
+ * will attempt to remove all page-tables that we're the only user
+ * of, and for that to work, the unbind operation must be committed in the
+ * same critical section that blocks racing binds to the same page-table tree.
+ *
+ * Return: The number of entries used.
+ */
+static unsigned int xe_pt_stage_unbind(struct xe_gt *gt, struct xe_vma *vma,
+ struct xe_vm_pgtable_update *entries)
+{
+ struct xe_pt_stage_unbind_walk xe_walk = {
+ .base = {
+ .ops = &xe_pt_stage_unbind_ops,
+ .shifts = xe_normal_pt_shifts,
+ .max_level = XE_PT_HIGHEST_LEVEL,
+ },
+ .gt = gt,
+ .modified_start = vma->start,
+ .modified_end = vma->end + 1,
+ .wupd.entries = entries,
+ };
+ struct xe_pt *pt = vma->vm->pt_root[gt->info.id];
+
+ (void)xe_pt_walk_shared(&pt->base, pt->level, vma->start, vma->end + 1,
+ &xe_walk.base);
+
+ return xe_walk.wupd.num_used_entries;
+}
+
+static void
+xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update,
+ struct xe_gt *gt, struct iosys_map *map,
+ void *ptr, u32 qword_ofs, u32 num_qwords,
+ const struct xe_vm_pgtable_update *update)
+{
+ struct xe_vma *vma = pt_update->vma;
+ u64 empty = __xe_pt_empty_pte(gt, vma->vm, update->pt->level);
+ int i;
+
+ XE_BUG_ON(xe_gt_is_media_type(gt));
+
+ if (map && map->is_iomem)
+ for (i = 0; i < num_qwords; ++i)
+ xe_map_wr(gt_to_xe(gt), map, (qword_ofs + i) *
+ sizeof(u64), u64, empty);
+ else if (map)
+ memset64(map->vaddr + qword_ofs * sizeof(u64), empty,
+ num_qwords);
+ else
+ memset64(ptr, empty, num_qwords);
+}
+
+static void
+xe_pt_commit_unbind(struct xe_vma *vma,
+ struct xe_vm_pgtable_update *entries, u32 num_entries,
+ struct llist_head *deferred)
+{
+ u32 j;
+
+ xe_pt_commit_locks_assert(vma);
+
+ for (j = 0; j < num_entries; ++j) {
+ struct xe_vm_pgtable_update *entry = &entries[j];
+ struct xe_pt *pt = entry->pt;
+
+ pt->num_live -= entry->qwords;
+ if (pt->level) {
+ struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt);
+ u32 i;
+
+ for (i = entry->ofs; i < entry->ofs + entry->qwords;
+ i++) {
+ if (xe_pt_entry(pt_dir, i))
+ xe_pt_destroy(xe_pt_entry(pt_dir, i),
+ vma->vm->flags, deferred);
+
+ pt_dir->dir.entries[i] = NULL;
+ }
+ }
+ }
+}
+
+static const struct xe_migrate_pt_update_ops unbind_ops = {
+ .populate = xe_migrate_clear_pgtable_callback,
+};
+
+static const struct xe_migrate_pt_update_ops userptr_unbind_ops = {
+ .populate = xe_migrate_clear_pgtable_callback,
+ .pre_commit = xe_pt_userptr_pre_commit,
+};
+
+/**
+ * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma
+ * address range.
+ * @gt: The gt to unbind for.
+ * @vma: The vma to unbind.
+ * @e: The engine with which to do pipelined page-table updates.
+ * @syncs: Entries to sync on before disconnecting the tree to be destroyed.
+ * @num_syncs: Number of @sync entries.
+ *
+ * This function builds the xe_vm_pgtable_update entries abstracting the
+ * operations needed to detach the page-table tree to be destroyed from the
+ * main vm tree.
+ * It then takes the relevant locks and submits the operations for
+ * pipelined detachment of the gpu page-table from the vm main tree
+ * (which can be done either by the cpu or the gpu). Finally it frees the
+ * detached page-table tree.
+ *
+ * Return: A valid dma-fence representing the pipelined detachment operation
+ * on success, an error pointer on error.
+ */
+struct dma_fence *
+__xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
+ struct xe_sync_entry *syncs, u32 num_syncs)
+{
+ struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
+ struct xe_pt_migrate_pt_update unbind_pt_update = {
+ .base = {
+ .ops = xe_vma_is_userptr(vma) ? &userptr_unbind_ops :
+ &unbind_ops,
+ .vma = vma,
+ },
+ };
+ struct xe_vm *vm = vma->vm;
+ u32 num_entries;
+ struct dma_fence *fence = NULL;
+ LLIST_HEAD(deferred);
+
+ xe_bo_assert_held(vma->bo);
+ xe_vm_assert_held(vm);
+ XE_BUG_ON(xe_gt_is_media_type(gt));
+
+ vm_dbg(&vma->vm->xe->drm,
+ "Preparing unbind, with range [%llx...%llx) engine %p.\n",
+ vma->start, vma->end, e);
+
+ num_entries = xe_pt_stage_unbind(gt, vma, entries);
+ XE_BUG_ON(num_entries > ARRAY_SIZE(entries));
+
+ xe_vm_dbg_print_entries(gt_to_xe(gt), entries, num_entries);
+
+ /*
+ * Even if we were already evicted and are unbinding to destroy, we need to
+ * clear again here. The eviction may have updated pagetables at a
+ * lower level, because it needs to be more conservative.
+ */
+ fence = xe_migrate_update_pgtables(gt->migrate,
+ vm, NULL, e ? e :
+ vm->eng[gt->info.id],
+ entries, num_entries,
+ syncs, num_syncs,
+ &unbind_pt_update.base);
+ if (!IS_ERR(fence)) {
+ /* add shared fence now for pagetable delayed destroy */
+ dma_resv_add_fence(&vm->resv, fence,
+ DMA_RESV_USAGE_BOOKKEEP);
+
+ /* This fence will be installed by caller when doing eviction */
+ if (!xe_vma_is_userptr(vma) && !vma->bo->vm)
+ dma_resv_add_fence(vma->bo->ttm.base.resv, fence,
+ DMA_RESV_USAGE_BOOKKEEP);
+ xe_pt_commit_unbind(vma, entries, num_entries,
+ unbind_pt_update.locked ? &deferred : NULL);
+ vma->gt_present &= ~BIT(gt->info.id);
+ }
+
+ if (!vma->gt_present)
+ list_del_init(&vma->rebind_link);
+
+ if (unbind_pt_update.locked) {
+ XE_WARN_ON(!xe_vma_is_userptr(vma));
+
+ if (!vma->gt_present) {
+ spin_lock(&vm->userptr.invalidated_lock);
+ list_del_init(&vma->userptr.invalidate_link);
+ spin_unlock(&vm->userptr.invalidated_lock);
+ }
+ up_read(&vm->userptr.notifier_lock);
+ xe_bo_put_commit(&deferred);
+ }
+
+ return fence;
+}
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
new file mode 100644
index 000000000000..1152043e5c63
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef _XE_PT_H_
+#define _XE_PT_H_
+
+#include <linux/types.h>
+
+#include "xe_pt_types.h"
+
+struct dma_fence;
+struct xe_bo;
+struct xe_device;
+struct xe_engine;
+struct xe_gt;
+struct xe_sync_entry;
+struct xe_vm;
+struct xe_vma;
+
+#define xe_pt_write(xe, map, idx, data) \
+ xe_map_wr(xe, map, (idx) * sizeof(u64), u64, data)
+
+unsigned int xe_pt_shift(unsigned int level);
+
+struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt,
+ unsigned int level);
+
+int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt,
+ struct xe_vm *vm);
+
+void xe_pt_populate_empty(struct xe_gt *gt, struct xe_vm *vm,
+ struct xe_pt *pt);
+
+void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred);
+
+struct dma_fence *
+__xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
+ struct xe_sync_entry *syncs, u32 num_syncs,
+ bool rebind);
+
+struct dma_fence *
+__xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
+ struct xe_sync_entry *syncs, u32 num_syncs);
+
+bool xe_pt_zap_ptes(struct xe_gt *gt, struct xe_vma *vma);
+
+u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset,
+ const enum xe_cache_level level);
+
+u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
+ u64 offset, enum xe_cache_level cache,
+ u32 flags, u32 pt_level);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
new file mode 100644
index 000000000000..2ed64c0a4485
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_PT_TYPES_H_
+#define _XE_PT_TYPES_H_
+
+#include "xe_pt_walk.h"
+
+enum xe_cache_level {
+ XE_CACHE_NONE,
+ XE_CACHE_WT,
+ XE_CACHE_WB,
+};
+
+#define XE_VM_MAX_LEVEL 4
+
+struct xe_pt {
+ struct xe_ptw base;
+ struct xe_bo *bo;
+ unsigned int level;
+ unsigned int num_live;
+ bool rebind;
+ bool is_compact;
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+ /** @addr: Virtual address start of the page-table. */
+ u64 addr;
+#endif
+};
+
+struct xe_pt_entry {
+ struct xe_pt *pt;
+ u64 pte;
+};
+
+struct xe_vm_pgtable_update {
+ /** @pt_bo: page table bo to write to */
+ struct xe_bo *pt_bo;
+
+ /** @ofs: offset inside the page-table bo to begin writing to (in qwords) */
+ u32 ofs;
+
+ /** @qwords: number of PTEs to write */
+ u32 qwords;
+
+ /** @pt: opaque pointer useful for the caller of xe_migrate_update_pgtables */
+ struct xe_pt *pt;
+
+ /** @pt_entries: Newly added pagetable entries */
+ struct xe_pt_entry *pt_entries;
+
+ /** @flags: Target flags */
+ u32 flags;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pt_walk.c b/drivers/gpu/drm/xe/xe_pt_walk.c
new file mode 100644
index 000000000000..0def89af4372
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt_walk.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#include "xe_pt_walk.h"
+
+/**
+ * DOC: GPU page-table tree walking.
+ * The utilities in this file are similar to the CPU page-table walk
+ * utilities in mm/pagewalk.c. The main difference is that we distinguish
+ * the various levels of a page-table tree with an unsigned integer rather
+ * than by name. 0 is the lowest level, and page-tables with level 0 can
+ * not be directories pointing to lower levels, whereas all other levels
+ * can. The user of the utilities determines the highest level.
+ *
+ * Nomenclature:
+ * Each struct xe_ptw, regardless of level is referred to as a page table, and
+ * multiple page tables typically form a page table tree with page tables at
+ * intermediate levels being page directories pointing at page tables at lower
+ * levels. A shared page table for a given address range is a page-table which
+ * is neither fully within nor fully outside the address range and that can
+ * thus be shared by two or more address ranges.
+ *
+ * Please keep this code generic so that it can be used as a drm-wide page-
+ * table walker should other drivers find use for it.
+ */
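+
+/*
+ * For example, with a shifts array of {12, 21, 30, 39, 48} (the layout the
+ * xe driver uses for its normal page-tables), a level-0 entry addresses
+ * 4 KiB, a level-1 entry 2 MiB, a level-2 entry 1 GiB, a level-3 entry
+ * 512 GiB and a level-4 entry 256 TiB.
+ */
+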
+static u64 xe_pt_addr_end(u64 addr, u64 end, unsigned int level,
+ const struct xe_pt_walk *walk)
+{
+ u64 size = 1ull << walk->shifts[level];
+ u64 tmp = round_up(addr + 1, size);
+
+ return min_t(u64, tmp, end);
+}
+
+static bool xe_pt_next(pgoff_t *offset, u64 *addr, u64 next, u64 end,
+ unsigned int level, const struct xe_pt_walk *walk)
+{
+ pgoff_t step = 1;
+
+ /* Shared pt walk skips to the last pagetable */
+ if (unlikely(walk->shared_pt_mode)) {
+ unsigned int shift = walk->shifts[level];
+ u64 skip_to = round_down(end, 1ull << shift);
+
+ if (skip_to > next) {
+ step += (skip_to - next) >> shift;
+ next = skip_to;
+ }
+ }
+
+ *addr = next;
+ *offset += step;
+
+ return next != end;
+}
+
+/**
+ * xe_pt_walk_range() - Walk a range of a gpu page table tree with callbacks
+ * for each page-table entry in all levels.
+ * @parent: The root page table for walk start.
+ * @level: The root page table level.
+ * @addr: Virtual address start.
+ * @end: Virtual address end + 1.
+ * @walk: Walk info.
+ *
+ * Similar to the CPU page-table walker, this is a helper to walk
+ * a gpu page table and call a provided callback function for each entry.
+ *
+ * Return: 0 on success, negative error code on error. The error is
+ * propagated from the callback and on error the walk is terminated.
+ */
+int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level,
+ u64 addr, u64 end, struct xe_pt_walk *walk)
+{
+ pgoff_t offset = xe_pt_offset(addr, level, walk);
+ struct xe_ptw **entries = parent->dir ? parent->dir->entries : NULL;
+ const struct xe_pt_walk_ops *ops = walk->ops;
+ enum page_walk_action action;
+ struct xe_ptw *child;
+ int err = 0;
+ u64 next;
+
+ do {
+ next = xe_pt_addr_end(addr, end, level, walk);
+ if (walk->shared_pt_mode && xe_pt_covers(addr, next, level,
+ walk))
+ continue;
+again:
+ action = ACTION_SUBTREE;
+ child = entries ? entries[offset] : NULL;
+ err = ops->pt_entry(parent, offset, level, addr, next,
+ &child, &action, walk);
+ if (err)
+ break;
+
+ /* Probably not needed yet for gpu pagetable walk. */
+ if (unlikely(action == ACTION_AGAIN))
+ goto again;
+
+ if (likely(!level || !child || action == ACTION_CONTINUE))
+ continue;
+
+ err = xe_pt_walk_range(child, level - 1, addr, next, walk);
+
+ if (!err && ops->pt_post_descend)
+ err = ops->pt_post_descend(parent, offset, level, addr,
+ next, &child, &action, walk);
+ if (err)
+ break;
+
+ } while (xe_pt_next(&offset, &addr, next, end, level, walk));
+
+ return err;
+}
+
+/**
+ * xe_pt_walk_shared() - Walk shared page tables of a page-table tree.
+ * @parent: Root page table directory.
+ * @level: Level of the root.
+ * @addr: Start address.
+ * @end: Last address + 1.
+ * @walk: Walk info.
+ *
+ * This function is similar to xe_pt_walk_range() but it skips page tables
+ * that are private to the range. Since the root (or @parent) page table is
+ * typically also a shared page table this function is different in that it
+ * calls the pt_entry callback and the post_descend callback also for the
+ * root. The root can be detected in the callbacks by checking whether
+ * parent == *child.
+ * Walking only the shared page tables is common for unbind-type operations
+ * where the page-table entries for an address range are cleared or detached
+ * from the main page-table tree.
+ *
+ * Return: 0 on success, negative error code on error: If a callback
+ * returns an error, the walk will be terminated and the error returned by
+ * this function.
+ */
+int xe_pt_walk_shared(struct xe_ptw *parent, unsigned int level,
+ u64 addr, u64 end, struct xe_pt_walk *walk)
+{
+ const struct xe_pt_walk_ops *ops = walk->ops;
+ enum page_walk_action action = ACTION_SUBTREE;
+ struct xe_ptw *child = parent;
+ int err;
+
+ walk->shared_pt_mode = true;
+ err = walk->ops->pt_entry(parent, 0, level + 1, addr, end,
+ &child, &action, walk);
+
+ if (err || action != ACTION_SUBTREE)
+ return err;
+
+ err = xe_pt_walk_range(parent, level, addr, end, walk);
+ if (!err && ops->pt_post_descend) {
+ err = ops->pt_post_descend(parent, 0, level + 1, addr, end,
+ &child, &action, walk);
+ }
+ return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_pt_walk.h b/drivers/gpu/drm/xe/xe_pt_walk.h
new file mode 100644
index 000000000000..42c51fa601ec
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pt_walk.h
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef __XE_PT_WALK__
+#define __XE_PT_WALK__
+
+#include <linux/pagewalk.h>
+#include <linux/types.h>
+
+struct xe_ptw_dir;
+
+/**
+ * struct xe_ptw - base class for driver pagetable subclassing.
+ * @dir: Pointer to an array of children if any.
+ *
+ * Drivers could subclass this, and if it's a page-directory, typically
+ * embed the xe_ptw_dir::entries array in the same allocation.
+ */
+struct xe_ptw {
+ struct xe_ptw_dir *dir;
+};
+
+/**
+ * struct xe_ptw_dir - page directory structure
+ * @entries: Array holding page directory children.
+ *
+ * It is the responsibility of the user to ensure @entries is
+ * correctly sized.
+ */
+struct xe_ptw_dir {
+ struct xe_ptw *entries[0];
+};
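+
+/*
+ * A minimal subclassing sketch (hypothetical, for illustration only): a
+ * driver page-directory typically embeds both structures and sizes the
+ * trailing entries[] array in the same allocation, e.g.
+ *
+ *	struct my_pt_dir {
+ *		struct xe_ptw pt;
+ *		struct xe_ptw_dir dir;
+ *	};
+ *
+ *	d = kzalloc(sizeof(*d) + num_entries * sizeof(struct xe_ptw *),
+ *		    GFP_KERNEL);
+ *	d->pt.dir = &d->dir;
+ *
+ * struct xe_pt_dir in xe_pt.c follows this pattern, with struct xe_pt as
+ * the subclass.
+ */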
+
+/**
+ * struct xe_pt_walk - Embeddable struct for walk parameters
+ */
+struct xe_pt_walk {
+ /** @ops: The walk ops used for the pagewalk */
+ const struct xe_pt_walk_ops *ops;
+ /**
+ * @shifts: Array of page-table entry shifts used for the
+ * different levels, starting out with the leaf level 0
+ * page-shift as the first entry. It's legal for this pointer to be
+ * changed during the walk.
+ */
+ const u64 *shifts;
+ /** @max_level: Highest populated level in @shifts */
+ unsigned int max_level;
+ /**
+ * @shared_pt_mode: Whether to skip all entries that are private
+ * to the address range and invoke the callbacks only for entries that are
+ * shared with other address ranges. Such entries are referred to
+ * as shared pagetables.
+ */
+ bool shared_pt_mode;
+};
+
+/**
+ * typedef xe_pt_entry_fn - gpu page-table-walk callback-function
+ * @parent: The parent page-table.
+ * @offset: The offset (number of entries) into the page table.
+ * @level: The level of @parent.
+ * @addr: The virtual address.
+ * @next: The virtual address for the next call, or end address.
+ * @child: Pointer to pointer to child page-table at this @offset. The
+ * function may modify the value pointed to if, for example, allocating a
+ * child page table.
+ * @action: The walk action to take upon return. See <linux/pagewalk.h>.
+ * @walk: The walk parameters.
+ */
+typedef int (*xe_pt_entry_fn)(struct xe_ptw *parent, pgoff_t offset,
+ unsigned int level, u64 addr, u64 next,
+ struct xe_ptw **child,
+ enum page_walk_action *action,
+ struct xe_pt_walk *walk);
+
+/**
+ * struct xe_pt_walk_ops - Walk callbacks.
+ */
+struct xe_pt_walk_ops {
+ /**
+ * @pt_entry: Callback to be called for each page table entry prior
+ * to descending to the next level. The returned value of the action
+ * function parameter is honored.
+ */
+ xe_pt_entry_fn pt_entry;
+ /**
+ * @pt_post_descend: Callback to be called for each page table entry
+ * after return from descending to the next level. The returned value
+ * of the action function parameter is ignored.
+ */
+ xe_pt_entry_fn pt_post_descend;
+};
+
+int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level,
+ u64 addr, u64 end, struct xe_pt_walk *walk);
+
+int xe_pt_walk_shared(struct xe_ptw *parent, unsigned int level,
+ u64 addr, u64 end, struct xe_pt_walk *walk);
+
+/**
+ * xe_pt_covers - Whether the address range covers an entire entry in @level
+ * @addr: Start of the range.
+ * @end: End of range + 1.
+ * @level: Page table level.
+ * @walk: Page table walk info.
+ *
+ * This function is a helper to aid in determining whether a leaf page table
+ * entry can be inserted at this @level.
+ *
+ * Return: Whether the range provided covers exactly an entry at this level.
+ */
+static inline bool xe_pt_covers(u64 addr, u64 end, unsigned int level,
+ const struct xe_pt_walk *walk)
+{
+ u64 pt_size = 1ull << walk->shifts[level];
+
+ return end - addr == pt_size && IS_ALIGNED(addr, pt_size);
+}
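+
+/*
+ * Example (illustrative numbers only): with 2 MiB entries at level 1,
+ * xe_pt_covers(0x200000, 0x400000, 1, walk) is true (exactly one aligned
+ * entry is covered), while xe_pt_covers(0x200000, 0x300000, 1, walk) is
+ * false.
+ */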
+
+/**
+ * xe_pt_num_entries: Number of page-table entries of a given range at this
+ * level
+ * @addr: Start address.
+ * @end: End address.
+ * @level: Page table level.
+ * @walk: Walk info.
+ *
+ * Return: The number of page table entries at this level between @addr and
+ * @end.
+ */
+static inline pgoff_t
+xe_pt_num_entries(u64 addr, u64 end, unsigned int level,
+ const struct xe_pt_walk *walk)
+{
+ u64 pt_size = 1ull << walk->shifts[level];
+
+ return (round_up(end, pt_size) - round_down(addr, pt_size)) >>
+ walk->shifts[level];
+}
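+
+/*
+ * Example (illustrative numbers only): at level 0 with 4 KiB entries,
+ * xe_pt_num_entries(0x3000, 0x6000, 0, walk) returns 3, since the range
+ * touches the entries for 0x3000, 0x4000 and 0x5000.
+ */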
+
+/**
+ * xe_pt_offset: Offset of the page-table entry for a given address.
+ * @addr: The address.
+ * @level: Page table level.
+ * @walk: Walk info.
+ *
+ * Return: The page table entry offset for the given address in a
+ * page table with size indicated by @level.
+ */
+static inline pgoff_t
+xe_pt_offset(u64 addr, unsigned int level, const struct xe_pt_walk *walk)
+{
+ if (level < walk->max_level)
+ addr &= ((1ull << walk->shifts[level + 1]) - 1);
+
+ return addr >> walk->shifts[level];
+}
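+
+/*
+ * Example (illustrative numbers only), using the {12, 21, 30, 39, 48}
+ * shifts with max_level == 4: xe_pt_offset(0x40201000, 0, walk) masks off
+ * everything above the 2 MiB span of the level-0 page-table (leaving
+ * 0x1000) and returns PTE index 1.
+ */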
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
new file mode 100644
index 000000000000..6e904e97f456
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/xe_drm.h>
+#include <drm/ttm/ttm_placement.h>
+#include <linux/nospec.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_macros.h"
+#include "xe_query.h"
+#include "xe_ggtt.h"
+#include "xe_guc_hwconfig.h"
+
+static const enum xe_engine_class xe_to_user_engine_class[] = {
+ [XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER,
+ [XE_ENGINE_CLASS_COPY] = DRM_XE_ENGINE_CLASS_COPY,
+ [XE_ENGINE_CLASS_VIDEO_DECODE] = DRM_XE_ENGINE_CLASS_VIDEO_DECODE,
+ [XE_ENGINE_CLASS_VIDEO_ENHANCE] = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE,
+ [XE_ENGINE_CLASS_COMPUTE] = DRM_XE_ENGINE_CLASS_COMPUTE,
+};
+
+static size_t calc_hw_engine_info_size(struct xe_device *xe)
+{
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ struct xe_gt *gt;
+ u8 gt_id;
+ int i = 0;
+
+ for_each_gt(gt, xe, gt_id)
+ for_each_hw_engine(hwe, gt, id) {
+ if (xe_hw_engine_is_reserved(hwe))
+ continue;
+ i++;
+ }
+
+ return i * sizeof(struct drm_xe_engine_class_instance);
+}
+
+static int query_engines(struct xe_device *xe,
+ struct drm_xe_device_query *query)
+{
+ size_t size = calc_hw_engine_info_size(xe);
+ struct drm_xe_engine_class_instance __user *query_ptr =
+ u64_to_user_ptr(query->data);
+ struct drm_xe_engine_class_instance *hw_engine_info;
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ struct xe_gt *gt;
+ u8 gt_id;
+ int i = 0;
+
+ if (query->size == 0) {
+ query->size = size;
+ return 0;
+ } else if (XE_IOCTL_ERR(xe, query->size != size)) {
+ return -EINVAL;
+ }
+
+ hw_engine_info = kmalloc(size, GFP_KERNEL);
+ if (XE_IOCTL_ERR(xe, !hw_engine_info))
+ return -ENOMEM;
+
+ for_each_gt(gt, xe, gt_id)
+ for_each_hw_engine(hwe, gt, id) {
+ if (xe_hw_engine_is_reserved(hwe))
+ continue;
+
+ hw_engine_info[i].engine_class =
+ xe_to_user_engine_class[hwe->class];
+ hw_engine_info[i].engine_instance =
+ hwe->logical_instance;
+ hw_engine_info[i++].gt_id = gt->info.id;
+ }
+
+ if (copy_to_user(query_ptr, hw_engine_info, size)) {
+ kfree(hw_engine_info);
+ return -EFAULT;
+ }
+ kfree(hw_engine_info);
+
+ return 0;
+}
+
+static size_t calc_memory_usage_size(struct xe_device *xe)
+{
+ u32 num_managers = 1;
+ int i;
+
+ for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i)
+ if (ttm_manager_type(&xe->ttm, i))
+ num_managers++;
+
+ return offsetof(struct drm_xe_query_mem_usage, regions[num_managers]);
+}
+
+static int query_memory_usage(struct xe_device *xe,
+ struct drm_xe_device_query *query)
+{
+ size_t size = calc_memory_usage_size(xe);
+ struct drm_xe_query_mem_usage *usage;
+ struct drm_xe_query_mem_usage __user *query_ptr =
+ u64_to_user_ptr(query->data);
+ struct ttm_resource_manager *man;
+ int ret, i;
+
+ if (query->size == 0) {
+ query->size = size;
+ return 0;
+ } else if (XE_IOCTL_ERR(xe, query->size != size)) {
+ return -EINVAL;
+ }
+
+ usage = kmalloc(size, GFP_KERNEL);
+ if (XE_IOCTL_ERR(xe, !usage))
+ return -ENOMEM;
+
+ usage->pad = 0;
+
+ man = ttm_manager_type(&xe->ttm, XE_PL_TT);
+ usage->regions[0].mem_class = XE_MEM_REGION_CLASS_SYSMEM;
+ usage->regions[0].instance = 0;
+ usage->regions[0].pad = 0;
+ usage->regions[0].min_page_size = PAGE_SIZE;
+ usage->regions[0].max_page_size = PAGE_SIZE;
+ usage->regions[0].total_size = man->size << PAGE_SHIFT;
+ usage->regions[0].used = ttm_resource_manager_usage(man);
+ usage->num_regions = 1;
+
+ for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
+ man = ttm_manager_type(&xe->ttm, i);
+ if (man) {
+ usage->regions[usage->num_regions].mem_class =
+ XE_MEM_REGION_CLASS_VRAM;
+ usage->regions[usage->num_regions].instance =
+ usage->num_regions;
+ usage->regions[usage->num_regions].pad = 0;
+ usage->regions[usage->num_regions].min_page_size =
+ xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ?
+ SZ_64K : PAGE_SIZE;
+ usage->regions[usage->num_regions].max_page_size =
+ SZ_1G;
+ usage->regions[usage->num_regions].total_size =
+ man->size;
+ usage->regions[usage->num_regions++].used =
+ ttm_resource_manager_usage(man);
+ }
+ }
+
+ if (!copy_to_user(query_ptr, usage, size))
+ ret = 0;
+ else
+ ret = -EFAULT;
+
+ kfree(usage);
+ return ret;
+}
+
+static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
+{
+ u32 num_params = XE_QUERY_CONFIG_NUM_PARAM;
+ size_t size =
+ sizeof(struct drm_xe_query_config) + num_params * sizeof(u64);
+ struct drm_xe_query_config __user *query_ptr =
+ u64_to_user_ptr(query->data);
+ struct drm_xe_query_config *config;
+
+ if (query->size == 0) {
+ query->size = size;
+ return 0;
+ } else if (XE_IOCTL_ERR(xe, query->size != size)) {
+ return -EINVAL;
+ }
+
+ config = kzalloc(size, GFP_KERNEL);
+ if (XE_IOCTL_ERR(xe, !config))
+ return -ENOMEM;
+
+ config->num_params = num_params;
+ config->info[XE_QUERY_CONFIG_REV_AND_DEVICE_ID] =
+ xe->info.devid | (xe->info.revid << 16);
+ if (to_gt(xe)->mem.vram.size)
+ config->info[XE_QUERY_CONFIG_FLAGS] =
+ XE_QUERY_CONFIG_FLAGS_HAS_VRAM;
+ if (xe->info.enable_guc)
+ config->info[XE_QUERY_CONFIG_FLAGS] |=
+ XE_QUERY_CONFIG_FLAGS_USE_GUC;
+ config->info[XE_QUERY_CONFIG_MIN_ALIGNEMENT] =
+ xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
+ config->info[XE_QUERY_CONFIG_VA_BITS] = 12 +
+ (9 * (xe->info.vm_max_level + 1));
+ config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.tile_count;
+ config->info[XE_QUERY_CONFIG_MEM_REGION_COUNT] =
+ hweight_long(xe->info.mem_region_mask);
+
+ if (copy_to_user(query_ptr, config, size)) {
+ kfree(config);
+ return -EFAULT;
+ }
+ kfree(config);
+
+ return 0;
+}
+
+static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query)
+{
+ struct xe_gt *gt;
+ size_t size = sizeof(struct drm_xe_query_gts) +
+ xe->info.tile_count * sizeof(struct drm_xe_query_gt);
+ struct drm_xe_query_gts __user *query_ptr =
+ u64_to_user_ptr(query->data);
+ struct drm_xe_query_gts *gts;
+ u8 id;
+
+ if (query->size == 0) {
+ query->size = size;
+ return 0;
+ } else if (XE_IOCTL_ERR(xe, query->size != size)) {
+ return -EINVAL;
+ }
+
+ gts = kzalloc(size, GFP_KERNEL);
+ if (XE_IOCTL_ERR(xe, !gts))
+ return -ENOMEM;
+
+ gts->num_gt = xe->info.tile_count;
+ for_each_gt(gt, xe, id) {
+ if (id == 0)
+ gts->gts[id].type = XE_QUERY_GT_TYPE_MAIN;
+ else if (xe_gt_is_media_type(gt))
+ gts->gts[id].type = XE_QUERY_GT_TYPE_MEDIA;
+ else
+ gts->gts[id].type = XE_QUERY_GT_TYPE_REMOTE;
+ gts->gts[id].instance = id;
+ gts->gts[id].clock_freq = gt->info.clock_freq;
+ if (!IS_DGFX(xe))
+ gts->gts[id].native_mem_regions = 0x1;
+ else
+ gts->gts[id].native_mem_regions =
+ BIT(gt->info.vram_id) << 1;
+ gts->gts[id].slow_mem_regions = xe->info.mem_region_mask ^
+ gts->gts[id].native_mem_regions;
+ }
+
+ if (copy_to_user(query_ptr, gts, size)) {
+ kfree(gts);
+ return -EFAULT;
+ }
+ kfree(gts);
+
+ return 0;
+}
+
+static int query_hwconfig(struct xe_device *xe,
+ struct drm_xe_device_query *query)
+{
+ struct xe_gt *gt = xe_device_get_gt(xe, 0);
+ size_t size = xe_guc_hwconfig_size(&gt->uc.guc);
+ void __user *query_ptr = u64_to_user_ptr(query->data);
+ void *hwconfig;
+
+ if (query->size == 0) {
+ query->size = size;
+ return 0;
+ } else if (XE_IOCTL_ERR(xe, query->size != size)) {
+ return -EINVAL;
+ }
+
+ hwconfig = kzalloc(size, GFP_KERNEL);
+ if (XE_IOCTL_ERR(xe, !hwconfig))
+ return -ENOMEM;
+
+ xe_device_mem_access_get(xe);
+ xe_guc_hwconfig_copy(&gt->uc.guc, hwconfig);
+ xe_device_mem_access_put(xe);
+
+ if (copy_to_user(query_ptr, hwconfig, size)) {
+ kfree(hwconfig);
+ return -EFAULT;
+ }
+ kfree(hwconfig);
+
+ return 0;
+}
+
+static size_t calc_topo_query_size(struct xe_device *xe)
+{
+ return xe->info.tile_count *
+ (3 * sizeof(struct drm_xe_query_topology_mask) +
+ sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
+ sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
+ sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
+}
+
+static void __user *copy_mask(void __user *ptr,
+ struct drm_xe_query_topology_mask *topo,
+ void *mask, size_t mask_size)
+{
+ topo->num_bytes = mask_size;
+
+ if (copy_to_user(ptr, topo, sizeof(*topo)))
+ return ERR_PTR(-EFAULT);
+ ptr += sizeof(*topo);
+
+ if (copy_to_user(ptr, mask, mask_size))
+ return ERR_PTR(-EFAULT);
+ ptr += mask_size;
+
+ return ptr;
+}
+
+static int query_gt_topology(struct xe_device *xe,
+ struct drm_xe_device_query *query)
+{
+ void __user *query_ptr = u64_to_user_ptr(query->data);
+ size_t size = calc_topo_query_size(xe);
+ struct drm_xe_query_topology_mask topo;
+ struct xe_gt *gt;
+ int id;
+
+ if (query->size == 0) {
+ query->size = size;
+ return 0;
+ } else if (XE_IOCTL_ERR(xe, query->size != size)) {
+ return -EINVAL;
+ }
+
+ for_each_gt(gt, xe, id) {
+ topo.gt_id = id;
+
+ topo.type = XE_TOPO_DSS_GEOMETRY;
+ query_ptr = copy_mask(query_ptr, &topo,
+ gt->fuse_topo.g_dss_mask,
+ sizeof(gt->fuse_topo.g_dss_mask));
+ if (IS_ERR(query_ptr))
+ return PTR_ERR(query_ptr);
+
+ topo.type = XE_TOPO_DSS_COMPUTE;
+ query_ptr = copy_mask(query_ptr, &topo,
+ gt->fuse_topo.c_dss_mask,
+ sizeof(gt->fuse_topo.c_dss_mask));
+ if (IS_ERR(query_ptr))
+ return PTR_ERR(query_ptr);
+
+ topo.type = XE_TOPO_EU_PER_DSS;
+ query_ptr = copy_mask(query_ptr, &topo,
+ gt->fuse_topo.eu_mask_per_dss,
+ sizeof(gt->fuse_topo.eu_mask_per_dss));
+ if (IS_ERR(query_ptr))
+ return PTR_ERR(query_ptr);
+ }
+
+ return 0;
+}
+
+static int (* const xe_query_funcs[])(struct xe_device *xe,
+ struct drm_xe_device_query *query) = {
+ query_engines,
+ query_memory_usage,
+ query_config,
+ query_gts,
+ query_hwconfig,
+ query_gt_topology,
+};
+
+int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct drm_xe_device_query *query = data;
+ u32 idx;
+
+ if (XE_IOCTL_ERR(xe, query->extensions != 0))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, query->query >= ARRAY_SIZE(xe_query_funcs)))
+ return -EINVAL;
+
+ idx = array_index_nospec(query->query, ARRAY_SIZE(xe_query_funcs));
+ if (XE_IOCTL_ERR(xe, !xe_query_funcs[idx]))
+ return -EINVAL;
+
+ return xe_query_funcs[idx](xe, query);
+}
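+/*
+ * Illustrative userspace sketch (not part of this patch): every query uses a
+ * two-call convention. The DRM_XE_DEVICE_QUERY_* names from drm/xe_drm.h and
+ * a libdrm-style drmIoctl() wrapper are assumed here:
+ *
+ *	struct drm_xe_device_query query = {
+ *		.query = DRM_XE_DEVICE_QUERY_ENGINES,
+ *	};
+ *
+ *	// First call with size == 0: the kernel fills in query.size
+ *	drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
+ *	void *buf = malloc(query.size);
+ *	query.data = (uintptr_t)buf;
+ *	// Second call with a matching size: the kernel copies the data
+ *	drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
+ */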
diff --git a/drivers/gpu/drm/xe/xe_query.h b/drivers/gpu/drm/xe/xe_query.h
new file mode 100644
index 000000000000..beeb7a8192b4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_query.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_QUERY_H_
+#define _XE_QUERY_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
new file mode 100644
index 000000000000..16e025dcf2cc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_reg_sr.h"
+
+#include <linux/align.h>
+#include <linux/string_helpers.h>
+#include <linux/xarray.h>
+
+#include <drm/drm_print.h>
+#include <drm/drm_managed.h>
+
+#include "xe_rtp_types.h"
+#include "xe_device_types.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+
+#include "gt/intel_engine_regs.h"
+#include "gt/intel_gt_regs.h"
+
+#define XE_REG_SR_GROW_STEP_DEFAULT 16
+
+static void reg_sr_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_reg_sr *sr = arg;
+
+ xa_destroy(&sr->xa);
+ kfree(sr->pool.arr);
+ memset(&sr->pool, 0, sizeof(sr->pool));
+}
+
+int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe)
+{
+ xa_init(&sr->xa);
+ memset(&sr->pool, 0, sizeof(sr->pool));
+ sr->pool.grow_step = XE_REG_SR_GROW_STEP_DEFAULT;
+ sr->name = name;
+
+ return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr);
+}
+
+int xe_reg_sr_dump_kv(struct xe_reg_sr *sr,
+ struct xe_reg_sr_kv **dst)
+{
+ struct xe_reg_sr_kv *iter;
+ struct xe_reg_sr_entry *entry;
+ unsigned long idx;
+
+ if (xa_empty(&sr->xa)) {
+ *dst = NULL;
+ return 0;
+ }
+
+ *dst = kmalloc_array(sr->pool.used, sizeof(**dst), GFP_KERNEL);
+ if (!*dst)
+ return -ENOMEM;
+
+ iter = *dst;
+ xa_for_each(&sr->xa, idx, entry) {
+ iter->k = idx;
+ iter->v = *entry;
+ iter++;
+ }
+
+ return 0;
+}
+
+static struct xe_reg_sr_entry *alloc_entry(struct xe_reg_sr *sr)
+{
+ if (sr->pool.used == sr->pool.allocated) {
+ struct xe_reg_sr_entry *arr;
+
+ arr = krealloc_array(sr->pool.arr,
+ ALIGN(sr->pool.allocated + 1, sr->pool.grow_step),
+ sizeof(*arr), GFP_KERNEL);
+ if (!arr)
+ return NULL;
+
+ sr->pool.arr = arr;
+ sr->pool.allocated += sr->pool.grow_step;
+ }
+
+ return &sr->pool.arr[sr->pool.used++];
+}
+
+static bool compatible_entries(const struct xe_reg_sr_entry *e1,
+ const struct xe_reg_sr_entry *e2)
+{
+ /*
+ * Don't allow overwriting values: clr_bits/set_bits should be disjoint
+ * when operating on the same register.
+ */
+ if (e1->clr_bits & e2->clr_bits || e1->set_bits & e2->set_bits ||
+ e1->clr_bits & e2->set_bits || e1->set_bits & e2->clr_bits)
+ return false;
+
+ if (e1->masked_reg != e2->masked_reg)
+ return false;
+
+ if (e1->reg_type != e2->reg_type)
+ return false;
+
+ return true;
+}
+
+int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg,
+ const struct xe_reg_sr_entry *e)
+{
+ unsigned long idx = reg;
+ struct xe_reg_sr_entry *pentry = xa_load(&sr->xa, idx);
+ int ret;
+
+ if (pentry) {
+ if (!compatible_entries(pentry, e)) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ pentry->clr_bits |= e->clr_bits;
+ pentry->set_bits |= e->set_bits;
+ pentry->read_mask |= e->read_mask;
+
+ return 0;
+ }
+
+ pentry = alloc_entry(sr);
+ if (!pentry) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ *pentry = *e;
+ ret = xa_err(xa_store(&sr->xa, idx, pentry, GFP_KERNEL));
+ if (ret)
+ goto fail;
+
+ return 0;
+
+fail:
+ DRM_ERROR("Discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s): ret=%d\n",
+ idx, e->clr_bits, e->set_bits,
+ str_yes_no(e->masked_reg), ret);
+
+ return ret;
+}
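+/*
+ * Example (illustrative, not part of the original patch): two entries added
+ * for the same register with disjoint bits are merged, e.g. one entry with
+ * .set_bits = BIT(0) and another with .set_bits = BIT(3) end up as a single
+ * save-restore entry with set_bits == BIT(0) | BIT(3). An entry touching a
+ * bit already claimed by a previous one is rejected with -EINVAL instead.
+ */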
+
+static void apply_one_mmio(struct xe_gt *gt, u32 reg,
+ struct xe_reg_sr_entry *entry)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ u32 val;
+
+ /*
+ * If this is a masked register, we need to figure out what goes in the
+ * upper 16 bits: it's either the clr_bits (when using FIELD_SET and WR)
+ * or the set_bits (when using SET).
+ *
+ * When it's not masked, we have to read the current value from hardware,
+ * unless we are supposed to set all bits.
+ */
+ if (entry->masked_reg)
+ val = (entry->clr_bits ?: entry->set_bits << 16);
+ else if (entry->clr_bits + 1)
+ val = (entry->reg_type == XE_RTP_REG_MCR ?
+ xe_gt_mcr_unicast_read_any(gt, MCR_REG(reg)) :
+ xe_mmio_read32(gt, reg)) & (~entry->clr_bits);
+ else
+ val = 0;
+
+ /*
+ * TODO: add selftest to validate all tables, regardless of platform:
+ * - Masked registers can't have set_bits with upper bits set
+ * - set_bits must be contained in clr_bits
+ */
+ val |= entry->set_bits;
+
+ drm_dbg(&xe->drm, "REG[0x%x] = 0x%08x", reg, val);
+
+ if (entry->reg_type == XE_RTP_REG_MCR)
+ xe_gt_mcr_multicast_write(gt, MCR_REG(reg), val);
+ else
+ xe_mmio_write32(gt, reg, val);
+}
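+/*
+ * Worked example (illustrative, not part of the original patch): for a
+ * regular (non-masked) register with clr_bits == 0x30 and set_bits == 0x10,
+ * the current value is read back, bits 5:4 are cleared and bit 4 is set
+ * again, i.e. val = (old & ~0x30) | 0x10, before being written out.
+ */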
+
+void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_reg_sr_entry *entry;
+ unsigned long reg;
+ int err;
+
+ drm_dbg(&xe->drm, "Applying %s save-restore MMIOs\n", sr->name);
+
+ err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+ if (err)
+ goto err_force_wake;
+
+ xa_for_each(&sr->xa, reg, entry)
+ apply_one_mmio(gt, reg, entry);
+
+ err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+ XE_WARN_ON(err);
+
+ return;
+
+err_force_wake:
+ drm_err(&xe->drm, "Failed to apply, err=%d\n", err);
+}
+
+void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base,
+ struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_reg_sr_entry *entry;
+ unsigned long reg;
+ unsigned int slot = 0;
+ int err;
+
+ drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name);
+
+ err = xe_force_wake_get(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+ if (err)
+ goto err_force_wake;
+
+ xa_for_each(&sr->xa, reg, entry) {
+ xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot).reg,
+ reg | entry->set_bits);
+ slot++;
+ }
+
+ /* And clear the rest just in case of garbage */
+ for (; slot < RING_MAX_NONPRIV_SLOTS; slot++)
+ xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot).reg,
+ RING_NOPID(mmio_base).reg);
+
+ err = xe_force_wake_put(&gt->mmio.fw, XE_FORCEWAKE_ALL);
+ XE_WARN_ON(err);
+
+ return;
+
+err_force_wake:
+ drm_err(&xe->drm, "Failed to apply, err=%d\n", err);
+}
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h
new file mode 100644
index 000000000000..c3a9db251e92
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_sr.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_REG_SR_
+#define _XE_REG_SR_
+
+#include "xe_reg_sr_types.h"
+
+/*
+ * Reg save/restore bookkeeping
+ */
+
+struct xe_device;
+struct xe_gt;
+
+int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe);
+int xe_reg_sr_dump_kv(struct xe_reg_sr *sr,
+ struct xe_reg_sr_kv **dst);
+
+int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg,
+ const struct xe_reg_sr_entry *e);
+void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt);
+void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base,
+ struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h b/drivers/gpu/drm/xe/xe_reg_sr_types.h
new file mode 100644
index 000000000000..2fa7ff3966ba
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_REG_SR_TYPES_
+#define _XE_REG_SR_TYPES_
+
+#include <linux/xarray.h>
+#include <linux/types.h>
+
+#include "i915_reg_defs.h"
+
+struct xe_reg_sr_entry {
+ u32 clr_bits;
+ u32 set_bits;
+ /* Mask for bits to consider when reading value back */
+ u32 read_mask;
+ /*
+ * "Masked registers" are marked in the spec as registers whose upper 16
+ * bits act as a mask for the bits being updated in the lower 16 bits
+ * when writing to them.
+ */
+ u8 masked_reg;
+ u8 reg_type;
+};
+
+struct xe_reg_sr_kv {
+ u32 k;
+ struct xe_reg_sr_entry v;
+};
+
+struct xe_reg_sr {
+ struct {
+ struct xe_reg_sr_entry *arr;
+ unsigned int used;
+ unsigned int allocated;
+ unsigned int grow_step;
+ } pool;
+ struct xarray xa;
+ const char *name;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
new file mode 100644
index 000000000000..2e0c87b72395
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_reg_whitelist.h"
+
+#include "xe_platform_types.h"
+#include "xe_gt_types.h"
+#include "xe_rtp.h"
+
+#include "../i915/gt/intel_engine_regs.h"
+#include "../i915/gt/intel_gt_regs.h"
+
+#undef _MMIO
+#undef MCR_REG
+#define _MMIO(x) _XE_RTP_REG(x)
+#define MCR_REG(x) _XE_RTP_MCR_REG(x)
+
+static bool match_not_render(const struct xe_gt *gt,
+ const struct xe_hw_engine *hwe)
+{
+ return hwe->class != XE_ENGINE_CLASS_RENDER;
+}
+
+static const struct xe_rtp_entry register_whitelist[] = {
+ { XE_RTP_NAME("WaAllowPMDepthAndInvocationCountAccessFromUMD, 1408556865"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+ XE_WHITELIST_REGISTER(PS_INVOCATION_COUNT,
+ RING_FORCE_TO_NONPRIV_ACCESS_RD |
+ RING_FORCE_TO_NONPRIV_RANGE_4)
+ },
+ { XE_RTP_NAME("1508744258, 14012131227, 1808121037"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+ XE_WHITELIST_REGISTER(GEN7_COMMON_SLICE_CHICKEN1, 0)
+ },
+ { XE_RTP_NAME("1806527549"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+ XE_WHITELIST_REGISTER(HIZ_CHICKEN, 0)
+ },
+ { XE_RTP_NAME("allow_read_ctx_timestamp"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1260), FUNC(match_not_render)),
+ XE_WHITELIST_REGISTER(RING_CTX_TIMESTAMP(0),
+ RING_FORCE_TO_NONPRIV_ACCESS_RD,
+ XE_RTP_FLAG(ENGINE_BASE))
+ },
+ { XE_RTP_NAME("16014440446_part_1"),
+ XE_RTP_RULES(PLATFORM(PVC)),
+ XE_WHITELIST_REGISTER(_MMIO(0x4400),
+ RING_FORCE_TO_NONPRIV_DENY |
+ RING_FORCE_TO_NONPRIV_RANGE_64)
+ },
+ { XE_RTP_NAME("16014440446_part_2"),
+ XE_RTP_RULES(PLATFORM(PVC)),
+ XE_WHITELIST_REGISTER(_MMIO(0x4500),
+ RING_FORCE_TO_NONPRIV_DENY |
+ RING_FORCE_TO_NONPRIV_RANGE_64)
+ },
+ {}
+};
+
+/**
+ * xe_reg_whitelist_process_engine - process table of registers to whitelist
+ * @hwe: engine instance to process whitelist for
+ *
+ * Process the whitelist table for this platform, saving in @hwe all the
+ * registers that need to be whitelisted by the hardware so they can be accessed
+ * by userspace.
+ */
+void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe)
+{
+ xe_rtp_process(register_whitelist, &hwe->reg_whitelist, hwe->gt, hwe);
+}
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.h b/drivers/gpu/drm/xe/xe_reg_whitelist.h
new file mode 100644
index 000000000000..6e861b1bdb01
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_REG_WHITELIST_
+#define _XE_REG_WHITELIST_
+
+struct xe_hw_engine;
+
+void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h
new file mode 100644
index 000000000000..f54409850d74
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_res_cursor.h
@@ -0,0 +1,226 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XE_RES_CURSOR_H__
+#define __XE_RES_CURSOR_H__
+
+#include <linux/scatterlist.h>
+
+#include <drm/drm_mm.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_range_manager.h>
+#include <drm/ttm/ttm_resource.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include "xe_bo.h"
+#include "xe_macros.h"
+#include "xe_ttm_vram_mgr.h"
+
+/* Cursor state for walking over vram_mgr and gtt_mgr allocations */
+struct xe_res_cursor {
+ u64 start;
+ u64 size;
+ u64 remaining;
+ void *node;
+ u32 mem_type;
+ struct scatterlist *sgl;
+};
+
+/**
+ * xe_res_first - initialize a xe_res_cursor
+ *
+ * @res: TTM resource object to walk
+ * @start: Start of the range
+ * @size: Size of the range
+ * @cur: cursor object to initialize
+ *
+ * Start walking over @size bytes of the allocations, starting at @start.
+ */
+static inline void xe_res_first(struct ttm_resource *res,
+ u64 start, u64 size,
+ struct xe_res_cursor *cur)
+{
+ struct drm_buddy_block *block;
+ struct list_head *head, *next;
+
+ cur->sgl = NULL;
+ if (!res)
+ goto fallback;
+
+ XE_BUG_ON(start + size > res->size);
+
+ cur->mem_type = res->mem_type;
+
+ switch (cur->mem_type) {
+ case XE_PL_VRAM0:
+ case XE_PL_VRAM1:
+ head = &to_xe_ttm_vram_mgr_resource(res)->blocks;
+
+ block = list_first_entry_or_null(head,
+ struct drm_buddy_block,
+ link);
+ if (!block)
+ goto fallback;
+
+ while (start >= xe_ttm_vram_mgr_block_size(block)) {
+ start -= xe_ttm_vram_mgr_block_size(block);
+
+ next = block->link.next;
+ if (next != head)
+ block = list_entry(next, struct drm_buddy_block,
+ link);
+ }
+
+ cur->start = xe_ttm_vram_mgr_block_start(block) + start;
+ cur->size = min(xe_ttm_vram_mgr_block_size(block) - start,
+ size);
+ cur->remaining = size;
+ cur->node = block;
+ break;
+ default:
+ goto fallback;
+ }
+
+ return;
+
+fallback:
+ cur->start = start;
+ cur->size = size;
+ cur->remaining = size;
+ cur->node = NULL;
+ cur->mem_type = XE_PL_TT;
+ XE_WARN_ON(res && start + size > res->size);
+ return;
+}
+
+static inline void __xe_res_sg_next(struct xe_res_cursor *cur)
+{
+ struct scatterlist *sgl = cur->sgl;
+ u64 start = cur->start;
+
+ while (start >= sg_dma_len(sgl)) {
+ start -= sg_dma_len(sgl);
+ sgl = sg_next(sgl);
+ XE_BUG_ON(!sgl);
+ }
+
+ cur->start = start;
+ cur->size = sg_dma_len(sgl) - start;
+ cur->sgl = sgl;
+}
+
+/**
+ * xe_res_first_sg - initialize a xe_res_cursor with a scatter gather table
+ *
+ * @sg: scatter gather table to walk
+ * @start: Start of the range
+ * @size: Size of the range
+ * @cur: cursor object to initialize
+ *
+ * Start walking over @size bytes of the allocations, starting at @start.
+ */
+static inline void xe_res_first_sg(const struct sg_table *sg,
+ u64 start, u64 size,
+ struct xe_res_cursor *cur)
+{
+ XE_BUG_ON(!sg);
+ XE_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE) ||
+ !IS_ALIGNED(size, PAGE_SIZE));
+ cur->node = NULL;
+ cur->start = start;
+ cur->remaining = size;
+ cur->size = 0;
+ cur->sgl = sg->sgl;
+ cur->mem_type = XE_PL_TT;
+ __xe_res_sg_next(cur);
+}
+
+/**
+ * xe_res_next - advance the cursor
+ *
+ * @cur: the cursor to advance
+ * @size: number of bytes to move forward
+ *
+ * Move the cursor @size bytes forward, walking to the next node if necessary.
+ */
+static inline void xe_res_next(struct xe_res_cursor *cur, u64 size)
+{
+ struct drm_buddy_block *block;
+ struct list_head *next;
+ u64 start;
+
+ XE_BUG_ON(size > cur->remaining);
+
+ cur->remaining -= size;
+ if (!cur->remaining)
+ return;
+
+ if (cur->size > size) {
+ cur->size -= size;
+ cur->start += size;
+ return;
+ }
+
+ if (cur->sgl) {
+ cur->start += size;
+ __xe_res_sg_next(cur);
+ return;
+ }
+
+ switch (cur->mem_type) {
+ case XE_PL_VRAM0:
+ case XE_PL_VRAM1:
+ start = size - cur->size;
+ block = cur->node;
+
+ next = block->link.next;
+ block = list_entry(next, struct drm_buddy_block, link);
+
+ while (start >= xe_ttm_vram_mgr_block_size(block)) {
+ start -= xe_ttm_vram_mgr_block_size(block);
+
+ next = block->link.next;
+ block = list_entry(next, struct drm_buddy_block, link);
+ }
+
+ cur->start = xe_ttm_vram_mgr_block_start(block) + start;
+ cur->size = min(xe_ttm_vram_mgr_block_size(block) - start,
+ cur->remaining);
+ cur->node = block;
+ break;
+ default:
+ return;
+ }
+}
+
+/**
+ * xe_res_dma - return dma address of cursor at current position
+ *
+ * @cur: the cursor to return the dma address from
+ */
+static inline u64 xe_res_dma(const struct xe_res_cursor *cur)
+{
+ return cur->sgl ? sg_dma_address(cur->sgl) + cur->start : cur->start;
+}
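+/*
+ * Usage sketch (illustrative, not part of the original patch): a typical walk
+ * over the first @size bytes of a TTM resource, assuming @res and @size are
+ * provided by the caller:
+ *
+ *	struct xe_res_cursor cur;
+ *
+ *	xe_res_first(res, 0, size, &cur);
+ *	while (cur.remaining) {
+ *		u64 addr = xe_res_dma(&cur);
+ *		u64 len = min_t(u64, cur.size, cur.remaining);
+ *
+ *		// program addr/len into a PTE, copy command, etc.
+ *
+ *		xe_res_next(&cur, len);
+ *	}
+ */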
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
new file mode 100644
index 000000000000..fda7978a63e0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -0,0 +1,373 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_engine_types.h"
+#include "xe_gt.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_ring_ops.h"
+#include "xe_sched_job.h"
+#include "xe_vm_types.h"
+
+#include "i915_reg.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_lrc_reg.h"
+
+static u32 preparser_disable(bool state)
+{
+ return MI_ARB_CHECK | BIT(8) | state;
+}
+
+static int emit_aux_table_inv(struct xe_gt *gt, u32 addr, u32 *dw, int i)
+{
+ dw[i++] = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
+ dw[i++] = addr + gt->mmio.adj_offset;
+ dw[i++] = AUX_INV;
+ dw[i++] = MI_NOOP;
+
+ return i;
+}
+
+static int emit_user_interrupt(u32 *dw, int i)
+{
+ dw[i++] = MI_USER_INTERRUPT;
+ dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ dw[i++] = MI_ARB_CHECK;
+
+ return i;
+}
+
+static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
+{
+ dw[i++] = MI_STORE_DATA_IMM | BIT(22) /* GGTT */ | 2;
+ dw[i++] = addr;
+ dw[i++] = 0;
+ dw[i++] = value;
+
+ return i;
+}
+
+static int emit_flush_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
+{
+ dw[i++] = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+ dw[i++] = addr | MI_FLUSH_DW_USE_GTT;
+ dw[i++] = 0;
+ dw[i++] = value;
+
+ return i;
+}
+
+static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i)
+{
+ dw[i++] = MI_BATCH_BUFFER_START_GEN8 | ppgtt_flag;
+ dw[i++] = lower_32_bits(batch_addr);
+ dw[i++] = upper_32_bits(batch_addr);
+
+ return i;
+}
+
+static int emit_flush_invalidate(u32 flag, u32 *dw, int i)
+{
+ dw[i] = MI_FLUSH_DW + 1;
+ dw[i] |= flag;
+ dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW |
+ MI_FLUSH_DW_STORE_INDEX;
+
+ dw[i++] = LRC_PPHWSP_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
+ dw[i++] = 0;
+ dw[i++] = ~0U;
+
+ return i;
+}
+
+static int emit_pipe_invalidate(u32 mask_flags, u32 *dw, int i)
+{
+ u32 flags = PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_COMMAND_CACHE_INVALIDATE |
+ PIPE_CONTROL_TLB_INVALIDATE |
+ PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
+ PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+ PIPE_CONTROL_VF_CACHE_INVALIDATE |
+ PIPE_CONTROL_CONST_CACHE_INVALIDATE |
+ PIPE_CONTROL_STATE_CACHE_INVALIDATE |
+ PIPE_CONTROL_QW_WRITE |
+ PIPE_CONTROL_STORE_DATA_INDEX;
+
+ flags &= ~mask_flags;
+
+ dw[i++] = GFX_OP_PIPE_CONTROL(6);
+ dw[i++] = flags;
+ dw[i++] = LRC_PPHWSP_SCRATCH_ADDR;
+ dw[i++] = 0;
+ dw[i++] = 0;
+ dw[i++] = 0;
+
+ return i;
+}
+
+#define MI_STORE_QWORD_IMM_GEN8_POSTED (MI_INSTR(0x20, 3) | (1 << 21))
+
+static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
+ u32 *dw, int i)
+{
+ dw[i++] = MI_STORE_QWORD_IMM_GEN8_POSTED;
+ dw[i++] = lower_32_bits(addr);
+ dw[i++] = upper_32_bits(addr);
+ dw[i++] = lower_32_bits(value);
+ dw[i++] = upper_32_bits(value);
+
+ return i;
+}
+
+static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
+ int i)
+{
+ dw[i++] = GFX_OP_PIPE_CONTROL(6);
+ dw[i++] = (stall_only ? PIPE_CONTROL_CS_STALL :
+ PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL) |
+ PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE;
+ dw[i++] = addr;
+ dw[i++] = 0;
+ dw[i++] = value;
+ dw[i++] = 0; /* We're thrashing one extra dword. */
+
+ return i;
+}
+
+static u32 get_ppgtt_flag(struct xe_sched_job *job)
+{
+ return !(job->engine->flags & ENGINE_FLAG_WA) ? BIT(8) : 0;
+}
+
+static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc,
+ u64 batch_addr, u32 seqno)
+{
+ u32 dw[MAX_JOB_SIZE_DW], i = 0;
+ u32 ppgtt_flag = get_ppgtt_flag(job);
+
+ /* XXX: Conditional flushing possible */
+ dw[i++] = preparser_disable(true);
+ i = emit_flush_invalidate(0, dw, i);
+ dw[i++] = preparser_disable(false);
+
+ i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+ seqno, dw, i);
+
+ i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
+
+ if (job->user_fence.used)
+ i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
+ job->user_fence.value,
+ dw, i);
+
+ i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i);
+
+ i = emit_user_interrupt(dw, i);
+
+ XE_BUG_ON(i > MAX_JOB_SIZE_DW);
+
+ xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
+}
+
+static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
+ u64 batch_addr, u32 seqno)
+{
+ u32 dw[MAX_JOB_SIZE_DW], i = 0;
+ u32 ppgtt_flag = get_ppgtt_flag(job);
+ struct xe_gt *gt = job->engine->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+ bool decode = job->engine->class == XE_ENGINE_CLASS_VIDEO_DECODE;
+
+ /* XXX: Conditional flushing possible */
+ dw[i++] = preparser_disable(true);
+ i = emit_flush_invalidate(decode ? MI_INVALIDATE_BSD : 0, dw, i);
+ /* Wa_1809175790 */
+ if (!xe->info.has_flat_ccs) {
+ if (decode)
+ i = emit_aux_table_inv(gt, GEN12_VD0_AUX_INV.reg, dw, i);
+ else
+ i = emit_aux_table_inv(gt, GEN12_VE0_AUX_INV.reg, dw, i);
+ }
+ dw[i++] = preparser_disable(false);
+
+ i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+ seqno, dw, i);
+
+ i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
+
+ if (job->user_fence.used)
+ i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
+ job->user_fence.value,
+ dw, i);
+
+ i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i);
+
+ i = emit_user_interrupt(dw, i);
+
+ XE_BUG_ON(i > MAX_JOB_SIZE_DW);
+
+ xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
+}
+
+/*
+ * 3D-related flags that can't be set on _engines_ that lack access to the 3D
+ * pipeline (i.e., CCS engines).
+ */
+#define PIPE_CONTROL_3D_ENGINE_FLAGS (\
+ PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH | \
+ PIPE_CONTROL_TILE_CACHE_FLUSH | \
+ PIPE_CONTROL_DEPTH_STALL | \
+ PIPE_CONTROL_STALL_AT_SCOREBOARD | \
+ PIPE_CONTROL_PSD_SYNC | \
+ PIPE_CONTROL_AMFS_FLUSH | \
+ PIPE_CONTROL_VF_CACHE_INVALIDATE | \
+ PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET)
+
+/* 3D-related flags that can't be set on _platforms_ that lack a 3D pipeline */
+#define PIPE_CONTROL_3D_ARCH_FLAGS ( \
+ PIPE_CONTROL_3D_ENGINE_FLAGS | \
+ PIPE_CONTROL_INDIRECT_STATE_DISABLE | \
+ PIPE_CONTROL_FLUSH_ENABLE | \
+ PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
+ PIPE_CONTROL_DC_FLUSH_ENABLE)
+
+static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
+ struct xe_lrc *lrc,
+ u64 batch_addr, u32 seqno)
+{
+ u32 dw[MAX_JOB_SIZE_DW], i = 0;
+ u32 ppgtt_flag = get_ppgtt_flag(job);
+ struct xe_gt *gt = job->engine->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+ bool pvc = xe->info.platform == XE_PVC;
+ u32 mask_flags = 0;
+
+ /* XXX: Conditional flushing possible */
+ dw[i++] = preparser_disable(true);
+ if (pvc)
+ mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS;
+ else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE)
+ mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
+ i = emit_pipe_invalidate(mask_flags, dw, i);
+ /* Wa_1809175790 */
+ if (!xe->info.has_flat_ccs)
+ i = emit_aux_table_inv(gt, GEN12_CCS_AUX_INV.reg, dw, i);
+ dw[i++] = preparser_disable(false);
+
+ i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+ seqno, dw, i);
+
+ i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
+
+ if (job->user_fence.used)
+ i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
+ job->user_fence.value,
+ dw, i);
+
+ i = emit_pipe_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, pvc, dw, i);
+
+ i = emit_user_interrupt(dw, i);
+
+ XE_BUG_ON(i > MAX_JOB_SIZE_DW);
+
+ xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
+}
+
+static void emit_migration_job_gen12(struct xe_sched_job *job,
+ struct xe_lrc *lrc, u32 seqno)
+{
+ u32 dw[MAX_JOB_SIZE_DW], i = 0;
+
+ i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+ seqno, dw, i);
+
+ i = emit_bb_start(job->batch_addr[0], BIT(8), dw, i);
+
+ dw[i++] = preparser_disable(true);
+ i = emit_flush_invalidate(0, dw, i);
+ dw[i++] = preparser_disable(false);
+
+ i = emit_bb_start(job->batch_addr[1], BIT(8), dw, i);
+
+ dw[i++] = (MI_FLUSH_DW | MI_INVALIDATE_TLB | job->migrate_flush_flags |
+ MI_FLUSH_DW_OP_STOREDW) + 1;
+ dw[i++] = xe_lrc_seqno_ggtt_addr(lrc) | MI_FLUSH_DW_USE_GTT;
+ dw[i++] = 0;
+ dw[i++] = seqno; /* value */
+
+ i = emit_user_interrupt(dw, i);
+
+ XE_BUG_ON(i > MAX_JOB_SIZE_DW);
+
+ xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
+}
+
+static void emit_job_gen12_copy(struct xe_sched_job *job)
+{
+ int i;
+
+ if (xe_sched_job_is_migration(job->engine)) {
+ emit_migration_job_gen12(job, job->engine->lrc,
+ xe_sched_job_seqno(job));
+ return;
+ }
+
+ for (i = 0; i < job->engine->width; ++i)
+ __emit_job_gen12_copy(job, job->engine->lrc + i,
+ job->batch_addr[i],
+ xe_sched_job_seqno(job));
+}
+
+static void emit_job_gen12_video(struct xe_sched_job *job)
+{
+ int i;
+
+ /* FIXME: Not doing parallel handshake for now */
+ for (i = 0; i < job->engine->width; ++i)
+ __emit_job_gen12_video(job, job->engine->lrc + i,
+ job->batch_addr[i],
+ xe_sched_job_seqno(job));
+}
+
+static void emit_job_gen12_render_compute(struct xe_sched_job *job)
+{
+ int i;
+
+ for (i = 0; i < job->engine->width; ++i)
+ __emit_job_gen12_render_compute(job, job->engine->lrc + i,
+ job->batch_addr[i],
+ xe_sched_job_seqno(job));
+}
+
+static const struct xe_ring_ops ring_ops_gen12_copy = {
+ .emit_job = emit_job_gen12_copy,
+};
+
+static const struct xe_ring_ops ring_ops_gen12_video = {
+ .emit_job = emit_job_gen12_video,
+};
+
+static const struct xe_ring_ops ring_ops_gen12_render_compute = {
+ .emit_job = emit_job_gen12_render_compute,
+};
+
+const struct xe_ring_ops *
+xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class)
+{
+ switch (class) {
+ case XE_ENGINE_CLASS_COPY:
+ return &ring_ops_gen12_copy;
+ case XE_ENGINE_CLASS_VIDEO_DECODE:
+ case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+ return &ring_ops_gen12_video;
+ case XE_ENGINE_CLASS_RENDER:
+ case XE_ENGINE_CLASS_COMPUTE:
+ return &ring_ops_gen12_render_compute;
+ default:
+ return NULL;
+ }
+}
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.h b/drivers/gpu/drm/xe/xe_ring_ops.h
new file mode 100644
index 000000000000..e942735d76a6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ring_ops.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RING_OPS_H_
+#define _XE_RING_OPS_H_
+
+#include "xe_hw_engine_types.h"
+#include "xe_ring_ops_types.h"
+
+struct xe_gt;
+
+const struct xe_ring_ops *
+xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ring_ops_types.h b/drivers/gpu/drm/xe/xe_ring_ops_types.h
new file mode 100644
index 000000000000..1ae56e2ee7b4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ring_ops_types.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RING_OPS_TYPES_H_
+#define _XE_RING_OPS_TYPES_H_
+
+struct xe_sched_job;
+
+#define MAX_JOB_SIZE_DW 48
+#define MAX_JOB_SIZE_BYTES (MAX_JOB_SIZE_DW * 4)
+
+/**
+ * struct xe_ring_ops - Ring operations
+ */
+struct xe_ring_ops {
+ /** @emit_job: Write job to ring */
+ void (*emit_job)(struct xe_sched_job *job);
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
new file mode 100644
index 000000000000..9e8d0e43c643
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_rtp.h"
+
+#include <drm/xe_drm.h>
+
+#include "xe_gt.h"
+#include "xe_macros.h"
+#include "xe_reg_sr.h"
+
+/**
+ * DOC: Register Table Processing
+ *
+ * Internal infrastructure to define how registers should be updated based on
+ * rules and actions. This can be used to define tables with multiple entries
+ * (one per register) that are walked when xe_rtp_process() is called, applying
+ * the values to the registers whose rules match.
+ */
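+/*
+ * Minimal sketch of a table (illustrative only, not a real workaround; the
+ * register offset 0x1234, the _MMIO() redefinition used by the RTP tables and
+ * the engine's save-restore list at hwe->reg_sr are assumptions here):
+ *
+ *	static const struct xe_rtp_entry example[] = {
+ *		{ XE_RTP_NAME("example-entry"),
+ *		  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+ *		  XE_RTP_SET(_MMIO(0x1234), BIT(0))
+ *		},
+ *		{}
+ *	};
+ *
+ *	xe_rtp_process(example, &hwe->reg_sr, gt, hwe);
+ */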
+
+static bool rule_matches(struct xe_gt *gt,
+ struct xe_hw_engine *hwe,
+ const struct xe_rtp_entry *entry)
+{
+ const struct xe_device *xe = gt_to_xe(gt);
+ const struct xe_rtp_rule *r;
+ unsigned int i;
+ bool match;
+
+ for (r = entry->rules, i = 0; i < entry->n_rules;
+ r = &entry->rules[++i]) {
+ switch (r->match_type) {
+ case XE_RTP_MATCH_PLATFORM:
+ match = xe->info.platform == r->platform;
+ break;
+ case XE_RTP_MATCH_SUBPLATFORM:
+ match = xe->info.platform == r->platform &&
+ xe->info.subplatform == r->subplatform;
+ break;
+ case XE_RTP_MATCH_GRAPHICS_VERSION:
+ /* TODO: match display */
+ match = xe->info.graphics_verx100 == r->ver_start;
+ break;
+ case XE_RTP_MATCH_GRAPHICS_VERSION_RANGE:
+ match = xe->info.graphics_verx100 >= r->ver_start &&
+ xe->info.graphics_verx100 <= r->ver_end;
+ break;
+ case XE_RTP_MATCH_MEDIA_VERSION:
+ match = xe->info.media_verx100 == r->ver_start;
+ break;
+ case XE_RTP_MATCH_MEDIA_VERSION_RANGE:
+ match = xe->info.media_verx100 >= r->ver_start &&
+ xe->info.media_verx100 <= r->ver_end;
+ break;
+ case XE_RTP_MATCH_STEP:
+ /* TODO: match media/display */
+ match = xe->info.step.graphics >= r->step_start &&
+ xe->info.step.graphics < r->step_end;
+ break;
+ case XE_RTP_MATCH_ENGINE_CLASS:
+ match = hwe->class == r->engine_class;
+ break;
+ case XE_RTP_MATCH_NOT_ENGINE_CLASS:
+ match = hwe->class != r->engine_class;
+ break;
+ case XE_RTP_MATCH_FUNC:
+ match = r->match_func(gt, hwe);
+ break;
+ case XE_RTP_MATCH_INTEGRATED:
+ match = !xe->info.is_dgfx;
+ break;
+ case XE_RTP_MATCH_DISCRETE:
+ match = xe->info.is_dgfx;
+ break;
+
+ default:
+ XE_WARN_ON(r->match_type);
+ match = false;
+ }
+
+ if (!match)
+ return false;
+ }
+
+ return true;
+}
+
+static void rtp_add_sr_entry(const struct xe_rtp_entry *entry,
+ struct xe_gt *gt,
+ u32 mmio_base,
+ struct xe_reg_sr *sr)
+{
+ u32 reg = entry->regval.reg + mmio_base;
+ struct xe_reg_sr_entry sr_entry = {
+ .clr_bits = entry->regval.clr_bits,
+ .set_bits = entry->regval.set_bits,
+ .read_mask = entry->regval.read_mask,
+ .masked_reg = entry->regval.flags & XE_RTP_FLAG_MASKED_REG,
+ .reg_type = entry->regval.reg_type,
+ };
+
+ xe_reg_sr_add(sr, reg, &sr_entry);
+}
+
+/**
+ * xe_rtp_process - Process all rtp @entries, adding the matching ones to @sr
+ * @entries: Table with RTP definitions
+ * @sr: Where to add an entry with the values for matching rules. This can be
+ * viewed as the "coalesced view" of multiple tables. The bits for each
+ * register set are expected not to collide with previously added entries.
+ * @gt: The GT to be used for matching rules
+ * @hwe: Engine instance to use for matching rules and as mmio base
+ *
+ * Walk the table pointed to by @entries (terminated by an empty sentinel) and
+ * add all entries with matching rules to @sr. If @hwe is not NULL, its
+ * mmio_base is used to calculate the final register offset.
+ */
+void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
+ struct xe_gt *gt, struct xe_hw_engine *hwe)
+{
+ const struct xe_rtp_entry *entry;
+
+ for (entry = entries; entry && entry->name; entry++) {
+ u32 mmio_base = 0;
+
+ if (entry->regval.flags & XE_RTP_FLAG_FOREACH_ENGINE) {
+ struct xe_hw_engine *each_hwe;
+ enum xe_hw_engine_id id;
+
+ for_each_hw_engine(each_hwe, gt, id) {
+ mmio_base = each_hwe->mmio_base;
+
+ if (rule_matches(gt, each_hwe, entry))
+ rtp_add_sr_entry(entry, gt, mmio_base, sr);
+ }
+ } else if (rule_matches(gt, hwe, entry)) {
+ if (entry->regval.flags & XE_RTP_FLAG_ENGINE_BASE)
+ mmio_base = hwe->mmio_base;
+
+ rtp_add_sr_entry(entry, gt, mmio_base, sr);
+ }
+ }
+}
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
new file mode 100644
index 000000000000..d4e11fdde77f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -0,0 +1,340 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RTP_
+#define _XE_RTP_
+
+#include <linux/xarray.h>
+#include <linux/types.h>
+
+#include "xe_rtp_types.h"
+
+#include "i915_reg_defs.h"
+
+/*
+ * Register table poke infrastructure
+ */
+
+struct xe_hw_engine;
+struct xe_gt;
+struct xe_reg_sr;
+
+/*
+ * Helper macros - not to be used outside this header.
+ */
+/* This counts up to 12 arguments. Any more and it will return the 13th argument. */
+#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
+#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+
+#define __CONCAT(a, b) a ## b
+#define CONCATENATE(a, b) __CONCAT(a, b)
+
+#define __CALL_FOR_EACH_1(MACRO_, x, ...) MACRO_(x)
+#define __CALL_FOR_EACH_2(MACRO_, x, ...) \
+ MACRO_(x) __CALL_FOR_EACH_1(MACRO_, ##__VA_ARGS__)
+#define __CALL_FOR_EACH_3(MACRO_, x, ...) \
+ MACRO_(x) __CALL_FOR_EACH_2(MACRO_, ##__VA_ARGS__)
+#define __CALL_FOR_EACH_4(MACRO_, x, ...) \
+ MACRO_(x) __CALL_FOR_EACH_3(MACRO_, ##__VA_ARGS__)
+
+#define _CALL_FOR_EACH(NARGS_, MACRO_, x, ...) \
+ CONCATENATE(__CALL_FOR_EACH_, NARGS_)(MACRO_, x, ##__VA_ARGS__)
+#define CALL_FOR_EACH(MACRO_, x, ...) \
+ _CALL_FOR_EACH(COUNT_ARGS(x, ##__VA_ARGS__), MACRO_, x, ##__VA_ARGS__)
+
+#define _XE_RTP_REG(x_) (x_), \
+ .reg_type = XE_RTP_REG_REGULAR
+#define _XE_RTP_MCR_REG(x_) (x_), \
+ .reg_type = XE_RTP_REG_MCR
+
+/*
+ * Helper macros for concatenating prefix - do not use them directly outside
+ * this header
+ */
+#define __ADD_XE_RTP_FLAG_PREFIX(x) CONCATENATE(XE_RTP_FLAG_, x) |
+#define __ADD_XE_RTP_RULE_PREFIX(x) CONCATENATE(XE_RTP_RULE_, x) ,
+
+/*
+ * Macros to encode rules to match against platform, IP version, stepping, etc.
+ * Shouldn't be used directly - see XE_RTP_RULES()
+ */
+
+#define _XE_RTP_RULE_PLATFORM(plat__) \
+ { .match_type = XE_RTP_MATCH_PLATFORM, .platform = plat__ }
+
+#define _XE_RTP_RULE_SUBPLATFORM(plat__, sub__) \
+ { .match_type = XE_RTP_MATCH_SUBPLATFORM, \
+ .platform = plat__, .subplatform = sub__ }
+
+#define _XE_RTP_RULE_STEP(start__, end__) \
+ { .match_type = XE_RTP_MATCH_STEP, \
+ .step_start = start__, .step_end = end__ }
+
+#define _XE_RTP_RULE_ENGINE_CLASS(cls__) \
+ { .match_type = XE_RTP_MATCH_ENGINE_CLASS, \
+ .engine_class = (cls__) }
+
+/**
+ * XE_RTP_RULE_PLATFORM - Create rule matching platform
+ * @plat_: platform to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_PLATFORM(plat_) \
+ _XE_RTP_RULE_PLATFORM(XE_##plat_)
+
+/**
+ * XE_RTP_RULE_SUBPLATFORM - Create rule matching platform and sub-platform
+ * @plat_: platform to match
+ * @sub_: sub-platform to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_SUBPLATFORM(plat_, sub_) \
+ _XE_RTP_RULE_SUBPLATFORM(XE_##plat_, XE_SUBPLATFORM_##plat_##_##sub_)
+
+/**
+ * XE_RTP_RULE_STEP - Create rule matching platform stepping
+ * @start_: First stepping matching the rule
+ * @end_: First stepping that does not match the rule
+ *
+ * Note that the range matching this rule is [ @start_, @end_ ), i.e. inclusive
+ * on the left, exclusive on the right.
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_STEP(start_, end_) \
+ _XE_RTP_RULE_STEP(STEP_##start_, STEP_##end_)
+
+/**
+ * XE_RTP_RULE_ENGINE_CLASS - Create rule matching an engine class
+ * @cls_: Engine class to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_ENGINE_CLASS(cls_) \
+ _XE_RTP_RULE_ENGINE_CLASS(XE_ENGINE_CLASS_##cls_)
+
+/**
+ * XE_RTP_RULE_FUNC - Create rule using callback function for match
+ * @func__: Function to call to decide if rule matches
+ *
+ * This allows more complex checks to be performed. The ``XE_RTP``
+ * infrastructure will simply call the @func__ passed in to decide if this
+ * rule matches the device.
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_FUNC(func__) \
+ { .match_type = XE_RTP_MATCH_FUNC, \
+ .match_func = (func__) }
+
+/**
+ * XE_RTP_RULE_GRAPHICS_VERSION - Create rule matching graphics version
+ * @ver__: Graphics IP version to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_GRAPHICS_VERSION(ver__) \
+ { .match_type = XE_RTP_MATCH_GRAPHICS_VERSION, \
+ .ver_start = ver__, }
+
+/**
+ * XE_RTP_RULE_GRAPHICS_VERSION_RANGE - Create rule matching a range of graphics version
+ * @ver_start__: First graphics IP version to match
+ * @ver_end__: Last graphics IP version to match
+ *
+ * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e.
+ * inclusive on both sides.
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_GRAPHICS_VERSION_RANGE(ver_start__, ver_end__) \
+ { .match_type = XE_RTP_MATCH_GRAPHICS_VERSION_RANGE, \
+ .ver_start = ver_start__, .ver_end = ver_end__, }
+
+/**
+ * XE_RTP_RULE_MEDIA_VERSION - Create rule matching media version
+ * @ver__: Media IP version to match
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_MEDIA_VERSION(ver__) \
+ { .match_type = XE_RTP_MATCH_MEDIA_VERSION, \
+ .ver_start = ver__, }
+
+/**
+ * XE_RTP_RULE_MEDIA_VERSION_RANGE - Create rule matching a range of media version
+ * @ver_start__: First media IP version to match
+ * @ver_end__: Last media IP version to match
+ *
+ * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e.
+ * inclusive on both sides.
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_MEDIA_VERSION_RANGE(ver_start__, ver_end__) \
+ { .match_type = XE_RTP_MATCH_MEDIA_VERSION_RANGE, \
+ .ver_start = ver_start__, .ver_end = ver_end__, }
+
+/**
+ * XE_RTP_RULE_IS_INTEGRATED - Create a rule matching integrated graphics devices
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_IS_INTEGRATED \
+ { .match_type = XE_RTP_MATCH_INTEGRATED }
+
+/**
+ * XE_RTP_RULE_IS_DISCRETE - Create a rule matching discrete graphics devices
+ *
+ * Refer to XE_RTP_RULES() for expected usage.
+ */
+#define XE_RTP_RULE_IS_DISCRETE \
+ { .match_type = XE_RTP_MATCH_DISCRETE }
+
+/**
+ * XE_RTP_WR - Helper to write a value to the register, overriding all the bits
+ * @reg_: Register
+ * @val_: Value to set
+ * @...: Additional fields to override in the struct xe_rtp_regval entry
+ *
+ * The correspondent notation in bspec is:
+ *
+ * REGNAME = VALUE
+ */
+#define XE_RTP_WR(reg_, val_, ...) \
+ .regval = { .reg = reg_, .clr_bits = ~0u, .set_bits = (val_), \
+ .read_mask = (~0u), ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_SET - Set bits from @val_ in the register.
+ * @reg_: Register
+ * @val_: Bits to set in the register
+ * @...: Additional fields to override in the struct xe_rtp_regval entry
+ *
+ * For masked registers this translates to a single write, while for other
+ * registers it's a RMW. The correspondent bspec notation is (example for bits 2
+ * and 5, but could be any):
+ *
+ * REGNAME[2] = 1
+ * REGNAME[5] = 1
+ */
+#define XE_RTP_SET(reg_, val_, ...) \
+ .regval = { .reg = reg_, .clr_bits = (val_), .set_bits = (val_), \
+ .read_mask = (val_), ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_CLR: Clear bits from @val_ in the register.
+ * @reg_: Register
+ * @val_: Bits to clear in the register
+ * @...: Additional fields to override in the struct xe_rtp_regval entry
+ *
+ * For masked registers this translates to a single write, while for other
+ * registers it's a RMW. The correspondent bspec notation is (example for bits 2
+ * and 5, but could be any):
+ *
+ * REGNAME[2] = 0
+ * REGNAME[5] = 0
+ */
+#define XE_RTP_CLR(reg_, val_, ...) \
+ .regval = { .reg = reg_, .clr_bits = (val_), .set_bits = 0, \
+ .read_mask = (val_), ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_FIELD_SET: Set a bit range, defined by @mask_bits_, to the value in @val_
+ * @reg_: Register
+ * @mask_bits_: Mask of bits to be changed in the register, forming a field
+ * @val_: Value to set in the field denoted by @mask_bits_
+ * @...: Additional fields to override in the struct xe_rtp_regval entry
+ *
+ * For masked registers this translates to a single write, while for other
+ * registers it's a RMW. The correspondent bspec notation is:
+ *
+ * REGNAME[<end>:<start>] = VALUE
+ */
+#define XE_RTP_FIELD_SET(reg_, mask_bits_, val_, ...) \
+ .regval = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\
+ .read_mask = (mask_bits_), ##__VA_ARGS__ }
+
+#define XE_RTP_FIELD_SET_NO_READ_MASK(reg_, mask_bits_, val_, ...) \
+ .regval = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\
+ .read_mask = 0, ##__VA_ARGS__ }
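+
+/*
+ * Example (illustrative, hypothetical register name): the bspec notation
+ * "EXAMPLE_REG[7:4] = 3" could be encoded as
+ *
+ *	XE_RTP_FIELD_SET(EXAMPLE_REG, GENMASK(7, 4),
+ *			 REG_FIELD_PREP(GENMASK(7, 4), 3))
+ *
+ * so bits 7:4 are cleared and the value 3 is written into that field.
+ */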
+
+/**
+ * XE_WHITELIST_REGISTER - Add register to userspace whitelist
+ * @reg_: Register
+ * @flags_: Whitelist-specific flags to set
+ * @...: Additional fields to override in the struct xe_rtp_regval entry
+ *
+ * Add a register to the whitelist, allowing userspace to modify the register with
+ * regular user privileges.
+ */
+#define XE_WHITELIST_REGISTER(reg_, flags_, ...) \
+ /* TODO fail build if ((flags) & ~(RING_FORCE_TO_NONPRIV_MASK_VALID)) */\
+ .regval = { .reg = reg_, .set_bits = (flags_), \
+ .clr_bits = RING_FORCE_TO_NONPRIV_MASK_VALID, \
+ ##__VA_ARGS__ }
+
+/**
+ * XE_RTP_NAME - Helper to set the name in xe_rtp_entry
+ * @s_: Name describing this rule, often a HW-specific number
+ *
+ * TODO: maybe move this behind a debug config?
+ */
+#define XE_RTP_NAME(s_) .name = (s_)
+
+/**
+ * XE_RTP_FLAG - Helper to add multiple flags to a struct xe_rtp_regval entry
+ * @f1_: Last part of a ``XE_RTP_FLAG_*``
+ * @...: Additional flags, defined like @f1_
+ *
+ * Helper to automatically add a ``XE_RTP_FLAG_`` prefix to @f1_ so it can be
+ * easily used to define struct xe_rtp_regval entries. Example:
+ *
+ * .. code-block:: c
+ *
+ * const struct xe_rtp_entry wa_entries[] = {
+ * ...
+ * { XE_RTP_NAME("test-entry"),
+ * XE_RTP_FLAG(FOREACH_ENGINE, MASKED_REG),
+ * ...
+ * },
+ * ...
+ * };
+ */
+#define XE_RTP_FLAG(f1_, ...) \
+ .flags = (CALL_FOR_EACH(__ADD_XE_RTP_FLAG_PREFIX, f1_, ##__VA_ARGS__) 0)
+
+/**
+ * XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry entry
+ * @r1: Last part of XE_RTP_MATCH_*
+ * @...: Additional rules, defined like @r1
+ *
+ * At least one rule is needed and up to 4 are supported. Multiple rules are
+ * AND'ed together, i.e. all the rules must evaluate to true for the entry to
+ * be processed. See XE_RTP_MATCH_* for the possible match rules. Example:
+ *
+ * .. code-block:: c
+ *
+ * const struct xe_rtp_entry wa_entries[] = {
+ * ...
+ * { XE_RTP_NAME("test-entry"),
+ * XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+ * ...
+ * },
+ * ...
+ * };
+ */
+#define XE_RTP_RULES(r1, ...) \
+ .n_rules = COUNT_ARGS(r1, ##__VA_ARGS__), \
+ .rules = (struct xe_rtp_rule[]) { \
+ CALL_FOR_EACH(__ADD_XE_RTP_RULE_PREFIX, r1, ##__VA_ARGS__) \
+ }
+
+void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
+ struct xe_gt *gt, struct xe_hw_engine *hwe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
new file mode 100644
index 000000000000..b55b556a2495
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_RTP_TYPES_
+#define _XE_RTP_TYPES_
+
+#include <linux/types.h>
+
+#include "i915_reg_defs.h"
+
+struct xe_hw_engine;
+struct xe_gt;
+
+enum {
+ XE_RTP_REG_REGULAR,
+ XE_RTP_REG_MCR,
+};
+
+/**
+ * struct xe_rtp_regval - register and value for rtp table
+ */
+struct xe_rtp_regval {
+ /** @reg: Register */
+ u32 reg;
+ /*
+ * TODO: maybe we need a union here with a func pointer for cases
+ * that are too specific to be generalized
+ */
+ /** @clr_bits: bits to clear when updating register */
+ u32 clr_bits;
+ /** @set_bits: bits to set when updating register */
+ u32 set_bits;
+#define XE_RTP_NOCHECK .read_mask = 0
+ /** @read_mask: mask for bits to consider when reading value back */
+ u32 read_mask;
+#define XE_RTP_FLAG_FOREACH_ENGINE BIT(0)
+#define XE_RTP_FLAG_MASKED_REG BIT(1)
+#define XE_RTP_FLAG_ENGINE_BASE BIT(2)
+ /** @flags: flags to apply on rule evaluation or action */
+ u8 flags;
+ /** @reg_type: register type, see ``XE_RTP_REG_*`` */
+ u8 reg_type;
+};
+
+enum {
+ XE_RTP_MATCH_PLATFORM,
+ XE_RTP_MATCH_SUBPLATFORM,
+ XE_RTP_MATCH_GRAPHICS_VERSION,
+ XE_RTP_MATCH_GRAPHICS_VERSION_RANGE,
+ XE_RTP_MATCH_MEDIA_VERSION,
+ XE_RTP_MATCH_MEDIA_VERSION_RANGE,
+ XE_RTP_MATCH_INTEGRATED,
+ XE_RTP_MATCH_DISCRETE,
+ XE_RTP_MATCH_STEP,
+ XE_RTP_MATCH_ENGINE_CLASS,
+ XE_RTP_MATCH_NOT_ENGINE_CLASS,
+ XE_RTP_MATCH_FUNC,
+};
+
+/** struct xe_rtp_rule - match rule for processing entry */
+struct xe_rtp_rule {
+ u8 match_type;
+
+ /* match filters */
+ union {
+ /* MATCH_PLATFORM / MATCH_SUBPLATFORM */
+ struct {
+ u8 platform;
+ u8 subplatform;
+ };
+ /*
+ * MATCH_GRAPHICS_VERSION / XE_RTP_MATCH_GRAPHICS_VERSION_RANGE /
+ * MATCH_MEDIA_VERSION / XE_RTP_MATCH_MEDIA_VERSION_RANGE
+ */
+ struct {
+ u32 ver_start;
+#define XE_RTP_END_VERSION_UNDEFINED U32_MAX
+ u32 ver_end;
+ };
+ /* MATCH_STEP */
+ struct {
+ u8 step_start;
+ u8 step_end;
+ };
+ /* MATCH_ENGINE_CLASS / MATCH_NOT_ENGINE_CLASS */
+ struct {
+ u8 engine_class;
+ };
+ /* MATCH_FUNC */
+ bool (*match_func)(const struct xe_gt *gt,
+ const struct xe_hw_engine *hwe);
+ };
+};
+
+/** struct xe_rtp_entry - Entry in an rtp table */
+struct xe_rtp_entry {
+ const char *name;
+ const struct xe_rtp_regval regval;
+ const struct xe_rtp_rule *rules;
+ unsigned int n_rules;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
new file mode 100644
index 000000000000..7403410cd806
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/kernel.h>
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_map.h"
+#include "xe_sa.h"
+
+static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_sa_manager *sa_manager = arg;
+ struct xe_bo *bo = sa_manager->bo;
+
+ if (!bo) {
+ drm_err(drm, "no bo for sa manager\n");
+ return;
+ }
+
+ drm_suballoc_manager_fini(&sa_manager->base);
+
+ if (bo->vmap.is_iomem)
+ kvfree(sa_manager->cpu_ptr);
+
+ xe_bo_unpin_map_no_vm(bo);
+ sa_manager->bo = NULL;
+}
+
+int xe_sa_bo_manager_init(struct xe_gt *gt,
+ struct xe_sa_manager *sa_manager,
+ u32 size, u32 align)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ u32 managed_size = size - SZ_4K;
+ struct xe_bo *bo;
+
+ sa_manager->bo = NULL;
+
+ bo = xe_bo_create_pin_map(xe, gt, NULL, size, ttm_bo_type_kernel,
+ XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+ XE_BO_CREATE_GGTT_BIT);
+ if (IS_ERR(bo)) {
+ drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n",
+ PTR_ERR(bo));
+ return PTR_ERR(bo);
+ }
+ sa_manager->bo = bo;
+
+ drm_suballoc_manager_init(&sa_manager->base, managed_size, align);
+ sa_manager->gpu_addr = xe_bo_ggtt_addr(bo);
+
+ if (bo->vmap.is_iomem) {
+ sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL);
+ if (!sa_manager->cpu_ptr) {
+ xe_bo_unpin_map_no_vm(sa_manager->bo);
+ sa_manager->bo = NULL;
+ return -ENOMEM;
+ }
+ } else {
+ sa_manager->cpu_ptr = bo->vmap.vaddr;
+ memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size);
+ }
+
+ return drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini,
+ sa_manager);
+}
+
+struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager,
+ u32 size)
+{
+ return drm_suballoc_new(&sa_manager->base, size, GFP_KERNEL, true, 0);
+}
+
+void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo)
+{
+ struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager);
+ struct xe_device *xe = gt_to_xe(sa_manager->bo->gt);
+
+ if (!sa_manager->bo->vmap.is_iomem)
+ return;
+
+ xe_map_memcpy_to(xe, &sa_manager->bo->vmap, drm_suballoc_soffset(sa_bo),
+ xe_sa_bo_cpu_addr(sa_bo),
+ drm_suballoc_size(sa_bo));
+}
+
+void xe_sa_bo_free(struct drm_suballoc *sa_bo,
+ struct dma_fence *fence)
+{
+ drm_suballoc_free(sa_bo, fence);
+}
diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h
new file mode 100644
index 000000000000..742282ef7179
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sa.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef _XE_SA_H_
+#define _XE_SA_H_
+
+#include "xe_sa_types.h"
+
+struct dma_fence;
+struct xe_bo;
+struct xe_gt;
+
+int xe_sa_bo_manager_init(struct xe_gt *gt,
+ struct xe_sa_manager *sa_manager,
+ u32 size, u32 align);
+
+struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager,
+ u32 size);
+void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo);
+void xe_sa_bo_free(struct drm_suballoc *sa_bo,
+ struct dma_fence *fence);
+
+static inline struct xe_sa_manager *
+to_xe_sa_manager(struct drm_suballoc_manager *mng)
+{
+ return container_of(mng, struct xe_sa_manager, base);
+}
+
+static inline u64 xe_sa_bo_gpu_addr(struct drm_suballoc *sa)
+{
+ return to_xe_sa_manager(sa->manager)->gpu_addr +
+ drm_suballoc_soffset(sa);
+}
+
+static inline void *xe_sa_bo_cpu_addr(struct drm_suballoc *sa)
+{
+ return to_xe_sa_manager(sa->manager)->cpu_ptr +
+ drm_suballoc_soffset(sa);
+}
+
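+/*
+ * Illustrative sketch of the intended usage of the helpers above (error
+ * handling trimmed; "data", "size" and "fence" are placeholders):
+ *
+ *	struct drm_suballoc *sa = xe_sa_bo_new(sa_manager, size);
+ *
+ *	if (!IS_ERR(sa)) {
+ *		memcpy(xe_sa_bo_cpu_addr(sa), data, size);
+ *		xe_sa_bo_flush_write(sa);	// copy to the BO if it is iomem
+ *		// emit GPU commands referencing xe_sa_bo_gpu_addr(sa) ...
+ *		xe_sa_bo_free(sa, fence);	// reclaimed once fence signals
+ *	}
+ */
+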
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h
new file mode 100644
index 000000000000..2ef896aeca1d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sa_types.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef _XE_SA_TYPES_H_
+#define _XE_SA_TYPES_H_
+
+#include <drm/drm_suballoc.h>
+
+struct xe_bo;
+
+struct xe_sa_manager {
+ struct drm_suballoc_manager base;
+ struct xe_bo *bo;
+ u64 gpu_addr;
+ void *cpu_ptr;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
new file mode 100644
index 000000000000..ab81bfe17e8a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_sched_job.h"
+
+#include <linux/dma-fence-array.h>
+#include <linux/slab.h>
+
+#include "xe_device_types.h"
+#include "xe_engine.h"
+#include "xe_gt.h"
+#include "xe_hw_engine_types.h"
+#include "xe_hw_fence.h"
+#include "xe_lrc.h"
+#include "xe_macros.h"
+#include "xe_trace.h"
+#include "xe_vm.h"
+
+static struct kmem_cache *xe_sched_job_slab;
+static struct kmem_cache *xe_sched_job_parallel_slab;
+
+int __init xe_sched_job_module_init(void)
+{
+ xe_sched_job_slab =
+ kmem_cache_create("xe_sched_job",
+ sizeof(struct xe_sched_job) +
+ sizeof(u64), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!xe_sched_job_slab)
+ return -ENOMEM;
+
+ xe_sched_job_parallel_slab =
+ kmem_cache_create("xe_sched_job_parallel",
+ sizeof(struct xe_sched_job) +
+ sizeof(u64) *
+ XE_HW_ENGINE_MAX_INSTANCE, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!xe_sched_job_parallel_slab) {
+ kmem_cache_destroy(xe_sched_job_slab);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void xe_sched_job_module_exit(void)
+{
+ kmem_cache_destroy(xe_sched_job_slab);
+ kmem_cache_destroy(xe_sched_job_parallel_slab);
+}
+
+static struct xe_sched_job *job_alloc(bool parallel)
+{
+ return kmem_cache_zalloc(parallel ? xe_sched_job_parallel_slab :
+ xe_sched_job_slab, GFP_KERNEL);
+}
+
+bool xe_sched_job_is_migration(struct xe_engine *e)
+{
+ return e->vm && (e->vm->flags & XE_VM_FLAG_MIGRATION) &&
+ !(e->flags & ENGINE_FLAG_WA);
+}
+
+static void job_free(struct xe_sched_job *job)
+{
+ struct xe_engine *e = job->engine;
+ bool is_migration = xe_sched_job_is_migration(e);
+
+ kmem_cache_free(xe_engine_is_parallel(job->engine) || is_migration ?
+ xe_sched_job_parallel_slab : xe_sched_job_slab, job);
+}
+
+struct xe_sched_job *xe_sched_job_create(struct xe_engine *e,
+ u64 *batch_addr)
+{
+ struct xe_sched_job *job;
+ struct dma_fence **fences;
+ bool is_migration = xe_sched_job_is_migration(e);
+ int err;
+ int i, j;
+ u32 width;
+
+ /* Migration and kernel engines have their own locking */
+ if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM |
+ ENGINE_FLAG_WA))) {
+ lockdep_assert_held(&e->vm->lock);
+ if (!xe_vm_no_dma_fences(e->vm))
+ xe_vm_assert_held(e->vm);
+ }
+
+ job = job_alloc(xe_engine_is_parallel(e) || is_migration);
+ if (!job)
+ return ERR_PTR(-ENOMEM);
+
+ job->engine = e;
+ kref_init(&job->refcount);
+ xe_engine_get(job->engine);
+
+ err = drm_sched_job_init(&job->drm, e->entity, 1, NULL);
+ if (err)
+ goto err_free;
+
+ if (!xe_engine_is_parallel(e)) {
+ job->fence = xe_lrc_create_seqno_fence(e->lrc);
+ if (IS_ERR(job->fence)) {
+ err = PTR_ERR(job->fence);
+ goto err_sched_job;
+ }
+ } else {
+ struct dma_fence_array *cf;
+
+ fences = kmalloc_array(e->width, sizeof(*fences), GFP_KERNEL);
+ if (!fences) {
+ err = -ENOMEM;
+ goto err_sched_job;
+ }
+
+ for (j = 0; j < e->width; ++j) {
+ fences[j] = xe_lrc_create_seqno_fence(e->lrc + j);
+ if (IS_ERR(fences[j])) {
+ err = PTR_ERR(fences[j]);
+ goto err_fences;
+ }
+ }
+
+ cf = dma_fence_array_create(e->width, fences,
+ e->parallel.composite_fence_ctx,
+ e->parallel.composite_fence_seqno++,
+ false);
+ if (!cf) {
+ --e->parallel.composite_fence_seqno;
+ err = -ENOMEM;
+ goto err_fences;
+ }
+
+ /* Sanity check */
+ for (j = 0; j < e->width; ++j)
+ XE_BUG_ON(cf->base.seqno != fences[j]->seqno);
+
+ job->fence = &cf->base;
+ }
+
+ width = e->width;
+ if (is_migration)
+ width = 2;
+
+ for (i = 0; i < width; ++i)
+ job->batch_addr[i] = batch_addr[i];
+
+ trace_xe_sched_job_create(job);
+ return job;
+
+err_fences:
+ for (j = j - 1; j >= 0; --j) {
+ --e->lrc[j].fence_ctx.next_seqno;
+ dma_fence_put(fences[j]);
+ }
+ kfree(fences);
+err_sched_job:
+ drm_sched_job_cleanup(&job->drm);
+err_free:
+ xe_engine_put(e);
+ job_free(job);
+ return ERR_PTR(err);
+}
+
+/**
+ * xe_sched_job_destroy - Destroy XE schedule job
+ * @ref: reference to XE schedule job
+ *
+ * Called when the reference count reaches zero: drops the references to the
+ * job's xe_engine and fence, cleans up the base DRM scheduler job, and frees
+ * the XE schedule job memory.
+ */
+void xe_sched_job_destroy(struct kref *ref)
+{
+ struct xe_sched_job *job =
+ container_of(ref, struct xe_sched_job, refcount);
+
+ xe_engine_put(job->engine);
+ dma_fence_put(job->fence);
+ drm_sched_job_cleanup(&job->drm);
+ job_free(job);
+}
+
+void xe_sched_job_set_error(struct xe_sched_job *job, int error)
+{
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags))
+ return;
+
+ dma_fence_set_error(job->fence, error);
+
+ if (dma_fence_is_array(job->fence)) {
+ struct dma_fence_array *array =
+ to_dma_fence_array(job->fence);
+ struct dma_fence **child = array->fences;
+ unsigned int nchild = array->num_fences;
+
+ do {
+ struct dma_fence *current_fence = *child++;
+
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+ &current_fence->flags))
+ continue;
+ dma_fence_set_error(current_fence, error);
+ } while (--nchild);
+ }
+
+ trace_xe_sched_job_set_error(job);
+
+ dma_fence_enable_sw_signaling(job->fence);
+ xe_hw_fence_irq_run(job->engine->fence_irq);
+}
+
+bool xe_sched_job_started(struct xe_sched_job *job)
+{
+ struct xe_lrc *lrc = job->engine->lrc;
+
+ return xe_lrc_start_seqno(lrc) >= xe_sched_job_seqno(job);
+}
+
+bool xe_sched_job_completed(struct xe_sched_job *job)
+{
+ struct xe_lrc *lrc = job->engine->lrc;
+
+ /*
+ * Can safely check just LRC[0] seqno as that is last seqno written when
+ * parallel handshake is done.
+ */
+
+ return xe_lrc_seqno(lrc) >= xe_sched_job_seqno(job);
+}
+
+void xe_sched_job_arm(struct xe_sched_job *job)
+{
+ drm_sched_job_arm(&job->drm);
+}
+
+void xe_sched_job_push(struct xe_sched_job *job)
+{
+ xe_sched_job_get(job);
+ trace_xe_sched_job_exec(job);
+ drm_sched_entity_push_job(&job->drm);
+ xe_sched_job_put(job);
+}
diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h
new file mode 100644
index 000000000000..5315ad8656c2
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sched_job.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_SCHED_JOB_H_
+#define _XE_SCHED_JOB_H_
+
+#include "xe_sched_job_types.h"
+
+#define XE_SCHED_HANG_LIMIT 1
+#define XE_SCHED_JOB_TIMEOUT LONG_MAX
+
+int xe_sched_job_module_init(void);
+void xe_sched_job_module_exit(void);
+
+struct xe_sched_job *xe_sched_job_create(struct xe_engine *e,
+ u64 *batch_addr);
+void xe_sched_job_destroy(struct kref *ref);
+
+/**
+ * xe_sched_job_get - get reference to XE schedule job
+ * @job: XE schedule job object
+ *
+ * Increment XE schedule job's reference count
+ */
+static inline struct xe_sched_job *xe_sched_job_get(struct xe_sched_job *job)
+{
+ kref_get(&job->refcount);
+ return job;
+}
+
+/**
+ * xe_sched_job_put - put reference to XE schedule job
+ * @job: XE schedule job object
+ *
+ * Decrement the XE schedule job's reference count and call xe_sched_job_destroy()
+ * when it reaches zero.
+ */
+static inline void xe_sched_job_put(struct xe_sched_job *job)
+{
+ kref_put(&job->refcount, xe_sched_job_destroy);
+}
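+
+/*
+ * Illustrative sketch only: a typical submission flow built on the helpers in
+ * this header is create -> arm -> push (error handling trimmed):
+ *
+ *	struct xe_sched_job *job = xe_sched_job_create(e, batch_addr);
+ *
+ *	if (!IS_ERR(job)) {
+ *		xe_sched_job_arm(job);		// arm the backing drm_sched_job
+ *		xe_sched_job_push(job);		// hand it to the DRM scheduler
+ *	}
+ *
+ * Reference counting follows xe_sched_job_get()/xe_sched_job_put() above.
+ */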
+
+void xe_sched_job_set_error(struct xe_sched_job *job, int error);
+static inline bool xe_sched_job_is_error(struct xe_sched_job *job)
+{
+ return job->fence->error < 0;
+}
+
+bool xe_sched_job_started(struct xe_sched_job *job);
+bool xe_sched_job_completed(struct xe_sched_job *job);
+
+void xe_sched_job_arm(struct xe_sched_job *job);
+void xe_sched_job_push(struct xe_sched_job *job);
+
+static inline struct xe_sched_job *
+to_xe_sched_job(struct drm_sched_job *drm)
+{
+ return container_of(drm, struct xe_sched_job, drm);
+}
+
+static inline u32 xe_sched_job_seqno(struct xe_sched_job *job)
+{
+ return job->fence->seqno;
+}
+
+static inline void
+xe_sched_job_add_migrate_flush(struct xe_sched_job *job, u32 flags)
+{
+ job->migrate_flush_flags = flags;
+}
+
+bool xe_sched_job_is_migration(struct xe_engine *e);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
new file mode 100644
index 000000000000..fd1d75996127
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_SCHED_JOB_TYPES_H_
+#define _XE_SCHED_JOB_TYPES_H_
+
+#include <linux/kref.h>
+
+#include <drm/gpu_scheduler.h>
+
+struct xe_engine;
+
+/**
+ * struct xe_sched_job - XE schedule job (batch buffer tracking)
+ */
+struct xe_sched_job {
+ /** @drm: base DRM scheduler job */
+ struct drm_sched_job drm;
+ /** @engine: XE submission engine */
+ struct xe_engine *engine;
+ /** @refcount: ref count of this job */
+ struct kref refcount;
+ /**
+ * @fence: dma fence indicating completion. One-way relationship: the job
+ * can safely reference the fence, but the fence cannot safely reference the job.
+ */
+#define JOB_FLAG_SUBMIT DMA_FENCE_FLAG_USER_BITS
+ struct dma_fence *fence;
+ /** @user_fence: write back value when BB is complete */
+ struct {
+ /** @used: user fence is used */
+ bool used;
+ /** @addr: address to write to */
+ u64 addr;
+ /** @value: write back value */
+ u64 value;
+ } user_fence;
+ /** @migrate_flush_flags: Additional flush flags for migration jobs */
+ u32 migrate_flush_flags;
+ /** @batch_addr: batch buffer address of job */
+ u64 batch_addr[];
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c
new file mode 100644
index 000000000000..ca77d0971529
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_step.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_step.h"
+
+#include "xe_device.h"
+#include "xe_platform_types.h"
+
+/*
+ * Provide a mapping from the PCI revision ID to the individual GMD
+ * (Graphics/Media/Display) stepping values, which can be compared numerically.
+ *
+ * Some platforms may have unusual ways of mapping PCI revision ID to GMD
+ * steppings. E.g., in some cases a higher PCI revision may translate to a
+ * lower stepping of the GT and/or display IP.
+ *
+ * Also note that some revisions/steppings may have been set aside as
+ * placeholders but never materialized in real hardware; in those cases there
+ * may be jumps in the revision IDs or stepping values in the tables below.
+ */
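+
+/*
+ * Worked example (illustrative only): on PVC the PCI revid is split with
+ * FIELD_GET() below into a base-die id (bits 5:3) and a GT revid (bits 2:0),
+ * so a revid of 0x25 (0b100101) decodes to baseid 0x4 and GT revid 0x5, which
+ * the tables below map to a B1 base die and a B0 GT stepping.
+ */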
+
+/*
+ * Some platforms always have the same stepping value for GT and display;
+ * use a macro to define these to make it easier to identify the platforms
+ * where the two steppings can deviate.
+ */
+#define COMMON_GT_MEDIA_STEP(x_) \
+ .graphics = STEP_##x_, \
+ .media = STEP_##x_
+
+#define COMMON_STEP(x_) \
+ COMMON_GT_MEDIA_STEP(x_), \
+ .graphics = STEP_##x_, \
+ .media = STEP_##x_, \
+ .display = STEP_##x_
+
+__diag_push();
+__diag_ignore_all("-Woverride-init", "Allow field overrides in table");
+
+/* The same GT stepping between different TGL revid ranges doesn't mean the same HW */
+static const struct xe_step_info tgl_revids[] = {
+ [0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 },
+ [1] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_D0 },
+};
+
+static const struct xe_step_info dg1_revids[] = {
+ [0] = { COMMON_STEP(A0) },
+ [1] = { COMMON_STEP(B0) },
+};
+
+static const struct xe_step_info adls_revids[] = {
+ [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 },
+ [0x1] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A2 },
+ [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 },
+ [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_B0 },
+ [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 },
+};
+
+static const struct xe_step_info dg2_g10_revid_step_tbl[] = {
+ [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 },
+ [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_A0 },
+ [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 },
+ [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_C0 },
+};
+
+static const struct xe_step_info dg2_g11_revid_step_tbl[] = {
+ [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 },
+ [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_C0 },
+ [0x5] = { COMMON_GT_MEDIA_STEP(B1), .display = STEP_C0 },
+};
+
+static const struct xe_step_info dg2_g12_revid_step_tbl[] = {
+ [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_C0 },
+ [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_C0 },
+};
+
+static const struct xe_step_info pvc_revid_step_tbl[] = {
+ [0x3] = { .graphics = STEP_A0 },
+ [0x5] = { .graphics = STEP_B0 },
+ [0x6] = { .graphics = STEP_B1 },
+ [0x7] = { .graphics = STEP_C0 },
+};
+
+static const int pvc_basedie_subids[] = {
+ [0x0] = STEP_A0,
+ [0x3] = STEP_B0,
+ [0x4] = STEP_B1,
+ [0x5] = STEP_B3,
+};
+
+__diag_pop();
+
+struct xe_step_info xe_step_get(struct xe_device *xe)
+{
+ const struct xe_step_info *revids = NULL;
+ struct xe_step_info step = {};
+ u16 revid = xe->info.revid;
+ int size = 0;
+ const int *basedie_info = NULL;
+ int basedie_size = 0;
+ int baseid = 0;
+
+ if (xe->info.platform == XE_PVC) {
+ baseid = FIELD_GET(GENMASK(5, 3), xe->info.revid);
+ revid = FIELD_GET(GENMASK(2, 0), xe->info.revid);
+ revids = pvc_revid_step_tbl;
+ size = ARRAY_SIZE(pvc_revid_step_tbl);
+ basedie_info = pvc_basedie_subids;
+ basedie_size = ARRAY_SIZE(pvc_basedie_subids);
+ } else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10) {
+ revids = dg2_g10_revid_step_tbl;
+ size = ARRAY_SIZE(dg2_g10_revid_step_tbl);
+ } else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G11) {
+ revids = dg2_g11_revid_step_tbl;
+ size = ARRAY_SIZE(dg2_g11_revid_step_tbl);
+ } else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G12) {
+ revids = dg2_g12_revid_step_tbl;
+ size = ARRAY_SIZE(dg2_g12_revid_step_tbl);
+ } else if (xe->info.platform == XE_ALDERLAKE_S) {
+ revids = adls_revids;
+ size = ARRAY_SIZE(adls_revids);
+ } else if (xe->info.platform == XE_DG1) {
+ revids = dg1_revids;
+ size = ARRAY_SIZE(dg1_revids);
+ } else if (xe->info.platform == XE_TIGERLAKE) {
+ revids = tgl_revids;
+ size = ARRAY_SIZE(tgl_revids);
+ }
+
+ /* Not using the stepping scheme for the platform yet. */
+ if (!revids)
+ return step;
+
+ if (revid < size && revids[revid].graphics != STEP_NONE) {
+ step = revids[revid];
+ } else {
+ drm_warn(&xe->drm, "Unknown revid 0x%02x\n", revid);
+
+ /*
+ * If we hit a gap in the revid array, use the information for
+ * the next revid.
+ *
+ * This may be wrong in all sorts of ways, especially if the
+ * steppings in the array are not monotonically increasing, but
+ * it's better than defaulting to 0.
+ */
+ while (revid < size && revids[revid].graphics == STEP_NONE)
+ revid++;
+
+ if (revid < size) {
+ drm_dbg(&xe->drm, "Using steppings for revid 0x%02x\n",
+ revid);
+ step = revids[revid];
+ } else {
+ drm_dbg(&xe->drm, "Using future steppings\n");
+ step.graphics = STEP_FUTURE;
+ step.display = STEP_FUTURE;
+ }
+ }
+
+ drm_WARN_ON(&xe->drm, step.graphics == STEP_NONE);
+
+ if (basedie_info && basedie_size) {
+ if (baseid < basedie_size && basedie_info[baseid] != STEP_NONE) {
+ step.basedie = basedie_info[baseid];
+ } else {
+ drm_warn(&xe->drm, "Unknown baseid 0x%02x\n", baseid);
+ step.basedie = STEP_FUTURE;
+ }
+ }
+
+ return step;
+}
+
+#define STEP_NAME_CASE(name) \
+ case STEP_##name: \
+ return #name;
+
+const char *xe_step_name(enum xe_step step)
+{
+ switch (step) {
+ STEP_NAME_LIST(STEP_NAME_CASE);
+
+ default:
+ return "**";
+ }
+}
diff --git a/drivers/gpu/drm/xe/xe_step.h b/drivers/gpu/drm/xe/xe_step.h
new file mode 100644
index 000000000000..0c596c8579fb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_step.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_STEP_H_
+#define _XE_STEP_H_
+
+#include <linux/types.h>
+
+#include "xe_step_types.h"
+
+struct xe_device;
+
+struct xe_step_info xe_step_get(struct xe_device *xe);
+const char *xe_step_name(enum xe_step step);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_step_types.h b/drivers/gpu/drm/xe/xe_step_types.h
new file mode 100644
index 000000000000..b7859f9647ca
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_step_types.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_STEP_TYPES_H_
+#define _XE_STEP_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_step_info {
+ u8 graphics;
+ u8 media;
+ u8 display;
+ u8 basedie;
+};
+
+#define STEP_ENUM_VAL(name) STEP_##name,
+
+#define STEP_NAME_LIST(func) \
+ func(A0) \
+ func(A1) \
+ func(A2) \
+ func(B0) \
+ func(B1) \
+ func(B2) \
+ func(B3) \
+ func(C0) \
+ func(C1) \
+ func(D0) \
+ func(D1) \
+ func(E0) \
+ func(F0) \
+ func(G0) \
+ func(H0) \
+ func(I0) \
+ func(I1) \
+ func(J0)
+
+/*
+ * Symbolic steppings that do not correspond to real hardware. They are valid
+ * as both GT and display stepping values.
+ */
+enum xe_step {
+ STEP_NONE = 0,
+ STEP_NAME_LIST(STEP_ENUM_VAL)
+ STEP_FUTURE,
+ STEP_FOREVER,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
new file mode 100644
index 000000000000..0fbd8d0978cf
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_sync.h"
+
+#include <linux/kthread.h>
+#include <linux/sched/mm.h>
+#include <linux/uaccess.h>
+#include <drm/xe_drm.h>
+#include <drm/drm_print.h>
+#include <drm/drm_syncobj.h>
+
+#include "xe_device_types.h"
+#include "xe_sched_job_types.h"
+#include "xe_macros.h"
+
+#define SYNC_FLAGS_TYPE_MASK 0x3
+#define SYNC_FLAGS_FENCE_INSTALLED 0x10000
+
+struct user_fence {
+ struct xe_device *xe;
+ struct kref refcount;
+ struct dma_fence_cb cb;
+ struct work_struct worker;
+ struct mm_struct *mm;
+ u64 __user *addr;
+ u64 value;
+};
+
+static void user_fence_destroy(struct kref *kref)
+{
+ struct user_fence *ufence = container_of(kref, struct user_fence,
+ refcount);
+
+ mmdrop(ufence->mm);
+ kfree(ufence);
+}
+
+static void user_fence_get(struct user_fence *ufence)
+{
+ kref_get(&ufence->refcount);
+}
+
+static void user_fence_put(struct user_fence *ufence)
+{
+ kref_put(&ufence->refcount, user_fence_destroy);
+}
+
+static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr,
+ u64 value)
+{
+ struct user_fence *ufence;
+
+ ufence = kmalloc(sizeof(*ufence), GFP_KERNEL);
+ if (!ufence)
+ return NULL;
+
+ ufence->xe = xe;
+ kref_init(&ufence->refcount);
+ ufence->addr = u64_to_user_ptr(addr);
+ ufence->value = value;
+ ufence->mm = current->mm;
+ mmgrab(ufence->mm);
+
+ return ufence;
+}
+
+static void user_fence_worker(struct work_struct *w)
+{
+ struct user_fence *ufence = container_of(w, struct user_fence, worker);
+
+ if (mmget_not_zero(ufence->mm)) {
+ kthread_use_mm(ufence->mm);
+ if (copy_to_user(ufence->addr, &ufence->value, sizeof(ufence->value)))
+ XE_WARN_ON("Copy to user failed");
+ kthread_unuse_mm(ufence->mm);
+ mmput(ufence->mm);
+ }
+
+ wake_up_all(&ufence->xe->ufence_wq);
+ user_fence_put(ufence);
+}
+
+static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence)
+{
+ INIT_WORK(&ufence->worker, user_fence_worker);
+ queue_work(ufence->xe->ordered_wq, &ufence->worker);
+ dma_fence_put(fence);
+}
+
+static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+ struct user_fence *ufence = container_of(cb, struct user_fence, cb);
+
+ kick_ufence(ufence, fence);
+}
+
+int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
+ struct xe_sync_entry *sync,
+ struct drm_xe_sync __user *sync_user,
+ bool exec, bool no_dma_fences)
+{
+ struct drm_xe_sync sync_in;
+ int err;
+
+ if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user)))
+ return -EFAULT;
+
+ if (XE_IOCTL_ERR(xe, sync_in.flags &
+ ~(SYNC_FLAGS_TYPE_MASK | DRM_XE_SYNC_SIGNAL)))
+ return -EINVAL;
+
+ switch (sync_in.flags & SYNC_FLAGS_TYPE_MASK) {
+ case DRM_XE_SYNC_SYNCOBJ:
+ if (XE_IOCTL_ERR(xe, no_dma_fences))
+ return -ENOTSUPP;
+
+ if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr)))
+ return -EINVAL;
+
+ sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle);
+ if (XE_IOCTL_ERR(xe, !sync->syncobj))
+ return -ENOENT;
+
+ if (!(sync_in.flags & DRM_XE_SYNC_SIGNAL)) {
+ sync->fence = drm_syncobj_fence_get(sync->syncobj);
+ if (XE_IOCTL_ERR(xe, !sync->fence))
+ return -EINVAL;
+ }
+ break;
+
+ case DRM_XE_SYNC_TIMELINE_SYNCOBJ:
+ if (XE_IOCTL_ERR(xe, no_dma_fences))
+ return -ENOTSUPP;
+
+ if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr)))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, sync_in.timeline_value == 0))
+ return -EINVAL;
+
+ sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle);
+ if (XE_IOCTL_ERR(xe, !sync->syncobj))
+ return -ENOENT;
+
+ if (sync_in.flags & DRM_XE_SYNC_SIGNAL) {
+ sync->chain_fence = dma_fence_chain_alloc();
+ if (!sync->chain_fence)
+ return -ENOMEM;
+ } else {
+ sync->fence = drm_syncobj_fence_get(sync->syncobj);
+ if (XE_IOCTL_ERR(xe, !sync->fence))
+ return -EINVAL;
+
+ err = dma_fence_chain_find_seqno(&sync->fence,
+ sync_in.timeline_value);
+ if (err)
+ return err;
+ }
+ break;
+
+ case DRM_XE_SYNC_DMA_BUF:
+ if (XE_IOCTL_ERR(xe, "TODO"))
+ return -EINVAL;
+ break;
+
+ case DRM_XE_SYNC_USER_FENCE:
+ if (XE_IOCTL_ERR(xe, !(sync_in.flags & DRM_XE_SYNC_SIGNAL)))
+ return -ENOTSUPP;
+
+ if (XE_IOCTL_ERR(xe, sync_in.addr & 0x7))
+ return -EINVAL;
+
+ if (exec) {
+ sync->addr = sync_in.addr;
+ } else {
+ sync->ufence = user_fence_create(xe, sync_in.addr,
+ sync_in.timeline_value);
+ if (XE_IOCTL_ERR(xe, !sync->ufence))
+ return -ENOMEM;
+ }
+
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ sync->flags = sync_in.flags;
+ sync->timeline_value = sync_in.timeline_value;
+
+ return 0;
+}
+
+int xe_sync_entry_wait(struct xe_sync_entry *sync)
+{
+ if (sync->fence)
+ dma_fence_wait(sync->fence, true);
+
+ return 0;
+}
+
+int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job)
+{
+ int err;
+
+ if (sync->fence) {
+ err = drm_sched_job_add_dependency(&job->drm,
+ dma_fence_get(sync->fence));
+ if (err) {
+ dma_fence_put(sync->fence);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+bool xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job,
+ struct dma_fence *fence)
+{
+ if (!(sync->flags & DRM_XE_SYNC_SIGNAL) ||
+ sync->flags & SYNC_FLAGS_FENCE_INSTALLED)
+ return false;
+
+ if (sync->chain_fence) {
+ drm_syncobj_add_point(sync->syncobj, sync->chain_fence,
+ fence, sync->timeline_value);
+ /*
+ * The chain's ownership is transferred to the
+ * timeline.
+ */
+ sync->chain_fence = NULL;
+ } else if (sync->syncobj) {
+ drm_syncobj_replace_fence(sync->syncobj, fence);
+ } else if (sync->ufence) {
+ int err;
+
+ dma_fence_get(fence);
+ user_fence_get(sync->ufence);
+ err = dma_fence_add_callback(fence, &sync->ufence->cb,
+ user_fence_cb);
+ if (err == -ENOENT) {
+ kick_ufence(sync->ufence, fence);
+ } else if (err) {
+ XE_WARN_ON("failed to add user fence");
+ user_fence_put(sync->ufence);
+ dma_fence_put(fence);
+ }
+ } else if ((sync->flags & SYNC_FLAGS_TYPE_MASK) ==
+ DRM_XE_SYNC_USER_FENCE) {
+ job->user_fence.used = true;
+ job->user_fence.addr = sync->addr;
+ job->user_fence.value = sync->timeline_value;
+ }
+
+ /* TODO: external BO? */
+
+ sync->flags |= SYNC_FLAGS_FENCE_INSTALLED;
+
+ return true;
+}
+
+void xe_sync_entry_cleanup(struct xe_sync_entry *sync)
+{
+ if (sync->syncobj)
+ drm_syncobj_put(sync->syncobj);
+ if (sync->fence)
+ dma_fence_put(sync->fence);
+ if (sync->chain_fence)
+ dma_fence_put(&sync->chain_fence->base);
+ if (sync->ufence)
+ user_fence_put(sync->ufence);
+}
diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
new file mode 100644
index 000000000000..4cbcf7a19911
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sync.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_SYNC_H_
+#define _XE_SYNC_H_
+
+#include "xe_sync_types.h"
+
+struct xe_device;
+struct xe_file;
+struct xe_sched_job;
+
+int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
+ struct xe_sync_entry *sync,
+ struct drm_xe_sync __user *sync_user,
+ bool exec, bool no_dma_fences);
+int xe_sync_entry_wait(struct xe_sync_entry *sync);
+int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
+ struct xe_sched_job *job);
+bool xe_sync_entry_signal(struct xe_sync_entry *sync,
+ struct xe_sched_job *job,
+ struct dma_fence *fence);
+void xe_sync_entry_cleanup(struct xe_sync_entry *sync);
+
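+/*
+ * Illustrative sketch only: a caller (e.g. an ioctl handler) typically drives
+ * a sync entry roughly as
+ *
+ *	err = xe_sync_entry_parse(xe, xef, &sync, user_ptr, exec, no_dma_fences);
+ *	...
+ *	err = xe_sync_entry_add_deps(&sync, job);	// wait side: job depends on it
+ *	...
+ *	xe_sync_entry_signal(&sync, job, fence);	// signal side: install job fence
+ *	...
+ *	xe_sync_entry_cleanup(&sync);			// always, on success or error
+ */
+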
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h
new file mode 100644
index 000000000000..24fccc26cb53
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sync_types.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_SYNC_TYPES_H_
+#define _XE_SYNC_TYPES_H_
+
+#include <linux/types.h>
+
+struct drm_syncobj;
+struct dma_fence;
+struct dma_fence_chain;
+struct drm_xe_sync;
+struct user_fence;
+
+struct xe_sync_entry {
+ struct drm_syncobj *syncobj;
+ struct dma_fence *fence;
+ struct dma_fence_chain *chain_fence;
+ struct user_fence *ufence;
+ u64 addr;
+ u64 timeline_value;
+ u32 flags;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_trace.c b/drivers/gpu/drm/xe/xe_trace.c
new file mode 100644
index 000000000000..2570c0b859c4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_trace.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "xe_trace.h"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
new file mode 100644
index 000000000000..a5f963f1f6eb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -0,0 +1,513 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM xe
+
+#if !defined(_XE_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _XE_TRACE_H_
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#include "xe_bo_types.h"
+#include "xe_engine_types.h"
+#include "xe_gpu_scheduler_types.h"
+#include "xe_gt_types.h"
+#include "xe_guc_engine_types.h"
+#include "xe_sched_job.h"
+#include "xe_vm_types.h"
+
+DECLARE_EVENT_CLASS(xe_bo,
+ TP_PROTO(struct xe_bo *bo),
+ TP_ARGS(bo),
+
+ TP_STRUCT__entry(
+ __field(size_t, size)
+ __field(u32, flags)
+ __field(u64, vm)
+ ),
+
+ TP_fast_assign(
+ __entry->size = bo->size;
+ __entry->flags = bo->flags;
+ __entry->vm = (u64)bo->vm;
+ ),
+
+ TP_printk("size=%ld, flags=0x%02x, vm=0x%016llx",
+ __entry->size, __entry->flags, __entry->vm)
+);
+
+DEFINE_EVENT(xe_bo, xe_bo_cpu_fault,
+ TP_PROTO(struct xe_bo *bo),
+ TP_ARGS(bo)
+);
+
+DEFINE_EVENT(xe_bo, xe_bo_move,
+ TP_PROTO(struct xe_bo *bo),
+ TP_ARGS(bo)
+);
+
+DECLARE_EVENT_CLASS(xe_engine,
+ TP_PROTO(struct xe_engine *e),
+ TP_ARGS(e),
+
+ TP_STRUCT__entry(
+ __field(enum xe_engine_class, class)
+ __field(u32, logical_mask)
+ __field(u8, gt_id)
+ __field(u16, width)
+ __field(u16, guc_id)
+ __field(u32, guc_state)
+ __field(u32, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->class = e->class;
+ __entry->logical_mask = e->logical_mask;
+ __entry->gt_id = e->gt->info.id;
+ __entry->width = e->width;
+ __entry->guc_id = e->guc->id;
+ __entry->guc_state = atomic_read(&e->guc->state);
+ __entry->flags = e->flags;
+ ),
+
+ TP_printk("%d:0x%x, gt=%d, width=%d, guc_id=%d, guc_state=0x%x, flags=0x%x",
+ __entry->class, __entry->logical_mask,
+ __entry->gt_id, __entry->width, __entry->guc_id,
+ __entry->guc_state, __entry->flags)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_create,
+ TP_PROTO(struct xe_engine *e),
+ TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_supress_resume,
+ TP_PROTO(struct xe_engine *e),
+ TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_submit,
+ TP_PROTO(struct xe_engine *e),
+ TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_scheduling_enable,
+ TP_PROTO(struct xe_engine *e),
+ TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_scheduling_disable,
+ TP_PROTO(struct xe_engine *e),
+ TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_scheduling_done,
+ TP_PROTO(struct xe_engine *e),
+ TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_register,
+ TP_PROTO(struct xe_engine *e),
+ TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_deregister,
+ TP_PROTO(struct xe_engine *e),
+ TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_deregister_done,
+ TP_PROTO(struct xe_engine *e),
+ TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_close,
+ TP_PROTO(struct xe_engine *e),
+ TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_kill,
+ TP_PROTO(struct xe_engine *e),
+ TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_cleanup_entity,
+ TP_PROTO(struct xe_engine *e),
+ TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_destroy,
+ TP_PROTO(struct xe_engine *e),
+ TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_reset,
+ TP_PROTO(struct xe_engine *e),
+ TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_memory_cat_error,
+ TP_PROTO(struct xe_engine *e),
+ TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_stop,
+ TP_PROTO(struct xe_engine *e),
+ TP_ARGS(e)
+);
+
+DEFINE_EVENT(xe_engine, xe_engine_resubmit,
+ TP_PROTO(struct xe_engine *e),
+ TP_ARGS(e)
+);
+
+DECLARE_EVENT_CLASS(xe_sched_job,
+ TP_PROTO(struct xe_sched_job *job),
+ TP_ARGS(job),
+
+ TP_STRUCT__entry(
+ __field(u32, seqno)
+ __field(u16, guc_id)
+ __field(u32, guc_state)
+ __field(u32, flags)
+ __field(int, error)
+ __field(u64, fence)
+ __field(u64, batch_addr)
+ ),
+
+ TP_fast_assign(
+ __entry->seqno = xe_sched_job_seqno(job);
+ __entry->guc_id = job->engine->guc->id;
+ __entry->guc_state =
+ atomic_read(&job->engine->guc->state);
+ __entry->flags = job->engine->flags;
+ __entry->error = job->fence->error;
+ __entry->fence = (u64)job->fence;
+ __entry->batch_addr = (u64)job->batch_addr[0];
+ ),
+
+ TP_printk("fence=0x%016llx, seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
+ __entry->fence, __entry->seqno, __entry->guc_id,
+ __entry->batch_addr, __entry->guc_state,
+ __entry->flags, __entry->error)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_create,
+ TP_PROTO(struct xe_sched_job *job),
+ TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_exec,
+ TP_PROTO(struct xe_sched_job *job),
+ TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_run,
+ TP_PROTO(struct xe_sched_job *job),
+ TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_free,
+ TP_PROTO(struct xe_sched_job *job),
+ TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_timedout,
+ TP_PROTO(struct xe_sched_job *job),
+ TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_set_error,
+ TP_PROTO(struct xe_sched_job *job),
+ TP_ARGS(job)
+);
+
+DEFINE_EVENT(xe_sched_job, xe_sched_job_ban,
+ TP_PROTO(struct xe_sched_job *job),
+ TP_ARGS(job)
+);
+
+DECLARE_EVENT_CLASS(xe_sched_msg,
+ TP_PROTO(struct xe_sched_msg *msg),
+ TP_ARGS(msg),
+
+ TP_STRUCT__entry(
+ __field(u32, opcode)
+ __field(u16, guc_id)
+ ),
+
+ TP_fast_assign(
+ __entry->opcode = msg->opcode;
+ __entry->guc_id =
+ ((struct xe_engine *)msg->private_data)->guc->id;
+ ),
+
+ TP_printk("guc_id=%d, opcode=%u", __entry->guc_id,
+ __entry->opcode)
+);
+
+DEFINE_EVENT(xe_sched_msg, xe_sched_msg_add,
+ TP_PROTO(struct xe_sched_msg *msg),
+ TP_ARGS(msg)
+);
+
+DEFINE_EVENT(xe_sched_msg, xe_sched_msg_recv,
+ TP_PROTO(struct xe_sched_msg *msg),
+ TP_ARGS(msg)
+);
+
+DECLARE_EVENT_CLASS(xe_hw_fence,
+ TP_PROTO(struct xe_hw_fence *fence),
+ TP_ARGS(fence),
+
+ TP_STRUCT__entry(
+ __field(u64, ctx)
+ __field(u32, seqno)
+ __field(u64, fence)
+ ),
+
+ TP_fast_assign(
+ __entry->ctx = fence->dma.context;
+ __entry->seqno = fence->dma.seqno;
+ __entry->fence = (u64)fence;
+ ),
+
+ TP_printk("ctx=0x%016llx, fence=0x%016llx, seqno=%u",
+ __entry->ctx, __entry->fence, __entry->seqno)
+);
+
+DEFINE_EVENT(xe_hw_fence, xe_hw_fence_create,
+ TP_PROTO(struct xe_hw_fence *fence),
+ TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_hw_fence, xe_hw_fence_signal,
+ TP_PROTO(struct xe_hw_fence *fence),
+ TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_hw_fence, xe_hw_fence_try_signal,
+ TP_PROTO(struct xe_hw_fence *fence),
+ TP_ARGS(fence)
+);
+
+DEFINE_EVENT(xe_hw_fence, xe_hw_fence_free,
+ TP_PROTO(struct xe_hw_fence *fence),
+ TP_ARGS(fence)
+);
+
+DECLARE_EVENT_CLASS(xe_vma,
+ TP_PROTO(struct xe_vma *vma),
+ TP_ARGS(vma),
+
+ TP_STRUCT__entry(
+ __field(u64, vma)
+ __field(u32, asid)
+ __field(u64, start)
+ __field(u64, end)
+ __field(u64, ptr)
+ ),
+
+ TP_fast_assign(
+ __entry->vma = (u64)vma;
+ __entry->asid = vma->vm->usm.asid;
+ __entry->start = vma->start;
+ __entry->end = vma->end;
+ __entry->ptr = (u64)vma->userptr.ptr;
+ ),
+
+ TP_printk("vma=0x%016llx, asid=0x%05x, start=0x%012llx, end=0x%012llx, ptr=0x%012llx,",
+ __entry->vma, __entry->asid, __entry->start,
+ __entry->end, __entry->ptr)
+)
+
+DEFINE_EVENT(xe_vma, xe_vma_flush,
+ TP_PROTO(struct xe_vma *vma),
+ TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_pagefault,
+ TP_PROTO(struct xe_vma *vma),
+ TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_acc,
+ TP_PROTO(struct xe_vma *vma),
+ TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_fail,
+ TP_PROTO(struct xe_vma *vma),
+ TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_bind,
+ TP_PROTO(struct xe_vma *vma),
+ TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_pf_bind,
+ TP_PROTO(struct xe_vma *vma),
+ TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_unbind,
+ TP_PROTO(struct xe_vma *vma),
+ TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_worker,
+ TP_PROTO(struct xe_vma *vma),
+ TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_exec,
+ TP_PROTO(struct xe_vma *vma),
+ TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_rebind_worker,
+ TP_PROTO(struct xe_vma *vma),
+ TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_rebind_exec,
+ TP_PROTO(struct xe_vma *vma),
+ TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate,
+ TP_PROTO(struct xe_vma *vma),
+ TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_usm_invalidate,
+ TP_PROTO(struct xe_vma *vma),
+ TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_evict,
+ TP_PROTO(struct xe_vma *vma),
+ TP_ARGS(vma)
+);
+
+DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate_complete,
+ TP_PROTO(struct xe_vma *vma),
+ TP_ARGS(vma)
+);
+
+DECLARE_EVENT_CLASS(xe_vm,
+ TP_PROTO(struct xe_vm *vm),
+ TP_ARGS(vm),
+
+ TP_STRUCT__entry(
+ __field(u64, vm)
+ __field(u32, asid)
+ ),
+
+ TP_fast_assign(
+ __entry->vm = (u64)vm;
+ __entry->asid = vm->usm.asid;
+ ),
+
+ TP_printk("vm=0x%016llx, asid=0x%05x", __entry->vm,
+ __entry->asid)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_create,
+ TP_PROTO(struct xe_vm *vm),
+ TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_free,
+ TP_PROTO(struct xe_vm *vm),
+ TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_cpu_bind,
+ TP_PROTO(struct xe_vm *vm),
+ TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_restart,
+ TP_PROTO(struct xe_vm *vm),
+ TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_enter,
+ TP_PROTO(struct xe_vm *vm),
+ TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_retry,
+ TP_PROTO(struct xe_vm *vm),
+ TP_ARGS(vm)
+);
+
+DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit,
+ TP_PROTO(struct xe_vm *vm),
+ TP_ARGS(vm)
+);
+
+TRACE_EVENT(xe_guc_ct_h2g_flow_control,
+ TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len),
+ TP_ARGS(_head, _tail, size, space, len),
+
+ TP_STRUCT__entry(
+ __field(u32, _head)
+ __field(u32, _tail)
+ __field(u32, size)
+ __field(u32, space)
+ __field(u32, len)
+ ),
+
+ TP_fast_assign(
+ __entry->_head = _head;
+ __entry->_tail = _tail;
+ __entry->size = size;
+ __entry->space = space;
+ __entry->len = len;
+ ),
+
+ TP_printk("head=%u, tail=%u, size=%u, space=%u, len=%u",
+ __entry->_head, __entry->_tail, __entry->size,
+ __entry->space, __entry->len)
+);
+
+TRACE_EVENT(xe_guc_ct_g2h_flow_control,
+ TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len),
+ TP_ARGS(_head, _tail, size, space, len),
+
+ TP_STRUCT__entry(
+ __field(u32, _head)
+ __field(u32, _tail)
+ __field(u32, size)
+ __field(u32, space)
+ __field(u32, len)
+ ),
+
+ TP_fast_assign(
+ __entry->_head = _head;
+ __entry->_tail = _tail;
+ __entry->size = size;
+ __entry->space = space;
+ __entry->len = len;
+ ),
+
+ TP_printk("head=%u, tail=%u, size=%u, space=%u, len=%u",
+ __entry->_head, __entry->_tail, __entry->size,
+ __entry->space, __entry->len)
+);
+
+#endif
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe
+#define TRACE_INCLUDE_FILE xe_trace
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c
new file mode 100644
index 000000000000..a0ba8bba84d1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021-2022 Intel Corporation
+ * Copyright (C) 2021-2022 Red Hat
+ */
+
+#include <drm/drm_managed.h>
+
+#include <drm/ttm/ttm_range_manager.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_ttm_gtt_mgr.h"
+
+struct xe_ttm_gtt_node {
+ struct ttm_buffer_object *tbo;
+ struct ttm_range_mgr_node base;
+};
+
+static inline struct xe_ttm_gtt_mgr *
+to_gtt_mgr(struct ttm_resource_manager *man)
+{
+ return container_of(man, struct xe_ttm_gtt_mgr, manager);
+}
+
+static inline struct xe_ttm_gtt_node *
+to_xe_ttm_gtt_node(struct ttm_resource *res)
+{
+ return container_of(res, struct xe_ttm_gtt_node, base.base);
+}
+
+static int xe_ttm_gtt_mgr_new(struct ttm_resource_manager *man,
+ struct ttm_buffer_object *tbo,
+ const struct ttm_place *place,
+ struct ttm_resource **res)
+{
+ struct xe_ttm_gtt_node *node;
+ int r;
+
+ node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ node->tbo = tbo;
+ ttm_resource_init(tbo, place, &node->base.base);
+
+ if (!(place->flags & TTM_PL_FLAG_TEMPORARY) &&
+ ttm_resource_manager_usage(man) > (man->size << PAGE_SHIFT)) {
+ r = -ENOSPC;
+ goto err_fini;
+ }
+
+ node->base.mm_nodes[0].start = 0;
+ node->base.mm_nodes[0].size = PFN_UP(node->base.base.size);
+ node->base.base.start = XE_BO_INVALID_OFFSET;
+
+ *res = &node->base.base;
+
+ return 0;
+
+err_fini:
+ ttm_resource_fini(man, &node->base.base);
+ kfree(node);
+ return r;
+}
+
+static void xe_ttm_gtt_mgr_del(struct ttm_resource_manager *man,
+ struct ttm_resource *res)
+{
+ struct xe_ttm_gtt_node *node = to_xe_ttm_gtt_node(res);
+
+ ttm_resource_fini(man, res);
+ kfree(node);
+}
+
+static void xe_ttm_gtt_mgr_debug(struct ttm_resource_manager *man,
+ struct drm_printer *printer)
+{
+}
+
+static const struct ttm_resource_manager_func xe_ttm_gtt_mgr_func = {
+ .alloc = xe_ttm_gtt_mgr_new,
+ .free = xe_ttm_gtt_mgr_del,
+ .debug = xe_ttm_gtt_mgr_debug
+};
+
+static void ttm_gtt_mgr_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_ttm_gtt_mgr *mgr = arg;
+ struct xe_device *xe = gt_to_xe(mgr->gt);
+ struct ttm_resource_manager *man = &mgr->manager;
+ int err;
+
+ ttm_resource_manager_set_used(man, false);
+
+ err = ttm_resource_manager_evict_all(&xe->ttm, man);
+ if (err)
+ return;
+
+ ttm_resource_manager_cleanup(man);
+ ttm_set_driver_manager(&xe->ttm, XE_PL_TT, NULL);
+}
+
+int xe_ttm_gtt_mgr_init(struct xe_gt *gt, struct xe_ttm_gtt_mgr *mgr,
+ u64 gtt_size)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ struct ttm_resource_manager *man = &mgr->manager;
+ int err;
+
+ XE_BUG_ON(xe_gt_is_media_type(gt));
+
+ mgr->gt = gt;
+ man->use_tt = true;
+ man->func = &xe_ttm_gtt_mgr_func;
+
+ ttm_resource_manager_init(man, &xe->ttm, gtt_size >> PAGE_SHIFT);
+
+ ttm_set_driver_manager(&xe->ttm, XE_PL_TT, &mgr->manager);
+ ttm_resource_manager_set_used(man, true);
+
+ err = drmm_add_action_or_reset(&xe->drm, ttm_gtt_mgr_fini, mgr);
+ if (err)
+ return err;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h
new file mode 100644
index 000000000000..d1d57cb9c2b8
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TTM_GTT_MGR_H_
+#define _XE_TTM_GTT_MGR_H_
+
+#include "xe_ttm_gtt_mgr_types.h"
+
+struct xe_gt;
+
+int xe_ttm_gtt_mgr_init(struct xe_gt *gt, struct xe_ttm_gtt_mgr *mgr,
+ u64 gtt_size);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h
new file mode 100644
index 000000000000..c66737488326
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TTM_GTT_MGR_TYPES_H_
+#define _XE_TTM_GTT_MGR_TYPES_H_
+
+#include <drm/ttm/ttm_device.h>
+
+struct xe_gt;
+
+struct xe_ttm_gtt_mgr {
+ struct xe_gt *gt;
+ struct ttm_resource_manager manager;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
new file mode 100644
index 000000000000..e391e81d3640
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021-2022 Intel Corporation
+ * Copyright (C) 2021-2022 Red Hat
+ */
+
+#include <drm/drm_managed.h>
+
+#include <drm/ttm/ttm_range_manager.h>
+#include <drm/ttm/ttm_placement.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_res_cursor.h"
+#include "xe_ttm_vram_mgr.h"
+
+static inline struct xe_ttm_vram_mgr *
+to_vram_mgr(struct ttm_resource_manager *man)
+{
+ return container_of(man, struct xe_ttm_vram_mgr, manager);
+}
+
+static inline struct xe_gt *
+mgr_to_gt(struct xe_ttm_vram_mgr *mgr)
+{
+ return mgr->gt;
+}
+
+static inline struct drm_buddy_block *
+xe_ttm_vram_mgr_first_block(struct list_head *list)
+{
+ return list_first_entry_or_null(list, struct drm_buddy_block, link);
+}
+
+static inline bool xe_is_vram_mgr_blocks_contiguous(struct list_head *head)
+{
+ struct drm_buddy_block *block;
+ u64 start, size;
+
+ block = xe_ttm_vram_mgr_first_block(head);
+ if (!block)
+ return false;
+
+ while (head != block->link.next) {
+ start = xe_ttm_vram_mgr_block_start(block);
+ size = xe_ttm_vram_mgr_block_size(block);
+
+ block = list_entry(block->link.next, struct drm_buddy_block,
+ link);
+ if (start + size != xe_ttm_vram_mgr_block_start(block))
+ return false;
+ }
+
+ return true;
+}
+
+static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
+ struct ttm_buffer_object *tbo,
+ const struct ttm_place *place,
+ struct ttm_resource **res)
+{
+ u64 max_bytes, cur_size, min_block_size;
+ struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man);
+ struct xe_ttm_vram_mgr_resource *vres;
+ u64 size, remaining_size, lpfn, fpfn;
+ struct drm_buddy *mm = &mgr->mm;
+ struct drm_buddy_block *block;
+ unsigned long pages_per_block;
+ int r;
+
+ lpfn = (u64)place->lpfn << PAGE_SHIFT;
+ if (!lpfn)
+ lpfn = man->size;
+
+ fpfn = (u64)place->fpfn << PAGE_SHIFT;
+
+ max_bytes = mgr->gt->mem.vram.size;
+ if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+ pages_per_block = ~0ul;
+ } else {
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ pages_per_block = HPAGE_PMD_NR;
+#else
+ /* default to 2MB */
+ pages_per_block = 2UL << (20UL - PAGE_SHIFT);
+#endif
+
+ pages_per_block = max_t(uint32_t, pages_per_block,
+ tbo->page_alignment);
+ }
+
+ vres = kzalloc(sizeof(*vres), GFP_KERNEL);
+ if (!vres)
+ return -ENOMEM;
+
+ ttm_resource_init(tbo, place, &vres->base);
+ remaining_size = vres->base.size;
+
+ /* bail out quickly if there's likely not enough VRAM for this BO */
+ if (ttm_resource_manager_usage(man) > max_bytes) {
+ r = -ENOSPC;
+ goto error_fini;
+ }
+
+ INIT_LIST_HEAD(&vres->blocks);
+
+ if (place->flags & TTM_PL_FLAG_TOPDOWN)
+ vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+
+ if (fpfn || lpfn != man->size)
+ /* Allocate blocks in desired range */
+ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+
+ mutex_lock(&mgr->lock);
+ while (remaining_size) {
+ if (tbo->page_alignment)
+ min_block_size = tbo->page_alignment << PAGE_SHIFT;
+ else
+ min_block_size = mgr->default_page_size;
+
+ XE_BUG_ON(min_block_size < mm->chunk_size);
+
+ /* Limit maximum size to 2GiB due to SG table limitations */
+ size = min(remaining_size, 2ULL << 30);
+
+ if (size >= pages_per_block << PAGE_SHIFT)
+ min_block_size = pages_per_block << PAGE_SHIFT;
+
+ cur_size = size;
+
+ if (fpfn + size != place->lpfn << PAGE_SHIFT) {
+ /*
+ * Unless this is an actual range allocation, adjust size and
+ * min_block_size according to whether the contiguous flag is set
+ */
+ if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+ size = roundup_pow_of_two(size);
+ min_block_size = size;
+ /*
+ * Modify the size value if size is not
+ * aligned with min_block_size
+ */
+ } else if (!IS_ALIGNED(size, min_block_size)) {
+ size = round_up(size, min_block_size);
+ }
+ }
+
+ r = drm_buddy_alloc_blocks(mm, fpfn,
+ lpfn,
+ size,
+ min_block_size,
+ &vres->blocks,
+ vres->flags);
+ if (unlikely(r))
+ goto error_free_blocks;
+
+ if (size > remaining_size)
+ remaining_size = 0;
+ else
+ remaining_size -= size;
+ }
+ mutex_unlock(&mgr->lock);
+
+ if (cur_size != size) {
+ struct drm_buddy_block *block;
+ struct list_head *trim_list;
+ u64 original_size;
+ LIST_HEAD(temp);
+
+ trim_list = &vres->blocks;
+ original_size = vres->base.size;
+
+ /*
+ * If size value is rounded up to min_block_size, trim the last
+ * block to the required size
+ */
+ if (!list_is_singular(&vres->blocks)) {
+ block = list_last_entry(&vres->blocks, typeof(*block), link);
+ list_move_tail(&block->link, &temp);
+ trim_list = &temp;
+ /*
+ * Compute original_size by subtracting (aligned size -
+ * original size) from the last block's size
+ */
+ original_size = xe_ttm_vram_mgr_block_size(block) -
+ (size - cur_size);
+ }
+
+ mutex_lock(&mgr->lock);
+ drm_buddy_block_trim(mm,
+ original_size,
+ trim_list);
+ mutex_unlock(&mgr->lock);
+
+ if (!list_empty(&temp))
+ list_splice_tail(trim_list, &vres->blocks);
+ }
+
+ vres->base.start = 0;
+ list_for_each_entry(block, &vres->blocks, link) {
+ unsigned long start;
+
+ start = xe_ttm_vram_mgr_block_start(block) +
+ xe_ttm_vram_mgr_block_size(block);
+ start >>= PAGE_SHIFT;
+
+ if (start > PFN_UP(vres->base.size))
+ start -= PFN_UP(vres->base.size);
+ else
+ start = 0;
+ vres->base.start = max(vres->base.start, start);
+ }
+
+ if (xe_is_vram_mgr_blocks_contiguous(&vres->blocks))
+ vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
+
+ *res = &vres->base;
+ return 0;
+
+error_free_blocks:
+ drm_buddy_free_list(mm, &vres->blocks);
+ mutex_unlock(&mgr->lock);
+error_fini:
+ ttm_resource_fini(man, &vres->base);
+ kfree(vres);
+
+ return r;
+}
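+
+/*
+ * Worked example (illustrative only): a 3 MiB TTM_PL_FLAG_CONTIGUOUS request
+ * is rounded up to a single 4 MiB buddy block by roundup_pow_of_two() above;
+ * the loop then exits with cur_size = 3 MiB and size = 4 MiB, and since the
+ * block list is singular, drm_buddy_block_trim() trims that block back down
+ * to the original 3 MiB resource size.
+ */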
+
+static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man,
+ struct ttm_resource *res)
+{
+ struct xe_ttm_vram_mgr_resource *vres =
+ to_xe_ttm_vram_mgr_resource(res);
+ struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man);
+ struct drm_buddy *mm = &mgr->mm;
+
+ mutex_lock(&mgr->lock);
+ drm_buddy_free_list(mm, &vres->blocks);
+ mutex_unlock(&mgr->lock);
+
+ ttm_resource_fini(man, res);
+
+ kfree(vres);
+}
+
+static void xe_ttm_vram_mgr_debug(struct ttm_resource_manager *man,
+ struct drm_printer *printer)
+{
+ struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man);
+ struct drm_buddy *mm = &mgr->mm;
+
+ mutex_lock(&mgr->lock);
+ drm_buddy_print(mm, printer);
+ mutex_unlock(&mgr->lock);
+ drm_printf(printer, "man size:%llu\n", man->size);
+}
+
+static const struct ttm_resource_manager_func xe_ttm_vram_mgr_func = {
+ .alloc = xe_ttm_vram_mgr_new,
+ .free = xe_ttm_vram_mgr_del,
+ .debug = xe_ttm_vram_mgr_debug
+};
+
+static void ttm_vram_mgr_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_ttm_vram_mgr *mgr = arg;
+ struct xe_device *xe = gt_to_xe(mgr->gt);
+ struct ttm_resource_manager *man = &mgr->manager;
+ int err;
+
+ ttm_resource_manager_set_used(man, false);
+
+ err = ttm_resource_manager_evict_all(&xe->ttm, man);
+ if (err)
+ return;
+
+ drm_buddy_fini(&mgr->mm);
+
+ ttm_resource_manager_cleanup(man);
+ ttm_set_driver_manager(&xe->ttm, XE_PL_VRAM0 + mgr->gt->info.vram_id,
+ NULL);
+}
+
+int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ struct ttm_resource_manager *man = &mgr->manager;
+ int err;
+
+ XE_BUG_ON(xe_gt_is_media_type(gt));
+
+ mgr->gt = gt;
+ man->func = &xe_ttm_vram_mgr_func;
+
+ ttm_resource_manager_init(man, &xe->ttm, gt->mem.vram.size);
+ err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
+ if (err)
+ return err;
+
+ mutex_init(&mgr->lock);
+ mgr->default_page_size = PAGE_SIZE;
+
+ ttm_set_driver_manager(&xe->ttm, XE_PL_VRAM0 + gt->info.vram_id,
+ &mgr->manager);
+ ttm_resource_manager_set_used(man, true);
+
+ err = drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
+ struct ttm_resource *res,
+ u64 offset, u64 length,
+ struct device *dev,
+ enum dma_data_direction dir,
+ struct sg_table **sgt)
+{
+ struct xe_gt *gt = xe_device_get_gt(xe, res->mem_type - XE_PL_VRAM0);
+ struct xe_res_cursor cursor;
+ struct scatterlist *sg;
+ int num_entries = 0;
+ int i, r;
+
+ *sgt = kmalloc(sizeof(**sgt), GFP_KERNEL);
+ if (!*sgt)
+ return -ENOMEM;
+
+ /* Determine the number of DRM_BUDDY blocks to export */
+ xe_res_first(res, offset, length, &cursor);
+ while (cursor.remaining) {
+ num_entries++;
+ xe_res_next(&cursor, cursor.size);
+ }
+
+ r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL);
+ if (r)
+ goto error_free;
+
+ /* Initialize scatterlist nodes of sg_table */
+ for_each_sgtable_sg((*sgt), sg, i)
+ sg->length = 0;
+
+ /*
+ * Walk the DRM_BUDDY blocks to populate the scatterlist nodes.
+ * Note: use the iterator API to get the first DRM_BUDDY block
+ * and the number of bytes in it, then access the following
+ * DRM_BUDDY block(s) if more of the buffer needs to be exported.
+ */
+ xe_res_first(res, offset, length, &cursor);
+ for_each_sgtable_sg((*sgt), sg, i) {
+ phys_addr_t phys = cursor.start + gt->mem.vram.io_start;
+ size_t size = cursor.size;
+ dma_addr_t addr;
+
+ addr = dma_map_resource(dev, phys, size, dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ r = dma_mapping_error(dev, addr);
+ if (r)
+ goto error_unmap;
+
+ sg_set_page(sg, NULL, size, 0);
+ sg_dma_address(sg) = addr;
+ sg_dma_len(sg) = size;
+
+ xe_res_next(&cursor, cursor.size);
+ }
+
+ return 0;
+
+error_unmap:
+ for_each_sgtable_sg((*sgt), sg, i) {
+ if (!sg->length)
+ continue;
+
+ dma_unmap_resource(dev, sg->dma_address,
+ sg->length, dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ }
+ sg_free_table(*sgt);
+
+error_free:
+ kfree(*sgt);
+ return r;
+}
+
+void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir,
+ struct sg_table *sgt)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sgtable_sg(sgt, sg, i)
+ dma_unmap_resource(dev, sg->dma_address,
+ sg->length, dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ sg_free_table(sgt);
+ kfree(sgt);
+}
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
new file mode 100644
index 000000000000..537fccec4318
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TTM_VRAM_MGR_H_
+#define _XE_TTM_VRAM_MGR_H_
+
+#include "xe_ttm_vram_mgr_types.h"
+
+enum dma_data_direction;
+struct xe_device;
+struct xe_gt;
+
+int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr);
+int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
+ struct ttm_resource *res,
+ u64 offset, u64 length,
+ struct device *dev,
+ enum dma_data_direction dir,
+ struct sg_table **sgt);
+void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir,
+ struct sg_table *sgt);
+
+static inline u64 xe_ttm_vram_mgr_block_start(struct drm_buddy_block *block)
+{
+ return drm_buddy_block_offset(block);
+}
+
+static inline u64 xe_ttm_vram_mgr_block_size(struct drm_buddy_block *block)
+{
+ return PAGE_SIZE << drm_buddy_block_order(block);
+}
+
+static inline struct xe_ttm_vram_mgr_resource *
+to_xe_ttm_vram_mgr_resource(struct ttm_resource *res)
+{
+ return container_of(res, struct xe_ttm_vram_mgr_resource, base);
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
new file mode 100644
index 000000000000..39b93c71c21b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TTM_VRAM_MGR_TYPES_H_
+#define _XE_TTM_VRAM_MGR_TYPES_H_
+
+#include <drm/drm_buddy.h>
+#include <drm/ttm/ttm_device.h>
+
+struct xe_gt;
+
+/**
+ * struct xe_ttm_vram_mgr - XE TTM VRAM manager
+ *
+ * Manages placement of TTM resource in VRAM.
+ */
+struct xe_ttm_vram_mgr {
+ /** @gt: Graphics tile which the VRAM belongs to */
+ struct xe_gt *gt;
+ /** @manager: Base TTM resource manager */
+ struct ttm_resource_manager manager;
+ /** @mm: DRM buddy allocator which manages the VRAM */
+ struct drm_buddy mm;
+ /** @default_page_size: default page size */
+ u64 default_page_size;
+ /** @lock: protects allocations of VRAM */
+ struct mutex lock;
+};
+
+/**
+ * struct xe_ttm_vram_mgr_resource - XE TTM VRAM resource
+ */
+struct xe_ttm_vram_mgr_resource {
+ /** @base: Base TTM resource */
+ struct ttm_resource base;
+ /** @blocks: list of DRM buddy blocks */
+ struct list_head blocks;
+ /** @flags: flags associated with the resource */
+ unsigned long flags;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
new file mode 100644
index 000000000000..e043db037368
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_wa.h"
+
+#include "xe_platform_types.h"
+#include "xe_gt_types.h"
+#include "xe_rtp.h"
+
+#include "gt/intel_gt_regs.h"
+
+#undef _MMIO
+#undef MCR_REG
+#define _MMIO(x) _XE_RTP_REG(x)
+#define MCR_REG(x) _XE_RTP_MCR_REG(x)
+
+static const struct xe_rtp_entry gt_tunings[] = {
+ { XE_RTP_NAME("Tuning: 32B Access Enable"),
+ XE_RTP_RULES(PLATFORM(DG2)),
+ XE_RTP_SET(XEHP_SQCM, EN_32B_ACCESS)
+ },
+ {}
+};
+
+static const struct xe_rtp_entry context_tunings[] = {
+ { XE_RTP_NAME("1604555607"),
+ XE_RTP_RULES(GRAPHICS_VERSION(1200)),
+ XE_RTP_FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2, FF_MODE2_TDS_TIMER_MASK,
+ FF_MODE2_TDS_TIMER_128)
+ },
+ {}
+};
+
+void xe_tuning_process_gt(struct xe_gt *gt)
+{
+ xe_rtp_process(gt_tunings, &gt->reg_sr, gt, NULL);
+}
diff --git a/drivers/gpu/drm/xe/xe_tuning.h b/drivers/gpu/drm/xe/xe_tuning.h
new file mode 100644
index 000000000000..66dbc93192bd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tuning.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_TUNING_
+#define _XE_TUNING_
+
+struct xe_gt;
+
+void xe_tuning_process_gt(struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
new file mode 100644
index 000000000000..938d14698003
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_device.h"
+#include "xe_huc.h"
+#include "xe_gt.h"
+#include "xe_guc.h"
+#include "xe_guc_pc.h"
+#include "xe_guc_submit.h"
+#include "xe_uc.h"
+#include "xe_uc_fw.h"
+#include "xe_wopcm.h"
+
+static struct xe_gt *
+uc_to_gt(struct xe_uc *uc)
+{
+ return container_of(uc, struct xe_gt, uc);
+}
+
+static struct xe_device *
+uc_to_xe(struct xe_uc *uc)
+{
+ return gt_to_xe(uc_to_gt(uc));
+}
+
+/* Should be called once at driver load only */
+int xe_uc_init(struct xe_uc *uc)
+{
+ int ret;
+
+ /* GuC submission not enabled, nothing to do */
+ if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+ return 0;
+
+ ret = xe_guc_init(&uc->guc);
+ if (ret)
+ goto err;
+
+ ret = xe_huc_init(&uc->huc);
+ if (ret)
+ goto err;
+
+ ret = xe_wopcm_init(&uc->wopcm);
+ if (ret)
+ goto err;
+
+ ret = xe_guc_submit_init(&uc->guc);
+ if (ret)
+ goto err;
+
+ return 0;
+
+err:
+ /* If any uC firmware is not found, fall back to execlists */
+ xe_device_guc_submission_disable(uc_to_xe(uc));
+
+ return ret;
+}
+
+/**
+ * xe_uc_init_post_hwconfig - init uC post hwconfig load
+ * @uc: The UC object
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_uc_init_post_hwconfig(struct xe_uc *uc)
+{
+ /* GuC submission not enabled, nothing to do */
+ if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+ return 0;
+
+ return xe_guc_init_post_hwconfig(&uc->guc);
+}
+
+static int uc_reset(struct xe_uc *uc)
+{
+ struct xe_device *xe = uc_to_xe(uc);
+ int ret;
+
+ ret = xe_guc_reset(&uc->guc);
+ if (ret) {
+ drm_err(&xe->drm, "Failed to reset GuC, ret = %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int uc_sanitize(struct xe_uc *uc)
+{
+ xe_huc_sanitize(&uc->huc);
+ xe_guc_sanitize(&uc->guc);
+
+ return uc_reset(uc);
+}
+
+/**
+ * xe_uc_init_hwconfig - minimally init uC, read and parse hwconfig
+ * @uc: The UC object
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_uc_init_hwconfig(struct xe_uc *uc)
+{
+ int ret;
+
+ /* GuC submission not enabled, nothing to do */
+ if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+ return 0;
+
+ ret = xe_guc_min_load_for_hwconfig(&uc->guc);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+/*
+ * Should be called during driver load, after every GT reset, and after every
+ * suspend to reload / auth the firmwares.
+ */
+int xe_uc_init_hw(struct xe_uc *uc)
+{
+ int ret;
+
+ /* GuC submission not enabled, nothing to do */
+ if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+ return 0;
+
+ ret = uc_sanitize(uc);
+ if (ret)
+ return ret;
+
+ ret = xe_huc_upload(&uc->huc);
+ if (ret)
+ return ret;
+
+ ret = xe_guc_upload(&uc->guc);
+ if (ret)
+ return ret;
+
+ ret = xe_guc_enable_communication(&uc->guc);
+ if (ret)
+ return ret;
+
+ ret = xe_gt_record_default_lrcs(uc_to_gt(uc));
+ if (ret)
+ return ret;
+
+ ret = xe_guc_post_load_init(&uc->guc);
+ if (ret)
+ return ret;
+
+ ret = xe_guc_pc_start(&uc->guc.pc);
+ if (ret)
+ return ret;
+
+ /* We don't fail the driver load if HuC fails to auth, but let's warn */
+ ret = xe_huc_auth(&uc->huc);
+ XE_WARN_ON(ret);
+
+ return 0;
+}
+
+int xe_uc_reset_prepare(struct xe_uc *uc)
+{
+ /* GuC submission not enabled, nothing to do */
+ if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+ return 0;
+
+ return xe_guc_reset_prepare(&uc->guc);
+}
+
+void xe_uc_stop_prepare(struct xe_uc *uc)
+{
+ xe_guc_stop_prepare(&uc->guc);
+}
+
+int xe_uc_stop(struct xe_uc *uc)
+{
+ /* GuC submission not enabled, nothing to do */
+ if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+ return 0;
+
+ return xe_guc_stop(&uc->guc);
+}
+
+int xe_uc_start(struct xe_uc *uc)
+{
+ /* GuC submission not enabled, nothing to do */
+ if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+ return 0;
+
+ return xe_guc_start(&uc->guc);
+}
+
+static void uc_reset_wait(struct xe_uc *uc)
+{
+ int ret;
+
+again:
+ xe_guc_reset_wait(&uc->guc);
+
+ ret = xe_uc_reset_prepare(uc);
+ if (ret)
+ goto again;
+}
+
+int xe_uc_suspend(struct xe_uc *uc)
+{
+ int ret;
+
+ /* GuC submission not enabled, nothing to do */
+ if (!xe_device_guc_submission_enabled(uc_to_xe(uc)))
+ return 0;
+
+ uc_reset_wait(uc);
+
+ ret = xe_uc_stop(uc);
+ if (ret)
+ return ret;
+
+ return xe_guc_suspend(&uc->guc);
+}
diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h
new file mode 100644
index 000000000000..380e722f95fc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_H_
+#define _XE_UC_H_
+
+#include "xe_uc_types.h"
+
+int xe_uc_init(struct xe_uc *uc);
+int xe_uc_init_hwconfig(struct xe_uc *uc);
+int xe_uc_init_post_hwconfig(struct xe_uc *uc);
+int xe_uc_init_hw(struct xe_uc *uc);
+int xe_uc_reset_prepare(struct xe_uc *uc);
+void xe_uc_stop_prepare(struct xe_uc *uc);
+int xe_uc_stop(struct xe_uc *uc);
+int xe_uc_start(struct xe_uc *uc);
+int xe_uc_suspend(struct xe_uc *uc);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_debugfs.c b/drivers/gpu/drm/xe/xe_uc_debugfs.c
new file mode 100644
index 000000000000..0a39ec5a6e99
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_debugfs.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_debugfs.h>
+
+#include "xe_gt.h"
+#include "xe_guc_debugfs.h"
+#include "xe_huc_debugfs.h"
+#include "xe_macros.h"
+#include "xe_uc_debugfs.h"
+
+void xe_uc_debugfs_register(struct xe_uc *uc, struct dentry *parent)
+{
+ struct dentry *root;
+
+ root = debugfs_create_dir("uc", parent);
+ if (IS_ERR(root)) {
+ XE_WARN_ON("Create UC directory failed");
+ return;
+ }
+
+ xe_guc_debugfs_register(&uc->guc, root);
+ xe_huc_debugfs_register(&uc->huc, root);
+}
diff --git a/drivers/gpu/drm/xe/xe_uc_debugfs.h b/drivers/gpu/drm/xe/xe_uc_debugfs.h
new file mode 100644
index 000000000000..a13382df2bd7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_DEBUGFS_H_
+#define _XE_UC_DEBUGFS_H_
+
+struct dentry;
+struct xe_uc;
+
+void xe_uc_debugfs_register(struct xe_uc *uc, struct dentry *parent);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
new file mode 100644
index 000000000000..86c47b7f0901
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -0,0 +1,406 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/firmware.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_device_types.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_guc_reg.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_uc_fw.h"
+
+static struct xe_gt *
+__uc_fw_to_gt(struct xe_uc_fw *uc_fw, enum xe_uc_fw_type type)
+{
+ if (type == XE_UC_FW_TYPE_GUC)
+ return container_of(uc_fw, struct xe_gt, uc.guc.fw);
+
+ XE_BUG_ON(type != XE_UC_FW_TYPE_HUC);
+ return container_of(uc_fw, struct xe_gt, uc.huc.fw);
+}
+
+static struct xe_gt *uc_fw_to_gt(struct xe_uc_fw *uc_fw)
+{
+ return __uc_fw_to_gt(uc_fw, uc_fw->type);
+}
+
+static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw)
+{
+ return gt_to_xe(uc_fw_to_gt(uc_fw));
+}
+
+/*
+ * List of required GuC and HuC binaries per-platform.
+ * Must be ordered based on platform + revid, from newer to older.
+ */
+#define XE_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
+ fw_def(METEORLAKE, 0, guc_def(mtl, 70, 5, 2)) \
+ fw_def(ALDERLAKE_P, 0, guc_def(adlp, 70, 5, 2)) \
+ fw_def(ALDERLAKE_S, 0, guc_def(tgl, 70, 5, 2)) \
+ fw_def(PVC, 0, guc_def(pvc, 70, 5, 2)) \
+ fw_def(DG2, 0, guc_def(dg2, 70, 5, 2)) \
+ fw_def(DG1, 0, guc_def(dg1, 70, 5, 2)) \
+ fw_def(TIGERLAKE, 0, guc_def(tgl, 70, 5, 2))
+
+#define XE_HUC_FIRMWARE_DEFS(fw_def, huc_def) \
+ fw_def(DG1, 0, huc_def(dg1, 7, 9, 3)) \
+ fw_def(TIGERLAKE, 0, huc_def(tgl, 7, 9, 3))
+
+#define __MAKE_UC_FW_PATH_MAJOR(prefix_, name_, major_) \
+ "xe/" \
+ __stringify(prefix_) "_" name_ "_" \
+ __stringify(major_) ".bin"
+
+#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \
+ "xe/" \
+ __stringify(prefix_) name_ \
+ __stringify(major_) "." \
+ __stringify(minor_) "." \
+ __stringify(patch_) ".bin"
+
+#define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \
+ __MAKE_UC_FW_PATH_MAJOR(prefix_, "guc", major_)
+
+#define MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_) \
+ __MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_)
+
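+/*
+ * For illustration, with the macros above a GuC entry like guc_def(tgl, 70, 5, 2)
+ * expands to the versionless path "xe/tgl_guc_70.bin", while a HuC entry like
+ * huc_def(dg1, 7, 9, 3) expands to the fully versioned "xe/dg1_huc_7.9.3.bin".
+ */
+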
+/* All blobs need to be declared via MODULE_FIRMWARE() */
+#define XE_UC_MODULE_FW(platform_, revid_, uc_) \
+ MODULE_FIRMWARE(uc_);
+
+XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_GUC_FW_PATH)
+XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_HUC_FW_PATH)
+
+/* The below structs and macros are used to iterate across the list of blobs */
+struct __packed uc_fw_blob {
+ u8 major;
+ u8 minor;
+ const char *path;
+};
+
+#define UC_FW_BLOB(major_, minor_, path_) \
+ { .major = major_, .minor = minor_, .path = path_ }
+
+#define GUC_FW_BLOB(prefix_, major_, minor_, patch_) \
+ UC_FW_BLOB(major_, minor_, \
+ MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_))
+
+#define HUC_FW_BLOB(prefix_, major_, minor_, bld_num_) \
+ UC_FW_BLOB(major_, minor_, \
+ MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_))
+
+struct __packed uc_fw_platform_requirement {
+ enum xe_platform p;
+ u8 rev; /* first platform rev using this FW */
+ const struct uc_fw_blob blob;
+};
+
+#define MAKE_FW_LIST(platform_, revid_, uc_) \
+{ \
+ .p = XE_##platform_, \
+ .rev = revid_, \
+ .blob = uc_, \
+},
+
+struct fw_blobs_by_type {
+ const struct uc_fw_platform_requirement *blobs;
+ u32 count;
+};
+
+static void
+uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
+{
+ static const struct uc_fw_platform_requirement blobs_guc[] = {
+ XE_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB)
+ };
+ static const struct uc_fw_platform_requirement blobs_huc[] = {
+ XE_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB)
+ };
+ static const struct fw_blobs_by_type blobs_all[XE_UC_FW_NUM_TYPES] = {
+ [XE_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
+ [XE_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) },
+ };
+ static const struct uc_fw_platform_requirement *fw_blobs;
+ enum xe_platform p = xe->info.platform;
+ u32 fw_count;
+ u8 rev = xe->info.revid;
+ int i;
+
+ XE_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all));
+ fw_blobs = blobs_all[uc_fw->type].blobs;
+ fw_count = blobs_all[uc_fw->type].count;
+
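+ /*
+ * The blob lists are ordered from newer to older platforms, so the walk
+ * can stop early once it reaches entries for platforms older than the
+ * one being probed.
+ */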
+ for (i = 0; i < fw_count && p <= fw_blobs[i].p; i++) {
+ if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) {
+ const struct uc_fw_blob *blob = &fw_blobs[i].blob;
+
+ uc_fw->path = blob->path;
+ uc_fw->major_ver_wanted = blob->major;
+ uc_fw->minor_ver_wanted = blob->minor;
+ break;
+ }
+ }
+}
+
+/**
+ * xe_uc_fw_copy_rsa - copy fw RSA to buffer
+ *
+ * @uc_fw: uC firmware
+ * @dst: dst buffer
+ * @max_len: max number of bytes to copy
+ *
+ * Return: number of copied bytes.
+ */
+size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len)
+{
+ struct xe_device *xe = uc_fw_to_xe(uc_fw);
+ u32 size = min_t(u32, uc_fw->rsa_size, max_len);
+
+ XE_BUG_ON(size % 4);
+ XE_BUG_ON(!xe_uc_fw_is_available(uc_fw));
+
+ xe_map_memcpy_from(xe, dst, &uc_fw->bo->vmap,
+ xe_uc_fw_rsa_offset(uc_fw), size);
+
+ return size;
+}
+
+static void uc_fw_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_uc_fw *uc_fw = arg;
+
+ if (!xe_uc_fw_is_available(uc_fw))
+ return;
+
+ xe_bo_unpin_map_no_vm(uc_fw->bo);
+ xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED);
+}
+
+int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
+{
+ struct xe_device *xe = uc_fw_to_xe(uc_fw);
+ struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+ struct device *dev = xe->drm.dev;
+ const struct firmware *fw = NULL;
+ struct uc_css_header *css;
+ struct xe_bo *obj;
+ size_t size;
+ int err;
+
+ /*
+ * we use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status
+ * before we've looked at the HW caps to see if we have uC support
+ */
+ BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED);
+ XE_BUG_ON(uc_fw->status);
+ XE_BUG_ON(uc_fw->path);
+
+ uc_fw_auto_select(xe, uc_fw);
+ xe_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ?
+ XE_UC_FIRMWARE_SELECTED :
+ XE_UC_FIRMWARE_DISABLED :
+ XE_UC_FIRMWARE_NOT_SUPPORTED);
+
+ /* Transform "no HuC in the list" into "firmware disabled" */
+ if (uc_fw->type == XE_UC_FW_TYPE_HUC && !xe_uc_fw_is_supported(uc_fw)) {
+ xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED);
+ err = -ENOPKG;
+ return err;
+ }
+ err = request_firmware(&fw, uc_fw->path, dev);
+ if (err)
+ goto fail;
+
+ /* Check the size of the blob before examining buffer contents */
+ if (unlikely(fw->size < sizeof(struct uc_css_header))) {
+ drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n",
+ xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+ fw->size, sizeof(struct uc_css_header));
+ err = -ENODATA;
+ goto fail;
+ }
+
+ css = (struct uc_css_header *)fw->data;
+
+ /* Check integrity of size values inside CSS header */
+ size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw -
+ css->exponent_size_dw) * sizeof(u32);
+ if (unlikely(size != sizeof(struct uc_css_header))) {
+ drm_warn(&xe->drm,
+ "%s firmware %s: unexpected header size: %zu != %zu\n",
+ xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+ size, sizeof(struct uc_css_header));
+ err = -EPROTO;
+ goto fail;
+ }
+
+ /* uCode size must be calculated from the other sizes */
+ uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32);
+
+ /* now RSA */
+ uc_fw->rsa_size = css->key_size_dw * sizeof(u32);
+
+ /* The blob must contain at least the header, uCode and RSA; check the size of all three */
+ size = sizeof(struct uc_css_header) + uc_fw->ucode_size +
+ uc_fw->rsa_size;
+ if (unlikely(fw->size < size)) {
+ drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n",
+ xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+ fw->size, size);
+ err = -ENOEXEC;
+ goto fail;
+ }
+
+ /* Get version numbers from the CSS header */
+ uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
+ css->sw_version);
+ uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
+ css->sw_version);
+
+ if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
+ uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
+ drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
+ xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+ uc_fw->major_ver_found, uc_fw->minor_ver_found,
+ uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
+ if (!xe_uc_fw_is_overridden(uc_fw)) {
+ err = -ENOEXEC;
+ goto fail;
+ }
+ }
+
+ if (uc_fw->type == XE_UC_FW_TYPE_GUC)
+ uc_fw->private_data_size = css->private_data_size;
+
+ obj = xe_bo_create_from_data(xe, gt, fw->data, fw->size,
+ ttm_bo_type_kernel,
+ XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+ XE_BO_CREATE_GGTT_BIT);
+ if (IS_ERR(obj)) {
+ drm_notice(&xe->drm, "%s firmware %s: failed to create / populate bo",
+ xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
+ err = PTR_ERR(obj);
+ goto fail;
+ }
+
+ uc_fw->bo = obj;
+ uc_fw->size = fw->size;
+ xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_AVAILABLE);
+
+ release_firmware(fw);
+
+ err = drmm_add_action_or_reset(&xe->drm, uc_fw_fini, uc_fw);
+ if (err)
+ return err;
+
+ return 0;
+
+fail:
+ xe_uc_fw_change_status(uc_fw, err == -ENOENT ?
+ XE_UC_FIRMWARE_MISSING :
+ XE_UC_FIRMWARE_ERROR);
+
+ drm_notice(&xe->drm, "%s firmware %s: fetch failed with error %d\n",
+ xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
+ drm_info(&xe->drm, "%s firmware(s) can be downloaded from %s\n",
+ xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL);
+
+ release_firmware(fw); /* OK even if fw is NULL */
+ return err;
+}
+
+static u32 uc_fw_ggtt_offset(struct xe_uc_fw *uc_fw)
+{
+ return xe_bo_ggtt_addr(uc_fw->bo);
+}
+
+static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
+{
+ struct xe_device *xe = uc_fw_to_xe(uc_fw);
+ struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+ u32 src_offset;
+ int ret;
+
+ xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+
+ /* Set the source address for the uCode */
+ src_offset = uc_fw_ggtt_offset(uc_fw);
+ xe_mmio_write32(gt, DMA_ADDR_0_LOW.reg, lower_32_bits(src_offset));
+ xe_mmio_write32(gt, DMA_ADDR_0_HIGH.reg, upper_32_bits(src_offset));
+
+ /* Set the DMA destination */
+ xe_mmio_write32(gt, DMA_ADDR_1_LOW.reg, offset);
+ xe_mmio_write32(gt, DMA_ADDR_1_HIGH.reg, DMA_ADDRESS_SPACE_WOPCM);
+
+ /*
+ * Set the transfer size. The header plus uCode will be copied to WOPCM
+ * via DMA, excluding any other components
+ */
+ xe_mmio_write32(gt, DMA_COPY_SIZE.reg,
+ sizeof(struct uc_css_header) + uc_fw->ucode_size);
+
+ /* Start the DMA */
+ xe_mmio_write32(gt, DMA_CTRL.reg,
+ _MASKED_BIT_ENABLE(dma_flags | START_DMA));
+
+ /* Wait for DMA to finish */
+ ret = xe_mmio_wait32(gt, DMA_CTRL.reg, 0, START_DMA, 100);
+ if (ret)
+ drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n",
+ xe_uc_fw_type_repr(uc_fw->type),
+ xe_mmio_read32(gt, DMA_CTRL.reg));
+
+ /* Disable the bits once DMA is over */
+ xe_mmio_write32(gt, DMA_CTRL.reg, _MASKED_BIT_DISABLE(dma_flags));
+
+ return ret;
+}
+
+int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
+{
+ struct xe_device *xe = uc_fw_to_xe(uc_fw);
+ int err;
+
+ /* make sure the status was cleared the last time we reset the uC */
+ XE_BUG_ON(xe_uc_fw_is_loaded(uc_fw));
+
+ if (!xe_uc_fw_is_loadable(uc_fw))
+ return -ENOEXEC;
+
+ /* Call custom loader */
+ err = uc_fw_xfer(uc_fw, offset, dma_flags);
+ if (err)
+ goto fail;
+
+ xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_TRANSFERRED);
+ return 0;
+
+fail:
+ drm_err(&xe->drm, "Failed to load %s firmware %s (%d)\n",
+ xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+ err);
+ xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_LOAD_FAIL);
+ return err;
+}
+
+void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p)
+{
+ drm_printf(p, "%s firmware: %s\n",
+ xe_uc_fw_type_repr(uc_fw->type), uc_fw->path);
+ drm_printf(p, "\tstatus: %s\n",
+ xe_uc_fw_status_repr(uc_fw->status));
+ drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n",
+ uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted,
+ uc_fw->major_ver_found, uc_fw->minor_ver_found);
+ drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size);
+ drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size);
+}
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h
new file mode 100644
index 000000000000..b0df5064b27d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_fw.h
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_FW_H_
+#define _XE_UC_FW_H_
+
+#include <linux/errno.h>
+
+#include "xe_uc_fw_types.h"
+#include "xe_uc_fw_abi.h"
+#include "xe_macros.h"
+
+struct drm_printer;
+
+int xe_uc_fw_init(struct xe_uc_fw *uc_fw);
+size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len);
+int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags);
+void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p);
+
+static inline u32 xe_uc_fw_rsa_offset(struct xe_uc_fw *uc_fw)
+{
+ return sizeof(struct uc_css_header) + uc_fw->ucode_size;
+}
+
+static inline void xe_uc_fw_change_status(struct xe_uc_fw *uc_fw,
+ enum xe_uc_fw_status status)
+{
+ uc_fw->__status = status;
+}
+
+static inline
+const char *xe_uc_fw_status_repr(enum xe_uc_fw_status status)
+{
+ switch (status) {
+ case XE_UC_FIRMWARE_NOT_SUPPORTED:
+ return "N/A";
+ case XE_UC_FIRMWARE_UNINITIALIZED:
+ return "UNINITIALIZED";
+ case XE_UC_FIRMWARE_DISABLED:
+ return "DISABLED";
+ case XE_UC_FIRMWARE_SELECTED:
+ return "SELECTED";
+ case XE_UC_FIRMWARE_MISSING:
+ return "MISSING";
+ case XE_UC_FIRMWARE_ERROR:
+ return "ERROR";
+ case XE_UC_FIRMWARE_AVAILABLE:
+ return "AVAILABLE";
+ case XE_UC_FIRMWARE_INIT_FAIL:
+ return "INIT FAIL";
+ case XE_UC_FIRMWARE_LOADABLE:
+ return "LOADABLE";
+ case XE_UC_FIRMWARE_LOAD_FAIL:
+ return "LOAD FAIL";
+ case XE_UC_FIRMWARE_TRANSFERRED:
+ return "TRANSFERRED";
+ case XE_UC_FIRMWARE_RUNNING:
+ return "RUNNING";
+ }
+ return "<invalid>";
+}
+
+static inline int xe_uc_fw_status_to_error(enum xe_uc_fw_status status)
+{
+ switch (status) {
+ case XE_UC_FIRMWARE_NOT_SUPPORTED:
+ return -ENODEV;
+ case XE_UC_FIRMWARE_UNINITIALIZED:
+ return -EACCES;
+ case XE_UC_FIRMWARE_DISABLED:
+ return -EPERM;
+ case XE_UC_FIRMWARE_MISSING:
+ return -ENOENT;
+ case XE_UC_FIRMWARE_ERROR:
+ return -ENOEXEC;
+ case XE_UC_FIRMWARE_INIT_FAIL:
+ case XE_UC_FIRMWARE_LOAD_FAIL:
+ return -EIO;
+ case XE_UC_FIRMWARE_SELECTED:
+ return -ESTALE;
+ case XE_UC_FIRMWARE_AVAILABLE:
+ case XE_UC_FIRMWARE_LOADABLE:
+ case XE_UC_FIRMWARE_TRANSFERRED:
+ case XE_UC_FIRMWARE_RUNNING:
+ return 0;
+ }
+ return -EINVAL;
+}
+
+static inline const char *xe_uc_fw_type_repr(enum xe_uc_fw_type type)
+{
+ switch (type) {
+ case XE_UC_FW_TYPE_GUC:
+ return "GuC";
+ case XE_UC_FW_TYPE_HUC:
+ return "HuC";
+ }
+ return "uC";
+}
+
+static inline enum xe_uc_fw_status
+__xe_uc_fw_status(struct xe_uc_fw *uc_fw)
+{
+ /* shouldn't call this before checking hw/blob availability */
+ XE_BUG_ON(uc_fw->status == XE_UC_FIRMWARE_UNINITIALIZED);
+ return uc_fw->status;
+}
+
+static inline bool xe_uc_fw_is_supported(struct xe_uc_fw *uc_fw)
+{
+ return __xe_uc_fw_status(uc_fw) != XE_UC_FIRMWARE_NOT_SUPPORTED;
+}
+
+static inline bool xe_uc_fw_is_enabled(struct xe_uc_fw *uc_fw)
+{
+ return __xe_uc_fw_status(uc_fw) > XE_UC_FIRMWARE_DISABLED;
+}
+
+static inline bool xe_uc_fw_is_disabled(struct xe_uc_fw *uc_fw)
+{
+ return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_DISABLED;
+}
+
+static inline bool xe_uc_fw_is_available(struct xe_uc_fw *uc_fw)
+{
+ return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_AVAILABLE;
+}
+
+static inline bool xe_uc_fw_is_loadable(struct xe_uc_fw *uc_fw)
+{
+ return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_LOADABLE;
+}
+
+static inline bool xe_uc_fw_is_loaded(struct xe_uc_fw *uc_fw)
+{
+ return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_TRANSFERRED;
+}
+
+static inline bool xe_uc_fw_is_running(struct xe_uc_fw *uc_fw)
+{
+ return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_RUNNING;
+}
+
+static inline bool xe_uc_fw_is_overridden(const struct xe_uc_fw *uc_fw)
+{
+ return uc_fw->user_overridden;
+}
+
+static inline void xe_uc_fw_sanitize(struct xe_uc_fw *uc_fw)
+{
+ if (xe_uc_fw_is_loaded(uc_fw))
+ xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_LOADABLE);
+}
+
+static inline u32 __xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw)
+{
+ return sizeof(struct uc_css_header) + uc_fw->ucode_size;
+}
+
+/**
+ * xe_uc_fw_get_upload_size() - Get size of firmware needed to be uploaded.
+ * @uc_fw: uC firmware.
+ *
+ * Get the size of the firmware and header that will be uploaded to WOPCM.
+ *
+ * Return: Upload firmware size, or zero on firmware fetch failure.
+ */
+static inline u32 xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw)
+{
+ if (!xe_uc_fw_is_available(uc_fw))
+ return 0;
+
+ return __xe_uc_fw_get_upload_size(uc_fw);
+}
+
+#define XE_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/xe"
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
new file mode 100644
index 000000000000..dafd26cb0c41
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_FW_ABI_H
+#define _XE_UC_FW_ABI_H
+
+#include <linux/types.h>
+#include <linux/build_bug.h>
+
+/**
+ * DOC: Firmware Layout
+ *
+ * The GuC/HuC firmware layout looks like this::
+ *
+ * +======================================================================+
+ * | Firmware blob |
+ * +===============+===============+============+============+============+
+ * | CSS header | uCode | RSA key | modulus | exponent |
+ * +===============+===============+============+============+============+
+ * <-header size-> <---header size continued ----------->
+ * <--- size ----------------------------------------------------------->
+ * <-key size->
+ * <-mod size->
+ * <-exp size->
+ *
+ * The firmware may or may not have modulus key and exponent data. The header,
+ * uCode and RSA signature are must-have components that will be used by the
+ * driver. The length of each component, in dwords, can be found in the header.
+ * If the modulus and exponent are not present in the firmware (a.k.a. a
+ * truncated image), their length values still appear in the header.
+ *
+ * The driver does some basic firmware size validation based on the following
+ * rules:
+ *
+ * 1. Header, uCode and RSA are must-have components.
+ * 2. All firmware components, if present, appear in the sequence illustrated
+ * in the layout table above.
+ * 3. The length of each component, in dwords, can be found in the header.
+ * 4. The modulus and exponent keys are not required by the driver and may not
+ * appear in the firmware; in that case the driver loads the truncated image.
+ */
+
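+/*
+ * Worked example (illustrative numbers only): with key_size_dw = 64,
+ * modulus_size_dw = 64 and exponent_size_dw = 1, a consistent header has
+ * header_size_dw = 32 + 64 + 64 + 1 = 161, since the CSS header itself is
+ * 128 bytes (32 dwords). For a 100 KiB (25600 dword) uCode image,
+ * size_dw = 161 + 25600 = 25761, so the driver derives
+ * ucode_size = (size_dw - header_size_dw) * 4 = 102400 bytes and
+ * rsa_size = key_size_dw * 4 = 256 bytes, and requires the blob to be at
+ * least 128 + 102400 + 256 bytes long.
+ */
+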
+struct uc_css_header {
+ u32 module_type;
+ /*
+ * header_size includes all non-uCode bits, including css_header, rsa
+ * key, modulus key and exponent data.
+ */
+ u32 header_size_dw;
+ u32 header_version;
+ u32 module_id;
+ u32 module_vendor;
+ u32 date;
+#define CSS_DATE_DAY (0xFF << 0)
+#define CSS_DATE_MONTH (0xFF << 8)
+#define CSS_DATE_YEAR (0xFFFF << 16)
+ u32 size_dw; /* uCode plus header_size_dw */
+ u32 key_size_dw;
+ u32 modulus_size_dw;
+ u32 exponent_size_dw;
+ u32 time;
+#define CSS_TIME_HOUR (0xFF << 0)
+#define CSS_TIME_MIN (0xFF << 8)
+#define CSS_TIME_SEC (0xFFFF << 16)
+ char username[8];
+ char buildnumber[12];
+ u32 sw_version;
+#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16)
+#define CSS_SW_VERSION_UC_MINOR (0xFF << 8)
+#define CSS_SW_VERSION_UC_PATCH (0xFF << 0)
+ u32 reserved0[13];
+ union {
+ u32 private_data_size; /* only applies to GuC */
+ u32 reserved1;
+ };
+ u32 header_info;
+} __packed;
+static_assert(sizeof(struct uc_css_header) == 128);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
new file mode 100644
index 000000000000..1cfd30a655df
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_FW_TYPES_H_
+#define _XE_UC_FW_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+
+/*
+ * +------------+---------------------------------------------------+
+ * | PHASE | FIRMWARE STATUS TRANSITIONS |
+ * +============+===================================================+
+ * | | UNINITIALIZED |
+ * +------------+- / | \ -+
+ * | | DISABLED <--/ | \--> NOT_SUPPORTED |
+ * | init_early | V |
+ * | | SELECTED |
+ * +------------+- / | \ -+
+ * | | MISSING <--/ | \--> ERROR |
+ * | fetch | V |
+ * | | AVAILABLE |
+ * +------------+- | \ -+
+ * | | | \--> INIT FAIL |
+ * | init | V |
+ * | | /------> LOADABLE <----<-----------\ |
+ * +------------+- \ / \ \ \ -+
+ * | | LOAD FAIL <--< \--> TRANSFERRED \ |
+ * | upload | \ / \ / |
+ * | | \---------/ \--> RUNNING |
+ * +------------+---------------------------------------------------+
+ */
+
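+/*
+ * On the expected path a blob therefore walks
+ * UNINITIALIZED -> SELECTED -> AVAILABLE -> LOADABLE -> TRANSFERRED -> RUNNING,
+ * with the remaining states covering the disabled/unsupported and failure
+ * exits at each phase.
+ */
+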
+/*
+ * FIXME: Ported from the i915 driver and this state machine is way too complicated.
+ * Circle back and simplify this.
+ */
+enum xe_uc_fw_status {
+ XE_UC_FIRMWARE_NOT_SUPPORTED = -1, /* no uc HW */
+ XE_UC_FIRMWARE_UNINITIALIZED = 0, /* used to catch checks done too early */
+ XE_UC_FIRMWARE_DISABLED, /* disabled */
+ XE_UC_FIRMWARE_SELECTED, /* selected the blob we want to load */
+ XE_UC_FIRMWARE_MISSING, /* blob not found on the system */
+ XE_UC_FIRMWARE_ERROR, /* invalid format or version */
+ XE_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */
+ XE_UC_FIRMWARE_INIT_FAIL, /* failed to prepare fw objects for load */
+ XE_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */
+ XE_UC_FIRMWARE_LOAD_FAIL, /* failed to xfer or init/auth the fw */
+ XE_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */
+ XE_UC_FIRMWARE_RUNNING /* init/auth done */
+};
+
+enum xe_uc_fw_type {
+ XE_UC_FW_TYPE_GUC = 0,
+ XE_UC_FW_TYPE_HUC
+};
+#define XE_UC_FW_NUM_TYPES 2
+
+/**
+ * struct xe_uc_fw - XE micro controller firmware
+ */
+struct xe_uc_fw {
+ /** @type: type of uC firmware */
+ enum xe_uc_fw_type type;
+ union {
+ /** @status: firmware load status */
+ const enum xe_uc_fw_status status;
+ /**
+ * @__status: private firmware load status - only to be used
+ * by firmware loading code
+ */
+ enum xe_uc_fw_status __status;
+ };
+ /** @path: path to uC firmware */
+ const char *path;
+ /** @user_overridden: user provided path to uC firmware via modparam */
+ bool user_overridden;
+ /** @size: size of uC firmware including css header */
+ size_t size;
+
+ /** @bo: XE BO for uC firmware */
+ struct xe_bo *bo;
+
+ /*
+ * The firmware build process will generate a version header file with
+ * major and minor versions defined. The versions are built into the CSS
+ * header of the firmware. The xe kernel driver sets the minimal firmware
+ * version required per platform.
+ */
+
+ /** @major_ver_wanted: major firmware version wanted by platform */
+ u16 major_ver_wanted;
+ /** @minor_ver_wanted: minor firmware version wanted by platform */
+ u16 minor_ver_wanted;
+ /** @major_ver_found: major version found in firmware blob */
+ u16 major_ver_found;
+ /** @minor_ver_found: minor version found in firmware blob */
+ u16 minor_ver_found;
+
+ /** @rsa_size: RSA size */
+ u32 rsa_size;
+ /** @ucode_size: micro kernel size */
+ u32 ucode_size;
+
+ /** @private_data_size: size of private data found in uC css header */
+ u32 private_data_size;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_types.h b/drivers/gpu/drm/xe/xe_uc_types.h
new file mode 100644
index 000000000000..49bef6498b85
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_types.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_TYPES_H_
+#define _XE_UC_TYPES_H_
+
+#include "xe_guc_types.h"
+#include "xe_huc_types.h"
+#include "xe_wopcm_types.h"
+
+/**
+ * struct xe_uc - XE micro controllers
+ */
+struct xe_uc {
+ /** @guc: Graphics micro controller */
+ struct xe_guc guc;
+ /** @huc: HuC */
+ struct xe_huc huc;
+ /** @wopcm: WOPCM */
+ struct xe_wopcm wopcm;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
new file mode 100644
index 000000000000..d47a8617c5b6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -0,0 +1,3407 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_vm.h"
+
+#include <linux/dma-fence-array.h>
+
+#include <drm/ttm/ttm_execbuf_util.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/xe_drm.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_engine.h"
+#include "xe_gt.h"
+#include "xe_gt_pagefault.h"
+#include "xe_migrate.h"
+#include "xe_pm.h"
+#include "xe_preempt_fence.h"
+#include "xe_pt.h"
+#include "xe_res_cursor.h"
+#include "xe_trace.h"
+#include "xe_sync.h"
+
+#define TEST_VM_ASYNC_OPS_ERROR
+
+/**
+ * xe_vma_userptr_check_repin() - Advisory check for repin needed
+ * @vma: The userptr vma
+ *
+ * Check if the userptr vma has been invalidated since last successful
+ * repin. The check is advisory only and the function can be called
+ * without the vm->userptr.notifier_lock held. There is no guarantee that the
+ * vma userptr will remain valid after a lockless check, so typically
+ * the call needs to be followed by a proper check under the notifier_lock.
+ *
+ * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
+ */
+int xe_vma_userptr_check_repin(struct xe_vma *vma)
+{
+ return mmu_interval_check_retry(&vma->userptr.notifier,
+ vma->userptr.notifier_seq) ?
+ -EAGAIN : 0;
+}
+
+int xe_vma_userptr_pin_pages(struct xe_vma *vma)
+{
+ struct xe_vm *vm = vma->vm;
+ struct xe_device *xe = vm->xe;
+ const unsigned long num_pages =
+ (vma->end - vma->start + 1) >> PAGE_SHIFT;
+ struct page **pages;
+ bool in_kthread = !current->mm;
+ unsigned long notifier_seq;
+ int pinned, ret, i;
+ bool read_only = vma->pte_flags & PTE_READ_ONLY;
+
+ lockdep_assert_held(&vm->lock);
+ XE_BUG_ON(!xe_vma_is_userptr(vma));
+retry:
+ if (vma->destroyed)
+ return 0;
+
+ notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
+ if (notifier_seq == vma->userptr.notifier_seq)
+ return 0;
+
+ pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
+ if (vma->userptr.sg) {
+ dma_unmap_sgtable(xe->drm.dev,
+ vma->userptr.sg,
+ read_only ? DMA_TO_DEVICE :
+ DMA_BIDIRECTIONAL, 0);
+ sg_free_table(vma->userptr.sg);
+ vma->userptr.sg = NULL;
+ }
+
+ pinned = ret = 0;
+ if (in_kthread) {
+ if (!mmget_not_zero(vma->userptr.notifier.mm)) {
+ ret = -EFAULT;
+ goto mm_closed;
+ }
+ kthread_use_mm(vma->userptr.notifier.mm);
+ }
+
+ while (pinned < num_pages) {
+ ret = get_user_pages_fast(vma->userptr.ptr + pinned * PAGE_SIZE,
+ num_pages - pinned,
+ read_only ? 0 : FOLL_WRITE,
+ &pages[pinned]);
+ if (ret < 0) {
+ if (in_kthread)
+ ret = 0;
+ break;
+ }
+
+ pinned += ret;
+ ret = 0;
+ }
+
+ if (in_kthread) {
+ kthread_unuse_mm(vma->userptr.notifier.mm);
+ mmput(vma->userptr.notifier.mm);
+ }
+mm_closed:
+ if (ret)
+ goto out;
+
+ ret = sg_alloc_table_from_pages(&vma->userptr.sgt, pages, pinned,
+ 0, (u64)pinned << PAGE_SHIFT,
+ GFP_KERNEL);
+ if (ret) {
+ vma->userptr.sg = NULL;
+ goto out;
+ }
+ vma->userptr.sg = &vma->userptr.sgt;
+
+ ret = dma_map_sgtable(xe->drm.dev, vma->userptr.sg,
+ read_only ? DMA_TO_DEVICE :
+ DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC |
+ DMA_ATTR_NO_KERNEL_MAPPING);
+ if (ret) {
+ sg_free_table(vma->userptr.sg);
+ vma->userptr.sg = NULL;
+ goto out;
+ }
+
+ for (i = 0; i < pinned; ++i) {
+ if (!read_only) {
+ lock_page(pages[i]);
+ set_page_dirty(pages[i]);
+ unlock_page(pages[i]);
+ }
+
+ mark_page_accessed(pages[i]);
+ }
+
+out:
+ release_pages(pages, pinned);
+ kvfree(pages);
+
+ if (!(ret < 0)) {
+ vma->userptr.notifier_seq = notifier_seq;
+ if (xe_vma_userptr_check_repin(vma) == -EAGAIN)
+ goto retry;
+ }
+
+ return ret < 0 ? ret : 0;
+}
+
+static bool preempt_fences_waiting(struct xe_vm *vm)
+{
+ struct xe_engine *e;
+
+ lockdep_assert_held(&vm->lock);
+ xe_vm_assert_held(vm);
+
+ list_for_each_entry(e, &vm->preempt.engines, compute.link) {
+ if (!e->compute.pfence || (e->compute.pfence &&
+ test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+ &e->compute.pfence->flags))) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static void free_preempt_fences(struct list_head *list)
+{
+ struct list_head *link, *next;
+
+ list_for_each_safe(link, next, list)
+ xe_preempt_fence_free(to_preempt_fence_from_link(link));
+}
+
+static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
+ unsigned int *count)
+{
+ lockdep_assert_held(&vm->lock);
+ xe_vm_assert_held(vm);
+
+ if (*count >= vm->preempt.num_engines)
+ return 0;
+
+ for (; *count < vm->preempt.num_engines; ++(*count)) {
+ struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
+
+ if (IS_ERR(pfence))
+ return PTR_ERR(pfence);
+
+ list_move_tail(xe_preempt_fence_link(pfence), list);
+ }
+
+ return 0;
+}
+
+static int wait_for_existing_preempt_fences(struct xe_vm *vm)
+{
+ struct xe_engine *e;
+
+ xe_vm_assert_held(vm);
+
+ list_for_each_entry(e, &vm->preempt.engines, compute.link) {
+ if (e->compute.pfence) {
+ long timeout = dma_fence_wait(e->compute.pfence, false);
+
+ if (timeout < 0)
+ return -ETIME;
+ dma_fence_put(e->compute.pfence);
+ e->compute.pfence = NULL;
+ }
+ }
+
+ return 0;
+}
+
+static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
+{
+ struct list_head *link;
+ struct xe_engine *e;
+
+ list_for_each_entry(e, &vm->preempt.engines, compute.link) {
+ struct dma_fence *fence;
+
+ link = list->next;
+ XE_BUG_ON(link == list);
+
+ fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
+ e, e->compute.context,
+ ++e->compute.seqno);
+ dma_fence_put(e->compute.pfence);
+ e->compute.pfence = fence;
+ }
+}
+
+static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
+{
+ struct xe_engine *e;
+ struct ww_acquire_ctx ww;
+ int err;
+
+ err = xe_bo_lock(bo, &ww, vm->preempt.num_engines, true);
+ if (err)
+ return err;
+
+ list_for_each_entry(e, &vm->preempt.engines, compute.link)
+ if (e->compute.pfence) {
+ dma_resv_add_fence(bo->ttm.base.resv,
+ e->compute.pfence,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+
+ xe_bo_unlock(bo, &ww);
+ return 0;
+}
+
+/**
+ * xe_vm_fence_all_extobjs() - Add a fence to vm's external objects' resv
+ * @vm: The vm.
+ * @fence: The fence to add.
+ * @usage: The resv usage for the fence.
+ *
+ * Loops over all of the vm's external object bindings and adds a @fence
+ * with the given @usage to all of the external objects' reservation
+ * objects.
+ */
+void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
+ enum dma_resv_usage usage)
+{
+ struct xe_vma *vma;
+
+ list_for_each_entry(vma, &vm->extobj.list, extobj.link)
+ dma_resv_add_fence(vma->bo->ttm.base.resv, fence, usage);
+}
+
+static void resume_and_reinstall_preempt_fences(struct xe_vm *vm)
+{
+ struct xe_engine *e;
+
+ lockdep_assert_held(&vm->lock);
+ xe_vm_assert_held(vm);
+
+ list_for_each_entry(e, &vm->preempt.engines, compute.link) {
+ e->ops->resume(e);
+
+ dma_resv_add_fence(&vm->resv, e->compute.pfence,
+ DMA_RESV_USAGE_BOOKKEEP);
+ xe_vm_fence_all_extobjs(vm, e->compute.pfence,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+}
+
+int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e)
+{
+ struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
+ struct ttm_validate_buffer *tv;
+ struct ww_acquire_ctx ww;
+ struct list_head objs;
+ struct dma_fence *pfence;
+ int err;
+ bool wait;
+
+ XE_BUG_ON(!xe_vm_in_compute_mode(vm));
+
+ down_write(&vm->lock);
+
+ err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs, true, 1);
+ if (err)
+ goto out_unlock_outer;
+
+ pfence = xe_preempt_fence_create(e, e->compute.context,
+ ++e->compute.seqno);
+ if (!pfence) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ list_add(&e->compute.link, &vm->preempt.engines);
+ ++vm->preempt.num_engines;
+ e->compute.pfence = pfence;
+
+ down_read(&vm->userptr.notifier_lock);
+
+ dma_resv_add_fence(&vm->resv, pfence,
+ DMA_RESV_USAGE_BOOKKEEP);
+
+ xe_vm_fence_all_extobjs(vm, pfence, DMA_RESV_USAGE_BOOKKEEP);
+
+ /*
+ * Check to see if a preemption on the VM is in flight or a userptr
+ * invalidation is pending; if so, trigger this preempt fence to sync state
+ * with the other preempt fences on the VM.
+ */
+ wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
+ if (wait)
+ dma_fence_enable_sw_signaling(pfence);
+
+ up_read(&vm->userptr.notifier_lock);
+
+out_unlock:
+ xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
+out_unlock_outer:
+ up_write(&vm->lock);
+
+ return err;
+}
+
+/**
+ * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
+ * that need repinning.
+ * @vm: The VM.
+ *
+ * This function checks whether the VM has userptrs that need repinning,
+ * and provides a release-type barrier on the userptr.notifier_lock after
+ * checking.
+ *
+ * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
+ */
+int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
+{
+ lockdep_assert_held_read(&vm->userptr.notifier_lock);
+
+ return (list_empty(&vm->userptr.repin_list) &&
+ list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
+}
+
+/**
+ * xe_vm_lock_dma_resv() - Lock the vm dma_resv object and the dma_resv
+ * objects of the vm's external buffer objects.
+ * @vm: The vm.
+ * @ww: Pointer to a struct ww_acquire_ctx locking context.
+ * @tv_onstack: Array size XE_ONSTACK_TV of storage for the struct
+ * ttm_validate_buffers used for locking.
+ * @tv: Pointer to a pointer that on output contains the actual storage used.
+ * @objs: List head for the buffer objects locked.
+ * @intr: Whether to lock interruptible.
+ * @num_shared: Number of dma-fence slots to reserve in the locked objects.
+ *
+ * Locks the vm dma-resv objects and all the dma-resv objects of the
+ * buffer objects on the vm external object list. The TTM utilities require
+ * a list of struct ttm_validate_buffers pointing to the actual buffer
+ * objects to lock. Storage for those struct ttm_validate_buffers should
+ * be provided in @tv_onstack, and is typically reserved on the stack
+ * of the caller. If the size of @tv_onstack isn't sufficient, then
+ * storage will be allocated internally using kvmalloc().
+ *
+ * The function performs deadlock handling internally, and after a
+ * successful return the ww locking transaction should be considered
+ * sealed.
+ *
+ * Return: 0 on success, Negative error code on error. In particular if
+ * @intr is set to true, -EINTR or -ERESTARTSYS may be returned. In case
+ * of error, any locking performed has been reverted.
+ */
+int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
+ struct ttm_validate_buffer *tv_onstack,
+ struct ttm_validate_buffer **tv,
+ struct list_head *objs,
+ bool intr,
+ unsigned int num_shared)
+{
+ struct ttm_validate_buffer *tv_vm, *tv_bo;
+ struct xe_vma *vma, *next;
+ LIST_HEAD(dups);
+ int err;
+
+ lockdep_assert_held(&vm->lock);
+
+ if (vm->extobj.entries < XE_ONSTACK_TV) {
+ tv_vm = tv_onstack;
+ } else {
+ tv_vm = kvmalloc_array(vm->extobj.entries + 1, sizeof(*tv_vm),
+ GFP_KERNEL);
+ if (!tv_vm)
+ return -ENOMEM;
+ }
+ tv_bo = tv_vm + 1;
+
+ INIT_LIST_HEAD(objs);
+ list_for_each_entry(vma, &vm->extobj.list, extobj.link) {
+ tv_bo->num_shared = num_shared;
+ tv_bo->bo = &vma->bo->ttm;
+
+ list_add_tail(&tv_bo->head, objs);
+ tv_bo++;
+ }
+ tv_vm->num_shared = num_shared;
+ tv_vm->bo = xe_vm_ttm_bo(vm);
+ list_add_tail(&tv_vm->head, objs);
+ err = ttm_eu_reserve_buffers(ww, objs, intr, &dups);
+ if (err)
+ goto out_err;
+
+ spin_lock(&vm->notifier.list_lock);
+ list_for_each_entry_safe(vma, next, &vm->notifier.rebind_list,
+ notifier.rebind_link) {
+ xe_bo_assert_held(vma->bo);
+
+ list_del_init(&vma->notifier.rebind_link);
+ if (vma->gt_present && !vma->destroyed)
+ list_move_tail(&vma->rebind_link, &vm->rebind_list);
+ }
+ spin_unlock(&vm->notifier.list_lock);
+
+ *tv = tv_vm;
+ return 0;
+
+out_err:
+ if (tv_vm != tv_onstack)
+ kvfree(tv_vm);
+
+ return err;
+}
+
+/**
+ * xe_vm_unlock_dma_resv() - Unlock reservation objects locked by
+ * xe_vm_lock_dma_resv()
+ * @vm: The vm.
+ * @tv_onstack: The @tv_onstack array given to xe_vm_lock_dma_resv().
+ * @tv: The value of *@tv given by xe_vm_lock_dma_resv().
+ * @ww: The ww_acquire_context used for locking.
+ * @objs: The list returned from xe_vm_lock_dma_resv().
+ *
+ * Unlocks the reservation objects and frees any memory allocated by
+ * xe_vm_lock_dma_resv().
+ */
+void xe_vm_unlock_dma_resv(struct xe_vm *vm,
+ struct ttm_validate_buffer *tv_onstack,
+ struct ttm_validate_buffer *tv,
+ struct ww_acquire_ctx *ww,
+ struct list_head *objs)
+{
+ /*
+ * Nothing should've been able to enter the list while we were locked,
+ * since we've held the dma-resvs of all the vm's external objects,
+ * and holding the dma_resv of an object is required for list
+ * addition, and we shouldn't add ourselves.
+ */
+ XE_WARN_ON(!list_empty(&vm->notifier.rebind_list));
+
+ ttm_eu_backoff_reservation(ww, objs);
+ if (tv && tv != tv_onstack)
+ kvfree(tv);
+}
+
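+/*
+ * Typical usage of the two helpers above (an illustrative sketch mirroring
+ * the rebind worker below; assumes the caller already holds vm->lock):
+ *
+ *	struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
+ *	struct ttm_validate_buffer *tv;
+ *	struct ww_acquire_ctx ww;
+ *	struct list_head objs;
+ *	int err;
+ *
+ *	err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs, true, 1);
+ *	if (err)
+ *		return err;
+ *
+ *	... validate BOs / install fences while the dma-resvs are held ...
+ *
+ *	xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
+ */
+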
+static void preempt_rebind_work_func(struct work_struct *w)
+{
+ struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
+ struct xe_vma *vma;
+ struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
+ struct ttm_validate_buffer *tv;
+ struct ww_acquire_ctx ww;
+ struct list_head objs;
+ struct dma_fence *rebind_fence;
+ unsigned int fence_count = 0;
+ LIST_HEAD(preempt_fences);
+ int err;
+ long wait;
+ int __maybe_unused tries = 0;
+
+ XE_BUG_ON(!xe_vm_in_compute_mode(vm));
+ trace_xe_vm_rebind_worker_enter(vm);
+
+ if (xe_vm_is_closed(vm)) {
+ trace_xe_vm_rebind_worker_exit(vm);
+ return;
+ }
+
+ down_write(&vm->lock);
+
+retry:
+ if (vm->async_ops.error)
+ goto out_unlock_outer;
+
+ /*
+ * Extreme corner where we exit a VM error state with a munmap style VM
+ * unbind in flight which requires a rebind. In this case the rebind needs
+ * to install some fences into the dma-resv slots. The worker to do this is
+ * queued; let that worker make progress by dropping vm->lock and trying
+ * this again.
+ */
+ if (vm->async_ops.munmap_rebind_inflight) {
+ up_write(&vm->lock);
+ flush_work(&vm->async_ops.work);
+ goto retry;
+ }
+
+ if (xe_vm_userptr_check_repin(vm)) {
+ err = xe_vm_userptr_pin(vm);
+ if (err)
+ goto out_unlock_outer;
+ }
+
+ err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs,
+ false, vm->preempt.num_engines);
+ if (err)
+ goto out_unlock_outer;
+
+ /* Fresh preempt fences already installed. Everything is running. */
+ if (!preempt_fences_waiting(vm))
+ goto out_unlock;
+
+ /*
+ * This makes sure the vm is completely suspended and also balances
+ * xe_engine suspend and resume; we resume *all* vm engines below.
+ */
+ err = wait_for_existing_preempt_fences(vm);
+ if (err)
+ goto out_unlock;
+
+ err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
+ if (err)
+ goto out_unlock;
+
+ list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
+ if (xe_vma_is_userptr(vma) || vma->destroyed)
+ continue;
+
+ err = xe_bo_validate(vma->bo, vm, false);
+ if (err)
+ goto out_unlock;
+ }
+
+ rebind_fence = xe_vm_rebind(vm, true);
+ if (IS_ERR(rebind_fence)) {
+ err = PTR_ERR(rebind_fence);
+ goto out_unlock;
+ }
+
+ if (rebind_fence) {
+ dma_fence_wait(rebind_fence, false);
+ dma_fence_put(rebind_fence);
+ }
+
+ /* Wait on munmap style VM unbinds */
+ wait = dma_resv_wait_timeout(&vm->resv,
+ DMA_RESV_USAGE_KERNEL,
+ false, MAX_SCHEDULE_TIMEOUT);
+ if (wait <= 0) {
+ err = -ETIME;
+ goto out_unlock;
+ }
+
+#define retry_required(__tries, __vm) \
+ (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
+ (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
+ __xe_vm_userptr_needs_repin(__vm))
+
+ down_read(&vm->userptr.notifier_lock);
+ if (retry_required(tries, vm)) {
+ up_read(&vm->userptr.notifier_lock);
+ err = -EAGAIN;
+ goto out_unlock;
+ }
+
+#undef retry_required
+
+ /* Point of no return. */
+ arm_preempt_fences(vm, &preempt_fences);
+ resume_and_reinstall_preempt_fences(vm);
+ up_read(&vm->userptr.notifier_lock);
+
+out_unlock:
+ xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
+out_unlock_outer:
+ if (err == -EAGAIN) {
+ trace_xe_vm_rebind_worker_retry(vm);
+ goto retry;
+ }
+ up_write(&vm->lock);
+
+ free_preempt_fences(&preempt_fences);
+
+ XE_WARN_ON(err < 0); /* TODO: Kill VM or put in error state */
+ trace_xe_vm_rebind_worker_exit(vm);
+}
+
+struct async_op_fence;
+static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
+ struct xe_engine *e, struct xe_sync_entry *syncs,
+ u32 num_syncs, struct async_op_fence *afence);
+
+static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier);
+ struct xe_vm *vm = vma->vm;
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
+ long err;
+
+ XE_BUG_ON(!xe_vma_is_userptr(vma));
+ trace_xe_vma_userptr_invalidate(vma);
+
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+
+ down_write(&vm->userptr.notifier_lock);
+ mmu_interval_set_seq(mni, cur_seq);
+
+ /* No need to stop gpu access if the userptr is not yet bound. */
+ if (!vma->userptr.initial_bind) {
+ up_write(&vm->userptr.notifier_lock);
+ return true;
+ }
+
+ /*
+ * Tell exec and rebind worker they need to repin and rebind this
+ * userptr.
+ */
+ if (!xe_vm_in_fault_mode(vm) && !vma->destroyed && vma->gt_present) {
+ spin_lock(&vm->userptr.invalidated_lock);
+ list_move_tail(&vma->userptr.invalidate_link,
+ &vm->userptr.invalidated);
+ spin_unlock(&vm->userptr.invalidated_lock);
+ }
+
+ up_write(&vm->userptr.notifier_lock);
+
+ /*
+ * Preempt fences turn into schedule disables, pipeline these.
+ * Note that even in fault mode, we need to wait for binds and
+ * unbinds to complete, and those are attached as BOOKKEEP fences
+ * to the vm.
+ */
+ dma_resv_iter_begin(&cursor, &vm->resv,
+ DMA_RESV_USAGE_BOOKKEEP);
+ dma_resv_for_each_fence_unlocked(&cursor, fence)
+ dma_fence_enable_sw_signaling(fence);
+ dma_resv_iter_end(&cursor);
+
+ err = dma_resv_wait_timeout(&vm->resv,
+ DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT);
+ XE_WARN_ON(err <= 0);
+
+ if (xe_vm_in_fault_mode(vm)) {
+ err = xe_vm_invalidate_vma(vma);
+ XE_WARN_ON(err);
+ }
+
+ trace_xe_vma_userptr_invalidate_complete(vma);
+
+ return true;
+}
+
+static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
+ .invalidate = vma_userptr_invalidate,
+};
+
+int xe_vm_userptr_pin(struct xe_vm *vm)
+{
+ struct xe_vma *vma, *next;
+ int err = 0;
+ LIST_HEAD(tmp_evict);
+
+ lockdep_assert_held_write(&vm->lock);
+
+ /* Collect invalidated userptrs */
+ spin_lock(&vm->userptr.invalidated_lock);
+ list_for_each_entry_safe(vma, next, &vm->userptr.invalidated,
+ userptr.invalidate_link) {
+ list_del_init(&vma->userptr.invalidate_link);
+ list_move_tail(&vma->userptr_link, &vm->userptr.repin_list);
+ }
+ spin_unlock(&vm->userptr.invalidated_lock);
+
+ /* Pin and move to temporary list */
+ list_for_each_entry_safe(vma, next, &vm->userptr.repin_list, userptr_link) {
+ err = xe_vma_userptr_pin_pages(vma);
+ if (err < 0)
+ goto out_err;
+
+ list_move_tail(&vma->userptr_link, &tmp_evict);
+ }
+
+ /* Take lock and move to rebind_list for rebinding. */
+ err = dma_resv_lock_interruptible(&vm->resv, NULL);
+ if (err)
+ goto out_err;
+
+ list_for_each_entry_safe(vma, next, &tmp_evict, userptr_link) {
+ list_del_init(&vma->userptr_link);
+ list_move_tail(&vma->rebind_link, &vm->rebind_list);
+ }
+
+ dma_resv_unlock(&vm->resv);
+
+ return 0;
+
+out_err:
+ list_splice_tail(&tmp_evict, &vm->userptr.repin_list);
+
+ return err;
+}
+
+/**
+ * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
+ * that need repinning.
+ * @vm: The VM.
+ *
+ * This function does an advisory check for whether the VM has userptrs that
+ * need repinning.
+ *
+ * Return: 0 if there are no indications of userptrs needing repinning,
+ * -EAGAIN if there are.
+ */
+int xe_vm_userptr_check_repin(struct xe_vm *vm)
+{
+ return (list_empty_careful(&vm->userptr.repin_list) &&
+ list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
+}
+
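+/*
+ * Illustrative sketch of the check / pin / re-check pattern these helpers
+ * support (this is what the rebind worker does; assumes vm->lock is held
+ * for write):
+ *
+ *	if (xe_vm_userptr_check_repin(vm)) {
+ *		err = xe_vm_userptr_pin(vm);
+ *		if (err)
+ *			return err;
+ *	}
+ *
+ *	... lock dma-resvs, validate and rebind ...
+ *
+ *	down_read(&vm->userptr.notifier_lock);
+ *	if (__xe_vm_userptr_needs_repin(vm)) {
+ *		up_read(&vm->userptr.notifier_lock);
+ *		goto retry;
+ *	}
+ *	... install fences, then drop the notifier lock ...
+ */
+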
+static struct dma_fence *
+xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
+ struct xe_sync_entry *syncs, u32 num_syncs);
+
+struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
+{
+ struct dma_fence *fence = NULL;
+ struct xe_vma *vma, *next;
+
+ lockdep_assert_held(&vm->lock);
+ if (xe_vm_no_dma_fences(vm) && !rebind_worker)
+ return NULL;
+
+ xe_vm_assert_held(vm);
+ list_for_each_entry_safe(vma, next, &vm->rebind_list, rebind_link) {
+ XE_WARN_ON(!vma->gt_present);
+
+ list_del_init(&vma->rebind_link);
+ dma_fence_put(fence);
+ if (rebind_worker)
+ trace_xe_vma_rebind_worker(vma);
+ else
+ trace_xe_vma_rebind_exec(vma);
+ fence = xe_vm_bind_vma(vma, NULL, NULL, 0);
+ if (IS_ERR(fence))
+ return fence;
+ }
+
+ return fence;
+}
+
+static struct xe_vma *xe_vma_create(struct xe_vm *vm,
+ struct xe_bo *bo,
+ u64 bo_offset_or_userptr,
+ u64 start, u64 end,
+ bool read_only,
+ u64 gt_mask)
+{
+ struct xe_vma *vma;
+ struct xe_gt *gt;
+ u8 id;
+
+ XE_BUG_ON(start >= end);
+ XE_BUG_ON(end >= vm->size);
+
+ vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+ if (!vma) {
+ vma = ERR_PTR(-ENOMEM);
+ return vma;
+ }
+
+ INIT_LIST_HEAD(&vma->rebind_link);
+ INIT_LIST_HEAD(&vma->unbind_link);
+ INIT_LIST_HEAD(&vma->userptr_link);
+ INIT_LIST_HEAD(&vma->userptr.invalidate_link);
+ INIT_LIST_HEAD(&vma->notifier.rebind_link);
+ INIT_LIST_HEAD(&vma->extobj.link);
+
+ vma->vm = vm;
+ vma->start = start;
+ vma->end = end;
+ if (read_only)
+ vma->pte_flags = PTE_READ_ONLY;
+
+ if (gt_mask) {
+ vma->gt_mask = gt_mask;
+ } else {
+ for_each_gt(gt, vm->xe, id)
+ if (!xe_gt_is_media_type(gt))
+ vma->gt_mask |= 0x1 << id;
+ }
+
+ if (vm->xe->info.platform == XE_PVC)
+ vma->use_atomic_access_pte_bit = true;
+
+ if (bo) {
+ xe_bo_assert_held(bo);
+ vma->bo_offset = bo_offset_or_userptr;
+ vma->bo = xe_bo_get(bo);
+ list_add_tail(&vma->bo_link, &bo->vmas);
+ } else /* userptr */ {
+ u64 size = end - start + 1;
+ int err;
+
+ vma->userptr.ptr = bo_offset_or_userptr;
+
+ err = mmu_interval_notifier_insert(&vma->userptr.notifier,
+ current->mm,
+ vma->userptr.ptr, size,
+ &vma_userptr_notifier_ops);
+ if (err) {
+ kfree(vma);
+ vma = ERR_PTR(err);
+ return vma;
+ }
+
+ vma->userptr.notifier_seq = LONG_MAX;
+ xe_vm_get(vm);
+ }
+
+ return vma;
+}
+
+static bool vm_remove_extobj(struct xe_vma *vma)
+{
+ if (!list_empty(&vma->extobj.link)) {
+ vma->vm->extobj.entries--;
+ list_del_init(&vma->extobj.link);
+ return true;
+ }
+ return false;
+}
+
+static void xe_vma_destroy_late(struct xe_vma *vma)
+{
+ struct xe_vm *vm = vma->vm;
+ struct xe_device *xe = vm->xe;
+ bool read_only = vma->pte_flags & PTE_READ_ONLY;
+
+ if (xe_vma_is_userptr(vma)) {
+ if (vma->userptr.sg) {
+ dma_unmap_sgtable(xe->drm.dev,
+ vma->userptr.sg,
+ read_only ? DMA_TO_DEVICE :
+ DMA_BIDIRECTIONAL, 0);
+ sg_free_table(vma->userptr.sg);
+ vma->userptr.sg = NULL;
+ }
+
+ /*
+ * Since userptr pages are not pinned, we can't remove
+ * the notifier until we're sure the GPU is not accessing
+ * them anymore.
+ */
+ mmu_interval_notifier_remove(&vma->userptr.notifier);
+ xe_vm_put(vm);
+ } else {
+ xe_bo_put(vma->bo);
+ }
+
+ kfree(vma);
+}
+
+static void vma_destroy_work_func(struct work_struct *w)
+{
+ struct xe_vma *vma =
+ container_of(w, struct xe_vma, destroy_work);
+
+ xe_vma_destroy_late(vma);
+}
+
+static struct xe_vma *
+bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm,
+ struct xe_vma *ignore)
+{
+ struct xe_vma *vma;
+
+ list_for_each_entry(vma, &bo->vmas, bo_link) {
+ if (vma != ignore && vma->vm == vm && !vma->destroyed)
+ return vma;
+ }
+
+ return NULL;
+}
+
+static bool bo_has_vm_references(struct xe_bo *bo, struct xe_vm *vm,
+ struct xe_vma *ignore)
+{
+ struct ww_acquire_ctx ww;
+ bool ret;
+
+ xe_bo_lock(bo, &ww, 0, false);
+ ret = !!bo_has_vm_references_locked(bo, vm, ignore);
+ xe_bo_unlock(bo, &ww);
+
+ return ret;
+}
+
+static void __vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
+{
+ list_add(&vma->extobj.link, &vm->extobj.list);
+ vm->extobj.entries++;
+}
+
+static void vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma)
+{
+ struct xe_bo *bo = vma->bo;
+
+ lockdep_assert_held_write(&vm->lock);
+
+ if (bo_has_vm_references(bo, vm, vma))
+ return;
+
+ __vm_insert_extobj(vm, vma);
+}
+
+static void vma_destroy_cb(struct dma_fence *fence,
+ struct dma_fence_cb *cb)
+{
+ struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
+
+ INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
+ queue_work(system_unbound_wq, &vma->destroy_work);
+}
+
+static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
+{
+ struct xe_vm *vm = vma->vm;
+
+ lockdep_assert_held_write(&vm->lock);
+ XE_BUG_ON(!list_empty(&vma->unbind_link));
+
+ if (xe_vma_is_userptr(vma)) {
+ XE_WARN_ON(!vma->destroyed);
+ spin_lock(&vm->userptr.invalidated_lock);
+ list_del_init(&vma->userptr.invalidate_link);
+ spin_unlock(&vm->userptr.invalidated_lock);
+ list_del(&vma->userptr_link);
+ } else {
+ xe_bo_assert_held(vma->bo);
+ list_del(&vma->bo_link);
+
+ spin_lock(&vm->notifier.list_lock);
+ list_del(&vma->notifier.rebind_link);
+ spin_unlock(&vm->notifier.list_lock);
+
+ if (!vma->bo->vm && vm_remove_extobj(vma)) {
+ struct xe_vma *other;
+
+ other = bo_has_vm_references_locked(vma->bo, vm, NULL);
+
+ if (other)
+ __vm_insert_extobj(vm, other);
+ }
+ }
+
+ xe_vm_assert_held(vm);
+ if (!list_empty(&vma->rebind_link))
+ list_del(&vma->rebind_link);
+
+ if (fence) {
+ int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
+ vma_destroy_cb);
+
+ if (ret) {
+ XE_WARN_ON(ret != -ENOENT);
+ xe_vma_destroy_late(vma);
+ }
+ } else {
+ xe_vma_destroy_late(vma);
+ }
+}
+
+static void xe_vma_destroy_unlocked(struct xe_vma *vma)
+{
+ struct ttm_validate_buffer tv[2];
+ struct ww_acquire_ctx ww;
+ struct xe_bo *bo = vma->bo;
+ LIST_HEAD(objs);
+ LIST_HEAD(dups);
+ int err;
+
+ memset(tv, 0, sizeof(tv));
+ tv[0].bo = xe_vm_ttm_bo(vma->vm);
+ list_add(&tv[0].head, &objs);
+
+ if (bo) {
+ tv[1].bo = &xe_bo_get(bo)->ttm;
+ list_add(&tv[1].head, &objs);
+ }
+ err = ttm_eu_reserve_buffers(&ww, &objs, false, &dups);
+ XE_WARN_ON(err);
+
+ xe_vma_destroy(vma, NULL);
+
+ ttm_eu_backoff_reservation(&ww, &objs);
+ if (bo)
+ xe_bo_put(bo);
+}
+
+static struct xe_vma *to_xe_vma(const struct rb_node *node)
+{
+ BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0);
+ return (struct xe_vma *)node;
+}
+
+static int xe_vma_cmp(const struct xe_vma *a, const struct xe_vma *b)
+{
+ if (a->end < b->start) {
+ return -1;
+ } else if (b->end < a->start) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+static bool xe_vma_less_cb(struct rb_node *a, const struct rb_node *b)
+{
+ return xe_vma_cmp(to_xe_vma(a), to_xe_vma(b)) < 0;
+}
+
+int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node)
+{
+ struct xe_vma *cmp = to_xe_vma(node);
+ const struct xe_vma *own = key;
+
+ if (own->start > cmp->end)
+ return 1;
+
+ if (own->end < cmp->start)
+ return -1;
+
+ return 0;
+}
+
+struct xe_vma *
+xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma)
+{
+ struct rb_node *node;
+
+ if (xe_vm_is_closed(vm))
+ return NULL;
+
+ XE_BUG_ON(vma->end >= vm->size);
+ lockdep_assert_held(&vm->lock);
+
+ node = rb_find(vma, &vm->vmas, xe_vma_cmp_vma_cb);
+
+ return node ? to_xe_vma(node) : NULL;
+}
+
+static void xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
+{
+ XE_BUG_ON(vma->vm != vm);
+ lockdep_assert_held(&vm->lock);
+
+ rb_add(&vma->vm_node, &vm->vmas, xe_vma_less_cb);
+}
+
+static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
+{
+ XE_BUG_ON(vma->vm != vm);
+ lockdep_assert_held(&vm->lock);
+
+ rb_erase(&vma->vm_node, &vm->vmas);
+ if (vm->usm.last_fault_vma == vma)
+ vm->usm.last_fault_vma = NULL;
+}
+
+static void async_op_work_func(struct work_struct *w);
+static void vm_destroy_work_func(struct work_struct *w);
+
+struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
+{
+ struct xe_vm *vm;
+ int err, i = 0, number_gts = 0;
+ struct xe_gt *gt;
+ u8 id;
+
+ vm = kzalloc(sizeof(*vm), GFP_KERNEL);
+ if (!vm)
+ return ERR_PTR(-ENOMEM);
+
+ vm->xe = xe;
+ kref_init(&vm->refcount);
+ dma_resv_init(&vm->resv);
+
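+ /* The usable VA range follows from the depth of the page-table tree */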
+ vm->size = 1ull << xe_pt_shift(xe->info.vm_max_level + 1);
+
+ vm->vmas = RB_ROOT;
+ vm->flags = flags;
+
+ init_rwsem(&vm->lock);
+
+ INIT_LIST_HEAD(&vm->rebind_list);
+
+ INIT_LIST_HEAD(&vm->userptr.repin_list);
+ INIT_LIST_HEAD(&vm->userptr.invalidated);
+ init_rwsem(&vm->userptr.notifier_lock);
+ spin_lock_init(&vm->userptr.invalidated_lock);
+
+ INIT_LIST_HEAD(&vm->notifier.rebind_list);
+ spin_lock_init(&vm->notifier.list_lock);
+
+ INIT_LIST_HEAD(&vm->async_ops.pending);
+ INIT_WORK(&vm->async_ops.work, async_op_work_func);
+ spin_lock_init(&vm->async_ops.lock);
+
+ INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
+
+ INIT_LIST_HEAD(&vm->preempt.engines);
+ vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */
+
+ INIT_LIST_HEAD(&vm->extobj.list);
+
+ if (!(flags & XE_VM_FLAG_MIGRATION)) {
+ /* We need to immediately exit from any D3 state */
+ xe_pm_runtime_get(xe);
+ xe_device_mem_access_get(xe);
+ }
+
+ err = dma_resv_lock_interruptible(&vm->resv, NULL);
+ if (err)
+ goto err_put;
+
+ if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
+ vm->flags |= XE_VM_FLAGS_64K;
+
+ for_each_gt(gt, xe, id) {
+ if (xe_gt_is_media_type(gt))
+ continue;
+
+ if (flags & XE_VM_FLAG_MIGRATION &&
+ gt->info.id != XE_VM_FLAG_GT_ID(flags))
+ continue;
+
+ vm->pt_root[id] = xe_pt_create(vm, gt, xe->info.vm_max_level);
+ if (IS_ERR(vm->pt_root[id])) {
+ err = PTR_ERR(vm->pt_root[id]);
+ vm->pt_root[id] = NULL;
+ goto err_destroy_root;
+ }
+ }
+
+ if (flags & XE_VM_FLAG_SCRATCH_PAGE) {
+ for_each_gt(gt, xe, id) {
+ if (!vm->pt_root[id])
+ continue;
+
+ err = xe_pt_create_scratch(xe, gt, vm);
+ if (err)
+ goto err_scratch_pt;
+ }
+ }
+
+ if (flags & DRM_XE_VM_CREATE_COMPUTE_MODE) {
+ INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
+ vm->flags |= XE_VM_FLAG_COMPUTE_MODE;
+ }
+
+ if (flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS) {
+ vm->async_ops.fence.context = dma_fence_context_alloc(1);
+ vm->flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
+ }
+
+ /* Fill pt_root after allocating scratch tables */
+ for_each_gt(gt, xe, id) {
+ if (!vm->pt_root[id])
+ continue;
+
+ xe_pt_populate_empty(gt, vm, vm->pt_root[id]);
+ }
+ dma_resv_unlock(&vm->resv);
+
+ /* Kernel migration VM shouldn't have a circular loop back to itself */
+ if (!(flags & XE_VM_FLAG_MIGRATION)) {
+ for_each_gt(gt, xe, id) {
+ struct xe_vm *migrate_vm;
+ struct xe_engine *eng;
+
+ if (!vm->pt_root[id])
+ continue;
+
+ migrate_vm = xe_migrate_get_vm(gt->migrate);
+ eng = xe_engine_create_class(xe, gt, migrate_vm,
+ XE_ENGINE_CLASS_COPY,
+ ENGINE_FLAG_VM);
+ xe_vm_put(migrate_vm);
+ if (IS_ERR(eng)) {
+ xe_vm_close_and_put(vm);
+ return ERR_CAST(eng);
+ }
+ vm->eng[id] = eng;
+ number_gts++;
+ }
+ }
+
+ if (number_gts > 1)
+ vm->composite_fence_ctx = dma_fence_context_alloc(1);
+
+ mutex_lock(&xe->usm.lock);
+ if (flags & XE_VM_FLAG_FAULT_MODE)
+ xe->usm.num_vm_in_fault_mode++;
+ else if (!(flags & XE_VM_FLAG_MIGRATION))
+ xe->usm.num_vm_in_non_fault_mode++;
+ mutex_unlock(&xe->usm.lock);
+
+ trace_xe_vm_create(vm);
+
+ return vm;
+
+err_scratch_pt:
+ for_each_gt(gt, xe, id) {
+ if (!vm->pt_root[id])
+ continue;
+
+ i = vm->pt_root[id]->level;
+ while (i) {
+ if (vm->scratch_pt[id][--i])
+ xe_pt_destroy(vm->scratch_pt[id][i],
+ vm->flags, NULL);
+ }
+ xe_bo_unpin(vm->scratch_bo[id]);
+ xe_bo_put(vm->scratch_bo[id]);
+ }
+err_destroy_root:
+ for_each_gt(gt, xe, id) {
+ if (vm->pt_root[id])
+ xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
+ }
+ dma_resv_unlock(&vm->resv);
+err_put:
+ dma_resv_fini(&vm->resv);
+ kfree(vm);
+ if (!(flags & XE_VM_FLAG_MIGRATION)) {
+ xe_device_mem_access_put(xe);
+ xe_pm_runtime_put(xe);
+ }
+ return ERR_PTR(err);
+}
+
+static void flush_async_ops(struct xe_vm *vm)
+{
+ queue_work(system_unbound_wq, &vm->async_ops.work);
+ flush_work(&vm->async_ops.work);
+}
+
+static void vm_error_capture(struct xe_vm *vm, int err,
+ u32 op, u64 addr, u64 size)
+{
+ struct drm_xe_vm_bind_op_error_capture capture;
+ u64 __user *address =
+ u64_to_user_ptr(vm->async_ops.error_capture.addr);
+ bool in_kthread = !current->mm;
+
+ capture.error = err;
+ capture.op = op;
+ capture.addr = addr;
+ capture.size = size;
+
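+ /*
+ * The async bind worker has no mm of its own, so temporarily adopt the
+ * mm captured when error capture was set up; copy_to_user() below needs
+ * it to resolve the user address.
+ */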
+ if (in_kthread) {
+ if (!mmget_not_zero(vm->async_ops.error_capture.mm))
+ goto mm_closed;
+ kthread_use_mm(vm->async_ops.error_capture.mm);
+ }
+
+ if (copy_to_user(address, &capture, sizeof(capture)))
+ XE_WARN_ON("Copy to user failed");
+
+ if (in_kthread) {
+ kthread_unuse_mm(vm->async_ops.error_capture.mm);
+ mmput(vm->async_ops.error_capture.mm);
+ }
+
+mm_closed:
+ wake_up_all(&vm->async_ops.error_capture.wq);
+}
+
+void xe_vm_close_and_put(struct xe_vm *vm)
+{
+ struct rb_root contested = RB_ROOT;
+ struct ww_acquire_ctx ww;
+ struct xe_device *xe = vm->xe;
+ struct xe_gt *gt;
+ u8 id;
+
+ XE_BUG_ON(vm->preempt.num_engines);
+
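+ /* Setting vm->size to 0 marks the VM as closed, see xe_vm_is_closed() */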
+ vm->size = 0;
+ smp_mb();
+ flush_async_ops(vm);
+ if (xe_vm_in_compute_mode(vm))
+ flush_work(&vm->preempt.rebind_work);
+
+ for_each_gt(gt, xe, id) {
+ if (vm->eng[id]) {
+ xe_engine_kill(vm->eng[id]);
+ xe_engine_put(vm->eng[id]);
+ vm->eng[id] = NULL;
+ }
+ }
+
+ down_write(&vm->lock);
+ xe_vm_lock(vm, &ww, 0, false);
+ while (vm->vmas.rb_node) {
+ struct xe_vma *vma = to_xe_vma(vm->vmas.rb_node);
+
+ if (xe_vma_is_userptr(vma)) {
+ down_read(&vm->userptr.notifier_lock);
+ vma->destroyed = true;
+ up_read(&vm->userptr.notifier_lock);
+ }
+
+ rb_erase(&vma->vm_node, &vm->vmas);
+
+ /* Easy case: userptr or VM-private BO, destroy the VMA immediately */
+ if (xe_vma_is_userptr(vma) || vma->bo->vm) {
+ xe_vma_destroy(vma, NULL);
+ continue;
+ }
+
+ rb_add(&vma->vm_node, &contested, xe_vma_less_cb);
+ }
+
+ /*
+ * All vm operations will add shared fences to resv.
+ * The only exception is eviction for a shared object,
+ * but even so, the unbind when evicted would still
+ * install a fence to resv. Hence it's safe to
+ * destroy the pagetables immediately.
+ */
+ for_each_gt(gt, xe, id) {
+ if (vm->scratch_bo[id]) {
+ u32 i;
+
+ xe_bo_unpin(vm->scratch_bo[id]);
+ xe_bo_put(vm->scratch_bo[id]);
+ for (i = 0; i < vm->pt_root[id]->level; i++)
+ xe_pt_destroy(vm->scratch_pt[id][i], vm->flags,
+ NULL);
+ }
+ }
+ xe_vm_unlock(vm, &ww);
+
+ if (contested.rb_node) {
+
+ /*
+ * The VM is now dead and nothing can be re-added to vm->vmas.
+ * Since we hold a refcount to each bo, we can remove and free
+ * the remaining members safely without locking.
+ */
+ while (contested.rb_node) {
+ struct xe_vma *vma = to_xe_vma(contested.rb_node);
+
+ rb_erase(&vma->vm_node, &contested);
+ xe_vma_destroy_unlocked(vma);
+ }
+ }
+
+ if (vm->async_ops.error_capture.addr)
+ wake_up_all(&vm->async_ops.error_capture.wq);
+
+ XE_WARN_ON(!list_empty(&vm->extobj.list));
+ up_write(&vm->lock);
+
+ xe_vm_put(vm);
+}
+
+static void vm_destroy_work_func(struct work_struct *w)
+{
+ struct xe_vm *vm =
+ container_of(w, struct xe_vm, destroy_work);
+ struct ww_acquire_ctx ww;
+ struct xe_device *xe = vm->xe;
+ struct xe_gt *gt;
+ u8 id;
+ void *lookup;
+
+ /* xe_vm_close_and_put was not called? */
+ XE_WARN_ON(vm->size);
+
+ if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
+ xe_device_mem_access_put(xe);
+ xe_pm_runtime_put(xe);
+
+ mutex_lock(&xe->usm.lock);
+ lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
+ XE_WARN_ON(lookup != vm);
+ mutex_unlock(&xe->usm.lock);
+ }
+
+ /*
+ * XXX: We delay destroying the PT root until the VM is freed as the PT root
+ * is needed for xe_vm_lock to work. If we remove that dependency this
+ * can be moved to xe_vm_close_and_put.
+ */
+ xe_vm_lock(vm, &ww, 0, false);
+ for_each_gt(gt, xe, id) {
+ if (vm->pt_root[id]) {
+ xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
+ vm->pt_root[id] = NULL;
+ }
+ }
+ xe_vm_unlock(vm, &ww);
+
+ mutex_lock(&xe->usm.lock);
+ if (vm->flags & XE_VM_FLAG_FAULT_MODE)
+ xe->usm.num_vm_in_fault_mode--;
+ else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
+ xe->usm.num_vm_in_non_fault_mode--;
+ mutex_unlock(&xe->usm.lock);
+
+ trace_xe_vm_free(vm);
+ dma_fence_put(vm->rebind_fence);
+ dma_resv_fini(&vm->resv);
+ kfree(vm);
+}
+
+void xe_vm_free(struct kref *ref)
+{
+ struct xe_vm *vm = container_of(ref, struct xe_vm, refcount);
+
+ /* To destroy the VM we need to be able to sleep */
+ queue_work(system_unbound_wq, &vm->destroy_work);
+}
+
+struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
+{
+ struct xe_vm *vm;
+
+ mutex_lock(&xef->vm.lock);
+ vm = xa_load(&xef->vm.xa, id);
+ mutex_unlock(&xef->vm.lock);
+
+ if (vm)
+ xe_vm_get(vm);
+
+ return vm;
+}
+
+u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_gt *full_gt)
+{
+ XE_BUG_ON(xe_gt_is_media_type(full_gt));
+
+ return gen8_pde_encode(vm->pt_root[full_gt->info.id]->bo, 0,
+ XE_CACHE_WB);
+}
+
+static struct dma_fence *
+xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e,
+ struct xe_sync_entry *syncs, u32 num_syncs)
+{
+ struct xe_gt *gt;
+ struct dma_fence *fence = NULL;
+ struct dma_fence **fences = NULL;
+ struct dma_fence_array *cf = NULL;
+ struct xe_vm *vm = vma->vm;
+ int cur_fence = 0, i;
+ int number_gts = hweight_long(vma->gt_present);
+ int err;
+ u8 id;
+
+ trace_xe_vma_unbind(vma);
+
+ if (number_gts > 1) {
+ fences = kmalloc_array(number_gts, sizeof(*fences),
+ GFP_KERNEL);
+ if (!fences)
+ return ERR_PTR(-ENOMEM);
+ }
+
+ for_each_gt(gt, vm->xe, id) {
+ if (!(vma->gt_present & BIT(id)))
+ goto next;
+
+ XE_BUG_ON(xe_gt_is_media_type(gt));
+
+ fence = __xe_pt_unbind_vma(gt, vma, e, syncs, num_syncs);
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ goto err_fences;
+ }
+
+ if (fences)
+ fences[cur_fence++] = fence;
+
+next:
+ if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list))
+ e = list_next_entry(e, multi_gt_list);
+ }
+
+ if (fences) {
+ cf = dma_fence_array_create(number_gts, fences,
+ vm->composite_fence_ctx,
+ vm->composite_fence_seqno++,
+ false);
+ if (!cf) {
+ --vm->composite_fence_seqno;
+ err = -ENOMEM;
+ goto err_fences;
+ }
+ }
+
+ for (i = 0; i < num_syncs; i++)
+ xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base : fence);
+
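+ /* If the VMA was not bound on any GT, return a signalled stub fence */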
+ return cf ? &cf->base : !fence ? dma_fence_get_stub() : fence;
+
+err_fences:
+ if (fences) {
+ while (cur_fence) {
+ /* FIXME: Rewind the previous binds? */
+ dma_fence_put(fences[--cur_fence]);
+ }
+ kfree(fences);
+ }
+
+ return ERR_PTR(err);
+}
+
+static struct dma_fence *
+xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e,
+ struct xe_sync_entry *syncs, u32 num_syncs)
+{
+ struct xe_gt *gt;
+ struct dma_fence *fence;
+ struct dma_fence **fences = NULL;
+ struct dma_fence_array *cf = NULL;
+ struct xe_vm *vm = vma->vm;
+ int cur_fence = 0, i;
+ int number_gts = hweight_long(vma->gt_mask);
+ int err;
+ u8 id;
+
+ trace_xe_vma_bind(vma);
+
+ if (number_gts > 1) {
+ fences = kmalloc_array(number_gts, sizeof(*fences),
+ GFP_KERNEL);
+ if (!fences)
+ return ERR_PTR(-ENOMEM);
+ }
+
+ for_each_gt(gt, vm->xe, id) {
+ if (!(vma->gt_mask & BIT(id)))
+ goto next;
+
+ XE_BUG_ON(xe_gt_is_media_type(gt));
+ fence = __xe_pt_bind_vma(gt, vma, e, syncs, num_syncs,
+ vma->gt_present & BIT(id));
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ goto err_fences;
+ }
+
+ if (fences)
+ fences[cur_fence++] = fence;
+
+next:
+ if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list))
+ e = list_next_entry(e, multi_gt_list);
+ }
+
+ if (fences) {
+ cf = dma_fence_array_create(number_gts, fences,
+ vm->composite_fence_ctx,
+ vm->composite_fence_seqno++,
+ false);
+ if (!cf) {
+ --vm->composite_fence_seqno;
+ err = -ENOMEM;
+ goto err_fences;
+ }
+ }
+
+ for (i = 0; i < num_syncs; i++)
+ xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base : fence);
+
+ return cf ? &cf->base : fence;
+
+err_fences:
+ if (fences) {
+ while (cur_fence) {
+ /* FIXME: Rewind the previous binds? */
+ dma_fence_put(fences[--cur_fence]);
+ }
+ kfree(fences);
+ }
+
+ return ERR_PTR(err);
+}
+
+struct async_op_fence {
+ struct dma_fence fence;
+ struct dma_fence_cb cb;
+ struct xe_vm *vm;
+ wait_queue_head_t wq;
+ bool started;
+};
+
+static const char *async_op_fence_get_driver_name(struct dma_fence *dma_fence)
+{
+ return "xe";
+}
+
+static const char *
+async_op_fence_get_timeline_name(struct dma_fence *dma_fence)
+{
+ return "async_op_fence";
+}
+
+static const struct dma_fence_ops async_op_fence_ops = {
+ .get_driver_name = async_op_fence_get_driver_name,
+ .get_timeline_name = async_op_fence_get_timeline_name,
+};
+
+static void async_op_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+ struct async_op_fence *afence =
+ container_of(cb, struct async_op_fence, cb);
+
+ dma_fence_signal(&afence->fence);
+ xe_vm_put(afence->vm);
+ dma_fence_put(&afence->fence);
+}
+
+static void add_async_op_fence_cb(struct xe_vm *vm,
+ struct dma_fence *fence,
+ struct async_op_fence *afence)
+{
+ int ret;
+
+ if (!xe_vm_no_dma_fences(vm)) {
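+ /* Pairs with the smp_rmb() in xe_vm_async_fence_wait_start() */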
+ afence->started = true;
+ smp_wmb();
+ wake_up_all(&afence->wq);
+ }
+
+ afence->vm = xe_vm_get(vm);
+ dma_fence_get(&afence->fence);
+ ret = dma_fence_add_callback(fence, &afence->cb, async_op_fence_cb);
+ if (ret == -ENOENT)
+ dma_fence_signal(&afence->fence);
+ if (ret) {
+ xe_vm_put(vm);
+ dma_fence_put(&afence->fence);
+ }
+ XE_WARN_ON(ret && ret != -ENOENT);
+}
+
+int xe_vm_async_fence_wait_start(struct dma_fence *fence)
+{
+ if (fence->ops == &async_op_fence_ops) {
+ struct async_op_fence *afence =
+ container_of(fence, struct async_op_fence, fence);
+
+ XE_BUG_ON(xe_vm_no_dma_fences(afence->vm));
+
+ smp_rmb();
+ return wait_event_interruptible(afence->wq, afence->started);
+ }
+
+ return 0;
+}
+
+static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
+ struct xe_engine *e, struct xe_sync_entry *syncs,
+ u32 num_syncs, struct async_op_fence *afence)
+{
+ struct dma_fence *fence;
+
+ xe_vm_assert_held(vm);
+
+ fence = xe_vm_bind_vma(vma, e, syncs, num_syncs);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
+ if (afence)
+ add_async_op_fence_cb(vm, fence, afence);
+
+ dma_fence_put(fence);
+ return 0;
+}
+
+static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_engine *e,
+ struct xe_bo *bo, struct xe_sync_entry *syncs,
+ u32 num_syncs, struct async_op_fence *afence)
+{
+ int err;
+
+ xe_vm_assert_held(vm);
+ xe_bo_assert_held(bo);
+
+ if (bo) {
+ err = xe_bo_validate(bo, vm, true);
+ if (err)
+ return err;
+ }
+
+ return __xe_vm_bind(vm, vma, e, syncs, num_syncs, afence);
+}
+
+static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
+ struct xe_engine *e, struct xe_sync_entry *syncs,
+ u32 num_syncs, struct async_op_fence *afence)
+{
+ struct dma_fence *fence;
+
+ xe_vm_assert_held(vm);
+ xe_bo_assert_held(vma->bo);
+
+ fence = xe_vm_unbind_vma(vma, e, syncs, num_syncs);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
+ if (afence)
+ add_async_op_fence_cb(vm, fence, afence);
+
+ xe_vma_destroy(vma, fence);
+ dma_fence_put(fence);
+
+ return 0;
+}
+
+static int vm_set_error_capture_address(struct xe_device *xe, struct xe_vm *vm,
+ u64 value)
+{
+ if (XE_IOCTL_ERR(xe, !value))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
+ return -ENOTSUPP;
+
+ if (XE_IOCTL_ERR(xe, vm->async_ops.error_capture.addr))
+ return -ENOTSUPP;
+
+ vm->async_ops.error_capture.mm = current->mm;
+ vm->async_ops.error_capture.addr = value;
+ init_waitqueue_head(&vm->async_ops.error_capture.wq);
+
+ return 0;
+}
+
+typedef int (*xe_vm_set_property_fn)(struct xe_device *xe, struct xe_vm *vm,
+ u64 value);
+
+static const xe_vm_set_property_fn vm_set_property_funcs[] = {
+ [XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS] =
+ vm_set_error_capture_address,
+};
+
+static int vm_user_ext_set_property(struct xe_device *xe, struct xe_vm *vm,
+ u64 extension)
+{
+ u64 __user *address = u64_to_user_ptr(extension);
+ struct drm_xe_ext_vm_set_property ext;
+ int err;
+
+ err = __copy_from_user(&ext, address, sizeof(ext));
+ if (XE_IOCTL_ERR(xe, err))
+ return -EFAULT;
+
+ if (XE_IOCTL_ERR(xe, ext.property >=
+ ARRAY_SIZE(vm_set_property_funcs)))
+ return -EINVAL;
+
+ return vm_set_property_funcs[ext.property](xe, vm, ext.value);
+}
+
+typedef int (*xe_vm_user_extension_fn)(struct xe_device *xe, struct xe_vm *vm,
+ u64 extension);
+
+static const xe_vm_user_extension_fn vm_user_extension_funcs[] = {
+ [XE_VM_EXTENSION_SET_PROPERTY] = vm_user_ext_set_property,
+};
+
+#define MAX_USER_EXTENSIONS 16
+static int vm_user_extensions(struct xe_device *xe, struct xe_vm *vm,
+ u64 extensions, int ext_number)
+{
+ u64 __user *address = u64_to_user_ptr(extensions);
+ struct xe_user_extension ext;
+ int err;
+
+ if (XE_IOCTL_ERR(xe, ext_number >= MAX_USER_EXTENSIONS))
+ return -E2BIG;
+
+ err = __copy_from_user(&ext, address, sizeof(ext));
+ if (XE_IOCTL_ERR(xe, err))
+ return -EFAULT;
+
+ if (XE_IOCTL_ERR(xe, ext.name >=
+ ARRAY_SIZE(vm_user_extension_funcs)))
+ return -EINVAL;
+
+ err = vm_user_extension_funcs[ext.name](xe, vm, extensions);
+ if (XE_IOCTL_ERR(xe, err))
+ return err;
+
+ if (ext.next_extension)
+ return vm_user_extensions(xe, vm, ext.next_extension,
+ ++ext_number);
+
+ return 0;
+}
+
+#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_SCRATCH_PAGE | \
+ DRM_XE_VM_CREATE_COMPUTE_MODE | \
+ DRM_XE_VM_CREATE_ASYNC_BIND_OPS | \
+ DRM_XE_VM_CREATE_FAULT_MODE)
+
+int xe_vm_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = to_xe_file(file);
+ struct drm_xe_vm_create *args = data;
+ struct xe_vm *vm;
+ u32 id, asid;
+ int err;
+ u32 flags = 0;
+
+ if (XE_IOCTL_ERR(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE &&
+ args->flags & DRM_XE_VM_CREATE_FAULT_MODE))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE &&
+ args->flags & DRM_XE_VM_CREATE_FAULT_MODE))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
+ xe_device_in_non_fault_mode(xe)))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_VM_CREATE_FAULT_MODE) &&
+ xe_device_in_fault_mode(xe)))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE &&
+ !xe->info.supports_usm))
+ return -EINVAL;
+
+ if (args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE)
+ flags |= XE_VM_FLAG_SCRATCH_PAGE;
+ if (args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE)
+ flags |= XE_VM_FLAG_COMPUTE_MODE;
+ if (args->flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS)
+ flags |= XE_VM_FLAG_ASYNC_BIND_OPS;
+ if (args->flags & DRM_XE_VM_CREATE_FAULT_MODE)
+ flags |= XE_VM_FLAG_FAULT_MODE;
+
+ vm = xe_vm_create(xe, flags);
+ if (IS_ERR(vm))
+ return PTR_ERR(vm);
+
+ if (args->extensions) {
+ err = vm_user_extensions(xe, vm, args->extensions, 0);
+ if (XE_IOCTL_ERR(xe, err)) {
+ xe_vm_close_and_put(vm);
+ return err;
+ }
+ }
+
+ mutex_lock(&xef->vm.lock);
+ err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
+ mutex_unlock(&xef->vm.lock);
+ if (err) {
+ xe_vm_close_and_put(vm);
+ return err;
+ }
+
+ mutex_lock(&xe->usm.lock);
+ err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
+ XA_LIMIT(0, XE_MAX_ASID - 1),
+ &xe->usm.next_asid, GFP_KERNEL);
+ mutex_unlock(&xe->usm.lock);
+ if (err) {
+ xe_vm_close_and_put(vm);
+ return err;
+ }
+ vm->usm.asid = asid;
+
+ args->vm_id = id;
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
+ /* Warning: Security issue - never enable by default */
+ args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, GEN8_PAGE_SIZE);
+#endif
+
+ return 0;
+}
+
+int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = to_xe_file(file);
+ struct drm_xe_vm_destroy *args = data;
+ struct xe_vm *vm;
+
+ if (XE_IOCTL_ERR(xe, args->pad))
+ return -EINVAL;
+
+ vm = xe_vm_lookup(xef, args->vm_id);
+ if (XE_IOCTL_ERR(xe, !vm))
+ return -ENOENT;
+ xe_vm_put(vm);
+
+ /* FIXME: Extend this check to non-compute mode VMs */
+ if (XE_IOCTL_ERR(xe, vm->preempt.num_engines))
+ return -EBUSY;
+
+ mutex_lock(&xef->vm.lock);
+ xa_erase(&xef->vm.xa, args->vm_id);
+ mutex_unlock(&xef->vm.lock);
+
+ xe_vm_close_and_put(vm);
+
+ return 0;
+}
+
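+/* Maps the uAPI prefetch region index onto a TTM placement */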
+static const u32 region_to_mem_type[] = {
+ XE_PL_TT,
+ XE_PL_VRAM0,
+ XE_PL_VRAM1,
+};
+
+static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
+ struct xe_engine *e, u32 region,
+ struct xe_sync_entry *syncs, u32 num_syncs,
+ struct async_op_fence *afence)
+{
+ int err;
+
+ XE_BUG_ON(region >= ARRAY_SIZE(region_to_mem_type));
+
+ if (!xe_vma_is_userptr(vma)) {
+ err = xe_bo_migrate(vma->bo, region_to_mem_type[region]);
+ if (err)
+ return err;
+ }
+
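+ /*
+ * Rebind only if some GT requested in gt_mask lacks a valid (present
+ * and not invalidated) binding; otherwise just signal the fences.
+ */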
+ if (vma->gt_mask != (vma->gt_present & ~vma->usm.gt_invalidated)) {
+ return xe_vm_bind(vm, vma, e, vma->bo, syncs, num_syncs,
+ afence);
+ } else {
+ int i;
+
+ /* Nothing to do, signal fences now */
+ for (i = 0; i < num_syncs; i++)
+ xe_sync_entry_signal(&syncs[i], NULL,
+ dma_fence_get_stub());
+ if (afence)
+ dma_fence_signal(&afence->fence);
+ return 0;
+ }
+}
+
+#define VM_BIND_OP(op) (op & 0xffff)
+
+static int __vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma,
+ struct xe_engine *e, struct xe_bo *bo, u32 op,
+ u32 region, struct xe_sync_entry *syncs,
+ u32 num_syncs, struct async_op_fence *afence)
+{
+ switch (VM_BIND_OP(op)) {
+ case XE_VM_BIND_OP_MAP:
+ return xe_vm_bind(vm, vma, e, bo, syncs, num_syncs, afence);
+ case XE_VM_BIND_OP_UNMAP:
+ case XE_VM_BIND_OP_UNMAP_ALL:
+ return xe_vm_unbind(vm, vma, e, syncs, num_syncs, afence);
+ case XE_VM_BIND_OP_MAP_USERPTR:
+ return xe_vm_bind(vm, vma, e, NULL, syncs, num_syncs, afence);
+ case XE_VM_BIND_OP_PREFETCH:
+ return xe_vm_prefetch(vm, vma, e, region, syncs, num_syncs,
+ afence);
+ default:
+ XE_BUG_ON("NOT POSSIBLE");
+ return -EINVAL;
+ }
+}
+
+struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm)
+{
+ int idx = vm->flags & XE_VM_FLAG_MIGRATION ?
+ XE_VM_FLAG_GT_ID(vm->flags) : 0;
+
+ /* Safe to use index 0 as all BO in the VM share a single dma-resv lock */
+ return &vm->pt_root[idx]->bo->ttm;
+}
+
+static void xe_vm_tv_populate(struct xe_vm *vm, struct ttm_validate_buffer *tv)
+{
+ tv->num_shared = 1;
+ tv->bo = xe_vm_ttm_bo(vm);
+}
+
+static bool is_map_op(u32 op)
+{
+ return VM_BIND_OP(op) == XE_VM_BIND_OP_MAP ||
+ VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR;
+}
+
+static bool is_unmap_op(u32 op)
+{
+ return VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP ||
+ VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL;
+}
+
+static int vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma,
+ struct xe_engine *e, struct xe_bo *bo,
+ struct drm_xe_vm_bind_op *bind_op,
+ struct xe_sync_entry *syncs, u32 num_syncs,
+ struct async_op_fence *afence)
+{
+ LIST_HEAD(objs);
+ LIST_HEAD(dups);
+ struct ttm_validate_buffer tv_bo, tv_vm;
+ struct ww_acquire_ctx ww;
+ struct xe_bo *vbo;
+ int err, i;
+
+ lockdep_assert_held(&vm->lock);
+ XE_BUG_ON(!list_empty(&vma->unbind_link));
+
+ /* Binds deferred to faults, signal fences now */
+ if (xe_vm_in_fault_mode(vm) && is_map_op(bind_op->op) &&
+ !(bind_op->op & XE_VM_BIND_FLAG_IMMEDIATE)) {
+ for (i = 0; i < num_syncs; i++)
+ xe_sync_entry_signal(&syncs[i], NULL,
+ dma_fence_get_stub());
+ if (afence)
+ dma_fence_signal(&afence->fence);
+ return 0;
+ }
+
+ xe_vm_tv_populate(vm, &tv_vm);
+ list_add_tail(&tv_vm.head, &objs);
+ vbo = vma->bo;
+ if (vbo) {
+ /*
+ * An unbind can drop the last reference to the BO and
+ * the BO is needed for ttm_eu_backoff_reservation so
+ * take a reference here.
+ */
+ xe_bo_get(vbo);
+
+ tv_bo.bo = &vbo->ttm;
+ tv_bo.num_shared = 1;
+ list_add(&tv_bo.head, &objs);
+ }
+
+again:
+ err = ttm_eu_reserve_buffers(&ww, &objs, true, &dups);
+ if (!err) {
+ err = __vm_bind_ioctl(vm, vma, e, bo,
+ bind_op->op, bind_op->region, syncs,
+ num_syncs, afence);
+ ttm_eu_backoff_reservation(&ww, &objs);
+ if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
+ lockdep_assert_held_write(&vm->lock);
+ err = xe_vma_userptr_pin_pages(vma);
+ if (!err)
+ goto again;
+ }
+ }
+ xe_bo_put(vbo);
+
+ return err;
+}
+
+struct async_op {
+ struct xe_vma *vma;
+ struct xe_engine *engine;
+ struct xe_bo *bo;
+ struct drm_xe_vm_bind_op bind_op;
+ struct xe_sync_entry *syncs;
+ u32 num_syncs;
+ struct list_head link;
+ struct async_op_fence *fence;
+};
+
+static void async_op_cleanup(struct xe_vm *vm, struct async_op *op)
+{
+ while (op->num_syncs--)
+ xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
+ kfree(op->syncs);
+ xe_bo_put(op->bo);
+ if (op->engine)
+ xe_engine_put(op->engine);
+ xe_vm_put(vm);
+ if (op->fence)
+ dma_fence_put(&op->fence->fence);
+ kfree(op);
+}
+
+static struct async_op *next_async_op(struct xe_vm *vm)
+{
+ return list_first_entry_or_null(&vm->async_ops.pending,
+ struct async_op, link);
+}
+
+static void vm_set_async_error(struct xe_vm *vm, int err)
+{
+ lockdep_assert_held(&vm->lock);
+ vm->async_ops.error = err;
+}
+
+static void async_op_work_func(struct work_struct *w)
+{
+ struct xe_vm *vm = container_of(w, struct xe_vm, async_ops.work);
+
+ for (;;) {
+ struct async_op *op;
+ int err;
+
+ if (vm->async_ops.error && !xe_vm_is_closed(vm))
+ break;
+
+ spin_lock_irq(&vm->async_ops.lock);
+ op = next_async_op(vm);
+ if (op)
+ list_del_init(&op->link);
+ spin_unlock_irq(&vm->async_ops.lock);
+
+ if (!op)
+ break;
+
+ if (!xe_vm_is_closed(vm)) {
+ bool first, last;
+
+ down_write(&vm->lock);
+again:
+ first = op->vma->first_munmap_rebind;
+ last = op->vma->last_munmap_rebind;
+#ifdef TEST_VM_ASYNC_OPS_ERROR
+#define FORCE_ASYNC_OP_ERROR BIT(31)
+ if (!(op->bind_op.op & FORCE_ASYNC_OP_ERROR)) {
+ err = vm_bind_ioctl(vm, op->vma, op->engine,
+ op->bo, &op->bind_op,
+ op->syncs, op->num_syncs,
+ op->fence);
+ } else {
+ err = -ENOMEM;
+ op->bind_op.op &= ~FORCE_ASYNC_OP_ERROR;
+ }
+#else
+ err = vm_bind_ioctl(vm, op->vma, op->engine, op->bo,
+ &op->bind_op, op->syncs,
+ op->num_syncs, op->fence);
+#endif
+ /*
+ * In order for the fencing to work (stall behind
+ * existing jobs / prevent new jobs from running), all
+ * the dma-resv slots need to be programmed in a batch
+ * relative to execs / the rebind worker. The vm->lock
+ * ensures this.
+ */
+ if (!err && ((first && VM_BIND_OP(op->bind_op.op) ==
+ XE_VM_BIND_OP_UNMAP) ||
+ vm->async_ops.munmap_rebind_inflight)) {
+ if (last) {
+ op->vma->last_munmap_rebind = false;
+ vm->async_ops.munmap_rebind_inflight =
+ false;
+ } else {
+ vm->async_ops.munmap_rebind_inflight =
+ true;
+
+ async_op_cleanup(vm, op);
+
+ spin_lock_irq(&vm->async_ops.lock);
+ op = next_async_op(vm);
+ XE_BUG_ON(!op);
+ list_del_init(&op->link);
+ spin_unlock_irq(&vm->async_ops.lock);
+
+ goto again;
+ }
+ }
+ if (err) {
+ trace_xe_vma_fail(op->vma);
+ drm_warn(&vm->xe->drm, "Async VM op(%d) failed with %d",
+ VM_BIND_OP(op->bind_op.op),
+ err);
+
+ spin_lock_irq(&vm->async_ops.lock);
+ list_add(&op->link, &vm->async_ops.pending);
+ spin_unlock_irq(&vm->async_ops.lock);
+
+ vm_set_async_error(vm, err);
+ up_write(&vm->lock);
+
+ if (vm->async_ops.error_capture.addr)
+ vm_error_capture(vm, err,
+ op->bind_op.op,
+ op->bind_op.addr,
+ op->bind_op.range);
+ break;
+ }
+ up_write(&vm->lock);
+ } else {
+ trace_xe_vma_flush(op->vma);
+
+ if (is_unmap_op(op->bind_op.op)) {
+ down_write(&vm->lock);
+ xe_vma_destroy_unlocked(op->vma);
+ up_write(&vm->lock);
+ }
+
+ if (op->fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+ &op->fence->fence.flags)) {
+ if (!xe_vm_no_dma_fences(vm)) {
+ op->fence->started = true;
+ smp_wmb();
+ wake_up_all(&op->fence->wq);
+ }
+ dma_fence_signal(&op->fence->fence);
+ }
+ }
+
+ async_op_cleanup(vm, op);
+ }
+}
+
+static int __vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma,
+ struct xe_engine *e, struct xe_bo *bo,
+ struct drm_xe_vm_bind_op *bind_op,
+ struct xe_sync_entry *syncs, u32 num_syncs)
+{
+ struct async_op *op;
+ bool installed = false;
+ u64 seqno;
+ int i;
+
+ lockdep_assert_held(&vm->lock);
+
+ op = kmalloc(sizeof(*op), GFP_KERNEL);
+ if (!op)
+ return -ENOMEM;
+
+ if (num_syncs) {
+ op->fence = kmalloc(sizeof(*op->fence), GFP_KERNEL);
+ if (!op->fence) {
+ kfree(op);
+ return -ENOMEM;
+ }
+
+ seqno = e ? ++e->bind.fence_seqno : ++vm->async_ops.fence.seqno;
+ dma_fence_init(&op->fence->fence, &async_op_fence_ops,
+ &vm->async_ops.lock, e ? e->bind.fence_ctx :
+ vm->async_ops.fence.context, seqno);
+
+ if (!xe_vm_no_dma_fences(vm)) {
+ op->fence->vm = vm;
+ op->fence->started = false;
+ init_waitqueue_head(&op->fence->wq);
+ }
+ } else {
+ op->fence = NULL;
+ }
+ op->vma = vma;
+ op->engine = e;
+ op->bo = bo;
+ op->bind_op = *bind_op;
+ op->syncs = syncs;
+ op->num_syncs = num_syncs;
+ INIT_LIST_HEAD(&op->link);
+
+ for (i = 0; i < num_syncs; i++)
+ installed |= xe_sync_entry_signal(&syncs[i], NULL,
+ &op->fence->fence);
+
+ if (!installed && op->fence)
+ dma_fence_signal(&op->fence->fence);
+
+ spin_lock_irq(&vm->async_ops.lock);
+ list_add_tail(&op->link, &vm->async_ops.pending);
+ spin_unlock_irq(&vm->async_ops.lock);
+
+ if (!vm->async_ops.error)
+ queue_work(system_unbound_wq, &vm->async_ops.work);
+
+ return 0;
+}
+
+static int vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma,
+ struct xe_engine *e, struct xe_bo *bo,
+ struct drm_xe_vm_bind_op *bind_op,
+ struct xe_sync_entry *syncs, u32 num_syncs)
+{
+ struct xe_vma *__vma, *next;
+ struct list_head rebind_list;
+ struct xe_sync_entry *in_syncs = NULL, *out_syncs = NULL;
+ u32 num_in_syncs = 0, num_out_syncs = 0;
+ bool first = true, last;
+ int err;
+ int i;
+
+ lockdep_assert_held(&vm->lock);
+
+ /* Not a linked list of unbinds + rebinds, easy */
+ if (list_empty(&vma->unbind_link))
+ return __vm_bind_ioctl_async(vm, vma, e, bo, bind_op,
+ syncs, num_syncs);
+
+ /*
+ * Linked list of unbinds + rebinds, decompose syncs into 'in / out'
+ * passing the 'in' to the first operation and 'out' to the last. Also
+ * the reference counting is a little tricky, increment the VM / bind
+ * engine ref count on all but the last operation and increment the BOs
+ * ref count on each rebind.
+ */
+
+ XE_BUG_ON(VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP &&
+ VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP_ALL &&
+ VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_PREFETCH);
+
+ /* Decompose syncs */
+ if (num_syncs) {
+ in_syncs = kmalloc(sizeof(*in_syncs) * num_syncs, GFP_KERNEL);
+ out_syncs = kmalloc(sizeof(*out_syncs) * num_syncs, GFP_KERNEL);
+ if (!in_syncs || !out_syncs) {
+ err = -ENOMEM;
+ goto out_error;
+ }
+
+ for (i = 0; i < num_syncs; ++i) {
+ bool signal = syncs[i].flags & DRM_XE_SYNC_SIGNAL;
+
+ if (signal)
+ out_syncs[num_out_syncs++] = syncs[i];
+ else
+ in_syncs[num_in_syncs++] = syncs[i];
+ }
+ }
+
+ /* Do unbinds + move rebinds to new list */
+ INIT_LIST_HEAD(&rebind_list);
+ list_for_each_entry_safe(__vma, next, &vma->unbind_link, unbind_link) {
+ if (__vma->destroyed ||
+ VM_BIND_OP(bind_op->op) == XE_VM_BIND_OP_PREFETCH) {
+ list_del_init(&__vma->unbind_link);
+ xe_bo_get(bo);
+ err = __vm_bind_ioctl_async(xe_vm_get(vm), __vma,
+ e ? xe_engine_get(e) : NULL,
+ bo, bind_op, first ?
+ in_syncs : NULL,
+ first ? num_in_syncs : 0);
+ if (err) {
+ xe_bo_put(bo);
+ xe_vm_put(vm);
+ if (e)
+ xe_engine_put(e);
+ goto out_error;
+ }
+ in_syncs = NULL;
+ first = false;
+ } else {
+ list_move_tail(&__vma->unbind_link, &rebind_list);
+ }
+ }
+ last = list_empty(&rebind_list);
+ if (!last) {
+ xe_vm_get(vm);
+ if (e)
+ xe_engine_get(e);
+ }
+ err = __vm_bind_ioctl_async(vm, vma, e,
+ bo, bind_op,
+ first ? in_syncs :
+ last ? out_syncs : NULL,
+ first ? num_in_syncs :
+ last ? num_out_syncs : 0);
+ if (err) {
+ if (!last) {
+ xe_vm_put(vm);
+ if (e)
+ xe_engine_put(e);
+ }
+ goto out_error;
+ }
+ in_syncs = NULL;
+
+ /* Do rebinds */
+ list_for_each_entry_safe(__vma, next, &rebind_list, unbind_link) {
+ list_del_init(&__vma->unbind_link);
+ last = list_empty(&rebind_list);
+
+ if (xe_vma_is_userptr(__vma)) {
+ bind_op->op = XE_VM_BIND_FLAG_ASYNC |
+ XE_VM_BIND_OP_MAP_USERPTR;
+ } else {
+ bind_op->op = XE_VM_BIND_FLAG_ASYNC |
+ XE_VM_BIND_OP_MAP;
+ xe_bo_get(__vma->bo);
+ }
+
+ if (!last) {
+ xe_vm_get(vm);
+ if (e)
+ xe_engine_get(e);
+ }
+
+ err = __vm_bind_ioctl_async(vm, __vma, e,
+ __vma->bo, bind_op, last ?
+ out_syncs : NULL,
+ last ? num_out_syncs : 0);
+ if (err) {
+ if (!last) {
+ xe_vm_put(vm);
+ if (e)
+ xe_engine_put(e);
+ }
+ goto out_error;
+ }
+ }
+
+ kfree(syncs);
+ return 0;
+
+out_error:
+ kfree(in_syncs);
+ kfree(out_syncs);
+ kfree(syncs);
+
+ return err;
+}
+
+static int __vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo,
+ u64 addr, u64 range, u32 op)
+{
+ struct xe_device *xe = vm->xe;
+ struct xe_vma *vma, lookup;
+ bool async = !!(op & XE_VM_BIND_FLAG_ASYNC);
+
+ lockdep_assert_held(&vm->lock);
+
+ lookup.start = addr;
+ lookup.end = addr + range - 1;
+
+ switch (VM_BIND_OP(op)) {
+ case XE_VM_BIND_OP_MAP:
+ case XE_VM_BIND_OP_MAP_USERPTR:
+ vma = xe_vm_find_overlapping_vma(vm, &lookup);
+ if (XE_IOCTL_ERR(xe, vma))
+ return -EBUSY;
+ break;
+ case XE_VM_BIND_OP_UNMAP:
+ case XE_VM_BIND_OP_PREFETCH:
+ vma = xe_vm_find_overlapping_vma(vm, &lookup);
+ if (XE_IOCTL_ERR(xe, !vma) ||
+ XE_IOCTL_ERR(xe, (vma->start != addr ||
+ vma->end != addr + range - 1) && !async))
+ return -EINVAL;
+ break;
+ case XE_VM_BIND_OP_UNMAP_ALL:
+ break;
+ default:
+ XE_BUG_ON("NOT POSSIBLE");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma)
+{
+ down_read(&vm->userptr.notifier_lock);
+ vma->destroyed = true;
+ up_read(&vm->userptr.notifier_lock);
+ xe_vm_remove_vma(vm, vma);
+}
+
+static int prep_replacement_vma(struct xe_vm *vm, struct xe_vma *vma)
+{
+ int err;
+
+ if (vma->bo && !vma->bo->vm) {
+ vm_insert_extobj(vm, vma);
+ err = add_preempt_fences(vm, vma->bo);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/*
+ * Find all overlapping VMAs in the lookup range and add them to a list in the
+ * returned VMA; all VMAs found will be unbound. Possibly also add up to two new
+ * VMAs that need to be bound if the first / last VMAs are not fully unbound.
+ * This is akin to how munmap works.
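+ *
+ * Illustrative example (hypothetical addresses): unbinding the range
+ * [0x2000, 0x5fff] from a single VMA covering [0x0000, 0x7fff] unbinds the
+ * original VMA and creates two replacement VMAs, [0x0000, 0x1fff] and
+ * [0x6000, 0x7fff], which are then rebound.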
+ */
+static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm,
+ struct xe_vma *lookup)
+{
+ struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup);
+ struct rb_node *node;
+ struct xe_vma *first = vma, *last = vma, *new_first = NULL,
+ *new_last = NULL, *__vma, *next;
+ int err = 0;
+ bool first_munmap_rebind = false;
+
+ lockdep_assert_held(&vm->lock);
+ XE_BUG_ON(!vma);
+
+ node = &vma->vm_node;
+ while ((node = rb_next(node))) {
+ if (!xe_vma_cmp_vma_cb(lookup, node)) {
+ __vma = to_xe_vma(node);
+ list_add_tail(&__vma->unbind_link, &vma->unbind_link);
+ last = __vma;
+ } else {
+ break;
+ }
+ }
+
+ node = &vma->vm_node;
+ while ((node = rb_prev(node))) {
+ if (!xe_vma_cmp_vma_cb(lookup, node)) {
+ __vma = to_xe_vma(node);
+ list_add(&__vma->unbind_link, &vma->unbind_link);
+ first = __vma;
+ } else {
+ break;
+ }
+ }
+
+ if (first->start != lookup->start) {
+ struct ww_acquire_ctx ww;
+
+ if (first->bo)
+ err = xe_bo_lock(first->bo, &ww, 0, true);
+ if (err)
+ goto unwind;
+ new_first = xe_vma_create(first->vm, first->bo,
+ first->bo ? first->bo_offset :
+ first->userptr.ptr,
+ first->start,
+ lookup->start - 1,
+ (first->pte_flags & PTE_READ_ONLY),
+ first->gt_mask);
+ if (first->bo)
+ xe_bo_unlock(first->bo, &ww);
+ if (!new_first) {
+ err = -ENOMEM;
+ goto unwind;
+ }
+ if (!first->bo) {
+ err = xe_vma_userptr_pin_pages(new_first);
+ if (err)
+ goto unwind;
+ }
+ err = prep_replacement_vma(vm, new_first);
+ if (err)
+ goto unwind;
+ }
+
+ if (last->end != lookup->end) {
+ struct ww_acquire_ctx ww;
+ u64 chunk = lookup->end + 1 - last->start;
+
+ if (last->bo)
+ err = xe_bo_lock(last->bo, &ww, 0, true);
+ if (err)
+ goto unwind;
+ new_last = xe_vma_create(last->vm, last->bo,
+ last->bo ? last->bo_offset + chunk :
+ last->userptr.ptr + chunk,
+ last->start + chunk,
+ last->end,
+ (last->pte_flags & PTE_READ_ONLY),
+ last->gt_mask);
+ if (last->bo)
+ xe_bo_unlock(last->bo, &ww);
+ if (!new_last) {
+ err = -ENOMEM;
+ goto unwind;
+ }
+ if (!last->bo) {
+ err = xe_vma_userptr_pin_pages(new_last);
+ if (err)
+ goto unwind;
+ }
+ err = prep_replacement_vma(vm, new_last);
+ if (err)
+ goto unwind;
+ }
+
+ prep_vma_destroy(vm, vma);
+ if (list_empty(&vma->unbind_link) && (new_first || new_last))
+ vma->first_munmap_rebind = true;
+ list_for_each_entry(__vma, &vma->unbind_link, unbind_link) {
+ if ((new_first || new_last) && !first_munmap_rebind) {
+ __vma->first_munmap_rebind = true;
+ first_munmap_rebind = true;
+ }
+ prep_vma_destroy(vm, __vma);
+ }
+ if (new_first) {
+ xe_vm_insert_vma(vm, new_first);
+ list_add_tail(&new_first->unbind_link, &vma->unbind_link);
+ if (!new_last)
+ new_first->last_munmap_rebind = true;
+ }
+ if (new_last) {
+ xe_vm_insert_vma(vm, new_last);
+ list_add_tail(&new_last->unbind_link, &vma->unbind_link);
+ new_last->last_munmap_rebind = true;
+ }
+
+ return vma;
+
+unwind:
+ list_for_each_entry_safe(__vma, next, &vma->unbind_link, unbind_link)
+ list_del_init(&__vma->unbind_link);
+ if (new_last) {
+ prep_vma_destroy(vm, new_last);
+ xe_vma_destroy_unlocked(new_last);
+ }
+ if (new_first) {
+ prep_vma_destroy(vm, new_first);
+ xe_vma_destroy_unlocked(new_first);
+ }
+
+ return ERR_PTR(err);
+}
+
+/*
+ * Similar to vm_unbind_lookup_vmas, find all VMAs in lookup range to prefetch
+ */
+static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm,
+ struct xe_vma *lookup,
+ u32 region)
+{
+ struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup), *__vma,
+ *next;
+ struct rb_node *node;
+
+ if (!xe_vma_is_userptr(vma)) {
+ if (!xe_bo_can_migrate(vma->bo, region_to_mem_type[region]))
+ return ERR_PTR(-EINVAL);
+ }
+
+ node = &vma->vm_node;
+ while ((node = rb_next(node))) {
+ if (!xe_vma_cmp_vma_cb(lookup, node)) {
+ __vma = to_xe_vma(node);
+ if (!xe_vma_is_userptr(__vma)) {
+ if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region]))
+ goto flush_list;
+ }
+ list_add_tail(&__vma->unbind_link, &vma->unbind_link);
+ } else {
+ break;
+ }
+ }
+
+ node = &vma->vm_node;
+ while ((node = rb_prev(node))) {
+ if (!xe_vma_cmp_vma_cb(lookup, node)) {
+ __vma = to_xe_vma(node);
+ if (!xe_vma_is_userptr(__vma)) {
+ if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region]))
+ goto flush_list;
+ }
+ list_add(&__vma->unbind_link, &vma->unbind_link);
+ } else {
+ break;
+ }
+ }
+
+ return vma;
+
+flush_list:
+ list_for_each_entry_safe(__vma, next, &vma->unbind_link,
+ unbind_link)
+ list_del_init(&__vma->unbind_link);
+
+ return ERR_PTR(-EINVAL);
+}
+
+static struct xe_vma *vm_unbind_all_lookup_vmas(struct xe_vm *vm,
+ struct xe_bo *bo)
+{
+ struct xe_vma *first = NULL, *vma;
+
+ lockdep_assert_held(&vm->lock);
+ xe_bo_assert_held(bo);
+
+ list_for_each_entry(vma, &bo->vmas, bo_link) {
+ if (vma->vm != vm)
+ continue;
+
+ prep_vma_destroy(vm, vma);
+ if (!first)
+ first = vma;
+ else
+ list_add_tail(&vma->unbind_link, &first->unbind_link);
+ }
+
+ return first;
+}
+
+static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm,
+ struct xe_bo *bo,
+ u64 bo_offset_or_userptr,
+ u64 addr, u64 range, u32 op,
+ u64 gt_mask, u32 region)
+{
+ struct ww_acquire_ctx ww;
+ struct xe_vma *vma, lookup;
+ int err;
+
+ lockdep_assert_held(&vm->lock);
+
+ lookup.start = addr;
+ lookup.end = addr + range - 1;
+
+ switch (VM_BIND_OP(op)) {
+ case XE_VM_BIND_OP_MAP:
+ XE_BUG_ON(!bo);
+
+ err = xe_bo_lock(bo, &ww, 0, true);
+ if (err)
+ return ERR_PTR(err);
+ vma = xe_vma_create(vm, bo, bo_offset_or_userptr, addr,
+ addr + range - 1,
+ op & XE_VM_BIND_FLAG_READONLY,
+ gt_mask);
+ xe_bo_unlock(bo, &ww);
+ if (!vma)
+ return ERR_PTR(-ENOMEM);
+
+ xe_vm_insert_vma(vm, vma);
+ if (!bo->vm) {
+ vm_insert_extobj(vm, vma);
+ err = add_preempt_fences(vm, bo);
+ if (err) {
+ prep_vma_destroy(vm, vma);
+ xe_vma_destroy_unlocked(vma);
+
+ return ERR_PTR(err);
+ }
+ }
+ break;
+ case XE_VM_BIND_OP_UNMAP:
+ vma = vm_unbind_lookup_vmas(vm, &lookup);
+ break;
+ case XE_VM_BIND_OP_PREFETCH:
+ vma = vm_prefetch_lookup_vmas(vm, &lookup, region);
+ break;
+ case XE_VM_BIND_OP_UNMAP_ALL:
+ XE_BUG_ON(!bo);
+
+ err = xe_bo_lock(bo, &ww, 0, true);
+ if (err)
+ return ERR_PTR(err);
+ vma = vm_unbind_all_lookup_vmas(vm, bo);
+ if (!vma)
+ vma = ERR_PTR(-EINVAL);
+ xe_bo_unlock(bo, &ww);
+ break;
+ case XE_VM_BIND_OP_MAP_USERPTR:
+ XE_BUG_ON(bo);
+
+ vma = xe_vma_create(vm, NULL, bo_offset_or_userptr, addr,
+ addr + range - 1,
+ op & XE_VM_BIND_FLAG_READONLY,
+ gt_mask);
+ if (!vma)
+ return ERR_PTR(-ENOMEM);
+
+ err = xe_vma_userptr_pin_pages(vma);
+ if (err) {
+ xe_vma_destroy(vma, NULL);
+
+ return ERR_PTR(err);
+ } else {
+ xe_vm_insert_vma(vm, vma);
+ }
+ break;
+ default:
+ XE_BUG_ON("NOT POSSIBLE");
+ vma = ERR_PTR(-EINVAL);
+ }
+
+ return vma;
+}
+
+#ifdef TEST_VM_ASYNC_OPS_ERROR
+#define SUPPORTED_FLAGS \
+ (FORCE_ASYNC_OP_ERROR | XE_VM_BIND_FLAG_ASYNC | \
+ XE_VM_BIND_FLAG_READONLY | XE_VM_BIND_FLAG_IMMEDIATE | 0xffff)
+#else
+#define SUPPORTED_FLAGS \
+ (XE_VM_BIND_FLAG_ASYNC | XE_VM_BIND_FLAG_READONLY | \
+ XE_VM_BIND_FLAG_IMMEDIATE | 0xffff)
+#endif
+#define XE_64K_PAGE_MASK 0xffffull
+
+#define MAX_BINDS 512 /* FIXME: Picking random upper limit */
+
+static int vm_bind_ioctl_check_args(struct xe_device *xe,
+ struct drm_xe_vm_bind *args,
+ struct drm_xe_vm_bind_op **bind_ops,
+ bool *async)
+{
+ int err;
+ int i;
+
+ if (XE_IOCTL_ERR(xe, args->extensions) ||
+ XE_IOCTL_ERR(xe, !args->num_binds) ||
+ XE_IOCTL_ERR(xe, args->num_binds > MAX_BINDS))
+ return -EINVAL;
+
+ if (args->num_binds > 1) {
+ u64 __user *bind_user =
+ u64_to_user_ptr(args->vector_of_binds);
+
+ *bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) *
+ args->num_binds, GFP_KERNEL);
+ if (!*bind_ops)
+ return -ENOMEM;
+
+ err = __copy_from_user(*bind_ops, bind_user,
+ sizeof(struct drm_xe_vm_bind_op) *
+ args->num_binds);
+ if (XE_IOCTL_ERR(xe, err)) {
+ err = -EFAULT;
+ goto free_bind_ops;
+ }
+ } else {
+ *bind_ops = &args->bind;
+ }
+
+ for (i = 0; i < args->num_binds; ++i) {
+ u64 range = (*bind_ops)[i].range;
+ u64 addr = (*bind_ops)[i].addr;
+ u32 op = (*bind_ops)[i].op;
+ u32 obj = (*bind_ops)[i].obj;
+ u64 obj_offset = (*bind_ops)[i].obj_offset;
+ u32 region = (*bind_ops)[i].region;
+
+ if (i == 0) {
+ *async = !!(op & XE_VM_BIND_FLAG_ASYNC);
+ } else if (XE_IOCTL_ERR(xe, !*async) ||
+ XE_IOCTL_ERR(xe, !(op & XE_VM_BIND_FLAG_ASYNC)) ||
+ XE_IOCTL_ERR(xe, VM_BIND_OP(op) ==
+ XE_VM_BIND_OP_RESTART)) {
+ err = -EINVAL;
+ goto free_bind_ops;
+ }
+
+ if (XE_IOCTL_ERR(xe, !*async &&
+ VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL)) {
+ err = -EINVAL;
+ goto free_bind_ops;
+ }
+
+ if (XE_IOCTL_ERR(xe, !*async &&
+ VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH)) {
+ err = -EINVAL;
+ goto free_bind_ops;
+ }
+
+ if (XE_IOCTL_ERR(xe, VM_BIND_OP(op) >
+ XE_VM_BIND_OP_PREFETCH) ||
+ XE_IOCTL_ERR(xe, op & ~SUPPORTED_FLAGS) ||
+ XE_IOCTL_ERR(xe, !obj &&
+ VM_BIND_OP(op) == XE_VM_BIND_OP_MAP) ||
+ XE_IOCTL_ERR(xe, !obj &&
+ VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
+ XE_IOCTL_ERR(xe, addr &&
+ VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
+ XE_IOCTL_ERR(xe, range &&
+ VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) ||
+ XE_IOCTL_ERR(xe, obj &&
+ VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR) ||
+ XE_IOCTL_ERR(xe, obj &&
+ VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH) ||
+ XE_IOCTL_ERR(xe, region &&
+ VM_BIND_OP(op) != XE_VM_BIND_OP_PREFETCH) ||
+ XE_IOCTL_ERR(xe, !(BIT(region) &
+ xe->info.mem_region_mask)) ||
+ XE_IOCTL_ERR(xe, obj &&
+ VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP)) {
+ err = -EINVAL;
+ goto free_bind_ops;
+ }
+
+ if (XE_IOCTL_ERR(xe, obj_offset & ~PAGE_MASK) ||
+ XE_IOCTL_ERR(xe, addr & ~PAGE_MASK) ||
+ XE_IOCTL_ERR(xe, range & ~PAGE_MASK) ||
+ XE_IOCTL_ERR(xe, !range && VM_BIND_OP(op) !=
+ XE_VM_BIND_OP_RESTART &&
+ VM_BIND_OP(op) != XE_VM_BIND_OP_UNMAP_ALL)) {
+ err = -EINVAL;
+ goto free_bind_ops;
+ }
+ }
+
+ return 0;
+
+free_bind_ops:
+ if (args->num_binds > 1)
+ kfree(*bind_ops);
+ return err;
+}
+
+int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = to_xe_file(file);
+ struct drm_xe_vm_bind *args = data;
+ struct drm_xe_sync __user *syncs_user;
+ struct xe_bo **bos = NULL;
+ struct xe_vma **vmas = NULL;
+ struct xe_vm *vm;
+ struct xe_engine *e = NULL;
+ u32 num_syncs;
+ struct xe_sync_entry *syncs = NULL;
+ struct drm_xe_vm_bind_op *bind_ops;
+ bool async;
+ int err;
+ int i, j = 0;
+
+ err = vm_bind_ioctl_check_args(xe, args, &bind_ops, &async);
+ if (err)
+ return err;
+
+ vm = xe_vm_lookup(xef, args->vm_id);
+ if (XE_IOCTL_ERR(xe, !vm)) {
+ err = -EINVAL;
+ goto free_objs;
+ }
+
+ if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) {
+ DRM_ERROR("VM closed while we began looking up?\n");
+ err = -ENOENT;
+ goto put_vm;
+ }
+
+ if (args->engine_id) {
+ e = xe_engine_lookup(xef, args->engine_id);
+ if (XE_IOCTL_ERR(xe, !e)) {
+ err = -ENOENT;
+ goto put_vm;
+ }
+ if (XE_IOCTL_ERR(xe, !(e->flags & ENGINE_FLAG_VM))) {
+ err = -EINVAL;
+ goto put_engine;
+ }
+ }
+
+ if (VM_BIND_OP(bind_ops[0].op) == XE_VM_BIND_OP_RESTART) {
+ if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS)))
+ err = -ENOTSUPP;
+ if (XE_IOCTL_ERR(xe, !err && args->num_syncs))
+ err = -EINVAL;
+ if (XE_IOCTL_ERR(xe, !err && !vm->async_ops.error))
+ err = -EPROTO;
+
+ if (!err) {
+ down_write(&vm->lock);
+ trace_xe_vm_restart(vm);
+ vm_set_async_error(vm, 0);
+ up_write(&vm->lock);
+
+ queue_work(system_unbound_wq, &vm->async_ops.work);
+
+ /* Rebinds may have been blocked, give worker a kick */
+ if (xe_vm_in_compute_mode(vm))
+ queue_work(vm->xe->ordered_wq,
+ &vm->preempt.rebind_work);
+ }
+
+ goto put_engine;
+ }
+
+ if (XE_IOCTL_ERR(xe, !vm->async_ops.error &&
+ async != !!(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) {
+ err = -ENOTSUPP;
+ goto put_engine;
+ }
+
+ for (i = 0; i < args->num_binds; ++i) {
+ u64 range = bind_ops[i].range;
+ u64 addr = bind_ops[i].addr;
+
+ if (XE_IOCTL_ERR(xe, range > vm->size) ||
+ XE_IOCTL_ERR(xe, addr > vm->size - range)) {
+ err = -EINVAL;
+ goto put_engine;
+ }
+
+ if (bind_ops[i].gt_mask) {
+ u64 valid_gts = BIT(xe->info.tile_count) - 1;
+
+ if (XE_IOCTL_ERR(xe, bind_ops[i].gt_mask &
+ ~valid_gts)) {
+ err = -EINVAL;
+ goto put_engine;
+ }
+ }
+ }
+
+ bos = kzalloc(sizeof(*bos) * args->num_binds, GFP_KERNEL);
+ if (!bos) {
+ err = -ENOMEM;
+ goto put_engine;
+ }
+
+ vmas = kzalloc(sizeof(*vmas) * args->num_binds, GFP_KERNEL);
+ if (!vmas) {
+ err = -ENOMEM;
+ goto put_engine;
+ }
+
+ for (i = 0; i < args->num_binds; ++i) {
+ struct drm_gem_object *gem_obj;
+ u64 range = bind_ops[i].range;
+ u64 addr = bind_ops[i].addr;
+ u32 obj = bind_ops[i].obj;
+ u64 obj_offset = bind_ops[i].obj_offset;
+
+ if (!obj)
+ continue;
+
+ gem_obj = drm_gem_object_lookup(file, obj);
+ if (XE_IOCTL_ERR(xe, !gem_obj)) {
+ err = -ENOENT;
+ goto put_obj;
+ }
+ bos[i] = gem_to_xe_bo(gem_obj);
+
+ if (XE_IOCTL_ERR(xe, range > bos[i]->size) ||
+ XE_IOCTL_ERR(xe, obj_offset >
+ bos[i]->size - range)) {
+ err = -EINVAL;
+ goto put_obj;
+ }
+
+ if (bos[i]->flags & XE_BO_INTERNAL_64K) {
+ if (XE_IOCTL_ERR(xe, obj_offset &
+ XE_64K_PAGE_MASK) ||
+ XE_IOCTL_ERR(xe, addr & XE_64K_PAGE_MASK) ||
+ XE_IOCTL_ERR(xe, range & XE_64K_PAGE_MASK)) {
+ err = -EINVAL;
+ goto put_obj;
+ }
+ }
+ }
+
+ if (args->num_syncs) {
+ syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
+ if (!syncs) {
+ err = -ENOMEM;
+ goto put_obj;
+ }
+ }
+
+ syncs_user = u64_to_user_ptr(args->syncs);
+ for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
+ err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
+ &syncs_user[num_syncs], false,
+ xe_vm_no_dma_fences(vm));
+ if (err)
+ goto free_syncs;
+ }
+
+ err = down_write_killable(&vm->lock);
+ if (err)
+ goto free_syncs;
+
+ /* Do some error checking first to make the unwind easier */
+ for (i = 0; i < args->num_binds; ++i) {
+ u64 range = bind_ops[i].range;
+ u64 addr = bind_ops[i].addr;
+ u32 op = bind_ops[i].op;
+
+ err = __vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op);
+ if (err)
+ goto release_vm_lock;
+ }
+
+ for (i = 0; i < args->num_binds; ++i) {
+ u64 range = bind_ops[i].range;
+ u64 addr = bind_ops[i].addr;
+ u32 op = bind_ops[i].op;
+ u64 obj_offset = bind_ops[i].obj_offset;
+ u64 gt_mask = bind_ops[i].gt_mask;
+ u32 region = bind_ops[i].region;
+
+ vmas[i] = vm_bind_ioctl_lookup_vma(vm, bos[i], obj_offset,
+ addr, range, op, gt_mask,
+ region);
+ if (IS_ERR(vmas[i])) {
+ err = PTR_ERR(vmas[i]);
+ vmas[i] = NULL;
+ goto destroy_vmas;
+ }
+ }
+
+ for (j = 0; j < args->num_binds; ++j) {
+ struct xe_sync_entry *__syncs;
+ u32 __num_syncs = 0;
+ bool first_or_last = j == 0 || j == args->num_binds - 1;
+
+ if (args->num_binds == 1) {
+ __num_syncs = num_syncs;
+ __syncs = syncs;
+ } else if (first_or_last && num_syncs) {
+ bool first = j == 0;
+
+ __syncs = kmalloc(sizeof(*__syncs) * num_syncs,
+ GFP_KERNEL);
+ if (!__syncs) {
+ err = -ENOMEM;
+ break;
+ }
+
+ /* in-syncs on first bind, out-syncs on last bind */
+ for (i = 0; i < num_syncs; ++i) {
+ bool signal = syncs[i].flags &
+ DRM_XE_SYNC_SIGNAL;
+
+ if ((first && !signal) || (!first && signal))
+ __syncs[__num_syncs++] = syncs[i];
+ }
+ } else {
+ __num_syncs = 0;
+ __syncs = NULL;
+ }
+
+ if (async) {
+ bool last = j == args->num_binds - 1;
+
+ /*
+ * Each pass of async worker drops the ref, take a ref
+ * here, 1 set of refs taken above
+ */
+ if (!last) {
+ if (e)
+ xe_engine_get(e);
+ xe_vm_get(vm);
+ }
+
+ err = vm_bind_ioctl_async(vm, vmas[j], e, bos[j],
+ bind_ops + j, __syncs,
+ __num_syncs);
+ if (err && !last) {
+ if (e)
+ xe_engine_put(e);
+ xe_vm_put(vm);
+ }
+ if (err)
+ break;
+ } else {
+ XE_BUG_ON(j != 0); /* Not supported */
+ err = vm_bind_ioctl(vm, vmas[j], e, bos[j],
+ bind_ops + j, __syncs,
+ __num_syncs, NULL);
+ break; /* Needed so cleanup loops work */
+ }
+ }
+
+ /* Most of cleanup owned by the async bind worker */
+ if (async && !err) {
+ up_write(&vm->lock);
+ if (args->num_binds > 1)
+ kfree(syncs);
+ goto free_objs;
+ }
+
+destroy_vmas:
+ for (i = j; err && i < args->num_binds; ++i) {
+ u32 op = bind_ops[i].op;
+ struct xe_vma *vma, *next;
+
+ if (!vmas[i])
+ break;
+
+ list_for_each_entry_safe(vma, next, &vmas[i]->unbind_link,
+ unbind_link) {
+ list_del_init(&vma->unbind_link);
+ if (!vma->destroyed) {
+ prep_vma_destroy(vm, vma);
+ xe_vma_destroy_unlocked(vma);
+ }
+ }
+
+ switch (VM_BIND_OP(op)) {
+ case XE_VM_BIND_OP_MAP:
+ case XE_VM_BIND_OP_MAP_USERPTR:
+ prep_vma_destroy(vm, vmas[i]);
+ xe_vma_destroy_unlocked(vmas[i]);
+ break;
+ }
+ }
+release_vm_lock:
+ up_write(&vm->lock);
+free_syncs:
+ while (num_syncs--) {
+ if (async && j &&
+ !(syncs[num_syncs].flags & DRM_XE_SYNC_SIGNAL))
+ continue; /* Still in async worker */
+ xe_sync_entry_cleanup(&syncs[num_syncs]);
+ }
+
+ kfree(syncs);
+put_obj:
+ for (i = j; i < args->num_binds; ++i)
+ xe_bo_put(bos[i]);
+put_engine:
+ if (e)
+ xe_engine_put(e);
+put_vm:
+ xe_vm_put(vm);
+free_objs:
+ kfree(bos);
+ kfree(vmas);
+ if (args->num_binds > 1)
+ kfree(bind_ops);
+ return err;
+}
+
+/*
+ * XXX: Using the TTM wrappers for now, likely can call into dma-resv code
+ * directly to optimize. Also this likely should be an inline function.
+ */
+int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww,
+ int num_resv, bool intr)
+{
+ struct ttm_validate_buffer tv_vm;
+ LIST_HEAD(objs);
+ LIST_HEAD(dups);
+
+ XE_BUG_ON(!ww);
+
+ tv_vm.num_shared = num_resv;
+ tv_vm.bo = xe_vm_ttm_bo(vm);
+ list_add_tail(&tv_vm.head, &objs);
+
+ return ttm_eu_reserve_buffers(ww, &objs, intr, &dups);
+}
+
+void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww)
+{
+ dma_resv_unlock(&vm->resv);
+ ww_acquire_fini(ww);
+}
+
+/**
+ * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
+ * @vma: VMA to invalidate
+ *
+ * Walks the page table leaves, zeroing the entries owned by this VMA,
+ * invalidates the TLBs, and blocks until the TLB invalidation is
+ * complete.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_vm_invalidate_vma(struct xe_vma *vma)
+{
+ struct xe_device *xe = vma->vm->xe;
+ struct xe_gt *gt;
+ u32 gt_needs_invalidate = 0;
+ int seqno[XE_MAX_GT];
+ u8 id;
+ int ret;
+
+ XE_BUG_ON(!xe_vm_in_fault_mode(vma->vm));
+ trace_xe_vma_usm_invalidate(vma);
+
+ /* Check that we don't race with page-table updates */
+ if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
+ if (xe_vma_is_userptr(vma)) {
+ WARN_ON_ONCE(!mmu_interval_check_retry
+ (&vma->userptr.notifier,
+ vma->userptr.notifier_seq));
+ WARN_ON_ONCE(!dma_resv_test_signaled(&vma->vm->resv,
+ DMA_RESV_USAGE_BOOKKEEP));
+
+ } else {
+ xe_bo_assert_held(vma->bo);
+ }
+ }
+
+ for_each_gt(gt, xe, id) {
+ if (xe_pt_zap_ptes(gt, vma)) {
+ gt_needs_invalidate |= BIT(id);
+ xe_device_wmb(xe);
+ seqno[id] = xe_gt_tlb_invalidation(gt);
+ if (seqno[id] < 0)
+ return seqno[id];
+ }
+ }
+
+ for_each_gt(gt, xe, id) {
+ if (gt_needs_invalidate & BIT(id)) {
+ ret = xe_gt_tlb_invalidation_wait(gt, seqno[id]);
+ if (ret < 0)
+ return ret;
+ }
+ }
+
+ vma->usm.gt_invalidated = vma->gt_mask;
+
+ return 0;
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
+int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
+{
+ struct rb_node *node;
+ bool is_lmem;
+ u64 addr;
+
+ if (!down_read_trylock(&vm->lock)) {
+ drm_printf(p, " Failed to acquire VM lock to dump capture");
+ return 0;
+ }
+ if (vm->pt_root[gt_id]) {
+ addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, GEN8_PAGE_SIZE, &is_lmem);
+ drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_lmem ? "LMEM" : "SYS");
+ }
+
+ for (node = rb_first(&vm->vmas); node; node = rb_next(node)) {
+ struct xe_vma *vma = to_xe_vma(node);
+ bool is_userptr = xe_vma_is_userptr(vma);
+
+ if (is_userptr) {
+ struct xe_res_cursor cur;
+
+ xe_res_first_sg(vma->userptr.sg, 0, GEN8_PAGE_SIZE, &cur);
+ addr = xe_res_dma(&cur);
+ } else {
+ addr = xe_bo_addr(vma->bo, 0, GEN8_PAGE_SIZE, &is_lmem);
+ }
+ drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
+ vma->start, vma->end, vma->end - vma->start + 1ull,
+ addr, is_userptr ? "USR" : is_lmem ? "VRAM" : "SYS");
+ }
+ up_read(&vm->lock);
+
+ return 0;
+}
+#else
+int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
+{
+ return 0;
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
new file mode 100644
index 000000000000..3468ed9d0528
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_VM_H_
+#define _XE_VM_H_
+
+#include "xe_macros.h"
+#include "xe_map.h"
+#include "xe_vm_types.h"
+
+struct drm_device;
+struct drm_printer;
+struct drm_file;
+
+struct ttm_buffer_object;
+struct ttm_validate_buffer;
+
+struct xe_engine;
+struct xe_file;
+struct xe_sync_entry;
+
+struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags);
+void xe_vm_free(struct kref *ref);
+
+struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id);
+int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node);
+
+static inline struct xe_vm *xe_vm_get(struct xe_vm *vm)
+{
+ kref_get(&vm->refcount);
+ return vm;
+}
+
+static inline void xe_vm_put(struct xe_vm *vm)
+{
+ kref_put(&vm->refcount, xe_vm_free);
+}
+
+int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww,
+ int num_resv, bool intr);
+
+void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww);
+
+static inline bool xe_vm_is_closed(struct xe_vm *vm)
+{
+ /* Only guaranteed not to change when vm->resv is held */
+ return !vm->size;
+}
+
+struct xe_vma *
+xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma);
+
+#define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv)
+
+u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_gt *full_gt);
+
+int xe_vm_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int xe_vm_bind_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+
+void xe_vm_close_and_put(struct xe_vm *vm);
+
+static inline bool xe_vm_in_compute_mode(struct xe_vm *vm)
+{
+ return vm->flags & XE_VM_FLAG_COMPUTE_MODE;
+}
+
+static inline bool xe_vm_in_fault_mode(struct xe_vm *vm)
+{
+ return vm->flags & XE_VM_FLAG_FAULT_MODE;
+}
+
+static inline bool xe_vm_no_dma_fences(struct xe_vm *vm)
+{
+ return xe_vm_in_compute_mode(vm) || xe_vm_in_fault_mode(vm);
+}
+
+int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e);
+
+int xe_vm_userptr_pin(struct xe_vm *vm);
+
+int __xe_vm_userptr_needs_repin(struct xe_vm *vm);
+
+int xe_vm_userptr_check_repin(struct xe_vm *vm);
+
+struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
+
+int xe_vm_invalidate_vma(struct xe_vma *vma);
+
+int xe_vm_async_fence_wait_start(struct dma_fence *fence);
+
+extern struct ttm_device_funcs xe_ttm_funcs;
+
+struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm);
+
+static inline bool xe_vma_is_userptr(struct xe_vma *vma)
+{
+ return !vma->bo;
+}
+
+int xe_vma_userptr_pin_pages(struct xe_vma *vma);
+
+int xe_vma_userptr_check_repin(struct xe_vma *vma);
+
+/*
+ * XE_ONSTACK_TV is used to size the tv_onstack array that is input
+ * to xe_vm_lock_dma_resv() and xe_vm_unlock_dma_resv().
+ */
+#define XE_ONSTACK_TV 20
+int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
+ struct ttm_validate_buffer *tv_onstack,
+ struct ttm_validate_buffer **tv,
+ struct list_head *objs,
+ bool intr,
+ unsigned int num_shared);
+
+void xe_vm_unlock_dma_resv(struct xe_vm *vm,
+ struct ttm_validate_buffer *tv_onstack,
+ struct ttm_validate_buffer *tv,
+ struct ww_acquire_ctx *ww,
+ struct list_head *objs);
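+
+/*
+ * Minimal usage sketch for the two helpers above (error handling omitted; the
+ * surrounding context is illustrative rather than taken from the driver):
+ *
+ *	struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV], *tv;
+ *	struct ww_acquire_ctx ww;
+ *	LIST_HEAD(objs);
+ *	int err;
+ *
+ *	err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs, true, 1);
+ *	if (!err) {
+ *		// ... operate on the VM and its external BOs ...
+ *		xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs);
+ *	}
+ */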
+
+void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,
+ enum dma_resv_usage usage);
+
+int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id);
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
+#define vm_dbg drm_dbg
+#else
+__printf(2, 3)
+static inline void vm_dbg(const struct drm_device *dev,
+ const char *format, ...)
+{ /* noop */ }
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h
new file mode 100644
index 000000000000..5b6216964c45
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_doc.h
@@ -0,0 +1,555 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_VM_DOC_H_
+#define _XE_VM_DOC_H_
+
+/**
+ * DOC: XE VM (user address space)
+ *
+ * VM creation
+ * ===========
+ *
+ * Allocate a physical page for the root of the page table structure, create a
+ * default bind engine, and return a handle to the user.
+ *
+ * Scratch page
+ * ------------
+ *
+ * If the VM is created with the flag DRM_XE_VM_CREATE_SCRATCH_PAGE, the entire
+ * page table structure defaults to pointing at a blank page allocated by the
+ * VM. Invalid memory accesses then read from / write to this page rather than
+ * faulting.
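+ *
+ * Roughly (a sketch only, the helper names below are made up; see the
+ * scratch_bo and scratch_pt members of struct xe_vm):
+ *
+ * .. code-block:: c
+ *
+ *	for_each_gt(gt, xe, id) {
+ *		vm->scratch_bo[id] = alloc_blank_page(vm, gt);
+ *		for (i = 0; i < num_pt_levels(vm); i++)
+ *			vm->scratch_pt[id][i] =
+ *				pt_with_all_entries_pointing_at_scratch(vm, gt, i);
+ *	}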
+ *
+ * VM bind (create GPU mapping for a BO or userptr)
+ * ================================================
+ *
+ * Creates GPU mappings for a BO or userptr within a VM. VM binds use the same
+ * in / out fence interface (struct drm_xe_sync) as execs which allows users to
+ * think of binds and execs as more or less the same operation.
+ *
+ * Operations
+ * ----------
+ *
+ * XE_VM_BIND_OP_MAP - Create mapping for a BO
+ * XE_VM_BIND_OP_UNMAP - Destroy mapping for a BO / userptr
+ * XE_VM_BIND_OP_MAP_USERPTR - Create mapping for userptr
+ *
+ * Implementation details
+ * ~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * All bind operations are implemented via a hybrid approach of using the CPU
+ * and GPU to modify page tables. If a new physical page is allocated in the
+ * page table structure we populate that page via the CPU and insert that new
+ * page into the existing page table structure via a GPU job. Any existing
+ * pages in the page table structure that need to be modified are likewise
+ * updated via the GPU job. As the root physical page is preallocated on VM
+ * creation our
+ * GPU job will always have at least 1 update. The in / out fences are passed to
+ * this job so again this is conceptually the same as an exec.
+ *
+ * Very simple example of a few binds on an empty VM with 48 bits of address space
+ * and the resulting operations:
+ *
+ * .. code-block::
+ *
+ * bind BO0 0x0-0x1000
+ * alloc page level 3a, program PTE[0] to BO0 phys address (CPU)
+ * alloc page level 2, program PDE[0] page level 3a phys address (CPU)
+ * alloc page level 1, program PDE[0] page level 2 phys address (CPU)
+ * update root PDE[0] to page level 1 phys address (GPU)
+ *
+ * bind BO1 0x201000-0x202000
+ * alloc page level 3b, program PTE[1] to BO1 phys address (CPU)
+ * update page level 2 PDE[1] to page level 3b phys address (GPU)
+ *
+ * bind BO2 0x1ff000-0x201000
+ * update page level 3a PTE[511] to BO2 phys address (GPU)
+ * update page level 3b PTE[0] to BO2 phys address + 0x1000 (GPU)
+ *
+ * GPU bypass
+ * ~~~~~~~~~~
+ *
+ * In the above example the steps using the GPU can instead be done by the CPU
+ * if the bind can be done immediately (all in-fences satisfied, VM dma-resv kernel
+ * slot is idle).
+ *
+ * Address space
+ * -------------
+ *
+ * Depending on platform either 48 or 57 bits of address space is supported.
+ *
+ * Page sizes
+ * ----------
+ *
+ * The minimum page size is either 4k or 64k depending on platform and memory
+ * placement (sysmem vs. VRAM). We enforce that binds must be aligned to the
+ * minimum page size.
+ *
+ * Larger pages (2M or 1GB) can be used for BOs in VRAM, the BO physical address
+ * is aligned to the larger pages size, and VA is aligned to the larger page
+ * size. Larger pages for userptrs / BOs in sysmem should be possible but is not
+ * yet implemented.
+ *
+ * Sync error handling mode
+ * ------------------------
+ *
+ * In both modes during the bind IOCTL the user input is validated. In sync
+ * error handling mode the newly bound BO is validated (potentially moved back
+ * to a region of memory where it can be used), page tables are updated by the
+ * CPU and the job to do the GPU binds is created in the IOCTL itself. This step
+ * can fail due to memory pressure. The user can recover by freeing memory and
+ * trying this operation again.
+ *
+ * Async error handling mode
+ * -------------------------
+ *
+ * In async error handling the step of validating the BO, updating page tables,
+ * and generating a job are deferred to an async worker. As this step can now
+ * fail after the IOCTL has reported success we need an error handling flow from
+ * which the user can recover.
+ *
+ * The solution is for a user to register a user address with the VM which the
+ * VM uses to report errors to. The ufence wait interface can be used to wait on
+ * a VM going into an error state. Once an error is reported the VM's async
+ * worker is paused. While the VM's async worker is paused, sync
+ * XE_VM_BIND_OP_UNMAP operations are allowed (this can free memory). Once the
+ * user believes the error state is fixed, the async worker can be resumed via
+ * XE_VM_BIND_OP_RESTART operation. When VM async bind work is restarted, the
+ * first operation processed is the operation that caused the original error.
+ *
+ * Bind queues / engines
+ * ---------------------
+ *
+ * Think of the case where we have two bind operations A + B and are submitted
+ * in that order. A has in fences while B has none. If using a single bind
+ * queue, B is now blocked on A's in fences even though it is ready to run. This
+ * example is a real use case for VK sparse binding. We work around this
+ * limitation by implementing bind engines.
+ *
+ * In the bind IOCTL the user can optionally pass in an engine ID which must map
+ * to an engine which is of the special class DRM_XE_ENGINE_CLASS_VM_BIND.
+ * Underneath, this is really a virtual engine that can run on any of the copy
+ * hardware engines. The job(s) created by each IOCTL are inserted into this
+ * engine's ring. In the example above if A and B have different bind engines B
+ * is free to pass A. If the engine ID field is omitted, the default bind queue
+ * for the VM is used.
+ *
+ * TODO: Explain race in issue 41 and how we solve it
+ *
+ * Array of bind operations
+ * ------------------------
+ *
+ * The uAPI allows multiple bind operations to be passed in via a user array,
+ * of struct drm_xe_vm_bind_op, in a single VM bind IOCTL. This interface
+ * matches the VK sparse binding API. The implementation is rather simple, parse
+ * the array into a list of operations, pass the in fences to the first operation,
+ * and pass the out fences to the last operation. The ordered nature of a bind
+ * engine makes this possible.
+ *
+ * Munmap semantics for unbinds
+ * ----------------------------
+ *
+ * Munmap allows things like:
+ *
+ * .. code-block::
+ *
+ * 0x0000-0x2000 and 0x3000-0x5000 have mappings
+ * Munmap 0x1000-0x4000, results in mappings 0x0000-0x1000 and 0x4000-0x5000
+ *
+ * To support this semantic we decompose the above example into 4 operations:
+ *
+ * .. code-block::
+ *
+ * unbind 0x0000-0x2000
+ * unbind 0x3000-0x5000
+ * rebind 0x0000-0x1000
+ * rebind 0x4000-0x5000
+ *
+ * Why not just do a partial unbind of 0x1000-0x2000 and 0x3000-0x4000? This
+ * falls apart when using large pages at the edges and the unbind forces us to
+ * use a smaller page size. For simplicity we always issue a set of unbinds
+ * unmapping anything in the range and at most 2 rebinds on the edges.
+ *
+ * Similar to an array of binds, in fences are passed to the first operation and
+ * out fences are signaled on the last operation.
+ *
+ * In this example there is a window of time where 0x0000-0x1000 and
+ * 0x4000-0x5000 are invalid but the user didn't ask for these addresses to be
+ * removed from the mapping. To work around this we treat any munmap style
+ * unbinds which require a rebind as kernel operations (BO eviction or userptr
+ * invalidation). The first operation waits on the VM's
+ * DMA_RESV_USAGE_PREEMPT_FENCE slots (waits for all pending jobs on VM to
+ * complete / triggers preempt fences) and the last operation is installed in
+ * the VM's DMA_RESV_USAGE_KERNEL slot (blocks future jobs / resume compute mode
+ * VM). The caveat is all dma-resv slots must be updated atomically with respect
+ * to execs and compute mode rebind worker. To accomplish this, hold the
+ * vm->lock in write mode from the first operation until the last.
+ *
+ * Deferred binds in fault mode
+ * ----------------------------
+ *
+ * If a VM is in fault mode (TODO: link to fault mode), new bind operations that
+ * create mappings are by default deferred to the page fault handler (first
+ * use). This behavior can be overridden by setting the flag
+ * XE_VM_BIND_FLAG_IMMEDIATE, which indicates that the mapping should be created
+ * immediately.
+ *
+ * User pointer
+ * ============
+ *
+ * User pointers are user-allocated memory (malloc'd, mmap'd, etc.) for which the
+ * user wants to create a GPU mapping. Typically in other DRM drivers a dummy BO
+ * was created and then a binding was created. We bypass creating a dummy BO in
+ * XE and simply create a binding directly from the userptr.
+ *
+ * Invalidation
+ * ------------
+ *
+ * Since this is core kernel-managed memory, the kernel can move this memory
+ * whenever it wants. We register an invalidation MMU notifier to alert XE when
+ * a user pointer is about to move. The invalidation notifier needs to block
+ * until all pending users (jobs or compute mode engines) of the userptr are
+ * idle to ensure no faults. This is done by waiting on all of the VM's dma-resv
+ * slots.
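+ *
+ * A sketch of the shape of the notifier callback (list bookkeeping elided, not
+ * the exact driver code):
+ *
+ * .. code-block:: c
+ *
+ *	mmu_interval_set_seq(mni, cur_seq);
+ *	dma_resv_wait_timeout(&vm->resv, DMA_RESV_USAGE_BOOKKEEP,
+ *			      false, MAX_SCHEDULE_TIMEOUT);
+ *	if (xe_vm_in_compute_mode(vm))
+ *		queue_work(system_unbound_wq, &vm->preempt.rebind_work);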
+ *
+ * Rebinds
+ * -------
+ *
+ * Either the next exec (non-compute) or rebind worker (compute mode) will
+ * rebind the userptr. The invalidation MMU notifier kicks the rebind worker
+ * after the VM dma-resv wait if the VM is in compute mode.
+ *
+ * Compute mode
+ * ============
+ *
+ * A VM in compute mode enables long running workloads and ultra low latency
+ * submission (ULLS). ULLS is implemented via a continuously running batch +
+ * semaphores. This enables the user to insert jump to new batch commands
+ * into the continuously running batch. In both cases these batches exceed the
+ * time a dma fence is allowed to exist for before signaling, as such dma fences
+ * are not used when a VM is in compute mode. User fences (TODO: link user fence
+ * doc) are used instead to signal an operation's completion.
+ *
+ * Preempt fences
+ * --------------
+ *
+ * If the kernel decides to move memory around (either userptr invalidate, BO
+ * eviction, or munmap style unbind which results in a rebind) and a batch is
+ * running on an engine, that batch can fault or cause a memory corruption as
+ * page tables for the moved memory are no longer valid. To work around this we
+ * introduce the concept of preempt fences. When sw signaling is enabled on a
+ * preempt fence it tells the submission backend to kick that engine off the
+ * hardware and the preempt fence signals when the engine is off the hardware.
+ * Once all preempt fences are signaled for a VM the kernel can safely move the
+ * memory and kick the rebind worker which resumes all the engines execution.
+ *
+ * A preempt fence, for every engine using the VM, is installed in the VM's
+ * dma-resv DMA_RESV_USAGE_PREEMPT_FENCE slot. The same preempt fence, for every
+ * engine using the VM, is also installed into the same dma-resv slot of every
+ * external BO mapped in the VM.
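+ *
+ * The kernel-op side of this boils down to something like the following sketch
+ * (iterator boilerplate and error handling omitted):
+ *
+ * .. code-block:: c
+ *
+ *	dma_resv_for_each_fence(&cursor, &vm->resv,
+ *				DMA_RESV_USAGE_PREEMPT_FENCE, fence)
+ *		dma_fence_enable_sw_signaling(fence);
+ *	dma_resv_wait_timeout(&vm->resv, DMA_RESV_USAGE_PREEMPT_FENCE,
+ *			      false, MAX_SCHEDULE_TIMEOUT);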
+ *
+ * Rebind worker
+ * -------------
+ *
+ * The rebind worker is very similar to an exec. It is responsible for rebinding
+ * evicted BOs or userptrs, waiting on those operations, installing new preempt
+ * fences, and finally resuming execution of the engines in the VM.
+ *
+ * Flow
+ * ~~~~
+ *
+ * .. code-block::
+ *
+ * <----------------------------------------------------------------------|
+ * Check if VM is closed, if so bail out |
+ * Lock VM global lock in read mode |
+ * Pin userptrs (also finds userptr invalidated since last rebind worker) |
+ * Lock VM dma-resv and external BOs dma-resv |
+ * Validate BOs that have been evicted |
+ * Wait on and allocate new preempt fences for every engine using the VM |
+ * Rebind invalidated userptrs + evicted BOs |
+ * Wait on last rebind fence |
+ * Wait VM's DMA_RESV_USAGE_KERNEL dma-resv slot |
+ * Install preempt fences and issue resume for every engine using the VM |
+ * Check if any userptrs invalidated since pin |
+ * Squash resume for all engines |
+ * Unlock all |
+ * Wait all VM's dma-resv slots |
+ * Retry ----------------------------------------------------------
+ * Release all engines waiting to resume
+ * Unlock all
+ *
+ * Timeslicing
+ * -----------
+ *
+ * In order to prevent an engine from continuously being kicked off the hardware
+ * and making no forward progress, an engine has a period of time it is allowed to
+ * run after resume before it can be kicked off again. This effectively gives
+ * each engine a timeslice.
+ *
+ * Handling multiple GTs
+ * =====================
+ *
+ * If a GT has slower access to some regions and the page table structure is in
+ * the slow region, the performance on that GT could be adversely affected. To
+ * work around this we allow a VM's page tables to be shadowed in multiple GTs.
+ * When a VM is created, a default bind engine and page table structure are
+ * created on each GT.
+ *
+ * Binds can optionally pass in a mask of GTs where a mapping should be created,
+ * if this mask is zero then default to all the GTs where the VM has page
+ * tables.
+ *
+ * The implementation for this breaks down into a bunch of for_each_gt loops in
+ * various places plus exporting a composite fence for multi-GT binds to the
+ * user.
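+ *
+ * As a sketch, a bind then looks roughly like this (the helpers and the
+ * composite-fence step are illustrative, not the actual function names):
+ *
+ * .. code-block:: c
+ *
+ *	u64 gt_mask = bind_gt_mask ?: vm_default_gt_mask(vm);
+ *
+ *	for_each_gt(gt, xe, id)
+ *		if (gt_mask & BIT(id))
+ *			fences[num_fences++] = bind_on_gt(gt, vma);
+ *	return compose_fence(fences, num_fences);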
+ *
+ * Fault mode (unified shared memory)
+ * ==================================
+ *
+ * A VM in fault mode can be enabled on devices that support page faults. If
+ * page faults are enabled, using dma fences can potentially induce a deadlock:
+ * A pending page fault can hold up the GPU work which holds up the dma fence
+ * signaling, and memory allocation is usually required to resolve a page
+ * fault, but memory allocation is not allowed to gate dma fence signaling. As
+ * such, dma fences are not allowed when VM is in fault mode. Because dma-fences
+ * are not allowed, long running workloads and ULLS are enabled on a faulting
+ * VM.
+ *
+ * Deferred VM binds
+ * -----------------
+ *
+ * By default, on a faulting VM binds just allocate the VMA and the actual
+ * updating of the page tables is deferred to the page fault handler. This
+ * behavior can be overridden by setting the flag XE_VM_BIND_FLAG_IMMEDIATE in
+ * the VM bind which will then do the bind immediately.
+ *
+ * Page fault handler
+ * ------------------
+ *
+ * Page faults are received in the G2H worker under the CT lock, which is in the
+ * path of dma fences (no memory allocations are allowed, yet faults require
+ * memory allocations), thus we cannot process faults under the CT lock. Another
+ * issue is that faults issue TLB invalidations, which require G2H credits, and
+ * we cannot allocate G2H credits in the G2H handlers without deadlocking.
+ * Lastly, we do not want the CT lock to be an outer lock of the VM global lock
+ * (the VM global lock is required for fault processing).
+ *
+ * To work around the above issue with processing faults in the G2H worker, we
+ * sink faults to a buffer which is large enough to sink all possible faults on
+ * the GT (1 per hardware engine) and kick a worker to process the faults. Since
+ * the page faults G2H are already received in a worker, kicking another worker
+ * adds more latency to a critical performance path. We add a fast path in the
+ * G2H irq handler which looks at the first G2H and if it is a page fault we sink
+ * the fault to the buffer and kick the worker to process the fault. TLB
+ * invalidation responses are also in the critical path so these can also be
+ * processed in this fast path.
+ *
+ * Multiple buffers and workers are used and hashed over based on the ASID so
+ * faults from different VMs can be processed in parallel.
+ *
+ * The page fault handler itself is rather simple; the flow is below.
+ *
+ * .. code-block::
+ *
+ * Lookup VM from ASID in page fault G2H
+ * Lock VM global lock in read mode
+ * Lookup VMA from address in page fault G2H
+ * Check if VMA is valid, if not bail
+ * Check if VMA's BO has backing store, if not allocate
+ * <----------------------------------------------------------------------|
+ * If userptr, pin pages |
+ * Lock VM & BO dma-resv locks |
+ * If atomic fault, migrate to VRAM, else validate BO location |
+ * Issue rebind |
+ * Wait on rebind to complete |
+ * Check if userptr invalidated since pin |
+ * Drop VM & BO dma-resv locks |
+ * Retry ----------------------------------------------------------
+ * Unlock all
+ * Issue blocking TLB invalidation |
+ * Send page fault response to GuC
+ *
+ * Access counters
+ * ---------------
+ *
+ * Access counters can be configured to trigger a G2H indicating the device is
+ * accessing VMAs in system memory frequently as a hint to migrate those VMAs to
+ * VRAM.
+ *
+ * Same as the page fault handler, access counters G2H cannot be processed in the
+ * G2H worker under the CT lock. Again we use a buffer to sink access counter
+ * G2H. Unlike page faults there is no upper bound so if the buffer is full we
+ * simply drop the G2H. Access counters are a best case optimization and it is
+ * safe to drop these unlike page faults.
+ *
+ * The access counter handler itself is rather simple; the flow is below.
+ *
+ * .. code-block::
+ *
+ * Lookup VM from ASID in access counter G2H
+ * Lock VM global lock in read mode
+ * Lookup VMA from address in access counter G2H
+ * If userptr, bail (nothing to do)
+ * Lock VM & BO dma-resv locks
+ * Issue migration to VRAM
+ * Unlock all
+ *
+ * Notice no rebind is issued in the access counter handler as the rebind will
+ * be issued on next page fault.
+ *
+ * Caveats with eviction / user pointer invalidation
+ * --------------------------------------------------
+ *
+ * In the case of eviction and user pointer invalidation on a faulting VM, there
+ * is no need to issue a rebind rather we just need to blow away the page tables
+ * for the VMAs and the page fault handler will rebind the VMAs when they fault.
+ * The caveat is that to update / read the page table structure the VM global
+ * lock is needed. In both the eviction and user pointer invalidation cases,
+ * locks are held which make acquiring the VM global lock impossible. To work
+ * around this every VMA maintains a list of leaf page table entries which
+ * should be written to zero to blow away the VMA's page tables. After writing
+ * zero to these entries a blocking TLB invalidate is issued. At this point it
+ * is safe for the kernel to move the VMA's memory around. This lockless
+ * algorithm is necessary and is safe as leaves cannot be changed while either
+ * an eviction or userptr invalidation is occurring.
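+ *
+ * This is essentially what xe_vm_invalidate_vma() does (error handling
+ * omitted):
+ *
+ * .. code-block:: c
+ *
+ *	for_each_gt(gt, xe, id)
+ *		if (xe_pt_zap_ptes(gt, vma)) {
+ *			gt_needs_invalidate |= BIT(id);
+ *			xe_device_wmb(xe);
+ *			seqno[id] = xe_gt_tlb_invalidation(gt);
+ *		}
+ *
+ *	for_each_gt(gt, xe, id)
+ *		if (gt_needs_invalidate & BIT(id))
+ *			xe_gt_tlb_invalidation_wait(gt, seqno[id]);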
+ *
+ * Locking
+ * =======
+ *
+ * VM locking protects all of the core data paths (bind operations, execs,
+ * evictions, and compute mode rebind worker) in XE.
+ *
+ * Locks
+ * -----
+ *
+ * VM global lock (vm->lock) - rw semaphore lock. Outermost lock which protects
+ * the list of userptrs mapped in the VM, the list of engines using this VM, and
+ * the array of external BOs mapped in the VM. Adding or removing any of the
+ * aforementioned state from the VM requires acquiring this lock in write mode.
+ * The VM bind path also acquires this lock in write mode, while the exec /
+ * compute mode rebind worker acquires this lock in read mode.
+ *
+ * VM dma-resv lock (vm->ttm.base.resv->lock) - WW lock. Protects VM dma-resv
+ * slots which is shared with any private BO in the VM. Expected to be acquired
+ * during VM binds, execs, and compute mode rebind worker. This lock is also
+ * held when private BOs are being evicted.
+ *
+ * external BO dma-resv lock (bo->ttm.base.resv->lock) - WW lock. Protects
+ * external BO dma-resv slots. Expected to be acquired during VM binds (in
+ * addition to the VM dma-resv lock). All external BO dma-locks within a VM are
+ * expected to be acquired (in addition to the VM dma-resv lock) during execs
+ * and the compute mode rebind worker. This lock is also held when an external
+ * BO is being evicted.
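+ *
+ * A bind-style operation therefore takes the locks roughly as follows (a
+ * sketch; error handling and external BO locking omitted):
+ *
+ * .. code-block:: c
+ *
+ *	down_write(&vm->lock);
+ *	err = xe_vm_lock(vm, &ww, 1, true);
+ *	// create / destroy VMAs, update page tables
+ *	xe_vm_unlock(vm, &ww);
+ *	up_write(&vm->lock);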
+ *
+ * Putting it all together
+ * -----------------------
+ *
+ * 1. An exec and bind operation with the same VM can't be executing at the same
+ * time (vm->lock).
+ *
+ * 2. A compute mode rebind worker and bind operation with the same VM can't be
+ * executing at the same time (vm->lock).
+ *
+ * 3. We can't add / remove userptrs or external BOs to a VM while an exec with
+ * the same VM is executing (vm->lock).
+ *
+ * 4. We can't add / remove userptrs, external BOs, or engines to a VM while a
+ * compute mode rebind worker with the same VM is executing (vm->lock).
+ *
+ * 5. Evictions within a VM can't happen while an exec with the same VM is
+ * executing (dma-resv locks).
+ *
+ * 6. Evictions within a VM can't happen while a compute mode rebind worker
+ * with the same VM is executing (dma-resv locks).
+ *
+ * dma-resv usage
+ * ==============
+ *
+ * As previously stated, to enforce the ordering of kernel ops (eviction, userptr
+ * invalidation, munmap style unbinds which result in a rebind), rebinds during
+ * execs, execs, and resumes in the rebind worker, we use both the VM's and the
+ * external BOs' dma-resv slots. Let's try to make this as clear as possible.
+ *
+ * Slot installation
+ * -----------------
+ *
+ * 1. Jobs from kernel ops install themselves into the DMA_RESV_USAGE_KERNEL
+ * slot of either an external BO or VM (depends on if kernel op is operating on
+ * an external or private BO)
+ *
+ * 2. In non-compute mode, jobs from execs install themselves into the
+ * DMA_RESV_USAGE_BOOKKEEP slot of the VM
+ *
+ * 3. In non-compute mode, jobs from execs install themselves into the
+ * DMA_RESV_USAGE_WRITE slot of all external BOs in the VM
+ *
+ * 4. Jobs from binds install themselves into the DMA_RESV_USAGE_BOOKKEEP slot
+ * of the VM
+ *
+ * 5. Jobs from binds install themselves into the DMA_RESV_USAGE_BOOKKEEP slot
+ * of the external BO (if the bind is to an external BO, this is addition to #4)
+ *
+ * 6. Every engine using a compute mode VM has a preempt fence installed into
+ * the DMA_RESV_USAGE_PREEMPT_FENCE slot of the VM
+ *
+ * 7. Every engine using a compute mode VM has a preempt fence installed into
+ * the DMA_RESV_USAGE_PREEMPT_FENCE slot of all the external BOs in the VM
+ *
+ * Slot waiting
+ * ------------
+ *
+ * 1. The execution of all jobs from kernel ops shall wait on all slots
+ * (DMA_RESV_USAGE_PREEMPT_FENCE) of either an external BO or VM (depends on if
+ * kernel op is operating on external or private BO)
+ *
+ * 2. In non-compute mode, the execution of all jobs from rebinds in execs shall
+ * wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO or VM
+ * (depends on if the rebind is operating on an external or private BO)
+ *
+ * 3. In non-compute mode, the execution of all jobs from execs shall wait on the
+ * last rebind job
+ *
+ * 4. In compute mode, the execution of all jobs from rebinds in the rebind
+ * worker shall wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO
+ * or VM (depends on if rebind is operating on external or private BO)
+ *
+ * 5. In compute mode, resumes in rebind worker shall wait on last rebind fence
+ *
+ * 6. In compute mode, resumes in rebind worker shall wait on the
+ * DMA_RESV_USAGE_KERNEL slot of the VM
+ *
+ * Putting it all together
+ * -----------------------
+ *
+ * 1. New jobs from kernel ops are blocked behind any existing jobs from
+ * non-compute mode execs
+ *
+ * 2. New jobs from non-compute mode execs are blocked behind any existing jobs
+ * from kernel ops and rebinds
+ *
+ * 3. New jobs from kernel ops are blocked behind all preempt fences signaling in
+ * compute mode
+ *
+ * 4. Compute mode engine resumes are blocked behind any existing jobs from
+ * kernel ops and rebinds
+ *
+ * Future work
+ * ===========
+ *
+ * Support large pages for sysmem and userptr.
+ *
+ * Update page faults to handle BOs at page-level granularity (e.g. part of a BO
+ * could be in system memory while another part could be in VRAM).
+ *
+ * The page fault handler can likely be optimized a bit more (e.g. rebinds always
+ * wait on the dma-resv kernel slots of the VM or BO, but technically we only
+ * have to wait for the BO that is moving; if using a job to do the rebind, we
+ * could avoid blocking in the page fault handler and instead attach a callback
+ * to the fence of the rebind job to signal page fault completion; our handling
+ * of short circuiting atomic faults for bound VMAs could be better; etc.). We
+ * can tune all of this once we have benchmarks / performance numbers from
+ * workloads up and running.
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
new file mode 100644
index 000000000000..4498aa2fbd47
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -0,0 +1,347 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <drm/xe_drm.h>
+#include <drm/ttm/ttm_tt.h>
+#include <linux/nospec.h>
+
+#include "xe_bo.h"
+#include "xe_vm.h"
+#include "xe_vm_madvise.h"
+
+static int madvise_preferred_mem_class(struct xe_device *xe, struct xe_vm *vm,
+ struct xe_vma **vmas, int num_vmas,
+ u64 value)
+{
+ int i, err;
+
+ if (XE_IOCTL_ERR(xe, value > XE_MEM_REGION_CLASS_VRAM))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, value == XE_MEM_REGION_CLASS_VRAM &&
+ !xe->info.is_dgfx))
+ return -EINVAL;
+
+ for (i = 0; i < num_vmas; ++i) {
+ struct xe_bo *bo;
+ struct ww_acquire_ctx ww;
+
+ bo = vmas[i]->bo;
+
+ err = xe_bo_lock(bo, &ww, 0, true);
+ if (err)
+ return err;
+ bo->props.preferred_mem_class = value;
+ xe_bo_placement_for_flags(xe, bo, bo->flags);
+ xe_bo_unlock(bo, &ww);
+ }
+
+ return 0;
+}
+
+static int madvise_preferred_gt(struct xe_device *xe, struct xe_vm *vm,
+ struct xe_vma **vmas, int num_vmas, u64 value)
+{
+ int i, err;
+
+ if (XE_IOCTL_ERR(xe, value > xe->info.tile_count))
+ return -EINVAL;
+
+ for (i = 0; i < num_vmas; ++i) {
+ struct xe_bo *bo;
+ struct ww_acquire_ctx ww;
+
+ bo = vmas[i]->bo;
+
+ err = xe_bo_lock(bo, &ww, 0, true);
+ if (err)
+ return err;
+ bo->props.preferred_gt = value;
+ xe_bo_placement_for_flags(xe, bo, bo->flags);
+ xe_bo_unlock(bo, &ww);
+ }
+
+ return 0;
+}
+
+static int madvise_preferred_mem_class_gt(struct xe_device *xe,
+ struct xe_vm *vm,
+ struct xe_vma **vmas, int num_vmas,
+ u64 value)
+{
+ int i, err;
+ u32 gt_id = upper_32_bits(value);
+ u32 mem_class = lower_32_bits(value);
+
+ if (XE_IOCTL_ERR(xe, mem_class > XE_MEM_REGION_CLASS_VRAM))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, mem_class == XE_MEM_REGION_CLASS_VRAM &&
+ !xe->info.is_dgfx))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, gt_id > xe->info.tile_count))
+ return -EINVAL;
+
+ for (i = 0; i < num_vmas; ++i) {
+ struct xe_bo *bo;
+ struct ww_acquire_ctx ww;
+
+ bo = vmas[i]->bo;
+
+ err = xe_bo_lock(bo, &ww, 0, true);
+ if (err)
+ return err;
+ bo->props.preferred_mem_class = mem_class;
+ bo->props.preferred_gt = gt_id;
+ xe_bo_placement_for_flags(xe, bo, bo->flags);
+ xe_bo_unlock(bo, &ww);
+ }
+
+ return 0;
+}
+
+static int madvise_cpu_atomic(struct xe_device *xe, struct xe_vm *vm,
+ struct xe_vma **vmas, int num_vmas, u64 value)
+{
+ int i, err;
+
+ for (i = 0; i < num_vmas; ++i) {
+ struct xe_bo *bo;
+ struct ww_acquire_ctx ww;
+
+ bo = vmas[i]->bo;
+ if (XE_IOCTL_ERR(xe, !(bo->flags & XE_BO_CREATE_SYSTEM_BIT)))
+ return -EINVAL;
+
+ err = xe_bo_lock(bo, &ww, 0, true);
+ if (err)
+ return err;
+ bo->props.cpu_atomic = !!value;
+
+ /*
+ * All future CPU accesses must be from system memory only, we
+ * just invalidate the CPU page tables which will trigger a
+ * migration on next access.
+ */
+ if (bo->props.cpu_atomic)
+ ttm_bo_unmap_virtual(&bo->ttm);
+ xe_bo_unlock(bo, &ww);
+ }
+
+ return 0;
+}
+
+static int madvise_device_atomic(struct xe_device *xe, struct xe_vm *vm,
+ struct xe_vma **vmas, int num_vmas, u64 value)
+{
+ int i, err;
+
+ for (i = 0; i < num_vmas; ++i) {
+ struct xe_bo *bo;
+ struct ww_acquire_ctx ww;
+
+ bo = vmas[i]->bo;
+ if (XE_IOCTL_ERR(xe, !(bo->flags & XE_BO_CREATE_VRAM0_BIT) &&
+ !(bo->flags & XE_BO_CREATE_VRAM1_BIT)))
+ return -EINVAL;
+
+ err = xe_bo_lock(bo, &ww, 0, true);
+ if (err)
+ return err;
+ bo->props.device_atomic = !!value;
+ xe_bo_unlock(bo, &ww);
+ }
+
+ return 0;
+}
+
+static int madvise_priority(struct xe_device *xe, struct xe_vm *vm,
+ struct xe_vma **vmas, int num_vmas, u64 value)
+{
+ int i, err;
+
+ if (XE_IOCTL_ERR(xe, value > DRM_XE_VMA_PRIORITY_HIGH))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, value == DRM_XE_VMA_PRIORITY_HIGH &&
+ !capable(CAP_SYS_NICE)))
+ return -EPERM;
+
+ for (i = 0; i < num_vmas; ++i) {
+ struct xe_bo *bo;
+ struct ww_acquire_ctx ww;
+
+ bo = vmas[i]->bo;
+
+ err = xe_bo_lock(bo, &ww, 0, true);
+ if (err)
+ return err;
+ bo->ttm.priority = value;
+ ttm_bo_move_to_lru_tail(&bo->ttm);
+ xe_bo_unlock(bo, &ww);
+ }
+
+ return 0;
+}
+
+static int madvise_pin(struct xe_device *xe, struct xe_vm *vm,
+ struct xe_vma **vmas, int num_vmas, u64 value)
+{
+ XE_WARN_ON("NIY");
+ return 0;
+}
+
+typedef int (*madvise_func)(struct xe_device *xe, struct xe_vm *vm,
+ struct xe_vma **vmas, int num_vmas, u64 value);
+
+static const madvise_func madvise_funcs[] = {
+ [DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS] = madvise_preferred_mem_class,
+ [DRM_XE_VM_MADVISE_PREFERRED_GT] = madvise_preferred_gt,
+ [DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS_GT] =
+ madvise_preferred_mem_class_gt,
+ [DRM_XE_VM_MADVISE_CPU_ATOMIC] = madvise_cpu_atomic,
+ [DRM_XE_VM_MADVISE_DEVICE_ATOMIC] = madvise_device_atomic,
+ [DRM_XE_VM_MADVISE_PRIORITY] = madvise_priority,
+ [DRM_XE_VM_MADVISE_PIN] = madvise_pin,
+};
+
+static struct xe_vma *node_to_vma(const struct rb_node *node)
+{
+ BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0);
+ return (struct xe_vma *)node;
+}
+
+static struct xe_vma **
+get_vmas(struct xe_vm *vm, int *num_vmas, u64 addr, u64 range)
+{
+ struct xe_vma **vmas;
+ struct xe_vma *vma, *__vma, lookup;
+ int max_vmas = 8;
+ struct rb_node *node;
+
+ lockdep_assert_held(&vm->lock);
+
+ vmas = kmalloc(max_vmas * sizeof(*vmas), GFP_KERNEL);
+ if (!vmas)
+ return NULL;
+
+ lookup.start = addr;
+ lookup.end = addr + range - 1;
+
+ vma = xe_vm_find_overlapping_vma(vm, &lookup);
+ if (!vma)
+ return vmas;
+
+ if (!xe_vma_is_userptr(vma)) {
+ vmas[*num_vmas] = vma;
+ *num_vmas += 1;
+ }
+
+ node = &vma->vm_node;
+ while ((node = rb_next(node))) {
+ if (!xe_vma_cmp_vma_cb(&lookup, node)) {
+ __vma = node_to_vma(node);
+ if (xe_vma_is_userptr(__vma))
+ continue;
+
+ if (*num_vmas == max_vmas) {
+ struct xe_vma **__vmas;
+
+ max_vmas <<= 1;
+ __vmas = krealloc(vmas, max_vmas * sizeof(*vmas),
+ GFP_KERNEL);
+ if (!__vmas) {
+ kfree(vmas);
+ return NULL;
+ }
+ vmas = __vmas;
+ }
+ vmas[*num_vmas] = __vma;
+ *num_vmas += 1;
+ } else {
+ break;
+ }
+ }
+
+ node = &vma->vm_node;
+ while ((node = rb_prev(node))) {
+ if (!xe_vma_cmp_vma_cb(&lookup, node)) {
+ __vma = node_to_vma(node);
+ if (xe_vma_is_userptr(__vma))
+ continue;
+
+ if (*num_vmas == max_vmas) {
+ struct xe_vma **__vmas;
+
+ max_vmas <<= 1;
+ __vmas = krealloc(vmas, max_vmas * sizeof(*vmas),
+ GFP_KERNEL);
+ if (!__vmas) {
+ kfree(vmas);
+ return NULL;
+ }
+ vmas = __vmas;
+ }
+ vmas[*num_vmas] = __vma;
+ *num_vmas += 1;
+ } else {
+ break;
+ }
+ }
+
+ return vmas;
+}
+
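+/**
+ * xe_vm_madvise_ioctl() - Apply a madvise hint to all BO-backed VMAs in a range
+ * @dev: DRM device
+ * @data: pointer to struct drm_xe_vm_madvise copied in from userspace
+ * @file: DRM file of the caller
+ *
+ * Looks up the VM, collects the non-userptr VMAs overlapping
+ * [args->addr, args->addr + args->range) and dispatches to the handler selected
+ * by args->property. Only supported on VMs created in fault mode.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */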
+int xe_vm_madvise_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = to_xe_file(file);
+ struct drm_xe_vm_madvise *args = data;
+ struct xe_vm *vm;
+ struct xe_vma **vmas = NULL;
+ int num_vmas = 0, err = 0, idx;
+
+ if (XE_IOCTL_ERR(xe, args->extensions))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, args->property >= ARRAY_SIZE(madvise_funcs)))
+ return -EINVAL;
+
+ vm = xe_vm_lookup(xef, args->vm_id);
+ if (XE_IOCTL_ERR(xe, !vm))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) {
+ err = -ENOENT;
+ goto put_vm;
+ }
+
+ if (XE_IOCTL_ERR(xe, !xe_vm_in_fault_mode(vm))) {
+ err = -EINVAL;
+ goto put_vm;
+ }
+
+ down_read(&vm->lock);
+
+ vmas = get_vmas(vm, &num_vmas, args->addr, args->range);
+
+ if (XE_IOCTL_ERR(xe, !vmas)) {
+ err = -ENOMEM;
+ goto unlock_vm;
+ }
+
+ if (XE_IOCTL_ERR(xe, !num_vmas)) {
+ err = -EINVAL;
+ goto unlock_vm;
+ }
+
+ idx = array_index_nospec(args->property, ARRAY_SIZE(madvise_funcs));
+ err = madvise_funcs[idx](xe, vm, vmas, num_vmas, args->value);
+
+unlock_vm:
+ up_read(&vm->lock);
+put_vm:
+ xe_vm_put(vm);
+ kfree(vmas);
+ return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.h b/drivers/gpu/drm/xe/xe_vm_madvise.h
new file mode 100644
index 000000000000..eecd33acd248
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _XE_VM_MADVISE_H_
+#define _XE_VM_MADVISE_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_vm_madvise_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
new file mode 100644
index 000000000000..2a3b911ab358
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -0,0 +1,337 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_VM_TYPES_H_
+#define _XE_VM_TYPES_H_
+
+#include <linux/dma-resv.h>
+#include <linux/kref.h>
+#include <linux/mmu_notifier.h>
+#include <linux/scatterlist.h>
+
+#include "xe_device_types.h"
+#include "xe_pt_types.h"
+
+struct xe_bo;
+struct xe_vm;
+
+struct xe_vma {
+ struct rb_node vm_node;
+ /** @vm: VM which this VMA belongs to */
+ struct xe_vm *vm;
+
+ /**
+ * @start: start address of this VMA within its address domain, end -
+ * start + 1 == VMA size
+ */
+ u64 start;
+ /** @end: end address of this VMA within its address domain */
+ u64 end;
+ /** @pte_flags: pte flags for this VMA */
+ u32 pte_flags;
+
+ /** @bo: BO if not a userptr, must be NULL if userptr */
+ struct xe_bo *bo;
+ /** @bo_offset: offset into BO if not a userptr, unused for userptr */
+ u64 bo_offset;
+
+ /** @gt_mask: GT mask of where to create binding for this VMA */
+ u64 gt_mask;
+
+ /**
+ * @gt_present: GT mask of where bindings are present for this VMA.
+ * protected by vm->lock, vm->resv and for userptrs,
+ * vm->userptr.notifier_lock for writing. Needs either for reading,
+ * but if reading is done under the vm->lock only, it needs to be held
+ * in write mode.
+ */
+ u64 gt_present;
+
+ /**
+ * @destroyed: VMA is destroyed, in the sense that it shouldn't be
+ * subject to rebind anymore. This field must be written under
+ * the vm lock in write mode and the userptr.notifier_lock in
+ * either mode. Read under the vm lock or the userptr.notifier_lock in
+ * write mode.
+ */
+ bool destroyed;
+
+ /**
+ * @first_munmap_rebind: VMA is first in a sequence of ops that triggers
+ * a rebind (munmap style VM unbinds). This indicates the operation
+ * using this VMA must wait on all dma-resv slots (wait for pending jobs
+ * / trigger preempt fences).
+ */
+ bool first_munmap_rebind;
+
+ /**
+ * @last_munmap_rebind: VMA is last in a sequence of ops that triggers
+ * a rebind (munmap style VM unbinds). This indicates the operation
+ * using this VMA must install itself into kernel dma-resv slot (blocks
+ * future jobs) and kick the rebind work in compute mode.
+ */
+ bool last_munmap_rebind;
+
+ /** @use_atomic_access_pte_bit: Set atomic access bit in PTE */
+ bool use_atomic_access_pte_bit;
+
+ union {
+ /** @bo_link: link into BO if not a userptr */
+ struct list_head bo_link;
+ /** @userptr_link: link into VM repin list if userptr */
+ struct list_head userptr_link;
+ };
+
+ /**
+ * @rebind_link: link into VM if this VMA needs rebinding, and
+ * if it's a bo (not userptr) needs validation after a possible
+ * eviction. Protected by the vm's resv lock.
+ */
+ struct list_head rebind_link;
+
+ /**
+ * @unbind_link: link or list head if an unbind of multiple VMAs, in
+ * single unbind op, is being done.
+ */
+ struct list_head unbind_link;
+
+ /** @destroy_cb: callback to destroy VMA when unbind job is done */
+ struct dma_fence_cb destroy_cb;
+
+ /** @destroy_work: worker to destroy this VMA */
+ struct work_struct destroy_work;
+
+ /** @userptr: user pointer state */
+ struct {
+ /** @ptr: user pointer */
+ uintptr_t ptr;
+ /** @invalidate_link: Link for the vm::userptr.invalidated list */
+ struct list_head invalidate_link;
+ /**
+ * @notifier: MMU notifier for user pointer (invalidation call back)
+ */
+ struct mmu_interval_notifier notifier;
+ /** @sgt: storage for a scatter gather table */
+ struct sg_table sgt;
+ /** @sg: allocated scatter gather table */
+ struct sg_table *sg;
+ /** @notifier_seq: notifier sequence number */
+ unsigned long notifier_seq;
+ /**
+ * @initial_bind: user pointer has been bound at least once.
+ * write: vm->userptr.notifier_lock in read mode and vm->resv held.
+ * read: vm->userptr.notifier_lock in write mode or vm->resv held.
+ */
+ bool initial_bind;
+#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
+ u32 divisor;
+#endif
+ } userptr;
+
+ /** @usm: unified shared memory state */
+ struct {
+ /** @gt_invalidated: VMA has been invalidated */
+ u64 gt_invalidated;
+ } usm;
+
+ struct {
+ struct list_head rebind_link;
+ } notifier;
+
+ struct {
+ /**
+ * @extobj.link: Link into vm's external object list.
+ * protected by the vm lock.
+ */
+ struct list_head link;
+ } extobj;
+};
+
+struct xe_device;
+
+#define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv)
+
+struct xe_vm {
+ struct xe_device *xe;
+
+ struct kref refcount;
+
+ /* engine used for (un)binding vma's */
+ struct xe_engine *eng[XE_MAX_GT];
+
+ /** Protects @rebind_list and the page-table structures */
+ struct dma_resv resv;
+
+ u64 size;
+ struct rb_root vmas;
+
+ struct xe_pt *pt_root[XE_MAX_GT];
+ struct xe_bo *scratch_bo[XE_MAX_GT];
+ struct xe_pt *scratch_pt[XE_MAX_GT][XE_VM_MAX_LEVEL];
+
+ /** @flags: flags for this VM, statically set up at creation time */
+#define XE_VM_FLAGS_64K BIT(0)
+#define XE_VM_FLAG_COMPUTE_MODE BIT(1)
+#define XE_VM_FLAG_ASYNC_BIND_OPS BIT(2)
+#define XE_VM_FLAG_MIGRATION BIT(3)
+#define XE_VM_FLAG_SCRATCH_PAGE BIT(4)
+#define XE_VM_FLAG_FAULT_MODE BIT(5)
+#define XE_VM_FLAG_GT_ID(flags) (((flags) >> 6) & 0x3)
+#define XE_VM_FLAG_SET_GT_ID(gt) ((gt)->info.id << 6)
+ unsigned long flags;
+
+ /** @composite_fence_ctx: context for composite fence */
+ u64 composite_fence_ctx;
+ /** @composite_fence_seqno: seqno for composite fence */
+ u32 composite_fence_seqno;
+
+ /**
+ * @lock: outermost lock, protects anything attached to this VM
+ */
+ struct rw_semaphore lock;
+
+ /**
+ * @rebind_list: list of VMAs that need rebinding, and if they are
+ * bos (not userptr), need validation after a possible eviction. The
+ * list is protected by @resv.
+ */
+ struct list_head rebind_list;
+
+ /** @rebind_fence: rebind fence from execbuf */
+ struct dma_fence *rebind_fence;
+
+ /**
+ * @destroy_work: worker to destroy VM, needed as a dma_fence signaling
+ * from an irq context can be last put and the destroy needs to be able
+ * to sleep.
+ */
+ struct work_struct destroy_work;
+
+ /** @extobj: bookkeeping for external objects. Protected by the vm lock */
+ struct {
+ /** @entries: number of external BOs attached to this VM */
+ u32 entries;
+ /** @list: list of vmas with external bos attached */
+ struct list_head list;
+ } extobj;
+
+ /** @async_ops: async VM operations (bind / unbinds) */
+ struct {
+ /** @list: list of pending async VM ops */
+ struct list_head pending;
+ /** @work: worker to execute async VM ops */
+ struct work_struct work;
+ /** @lock: protects list of pending async VM ops and fences */
+ spinlock_t lock;
+ /** @error_capture: error capture state */
+ struct {
+ /** @mm: user MM */
+ struct mm_struct *mm;
+ /**
+ * @addr: user pointer to copy error capture state to
+ */
+ u64 addr;
+ /** @wq: user fence wait queue for VM errors */
+ wait_queue_head_t wq;
+ } error_capture;
+ /** @fence: fence state */
+ struct {
+ /** @context: context of async fence */
+ u64 context;
+ /** @seqno: seqno of async fence */
+ u32 seqno;
+ } fence;
+ /** @error: error state for async VM ops */
+ int error;
+ /**
+ * @munmap_rebind_inflight: an munmap style VM bind is in the
+ * middle of a set of ops which requires a rebind at the end.
+ */
+ bool munmap_rebind_inflight;
+ } async_ops;
+
+ /** @userptr: user pointer state */
+ struct {
+ /**
+ * @userptr.repin_list: list of VMAs which are user pointers,
+ * and needs repinning. Protected by @lock.
+ */
+ struct list_head repin_list;
+ /**
+ * @notifier_lock: protects notifier in write mode and
+ * submission in read mode.
+ */
+ struct rw_semaphore notifier_lock;
+ /**
+ * @userptr.invalidated_lock: Protects the
+ * @userptr.invalidated list.
+ */
+ spinlock_t invalidated_lock;
+ /**
+ * @userptr.invalidated: List of invalidated userptrs, not yet
+ * picked
+ * up for revalidation. Protected from access with the
+ * @invalidated_lock. Removing items from the list
+ * additionally requires @lock in write mode, and adding
+ * items to the list requires the @userptr.notifier_lock in
+ * write mode.
+ */
+ struct list_head invalidated;
+ } userptr;
+
+ /** @preempt: preempt state */
+ struct {
+ /**
+ * @min_run_period_ms: The minimum run period before preempting
+ * an engine again
+ */
+ s64 min_run_period_ms;
+ /** @engines: list of engines attached to this VM */
+ struct list_head engines;
+ /** @num_engines: number of user engines attached to this VM */
+ int num_engines;
+ /**
+ * @rebind_work: worker to rebind invalidated userptrs / evicted
+ * BOs
+ */
+ struct work_struct rebind_work;
+ } preempt;
+
+ /** @usm: unified shared memory state */
+ struct {
+ /** @asid: address space ID, unique to each VM */
+ u32 asid;
+ /**
+ * @last_fault_vma: Last fault VMA, used for fast lookup when we
+ * get a flood of faults to the same VMA
+ */
+ struct xe_vma *last_fault_vma;
+ } usm;
+
+ /**
+ * @notifier: Lists and locks for temporary usage within notifiers where
+ * we either can't grab the vm lock or the vm resv.
+ */
+ struct {
+ /** @notifier.list_lock: lock protecting @rebind_list */
+ spinlock_t list_lock;
+ /**
+ * @notifier.rebind_list: list of vmas that we want to put on the
+ * main @rebind_list. This list is protected for writing by both
+ * notifier.list_lock, and the resv of the bo the vma points to,
+ * and for reading by the notifier.list_lock only.
+ */
+ struct list_head rebind_list;
+ } notifier;
+
+ /** @error_capture: allow to track errors */
+ struct {
+ /** @capture_once: capture only one error per VM */
+ bool capture_once;
+ } error_capture;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
new file mode 100644
index 000000000000..b56141ba7145
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -0,0 +1,326 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_wa.h"
+
+#include <linux/compiler_types.h>
+
+#include "xe_device_types.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_hw_engine_types.h"
+#include "xe_mmio.h"
+#include "xe_platform_types.h"
+#include "xe_rtp.h"
+#include "xe_step.h"
+
+#include "gt/intel_engine_regs.h"
+#include "gt/intel_gt_regs.h"
+#include "i915_reg.h"
+
+/**
+ * DOC: Hardware workarounds
+ *
+ * Hardware workarounds are register programming documented to be executed in
+ * the driver that fall outside of the normal programming sequences for a
+ * platform. There are some basic categories of workarounds, depending on
+ * how/when they are applied:
+ *
+ * - LRC workarounds: workarounds that touch registers that are
+ * saved/restored to/from the HW context image. The list is emitted (via Load
+ * Register Immediate commands) once when initializing the device and saved in
+ * the default context. That default context is then used on every context
+ * creation to have a "primed golden context", i.e. a context image that
+ * already contains the changes needed to all the registers.
+ *
+ * TODO: Although these workarounds are maintained here, they are not
+ * currently being applied.
+ *
+ * - Engine workarounds: the list of these WAs is applied whenever the specific
+ * engine is reset. It's also possible that a set of engine classes share a
+ * common power domain and they are reset together. This happens on some
+ * platforms with render and compute engines. In this case (at least) one of
+ * them needs to keep the workaround programming: the approach taken in the
+ * driver is to tie those workarounds to the first compute/render engine that
+ * is registered. When executing with GuC submission, engine resets are
+ * outside of kernel driver control, hence the list of registers involved is
+ * written once, on engine initialization, and then passed to the GuC, which
+ * saves/restores their values before/after the reset takes place. See
+ * ``drivers/gpu/drm/xe/xe_guc_ads.c`` for reference.
+ *
+ * - GT workarounds: the list of these WAs is applied whenever these registers
+ * revert to their default values: on GPU reset, suspend/resume [1]_, etc.
+ *
+ * - Register whitelist: some workarounds need to be implemented in userspace,
+ * but need to touch privileged registers. The whitelist in the kernel
+ * instructs the hardware to allow the access to happen. From the kernel side,
+ * this is just a special case of a MMIO workaround (as we write the list of
+ * these to/be-whitelisted registers to some special HW registers).
+ *
+ * - Workaround batchbuffers: buffers that get executed automatically by the
+ * hardware on every HW context restore. These buffers are created and
+ * programmed in the default context so the hardware always goes through those
+ * programming sequences when switching contexts. The support for workaround
+ * batchbuffers is enabled by these hardware mechanisms:
+ *
+ * #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
+ * context, pointing the hardware to jump to that location when that offset
+ * is reached in the context restore. Workaround batchbuffer in the driver
+ * currently uses this mechanism for all platforms.
+ *
+ * #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
+ * pointing the hardware to a buffer to continue executing after the
+ * engine registers are restored in a context restore sequence. This is
+ * currently not used in the driver.
+ *
+ * - Other: There are WAs that, due to their nature, cannot be applied from a
+ * central place. Those are peppered around the rest of the code, as needed.
+ * Workarounds related to the display IP are the main example.
+ *
+ * .. [1] Technically, some registers are powercontext saved & restored, so they
+ * survive a suspend/resume. In practice, writing them again is not too
+ * costly and simplifies things, so it's the approach taken in the driver.
+ *
+ * .. note::
+ * Hardware workarounds in xe work the same way as in i915, with the
+ * difference of how they are maintained in the code. In xe, the xe_rtp
+ * infrastructure is used so the workarounds can be kept in tables, following
+ * a declarative rather than a procedural approach.
+ */
+
+#undef _MMIO
+#undef MCR_REG
+#define _MMIO(x) _XE_RTP_REG(x)
+#define MCR_REG(x) _XE_RTP_MCR_REG(x)
+
+static bool match_14011060649(const struct xe_gt *gt,
+ const struct xe_hw_engine *hwe)
+{
+ return hwe->instance % 2 == 0;
+}
+
+static const struct xe_rtp_entry gt_was[] = {
+ { XE_RTP_NAME("14011060649"),
+ XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255),
+ ENGINE_CLASS(VIDEO_DECODE),
+ FUNC(match_14011060649)),
+ XE_RTP_SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS,
+ XE_RTP_FLAG(FOREACH_ENGINE))
+ },
+ { XE_RTP_NAME("16010515920"),
+ XE_RTP_RULES(SUBPLATFORM(DG2, G10),
+ STEP(A0, B0),
+ ENGINE_CLASS(VIDEO_DECODE)),
+ XE_RTP_SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS,
+ XE_RTP_FLAG(FOREACH_ENGINE))
+ },
+ { XE_RTP_NAME("22010523718"),
+ XE_RTP_RULES(SUBPLATFORM(DG2, G10)),
+ XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE, CG3DDISCFEG_CLKGATE_DIS)
+ },
+ { XE_RTP_NAME("14011006942"),
+ XE_RTP_RULES(SUBPLATFORM(DG2, G10)),
+ XE_RTP_SET(GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS)
+ },
+ { XE_RTP_NAME("14010948348"),
+ XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+ XE_RTP_SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS)
+ },
+ { XE_RTP_NAME("14011037102"),
+ XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+ XE_RTP_SET(UNSLCGCTL9444, LTCDD_CLKGATE_DIS)
+ },
+ { XE_RTP_NAME("14011371254"),
+ XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+ XE_RTP_SET(GEN11_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS)
+ },
+ { XE_RTP_NAME("14011431319/0"),
+ XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+ XE_RTP_SET(UNSLCGCTL9440,
+ GAMTLBOACS_CLKGATE_DIS |
+ GAMTLBVDBOX7_CLKGATE_DIS | GAMTLBVDBOX6_CLKGATE_DIS |
+ GAMTLBVDBOX5_CLKGATE_DIS | GAMTLBVDBOX4_CLKGATE_DIS |
+ GAMTLBVDBOX3_CLKGATE_DIS | GAMTLBVDBOX2_CLKGATE_DIS |
+ GAMTLBVDBOX1_CLKGATE_DIS | GAMTLBVDBOX0_CLKGATE_DIS |
+ GAMTLBKCR_CLKGATE_DIS | GAMTLBGUC_CLKGATE_DIS |
+ GAMTLBBLT_CLKGATE_DIS)
+ },
+ { XE_RTP_NAME("14011431319/1"),
+ XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+ XE_RTP_SET(UNSLCGCTL9444,
+ GAMTLBGFXA0_CLKGATE_DIS | GAMTLBGFXA1_CLKGATE_DIS |
+ GAMTLBCOMPA0_CLKGATE_DIS | GAMTLBCOMPA1_CLKGATE_DIS |
+ GAMTLBCOMPB0_CLKGATE_DIS | GAMTLBCOMPB1_CLKGATE_DIS |
+ GAMTLBCOMPC0_CLKGATE_DIS | GAMTLBCOMPC1_CLKGATE_DIS |
+ GAMTLBCOMPD0_CLKGATE_DIS | GAMTLBCOMPD1_CLKGATE_DIS |
+ GAMTLBMERT_CLKGATE_DIS |
+ GAMTLBVEBOX3_CLKGATE_DIS | GAMTLBVEBOX2_CLKGATE_DIS |
+ GAMTLBVEBOX1_CLKGATE_DIS | GAMTLBVEBOX0_CLKGATE_DIS)
+ },
+ { XE_RTP_NAME("14010569222"),
+ XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+ XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE, GAMEDIA_CLKGATE_DIS)
+ },
+ { XE_RTP_NAME("14011028019"),
+ XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
+ XE_RTP_SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS)
+ },
+ { XE_RTP_NAME("14014830051"),
+ XE_RTP_RULES(PLATFORM(DG2)),
+ XE_RTP_CLR(SARB_CHICKEN1, COMP_CKN_IN)
+ },
+ { XE_RTP_NAME("14015795083"),
+ XE_RTP_RULES(PLATFORM(DG2)),
+ XE_RTP_CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE)
+ },
+ { XE_RTP_NAME("14011059788"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
+ XE_RTP_SET(GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE)
+ },
+ { XE_RTP_NAME("1409420604"),
+ XE_RTP_RULES(PLATFORM(DG1)),
+ XE_RTP_SET(SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS)
+ },
+ { XE_RTP_NAME("1408615072"),
+ XE_RTP_RULES(PLATFORM(DG1)),
+ XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL)
+ },
+ {}
+};
+
+static const struct xe_rtp_entry engine_was[] = {
+ { XE_RTP_NAME("14015227452"),
+ XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+ XE_RTP_SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE,
+ XE_RTP_FLAG(MASKED_REG))
+ },
+ { XE_RTP_NAME("1606931601"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+ XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ,
+ XE_RTP_FLAG(MASKED_REG))
+ },
+ { XE_RTP_NAME("22010931296, 18011464164, 14010919138"),
+ XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)),
+ XE_RTP_SET(GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE)
+ },
+ { XE_RTP_NAME("14010826681, 1606700617, 22010271021"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+ XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
+ XE_RTP_FLAG(MASKED_REG))
+ },
+ { XE_RTP_NAME("18019627453"),
+ XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
+ XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE,
+ XE_RTP_FLAG(MASKED_REG))
+ },
+ { XE_RTP_NAME("1409804808"),
+ XE_RTP_RULES(GRAPHICS_VERSION(1200),
+ ENGINE_CLASS(RENDER),
+ IS_INTEGRATED),
+ XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS,
+ XE_RTP_FLAG(MASKED_REG))
+ },
+ { XE_RTP_NAME("14010229206, 1409085225"),
+ XE_RTP_RULES(GRAPHICS_VERSION(1200),
+ ENGINE_CLASS(RENDER),
+ IS_INTEGRATED),
+ XE_RTP_SET(GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH,
+ XE_RTP_FLAG(MASKED_REG))
+ },
+ { XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
+ XE_RTP_RULES(PLATFORM(TIGERLAKE), ENGINE_CLASS(RENDER)),
+ XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE),
+ GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+ GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_FLAG(MASKED_REG))
+ },
+ { XE_RTP_NAME("1607297627, 1607030317, 1607186500"),
+ XE_RTP_RULES(PLATFORM(ROCKETLAKE), ENGINE_CLASS(RENDER)),
+ XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE),
+ GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+ GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_FLAG(MASKED_REG))
+ },
+ { XE_RTP_NAME("1406941453"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+ XE_RTP_SET(GEN10_SAMPLER_MODE, ENABLE_SMALLPL, XE_RTP_FLAG(MASKED_REG))
+ },
+ { XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)),
+ XE_RTP_SET(GEN7_FF_SLICE_CS_CHICKEN1, GEN9_FFSC_PERCTX_PREEMPT_CTRL,
+ XE_RTP_FLAG(MASKED_REG))
+ },
+ {}
+};
+
+static const struct xe_rtp_entry lrc_was[] = {
+ { XE_RTP_NAME("1409342910, 14010698770, 14010443199, 1408979724, 1409178076, 1409207793, 1409217633, 1409252684, 1409347922, 1409142259"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
+ XE_RTP_SET(GEN11_COMMON_SLICE_CHICKEN3,
+ GEN12_DISABLE_CPS_AWARE_COLOR_PIPE,
+ XE_RTP_FLAG(MASKED_REG))
+ },
+ { XE_RTP_NAME("WaDisableGPGPUMidThreadPreemption"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
+ XE_RTP_FIELD_SET(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+ GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL,
+ XE_RTP_FLAG(MASKED_REG))
+ },
+ { XE_RTP_NAME("16011163337"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
+ /* read verification is ignored due to 1608008084. */
+ XE_RTP_FIELD_SET_NO_READ_MASK(GEN12_FF_MODE2, FF_MODE2_GS_TIMER_MASK,
+ FF_MODE2_GS_TIMER_224)
+ },
+ { XE_RTP_NAME("1409044764"),
+ XE_RTP_RULES(PLATFORM(DG1)),
+ XE_RTP_CLR(GEN11_COMMON_SLICE_CHICKEN3,
+ DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN,
+ XE_RTP_FLAG(MASKED_REG))
+ },
+ { XE_RTP_NAME("22010493298"),
+ XE_RTP_RULES(PLATFORM(DG1)),
+ XE_RTP_SET(HIZ_CHICKEN,
+ DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE,
+ XE_RTP_FLAG(MASKED_REG))
+ },
+ {}
+};
+
+/**
+ * xe_wa_process_gt - process GT workaround table
+ * @gt: GT instance to process workarounds for
+ *
+ * Process GT workaround table for this platform, saving in @gt all the
+ * workarounds that need to be applied at the GT level.
+ */
+void xe_wa_process_gt(struct xe_gt *gt)
+{
+ xe_rtp_process(gt_was, &gt->reg_sr, gt, NULL);
+}
+
+/**
+ * xe_wa_process_engine - process engine workaround table
+ * @hwe: engine instance to process workarounds for
+ *
+ * Process the engine workaround table for this platform, saving in @hwe all
+ * the workarounds matching this engine that need to be applied at the engine
+ * level.
+ */
+void xe_wa_process_engine(struct xe_hw_engine *hwe)
+{
+ xe_rtp_process(engine_was, &hwe->reg_sr, hwe->gt, hwe);
+}
+
+/**
+ * xe_wa_process_lrc - process context workaround table
+ * @hwe: engine instance to process workarounds for
+ *
+ * Process context workaround table for this platform, saving in @hwe all the
+ * workarounds that need to be applied on context restore. These are workarounds
+ * touching registers that are part of the HW context image.
+ */
+void xe_wa_process_lrc(struct xe_hw_engine *hwe)
+{
+ xe_rtp_process(lrc_was, &hwe->reg_lrc, hwe->gt, hwe);
+}
diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h
new file mode 100644
index 000000000000..cd2307d58795
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wa.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WA_
+#define _XE_WA_
+
+struct xe_gt;
+struct xe_hw_engine;
+
+void xe_wa_process_gt(struct xe_gt *gt);
+void xe_wa_process_engine(struct xe_hw_engine *hwe);
+void xe_wa_process_lrc(struct xe_hw_engine *hwe);
+
+void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
new file mode 100644
index 000000000000..8a8d814a0e7a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/xe_drm.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_macros.h"
+#include "xe_vm.h"
+
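+/*
+ * Read the 64-bit value at the user address and compare it, under the given
+ * mask, against the expected value using the requested comparison op.
+ * Returns 0 if the comparison passed, 1 if it did not, or -EFAULT if the
+ * user memory could not be read.
+ */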
+static int do_compare(u64 addr, u64 value, u64 mask, u16 op)
+{
+ u64 rvalue;
+ int err;
+ bool passed;
+
+ err = copy_from_user(&rvalue, u64_to_user_ptr(addr), sizeof(rvalue));
+ if (err)
+ return -EFAULT;
+
+ switch (op) {
+ case DRM_XE_UFENCE_WAIT_EQ:
+ passed = (rvalue & mask) == (value & mask);
+ break;
+ case DRM_XE_UFENCE_WAIT_NEQ:
+ passed = (rvalue & mask) != (value & mask);
+ break;
+ case DRM_XE_UFENCE_WAIT_GT:
+ passed = (rvalue & mask) > (value & mask);
+ break;
+ case DRM_XE_UFENCE_WAIT_GTE:
+ passed = (rvalue & mask) >= (value & mask);
+ break;
+ case DRM_XE_UFENCE_WAIT_LT:
+ passed = (rvalue & mask) < (value & mask);
+ break;
+ case DRM_XE_UFENCE_WAIT_LTE:
+ passed = (rvalue & mask) <= (value & mask);
+ break;
+ default:
+ XE_BUG_ON("Not possible");
+ }
+
+ return passed ? 0 : 1;
+}
+
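+/* Map uAPI engine class values to the driver's internal engine classes. */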
+static const enum xe_engine_class user_to_xe_engine_class[] = {
+ [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
+ [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
+ [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
+ [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+ [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
+};
+
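+/*
+ * Validate the user-supplied engine list: each entry must name a valid
+ * engine class, an existing GT and a hardware engine present on that GT.
+ */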
+static int check_hw_engines(struct xe_device *xe,
+ struct drm_xe_engine_class_instance *eci,
+ int num_engines)
+{
+ int i;
+
+ for (i = 0; i < num_engines; ++i) {
+ enum xe_engine_class user_class;
+
+ /* Bounds check the user-provided class before using it as an index */
+ if (eci[i].engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
+ return -EINVAL;
+ user_class = user_to_xe_engine_class[eci[i].engine_class];
+
+ if (eci[i].gt_id >= xe->info.tile_count)
+ return -EINVAL;
+
+ if (!xe_gt_hw_engine(xe_device_get_gt(xe, eci[i].gt_id),
+ user_class, eci[i].engine_instance, true))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+#define VALID_FLAGS (DRM_XE_UFENCE_WAIT_SOFT_OP | \
+ DRM_XE_UFENCE_WAIT_ABSTIME | \
+ DRM_XE_UFENCE_WAIT_VM_ERROR)
+#define MAX_OP DRM_XE_UFENCE_WAIT_LTE
+
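+/**
+ * xe_wait_user_fence_ioctl - wait on a user fence value in memory
+ * @dev: DRM device
+ * @data: ioctl arguments (struct drm_xe_wait_user_fence)
+ * @file: DRM file
+ *
+ * Sleep until the value at the user address (or, for
+ * DRM_XE_UFENCE_WAIT_VM_ERROR, at the VM's error capture address) satisfies
+ * the requested comparison, the timeout expires, or a signal is pending.
+ *
+ * Return: 0 on success, -ETIME on timeout, negative error code on failure.
+ */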
+int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ DEFINE_WAIT_FUNC(w_wait, woken_wake_function);
+ struct drm_xe_wait_user_fence *args = data;
+ struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
+ struct drm_xe_engine_class_instance __user *user_eci =
+ u64_to_user_ptr(args->instances);
+ struct xe_vm *vm = NULL;
+ u64 addr = args->addr;
+ int err;
+ bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_SOFT_OP ||
+ args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR;
+ unsigned long timeout = args->timeout;
+
+ if (XE_IOCTL_ERR(xe, args->extensions))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, args->flags & ~VALID_FLAGS))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, args->op > MAX_OP))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, no_engines &&
+ (args->num_engines || args->instances)))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, !no_engines && !args->num_engines))
+ return -EINVAL;
+
+ if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) &&
+ addr & 0x7))
+ return -EINVAL;
+
+ if (!no_engines) {
+ err = copy_from_user(eci, user_eci,
+ sizeof(struct drm_xe_engine_class_instance) *
+ args->num_engines);
+ if (XE_IOCTL_ERR(xe, err))
+ return -EFAULT;
+
+ if (XE_IOCTL_ERR(xe, check_hw_engines(xe, eci,
+ args->num_engines)))
+ return -EINVAL;
+ }
+
+ if (args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) {
+ if (XE_IOCTL_ERR(xe, args->vm_id >> 32))
+ return -EINVAL;
+
+ vm = xe_vm_lookup(to_xe_file(file), args->vm_id);
+ if (XE_IOCTL_ERR(xe, !vm))
+ return -ENOENT;
+
+ if (XE_IOCTL_ERR(xe, !vm->async_ops.error_capture.addr)) {
+ xe_vm_put(vm);
+ return -ENOTSUPP;
+ }
+
+ addr = vm->async_ops.error_capture.addr;
+ }
+
+ if (XE_IOCTL_ERR(xe, timeout > MAX_SCHEDULE_TIMEOUT))
+ return -EINVAL;
+
+ /*
+ * FIXME: Very simple implementation at the moment, single wait queue
+ * for everything. Could be optimized to have a wait queue for every
+ * hardware engine. The loop is open coded because 'do_compare' can sleep,
+ * which doesn't work with the wait_event_* macros.
+ */
+ if (vm)
+ add_wait_queue(&vm->async_ops.error_capture.wq, &w_wait);
+ else
+ add_wait_queue(&xe->ufence_wq, &w_wait);
+ for (;;) {
+ if (vm && xe_vm_is_closed(vm)) {
+ err = -ENODEV;
+ break;
+ }
+ err = do_compare(addr, args->value, args->mask, args->op);
+ if (err <= 0)
+ break;
+
+ if (signal_pending(current)) {
+ err = -ERESTARTSYS;
+ break;
+ }
+
+ if (!timeout) {
+ err = -ETIME;
+ break;
+ }
+
+ timeout = wait_woken(&w_wait, TASK_INTERRUPTIBLE, timeout);
+ }
+ if (vm) {
+ remove_wait_queue(&vm->async_ops.error_capture.wq, &w_wait);
+ xe_vm_put(vm);
+ } else {
+ remove_wait_queue(&xe->ufence_wq, &w_wait);
+ }
+ if (XE_IOCTL_ERR(xe, err < 0))
+ return err;
+ else if (XE_IOCTL_ERR(xe, !timeout))
+ return -ETIME;
+
+ /*
+ * Again very simple: return the time in jiffies that has passed. This may
+ * need more precision.
+ */
+ if (args->flags & DRM_XE_UFENCE_WAIT_ABSTIME)
+ args->timeout = args->timeout - timeout;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.h b/drivers/gpu/drm/xe/xe_wait_user_fence.h
new file mode 100644
index 000000000000..0e268978f9e6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WAIT_USER_FENCE_H_
+#define _XE_WAIT_USER_FENCE_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c
new file mode 100644
index 000000000000..e4a8d4a1899e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wopcm.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_guc_reg.h"
+#include "xe_mmio.h"
+#include "xe_uc_fw.h"
+#include "xe_wopcm.h"
+
+#include "i915_utils.h"
+
+/**
+ * DOC: Write Once Protected Content Memory (WOPCM) Layout
+ *
+ * The layout of the WOPCM is fixed after writing the GuC WOPCM size and
+ * offset registers, whose values are calculated from the HuC/GuC firmware
+ * sizes and a set of hardware requirements/restrictions, as shown below:
+ *
+ * ::
+ *
+ *     +=========> +====================+ <== WOPCM Top
+ *     ^           |  HW contexts RSVD  |
+ *     |     +===> +====================+ <== GuC WOPCM Top
+ *     |     ^     |                    |
+ *     |     |     |                    |
+ *     |     |     |                    |
+ *     |    GuC    |                    |
+ *     |   WOPCM   |                    |
+ *     |    Size   +--------------------+
+ *   WOPCM   |     |    GuC FW RSVD     |
+ *     |     |     +--------------------+
+ *     |     |     |   GuC Stack RSVD   |
+ *     |     |     +--------------------+
+ *     |     v     |   GuC WOPCM RSVD   |
+ *     |     +===> +====================+ <== GuC WOPCM base
+ *     |           |     WOPCM RSVD     |
+ *     |           +--------------------+ <== HuC Firmware Top
+ *     v           |       HuC FW       |
+ *     +=========> +====================+ <== WOPCM Base
+ *
+ * GuC accessible WOPCM starts at GuC WOPCM base and ends at GuC WOPCM top.
+ * The top part of the WOPCM is reserved for hardware contexts (e.g. RC6
+ * context).
+ */
+
+/* Default WOPCM size is 2MB from Gen11, 1MB on previous platforms */
+#define DGFX_WOPCM_SIZE SZ_4M /* FIXME: Larger size required
+ for 2-tile PVC, do a proper
+ probe sooner or later */
+#define MTL_WOPCM_SIZE SZ_4M /* FIXME: Larger size required
+ for MTL, do a proper probe
+ sooner or later */
+#define GEN11_WOPCM_SIZE SZ_2M
+/* 16KB WOPCM (RSVD WOPCM) is reserved from HuC firmware top. */
+#define WOPCM_RESERVED_SIZE SZ_16K
+
+/* 16KB reserved at the beginning of GuC WOPCM. */
+#define GUC_WOPCM_RESERVED SZ_16K
+/* 8KB from GUC_WOPCM_RESERVED is reserved for GuC stack. */
+#define GUC_WOPCM_STACK_RESERVED SZ_8K
+
+/* GuC WOPCM Offset value needs to be aligned to 16KB. */
+#define GUC_WOPCM_OFFSET_ALIGNMENT (1UL << GUC_WOPCM_OFFSET_SHIFT)
+
+/* 36KB WOPCM reserved at the end of WOPCM on GEN11. */
+#define GEN11_WOPCM_HW_CTX_RESERVED (SZ_32K + SZ_4K)
+
+static inline struct xe_gt *wopcm_to_gt(struct xe_wopcm *wopcm)
+{
+ return container_of(wopcm, struct xe_gt, uc.wopcm);
+}
+
+static inline struct xe_device *wopcm_to_xe(struct xe_wopcm *wopcm)
+{
+ return gt_to_xe(wopcm_to_gt(wopcm));
+}
+
+static u32 context_reserved_size(void)
+{
+ return GEN11_WOPCM_HW_CTX_RESERVED;
+}
+
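+/*
+ * Verify the calculated partitioning: the GuC region must fit below the
+ * reserved HW context area, be large enough for the GuC firmware plus its
+ * reserved space, and leave enough room below the GuC WOPCM base for the
+ * HuC firmware plus the mandatory reserved area.
+ */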
+static bool __check_layout(struct xe_device *xe, u32 wopcm_size,
+ u32 guc_wopcm_base, u32 guc_wopcm_size,
+ u32 guc_fw_size, u32 huc_fw_size)
+{
+ const u32 ctx_rsvd = context_reserved_size();
+ u32 size;
+
+ size = wopcm_size - ctx_rsvd;
+ if (unlikely(range_overflows(guc_wopcm_base, guc_wopcm_size, size))) {
+ drm_err(&xe->drm,
+ "WOPCM: invalid GuC region layout: %uK + %uK > %uK\n",
+ guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K,
+ size / SZ_1K);
+ return false;
+ }
+
+ size = guc_fw_size + GUC_WOPCM_RESERVED + GUC_WOPCM_STACK_RESERVED;
+ if (unlikely(guc_wopcm_size < size)) {
+ drm_err(&xe->drm, "WOPCM: no space for %s: %uK < %uK\n",
+ xe_uc_fw_type_repr(XE_UC_FW_TYPE_GUC),
+ guc_wopcm_size / SZ_1K, size / SZ_1K);
+ return false;
+ }
+
+ size = huc_fw_size + WOPCM_RESERVED_SIZE;
+ if (unlikely(guc_wopcm_base < size)) {
+ drm_err(&xe->drm, "WOPCM: no space for %s: %uK < %uK\n",
+ xe_uc_fw_type_repr(XE_UC_FW_TYPE_HUC),
+ guc_wopcm_base / SZ_1K, size / SZ_1K);
+ return false;
+ }
+
+ return true;
+}
+
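+/*
+ * Check whether the GuC WOPCM registers have already been programmed and
+ * locked (e.g. by a previous driver load); if so, return the partitioning
+ * currently in effect through @guc_wopcm_base and @guc_wopcm_size.
+ */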
+static bool __wopcm_regs_locked(struct xe_gt *gt,
+ u32 *guc_wopcm_base, u32 *guc_wopcm_size)
+{
+ u32 reg_base = xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET.reg);
+ u32 reg_size = xe_mmio_read32(gt, GUC_WOPCM_SIZE.reg);
+
+ if (!(reg_size & GUC_WOPCM_SIZE_LOCKED) ||
+ !(reg_base & GUC_WOPCM_OFFSET_VALID))
+ return false;
+
+ *guc_wopcm_base = reg_base & GUC_WOPCM_OFFSET_MASK;
+ *guc_wopcm_size = reg_size & GUC_WOPCM_SIZE_MASK;
+ return true;
+}
+
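+/*
+ * Program the GuC WOPCM size and offset registers and verify that the
+ * values were latched with the lock/valid bits set. The HuC loading agent
+ * field is set when HuC firmware loading is enabled.
+ */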
+static int __wopcm_init_regs(struct xe_device *xe, struct xe_gt *gt,
+ struct xe_wopcm *wopcm)
+{
+ u32 base = wopcm->guc.base;
+ u32 size = wopcm->guc.size;
+ u32 huc_agent = xe_uc_fw_is_disabled(&gt->uc.huc.fw) ? 0 :
+ HUC_LOADING_AGENT_GUC;
+ u32 mask;
+ int err;
+
+ XE_BUG_ON(!(base & GUC_WOPCM_OFFSET_MASK));
+ XE_BUG_ON(base & ~GUC_WOPCM_OFFSET_MASK);
+ XE_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK));
+ XE_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK);
+
+ mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED;
+ err = xe_mmio_write32_and_verify(gt, GUC_WOPCM_SIZE.reg, size, mask,
+ size | GUC_WOPCM_SIZE_LOCKED);
+ if (err)
+ goto err_out;
+
+ mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent;
+ err = xe_mmio_write32_and_verify(gt, DMA_GUC_WOPCM_OFFSET.reg,
+ base | huc_agent, mask,
+ base | huc_agent |
+ GUC_WOPCM_OFFSET_VALID);
+ if (err)
+ goto err_out;
+
+ return 0;
+
+err_out:
+ drm_notice(&xe->drm, "Failed to init uC WOPCM registers!\n");
+ drm_notice(&xe->drm, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET",
+ DMA_GUC_WOPCM_OFFSET.reg,
+ xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET.reg));
+ drm_notice(&xe->drm, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE",
+ GUC_WOPCM_SIZE.reg,
+ xe_mmio_read32(gt, GUC_WOPCM_SIZE.reg));
+
+ return err;
+}
+
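+/**
+ * xe_wopcm_size() - Size of the WOPCM for this device
+ * @xe: xe device
+ *
+ * Return: overall WOPCM size in bytes for the given platform.
+ */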
+u32 xe_wopcm_size(struct xe_device *xe)
+{
+ return IS_DGFX(xe) ? DGFX_WOPCM_SIZE :
+ xe->info.platform == XE_METEORLAKE ? MTL_WOPCM_SIZE :
+ GEN11_WOPCM_SIZE;
+}
+
+/**
+ * xe_wopcm_init() - Initialize the WOPCM structure.
+ * @wopcm: pointer to xe_wopcm.
+ *
+ * This function partitions the WOPCM space based on the GuC and HuC firmware
+ * sizes and allocates the maximum remaining space for GuC use. It also
+ * enforces platform-dependent hardware restrictions on the GuC WOPCM offset
+ * and size. If any of these checks fail, WOPCM initialization fails so that
+ * the subsequent WOPCM register setup and GuC firmware upload are aborted.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_wopcm_init(struct xe_wopcm *wopcm)
+{
+ struct xe_device *xe = wopcm_to_xe(wopcm);
+ struct xe_gt *gt = wopcm_to_gt(wopcm);
+ u32 guc_fw_size = xe_uc_fw_get_upload_size(&gt->uc.guc.fw);
+ u32 huc_fw_size = xe_uc_fw_get_upload_size(&gt->uc.huc.fw);
+ u32 ctx_rsvd = context_reserved_size();
+ u32 guc_wopcm_base;
+ u32 guc_wopcm_size;
+ bool locked;
+ int ret = 0;
+
+ if (!guc_fw_size)
+ return -EINVAL;
+
+ wopcm->size = xe_wopcm_size(xe);
+ drm_dbg(&xe->drm, "WOPCM: %uK\n", wopcm->size / SZ_1K);
+
+ xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
+ XE_BUG_ON(guc_fw_size >= wopcm->size);
+ XE_BUG_ON(huc_fw_size >= wopcm->size);
+ XE_BUG_ON(ctx_rsvd + WOPCM_RESERVED_SIZE >= wopcm->size);
+
+ locked = __wopcm_regs_locked(gt, &guc_wopcm_base, &guc_wopcm_size);
+ if (locked) {
+ drm_dbg(&xe->drm, "GuC WOPCM is already locked [%uK, %uK)\n",
+ guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K);
+ goto check;
+ }
+
+ /*
+ * Aligned value of guc_wopcm_base will determine available WOPCM space
+ * for HuC firmware and mandatory reserved area.
+ */
+ guc_wopcm_base = huc_fw_size + WOPCM_RESERVED_SIZE;
+ guc_wopcm_base = ALIGN(guc_wopcm_base, GUC_WOPCM_OFFSET_ALIGNMENT);
+
+ /*
+ * Need to clamp guc_wopcm_base now to make sure the following math is
+ * correct. Formal check of whole WOPCM layout will be done below.
+ */
+ guc_wopcm_base = min(guc_wopcm_base, wopcm->size - ctx_rsvd);
+
+ /* The aligned remainder of the usable WOPCM space can be assigned to GuC. */
+ guc_wopcm_size = wopcm->size - ctx_rsvd - guc_wopcm_base;
+ guc_wopcm_size &= GUC_WOPCM_SIZE_MASK;
+
+ drm_dbg(&xe->drm, "Calculated GuC WOPCM [%uK, %uK)\n",
+ guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K);
+
+check:
+ if (__check_layout(xe, wopcm->size, guc_wopcm_base, guc_wopcm_size,
+ guc_fw_size, huc_fw_size)) {
+ wopcm->guc.base = guc_wopcm_base;
+ wopcm->guc.size = guc_wopcm_size;
+ XE_BUG_ON(!wopcm->guc.base);
+ XE_BUG_ON(!wopcm->guc.size);
+ } else {
+ drm_notice(&xe->drm, "Unsuccessful WOPCM partitioning\n");
+ return -E2BIG;
+ }
+
+ if (!locked)
+ ret = __wopcm_init_regs(xe, gt, wopcm);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_wopcm.h b/drivers/gpu/drm/xe/xe_wopcm.h
new file mode 100644
index 000000000000..0197a282460b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wopcm.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WOPCM_H_
+#define _XE_WOPCM_H_
+
+#include "xe_wopcm_types.h"
+
+struct xe_device;
+
+int xe_wopcm_init(struct xe_wopcm *wopcm);
+u32 xe_wopcm_size(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_wopcm_types.h b/drivers/gpu/drm/xe/xe_wopcm_types.h
new file mode 100644
index 000000000000..486d850c4084
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_wopcm_types.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_WOPCM_TYPES_H_
+#define _XE_WOPCM_TYPES_H_
+
+#include <linux/types.h>
+
+/**
+ * struct xe_wopcm - Overall WOPCM info and WOPCM regions.
+ */
+struct xe_wopcm {
+ /** @size: Size of overall WOPCM */
+ u32 size;
+ /** @guc: GuC WOPCM Region info */
+ struct {
+ /** @base: GuC WOPCM base which is offset from WOPCM base */
+ u32 base;
+ /** @size: Size of the GuC WOPCM region */
+ u32 size;
+ } guc;
+};
+
+#endif