summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Kconfig1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Makefile4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h777
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c57
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c77
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.h1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.c34
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.h22
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c58
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c71
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c81
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c133
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c233
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c82
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c51
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_iommu.c356
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_iommu.h84
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_migrate.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c27
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c22
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c22
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c36
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c27
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h45
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c56
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c35
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c133
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.h2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c141
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.h3
37 files changed, 910 insertions, 1796 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index 93bd4eda0d94..d3c3d3ab7225 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -6,7 +6,6 @@
config HSA_AMD
bool "HSA kernel driver for AMD GPU devices"
depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64)
- imply AMD_IOMMU_V2 if X86_64
select HMM_MIRROR
select MMU_NOTIFIER
select DRM_AMDGPU_USERPTR
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index 2ec8f27c5366..a5ae7bcf44eb 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -59,10 +59,6 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_crat.o \
$(AMDKFD_PATH)/kfd_debug.o
-ifneq ($(CONFIG_AMD_IOMMU_V2),)
-AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o
-endif
-
ifneq ($(CONFIG_DEBUG_FS),)
AMDKFD_FILES += $(AMDKFD_PATH)/kfd_debugfs.o
endif
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 73ca9aebf086..d7cd5fa313ff 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -274,16 +274,16 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
static const uint32_t cwsr_trap_gfx9_hex[] = {
- 0xbf820001, 0xbf820254,
+ 0xbf820001, 0xbf820258,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
0xbf840009, 0x866eff6d,
0x00ff0000, 0xbf85001e,
0x866eff7b, 0x00000400,
- 0xbf850051, 0xbf8e0010,
+ 0xbf850055, 0xbf8e0010,
0xb8fbf803, 0xbf82fffa,
- 0x866eff7b, 0x00000900,
+ 0x866eff7b, 0x03c00900,
0xbf850015, 0x866eff7b,
0x000071ff, 0xbf840008,
0x866fff7b, 0x00007080,
@@ -294,13 +294,15 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0xbf850007, 0xb8eef801,
0x866eff6e, 0x00000800,
0xbf850003, 0x866eff7b,
- 0x00000400, 0xbf850036,
+ 0x00000400, 0xbf85003a,
0xb8faf807, 0x867aff7a,
0x001f8000, 0x8e7a8b7a,
0x8977ff77, 0xfc000000,
0x87777a77, 0xba7ff807,
0x00000000, 0xb8faf812,
0xb8fbf813, 0x8efa887a,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x877bff7b, 0xffff0000,
0xc0031bbd, 0x00000010,
0xbf8cc07f, 0x8e6e976e,
0x8977ff77, 0x00800000,
@@ -676,14 +678,14 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
};
static const uint32_t cwsr_trap_nv1x_hex[] = {
- 0xbf820001, 0xbf8201f1,
+ 0xbf820001, 0xbf8201f5,
0xb0804004, 0xb978f802,
0x8a78ff78, 0x00020006,
0xb97bf803, 0x876eff78,
0x00002000, 0xbf840009,
0x876eff6d, 0x00ff0000,
0xbf85001e, 0x876eff7b,
- 0x00000400, 0xbf850057,
+ 0x00000400, 0xbf85005b,
0xbf8e0010, 0xb97bf803,
0xbf82fffa, 0x876eff7b,
0x00000900, 0xbf850015,
@@ -697,7 +699,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xb96ef801, 0x876eff6e,
0x00000800, 0xbf850003,
0x876eff7b, 0x00000400,
- 0xbf85003c, 0x8a77ff77,
+ 0xbf850040, 0x8a77ff77,
0xff000000, 0xb97af807,
0x877bff7a, 0x02000000,
0x8f7b867b, 0x88777b77,
@@ -706,6 +708,8 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x8a7aff7a, 0x023f8000,
0xb9faf807, 0xb97af812,
0xb97bf813, 0x8ffa887a,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x887bff7b, 0xffff0000,
0xf4011bbd, 0xfa000010,
0xbf8cc07f, 0x8f6e976e,
0x8a77ff77, 0x00800000,
@@ -1094,16 +1098,16 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
};
static const uint32_t cwsr_trap_arcturus_hex[] = {
- 0xbf820001, 0xbf8202d0,
+ 0xbf820001, 0xbf8202d4,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
0xbf840009, 0x866eff6d,
0x00ff0000, 0xbf85001e,
0x866eff7b, 0x00000400,
- 0xbf850051, 0xbf8e0010,
+ 0xbf850055, 0xbf8e0010,
0xb8fbf803, 0xbf82fffa,
- 0x866eff7b, 0x00000900,
+ 0x866eff7b, 0x03c00900,
0xbf850015, 0x866eff7b,
0x000071ff, 0xbf840008,
0x866fff7b, 0x00007080,
@@ -1114,13 +1118,15 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0xbf850007, 0xb8eef801,
0x866eff6e, 0x00000800,
0xbf850003, 0x866eff7b,
- 0x00000400, 0xbf850036,
+ 0x00000400, 0xbf85003a,
0xb8faf807, 0x867aff7a,
0x001f8000, 0x8e7a8b7a,
0x8977ff77, 0xfc000000,
0x87777a77, 0xba7ff807,
0x00000000, 0xb8faf812,
0xb8fbf813, 0x8efa887a,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x877bff7b, 0xffff0000,
0xc0031bbd, 0x00000010,
0xbf8cc07f, 0x8e6e976e,
0x8977ff77, 0x00800000,
@@ -1572,16 +1578,16 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
};
static const uint32_t cwsr_trap_aldebaran_hex[] = {
- 0xbf820001, 0xbf8202db,
+ 0xbf820001, 0xbf8202df,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
0xbf840009, 0x866eff6d,
0x00ff0000, 0xbf85001e,
0x866eff7b, 0x00000400,
- 0xbf850051, 0xbf8e0010,
+ 0xbf850055, 0xbf8e0010,
0xb8fbf803, 0xbf82fffa,
- 0x866eff7b, 0x00000900,
+ 0x866eff7b, 0x03c00900,
0xbf850015, 0x866eff7b,
0x000071ff, 0xbf840008,
0x866fff7b, 0x00007080,
@@ -1592,13 +1598,15 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0xbf850007, 0xb8eef801,
0x866eff6e, 0x00000800,
0xbf850003, 0x866eff7b,
- 0x00000400, 0xbf850036,
+ 0x00000400, 0xbf85003a,
0xb8faf807, 0x867aff7a,
0x001f8000, 0x8e7a8b7a,
0x8977ff77, 0xfc000000,
0x87777a77, 0xba7ff807,
0x00000000, 0xb8faf812,
0xb8fbf813, 0x8efa887a,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x877bff7b, 0xffff0000,
0xc0031bbd, 0x00000010,
0xbf8cc07f, 0x8e6e976e,
0x8977ff77, 0x00800000,
@@ -2061,14 +2069,14 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
};
static const uint32_t cwsr_trap_gfx10_hex[] = {
- 0xbf820001, 0xbf82021c,
+ 0xbf820001, 0xbf820220,
0xb0804004, 0xb978f802,
0x8a78ff78, 0x00020006,
0xb97bf803, 0x876eff78,
0x00002000, 0xbf840009,
0x876eff6d, 0x00ff0000,
0xbf85001e, 0x876eff7b,
- 0x00000400, 0xbf850041,
+ 0x00000400, 0xbf850045,
0xbf8e0010, 0xb97bf803,
0xbf82fffa, 0x876eff7b,
0x00000900, 0xbf850015,
@@ -2082,8 +2090,10 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xb96ef801, 0x876eff6e,
0x00000800, 0xbf850003,
0x876eff7b, 0x00000400,
- 0xbf850026, 0xb97af812,
+ 0xbf85002a, 0xb97af812,
0xb97bf813, 0x8ffa887a,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x887bff7b, 0xffff0000,
0xf4011bbd, 0xfa000010,
0xbf8cc07f, 0x8f6e976e,
0x8a77ff77, 0x00800000,
@@ -2494,8 +2504,9 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0x00000000,
};
+
static const uint32_t cwsr_trap_gfx11_hex[] = {
- 0xbfa00001, 0xbfa00221,
+ 0xbfa00001, 0xbfa00225,
0xb0804006, 0xb8f8f802,
0x9178ff78, 0x00020006,
0xb8fbf803, 0xbf0d9e6d,
@@ -2505,7 +2516,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
0xbfa10009, 0x8b6eff6d,
0x00ff0000, 0xbfa2001e,
0x8b6eff7b, 0x00000400,
- 0xbfa20041, 0xbf830010,
+ 0xbfa20045, 0xbf830010,
0xb8fbf803, 0xbfa0fffa,
0x8b6eff7b, 0x00000900,
0xbfa20015, 0x8b6eff7b,
@@ -2518,9 +2529,11 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
0xbfa20007, 0xb8eef801,
0x8b6eff6e, 0x00000800,
0xbfa20003, 0x8b6eff7b,
- 0x00000400, 0xbfa20026,
+ 0x00000400, 0xbfa2002a,
0xbefa4d82, 0xbf89fc07,
- 0x84fa887a, 0xf4005bbd,
+ 0x84fa887a, 0xbf0d8f7b,
+ 0xbfa10002, 0x8c7bff7b,
+ 0xffff0000, 0xf4005bbd,
0xf8000010, 0xbf89fc07,
0x846e976e, 0x9177ff77,
0x00800000, 0x8c776e77,
@@ -2938,211 +2951,257 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
};
static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
- 0xbf820001, 0xbf8202d6,
- 0xb8f8f802, 0x89788678,
- 0xb8fbf803, 0x866eff78,
- 0x00002000, 0xbf840009,
- 0x866eff6d, 0x00ff0000,
- 0xbf85001a, 0x866eff7b,
- 0x00000400, 0xbf85004d,
- 0xbf8e0010, 0xb8fbf803,
- 0xbf82fffa, 0x866eff7b,
- 0x03c00900, 0xbf850011,
- 0x866eff7b, 0x000071ff,
- 0xbf840008, 0x866fff7b,
- 0x00007080, 0xbf840001,
- 0xbeee1a87, 0xb8eff801,
- 0x8e6e8c6e, 0x866e6f6e,
- 0xbf850006, 0x866eff6d,
- 0x00ff0000, 0xbf850003,
+ 0xbf820001, 0xbf8202db,
+ 0xb8f8f802, 0x8978ff78,
+ 0x00020006, 0xb8fbf803,
+ 0x866eff78, 0x00002000,
+ 0xbf840009, 0x866eff6d,
+ 0x00ff0000, 0xbf85001a,
0x866eff7b, 0x00000400,
- 0xbf850036, 0xb8faf807,
+ 0xbf850051, 0xbf8e0010,
+ 0xb8fbf803, 0xbf82fffa,
+ 0x866eff7b, 0x03c00900,
+ 0xbf850011, 0x866eff7b,
+ 0x000071ff, 0xbf840008,
+ 0x866fff7b, 0x00007080,
+ 0xbf840001, 0xbeee1a87,
+ 0xb8eff801, 0x8e6e8c6e,
+ 0x866e6f6e, 0xbf850006,
+ 0x866eff6d, 0x00ff0000,
+ 0xbf850003, 0x866eff7b,
+ 0x00000400, 0xbf85003a,
+ 0xb8faf807, 0x867aff7a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8979ff79, 0xfc000000,
+ 0x87797a79, 0xba7ff807,
+ 0x00000000, 0xb8faf812,
+ 0xb8fbf813, 0x8efa887a,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x877bff7b, 0xffff0000,
+ 0xc0031bbd, 0x00000010,
+ 0xbf8cc07f, 0x8e6e976e,
+ 0x8979ff79, 0x00800000,
+ 0x87796e79, 0xc0071bbd,
+ 0x00000000, 0xbf8cc07f,
+ 0xc0071ebd, 0x00000008,
+ 0xbf8cc07f, 0x86ee6e6e,
+ 0xbf840001, 0xbe801d6e,
+ 0x866eff6d, 0x01ff0000,
+ 0xbf850005, 0x8778ff78,
+ 0x00002000, 0x80ec886c,
+ 0x82ed806d, 0xbf820005,
+ 0x866eff6d, 0x01000000,
+ 0xbf850002, 0x806c846c,
+ 0x826d806d, 0x866dff6d,
+ 0x0000ffff, 0x8f7a8b79,
0x867aff7a, 0x001f8000,
- 0x8e7a8b7a, 0x8979ff79,
- 0xfc000000, 0x87797a79,
- 0xba7ff807, 0x00000000,
- 0xb8faf812, 0xb8fbf813,
- 0x8efa887a, 0xc0031bbd,
- 0x00000010, 0xbf8cc07f,
- 0x8e6e976e, 0x8979ff79,
- 0x00800000, 0x87796e79,
- 0xc0071bbd, 0x00000000,
- 0xbf8cc07f, 0xc0071ebd,
- 0x00000008, 0xbf8cc07f,
- 0x86ee6e6e, 0xbf840001,
- 0xbe801d6e, 0x866eff6d,
- 0x01ff0000, 0xbf850005,
- 0x8778ff78, 0x00002000,
- 0x80ec886c, 0x82ed806d,
- 0xbf820005, 0x866eff6d,
- 0x01000000, 0xbf850002,
- 0x806c846c, 0x826d806d,
+ 0xb97af807, 0x86fe7e7e,
+ 0x86ea6a6a, 0x8f6e8378,
+ 0xb96ee0c2, 0xbf800002,
+ 0xb9780002, 0xbe801f6c,
0x866dff6d, 0x0000ffff,
- 0x8f7a8b79, 0x867aff7a,
- 0x001f8000, 0xb97af807,
- 0x86fe7e7e, 0x86ea6a6a,
- 0x8f6e8378, 0xb96ee0c2,
- 0xbf800002, 0xb9780002,
- 0xbe801f6c, 0x866dff6d,
- 0x0000ffff, 0xbefa0080,
- 0xb97a0283, 0xb8faf807,
- 0x867aff7a, 0x001f8000,
- 0x8e7a8b7a, 0x8979ff79,
- 0xfc000000, 0x87797a79,
- 0xba7ff807, 0x00000000,
- 0xbeee007e, 0xbeef007f,
- 0xbefe0180, 0xbf900004,
- 0x877a8478, 0xb97af802,
- 0xbf8e0002, 0xbf88fffe,
- 0xb8fa2985, 0x807a817a,
- 0x8e7a8a7a, 0x8e7a817a,
- 0xb8fb1605, 0x807b817b,
- 0x8e7b867b, 0x807a7b7a,
- 0x807a7e7a, 0x827b807f,
- 0x867bff7b, 0x0000ffff,
- 0xc04b1c3d, 0x00000050,
- 0xbf8cc07f, 0xc04b1d3d,
- 0x00000060, 0xbf8cc07f,
- 0xc0431e7d, 0x00000074,
- 0xbf8cc07f, 0xbef4007e,
- 0x8675ff7f, 0x0000ffff,
- 0x8775ff75, 0x00040000,
- 0xbef60080, 0xbef700ff,
- 0x00807fac, 0xbef1007c,
- 0xbef00080, 0xb8f02985,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fa1605,
- 0x807a817a, 0x8e7a867a,
- 0x80707a70, 0xbef60084,
- 0xbef600ff, 0x01000000,
- 0xbefe007c, 0xbefc0070,
- 0xc0611c7a, 0x0000007c,
- 0xbf8cc07f, 0x80708470,
- 0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611b3a,
+ 0xbefa0080, 0xb97a0283,
+ 0xb8faf807, 0x867aff7a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8979ff79, 0xfc000000,
+ 0x87797a79, 0xba7ff807,
+ 0x00000000, 0xbeee007e,
+ 0xbeef007f, 0xbefe0180,
+ 0xbf900004, 0x877a8478,
+ 0xb97af802, 0xbf8e0002,
+ 0xbf88fffe, 0xb8fa2985,
+ 0x807a817a, 0x8e7a8a7a,
+ 0x8e7a817a, 0xb8fb1605,
+ 0x807b817b, 0x8e7b867b,
+ 0x807a7b7a, 0x807a7e7a,
+ 0x827b807f, 0x867bff7b,
+ 0x0000ffff, 0xc04b1c3d,
+ 0x00000050, 0xbf8cc07f,
+ 0xc04b1d3d, 0x00000060,
+ 0xbf8cc07f, 0xc0431e7d,
+ 0x00000074, 0xbf8cc07f,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0xbef1007c, 0xbef00080,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xbefe007c,
+ 0xbefc0070, 0xc0611c7a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
- 0xc0611b7a, 0x0000007c,
+ 0xc0611b3a, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611bba,
+ 0xbefc0070, 0xc0611b7a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
- 0xc0611bfa, 0x0000007c,
+ 0xc0611bba, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611e3a,
- 0x0000007c, 0xbf8cc07f,
- 0x80708470, 0xbefc007e,
- 0xb8fbf803, 0xbefe007c,
- 0xbefc0070, 0xc0611efa,
+ 0xbefc0070, 0xc0611bfa,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
- 0xc0611a3a, 0x0000007c,
+ 0xc0611e3a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8fbf803,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611efa, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc0070, 0xc0611a7a,
- 0x0000007c, 0xbf8cc07f,
- 0x80708470, 0xbefc007e,
- 0xb8f1f801, 0xbefe007c,
- 0xbefc0070, 0xc0611c7a,
+ 0xbefc0070, 0xc0611a3a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
- 0x867aff7f, 0x04000000,
- 0xbeef0080, 0x876f6f7a,
- 0xb8f02985, 0x80708170,
- 0x8e708a70, 0x8e708170,
- 0xb8fb1605, 0x807b817b,
- 0x8e7b847b, 0x8e76827b,
- 0xbef600ff, 0x01000000,
- 0xbef20174, 0x80747074,
- 0x82758075, 0xbefc0080,
- 0xbf800000, 0xbe802b00,
- 0xbe822b02, 0xbe842b04,
- 0xbe862b06, 0xbe882b08,
- 0xbe8a2b0a, 0xbe8c2b0c,
- 0xbe8e2b0e, 0xc06b003a,
- 0x00000000, 0xbf8cc07f,
- 0xc06b013a, 0x00000010,
- 0xbf8cc07f, 0xc06b023a,
- 0x00000020, 0xbf8cc07f,
- 0xc06b033a, 0x00000030,
- 0xbf8cc07f, 0x8074c074,
- 0x82758075, 0x807c907c,
- 0xbf0a7b7c, 0xbf85ffe7,
- 0xbef40172, 0xbef00080,
- 0xbefe00c1, 0xbeff00c1,
- 0xbee80080, 0xbee90080,
- 0xbef600ff, 0x01000000,
- 0x867aff78, 0x00400000,
- 0xbf850003, 0xb8faf803,
- 0x897a7aff, 0x10000000,
- 0xbf85004d, 0xbe840080,
- 0xd2890000, 0x00000900,
- 0x80048104, 0xd2890001,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611a7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8f1f801,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611c7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0x867aff7f,
+ 0x04000000, 0xbeef0080,
+ 0x876f6f7a, 0xb8f02985,
+ 0x80708170, 0x8e708a70,
+ 0x8e708170, 0xb8fb1605,
+ 0x807b817b, 0x8e7b847b,
+ 0x8e76827b, 0xbef600ff,
+ 0x01000000, 0xbef20174,
+ 0x80747074, 0x82758075,
+ 0xbefc0080, 0xbf800000,
+ 0xbe802b00, 0xbe822b02,
+ 0xbe842b04, 0xbe862b06,
+ 0xbe882b08, 0xbe8a2b0a,
+ 0xbe8c2b0c, 0xbe8e2b0e,
+ 0xc06b003a, 0x00000000,
+ 0xbf8cc07f, 0xc06b013a,
+ 0x00000010, 0xbf8cc07f,
+ 0xc06b023a, 0x00000020,
+ 0xbf8cc07f, 0xc06b033a,
+ 0x00000030, 0xbf8cc07f,
+ 0x8074c074, 0x82758075,
+ 0x807c907c, 0xbf0a7b7c,
+ 0xbf85ffe7, 0xbef40172,
+ 0xbef00080, 0xbefe00c1,
+ 0xbeff00c1, 0xbee80080,
+ 0xbee90080, 0xbef600ff,
+ 0x01000000, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf85004d,
+ 0xbe840080, 0xd2890000,
0x00000900, 0x80048104,
- 0xd2890002, 0x00000900,
- 0x80048104, 0xd2890003,
+ 0xd2890001, 0x00000900,
+ 0x80048104, 0xd2890002,
0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000901,
+ 0x80048104, 0xd2890001,
+ 0x00000901, 0x80048104,
+ 0xd2890002, 0x00000901,
+ 0x80048104, 0xd2890003,
+ 0x00000901, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000901, 0x80048104,
- 0xd2890001, 0x00000901,
+ 0x00000902, 0x80048104,
+ 0xd2890001, 0x00000902,
0x80048104, 0xd2890002,
- 0x00000901, 0x80048104,
- 0xd2890003, 0x00000901,
+ 0x00000902, 0x80048104,
+ 0xd2890003, 0x00000902,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000902,
+ 0xd2890000, 0x00000903,
0x80048104, 0xd2890001,
- 0x00000902, 0x80048104,
- 0xd2890002, 0x00000902,
+ 0x00000903, 0x80048104,
+ 0xd2890002, 0x00000903,
0x80048104, 0xd2890003,
- 0x00000902, 0x80048104,
+ 0x00000903, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbf820008, 0xe0724000,
+ 0x701d0000, 0xe0724100,
+ 0x701d0100, 0xe0724200,
+ 0x701d0200, 0xe0724300,
+ 0x701d0300, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb4306,
+ 0x867bc17b, 0xbf840064,
+ 0xbf8a0000, 0x867aff6f,
+ 0x04000000, 0xbf840060,
+ 0x8e7b867b, 0x8e7b827b,
+ 0xbef6007b, 0xb8f02985,
+ 0x80708170, 0x8e708a70,
+ 0x8e708170, 0xb8fa1605,
+ 0x807a817a, 0x8e7a867a,
+ 0x80707a70, 0x8070ff70,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xd28c0002, 0x000100c1,
+ 0xd28d0003, 0x000204c1,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf850030, 0x24040682,
+ 0xd86e4000, 0x00000002,
+ 0xbf8cc07f, 0xbe840080,
+ 0xd2890000, 0x00000900,
+ 0x80048104, 0xd2890001,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
+ 0x80048104, 0xd2890003,
+ 0x00000900, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000903, 0x80048104,
- 0xd2890001, 0x00000903,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
0x80048104, 0xd2890002,
- 0x00000903, 0x80048104,
- 0xd2890003, 0x00000903,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbf820008,
- 0xe0724000, 0x701d0000,
- 0xe0724100, 0x701d0100,
- 0xe0724200, 0x701d0200,
- 0xe0724300, 0x701d0300,
+ 0xbf84ffee, 0x680404ff,
+ 0x00000200, 0xd0c9006a,
+ 0x0000f702, 0xbf87ffd2,
+ 0xbf820015, 0xd1060002,
+ 0x00011103, 0x7e0602ff,
+ 0x00000200, 0xbefc00ff,
+ 0x00010000, 0xbe800077,
+ 0x8677ff77, 0xff7fffff,
+ 0x8777ff77, 0x00058000,
+ 0xd8ec0000, 0x00000002,
+ 0xbf8cc07f, 0xe0765000,
+ 0x701d0002, 0x68040702,
+ 0xd0c9006a, 0x0000f702,
+ 0xbf87fff7, 0xbef70000,
+ 0xbef000ff, 0x00000400,
0xbefe00c1, 0xbeff00c1,
- 0xb8fb4306, 0x867bc17b,
- 0xbf840064, 0xbf8a0000,
- 0x867aff6f, 0x04000000,
- 0xbf840060, 0x8e7b867b,
- 0x8e7b827b, 0xbef6007b,
- 0xb8f02985, 0x80708170,
- 0x8e708a70, 0x8e708170,
- 0xb8fa1605, 0x807a817a,
- 0x8e7a867a, 0x80707a70,
- 0x8070ff70, 0x00000080,
- 0xbef600ff, 0x01000000,
- 0xbefc0080, 0xd28c0002,
- 0x000100c1, 0xd28d0003,
- 0x000204c1, 0x867aff78,
+ 0xb8fb2b05, 0x807b817b,
+ 0x8e7b827b, 0xbef600ff,
+ 0x01000000, 0xbefc0084,
+ 0xbf0a7b7c, 0xbf84006d,
+ 0xbf11017c, 0x807bff7b,
+ 0x00001000, 0x867aff78,
0x00400000, 0xbf850003,
0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850030,
- 0x24040682, 0xd86e4000,
- 0x00000002, 0xbf8cc07f,
+ 0x10000000, 0xbf850051,
0xbe840080, 0xd2890000,
0x00000900, 0x80048104,
0xd2890001, 0x00000900,
@@ -3162,31 +3221,51 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0x680404ff, 0x00000200,
- 0xd0c9006a, 0x0000f702,
- 0xbf87ffd2, 0xbf820015,
- 0xd1060002, 0x00011103,
- 0x7e0602ff, 0x00000200,
- 0xbefc00ff, 0x00010000,
- 0xbe800077, 0x8677ff77,
- 0xff7fffff, 0x8777ff77,
- 0x00058000, 0xd8ec0000,
- 0x00000002, 0xbf8cc07f,
- 0xe0765000, 0x701d0002,
- 0x68040702, 0xd0c9006a,
- 0x0000f702, 0xbf87fff7,
- 0xbef70000, 0xbef000ff,
- 0x00000400, 0xbefe00c1,
- 0xbeff00c1, 0xb8fb2b05,
- 0x807b817b, 0x8e7b827b,
- 0xbef600ff, 0x01000000,
- 0xbefc0084, 0xbf0a7b7c,
- 0xbf84006d, 0xbf11017c,
+ 0xbe840080, 0xd2890000,
+ 0x00000902, 0x80048104,
+ 0xd2890001, 0x00000902,
+ 0x80048104, 0xd2890002,
+ 0x00000902, 0x80048104,
+ 0xd2890003, 0x00000902,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000903,
+ 0x80048104, 0xd2890001,
+ 0x00000903, 0x80048104,
+ 0xd2890002, 0x00000903,
+ 0x80048104, 0xd2890003,
+ 0x00000903, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0x807c847c, 0xbf0a7b7c,
+ 0xbf85ffb1, 0xbf9c0000,
+ 0xbf820012, 0x7e000300,
+ 0x7e020301, 0x7e040302,
+ 0x7e060303, 0xe0724000,
+ 0x701d0000, 0xe0724100,
+ 0x701d0100, 0xe0724200,
+ 0x701d0200, 0xe0724300,
+ 0x701d0300, 0x807c847c,
+ 0x8070ff70, 0x00000400,
+ 0xbf0a7b7c, 0xbf85ffef,
+ 0xbf9c0000, 0xb8fb2985,
+ 0x807b817b, 0x8e7b837b,
+ 0xb8fa2b05, 0x807a817a,
+ 0x8e7a827a, 0x80fb7a7b,
+ 0x867b7b7b, 0xbf84007a,
0x807bff7b, 0x00001000,
+ 0xbefc0080, 0xbf11017c,
0x867aff78, 0x00400000,
0xbf850003, 0xb8faf803,
0x897a7aff, 0x10000000,
- 0xbf850051, 0xbe840080,
+ 0xbf850059, 0xd3d84000,
+ 0x18000100, 0xd3d84001,
+ 0x18000101, 0xd3d84002,
+ 0x18000102, 0xd3d84003,
+ 0x18000103, 0xbe840080,
0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
0x00000900, 0x80048104,
@@ -3225,201 +3304,137 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0x807c847c,
- 0xbf0a7b7c, 0xbf85ffb1,
- 0xbf9c0000, 0xbf820012,
- 0x7e000300, 0x7e020301,
- 0x7e040302, 0x7e060303,
+ 0xbf0a7b7c, 0xbf85ffa9,
+ 0xbf9c0000, 0xbf820016,
+ 0xd3d84000, 0x18000100,
+ 0xd3d84001, 0x18000101,
+ 0xd3d84002, 0x18000102,
+ 0xd3d84003, 0x18000103,
0xe0724000, 0x701d0000,
0xe0724100, 0x701d0100,
0xe0724200, 0x701d0200,
0xe0724300, 0x701d0300,
0x807c847c, 0x8070ff70,
0x00000400, 0xbf0a7b7c,
- 0xbf85ffef, 0xbf9c0000,
- 0xb8fb2985, 0x807b817b,
- 0x8e7b837b, 0xb8fa2b05,
- 0x807a817a, 0x8e7a827a,
- 0x80fb7a7b, 0x867b7b7b,
- 0xbf84007a, 0x807bff7b,
- 0x00001000, 0xbefc0080,
- 0xbf11017c, 0x867aff78,
- 0x00400000, 0xbf850003,
- 0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850059,
- 0xd3d84000, 0x18000100,
- 0xd3d84001, 0x18000101,
- 0xd3d84002, 0x18000102,
- 0xd3d84003, 0x18000103,
- 0xbe840080, 0xd2890000,
- 0x00000900, 0x80048104,
- 0xd2890001, 0x00000900,
- 0x80048104, 0xd2890002,
- 0x00000900, 0x80048104,
- 0xd2890003, 0x00000900,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000901,
- 0x80048104, 0xd2890001,
- 0x00000901, 0x80048104,
- 0xd2890002, 0x00000901,
- 0x80048104, 0xd2890003,
- 0x00000901, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
- 0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
- 0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
- 0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0x807c847c, 0xbf0a7b7c,
- 0xbf85ffa9, 0xbf9c0000,
- 0xbf820016, 0xd3d84000,
- 0x18000100, 0xd3d84001,
- 0x18000101, 0xd3d84002,
- 0x18000102, 0xd3d84003,
- 0x18000103, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0x807c847c,
- 0x8070ff70, 0x00000400,
- 0xbf0a7b7c, 0xbf85ffeb,
- 0xbf9c0000, 0xbf8200ee,
- 0xbef4007e, 0x8675ff7f,
- 0x0000ffff, 0x8775ff75,
- 0x00040000, 0xbef60080,
- 0xbef700ff, 0x00807fac,
- 0x866eff7f, 0x04000000,
- 0xbf84001f, 0xbefe00c1,
- 0xbeff00c1, 0xb8ef4306,
- 0x866fc16f, 0xbf84001a,
- 0x8e6f866f, 0x8e6f826f,
- 0xbef6006f, 0xb8f82985,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0x8078ff78,
- 0x00000080, 0xbef600ff,
- 0x01000000, 0xbefc0080,
- 0xe0510000, 0x781d0000,
- 0xe0510100, 0x781d0000,
- 0x807cff7c, 0x00000200,
- 0x8078ff78, 0x00000200,
- 0xbf0a6f7c, 0xbf85fff6,
+ 0xbf85ffeb, 0xbf9c0000,
+ 0xbf8200ee, 0xbef4007e,
+ 0x8675ff7f, 0x0000ffff,
+ 0x8775ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x00807fac, 0x866eff7f,
+ 0x04000000, 0xbf84001f,
0xbefe00c1, 0xbeff00c1,
+ 0xb8ef4306, 0x866fc16f,
+ 0xbf84001a, 0x8e6f866f,
+ 0x8e6f826f, 0xbef6006f,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x8078ff78, 0x00000080,
0xbef600ff, 0x01000000,
- 0xb8ef2b05, 0x806f816f,
- 0x8e6f826f, 0x806fff6f,
- 0x00008000, 0xbef80080,
- 0xbeee0078, 0x8078ff78,
- 0x00000400, 0xbefc0084,
+ 0xbefc0080, 0xe0510000,
+ 0x781d0000, 0xe0510100,
+ 0x781d0000, 0x807cff7c,
+ 0x00000200, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7c,
+ 0xbf85fff6, 0xbefe00c1,
+ 0xbeff00c1, 0xbef600ff,
+ 0x01000000, 0xb8ef2b05,
+ 0x806f816f, 0x8e6f826f,
+ 0x806fff6f, 0x00008000,
+ 0xbef80080, 0xbeee0078,
+ 0x8078ff78, 0x00000400,
+ 0xbefc0084, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0x7e000300,
+ 0x7e020301, 0x7e040302,
+ 0x7e060303, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffee,
+ 0xb8ef2985, 0x806f816f,
+ 0x8e6f836f, 0xb8f92b05,
+ 0x80798179, 0x8e798279,
+ 0x80ef796f, 0x866f6f6f,
+ 0xbf84001a, 0x806fff6f,
+ 0x00008000, 0xbefc0080,
0xbf11087c, 0xe0524000,
0x781d0000, 0xe0524100,
0x781d0100, 0xe0524200,
0x781d0200, 0xe0524300,
0x781d0300, 0xbf8c0f70,
- 0x7e000300, 0x7e020301,
- 0x7e040302, 0x7e060303,
+ 0xd3d94000, 0x18000100,
+ 0xd3d94001, 0x18000101,
+ 0xd3d94002, 0x18000102,
+ 0xd3d94003, 0x18000103,
0x807c847c, 0x8078ff78,
0x00000400, 0xbf0a6f7c,
- 0xbf85ffee, 0xb8ef2985,
- 0x806f816f, 0x8e6f836f,
- 0xb8f92b05, 0x80798179,
- 0x8e798279, 0x80ef796f,
- 0x866f6f6f, 0xbf84001a,
- 0x806fff6f, 0x00008000,
- 0xbefc0080, 0xbf11087c,
- 0xe0524000, 0x781d0000,
- 0xe0524100, 0x781d0100,
- 0xe0524200, 0x781d0200,
- 0xe0524300, 0x781d0300,
- 0xbf8c0f70, 0xd3d94000,
- 0x18000100, 0xd3d94001,
- 0x18000101, 0xd3d94002,
- 0x18000102, 0xd3d94003,
- 0x18000103, 0x807c847c,
- 0x8078ff78, 0x00000400,
- 0xbf0a6f7c, 0xbf85ffea,
- 0xbf9c0000, 0xe0524000,
- 0x6e1d0000, 0xe0524100,
- 0x6e1d0100, 0xe0524200,
- 0x6e1d0200, 0xe0524300,
- 0x6e1d0300, 0xbf8c0f70,
- 0xb8f82985, 0x80788178,
- 0x8e788a78, 0x8e788178,
- 0xb8ee1605, 0x806e816e,
- 0x8e6e866e, 0x80786e78,
- 0x80f8c078, 0xb8ef1605,
- 0x806f816f, 0x8e6f846f,
- 0x8e76826f, 0xbef600ff,
- 0x01000000, 0xbefc006f,
- 0xc031003a, 0x00000078,
- 0x80f8c078, 0xbf8cc07f,
- 0x80fc907c, 0xbf800000,
- 0xbe802d00, 0xbe822d02,
- 0xbe842d04, 0xbe862d06,
- 0xbe882d08, 0xbe8a2d0a,
- 0xbe8c2d0c, 0xbe8e2d0e,
- 0xbf06807c, 0xbf84fff0,
- 0xb8f82985, 0x80788178,
- 0x8e788a78, 0x8e788178,
- 0xb8ee1605, 0x806e816e,
- 0x8e6e866e, 0x80786e78,
- 0xbef60084, 0xbef600ff,
- 0x01000000, 0xc0211bfa,
+ 0xbf85ffea, 0xbf9c0000,
+ 0xe0524000, 0x6e1d0000,
+ 0xe0524100, 0x6e1d0100,
+ 0xe0524200, 0x6e1d0200,
+ 0xe0524300, 0x6e1d0300,
+ 0xbf8c0f70, 0xb8f82985,
+ 0x80788178, 0x8e788a78,
+ 0x8e788178, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0x80f8c078,
+ 0xb8ef1605, 0x806f816f,
+ 0x8e6f846f, 0x8e76826f,
+ 0xbef600ff, 0x01000000,
+ 0xbefc006f, 0xc031003a,
+ 0x00000078, 0x80f8c078,
+ 0xbf8cc07f, 0x80fc907c,
+ 0xbf800000, 0xbe802d00,
+ 0xbe822d02, 0xbe842d04,
+ 0xbe862d06, 0xbe882d08,
+ 0xbe8a2d0a, 0xbe8c2d0c,
+ 0xbe8e2d0e, 0xbf06807c,
+ 0xbf84fff0, 0xb8f82985,
+ 0x80788178, 0x8e788a78,
+ 0x8e788178, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0xbef60084,
+ 0xbef600ff, 0x01000000,
+ 0xc0211bfa, 0x00000078,
+ 0x80788478, 0xc0211b3a,
0x00000078, 0x80788478,
- 0xc0211b3a, 0x00000078,
- 0x80788478, 0xc0211b7a,
+ 0xc0211b7a, 0x00000078,
+ 0x80788478, 0xc0211c3a,
0x00000078, 0x80788478,
- 0xc0211c3a, 0x00000078,
- 0x80788478, 0xc0211c7a,
+ 0xc0211c7a, 0x00000078,
+ 0x80788478, 0xc0211eba,
0x00000078, 0x80788478,
- 0xc0211eba, 0x00000078,
- 0x80788478, 0xc0211efa,
+ 0xc0211efa, 0x00000078,
+ 0x80788478, 0xc0211a3a,
0x00000078, 0x80788478,
- 0xc0211a3a, 0x00000078,
- 0x80788478, 0xc0211a7a,
+ 0xc0211a7a, 0x00000078,
+ 0x80788478, 0xc0211cfa,
0x00000078, 0x80788478,
- 0xc0211cfa, 0x00000078,
- 0x80788478, 0xbf8cc07f,
- 0xbefc006f, 0xbefe0070,
- 0xbeff0071, 0x866f7bff,
- 0x000003ff, 0xb96f4803,
- 0x866f7bff, 0xfffff800,
- 0x8f6f8b6f, 0xb96fa2c3,
- 0xb973f801, 0xb8ee2985,
- 0x806e816e, 0x8e6e8a6e,
- 0x8e6e816e, 0xb8ef1605,
- 0x806f816f, 0x8e6f866f,
- 0x806e6f6e, 0x806e746e,
- 0x826f8075, 0x866fff6f,
- 0x0000ffff, 0xc00b1c37,
- 0x00000050, 0xc00b1d37,
- 0x00000060, 0xc0031e77,
- 0x00000074, 0xbf8cc07f,
- 0x8f6e8b79, 0x866eff6e,
- 0x001f8000, 0xb96ef807,
- 0x866dff6d, 0x0000ffff,
- 0x86fe7e7e, 0x86ea6a6a,
- 0x8f6e837a, 0xb96ee0c2,
- 0xbf800002, 0xb97a0002,
- 0xbf8a0000, 0xbe801f6c,
- 0xbf810000, 0x00000000,
+ 0xbf8cc07f, 0xbefc006f,
+ 0xbefe0070, 0xbeff0071,
+ 0x866f7bff, 0x000003ff,
+ 0xb96f4803, 0x866f7bff,
+ 0xfffff800, 0x8f6f8b6f,
+ 0xb96fa2c3, 0xb973f801,
+ 0xb8ee2985, 0x806e816e,
+ 0x8e6e8a6e, 0x8e6e816e,
+ 0xb8ef1605, 0x806f816f,
+ 0x8e6f866f, 0x806e6f6e,
+ 0x806e746e, 0x826f8075,
+ 0x866fff6f, 0x0000ffff,
+ 0xc00b1c37, 0x00000050,
+ 0xc00b1d37, 0x00000060,
+ 0xc0031e77, 0x00000074,
+ 0xbf8cc07f, 0x8f6e8b79,
+ 0x866eff6e, 0x001f8000,
+ 0xb96ef807, 0x866dff6d,
+ 0x0000ffff, 0x86fe7e7e,
+ 0x86ea6a6a, 0x8f6e837a,
+ 0xb96ee0c2, 0xbf800002,
+ 0xb97a0002, 0xbf8a0000,
+ 0xbe801f6c, 0xbf810000,
};
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index 8b92c33c2a7c..fdab64624422 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -276,6 +276,11 @@ L_FETCH_2ND_TRAP:
#endif
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
+ s_bitcmp1_b32 ttmp15, 0xF
+ s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA
+ s_or_b32 ttmp15, ttmp15, 0xFFFF0000
+L_NO_SIGN_EXTEND_TMA:
+
s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
s_waitcnt lgkmcnt(0)
s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index f2087cc2e89d..e506411ad28a 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -283,6 +283,11 @@ L_FETCH_2ND_TRAP:
s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI)
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
+ s_bitcmp1_b32 ttmp15, 0xF
+ s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA
+ s_or_b32 ttmp15, ttmp15, 0xFFFF0000
+L_NO_SIGN_EXTEND_TMA:
+
s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
s_waitcnt lgkmcnt(0)
s_lshl_b32 ttmp2, ttmp2, TTMP_DEBUG_TRAP_ENABLED_SHIFT
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6a27b000a246..c37f1fcd2165 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -333,10 +333,12 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
goto err_bind_process;
}
- if (!pdd->doorbell_index &&
- kfd_alloc_process_doorbells(dev->kfd, &pdd->doorbell_index) < 0) {
- err = -ENOMEM;
- goto err_alloc_doorbells;
+ if (!pdd->qpd.proc_doorbells) {
+ err = kfd_alloc_process_doorbells(dev->kfd, pdd);
+ if (err) {
+ pr_debug("failed to allocate process doorbells\n");
+ goto err_bind_process;
+ }
}
/* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
@@ -417,7 +419,6 @@ err_create_queue:
if (wptr_bo)
amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo);
err_wptr_map_gart:
-err_alloc_doorbells:
err_bind_process:
err_pdd:
mutex_unlock(&p->mutex);
@@ -1025,9 +1026,6 @@ bool kfd_dev_is_large_bar(struct kfd_node *dev)
return true;
}
- if (dev->kfd->use_iommu_v2)
- return false;
-
if (dev->local_mem_info.local_mem_size_private == 0 &&
dev->local_mem_info.local_mem_size_public > 0)
return true;
@@ -1487,7 +1485,8 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep,
goto out_unlock;
}
- if (!kfd_dbg_has_gws_support(dev) && p->debug_trap_enabled) {
+ if (p->debug_trap_enabled && (!kfd_dbg_has_gws_support(dev) ||
+ kfd_dbg_has_cwsr_workaround(dev))) {
retval = -EBUSY;
goto out_unlock;
}
@@ -1845,22 +1844,21 @@ static uint32_t get_process_num_bos(struct kfd_process *p)
idr_for_each_entry(&pdd->alloc_idr, mem, id) {
struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
- if ((uint64_t)kgd_mem->va > pdd->gpuvm_base)
+ if (!kgd_mem->va || kgd_mem->va > pdd->gpuvm_base)
num_of_bos++;
}
}
return num_of_bos;
}
-static int criu_get_prime_handle(struct drm_gem_object *gobj, int flags,
+static int criu_get_prime_handle(struct kgd_mem *mem, int flags,
u32 *shared_fd)
{
struct dma_buf *dmabuf;
int ret;
- dmabuf = amdgpu_gem_prime_export(gobj, flags);
- if (IS_ERR(dmabuf)) {
- ret = PTR_ERR(dmabuf);
+ ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf);
+ if (ret) {
pr_err("dmabuf export failed for the BO\n");
return ret;
}
@@ -1918,7 +1916,11 @@ static int criu_checkpoint_bos(struct kfd_process *p,
kgd_mem = (struct kgd_mem *)mem;
dumper_bo = kgd_mem->bo;
- if ((uint64_t)kgd_mem->va <= pdd->gpuvm_base)
+ /* Skip checkpointing BOs that are used for Trap handler
+ * code and state. Currently, these BOs have a VA that
+ * is less than or equal to the GPUVM base
+ */
+ if (kgd_mem->va && kgd_mem->va <= pdd->gpuvm_base)
continue;
bo_bucket = &bo_buckets[bo_index];
@@ -1940,7 +1942,7 @@ static int criu_checkpoint_bos(struct kfd_process *p,
}
if (bo_bucket->alloc_flags
& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
- ret = criu_get_prime_handle(&dumper_bo->tbo.base,
+ ret = criu_get_prime_handle(kgd_mem,
bo_bucket->alloc_flags &
KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
&bo_bucket->dmabuf_fd);
@@ -2262,10 +2264,10 @@ static int criu_restore_devices(struct kfd_process *p,
goto exit;
}
- if (!pdd->doorbell_index &&
- kfd_alloc_process_doorbells(pdd->dev->kfd, &pdd->doorbell_index) < 0) {
- ret = -ENOMEM;
- goto exit;
+ if (!pdd->qpd.proc_doorbells) {
+ ret = kfd_alloc_process_doorbells(dev->kfd, pdd);
+ if (ret)
+ goto exit;
}
}
@@ -2402,7 +2404,7 @@ static int criu_restore_bo(struct kfd_process *p,
/* create the dmabuf object and export the bo */
if (bo_bucket->alloc_flags
& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
- ret = criu_get_prime_handle(&kgd_mem->bo->tbo.base, DRM_RDWR,
+ ret = criu_get_prime_handle(kgd_mem, DRM_RDWR,
&bo_bucket->dmabuf_fd);
if (ret)
return ret;
@@ -2755,6 +2757,16 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
if (pdd->qpd.queue_count)
return -EEXIST;
+
+ /*
+ * Setup TTMPs by default.
+ * Note that this call must remain here for MES ADD QUEUE to
+ * skip_process_ctx_clear unconditionally as the first call to
+ * SET_SHADER_DEBUGGER clears any stale process context data
+ * saved in MES.
+ */
+ if (pdd->dev->kfd->shared_resources.enable_mes)
+ kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
}
p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
@@ -2848,7 +2860,8 @@ static int runtime_disable(struct kfd_process *p)
if (!pdd->dev->kfd->shared_resources.enable_mes)
debug_refresh_runlist(pdd->dev->dqm);
else
- kfd_dbg_set_mes_debug_mode(pdd);
+ kfd_dbg_set_mes_debug_mode(pdd,
+ !kfd_dbg_has_cwsr_workaround(pdd->dev));
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 49f40d9f16e8..86fb7ac7982a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -26,7 +26,6 @@
#include "kfd_crat.h"
#include "kfd_priv.h"
#include "kfd_topology.h"
-#include "kfd_iommu.h"
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
@@ -1536,76 +1535,6 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc
return num_of_cache_types;
}
-static bool kfd_ignore_crat(void)
-{
- bool ret;
-
- if (ignore_crat)
- return true;
-
-#ifndef KFD_SUPPORT_IOMMU_V2
- ret = true;
-#else
- ret = false;
-#endif
-
- return ret;
-}
-
-/*
- * kfd_create_crat_image_acpi - Allocates memory for CRAT image and
- * copies CRAT from ACPI (if available).
- * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
- *
- * @crat_image: CRAT read from ACPI. If no CRAT in ACPI then
- * crat_image will be NULL
- * @size: [OUT] size of crat_image
- *
- * Return 0 if successful else return error code
- */
-int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
-{
- struct acpi_table_header *crat_table;
- acpi_status status;
- void *pcrat_image;
- int rc = 0;
-
- if (!crat_image)
- return -EINVAL;
-
- *crat_image = NULL;
-
- if (kfd_ignore_crat()) {
- pr_info("CRAT table disabled by module option\n");
- return -ENODATA;
- }
-
- /* Fetch the CRAT table from ACPI */
- status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
- if (status == AE_NOT_FOUND) {
- pr_info("CRAT table not found\n");
- return -ENODATA;
- } else if (ACPI_FAILURE(status)) {
- const char *err = acpi_format_exception(status);
-
- pr_err("CRAT table error: %s\n", err);
- return -EINVAL;
- }
-
- pcrat_image = kvmalloc(crat_table->length, GFP_KERNEL);
- if (!pcrat_image) {
- rc = -ENOMEM;
- goto out;
- }
-
- memcpy(pcrat_image, crat_table, crat_table->length);
- *crat_image = pcrat_image;
- *size = crat_table->length;
-out:
- acpi_put_table(crat_table);
- return rc;
-}
-
/* Memory required to create Virtual CRAT.
* Since there is no easy way to predict the amount of memory required, the
* following amount is allocated for GPU Virtual CRAT. This is
@@ -2173,12 +2102,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
cu->hsa_capability = 0;
- /* Check if this node supports IOMMU. During parsing this flag will
- * translate to HSA_CAP_ATS_PRESENT
- */
- if (!kfd_iommu_check_device(kdev->kfd))
- cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
-
crat_table->length += sub_type_hdr->length;
crat_table->total_entries++;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
index fc719389b5d6..387a8ef49385 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -307,7 +307,6 @@ struct kfd_gpu_cache_info {
};
int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info);
-int kfd_create_crat_image_acpi(void **crat_image, size_t *size);
void kfd_destroy_crat_image(void *crat_image);
int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
uint32_t proximity_domain);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index fff3ccc04fa9..9ec750666382 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -302,8 +302,7 @@ static int kfd_dbg_set_queue_workaround(struct queue *q, bool enable)
if (!q)
return 0;
- if (KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) ||
- KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0, 0))
+ if (!kfd_dbg_has_cwsr_workaround(q->device))
return 0;
if (enable && q->properties.is_user_cu_masked)
@@ -345,11 +344,10 @@ unwind:
return r;
}
-int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd)
+int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
{
uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
uint32_t flags = pdd->process->dbg_flags;
- bool sq_trap_en = !!spi_dbg_cntl;
if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
return 0;
@@ -433,7 +431,7 @@ int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_map_and_unlock(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
kfd_dbg_clear_dev_watch_id(pdd, watch_id);
@@ -446,7 +444,8 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
uint32_t *watch_id,
uint32_t watch_mode)
{
- int r = kfd_dbg_get_dev_watch_id(pdd, watch_id);
+ int xcc_id, r = kfd_dbg_get_dev_watch_id(pdd, watch_id);
+ uint32_t xcc_mask = pdd->dev->xcc_mask;
if (r)
return r;
@@ -460,19 +459,21 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
}
amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
- pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch(
+ for_each_inst(xcc_id, xcc_mask)
+ pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch(
pdd->dev->adev,
watch_address,
watch_address_mask,
*watch_id,
watch_mode,
- pdd->dev->vm_info.last_vmid_kfd);
+ pdd->dev->vm_info.last_vmid_kfd,
+ xcc_id);
amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_map_and_unlock(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
/* HWS is broken so no point in HW rollback but release the watchpoint anyways */
if (r)
@@ -514,7 +515,7 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_refresh_runlist(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
if (r) {
target->dbg_flags = prev_flags;
@@ -537,7 +538,7 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
if (!pdd->dev->kfd->shared_resources.enable_mes)
debug_refresh_runlist(pdd->dev->dqm);
else
- kfd_dbg_set_mes_debug_mode(pdd);
+ kfd_dbg_set_mes_debug_mode(pdd, true);
}
}
@@ -599,7 +600,7 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
if (!pdd->dev->kfd->shared_resources.enable_mes)
debug_refresh_runlist(pdd->dev->dqm);
else
- kfd_dbg_set_mes_debug_mode(pdd);
+ kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
}
kfd_dbg_set_workaround(target, false);
@@ -715,7 +716,7 @@ int kfd_dbg_trap_activate(struct kfd_process *target)
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_refresh_runlist(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
if (r) {
target->runtime_info.runtime_state =
@@ -751,7 +752,8 @@ int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
if (!KFD_IS_SOC15(pdd->dev))
return -ENODEV;
- if (!kfd_dbg_has_gws_support(pdd->dev) && pdd->qpd.num_gws)
+ if (pdd->qpd.num_gws && (!kfd_dbg_has_gws_support(pdd->dev) ||
+ kfd_dbg_has_cwsr_workaround(pdd->dev)))
return -EBUSY;
}
@@ -848,7 +850,7 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_refresh_runlist(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
if (r)
break;
@@ -880,7 +882,7 @@ int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_refresh_runlist(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
if (r)
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
index a289e59ceb79..fd0ff64d4184 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -76,8 +76,9 @@ int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
{
- return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
- KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0);
+ return (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0));
}
void debug_event_write_work_handler(struct work_struct *work);
@@ -100,6 +101,12 @@ static inline bool kfd_dbg_is_rlc_restore_supported(struct kfd_node *dev)
KFD_GC_VERSION(dev) == IP_VERSION(10, 1, 1));
}
+static inline bool kfd_dbg_has_cwsr_workaround(struct kfd_node *dev)
+{
+ return KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
+ KFD_GC_VERSION(dev) <= IP_VERSION(11, 0, 3);
+}
+
static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev)
{
if ((KFD_GC_VERSION(dev) == IP_VERSION(9, 0, 1)
@@ -119,5 +126,14 @@ static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev)
return true;
}
-int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd);
+int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en);
+
+static inline bool kfd_dbg_has_ttmps_always_setup(struct kfd_node *dev)
+{
+ return (KFD_GC_VERSION(dev) < IP_VERSION(11, 0, 0) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 2)) ||
+ (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
+ KFD_GC_VERSION(dev) < IP_VERSION(12, 0, 0) &&
+ (dev->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 70);
+}
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 0b3dc754e06b..93ce181eb3ba 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -29,7 +29,6 @@
#include "kfd_pm4_headers_vi.h"
#include "kfd_pm4_headers_aldebaran.h"
#include "cwsr_trap_handler.h"
-#include "kfd_iommu.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
#include "kfd_svm.h"
@@ -62,7 +61,6 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
unsigned int chunk_size);
static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
-static int kfd_resume_iommu(struct kfd_dev *kfd);
static int kfd_resume(struct kfd_node *kfd);
static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
@@ -194,11 +192,6 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
kfd_device_info_set_event_interrupt_class(kfd);
- /* Raven */
- if (gc_version == IP_VERSION(9, 1, 0) ||
- gc_version == IP_VERSION(9, 2, 2))
- kfd->device_info.needs_iommu_device = true;
-
if (gc_version < IP_VERSION(11, 0, 0)) {
/* Navi2x+, Navi1x+ */
if (gc_version == IP_VERSION(10, 3, 6))
@@ -233,10 +226,6 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
asic_type != CHIP_TONGA)
kfd->device_info.supports_cwsr = true;
- if (asic_type == CHIP_KAVERI ||
- asic_type == CHIP_CARRIZO)
- kfd->device_info.needs_iommu_device = true;
-
if (asic_type != CHIP_HAWAII && !vf)
kfd->device_info.needs_pci_atomics = true;
}
@@ -249,7 +238,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
uint32_t gfx_target_version = 0;
switch (adev->asic_type) {
-#ifdef KFD_SUPPORT_IOMMU_V2
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
gfx_target_version = 70000;
@@ -262,7 +250,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
if (!vf)
f2g = &gfx_v8_kfd2kgd;
break;
-#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_HAWAII:
gfx_target_version = 70001;
@@ -298,7 +285,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
gfx_target_version = 90000;
f2g = &gfx_v9_kfd2kgd;
break;
-#ifdef KFD_SUPPORT_IOMMU_V2
/* Raven */
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 2):
@@ -306,7 +292,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
if (!vf)
f2g = &gfx_v9_kfd2kgd;
break;
-#endif
/* Vega12 */
case IP_VERSION(9, 2, 1):
gfx_target_version = 90004;
@@ -455,8 +440,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
atomic_set(&kfd->compute_profile, 0);
mutex_init(&kfd->doorbell_mutex);
- memset(&kfd->doorbell_available_index, 0,
- sizeof(kfd->doorbell_available_index));
ida_init(&kfd->doorbell_ida);
@@ -508,6 +491,7 @@ static int kfd_gws_init(struct kfd_node *node)
{
int ret = 0;
struct kfd_dev *kfd = node->kfd;
+ uint32_t mes_rev = node->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
if (node->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
return 0;
@@ -524,7 +508,10 @@ static int kfd_gws_init(struct kfd_node *node)
(KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3)) ||
(KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0)
&& KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0)
- && kfd->mec2_fw_version >= 0x6b))))
+ && kfd->mec2_fw_version >= 0x6b) ||
+ (KFD_GC_VERSION(node) >= IP_VERSION(11, 0, 0)
+ && KFD_GC_VERSION(node) < IP_VERSION(12, 0, 0)
+ && mes_rev >= 68))))
ret = amdgpu_amdkfd_alloc_gws(node->adev,
node->adev->gds.gws_size, &node->gws);
@@ -766,15 +753,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd->noretry = kfd->adev->gmc.noretry;
- /* If CRAT is broken, won't set iommu enabled */
- kfd_double_confirm_iommu_support(kfd);
-
- if (kfd_iommu_device_init(kfd)) {
- kfd->use_iommu_v2 = false;
- dev_err(kfd_device, "Error initializing iommuv2\n");
- goto device_iommu_error;
- }
-
kfd_cwsr_init(kfd);
dev_info(kfd_device, "Total number of KFD nodes to be created: %d\n",
@@ -849,9 +827,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
svm_range_set_max_pages(kfd->adev);
- if (kfd_resume_iommu(kfd))
- goto kfd_resume_iommu_error;
-
spin_lock_init(&kfd->watch_points_lock);
kfd->init_complete = true;
@@ -863,11 +838,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto out;
-kfd_resume_iommu_error:
node_init_error:
node_alloc_error:
kfd_cleanup_nodes(kfd, i);
-device_iommu_error:
kfd_doorbell_fini(kfd);
kfd_doorbell_error:
kfd_gtt_sa_fini(kfd);
@@ -982,7 +955,6 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
node = kfd->nodes[i];
node->dqm->ops.stop(node->dqm);
}
- kfd_iommu_suspend(kfd);
}
int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
@@ -1012,26 +984,6 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
return ret;
}
-int kgd2kfd_resume_iommu(struct kfd_dev *kfd)
-{
- if (!kfd->init_complete)
- return 0;
-
- return kfd_resume_iommu(kfd);
-}
-
-static int kfd_resume_iommu(struct kfd_dev *kfd)
-{
- int err = 0;
-
- err = kfd_iommu_resume(kfd);
- if (err)
- dev_err(kfd_device,
- "Failed to resume IOMMU for device %x:%x\n",
- kfd->adev->pdev->vendor, kfd->adev->pdev->device);
- return err;
-}
-
static int kfd_resume(struct kfd_node *node)
{
int err = 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index f515cb8f30ca..b166f30f083e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -226,9 +226,9 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
queue_input.paging = false;
queue_input.tba_addr = qpd->tba_addr;
queue_input.tma_addr = qpd->tma_addr;
- queue_input.trap_en = KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) ||
- KFD_GC_VERSION(q->device) > IP_VERSION(11, 0, 3);
- queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled;
+ queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
+ queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled ||
+ kfd_dbg_has_ttmps_always_setup(q->device);
queue_type = convert_to_mes_queue_type(q->properties.type);
if (queue_type < 0) {
@@ -238,10 +238,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
}
queue_input.queue_type = (uint32_t)queue_type;
- if (q->gws) {
- queue_input.gws_base = 0;
- queue_input.gws_size = qpd->num_gws;
- }
+ queue_input.exclusively_scheduled = q->properties.is_gws;
amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
@@ -251,7 +248,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
q->properties.doorbell_off);
pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
kfd_hws_hang(dqm);
-}
+ }
return r;
}
@@ -398,7 +395,7 @@ static int allocate_doorbell(struct qcm_process_device *qpd,
unsigned int found;
found = find_first_zero_bit(qpd->doorbell_bitmap,
- KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+ KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
pr_debug("No doorbells available");
return -EBUSY;
@@ -408,9 +405,9 @@ static int allocate_doorbell(struct qcm_process_device *qpd,
}
}
- q->properties.doorbell_off =
- kfd_get_doorbell_dw_offset_in_bar(dev->kfd, qpd_to_pdd(qpd),
- q->doorbell_id);
+ q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev,
+ qpd->proc_doorbells,
+ q->doorbell_id);
return 0;
}
@@ -1621,7 +1618,8 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
if (dqm->dev->kfd2kgd->get_iq_wait_times)
dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
- &dqm->wait_times);
+ &dqm->wait_times,
+ ffs(dqm->dev->xcc_mask) - 1);
return 0;
}
@@ -1663,6 +1661,26 @@ static int start_cpsch(struct device_queue_manager *dqm)
if (!dqm->dev->kfd->shared_resources.enable_mes)
execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
+
+ /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
+ if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu &&
+ (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) {
+ uint32_t reg_offset = 0;
+ uint32_t grace_period = 1;
+
+ retval = pm_update_grace_period(&dqm->packet_mgr,
+ grace_period);
+ if (retval)
+ pr_err("Setting grace timeout failed\n");
+ else if (dqm->dev->kfd2kgd->build_grace_period_packet_info)
+ /* Update dqm->wait_times maintained in software */
+ dqm->dev->kfd2kgd->build_grace_period_packet_info(
+ dqm->dev->adev, dqm->wait_times,
+ grace_period, &reg_offset,
+ &dqm->wait_times,
+ ffs(dqm->dev->xcc_mask) - 1);
+ }
+
dqm_unlock(dqm);
return 0;
@@ -1806,8 +1824,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
*/
q->properties.is_evicted = !!qpd->evicted;
q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled &&
- KFD_GC_VERSION(q->device) >= IP_VERSION(11, 0, 0) &&
- KFD_GC_VERSION(q->device) <= IP_VERSION(11, 0, 3);
+ kfd_dbg_has_cwsr_workaround(q->device);
if (qd)
mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
@@ -2540,32 +2557,26 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
}
switch (dev->adev->asic_type) {
- case CHIP_CARRIZO:
- device_queue_manager_init_vi(&dqm->asic_ops);
- break;
-
case CHIP_KAVERI:
- device_queue_manager_init_cik(&dqm->asic_ops);
- break;
-
case CHIP_HAWAII:
- device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
+ device_queue_manager_init_cik(&dqm->asic_ops);
break;
+ case CHIP_CARRIZO:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
- device_queue_manager_init_vi_tonga(&dqm->asic_ops);
+ device_queue_manager_init_vi(&dqm->asic_ops);
break;
default:
if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
device_queue_manager_init_v11(&dqm->asic_ops);
else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
- device_queue_manager_init_v10_navi10(&dqm->asic_ops);
+ device_queue_manager_init_v10(&dqm->asic_ops);
else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
device_queue_manager_init_v9(&dqm->asic_ops);
else {
@@ -2805,19 +2816,11 @@ static void copy_context_work_handler (struct work_struct *work)
static uint32_t *get_queue_ids(uint32_t num_queues, uint32_t *usr_queue_id_array)
{
size_t array_size = num_queues * sizeof(uint32_t);
- uint32_t *queue_ids = NULL;
if (!usr_queue_id_array)
return NULL;
- queue_ids = kzalloc(array_size, GFP_KERNEL);
- if (!queue_ids)
- return ERR_PTR(-ENOMEM);
-
- if (copy_from_user(queue_ids, usr_queue_id_array, array_size))
- return ERR_PTR(-EFAULT);
-
- return queue_ids;
+ return memdup_user(usr_queue_id_array, array_size);
}
int resume_queues(struct kfd_process *p,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 7dd4b177219d..cf7e182588f8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -269,15 +269,11 @@ struct device_queue_manager {
void device_queue_manager_init_cik(
struct device_queue_manager_asic_ops *asic_ops);
-void device_queue_manager_init_cik_hawaii(
- struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_vi(
struct device_queue_manager_asic_ops *asic_ops);
-void device_queue_manager_init_vi_tonga(
- struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_v9(
struct device_queue_manager_asic_ops *asic_ops);
-void device_queue_manager_init_v10_navi10(
+void device_queue_manager_init_v10(
struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_v11(
struct device_queue_manager_asic_ops *asic_ops);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index b1ab5b0775e1..d4d95c7f2e5d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -34,17 +34,13 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
static int update_qpd_cik(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd);
-static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd);
-static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
- struct qcm_process_device *qpd);
-static void init_sdma_vm_hawaii(struct device_queue_manager *dqm,
- struct queue *q,
- struct qcm_process_device *qpd);
+ struct qcm_process_device *qpd);
+static void init_sdma_vm(struct device_queue_manager *dqm,
+ struct queue *q,
+ struct qcm_process_device *qpd);
void device_queue_manager_init_cik(
- struct device_queue_manager_asic_ops *asic_ops)
+ struct device_queue_manager_asic_ops *asic_ops)
{
asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
asic_ops->update_qpd = update_qpd_cik;
@@ -52,15 +48,6 @@ void device_queue_manager_init_cik(
asic_ops->mqd_manager_init = mqd_manager_init_cik;
}
-void device_queue_manager_init_cik_hawaii(
- struct device_queue_manager_asic_ops *asic_ops)
-{
- asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
- asic_ops->update_qpd = update_qpd_cik_hawaii;
- asic_ops->init_sdma_vm = init_sdma_vm_hawaii;
- asic_ops->mqd_manager_init = mqd_manager_init_cik_hawaii;
-}
-
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
{
/* In 64-bit mode, we can only control the top 3 bits of the LDS,
@@ -115,41 +102,7 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
}
static int update_qpd_cik(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
-{
- struct kfd_process_device *pdd;
- unsigned int temp;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
- DEFAULT_MTYPE(MTYPE_NONCACHED) |
- APE1_MTYPE(MTYPE_NONCACHED);
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
-
- if (qpd->pqm->process->is_32bit_user_mode) {
- temp = get_sh_mem_bases_32(pdd);
- qpd->sh_mem_bases = SHARED_BASE(temp);
- qpd->sh_mem_config |= PTR32;
- } else {
- temp = get_sh_mem_bases_nybble_64(pdd);
- qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
- qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__PRIVATE_ATC__SHIFT;
- }
-
- pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
- qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
-
- return 0;
-}
-
-static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
+ struct qcm_process_device *qpd)
{
struct kfd_process_device *pdd;
unsigned int temp;
@@ -178,25 +131,9 @@ static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
return 0;
}
-static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
- struct qcm_process_device *qpd)
-{
- uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);
-
- if (q->process->is_32bit_user_mode)
- value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
- get_sh_mem_bases_32(qpd_to_pdd(qpd));
- else
- value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
- SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
- SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
-
- q->properties.sdma_vm_addr = value;
-}
-
-static void init_sdma_vm_hawaii(struct device_queue_manager *dqm,
- struct queue *q,
- struct qcm_process_device *qpd)
+static void init_sdma_vm(struct device_queue_manager *dqm,
+ struct queue *q,
+ struct qcm_process_device *qpd)
{
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
index f1a1f5753e65..245a90dfc2f6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
@@ -32,7 +32,7 @@ static int update_qpd_v10(struct device_queue_manager *dqm,
static void init_sdma_vm_v10(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
-void device_queue_manager_init_v10_navi10(
+void device_queue_manager_init_v10(
struct device_queue_manager_asic_ops *asic_ops)
{
asic_ops->update_qpd = update_qpd_v10;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index 8af643388768..54eb1bff903c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -60,7 +60,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
- if (dqm->dev->kfd->noretry && !dqm->dev->kfd->use_iommu_v2)
+ if (dqm->dev->kfd->noretry)
qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3))
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index d7d45832df0f..b291ee0fab94 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -28,29 +28,19 @@
#include "oss/oss_3_0_sh_mask.h"
static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd,
- enum cache_policy default_policy,
- enum cache_policy alternate_policy,
- void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
-static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd,
- enum cache_policy default_policy,
- enum cache_policy alternate_policy,
- void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size);
static int update_qpd_vi(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd);
-static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd);
-static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
- struct qcm_process_device *qpd);
-static void init_sdma_vm_tonga(struct device_queue_manager *dqm,
- struct queue *q,
- struct qcm_process_device *qpd);
+ struct qcm_process_device *qpd);
+static void init_sdma_vm(struct device_queue_manager *dqm,
+ struct queue *q,
+ struct qcm_process_device *qpd);
void device_queue_manager_init_vi(
- struct device_queue_manager_asic_ops *asic_ops)
+ struct device_queue_manager_asic_ops *asic_ops)
{
asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi;
asic_ops->update_qpd = update_qpd_vi;
@@ -58,15 +48,6 @@ void device_queue_manager_init_vi(
asic_ops->mqd_manager_init = mqd_manager_init_vi;
}
-void device_queue_manager_init_vi_tonga(
- struct device_queue_manager_asic_ops *asic_ops)
-{
- asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga;
- asic_ops->update_qpd = update_qpd_vi_tonga;
- asic_ops->init_sdma_vm = init_sdma_vm_tonga;
- asic_ops->mqd_manager_init = mqd_manager_init_vi_tonga;
-}
-
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
{
/* In 64-bit mode, we can only control the top 3 bits of the LDS,
@@ -96,35 +77,6 @@ static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
}
static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd,
- enum cache_policy default_policy,
- enum cache_policy alternate_policy,
- void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
-{
- uint32_t default_mtype;
- uint32_t ape1_mtype;
-
- default_mtype = (default_policy == cache_policy_coherent) ?
- MTYPE_CC :
- MTYPE_NC;
-
- ape1_mtype = (alternate_policy == cache_policy_coherent) ?
- MTYPE_CC :
- MTYPE_NC;
-
- qpd->sh_mem_config = (qpd->sh_mem_config &
- SH_MEM_CONFIG__ADDRESS_MODE_MASK) |
- SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
- SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
- default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
- ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
- SH_MEM_CONFIG__PRIVATE_ATC_MASK;
-
- return true;
-}
-
-static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
@@ -152,48 +104,7 @@ static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
}
static int update_qpd_vi(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
-{
- struct kfd_process_device *pdd;
- unsigned int temp;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
- SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
- MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
- MTYPE_CC << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
- SH_MEM_CONFIG__PRIVATE_ATC_MASK;
-
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
-
- if (qpd->pqm->process->is_32bit_user_mode) {
- temp = get_sh_mem_bases_32(pdd);
- qpd->sh_mem_bases = temp << SH_MEM_BASES__SHARED_BASE__SHIFT;
- qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA32 <<
- SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
- } else {
- temp = get_sh_mem_bases_nybble_64(pdd);
- qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
- qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 <<
- SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
- qpd->sh_mem_config |= 1 <<
- SH_MEM_CONFIG__PRIVATE_ATC__SHIFT;
- }
-
- pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
- qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
-
- return 0;
-}
-
-static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
+ struct qcm_process_device *qpd)
{
struct kfd_process_device *pdd;
unsigned int temp;
@@ -226,25 +137,9 @@ static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
return 0;
}
-static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
- struct qcm_process_device *qpd)
-{
- uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);
-
- if (q->process->is_32bit_user_mode)
- value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
- get_sh_mem_bases_32(qpd_to_pdd(qpd));
- else
- value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
- SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
- SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
-
- q->properties.sdma_vm_addr = value;
-}
-
-static void init_sdma_vm_tonga(struct device_queue_manager *dqm,
- struct queue *q,
- struct qcm_process_device *qpd)
+static void init_sdma_vm(struct device_queue_manager *dqm,
+ struct queue *q,
+ struct qcm_process_device *qpd)
{
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index 6421b620388d..c2e0b79dcc6d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -61,81 +61,46 @@ size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
/* Doorbell calculations for device init. */
int kfd_doorbell_init(struct kfd_dev *kfd)
{
- size_t doorbell_start_offset;
- size_t doorbell_aperture_size;
- size_t doorbell_process_limit;
+ int size = PAGE_SIZE;
+ int r;
/*
- * With MES enabled, just set the doorbell base as it is needed
- * to calculate doorbell physical address.
- */
- if (kfd->shared_resources.enable_mes) {
- kfd->doorbell_base =
- kfd->shared_resources.doorbell_physical_address;
- return 0;
- }
-
- /*
- * We start with calculations in bytes because the input data might
- * only be byte-aligned.
- * Only after we have done the rounding can we assume any alignment.
+ * Todo: KFD kernel level operations need only one doorbell for
+ * ring test/HWS. So instead of reserving a whole page here for
+ * kernel, reserve and consume a doorbell from existing KGD kernel
+ * doorbell page.
*/
- doorbell_start_offset =
- roundup(kfd->shared_resources.doorbell_start_offset,
- kfd_doorbell_process_slice(kfd));
-
- doorbell_aperture_size =
- rounddown(kfd->shared_resources.doorbell_aperture_size,
- kfd_doorbell_process_slice(kfd));
-
- if (doorbell_aperture_size > doorbell_start_offset)
- doorbell_process_limit =
- (doorbell_aperture_size - doorbell_start_offset) /
- kfd_doorbell_process_slice(kfd);
- else
- return -ENOSPC;
-
- if (!kfd->max_doorbell_slices ||
- doorbell_process_limit < kfd->max_doorbell_slices)
- kfd->max_doorbell_slices = doorbell_process_limit;
-
- kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
- doorbell_start_offset;
-
- kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);
-
- kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
- kfd_doorbell_process_slice(kfd));
-
- if (!kfd->doorbell_kernel_ptr)
+ /* Bitmap to dynamically allocate doorbells from kernel page */
+ kfd->doorbell_bitmap = bitmap_zalloc(size / sizeof(u32), GFP_KERNEL);
+ if (!kfd->doorbell_bitmap) {
+ DRM_ERROR("Failed to allocate kernel doorbell bitmap\n");
return -ENOMEM;
+ }
- pr_debug("Doorbell initialization:\n");
- pr_debug("doorbell base == 0x%08lX\n",
- (uintptr_t)kfd->doorbell_base);
-
- pr_debug("doorbell_base_dw_offset == 0x%08lX\n",
- kfd->doorbell_base_dw_offset);
-
- pr_debug("doorbell_process_limit == 0x%08lX\n",
- doorbell_process_limit);
-
- pr_debug("doorbell_kernel_offset == 0x%08lX\n",
- (uintptr_t)kfd->doorbell_base);
-
- pr_debug("doorbell aperture size == 0x%08lX\n",
- kfd->shared_resources.doorbell_aperture_size);
-
- pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
+ /* Alloc a doorbell page for KFD kernel usages */
+ r = amdgpu_bo_create_kernel(kfd->adev,
+ size,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_DOORBELL,
+ &kfd->doorbells,
+ NULL,
+ (void **)&kfd->doorbell_kernel_ptr);
+ if (r) {
+ pr_err("failed to allocate kernel doorbells\n");
+ bitmap_free(kfd->doorbell_bitmap);
+ return r;
+ }
+ pr_debug("Doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
return 0;
}
void kfd_doorbell_fini(struct kfd_dev *kfd)
{
- if (kfd->doorbell_kernel_ptr)
- iounmap(kfd->doorbell_kernel_ptr);
+ bitmap_free(kfd->doorbell_bitmap);
+ amdgpu_bo_free_kernel(&kfd->doorbells, NULL,
+ (void **)&kfd->doorbell_kernel_ptr);
}
int kfd_doorbell_mmap(struct kfd_node *dev, struct kfd_process *process,
@@ -188,22 +153,15 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
u32 inx;
mutex_lock(&kfd->doorbell_mutex);
- inx = find_first_zero_bit(kfd->doorbell_available_index,
- KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+ inx = find_first_zero_bit(kfd->doorbell_bitmap, PAGE_SIZE / sizeof(u32));
- __set_bit(inx, kfd->doorbell_available_index);
+ __set_bit(inx, kfd->doorbell_bitmap);
mutex_unlock(&kfd->doorbell_mutex);
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
return NULL;
- inx *= kfd->device_info.doorbell_size / sizeof(u32);
-
- /*
- * Calculating the kernel doorbell offset using the first
- * doorbell page.
- */
- *doorbell_off = kfd->doorbell_base_dw_offset + inx;
+ *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx);
pr_debug("Get kernel queue doorbell\n"
" doorbell offset == 0x%08X\n"
@@ -217,11 +175,10 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
{
unsigned int inx;
- inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
- * sizeof(u32) / kfd->device_info.doorbell_size;
+ inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
mutex_lock(&kfd->doorbell_mutex);
- __clear_bit(inx, kfd->doorbell_available_index);
+ __clear_bit(inx, kfd->doorbell_bitmap);
mutex_unlock(&kfd->doorbell_mutex);
}
@@ -243,80 +200,96 @@ void write_kernel_doorbell64(void __iomem *db, u64 value)
}
}
-unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
- struct kfd_process_device *pdd,
- unsigned int doorbell_id)
+static int init_doorbell_bitmap(struct qcm_process_device *qpd,
+ struct kfd_dev *dev)
{
- /*
- * doorbell_base_dw_offset accounts for doorbells taken by KGD.
- * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
- * the process's doorbells. The offset returned is in dword
- * units regardless of the ASIC-dependent doorbell size.
- */
- if (!kfd->shared_resources.enable_mes)
- return kfd->doorbell_base_dw_offset +
- pdd->doorbell_index
- * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
- doorbell_id *
- kfd->device_info.doorbell_size / sizeof(u32);
- else
- return amdgpu_mes_get_doorbell_dw_offset_in_bar(
- (struct amdgpu_device *)kfd->adev,
- pdd->doorbell_index, doorbell_id);
-}
+ unsigned int i;
+ int range_start = dev->shared_resources.non_cp_doorbells_start;
+ int range_end = dev->shared_resources.non_cp_doorbells_end;
-uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
-{
- uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
- kfd->shared_resources.doorbell_start_offset) /
- kfd_doorbell_process_slice(kfd) + 1;
+ if (!KFD_IS_SOC15(dev))
+ return 0;
- return num_of_elems;
+ /* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
+ pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
+ pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
+ range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
+ range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
+
+ for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
+ if (i >= range_start && i <= range_end) {
+ __set_bit(i, qpd->doorbell_bitmap);
+ __set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
+ qpd->doorbell_bitmap);
+ }
+ }
+ return 0;
}
phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
{
- if (!pdd->doorbell_index) {
- int r = kfd_alloc_process_doorbells(pdd->dev->kfd,
- &pdd->doorbell_index);
- if (r < 0)
+ struct amdgpu_device *adev = pdd->dev->adev;
+ uint32_t first_db_index;
+
+ if (!pdd->qpd.proc_doorbells) {
+ if (kfd_alloc_process_doorbells(pdd->dev->kfd, pdd))
+ /* phys_addr_t 0 is error */
return 0;
}
- return pdd->dev->kfd->doorbell_base +
- pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev->kfd);
+ first_db_index = amdgpu_doorbell_index_on_bar(adev, pdd->qpd.proc_doorbells, 0);
+ return adev->doorbell.base + first_db_index * sizeof(uint32_t);
}
-int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
+int kfd_alloc_process_doorbells(struct kfd_dev *kfd, struct kfd_process_device *pdd)
{
- int r = 0;
-
- if (!kfd->shared_resources.enable_mes)
- r = ida_simple_get(&kfd->doorbell_ida, 1,
- kfd->max_doorbell_slices, GFP_KERNEL);
- else
- r = amdgpu_mes_alloc_process_doorbells(
- (struct amdgpu_device *)kfd->adev,
- doorbell_index);
+ int r;
+ struct qcm_process_device *qpd = &pdd->qpd;
+
+ /* Allocate bitmap for dynamic doorbell allocation */
+ qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+ GFP_KERNEL);
+ if (!qpd->doorbell_bitmap) {
+ DRM_ERROR("Failed to allocate process doorbell bitmap\n");
+ return -ENOMEM;
+ }
- if (r > 0)
- *doorbell_index = r;
+ r = init_doorbell_bitmap(&pdd->qpd, kfd);
+ if (r) {
+ DRM_ERROR("Failed to initialize process doorbells\n");
+ r = -ENOMEM;
+ goto err;
+ }
- if (r < 0)
- pr_err("Failed to allocate process doorbells\n");
+ /* Allocate doorbells for this process */
+ r = amdgpu_bo_create_kernel(kfd->adev,
+ kfd_doorbell_process_slice(kfd),
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_DOORBELL,
+ &qpd->proc_doorbells,
+ NULL,
+ NULL);
+ if (r) {
+ DRM_ERROR("Failed to allocate process doorbells\n");
+ goto err;
+ }
+ return 0;
+err:
+ bitmap_free(qpd->doorbell_bitmap);
+ qpd->doorbell_bitmap = NULL;
return r;
}
-void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index)
+void kfd_free_process_doorbells(struct kfd_dev *kfd, struct kfd_process_device *pdd)
{
- if (doorbell_index) {
- if (!kfd->shared_resources.enable_mes)
- ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
- else
- amdgpu_mes_free_process_doorbells(
- (struct amdgpu_device *)kfd->adev,
- doorbell_index);
+ struct qcm_process_device *qpd = &pdd->qpd;
+
+ if (qpd->doorbell_bitmap) {
+ bitmap_free(qpd->doorbell_bitmap);
+ qpd->doorbell_bitmap = NULL;
}
+
+ amdgpu_bo_free_kernel(&qpd->proc_doorbells, NULL, NULL);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 8081a9408006..0f58be65132f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -31,7 +31,6 @@
#include <linux/memory.h>
#include "kfd_priv.h"
#include "kfd_events.h"
-#include "kfd_iommu.h"
#include <linux/device.h>
/*
@@ -1146,87 +1145,6 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
rcu_read_unlock();
}
-#ifdef KFD_SUPPORT_IOMMU_V2
-void kfd_signal_iommu_event(struct kfd_node *dev, u32 pasid,
- unsigned long address, bool is_write_requested,
- bool is_execute_requested)
-{
- struct kfd_hsa_memory_exception_data memory_exception_data;
- struct vm_area_struct *vma;
- int user_gpu_id;
-
- /*
- * Because we are called from arbitrary context (workqueue) as opposed
- * to process context, kfd_process could attempt to exit while we are
- * running so the lookup function increments the process ref count.
- */
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
- struct mm_struct *mm;
-
- if (!p)
- return; /* Presumably process exited. */
-
- /* Take a safe reference to the mm_struct, which may otherwise
- * disappear even while the kfd_process is still referenced.
- */
- mm = get_task_mm(p->lead_thread);
- if (!mm) {
- kfd_unref_process(p);
- return; /* Process is exiting */
- }
-
- user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
- if (unlikely(user_gpu_id == -EINVAL)) {
- WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
- return;
- }
- memset(&memory_exception_data, 0, sizeof(memory_exception_data));
-
- mmap_read_lock(mm);
- vma = find_vma(mm, address);
-
- memory_exception_data.gpu_id = user_gpu_id;
- memory_exception_data.va = address;
- /* Set failure reason */
- memory_exception_data.failure.NotPresent = 1;
- memory_exception_data.failure.NoExecute = 0;
- memory_exception_data.failure.ReadOnly = 0;
- if (vma && address >= vma->vm_start) {
- memory_exception_data.failure.NotPresent = 0;
-
- if (is_write_requested && !(vma->vm_flags & VM_WRITE))
- memory_exception_data.failure.ReadOnly = 1;
- else
- memory_exception_data.failure.ReadOnly = 0;
-
- if (is_execute_requested && !(vma->vm_flags & VM_EXEC))
- memory_exception_data.failure.NoExecute = 1;
- else
- memory_exception_data.failure.NoExecute = 0;
- }
-
- mmap_read_unlock(mm);
- mmput(mm);
-
- pr_debug("notpresent %d, noexecute %d, readonly %d\n",
- memory_exception_data.failure.NotPresent,
- memory_exception_data.failure.NoExecute,
- memory_exception_data.failure.ReadOnly);
-
- /* Workaround on Raven to not kill the process when memory is freed
- * before IOMMU is able to finish processing all the excessive PPRs
- */
-
- if (KFD_GC_VERSION(dev) != IP_VERSION(9, 1, 0) &&
- KFD_GC_VERSION(dev) != IP_VERSION(9, 2, 2) &&
- KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0))
- lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY,
- &memory_exception_data);
-
- kfd_unref_process(p);
-}
-#endif /* KFD_SUPPORT_IOMMU_V2 */
-
void kfd_signal_hw_exception_event(u32 pasid)
{
/*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index da2ca00d79e5..62b205dac63a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -322,22 +322,19 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
pdd->lds_base = MAKE_LDS_APP_BASE_VI();
pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
- if (!pdd->dev->kfd->use_iommu_v2) {
- /* dGPUs: SVM aperture starting at 0
- * with small reserved space for kernel.
- * Set them to CANONICAL addresses.
- */
- pdd->gpuvm_base = SVM_USER_BASE;
- pdd->gpuvm_limit =
- pdd->dev->kfd->shared_resources.gpuvm_size - 1;
- } else {
- /* set them to non CANONICAL addresses, and no SVM is
- * allocated.
- */
- pdd->gpuvm_base = MAKE_GPUVM_APP_BASE_VI(id + 1);
- pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base,
- pdd->dev->kfd->shared_resources.gpuvm_size);
- }
+ /* dGPUs: SVM aperture starting at 0
+ * with small reserved space for kernel.
+ * Set them to CANONICAL addresses.
+ */
+ pdd->gpuvm_base = SVM_USER_BASE;
+ pdd->gpuvm_limit =
+ pdd->dev->kfd->shared_resources.gpuvm_size - 1;
+
+ /* dGPUs: the reserved space for kernel
+ * before SVM
+ */
+ pdd->qpd.cwsr_base = SVM_CWSR_BASE;
+ pdd->qpd.ib_base = SVM_IB_BASE;
pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI();
pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
@@ -348,18 +345,18 @@ static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id)
pdd->lds_base = MAKE_LDS_APP_BASE_V9();
pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
- /* Raven needs SVM to support graphic handle, etc. Leave the small
- * reserved space before SVM on Raven as well, even though we don't
- * have to.
- * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they
- * are used in Thunk to reserve SVM.
- */
- pdd->gpuvm_base = SVM_USER_BASE;
+ pdd->gpuvm_base = PAGE_SIZE;
pdd->gpuvm_limit =
pdd->dev->kfd->shared_resources.gpuvm_size - 1;
pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
+
+ /*
+ * Place TBA/TMA on opposite side of VM hole to prevent
+ * stray faults from triggering SVM on these pages.
+ */
+ pdd->qpd.cwsr_base = pdd->dev->kfd->shared_resources.gpuvm_size;
}
int kfd_init_apertures(struct kfd_process *process)
@@ -416,14 +413,6 @@ int kfd_init_apertures(struct kfd_process *process)
return -EINVAL;
}
}
-
- if (!dev->kfd->use_iommu_v2) {
- /* dGPUs: the reserved space for kernel
- * before SVM
- */
- pdd->qpd.cwsr_base = SVM_CWSR_BASE;
- pdd->qpd.ib_base = SVM_IB_BASE;
- }
}
dev_dbg(kfd_device, "node id %u\n", id);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
deleted file mode 100644
index 808ee010520a..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
+++ /dev/null
@@ -1,356 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR MIT
-/*
- * Copyright 2018-2022 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/kconfig.h>
-
-#if IS_REACHABLE(CONFIG_AMD_IOMMU_V2)
-
-#include <linux/printk.h>
-#include <linux/device.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/amd-iommu.h>
-#include "kfd_priv.h"
-#include "kfd_topology.h"
-#include "kfd_iommu.h"
-
-static const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
- AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
- AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
-
-/** kfd_iommu_check_device - Check whether IOMMU is available for device
- */
-int kfd_iommu_check_device(struct kfd_dev *kfd)
-{
- struct amd_iommu_device_info iommu_info;
- int err;
-
- if (!kfd->use_iommu_v2)
- return -ENODEV;
-
- iommu_info.flags = 0;
- err = amd_iommu_device_info(kfd->adev->pdev, &iommu_info);
- if (err)
- return err;
-
- if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags)
- return -ENODEV;
-
- return 0;
-}
-
-/** kfd_iommu_device_init - Initialize IOMMU for device
- */
-int kfd_iommu_device_init(struct kfd_dev *kfd)
-{
- struct amd_iommu_device_info iommu_info;
- unsigned int pasid_limit;
- int err;
-
- if (!kfd->use_iommu_v2)
- return 0;
-
- iommu_info.flags = 0;
- err = amd_iommu_device_info(kfd->adev->pdev, &iommu_info);
- if (err < 0) {
- dev_err(kfd_device,
- "error getting iommu info. is the iommu enabled?\n");
- return -ENODEV;
- }
-
- if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) {
- dev_err(kfd_device,
- "error required iommu flags ats %i, pri %i, pasid %i\n",
- (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
- (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
- (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP)
- != 0);
- return -ENODEV;
- }
-
- pasid_limit = min_t(unsigned int,
- (unsigned int)(1 << kfd->device_info.max_pasid_bits),
- iommu_info.max_pasids);
-
- if (!kfd_set_pasid_limit(pasid_limit)) {
- dev_err(kfd_device, "error setting pasid limit\n");
- return -EBUSY;
- }
-
- return 0;
-}
-
-/** kfd_iommu_bind_process_to_device - Have the IOMMU bind a process
- *
- * Binds the given process to the given device using its PASID. This
- * enables IOMMUv2 address translation for the process on the device.
- *
- * This function assumes that the process mutex is held.
- */
-int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd)
-{
- struct kfd_node *dev = pdd->dev;
- struct kfd_process *p = pdd->process;
- int err;
-
- if (!dev->kfd->use_iommu_v2 || pdd->bound == PDD_BOUND)
- return 0;
-
- if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) {
- pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n");
- return -EINVAL;
- }
-
- if (!kfd_is_first_node(dev)) {
- dev_warn_once(kfd_device,
- "IOMMU supported only on first node\n");
- return 0;
- }
-
- err = amd_iommu_bind_pasid(dev->adev->pdev, p->pasid, p->lead_thread);
- if (!err)
- pdd->bound = PDD_BOUND;
-
- return err;
-}
-
-/** kfd_iommu_unbind_process - Unbind process from all devices
- *
- * This removes all IOMMU device bindings of the process. To be used
- * before process termination.
- */
-void kfd_iommu_unbind_process(struct kfd_process *p)
-{
- int i;
-
- for (i = 0; i < p->n_pdds; i++)
- if ((p->pdds[i]->bound == PDD_BOUND) &&
- (kfd_is_first_node((p->pdds[i]->dev))))
- amd_iommu_unbind_pasid(p->pdds[i]->dev->adev->pdev,
- p->pasid);
-}
-
-/* Callback for process shutdown invoked by the IOMMU driver */
-static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, u32 pasid)
-{
- struct kfd_node *dev = kfd_device_by_pci_dev(pdev);
- struct kfd_process *p;
- struct kfd_process_device *pdd;
-
- if (!dev)
- return;
-
- /*
- * Look for the process that matches the pasid. If there is no such
- * process, we either released it in amdkfd's own notifier, or there
- * is a bug. Unfortunately, there is no way to tell...
- */
- p = kfd_lookup_process_by_pasid(pasid);
- if (!p)
- return;
-
- pr_debug("Unbinding process 0x%x from IOMMU\n", pasid);
-
- mutex_lock(&p->mutex);
-
- pdd = kfd_get_process_device_data(dev, p);
- if (pdd)
- /* For GPU relying on IOMMU, we need to dequeue here
- * when PASID is still bound.
- */
- kfd_process_dequeue_from_device(pdd);
-
- mutex_unlock(&p->mutex);
-
- kfd_unref_process(p);
-}
-
-/* This function called by IOMMU driver on PPR failure */
-static int iommu_invalid_ppr_cb(struct pci_dev *pdev, u32 pasid,
- unsigned long address, u16 flags)
-{
- struct kfd_node *dev;
-
- dev_warn_ratelimited(kfd_device,
- "Invalid PPR device %x:%x.%x pasid 0x%x address 0x%lX flags 0x%X",
- pdev->bus->number,
- PCI_SLOT(pdev->devfn),
- PCI_FUNC(pdev->devfn),
- pasid,
- address,
- flags);
-
- dev = kfd_device_by_pci_dev(pdev);
- if (!WARN_ON(!dev))
- kfd_signal_iommu_event(dev, pasid, address,
- flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);
-
- return AMD_IOMMU_INV_PRI_RSP_INVALID;
-}
-
-/*
- * Bind processes do the device that have been temporarily unbound
- * (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device.
- */
-static int kfd_bind_processes_to_device(struct kfd_node *knode)
-{
- struct kfd_process_device *pdd;
- struct kfd_process *p;
- unsigned int temp;
- int err = 0;
-
- int idx = srcu_read_lock(&kfd_processes_srcu);
-
- hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- mutex_lock(&p->mutex);
- pdd = kfd_get_process_device_data(knode, p);
-
- if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) {
- mutex_unlock(&p->mutex);
- continue;
- }
-
- err = amd_iommu_bind_pasid(knode->adev->pdev, p->pasid,
- p->lead_thread);
- if (err < 0) {
- pr_err("Unexpected pasid 0x%x binding failure\n",
- p->pasid);
- mutex_unlock(&p->mutex);
- break;
- }
-
- pdd->bound = PDD_BOUND;
- mutex_unlock(&p->mutex);
- }
-
- srcu_read_unlock(&kfd_processes_srcu, idx);
-
- return err;
-}
-
-/*
- * Mark currently bound processes as PDD_BOUND_SUSPENDED. These
- * processes will be restored to PDD_BOUND state in
- * kfd_bind_processes_to_device.
- */
-static void kfd_unbind_processes_from_device(struct kfd_node *knode)
-{
- struct kfd_process_device *pdd;
- struct kfd_process *p;
- unsigned int temp;
-
- int idx = srcu_read_lock(&kfd_processes_srcu);
-
- hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- mutex_lock(&p->mutex);
- pdd = kfd_get_process_device_data(knode, p);
-
- if (WARN_ON(!pdd)) {
- mutex_unlock(&p->mutex);
- continue;
- }
-
- if (pdd->bound == PDD_BOUND)
- pdd->bound = PDD_BOUND_SUSPENDED;
- mutex_unlock(&p->mutex);
- }
-
- srcu_read_unlock(&kfd_processes_srcu, idx);
-}
-
-/** kfd_iommu_suspend - Prepare IOMMU for suspend
- *
- * This unbinds processes from the device and disables the IOMMU for
- * the device.
- */
-void kfd_iommu_suspend(struct kfd_dev *kfd)
-{
- if (!kfd->use_iommu_v2)
- return;
-
- kfd_unbind_processes_from_device(kfd->nodes[0]);
-
- amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev, NULL);
- amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev, NULL);
- amd_iommu_free_device(kfd->adev->pdev);
-}
-
-/** kfd_iommu_resume - Restore IOMMU after resume
- *
- * This reinitializes the IOMMU for the device and re-binds previously
- * suspended processes to the device.
- */
-int kfd_iommu_resume(struct kfd_dev *kfd)
-{
- unsigned int pasid_limit;
- int err;
-
- if (!kfd->use_iommu_v2)
- return 0;
-
- pasid_limit = kfd_get_pasid_limit();
-
- err = amd_iommu_init_device(kfd->adev->pdev, pasid_limit);
- if (err)
- return -ENXIO;
-
- amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev,
- iommu_pasid_shutdown_callback);
- amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev,
- iommu_invalid_ppr_cb);
-
- err = kfd_bind_processes_to_device(kfd->nodes[0]);
- if (err) {
- amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev, NULL);
- amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev, NULL);
- amd_iommu_free_device(kfd->adev->pdev);
- return err;
- }
-
- return 0;
-}
-
-/** kfd_iommu_add_perf_counters - Add IOMMU performance counters to topology
- */
-int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev)
-{
- struct kfd_perf_properties *props;
-
- if (!(kdev->node_props.capability & HSA_CAP_ATS_PRESENT))
- return 0;
-
- if (!amd_iommu_pc_supported())
- return 0;
-
- props = kfd_alloc_struct(props);
- if (!props)
- return -ENOMEM;
- strcpy(props->block_name, "iommu");
- props->max_concurrent = amd_iommu_pc_get_max_banks(0) *
- amd_iommu_pc_get_max_counters(0); /* assume one iommu */
- list_add_tail(&props->list, &kdev->perf_props);
-
- return 0;
-}
-
-#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h
deleted file mode 100644
index 8cf0fcbe87c2..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR MIT */
-/*
- * Copyright 2018-2022 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef __KFD_IOMMU_H__
-#define __KFD_IOMMU_H__
-
-#include <linux/kconfig.h>
-
-#if IS_REACHABLE(CONFIG_AMD_IOMMU_V2)
-
-#define KFD_SUPPORT_IOMMU_V2
-
-int kfd_iommu_check_device(struct kfd_dev *kfd);
-int kfd_iommu_device_init(struct kfd_dev *kfd);
-
-int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd);
-void kfd_iommu_unbind_process(struct kfd_process *p);
-
-void kfd_iommu_suspend(struct kfd_dev *kfd);
-int kfd_iommu_resume(struct kfd_dev *kfd);
-
-int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev);
-
-#else
-
-static inline int kfd_iommu_check_device(struct kfd_dev *kfd)
-{
- return -ENODEV;
-}
-static inline int kfd_iommu_device_init(struct kfd_dev *kfd)
-{
-#if IS_MODULE(CONFIG_AMD_IOMMU_V2)
- WARN_ONCE(1, "iommu_v2 module is not usable by built-in KFD");
-#endif
- return 0;
-}
-
-static inline int kfd_iommu_bind_process_to_device(
- struct kfd_process_device *pdd)
-{
- return 0;
-}
-static inline void kfd_iommu_unbind_process(struct kfd_process *p)
-{
- /* empty */
-}
-
-static inline void kfd_iommu_suspend(struct kfd_dev *kfd)
-{
- /* empty */
-}
-static inline int kfd_iommu_resume(struct kfd_dev *kfd)
-{
- return 0;
-}
-
-static inline int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev)
-{
- return 0;
-}
-
-#endif /* IS_REACHABLE(CONFIG_AMD_IOMMU_V2) */
-
-#endif /* __KFD_IOMMU_H__ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 709ac885ca6d..7d82c7da223a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -461,7 +461,6 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
0, node->id, trigger);
svm_range_dma_unmap(adev->dev, scratch, 0, npages);
- svm_range_free_dma_mappings(prange);
out_free:
kvfree(buf);
@@ -543,10 +542,12 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
addr = next;
}
- if (cpages)
+ if (cpages) {
prange->actual_loc = best_loc;
- else
+ svm_range_free_dma_mappings(prange, true);
+ } else {
svm_range_vram_node_free(prange);
+ }
return r < 0 ? r : 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index 863cf060af48..d01bb57733b3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -48,7 +48,7 @@ int pipe_priority_map[] = {
struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_node *dev, struct queue_properties *q)
{
- struct kfd_mem_obj *mqd_mem_obj = NULL;
+ struct kfd_mem_obj *mqd_mem_obj;
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
if (!mqd_mem_obj)
@@ -64,7 +64,7 @@ struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_node *dev, struct queue_properti
struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_node *dev,
struct queue_properties *q)
{
- struct kfd_mem_obj *mqd_mem_obj = NULL;
+ struct kfd_mem_obj *mqd_mem_obj;
uint64_t offset;
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 65c9f01a1f86..ee1d32d957f2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -206,13 +206,6 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-static void update_mqd(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q,
- struct mqd_update_info *minfo)
-{
- __update_mqd(mm, mqd, q, minfo, 1);
-}
-
static uint32_t read_doorbell_id(void *mqd)
{
struct cik_mqd *m = (struct cik_mqd *)mqd;
@@ -220,9 +213,9 @@ static uint32_t read_doorbell_id(void *mqd)
return m->queue_doorbell_id0;
}
-static void update_mqd_hawaii(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q,
- struct mqd_update_info *minfo)
+static void update_mqd(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
{
__update_mqd(mm, mqd, q, minfo, 0);
}
@@ -387,7 +380,6 @@ static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
#endif
-
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
struct kfd_node *dev)
{
@@ -470,16 +462,3 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
return mqd;
}
-
-struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
- struct kfd_node *dev)
-{
- struct mqd_manager *mqd;
-
- mqd = mqd_manager_init_cik(type, dev);
- if (!mqd)
- return NULL;
- if (type == KFD_MQD_TYPE_CP)
- mqd->update_mqd = update_mqd_hawaii;
- return mqd;
-}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 94c0fc2e57b7..83699392c808 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -318,6 +318,26 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
}
+static int destroy_hiq_mqd(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type, unsigned int timeout,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ int err;
+ struct v10_compute_mqd *m;
+ u32 doorbell_off;
+
+ m = get_mqd(mqd);
+
+ doorbell_off = m->cp_hqd_pq_doorbell_control >>
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+
+ err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, 0);
+ if (err)
+ pr_debug("Destroy HIQ MQD failed: %d\n", err);
+
+ return err;
+}
+
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -460,7 +480,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
mqd->free_mqd = free_mqd_hiq_sdma;
mqd->load_mqd = kfd_hiq_load_mqd_kiq;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->destroy_mqd = destroy_hiq_mqd;
mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v10_compute_mqd);
mqd->mqd_stride = kfd_mqd_stride;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index 31fec5e70d13..2319467d2d95 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -335,6 +335,26 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
}
+static int destroy_hiq_mqd(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type, unsigned int timeout,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ int err;
+ struct v11_compute_mqd *m;
+ u32 doorbell_off;
+
+ m = get_mqd(mqd);
+
+ doorbell_off = m->cp_hqd_pq_doorbell_control >>
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+
+ err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, 0);
+ if (err)
+ pr_debug("Destroy HIQ MQD failed: %d\n", err);
+
+ return err;
+}
+
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -449,7 +469,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
mqd->free_mqd = free_mqd_hiq_sdma;
mqd->load_mqd = kfd_hiq_load_mqd_kiq;
mqd->update_mqd = update_mqd;
- mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->destroy_mqd = destroy_hiq_mqd;
mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v11_compute_mqd);
#if defined(CONFIG_DEBUG_FS)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 601bb9f68048..e23d32f35607 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -405,6 +405,25 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
}
+static int destroy_hiq_mqd(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type, unsigned int timeout,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ int err;
+ struct v9_mqd *m;
+ u32 doorbell_off;
+
+ m = get_mqd(mqd);
+
+ doorbell_off = m->cp_hqd_pq_doorbell_control >>
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, 0);
+ if (err)
+ pr_debug("Destroy HIQ MQD failed: %d\n", err);
+
+ return err;
+}
+
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -548,16 +567,19 @@ static int destroy_hiq_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
{
uint32_t xcc_mask = mm->dev->xcc_mask;
int xcc_id, err, inst = 0;
- void *xcc_mqd;
uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
+ struct v9_mqd *m;
+ u32 doorbell_off;
for_each_inst(xcc_id, xcc_mask) {
- xcc_mqd = mqd + hiq_mqd_size * inst;
- err = mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, xcc_mqd,
- type, timeout, pipe_id,
- queue_id, xcc_id);
+ m = get_mqd(mqd + hiq_mqd_size * inst);
+
+ doorbell_off = m->cp_hqd_pq_doorbell_control >>
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+
+ err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, xcc_id);
if (err) {
- pr_debug("Destroy MQD failed for xcc: %d\n", inst);
+ pr_debug("Destroy HIQ MQD failed for xcc: %d\n", inst);
break;
}
++inst;
@@ -846,7 +868,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
} else {
mqd->init_mqd = init_mqd_hiq;
mqd->load_mqd = kfd_hiq_load_mqd_kiq;
- mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->destroy_mqd = destroy_hiq_mqd;
}
break;
case KFD_MQD_TYPE_DIQ:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index d1e962da51dd..657c37822980 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -237,14 +237,6 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-
-static void update_mqd(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q,
- struct mqd_update_info *minfo)
-{
- __update_mqd(mm, mqd, q, minfo, MTYPE_CC, 1);
-}
-
static uint32_t read_doorbell_id(void *mqd)
{
struct vi_mqd *m = (struct vi_mqd *)mqd;
@@ -252,9 +244,9 @@ static uint32_t read_doorbell_id(void *mqd)
return m->queue_doorbell_id0;
}
-static void update_mqd_tonga(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q,
- struct mqd_update_info *minfo)
+static void update_mqd(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
{
__update_mqd(mm, mqd, q, minfo, MTYPE_UC, 0);
}
@@ -529,16 +521,3 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
return mqd;
}
-
-struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
- struct kfd_node *dev)
-{
- struct mqd_manager *mqd;
-
- mqd = mqd_manager_init_vi(type, dev);
- if (!mqd)
- return NULL;
- if (type == KFD_MQD_TYPE_CP)
- mqd->update_mqd = update_mqd_tonga;
- return mqd;
-}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 29a2d0499b67..8ce6f5200905 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -121,6 +121,7 @@ static int pm_map_process_aldebaran(struct packet_manager *pm,
packet->sh_mem_bases = qpd->sh_mem_bases;
if (qpd->tba_addr) {
packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
+ packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
}
@@ -298,7 +299,8 @@ static int pm_set_grace_period_v9(struct packet_manager *pm,
pm->dqm->wait_times,
grace_period,
&reg_offset,
- &reg_data);
+ &reg_data,
+ 0);
if (grace_period == USE_DEFAULT_GRACE_PERIOD)
reg_data = pm->dqm->wait_times;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index d4c9ee3f9953..3d9ce44d88da 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -175,12 +175,6 @@ extern int send_sigterm;
*/
extern int debug_largebar;
-/*
- * Ignore CRAT table during KFD initialization, can be used to work around
- * broken CRAT tables on some AMD systems
- */
-extern int ignore_crat;
-
/* Set sh_mem_config.retry_disable on GFX v9 */
extern int amdgpu_noretry;
@@ -234,7 +228,6 @@ struct kfd_device_info {
uint8_t num_of_watch_points;
uint16_t mqd_size_aligned;
bool supports_cwsr;
- bool needs_iommu_device;
bool needs_pci_atomics;
uint32_t no_atomic_fw_version;
unsigned int num_sdma_queues_per_engine;
@@ -323,15 +316,6 @@ struct kfd_dev {
struct kfd_device_info device_info;
- phys_addr_t doorbell_base; /* Start of actual doorbells used by
- * KFD. It is aligned for mapping
- * into user mode
- */
- size_t doorbell_base_dw_offset; /* Offset from the start of the PCI
- * doorbell BAR to the first KFD
- * doorbell in dwords. GFX reserves
- * the segment before this offset.
- */
u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells
* page used by kernel queue
*/
@@ -340,8 +324,6 @@ struct kfd_dev {
const struct kfd2kgd_calls *kfd2kgd;
struct mutex doorbell_mutex;
- DECLARE_BITMAP(doorbell_available_index,
- KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
void *gtt_mem;
uint64_t gtt_start_gpu_addr;
@@ -368,9 +350,6 @@ struct kfd_dev {
bool pci_atomic_requested;
- /* Use IOMMU v2 flag */
- bool use_iommu_v2;
-
/* Compute Profile ref. count */
atomic_t compute_profile;
@@ -385,6 +364,12 @@ struct kfd_dev {
/* Track per device allocated watch points */
uint32_t alloc_watch_ids;
spinlock_t watch_points_lock;
+
+ /* Kernel doorbells for KFD device */
+ struct amdgpu_bo *doorbells;
+
+ /* bitmap for dynamic doorbell allocation from doorbell object */
+ unsigned long *doorbell_bitmap;
};
enum kfd_mempool {
@@ -702,7 +687,10 @@ struct qcm_process_device {
uint64_t ib_base;
void *ib_kaddr;
- /* doorbell resources per process per device */
+ /* doorbells for kfd process */
+ struct amdgpu_bo *proc_doorbells;
+
+ /* bitmap for dynamic doorbell allocation from the bo */
unsigned long *doorbell_bitmap;
};
@@ -792,7 +780,6 @@ struct kfd_process_device {
struct attribute attr_evict;
struct kobject *kobj_stats;
- unsigned int doorbell_index;
/*
* @cu_occupancy: Reports occupancy of Compute Units (CU) of a process
@@ -1100,9 +1087,9 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
unsigned int doorbell_id);
phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd);
int kfd_alloc_process_doorbells(struct kfd_dev *kfd,
- unsigned int *doorbell_index);
+ struct kfd_process_device *pdd);
void kfd_free_process_doorbells(struct kfd_dev *kfd,
- unsigned int doorbell_index);
+ struct kfd_process_device *pdd);
/* GTT Sub-Allocator */
int kfd_gtt_sa_allocate(struct kfd_node *node, unsigned int size,
@@ -1152,7 +1139,6 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev,
}
int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev);
int kfd_numa_node_to_apic_id(int numa_node_id);
-void kfd_double_confirm_iommu_support(struct kfd_dev *gpu);
/* Interrupts */
#define KFD_IRQ_FENCE_CLIENTID 0xff
@@ -1299,12 +1285,8 @@ void print_queue(struct queue *q);
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
struct kfd_node *dev);
-struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
- struct kfd_node *dev);
struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
struct kfd_node *dev);
-struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
- struct kfd_node *dev);
struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
struct kfd_node *dev);
struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
@@ -1459,9 +1441,6 @@ int kfd_wait_on_events(struct kfd_process *p,
uint32_t *wait_result);
void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
uint32_t valid_id_bits);
-void kfd_signal_iommu_event(struct kfd_node *dev,
- u32 pasid, unsigned long address,
- bool is_write_requested, bool is_execute_requested);
void kfd_signal_hw_exception_event(u32 pasid);
int kfd_set_event(struct kfd_process *p, uint32_t event_id);
int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index a844e68211ac..fbf053001af9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -28,7 +28,6 @@
#include <linux/sched/task.h>
#include <linux/mmu_context.h>
#include <linux/slab.h>
-#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>
#include <linux/mman.h>
@@ -41,7 +40,6 @@ struct mm_struct;
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
-#include "kfd_iommu.h"
#include "kfd_svm.h"
#include "kfd_smi_events.h"
#include "kfd_debug.h"
@@ -1035,10 +1033,9 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
get_order(KFD_CWSR_TBA_TMA_SIZE));
- bitmap_free(pdd->qpd.doorbell_bitmap);
idr_destroy(&pdd->alloc_idr);
- kfd_free_process_doorbells(pdd->dev->kfd, pdd->doorbell_index);
+ kfd_free_process_doorbells(pdd->dev->kfd, pdd);
if (pdd->dev->kfd->shared_resources.enable_mes)
amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
@@ -1123,7 +1120,6 @@ static void kfd_process_wq_release(struct work_struct *work)
dma_fence_signal(p->ef);
kfd_process_remove_sysfs(p);
- kfd_iommu_unbind_process(p);
kfd_process_kunmap_signal_bo(p);
kfd_process_free_outstanding_kfd_bos(p);
@@ -1550,38 +1546,6 @@ err_alloc_process:
return ERR_PTR(err);
}
-static int init_doorbell_bitmap(struct qcm_process_device *qpd,
- struct kfd_dev *dev)
-{
- unsigned int i;
- int range_start = dev->shared_resources.non_cp_doorbells_start;
- int range_end = dev->shared_resources.non_cp_doorbells_end;
-
- if (!KFD_IS_SOC15(dev))
- return 0;
-
- qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
- GFP_KERNEL);
- if (!qpd->doorbell_bitmap)
- return -ENOMEM;
-
- /* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
- pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
- pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
- range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
- range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
-
- for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
- if (i >= range_start && i <= range_end) {
- __set_bit(i, qpd->doorbell_bitmap);
- __set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
- qpd->doorbell_bitmap);
- }
- }
-
- return 0;
-}
-
struct kfd_process_device *kfd_get_process_device_data(struct kfd_node *dev,
struct kfd_process *p)
{
@@ -1606,11 +1570,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
if (!pdd)
return NULL;
- if (init_doorbell_bitmap(&pdd->qpd, dev->kfd)) {
- pr_err("Failed to init doorbell for process\n");
- goto err_free_pdd;
- }
-
pdd->dev = dev;
INIT_LIST_HEAD(&pdd->qpd.queues_list);
INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
@@ -1766,10 +1725,6 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev,
}
}
- err = kfd_iommu_bind_process_to_device(pdd);
- if (err)
- goto out;
-
/*
* make sure that runtime_usage counter is incremented just once
* per pdd
@@ -1777,15 +1732,6 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev,
pdd->runtime_inuse = true;
return pdd;
-
-out:
- /* balance runpm reference count and exit with error */
- if (!pdd->runtime_inuse) {
- pm_runtime_mark_last_busy(adev_to_drm(dev->adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(dev->adev)->dev);
- }
-
- return ERR_PTR(err);
}
/* Create specific handle mapped to mem from process local memory idr
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index ba9d69054119..adb5e4bdc0b2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -123,7 +123,7 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
if (!gws && pdd->qpd.num_gws == 0)
return -EINVAL;
- if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3)) {
+ if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && !dev->kfd->shared_resources.enable_mes) {
if (gws)
ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
gws, &mem);
@@ -136,7 +136,9 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
} else {
/*
* Intentionally set GWS to a non-NULL value
- * for GFX 9.4.3.
+ * for devices that do not use GWS for global wave
+ * synchronization but require the formality
+ * of setting GWS for cooperative groups.
*/
pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL;
}
@@ -173,7 +175,8 @@ void pqm_uninit(struct process_queue_manager *pqm)
list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
if (pqn->q && pqn->q->gws &&
- KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3))
+ KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
+ !pqn->q->device->kfd->shared_resources.enable_mes)
amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
pqn->q->gws);
kfd_procfs_del_queue(pqn->q);
@@ -365,17 +368,20 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
- if (q && p_doorbell_offset_in_process)
+ if (q && p_doorbell_offset_in_process) {
/* Return the doorbell offset within the doorbell page
* to the caller so it can be passed up to user mode
* (in bytes).
- * There are always 1024 doorbells per process, so in case
- * of 8-byte doorbells, there are two doorbell pages per
- * process.
+ * relative doorbell index = Absolute doorbell index -
+ * absolute index of first doorbell in the page.
*/
- *p_doorbell_offset_in_process =
- (q->properties.doorbell_off * sizeof(uint32_t)) &
- (kfd_doorbell_process_slice(dev->kfd) - 1);
+ uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
+ pdd->qpd.proc_doorbells,
+ 0);
+
+ *p_doorbell_offset_in_process = (q->properties.doorbell_off
+ - first_db_index) * sizeof(uint32_t);
+ }
pr_debug("PQM After DQM create queue\n");
@@ -455,7 +461,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
}
if (pqn->q->gws) {
- if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3))
+ if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
+ !dev->kfd->shared_resources.enable_mes)
amdgpu_amdkfd_remove_gws_from_process(
pqm->process->kgd_process_info,
pqn->q->gws);
@@ -929,12 +936,6 @@ int kfd_criu_restore_queue(struct kfd_process *p,
goto exit;
}
- if (!pdd->doorbell_index &&
- kfd_alloc_process_doorbells(pdd->dev->kfd, &pdd->doorbell_index) < 0) {
- ret = -ENOMEM;
- goto exit;
- }
-
/* data stored in this order: mqd, ctl_stack */
mqd = q_extra_data;
ctl_stack = mqd + q_data->mqd_size;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 5ff1a5a89d96..011561605983 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -23,7 +23,10 @@
#include <linux/types.h>
#include <linux/sched/task.h>
+#include <linux/dynamic_debug.h>
#include <drm/ttm/ttm_tt.h>
+#include <drm/drm_exec.h>
+
#include "amdgpu_sync.h"
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
@@ -46,6 +49,13 @@
* page table is updated.
*/
#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING (2UL * NSEC_PER_MSEC)
+#if IS_ENABLED(CONFIG_DYNAMIC_DEBUG)
+#define dynamic_svm_range_dump(svms) \
+ _dynamic_func_call_no_desc("svm_range_dump", svm_range_debug_dump, svms)
+#else
+#define dynamic_svm_range_dump(svms) \
+ do { if (0) svm_range_debug_dump(svms); } while (0)
+#endif
/* Giant svm range split into smaller ranges based on this, it is decided using
* minimum of all dGPU/APU 1/32 VRAM size, between 2MB to 1GB and alignment to
@@ -239,7 +249,7 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
}
}
-void svm_range_free_dma_mappings(struct svm_range *prange)
+void svm_range_free_dma_mappings(struct svm_range *prange, bool unmap_dma)
{
struct kfd_process_device *pdd;
dma_addr_t *dma_addr;
@@ -260,13 +270,14 @@ void svm_range_free_dma_mappings(struct svm_range *prange)
continue;
}
dev = &pdd->dev->adev->pdev->dev;
- svm_range_dma_unmap(dev, dma_addr, 0, prange->npages);
+ if (unmap_dma)
+ svm_range_dma_unmap(dev, dma_addr, 0, prange->npages);
kvfree(dma_addr);
prange->dma_addr[gpuidx] = NULL;
}
}
-static void svm_range_free(struct svm_range *prange, bool update_mem_usage)
+static void svm_range_free(struct svm_range *prange, bool do_unmap)
{
uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT;
struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
@@ -275,9 +286,9 @@ static void svm_range_free(struct svm_range *prange, bool update_mem_usage)
prange->start, prange->last);
svm_range_vram_node_free(prange);
- svm_range_free_dma_mappings(prange);
+ svm_range_free_dma_mappings(prange, do_unmap);
- if (update_mem_usage && !p->xnack_enabled) {
+ if (do_unmap && !p->xnack_enabled) {
pr_debug("unreserve prange 0x%p size: 0x%llx\n", prange, size);
amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
@@ -849,6 +860,37 @@ static void svm_range_debug_dump(struct svm_range_list *svms)
}
}
+static void *
+svm_range_copy_array(void *psrc, size_t size, uint64_t num_elements,
+ uint64_t offset)
+{
+ unsigned char *dst;
+
+ dst = kvmalloc_array(num_elements, size, GFP_KERNEL);
+ if (!dst)
+ return NULL;
+ memcpy(dst, (unsigned char *)psrc + offset, num_elements * size);
+
+ return (void *)dst;
+}
+
+static int
+svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src)
+{
+ int i;
+
+ for (i = 0; i < MAX_GPU_INSTANCE; i++) {
+ if (!src->dma_addr[i])
+ continue;
+ dst->dma_addr[i] = svm_range_copy_array(src->dma_addr[i],
+ sizeof(*src->dma_addr[i]), src->npages, 0);
+ if (!dst->dma_addr[i])
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
static int
svm_range_split_array(void *ppnew, void *ppold, size_t size,
uint64_t old_start, uint64_t old_n,
@@ -863,22 +905,16 @@ svm_range_split_array(void *ppnew, void *ppold, size_t size,
if (!pold)
return 0;
- new = kvmalloc_array(new_n, size, GFP_KERNEL);
+ d = (new_start - old_start) * size;
+ new = svm_range_copy_array(pold, size, new_n, d);
if (!new)
return -ENOMEM;
-
- d = (new_start - old_start) * size;
- memcpy(new, pold + d, new_n * size);
-
- old = kvmalloc_array(old_n, size, GFP_KERNEL);
+ d = (new_start == old_start) ? new_n * size : 0;
+ old = svm_range_copy_array(pold, size, old_n, d);
if (!old) {
kvfree(new);
return -ENOMEM;
}
-
- d = (new_start == old_start) ? new_n * size : 0;
- memcpy(old, pold + d, old_n * size);
-
kvfree(pold);
*(void **)ppold = old;
*(void **)ppnew = new;
@@ -1455,37 +1491,34 @@ struct svm_validate_context {
struct svm_range *prange;
bool intr;
DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
- struct ttm_validate_buffer tv[MAX_GPU_INSTANCE];
- struct list_head validate_list;
- struct ww_acquire_ctx ticket;
+ struct drm_exec exec;
};
-static int svm_range_reserve_bos(struct svm_validate_context *ctx)
+static int svm_range_reserve_bos(struct svm_validate_context *ctx, bool intr)
{
struct kfd_process_device *pdd;
struct amdgpu_vm *vm;
uint32_t gpuidx;
int r;
- INIT_LIST_HEAD(&ctx->validate_list);
- for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
- pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
- if (!pdd) {
- pr_debug("failed to find device idx %d\n", gpuidx);
- return -EINVAL;
- }
- vm = drm_priv_to_vm(pdd->drm_priv);
-
- ctx->tv[gpuidx].bo = &vm->root.bo->tbo;
- ctx->tv[gpuidx].num_shared = 4;
- list_add(&ctx->tv[gpuidx].head, &ctx->validate_list);
- }
+ drm_exec_init(&ctx->exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT: 0);
+ drm_exec_until_all_locked(&ctx->exec) {
+ for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
+ pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
+ if (!pdd) {
+ pr_debug("failed to find device idx %d\n", gpuidx);
+ r = -EINVAL;
+ goto unreserve_out;
+ }
+ vm = drm_priv_to_vm(pdd->drm_priv);
- r = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->validate_list,
- ctx->intr, NULL);
- if (r) {
- pr_debug("failed %d to reserve bo\n", r);
- return r;
+ r = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
+ drm_exec_retry_on_contention(&ctx->exec);
+ if (unlikely(r)) {
+ pr_debug("failed %d to reserve bo\n", r);
+ goto unreserve_out;
+ }
+ }
}
for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
@@ -1508,13 +1541,13 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx)
return 0;
unreserve_out:
- ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
+ drm_exec_fini(&ctx->exec);
return r;
}
static void svm_range_unreserve_bos(struct svm_validate_context *ctx)
{
- ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
+ drm_exec_fini(&ctx->exec);
}
static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
@@ -1522,6 +1555,8 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
struct kfd_process_device *pdd;
pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+ if (!pdd)
+ return NULL;
return SVM_ADEV_PGMAP_OWNER(pdd->dev->adev);
}
@@ -1596,12 +1631,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
}
if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
- if (!prange->mapped_to_gpu) {
+ bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
+ if (!prange->mapped_to_gpu ||
+ bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
r = 0;
goto free_ctx;
}
-
- bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
}
if (prange->actual_loc && !prange->ttm_res) {
@@ -1613,7 +1648,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
goto free_ctx;
}
- svm_range_reserve_bos(ctx);
+ svm_range_reserve_bos(ctx, intr);
p = container_of(prange->svms, struct kfd_process, svms);
owner = kfd_svm_page_owner(p, find_first_bit(ctx->bitmap,
@@ -1925,7 +1960,10 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
new = svm_range_new(old->svms, old->start, old->last, false);
if (!new)
return NULL;
-
+ if (svm_range_copy_dma_addrs(new, old)) {
+ svm_range_free(new, false);
+ return NULL;
+ }
if (old->svm_bo) {
new->ttm_res = old->ttm_res;
new->offset = old->offset;
@@ -2621,10 +2659,7 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
return -EFAULT;
}
- *is_heap_stack = (vma->vm_start <= vma->vm_mm->brk &&
- vma->vm_end >= vma->vm_mm->start_brk) ||
- (vma->vm_start <= vma->vm_mm->start_stack &&
- vma->vm_end >= vma->vm_mm->start_stack);
+ *is_heap_stack = vma_is_initial_heap(vma) || vma_is_initial_stack(vma);
start_limit = max(vma->vm_start >> PAGE_SHIFT,
(unsigned long)ALIGN_DOWN(addr, 2UL << 8));
@@ -3561,7 +3596,7 @@ out_unlock_range:
break;
}
- svm_range_debug_dump(svms);
+ dynamic_svm_range_dump(svms);
mutex_unlock(&svms->lock);
mmap_read_unlock(mm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 21b14510882b..9e668eeefb32 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -183,7 +183,7 @@ void svm_range_add_list_work(struct svm_range_list *svms,
void schedule_deferred_list_work(struct svm_range_list *svms);
void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
unsigned long offset, unsigned long npages);
-void svm_range_free_dma_mappings(struct svm_range *prange);
+void svm_range_free_dma_mappings(struct svm_range *prange, bool unmap_dma);
int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
uint64_t *svm_priv_data_size);
int kfd_criu_checkpoint_svm(struct kfd_process *p,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 61fc62f3e003..ff98fded9534 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -36,8 +36,8 @@
#include "kfd_crat.h"
#include "kfd_topology.h"
#include "kfd_device_queue_manager.h"
-#include "kfd_iommu.h"
#include "kfd_svm.h"
+#include "kfd_debug.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_ras.h"
#include "amdgpu.h"
@@ -988,17 +988,6 @@ static void find_system_memory(const struct dmi_header *dm,
}
}
-/*
- * Performance counters information is not part of CRAT but we would like to
- * put them in the sysfs under topology directory for Thunk to get the data.
- * This function is called before updating the sysfs.
- */
-static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
-{
- /* These are the only counters supported so far */
- return kfd_iommu_add_perf_counters(kdev);
-}
-
/* kfd_add_non_crat_information - Add information that is not currently
* defined in CRAT but is necessary for KFD topology
* @dev - topology device to which addition info is added
@@ -1013,25 +1002,6 @@ static void kfd_add_non_crat_information(struct kfd_topology_device *kdev)
/* TODO: For GPU node, rearrange code from kfd_topology_add_device */
}
-/* kfd_is_acpi_crat_invalid - CRAT from ACPI is valid only for AMD APU devices.
- * Ignore CRAT for all other devices. AMD APU is identified if both CPU
- * and GPU cores are present.
- * @device_list - topology device list created by parsing ACPI CRAT table.
- * @return - TRUE if invalid, FALSE is valid.
- */
-static bool kfd_is_acpi_crat_invalid(struct list_head *device_list)
-{
- struct kfd_topology_device *dev;
-
- list_for_each_entry(dev, device_list, list) {
- if (dev->node_props.cpu_cores_count &&
- dev->node_props.simd_count)
- return false;
- }
- pr_info("Ignoring ACPI CRAT on non-APU system\n");
- return true;
-}
-
int kfd_topology_init(void)
{
void *crat_image = NULL;
@@ -1062,48 +1032,25 @@ int kfd_topology_init(void)
*/
proximity_domain = 0;
- /*
- * Get the CRAT image from the ACPI. If ACPI doesn't have one
- * or if ACPI CRAT is invalid create a virtual CRAT.
- * NOTE: The current implementation expects all AMD APUs to have
- * CRAT. If no CRAT is available, it is assumed to be a CPU
- */
- ret = kfd_create_crat_image_acpi(&crat_image, &image_size);
- if (!ret) {
- ret = kfd_parse_crat_table(crat_image,
- &temp_topology_device_list,
- proximity_domain);
- if (ret ||
- kfd_is_acpi_crat_invalid(&temp_topology_device_list)) {
- kfd_release_topology_device_list(
- &temp_topology_device_list);
- kfd_destroy_crat_image(crat_image);
- crat_image = NULL;
- }
+ ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
+ COMPUTE_UNIT_CPU, NULL,
+ proximity_domain);
+ cpu_only_node = 1;
+ if (ret) {
+ pr_err("Error creating VCRAT table for CPU\n");
+ return ret;
}
- if (!crat_image) {
- ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
- COMPUTE_UNIT_CPU, NULL,
- proximity_domain);
- cpu_only_node = 1;
- if (ret) {
- pr_err("Error creating VCRAT table for CPU\n");
- return ret;
- }
-
- ret = kfd_parse_crat_table(crat_image,
- &temp_topology_device_list,
- proximity_domain);
- if (ret) {
- pr_err("Error parsing VCRAT table for CPU\n");
- goto err;
- }
+ ret = kfd_parse_crat_table(crat_image,
+ &temp_topology_device_list,
+ proximity_domain);
+ if (ret) {
+ pr_err("Error parsing VCRAT table for CPU\n");
+ goto err;
}
kdev = list_first_entry(&temp_topology_device_list,
struct kfd_topology_device, list);
- kfd_add_perf_to_topology(kdev);
down_write(&topology_lock);
kfd_topology_update_device_list(&temp_topology_device_list,
@@ -1189,8 +1136,7 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_node *gpu)
/* Discrete GPUs need their own topology device list
* entries. Don't assign them to CPU/APU nodes.
*/
- if (!gpu->kfd->use_iommu_v2 &&
- dev->node_props.cpu_cores_count)
+ if (dev->node_props.cpu_cores_count)
continue;
if (!dev->gpu && (dev->node_props.simd_count > 0)) {
@@ -1931,23 +1877,27 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED |
HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED;
- if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) {
- dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 |
- HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
+ if (kfd_dbg_has_ttmps_always_setup(dev->gpu))
+ dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
- if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 4, 2))
+ if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) {
+ if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3))
dev->node_props.debug_prop |=
- HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
+ HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9_4_3 |
+ HSA_DBG_WATCH_ADDR_MASK_HI_BIT_GFX9_4_3;
else
+ dev->node_props.debug_prop |=
+ HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 |
+ HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
+
+ if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2))
dev->node_props.capability |=
HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
} else {
dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 |
HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
- if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(11, 0, 0))
- dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
- else
+ if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0))
dev->node_props.capability |=
HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
}
@@ -1965,7 +1915,14 @@ int kfd_topology_add_device(struct kfd_node *gpu)
const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];
gpu_id = kfd_generate_gpu_id(gpu);
- pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
+ if (gpu->xcp && !gpu->xcp->ddev) {
+ dev_warn(gpu->adev->dev,
+ "Won't add GPU (ID: 0x%x) to topology since it has no drm node assigned.",
+ gpu_id);
+ return 0;
+ } else {
+ pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
+ }
/* Check to see if this gpu device exists in the topology_device_list.
* If so, assign the gpu to that device,
@@ -2076,10 +2033,7 @@ int kfd_topology_add_device(struct kfd_node *gpu)
* Overwrite ATS capability according to needs_iommu_device to fix
* potential missing corresponding bit in CRAT of BIOS.
*/
- if (dev->gpu->kfd->use_iommu_v2)
- dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
- else
- dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT;
+ dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT;
/* Fix errors in CZ CRAT.
* simd_count: Carrizo CRAT reports wrong simd_count, probably
@@ -2274,29 +2228,6 @@ int kfd_numa_node_to_apic_id(int numa_node_id)
return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
}
-void kfd_double_confirm_iommu_support(struct kfd_dev *gpu)
-{
- struct kfd_topology_device *dev;
-
- gpu->use_iommu_v2 = false;
-
- if (!gpu->device_info.needs_iommu_device)
- return;
-
- down_read(&topology_lock);
-
- /* Only use IOMMUv2 if there is an APU topology node with no GPU
- * assigned yet. This GPU will be assigned to it.
- */
- list_for_each_entry(dev, &topology_device_list, list)
- if (dev->node_props.cpu_cores_count &&
- dev->node_props.simd_count &&
- !dev->gpu)
- gpu->use_iommu_v2 = true;
-
- up_read(&topology_lock);
-}
-
#if defined(CONFIG_DEBUG_FS)
int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index cba2cd5ed9d1..dea32a9e5506 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -32,9 +32,12 @@
#define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 32
#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 6
+#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9_4_3 7
#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 7
#define HSA_DBG_WATCH_ADDR_MASK_HI_BIT \
(29 << HSA_DBG_WATCH_ADDR_MASK_HI_BIT_SHIFT)
+#define HSA_DBG_WATCH_ADDR_MASK_HI_BIT_GFX9_4_3 \
+ (30 << HSA_DBG_WATCH_ADDR_MASK_HI_BIT_SHIFT)
struct kfd_node_properties {
uint64_t hive_id;