summary | refs | log | tree | commit | diff
path: root/drivers/gpu/drm
diff options
context:
space:
mode:
author Mukul Joshi <mukul.joshi@amd.com> 2022-06-10 16:41:29 +0300
committer Alex Deucher <alexander.deucher@amd.com> 2023-06-09 16:45:38 +0300
commit 0c7315e7d5ef9b36ca4db32ffeb34a187cbaf231 (patch)
tree 6507b0c864294533e748770edfbdfb50c8d831e3 /drivers/gpu/drm
parent fe1f05df5919c67c3add49efb55e251a8d78ee4e (diff)
download linux-0c7315e7d5ef9b36ca4db32ffeb34a187cbaf231.tar.xz
drm/amdkfd: Add device repartition support
GFX9.4.3 will support dynamic repartitioning of the GPU through sysfs.
Add device repartitioning support in KFD to repartition the GPU from one
mode to another.

v2: squash in fix ("drm/amdkfd: Fix warning kgd2kfd_unlock_kfd defined but not used")

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 10
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 13
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 22
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 5
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_device.c 21
5 files changed, 66 insertions, 5 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 9d19c7ceda3f..bbbfe9ec4adf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -773,3 +773,13 @@ bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
else
return false;
}
+
+int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev)
+{
+ return kgd2kfd_check_and_lock_kfd();
+}
+
+void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev)
+{
+ kgd2kfd_unlock_kfd();
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index df07e212c21e..d1d643a050a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -151,6 +151,8 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev);
+int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev);
+void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev);
int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
@@ -373,6 +375,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask);
+int kgd2kfd_check_and_lock_kfd(void);
+void kgd2kfd_unlock_kfd(void);
#else
static inline int kgd2kfd_init(void)
{
@@ -438,5 +442,14 @@ static inline
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
{
}
+
+static inline int kgd2kfd_check_and_lock_kfd(void)
+{
+ return 0;
+}
+
+static inline void kgd2kfd_unlock_kfd(void)
+{
+}
#endif
#endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 069b259f384c..69bac5b801ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1233,10 +1233,30 @@ static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
return -EINVAL;
}
+ if (!adev->kfd.init_complete)
+ return -EPERM;
+
mutex_lock(&adev->gfx.partition_mutex);
- ret = adev->gfx.funcs->switch_partition_mode(adev, mode);
+ if (mode == adev->gfx.funcs->query_partition_mode(adev))
+ goto out;
+
+ ret = amdgpu_amdkfd_check_and_lock_kfd(adev);
+ if (ret)
+ goto out;
+
+ amdgpu_amdkfd_device_fini_sw(adev);
+
+ adev->gfx.funcs->switch_partition_mode(adev, mode);
+
+ amdgpu_amdkfd_device_probe(adev);
+ amdgpu_amdkfd_device_init(adev);
+ /* If KFD init failed, return failure */
+ if (!adev->kfd.init_complete)
+ ret = -EIO;
+ amdgpu_amdkfd_unlock_kfd(adev);
+out:
mutex_unlock(&adev->gfx.partition_mutex);
if (ret)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index c776fc5884de..47d8ac64e877 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -675,7 +675,7 @@ static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev,
static enum amdgpu_gfx_partition
gfx_v9_4_3_query_compute_partition(struct amdgpu_device *adev)
{
- enum amdgpu_gfx_partition mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+ enum amdgpu_gfx_partition mode = adev->gfx.partition_mode;
if (adev->nbio.funcs->get_compute_partition_mode)
mode = adev->nbio.funcs->get_compute_partition_mode(adev);
@@ -689,9 +689,6 @@ static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev,
u32 tmp = 0;
int num_xcc_per_partition, i, num_xcc;
- if (mode == adev->gfx.partition_mode)
- return mode;
-
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
switch (mode) {
case AMDGPU_SPX_PARTITION_MODE:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index eb2b44fddf74..293787290e36 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -1356,6 +1356,27 @@ unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_node *node)
kfd_get_num_sdma_engines(node);
}
+int kgd2kfd_check_and_lock_kfd(void)
+{
+ mutex_lock(&kfd_processes_mutex);
+ if (!hash_empty(kfd_processes_table) || kfd_is_locked()) {
+ mutex_unlock(&kfd_processes_mutex);
+ return -EBUSY;
+ }
+
+ ++kfd_locked;
+ mutex_unlock(&kfd_processes_mutex);
+
+ return 0;
+}
+
+void kgd2kfd_unlock_kfd(void)
+{
+ mutex_lock(&kfd_processes_mutex);
+ --kfd_locked;
+ mutex_unlock(&kfd_processes_mutex);
+}
+
#if defined(CONFIG_DEBUG_FS)
/* This function will send a package to HIQ to hang the HWS