summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
authorJonathan Kim <jonathan.kim@amd.com>2023-03-24 00:17:20 +0300
committerAlex Deucher <alexander.deucher@amd.com>2023-06-09 19:35:31 +0300
commit7cee6a6824a0429a6255abe91b5af01b9a01cd03 (patch)
tree281a17e83e234f3d62aae716cd640b4620dd3d7e /drivers/gpu/drm/amd/amdkfd
parent33f3437ae1194ef5dedbf275dcf74ed9c114647d (diff)
downloadlinux-7cee6a6824a0429a6255abe91b5af01b9a01cd03.tar.xz
drm/amdgpu: add configurable grace period for unmap queues
The HWS scheduler allows a grace period for wave completion prior to preemption for better performance by avoiding CWSR on waves that can potentially complete quickly. The debugger, on the other hand, will want to inspect wave status immediately after it actively triggers preemption (a suspend function to be provided). To minimize latency between preemption and debugger wave inspection, allow immediate preemption by setting the grace period to 0. Note that setting the preemption grace period to 0 will result in an infinite grace period being set due to a CP FW bug, so set it to 1 for now. Signed-off-by: Jonathan Kim <jonathan.kim@amd.com> Reviewed-by: Felix Kuehling <felix.kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c63
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c32
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c39
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h65
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h5
7 files changed, 191 insertions, 18 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 2baa0781eafc..0b88a64e61fe 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -46,10 +46,13 @@ static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
static int execute_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param);
+ uint32_t filter_param,
+ uint32_t grace_period);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param, bool reset);
+ uint32_t filter_param,
+ uint32_t grace_period,
+ bool reset);
static int map_queues_cpsch(struct device_queue_manager *dqm);
@@ -866,7 +869,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
if (!dqm->dev->kfd->shared_resources.enable_mes)
retval = unmap_queues_cpsch(dqm,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false);
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
else if (prev_active)
retval = remove_queue_mes(dqm, q, &pdd->qpd);
@@ -1042,7 +1045,8 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
retval = execute_queues_cpsch(dqm,
qpd->is_debug ?
KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+ USE_DEFAULT_GRACE_PERIOD);
out:
dqm_unlock(dqm);
@@ -1182,8 +1186,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
}
if (!dqm->dev->kfd->shared_resources.enable_mes)
retval = execute_queues_cpsch(dqm,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
-
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
atomic64_add(eviction_duration, &pdd->evict_duration_counter);
vm_not_acquired:
@@ -1525,6 +1528,9 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
init_sdma_bitmaps(dqm);
+ if (dqm->dev->kfd2kgd->get_iq_wait_times)
+ dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
+ &dqm->wait_times);
return 0;
}
@@ -1563,8 +1569,9 @@ static int start_cpsch(struct device_queue_manager *dqm)
dqm->is_hws_hang = false;
dqm->is_resetting = false;
dqm->sched_running = true;
+
if (!dqm->dev->kfd->shared_resources.enable_mes)
- execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
dqm_unlock(dqm);
return 0;
@@ -1589,7 +1596,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
if (!dqm->is_hws_hang) {
if (!dqm->dev->kfd->shared_resources.enable_mes)
- unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);
+ unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
else
remove_all_queues_mes(dqm);
}
@@ -1631,7 +1638,8 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
list_add(&kq->list, &qpd->priv_queue_list);
increment_queue_count(dqm, qpd, kq->queue);
qpd->is_debug = true;
- execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+ USE_DEFAULT_GRACE_PERIOD);
dqm_unlock(dqm);
return 0;
@@ -1645,7 +1653,8 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
list_del(&kq->list);
decrement_queue_count(dqm, qpd, kq->queue);
qpd->is_debug = false;
- execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
+ execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
+ USE_DEFAULT_GRACE_PERIOD);
/*
* Unconditionally decrement this counter, regardless of the queue's
* type.
@@ -1722,7 +1731,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
if (!dqm->dev->kfd->shared_resources.enable_mes)
retval = execute_queues_cpsch(dqm,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
else
retval = add_queue_mes(dqm, q, qpd);
if (retval)
@@ -1811,7 +1820,9 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
/* dqm->lock mutex has to be locked before calling this function */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param, bool reset)
+ uint32_t filter_param,
+ uint32_t grace_period,
+ bool reset)
{
int retval = 0;
struct mqd_manager *mqd_mgr;
@@ -1823,6 +1834,12 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
if (!dqm->active_runlist)
return retval;
+ if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
+ retval = pm_update_grace_period(&dqm->packet_mgr, grace_period);
+ if (retval)
+ return retval;
+ }
+
retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
if (retval)
return retval;
@@ -1855,6 +1872,13 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
return -ETIME;
}
+ /* We need to reset the grace period value for this device */
+ if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
+ if (pm_update_grace_period(&dqm->packet_mgr,
+ USE_DEFAULT_GRACE_PERIOD))
+ pr_err("Failed to reset grace period\n");
+ }
+
pm_release_ib(&dqm->packet_mgr);
dqm->active_runlist = false;
@@ -1870,7 +1894,7 @@ static int reset_queues_cpsch(struct device_queue_manager *dqm,
dqm_lock(dqm);
retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
- pasid, true);
+ pasid, USE_DEFAULT_GRACE_PERIOD, true);
dqm_unlock(dqm);
return retval;
@@ -1879,13 +1903,14 @@ static int reset_queues_cpsch(struct device_queue_manager *dqm,
/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter,
- uint32_t filter_param)
+ uint32_t filter_param,
+ uint32_t grace_period)
{
int retval;
if (dqm->is_hws_hang)
return -EIO;
- retval = unmap_queues_cpsch(dqm, filter, filter_param, false);
+ retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, false);
if (retval)
return retval;
@@ -1943,7 +1968,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
if (!dqm->dev->kfd->shared_resources.enable_mes) {
decrement_queue_count(dqm, qpd, q);
retval = execute_queues_cpsch(dqm,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+ USE_DEFAULT_GRACE_PERIOD);
if (retval == -ETIME)
qpd->reset_wavefronts = true;
} else {
@@ -2228,7 +2254,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
}
if (!dqm->dev->kfd->shared_resources.enable_mes)
- retval = execute_queues_cpsch(dqm, filter, 0);
+ retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD);
if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
@@ -2589,7 +2615,8 @@ int dqm_debugfs_hang_hws(struct device_queue_manager *dqm)
return r;
}
dqm->active_runlist = true;
- r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
+ r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES,
+ 0, USE_DEFAULT_GRACE_PERIOD);
dqm_unlock(dqm);
return r;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index cd4383bb207f..d4dd3b4acbf0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -37,6 +37,7 @@
#define KFD_MES_PROCESS_QUANTUM 100000
#define KFD_MES_GANG_QUANTUM 10000
+#define USE_DEFAULT_GRACE_PERIOD 0xffffffff
struct device_process_node {
struct qcm_process_device *qpd;
@@ -259,6 +260,8 @@ struct device_queue_manager {
/* used for GFX 9.4.3 only */
uint32_t current_logical_xcc_start;
+
+ uint32_t wait_times;
};
void device_queue_manager_init_cik(
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 2f54172e9175..401096c103b2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -370,6 +370,38 @@ out:
return retval;
}
+int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period) /* Build and submit a set-grace-period packet on pm->priv_queue; returns 0 on success, -ENOMEM if no packet buffer was obtained, otherwise the set_grace_period() result. */
+{
+ int retval = 0;
+ uint32_t *buffer, size;
+
+ size = pm->pmf->set_grace_period_size; /* packet size in bytes; 0 (as in kfd_vi_pm_funcs) means no packet is sent and 0 is returned */
+
+ mutex_lock(&pm->lock); /* held until the out: label */
+
+ if (size) {
+ kq_acquire_packet_buffer(pm->priv_queue,
+ size / sizeof(uint32_t), /* convert the byte size to a count of uint32_t words */
+ (unsigned int **)&buffer);
+
+ if (!buffer) { /* NOTE(review): buffer is only assigned by kq_acquire_packet_buffer(), whose return value is ignored; presumably it stores NULL on failure - confirm */
+ pr_err("Failed to allocate buffer on kernel queue\n");
+ retval = -ENOMEM;
+ goto out;
+ }
+
+ retval = pm->pmf->set_grace_period(pm, buffer, grace_period); /* per-ASIC hook fills the acquired buffer */
+ if (!retval)
+ kq_submit_packet(pm->priv_queue); /* packet built: submit it */
+ else
+ kq_rollback_packet(pm->priv_queue); /* build failed: roll the packet back instead of submitting */
+ }
+
+out:
+ mutex_unlock(&pm->lock);
+ return retval;
+}
+
int pm_send_unmap_queue(struct packet_manager *pm,
enum kfd_unmap_queues_filter filter,
uint32_t filter_param, bool reset)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 44cf3a5f6fdb..1fda6dcf84b1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -262,6 +262,41 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
return 0;
}
+static int pm_set_grace_period_v9(struct packet_manager *pm, /* Fill buffer with a WRITE_DATA packet carrying the register offset/value pair from build_grace_period_packet_info(); always returns 0. */
+ uint32_t *buffer,
+ uint32_t grace_period)
+{
+ struct pm4_mec_write_data_mmio *packet;
+ uint32_t reg_offset = 0;
+ uint32_t reg_data = 0;
+
+ pm->dqm->dev->kfd2kgd->build_grace_period_packet_info( /* NOTE(review): called without a NULL check, unlike get_iq_wait_times in initialize_cpsch - confirm every ASIC using these funcs provides it */
+ pm->dqm->dev->adev,
+ pm->dqm->wait_times,
+ grace_period,
+ &reg_offset,
+ &reg_data);
+
+ if (grace_period == USE_DEFAULT_GRACE_PERIOD)
+ reg_data = pm->dqm->wait_times; /* default requested: write back the wait_times value stored in the dqm, overriding the callback's reg_data */
+
+ packet = (struct pm4_mec_write_data_mmio *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio)); /* zero every field, including the reserved ones */
+
+ packet->header.u32All = pm_build_pm4_header(IT_WRITE_DATA,
+ sizeof(struct pm4_mec_write_data_mmio));
+
+ packet->bitfields2.dst_sel = dst_sel___write_data__mem_mapped_register; /* destination is a memory-mapped register */
+ packet->bitfields2.addr_incr =
+ addr_incr___write_data__do_not_increment_address;
+
+ packet->bitfields3.dst_mmreg_addr = reg_offset; /* register offset reported by the callback */
+
+ packet->data = reg_data; /* value to write to that register */
+
+ return 0;
+}
+
static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
enum kfd_unmap_queues_filter filter,
uint32_t filter_param, bool reset)
@@ -345,6 +380,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
.set_resources = pm_set_resources_v9,
.map_queues = pm_map_queues_v9,
.unmap_queues = pm_unmap_queues_v9,
+ .set_grace_period = pm_set_grace_period_v9,
.query_status = pm_query_status_v9,
.release_mem = NULL,
.map_process_size = sizeof(struct pm4_mes_map_process),
@@ -352,6 +388,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
+ .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio),
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = 0,
};
@@ -362,6 +399,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
.set_resources = pm_set_resources_v9,
.map_queues = pm_map_queues_v9,
.unmap_queues = pm_unmap_queues_v9,
+ .set_grace_period = pm_set_grace_period_v9,
.query_status = pm_query_status_v9,
.release_mem = NULL,
.map_process_size = sizeof(struct pm4_mes_map_process_aldebaran),
@@ -369,6 +407,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
+ .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio),
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = 0,
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
index faf4772ed317..c1199d06d131 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
@@ -303,6 +303,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
.set_resources = pm_set_resources_vi,
.map_queues = pm_map_queues_vi,
.unmap_queues = pm_unmap_queues_vi,
+ .set_grace_period = NULL,
.query_status = pm_query_status_vi,
.release_mem = pm_release_mem_vi,
.map_process_size = sizeof(struct pm4_mes_map_process),
@@ -310,6 +311,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
+ .set_grace_period_size = 0,
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = sizeof(struct pm4_mec_release_mem)
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
index 2ad708c64012..206f1960857f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
@@ -584,6 +584,71 @@ struct pm4_mec_release_mem {
#endif
+#ifndef PM4_MEC_WRITE_DATA_DEFINED /* guard so the WRITE_DATA definitions below are emitted only once */
+#define PM4_MEC_WRITE_DATA_DEFINED
+
+enum WRITE_DATA_dst_sel_enum { /* values for bitfields2.dst_sel */
+ dst_sel___write_data__mem_mapped_register = 0,
+ dst_sel___write_data__tc_l2 = 2,
+ dst_sel___write_data__gds = 3,
+ dst_sel___write_data__memory = 5,
+ dst_sel___write_data__memory_mapped_adc_persistent_state = 6,
+};
+
+enum WRITE_DATA_addr_incr_enum { /* values for bitfields2.addr_incr */
+ addr_incr___write_data__increment_address = 0,
+ addr_incr___write_data__do_not_increment_address = 1
+};
+
+enum WRITE_DATA_wr_confirm_enum { /* presumably the values for bitfields2.wr_confirm - not used in this patch */
+ wr_confirm___write_data__do_not_wait_for_write_confirmation = 0,
+ wr_confirm___write_data__wait_for_write_confirmation = 1
+};
+
+enum WRITE_DATA_cache_policy_enum { /* presumably the values for bitfields2.cache_policy - not used in this patch */
+ cache_policy___write_data__lru = 0,
+ cache_policy___write_data__stream = 1
+};
+
+
+struct pm4_mec_write_data_mmio { /* WRITE_DATA packet layout used by pm_set_grace_period_v9 to write one register value */
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /*header */
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct { /* the bit widths below sum to 32 */
+ unsigned int reserved1:8;
+ unsigned int dst_sel:4;
+ unsigned int reserved2:4;
+ unsigned int addr_incr:1;
+ unsigned int reserved3:2;
+ unsigned int resume_vf:1;
+ unsigned int wr_confirm:1;
+ unsigned int reserved4:4;
+ unsigned int cache_policy:2;
+ unsigned int reserved5:5;
+ } bitfields2;
+ unsigned int ordinal2; /* raw view of bitfields2 */
+ };
+
+ union {
+ struct {
+ unsigned int dst_mmreg_addr:18; /* destination register offset */
+ unsigned int reserved6:14;
+ } bitfields3;
+ unsigned int ordinal3; /* raw view of bitfields3 */
+ };
+
+ uint32_t reserved7; /* left zero by the memset in pm_set_grace_period_v9 */
+
+ uint32_t data; /* value written to the destination register */
+
+};
+
+#endif /* PM4_MEC_WRITE_DATA_DEFINED */
+
enum {
CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 1b272f879b4c..4c912b7735b5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1350,6 +1350,8 @@ struct packet_manager_funcs {
int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
enum kfd_unmap_queues_filter mode,
uint32_t filter_param, bool reset);
+ int (*set_grace_period)(struct packet_manager *pm, uint32_t *buffer,
+ uint32_t grace_period);
int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
uint64_t fence_address, uint64_t fence_value);
int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
@@ -1360,6 +1362,7 @@ struct packet_manager_funcs {
int set_resources_size;
int map_queues_size;
int unmap_queues_size;
+ int set_grace_period_size;
int query_status_size;
int release_mem_size;
};
@@ -1382,6 +1385,8 @@ int pm_send_unmap_queue(struct packet_manager *pm,
void pm_release_ib(struct packet_manager *pm);
+int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period);
+
/* Following PM funcs can be shared among VI and AI */
unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);