diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 63 |
1 files changed, 36 insertions, 27 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index e686ce2bf3b3..d3eaa1549bd7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1128,6 +1128,9 @@ static int set_sched_resources(struct device_queue_manager *dqm) static int initialize_cpsch(struct device_queue_manager *dqm) { + uint64_t num_sdma_queues; + uint64_t num_xgmi_sdma_queues; + pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); mutex_init(&dqm->lock_hidden); @@ -1136,8 +1139,18 @@ static int initialize_cpsch(struct device_queue_manager *dqm) dqm->active_cp_queue_count = 0; dqm->gws_queue_count = 0; dqm->active_runlist = false; - dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm)); - dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm)); + + num_sdma_queues = get_num_sdma_queues(dqm); + if (num_sdma_queues >= BITS_PER_TYPE(dqm->sdma_bitmap)) + dqm->sdma_bitmap = ULLONG_MAX; + else + dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1); + + num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm); + if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap)) + dqm->xgmi_sdma_bitmap = ULLONG_MAX; + else + dqm->xgmi_sdma_bitmap = (BIT_ULL(num_xgmi_sdma_queues) - 1); INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); @@ -1167,7 +1180,7 @@ static int start_cpsch(struct device_queue_manager *dqm) if (retval) goto fail_allocate_vidmem; - dqm->fence_addr = dqm->fence_mem->cpu_ptr; + dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr; dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; init_interrupts(dqm); @@ -1340,8 +1353,8 @@ out: return retval; } -int amdkfd_fence_wait_timeout(unsigned int *fence_addr, - unsigned int fence_value, +int amdkfd_fence_wait_timeout(uint64_t *fence_addr, + uint64_t fence_value, unsigned int timeout_ms) { unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies; @@ -1393,6 +1406,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, uint32_t filter_param) { int retval = 0; + struct mqd_manager *mqd_mgr; if (!dqm->sched_running) return 0; @@ -1424,6 +1438,22 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, return retval; } + /* In the current MEC firmware implementation, if compute queue + * doesn't response to the preemption request in time, HIQ will + * abandon the unmap request without returning any timeout error + * to driver. Instead, MEC firmware will log the doorbell of the + * unresponding compute queue to HIQ.MQD.queue_doorbell_id fields. + * To make sure the queue unmap was successful, driver need to + * check those fields + */ + mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; + if (mqd_mgr->read_doorbell_id(dqm->packets.priv_queue->queue->mqd)) { + pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n"); + while (halt_if_hws_hang) + schedule(); + return -ETIME; + } + pm_release_ib(&dqm->packets); dqm->active_runlist = false; @@ -1596,26 +1626,6 @@ out: return retval; } -static int set_trap_handler(struct device_queue_manager *dqm, - struct qcm_process_device *qpd, - uint64_t tba_addr, - uint64_t tma_addr) -{ - uint64_t *tma; - - if (dqm->dev->cwsr_enabled) { - /* Jump from CWSR trap handler to user trap */ - tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET); - tma[0] = tba_addr; - tma[1] = tma_addr; - } else { - qpd->tba_addr = tba_addr; - qpd->tma_addr = tma_addr; - } - - return 0; -} - static int process_termination_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { @@ -1859,7 +1869,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; dqm->ops.set_cache_memory_policy = set_cache_memory_policy; - dqm->ops.set_trap_handler = set_trap_handler; dqm->ops.process_termination = process_termination_cpsch; dqm->ops.evict_process_queues = evict_process_queues_cpsch; dqm->ops.restore_process_queues = restore_process_queues_cpsch; @@ -1878,7 +1887,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.initialize = initialize_nocpsch; dqm->ops.uninitialize = uninitialize; dqm->ops.set_cache_memory_policy = set_cache_memory_policy; - dqm->ops.set_trap_handler = set_trap_handler; dqm->ops.process_termination = process_termination_nocpsch; dqm->ops.evict_process_queues = evict_process_queues_nocpsch; dqm->ops.restore_process_queues = @@ -1918,6 +1926,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case CHIP_RAVEN: case CHIP_RENOIR: case CHIP_ARCTURUS: + case CHIP_ALDEBARAN: device_queue_manager_init_v9(&dqm->asic_ops); break; case CHIP_NAVI10: |