From 08fb97de03aa2205c6791301bd83a095abc1949c Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Fri, 30 Sep 2022 00:12:58 -0400 Subject: drm/sched: Add FIFO sched policy to run queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When many entities are competing for the same run queue on the same scheduler, we observe an unusually long wait times and some jobs get starved. This has been observed on GPUVis. The issue is due to the Round Robin policy used by schedulers to pick up the next entity's job queue for execution. Under stress of many entities and long job queues within entity some jobs could be stuck for very long time in it's entity's queue before being popped from the queue and executed while for other entities with smaller job queues a job might execute earlier even though that job arrived later then the job in the long queue. Fix: Add FIFO selection policy to entities in run queue, chose next entity on run queue in such order that if job on one entity arrived earlier then job on another entity the first job will start executing earlier regardless of the length of the entity's job queue. v2: Switch to rb tree structure for entities based on TS of oldest job waiting in the job queue of an entity. Improves next entity extraction to O(1). Entity TS update O(log N) where N is the number of entities in the run-queue Drop default option in module control parameter. v3: Various cosmetical fixes and minor refactoring of fifo update function. (Luben) v4: Switch drm_sched_rq_select_entity_fifo to in order search (Luben) v5: Fix up drm_sched_rq_select_entity_fifo loop (Luben) v6: Add missing drm_sched_rq_remove_fifo_locked v7: Fix ts sampling bug and more cosmetic stuff (Luben) v8: Fix module parameter string (Luben) Cc: Luben Tuikov Cc: Christian König Cc: Direct Rendering Infrastructure - Development Cc: AMD Graphics Signed-off-by: Andrey Grodzovsky Tested-by: Yunxiang Li (Teddy) Signed-off-by: Luben Tuikov Reviewed-by: Luben Tuikov Link: https://patchwork.freedesktop.org/patch/msgid/20220930041258.1050247-1-luben.tuikov@amd.com --- drivers/gpu/drm/scheduler/sched_entity.c | 20 +++++++ drivers/gpu/drm/scheduler/sched_main.c | 96 +++++++++++++++++++++++++++++++- 2 files changed, 113 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/scheduler') diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 6b25b2f4f5a3..7060e4ed5a31 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -73,6 +73,7 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, entity->priority = priority; entity->sched_list = num_sched_list > 1 ? sched_list : NULL; entity->last_scheduled = NULL; + RB_CLEAR_NODE(&entity->rb_tree_node); if(num_sched_list) entity->rq = &sched_list[0]->sched_rq[entity->priority]; @@ -443,6 +444,19 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) smp_wmb(); spsc_queue_pop(&entity->job_queue); + + /* + * Update the entity's location in the min heap according to + * the timestamp of the next job, if any. + */ + if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) { + struct drm_sched_job *next; + + next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); + if (next) + drm_sched_rq_update_fifo(entity, next->submit_ts); + } + return sched_job; } @@ -507,6 +521,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) atomic_inc(entity->rq->sched->score); WRITE_ONCE(entity->last_user, current->group_leader); first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node); + sched_job->submit_ts = ktime_get(); /* first job wakes up scheduler */ if (first) { @@ -518,8 +533,13 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) DRM_ERROR("Trying to push to a killed entity\n"); return; } + drm_sched_rq_add_entity(entity->rq, entity); spin_unlock(&entity->rq_lock); + + if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) + drm_sched_rq_update_fifo(entity, sched_job->submit_ts); + drm_sched_wakeup(entity->rq->sched); } } diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 4f2395d1a791..ce86b03e8386 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -62,6 +62,55 @@ #define to_drm_sched_job(sched_job) \ container_of((sched_job), struct drm_sched_job, queue_node) +int drm_sched_policy = DRM_SCHED_POLICY_RR; + +/** + * DOC: sched_policy (int) + * Used to override default entities scheduling policy in a run queue. + */ +MODULE_PARM_DESC(sched_policy, "Specify schedule policy for entities on a runqueue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin (default), " __stringify(DRM_SCHED_POLICY_FIFO) " = use FIFO."); +module_param_named(sched_policy, drm_sched_policy, int, 0444); + +static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a, + const struct rb_node *b) +{ + struct drm_sched_entity *ent_a = rb_entry((a), struct drm_sched_entity, rb_tree_node); + struct drm_sched_entity *ent_b = rb_entry((b), struct drm_sched_entity, rb_tree_node); + + return ktime_before(ent_a->oldest_job_waiting, ent_b->oldest_job_waiting); +} + +static inline void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity) +{ + struct drm_sched_rq *rq = entity->rq; + + if (!RB_EMPTY_NODE(&entity->rb_tree_node)) { + rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root); + RB_CLEAR_NODE(&entity->rb_tree_node); + } +} + +void drm_sched_rq_update_fifo(struct drm_sched_entity *entity, ktime_t ts) +{ + /* + * Both locks need to be grabbed, one to protect from entity->rq change + * for entity from within concurrent drm_sched_entity_select_rq and the + * other to update the rb tree structure. + */ + spin_lock(&entity->rq_lock); + spin_lock(&entity->rq->lock); + + drm_sched_rq_remove_fifo_locked(entity); + + entity->oldest_job_waiting = ts; + + rb_add_cached(&entity->rb_tree_node, &entity->rq->rb_tree_root, + drm_sched_entity_compare_before); + + spin_unlock(&entity->rq->lock); + spin_unlock(&entity->rq_lock); +} + /** * drm_sched_rq_init - initialize a given run queue struct * @@ -75,6 +124,7 @@ static void drm_sched_rq_init(struct drm_gpu_scheduler *sched, { spin_lock_init(&rq->lock); INIT_LIST_HEAD(&rq->entities); + rq->rb_tree_root = RB_ROOT_CACHED; rq->current_entity = NULL; rq->sched = sched; } @@ -92,9 +142,12 @@ void drm_sched_rq_add_entity(struct drm_sched_rq *rq, { if (!list_empty(&entity->list)) return; + spin_lock(&rq->lock); + atomic_inc(rq->sched->score); list_add_tail(&entity->list, &rq->entities); + spin_unlock(&rq->lock); } @@ -111,23 +164,30 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, { if (list_empty(&entity->list)) return; + spin_lock(&rq->lock); + atomic_dec(rq->sched->score); list_del_init(&entity->list); + if (rq->current_entity == entity) rq->current_entity = NULL; + + if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) + drm_sched_rq_remove_fifo_locked(entity); + spin_unlock(&rq->lock); } /** - * drm_sched_rq_select_entity - Select an entity which could provide a job to run + * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run * * @rq: scheduler run queue to check. * * Try to find a ready entity, returns NULL if none found. */ static struct drm_sched_entity * -drm_sched_rq_select_entity(struct drm_sched_rq *rq) +drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq) { struct drm_sched_entity *entity; @@ -163,6 +223,34 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq) return NULL; } +/** + * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run + * + * @rq: scheduler run queue to check. + * + * Find oldest waiting ready entity, returns NULL if none found. + */ +static struct drm_sched_entity * +drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq) +{ + struct rb_node *rb; + + spin_lock(&rq->lock); + for (rb = rb_first_cached(&rq->rb_tree_root); rb; rb = rb_next(rb)) { + struct drm_sched_entity *entity; + + entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node); + if (drm_sched_entity_is_ready(entity)) { + rq->current_entity = entity; + reinit_completion(&entity->entity_idle); + break; + } + } + spin_unlock(&rq->lock); + + return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL; +} + /** * drm_sched_job_done - complete a job * @s_job: pointer to the job which is done @@ -803,7 +891,9 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched) /* Kernel run queue has higher priority than normal run queue*/ for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { - entity = drm_sched_rq_select_entity(&sched->sched_rq[i]); + entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ? + drm_sched_rq_select_entity_fifo(&sched->sched_rq[i]) : + drm_sched_rq_select_entity_rr(&sched->sched_rq[i]); if (entity) break; } -- cgit v1.2.3 From 7c022f516fbe2d4b1b4abdd1c4b7687ec81a6ed9 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 27 Sep 2022 18:43:03 +0200 Subject: drm/scheduler: fix fence ref counting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We leaked dependency fences when processes were beeing killed. Additional to that grab a reference to the last scheduled fence. Signed-off-by: Christian König Reviewed-by: Andrey Grodzovsky Link: https://patchwork.freedesktop.org/patch/msgid/20220929180151.139751-1-christian.koenig@amd.com --- drivers/gpu/drm/scheduler/sched_entity.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/scheduler') diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 7060e4ed5a31..dac552f0a75b 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -208,6 +208,7 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, struct drm_sched_job *job = container_of(cb, struct drm_sched_job, finish_cb); + dma_fence_put(f); INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work); schedule_work(&job->work); } @@ -235,8 +236,10 @@ static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity) struct drm_sched_fence *s_fence = job->s_fence; /* Wait for all dependencies to avoid data corruptions */ - while ((f = drm_sched_job_dependency(job, entity))) + while ((f = drm_sched_job_dependency(job, entity))) { dma_fence_wait(f, false); + dma_fence_put(f); + } drm_sched_fence_scheduled(s_fence); dma_fence_set_error(&s_fence->finished, -ESRCH); @@ -251,6 +254,7 @@ static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity) continue; } + dma_fence_get(entity->last_scheduled); r = dma_fence_add_callback(entity->last_scheduled, &job->finish_cb, drm_sched_entity_kill_jobs_cb); -- cgit v1.2.3 From 65b698bf400f00ab452d5f27ecad84ab8c826014 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 29 Sep 2022 19:53:53 +0200 Subject: drm/sched: add missing NULL check in drm_sched_get_cleanup_job v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we would crash if the job is not resubmitted. v2: fix second usage of s_fence->parent as well. Signed-off-by: Christian König Reviewed-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20221004132831.134986-1-christian.koenig@amd.com --- drivers/gpu/drm/scheduler/sched_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/scheduler') diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index ce86b03e8386..4cc59bae38dd 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -919,7 +919,8 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched) job = list_first_entry_or_null(&sched->pending_list, struct drm_sched_job, list); - if (job && dma_fence_is_signaled(job->s_fence->parent)) { + if (job && (!job->s_fence->parent || + dma_fence_is_signaled(job->s_fence->parent))) { /* remove job from pending_list */ list_del_init(&job->list); @@ -929,7 +930,7 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched) next = list_first_entry_or_null(&sched->pending_list, typeof(*next), list); - if (next) { + if (next && job->s_fence->parent) { next->s_fence->scheduled.timestamp = job->s_fence->parent->timestamp; /* start TO timer for next job */ -- cgit v1.2.3