From 35963cf2cd25eeea8bdb4d02853dac1e66fb13a0 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 30 Oct 2023 20:24:35 -0700 Subject: drm/sched: Add drm_sched_wqueue_* helpers Add scheduler wqueue ready, stop, and start helpers to hide the implementation details of the scheduler from the drivers. v2: - s/sched_submit/sched_wqueue (Luben) - Remove the extra white line after the return-statement (Luben) - update drm_sched_wqueue_ready comment (Luben) Cc: Luben Tuikov Signed-off-by: Matthew Brost Reviewed-by: Luben Tuikov Link: https://lore.kernel.org/r/20231031032439.1558703-2-matthew.brost@intel.com Signed-off-by: Luben Tuikov --- include/drm/gpu_scheduler.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/drm') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 653e0eec9743..7e622c18d336 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -553,6 +553,9 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity *entity, void drm_sched_job_cleanup(struct drm_sched_job *job); void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched); +bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched); +void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched); +void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched); void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad); void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery); void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched); -- cgit v1.2.3 From a6149f0393699308fb00149be913044977bceb56 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 30 Oct 2023 20:24:36 -0700 Subject: drm/sched: Convert drm scheduler to use a work queue rather than kthread In Xe, the new Intel GPU driver, a choice has been made to have a 1 to 1 mapping between a drm_gpu_scheduler and drm_sched_entity. At first this seems a bit odd but let us explain the reasoning below. 1. In Xe the submission order from multiple drm_sched_entity is not guaranteed to match the completion order, even if targeting the same hardware engine. This is because in Xe we have a firmware scheduler, the GuC, which is allowed to reorder, timeslice, and preempt submissions. If a shared drm_gpu_scheduler is used across multiple drm_sched_entity, the TDR falls apart, as the TDR expects submission order == completion order. Using a dedicated drm_gpu_scheduler per drm_sched_entity solves this problem. 2. In Xe, submissions are done by programming a ring buffer (circular buffer), and a drm_gpu_scheduler provides a limit on the number of in-flight jobs; if that limit is set to RING_SIZE / MAX_SIZE_PER_JOB, we get flow control on the ring for free. A problem with this design is that a drm_gpu_scheduler currently uses a kthread for submission / job cleanup, which doesn't scale when a large number of drm_gpu_scheduler instances are used. To work around the scaling issue, use a worker rather than a kthread for submission / job cleanup.
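As a rough, hypothetical sketch of the above (not taken from this patch): a driver following the 1 to 1 model could size hw_submission from its ring, so the scheduler's job limit doubles as ring flow control, and pass NULL for the new submit_wq argument so the scheduler allocates its own ordered workqueue. The EXAMPLE_* constants and example_* names are placeholders.

#include <linux/jiffies.h>
#include <linux/workqueue.h>
#include <drm/gpu_scheduler.h>

#define EXAMPLE_RING_SIZE	(16 * 1024)	/* placeholder: ring size in bytes */
#define EXAMPLE_MAX_JOB_SIZE	256		/* placeholder: worst-case ring bytes per job */

static int example_queue_sched_init(struct drm_gpu_scheduler *sched,
				    const struct drm_sched_backend_ops *ops,
				    struct device *dev)
{
	/* Sizing the job limit to the ring gives flow control for free. */
	u32 hw_submission = EXAMPLE_RING_SIZE / EXAMPLE_MAX_JOB_SIZE;

	/* NULL submit_wq: the scheduler allocates its own ordered workqueue. */
	return drm_sched_init(sched, ops, NULL,
			      DRM_SCHED_PRIORITY_COUNT, hw_submission,
			      0 /* hang_limit */,
			      msecs_to_jiffies(500) /* placeholder timeout */,
			      NULL /* timeout_wq */, NULL /* score */,
			      "example-sched", dev);
}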
v2: - (Rob Clark) Fix msm build - Pass in run work queue v3: - (Boris) don't have loop in worker v4: - (Tvrtko) break out submit ready, stop, start helpers into own patch v5: - (Boris) default to ordered work queue v6: - (Luben / checkpatch) fix alignment in msm_ringbuffer.c - (Luben) s/drm_sched_submit_queue/drm_sched_wqueue_enqueue - (Luben) Update comment for drm_sched_wqueue_enqueue - (Luben) Positive check for submit_wq in drm_sched_init - (Luben) s/alloc_submit_wq/own_submit_wq v7: - (Luben) s/drm_sched_wqueue_enqueue/drm_sched_run_job_queue v8: - (Luben) Adjust var names / comments Signed-off-by: Matthew Brost Reviewed-by: Luben Tuikov Link: https://lore.kernel.org/r/20231031032439.1558703-3-matthew.brost@intel.com Signed-off-by: Luben Tuikov --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 +- drivers/gpu/drm/lima/lima_sched.c | 2 +- drivers/gpu/drm/msm/msm_ringbuffer.c | 2 +- drivers/gpu/drm/nouveau/nouveau_sched.c | 2 +- drivers/gpu/drm/panfrost/panfrost_job.c | 2 +- drivers/gpu/drm/scheduler/sched_main.c | 131 +++++++++++++++-------------- drivers/gpu/drm/v3d/v3d_sched.c | 10 +-- include/drm/gpu_scheduler.h | 14 +-- 9 files changed, 86 insertions(+), 81 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a7f7afcb2bf0..203dfcea82b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2279,7 +2279,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) break; } - r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, + r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, ring->num_hw_submission, 0, timeout, adev->reset_domain->wq, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 9b79f218e21a..c4b04b0dee16 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -134,7 +134,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu) { int ret; - ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, + ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, etnaviv_hw_jobs_limit, etnaviv_job_hang_limit, msecs_to_jiffies(500), NULL, NULL, diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index 295f0353a02e..aa030e1f7cda 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -488,7 +488,7 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name) INIT_WORK(&pipe->recover_work, lima_sched_recover_work); - return drm_sched_init(&pipe->base, &lima_sched_ops, + return drm_sched_init(&pipe->base, &lima_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, 1, lima_job_hang_limit, diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 95257ab0185d..4968568e3b54 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -94,7 +94,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, /* currently managing hangcheck ourselves: */ sched_timeout = MAX_SCHEDULE_TIMEOUT; - ret = drm_sched_init(&ring->sched, &msm_sched_ops, + ret = drm_sched_init(&ring->sched, &msm_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, num_hw_submissions, 0, sched_timeout, NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev); diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c 
b/drivers/gpu/drm/nouveau/nouveau_sched.c index e62b04109b2f..7e64b5ef90fb 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.c +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c @@ -429,7 +429,7 @@ int nouveau_sched_init(struct nouveau_drm *drm) if (!drm->sched_wq) return -ENOMEM; - return drm_sched_init(sched, &nouveau_sched_ops, + return drm_sched_init(sched, &nouveau_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit, NULL, NULL, "nouveau_sched", drm->dev->dev); diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index ecd2e035147f..6d89e24322db 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -852,7 +852,7 @@ int panfrost_job_init(struct panfrost_device *pfdev) js->queue[j].fence_context = dma_fence_context_alloc(1); ret = drm_sched_init(&js->queue[j].sched, - &panfrost_sched_ops, + &panfrost_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, nentries, 0, msecs_to_jiffies(JOB_TIMEOUT_MS), diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 54c1c5fe01ba..d1ae05bded15 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -48,7 +48,6 @@ * through the jobs entity pointer. */ -#include #include #include #include @@ -256,6 +255,16 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq) return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL; } +/** + * drm_sched_run_job_queue - enqueue run-job work + * @sched: scheduler instance + */ +static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched) +{ + if (!READ_ONCE(sched->pause_submit)) + queue_work(sched->submit_wq, &sched->work_run_job); +} + /** * drm_sched_job_done - complete a job * @s_job: pointer to the job which is done @@ -275,7 +284,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result) dma_fence_get(&s_fence->finished); drm_sched_fence_finished(s_fence, result); dma_fence_put(&s_fence->finished); - wake_up_interruptible(&sched->wake_up_worker); + drm_sched_run_job_queue(sched); } /** @@ -874,7 +883,7 @@ static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched) void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched) { if (drm_sched_can_queue(sched)) - wake_up_interruptible(&sched->wake_up_worker); + drm_sched_run_job_queue(sched); } /** @@ -985,60 +994,41 @@ drm_sched_pick_best(struct drm_gpu_scheduler **sched_list, EXPORT_SYMBOL(drm_sched_pick_best); /** - * drm_sched_blocked - check if the scheduler is blocked + * drm_sched_run_job_work - main scheduler thread * - * @sched: scheduler instance - * - * Returns true if blocked, otherwise false. + * @w: run job work */ -static bool drm_sched_blocked(struct drm_gpu_scheduler *sched) +static void drm_sched_run_job_work(struct work_struct *w) { - if (kthread_should_park()) { - kthread_parkme(); - return true; - } - - return false; -} - -/** - * drm_sched_main - main scheduler thread - * - * @param: scheduler instance - * - * Returns 0. 
- */ -static int drm_sched_main(void *param) -{ - struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param; + struct drm_gpu_scheduler *sched = + container_of(w, struct drm_gpu_scheduler, work_run_job); + struct drm_sched_entity *entity; + struct drm_sched_job *cleanup_job; int r; - sched_set_fifo_low(current); + if (READ_ONCE(sched->pause_submit)) + return; - while (!kthread_should_stop()) { - struct drm_sched_entity *entity = NULL; - struct drm_sched_fence *s_fence; - struct drm_sched_job *sched_job; - struct dma_fence *fence; - struct drm_sched_job *cleanup_job = NULL; + cleanup_job = drm_sched_get_cleanup_job(sched); + entity = drm_sched_select_entity(sched); - wait_event_interruptible(sched->wake_up_worker, - (cleanup_job = drm_sched_get_cleanup_job(sched)) || - (!drm_sched_blocked(sched) && - (entity = drm_sched_select_entity(sched))) || - kthread_should_stop()); + if (!entity && !cleanup_job) + return; /* No more work */ - if (cleanup_job) - sched->ops->free_job(cleanup_job); + if (cleanup_job) + sched->ops->free_job(cleanup_job); - if (!entity) - continue; + if (entity) { + struct dma_fence *fence; + struct drm_sched_fence *s_fence; + struct drm_sched_job *sched_job; sched_job = drm_sched_entity_pop_job(entity); - if (!sched_job) { complete_all(&entity->entity_idle); - continue; + if (!cleanup_job) + return; /* No more work */ + goto again; } s_fence = sched_job->s_fence; @@ -1069,7 +1059,9 @@ static int drm_sched_main(void *param) wake_up(&sched->job_scheduled); } - return 0; + +again: + drm_sched_run_job_queue(sched); } /** @@ -1077,6 +1069,8 @@ static int drm_sched_main(void *param) * * @sched: scheduler instance * @ops: backend operations for this scheduler + * @submit_wq: workqueue to use for submission. If NULL, an ordered wq is + * allocated and used * @num_rqs: number of runqueues, one for each priority, up to DRM_SCHED_PRIORITY_COUNT * @hw_submission: number of hw submissions that can be in flight * @hang_limit: number of times to allow a job to hang before dropping it @@ -1091,6 +1085,7 @@ static int drm_sched_main(void *param) */ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_backend_ops *ops, + struct workqueue_struct *submit_wq, u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit, long timeout, struct workqueue_struct *timeout_wq, atomic_t *score, const char *name, struct device *dev) @@ -1121,14 +1116,22 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, return 0; } + if (submit_wq) { + sched->submit_wq = submit_wq; + sched->own_submit_wq = false; + } else { + sched->submit_wq = alloc_ordered_workqueue(name, 0); + if (!sched->submit_wq) + return -ENOMEM; + + sched->own_submit_wq = true; + } + ret = -ENOMEM; sched->sched_rq = kmalloc_array(num_rqs, sizeof(*sched->sched_rq), GFP_KERNEL | __GFP_ZERO); - if (!sched->sched_rq) { - drm_err(sched, "%s: out of memory for sched_rq\n", __func__); - return -ENOMEM; - } + if (!sched->sched_rq) + goto Out_free; sched->num_rqs = num_rqs; - ret = -ENOMEM; for (i = DRM_SCHED_PRIORITY_MIN; i < sched->num_rqs; i++) { sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL); if (!sched->sched_rq[i]) @@ -1136,31 +1139,26 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, drm_sched_rq_init(sched, sched->sched_rq[i]); } - init_waitqueue_head(&sched->wake_up_worker); init_waitqueue_head(&sched->job_scheduled); INIT_LIST_HEAD(&sched->pending_list); spin_lock_init(&sched->job_list_lock); atomic_set(&sched->hw_rq_count, 0); INIT_DELAYED_WORK(&sched->work_tdr, 
drm_sched_job_timedout); + INIT_WORK(&sched->work_run_job, drm_sched_run_job_work); atomic_set(&sched->_score, 0); atomic64_set(&sched->job_id_count, 0); - - /* Each scheduler will run on a seperate kernel thread */ - sched->thread = kthread_run(drm_sched_main, sched, sched->name); - if (IS_ERR(sched->thread)) { - ret = PTR_ERR(sched->thread); - sched->thread = NULL; - DRM_DEV_ERROR(sched->dev, "Failed to create scheduler for %s.\n", name); - goto Out_unroll; - } + sched->pause_submit = false; sched->ready = true; return 0; Out_unroll: for (--i ; i >= DRM_SCHED_PRIORITY_MIN; i--) kfree(sched->sched_rq[i]); +Out_free: kfree(sched->sched_rq); sched->sched_rq = NULL; + if (sched->own_submit_wq) + destroy_workqueue(sched->submit_wq); drm_err(sched, "%s: Failed to setup GPU scheduler--out of memory\n", __func__); return ret; } @@ -1178,8 +1176,7 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched) struct drm_sched_entity *s_entity; int i; - if (sched->thread) - kthread_stop(sched->thread); + drm_sched_wqueue_stop(sched); for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { struct drm_sched_rq *rq = sched->sched_rq[i]; @@ -1202,6 +1199,8 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched) /* Confirm no work left behind accessing device structures */ cancel_delayed_work_sync(&sched->work_tdr); + if (sched->own_submit_wq) + destroy_workqueue(sched->submit_wq); sched->ready = false; kfree(sched->sched_rq); sched->sched_rq = NULL; @@ -1262,7 +1261,7 @@ EXPORT_SYMBOL(drm_sched_increase_karma); */ bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched) { - return !!sched->thread; + return sched->ready; } EXPORT_SYMBOL(drm_sched_wqueue_ready); @@ -1273,7 +1272,8 @@ EXPORT_SYMBOL(drm_sched_wqueue_ready); */ void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched) { - kthread_park(sched->thread); + WRITE_ONCE(sched->pause_submit, true); + cancel_work_sync(&sched->work_run_job); } EXPORT_SYMBOL(drm_sched_wqueue_stop); @@ -1284,6 +1284,7 @@ EXPORT_SYMBOL(drm_sched_wqueue_stop); */ void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched) { - kthread_unpark(sched->thread); + WRITE_ONCE(sched->pause_submit, false); + queue_work(sched->submit_wq, &sched->work_run_job); } EXPORT_SYMBOL(drm_sched_wqueue_start); diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 038e1ae589c7..0b6696b0d882 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -388,7 +388,7 @@ v3d_sched_init(struct v3d_dev *v3d) int ret; ret = drm_sched_init(&v3d->queue[V3D_BIN].sched, - &v3d_bin_sched_ops, + &v3d_bin_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), NULL, @@ -397,7 +397,7 @@ v3d_sched_init(struct v3d_dev *v3d) return ret; ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched, - &v3d_render_sched_ops, + &v3d_render_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), NULL, @@ -406,7 +406,7 @@ v3d_sched_init(struct v3d_dev *v3d) goto fail; ret = drm_sched_init(&v3d->queue[V3D_TFU].sched, - &v3d_tfu_sched_ops, + &v3d_tfu_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), NULL, @@ -416,7 +416,7 @@ v3d_sched_init(struct v3d_dev *v3d) if (v3d_has_csd(v3d)) { ret = drm_sched_init(&v3d->queue[V3D_CSD].sched, - &v3d_csd_sched_ops, + &v3d_csd_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), NULL, @@ -425,7 +425,7 @@ 
v3d_sched_init(struct v3d_dev *v3d) goto fail; ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched, - &v3d_cache_clean_sched_ops, + &v3d_cache_clean_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), NULL, diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 7e622c18d336..4bf94aca2631 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -475,17 +475,16 @@ struct drm_sched_backend_ops { * @num_rqs: Number of run-queues. This is at most DRM_SCHED_PRIORITY_COUNT, * as there's usually one run-queue per priority, but could be less. * @sched_rq: An allocated array of run-queues of size @num_rqs; - * @wake_up_worker: the wait queue on which the scheduler sleeps until a job - * is ready to be scheduled. * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler * waits on this wait queue until all the scheduled jobs are * finished. * @hw_rq_count: the number of jobs currently in the hardware queue. * @job_id_count: used to assign unique id to the each job. + * @submit_wq: workqueue used to queue @work_run_job * @timeout_wq: workqueue used to queue @work_tdr + * @work_run_job: work which calls run_job op of each scheduler. * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the * timeout interval is over. - * @thread: the kthread on which the scheduler which run. * @pending_list: the list of jobs which are currently in the job queue. * @job_list_lock: lock to protect the pending_list. * @hang_limit: once the hangs by a job crosses this limit then it is marked @@ -494,6 +493,8 @@ struct drm_sched_backend_ops { * @_score: score used when the driver doesn't provide one * @ready: marks if the underlying HW is ready to work * @free_guilty: A hit to time out handler to free the guilty job. + * @pause_submit: pause queuing of @work_run_job on @submit_wq + * @own_submit_wq: scheduler owns allocation of @submit_wq * @dev: system &struct device * * One scheduler is implemented for each hardware ring. @@ -505,13 +506,13 @@ struct drm_gpu_scheduler { const char *name; u32 num_rqs; struct drm_sched_rq **sched_rq; - wait_queue_head_t wake_up_worker; wait_queue_head_t job_scheduled; atomic_t hw_rq_count; atomic64_t job_id_count; + struct workqueue_struct *submit_wq; struct workqueue_struct *timeout_wq; + struct work_struct work_run_job; struct delayed_work work_tdr; - struct task_struct *thread; struct list_head pending_list; spinlock_t job_list_lock; int hang_limit; @@ -519,11 +520,14 @@ struct drm_gpu_scheduler { atomic_t _score; bool ready; bool free_guilty; + bool pause_submit; + bool own_submit_wq; struct device *dev; }; int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_backend_ops *ops, + struct workqueue_struct *submit_wq, u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit, long timeout, struct workqueue_struct *timeout_wq, atomic_t *score, const char *name, struct device *dev); -- cgit v1.2.3 From f7fe64ad0f22ff034f8ebcfbd7299ee9cc9b57d7 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 30 Oct 2023 20:24:37 -0700 Subject: drm/sched: Split free_job into own work item Rather than calling free_job and run_job in the same work item, have a dedicated work item for each. This aligns with the design and intended use of work queues.
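As a minimal sketch of the pattern this patch moves to (toy_* names are placeholders, not the scheduler code itself): two work items, one for running jobs and one for freeing finished jobs, are queued independently but share one ordered workqueue, so they still execute one at a time.

#include <linux/errno.h>
#include <linux/workqueue.h>

struct toy_sched {
	struct workqueue_struct *submit_wq;
	struct work_struct work_run_job;
	struct work_struct work_free_job;
	bool pause_submit;
};

static void toy_run_job_work(struct work_struct *w)
{
	struct toy_sched *s = container_of(w, struct toy_sched, work_run_job);

	if (READ_ONCE(s->pause_submit))
		return;
	/* select an entity, pop a job and call the driver's run_job hook */
}

static void toy_free_job_work(struct work_struct *w)
{
	struct toy_sched *s = container_of(w, struct toy_sched, work_free_job);

	if (READ_ONCE(s->pause_submit))
		return;
	/* reap signaled jobs from the pending list and free them */
}

static int toy_sched_init(struct toy_sched *s)
{
	/* One ordered wq: the two handlers never run concurrently. */
	s->submit_wq = alloc_ordered_workqueue("toy-sched", 0);
	if (!s->submit_wq)
		return -ENOMEM;	/* destroy_workqueue() on teardown */

	INIT_WORK(&s->work_run_job, toy_run_job_work);
	INIT_WORK(&s->work_free_job, toy_free_job_work);
	return 0;
}

static void toy_sched_kick(struct toy_sched *s)
{
	/* Queued independently, executed one at a time on the ordered wq. */
	queue_work(s->submit_wq, &s->work_run_job);
	queue_work(s->submit_wq, &s->work_free_job);
}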
v2: - Test for DMA_FENCE_FLAG_TIMESTAMP_BIT before setting timestamp in free_job() work item (Danilo) v3: - Drop forward dec of drm_sched_select_entity (Boris) - Return in drm_sched_run_job_work if entity NULL (Boris) v4: - Replace dequeue with peek and invert logic (Luben) - Wrap to 100 lines (Luben) - Update comments for *_queue / *_queue_if_ready functions (Luben) v5: - Drop peek argument, blindly reinit idle (Luben) - s/drm_sched_free_job_queue_if_ready/drm_sched_free_job_queue_if_done (Luben) - Update work_run_job & work_free_job kernel doc (Luben) v6: - Do not move drm_sched_select_entity in file (Luben) Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20231031032439.1558703-4-matthew.brost@intel.com Reviewed-by: Luben Tuikov Signed-off-by: Luben Tuikov --- drivers/gpu/drm/scheduler/sched_main.c | 146 ++++++++++++++++++++++----------- include/drm/gpu_scheduler.h | 4 +- 2 files changed, 101 insertions(+), 49 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index d1ae05bded15..3b1b2f8eafe8 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -265,6 +265,32 @@ static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched) queue_work(sched->submit_wq, &sched->work_run_job); } +/** + * drm_sched_free_job_queue - enqueue free-job work + * @sched: scheduler instance + */ +static void drm_sched_free_job_queue(struct drm_gpu_scheduler *sched) +{ + if (!READ_ONCE(sched->pause_submit)) + queue_work(sched->submit_wq, &sched->work_free_job); +} + +/** + * drm_sched_free_job_queue_if_done - enqueue free-job work if ready + * @sched: scheduler instance + */ +static void drm_sched_free_job_queue_if_done(struct drm_gpu_scheduler *sched) +{ + struct drm_sched_job *job; + + spin_lock(&sched->job_list_lock); + job = list_first_entry_or_null(&sched->pending_list, + struct drm_sched_job, list); + if (job && dma_fence_is_signaled(&job->s_fence->finished)) + drm_sched_free_job_queue(sched); + spin_unlock(&sched->job_list_lock); +} + /** * drm_sched_job_done - complete a job * @s_job: pointer to the job which is done @@ -284,7 +310,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result) dma_fence_get(&s_fence->finished); drm_sched_fence_finished(s_fence, result); dma_fence_put(&s_fence->finished); - drm_sched_run_job_queue(sched); + drm_sched_free_job_queue(sched); } /** @@ -943,8 +969,10 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched) typeof(*next), list); if (next) { - next->s_fence->scheduled.timestamp = - dma_fence_timestamp(&job->s_fence->finished); + if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, + &next->s_fence->scheduled.flags)) + next->s_fence->scheduled.timestamp = + dma_fence_timestamp(&job->s_fence->finished); /* start TO timer for next job */ drm_sched_start_timeout(sched); } @@ -994,7 +1022,40 @@ drm_sched_pick_best(struct drm_gpu_scheduler **sched_list, EXPORT_SYMBOL(drm_sched_pick_best); /** - * drm_sched_run_job_work - main scheduler thread + * drm_sched_run_job_queue_if_ready - enqueue run-job work if ready + * @sched: scheduler instance + */ +static void drm_sched_run_job_queue_if_ready(struct drm_gpu_scheduler *sched) +{ + if (drm_sched_select_entity(sched)) + drm_sched_run_job_queue(sched); +} + +/** + * drm_sched_free_job_work - worker to call free_job + * + * @w: free job work + */ +static void drm_sched_free_job_work(struct work_struct *w) +{ + struct drm_gpu_scheduler *sched = + container_of(w, struct 
drm_gpu_scheduler, work_free_job); + struct drm_sched_job *cleanup_job; + + if (READ_ONCE(sched->pause_submit)) + return; + + cleanup_job = drm_sched_get_cleanup_job(sched); + if (cleanup_job) { + sched->ops->free_job(cleanup_job); + + drm_sched_free_job_queue_if_done(sched); + drm_sched_run_job_queue_if_ready(sched); + } +} + +/** + * drm_sched_run_job_work - worker to call run_job * * @w: run job work */ @@ -1003,65 +1064,51 @@ static void drm_sched_run_job_work(struct work_struct *w) struct drm_gpu_scheduler *sched = container_of(w, struct drm_gpu_scheduler, work_run_job); struct drm_sched_entity *entity; - struct drm_sched_job *cleanup_job; + struct dma_fence *fence; + struct drm_sched_fence *s_fence; + struct drm_sched_job *sched_job; int r; if (READ_ONCE(sched->pause_submit)) return; - cleanup_job = drm_sched_get_cleanup_job(sched); entity = drm_sched_select_entity(sched); + if (!entity) + return; - if (!entity && !cleanup_job) + sched_job = drm_sched_entity_pop_job(entity); + if (!sched_job) { + complete_all(&entity->entity_idle); return; /* No more work */ + } - if (cleanup_job) - sched->ops->free_job(cleanup_job); - - if (entity) { - struct dma_fence *fence; - struct drm_sched_fence *s_fence; - struct drm_sched_job *sched_job; - - sched_job = drm_sched_entity_pop_job(entity); - if (!sched_job) { - complete_all(&entity->entity_idle); - if (!cleanup_job) - return; /* No more work */ - goto again; - } - - s_fence = sched_job->s_fence; - - atomic_inc(&sched->hw_rq_count); - drm_sched_job_begin(sched_job); + s_fence = sched_job->s_fence; - trace_drm_run_job(sched_job, entity); - fence = sched->ops->run_job(sched_job); - complete_all(&entity->entity_idle); - drm_sched_fence_scheduled(s_fence, fence); + atomic_inc(&sched->hw_rq_count); + drm_sched_job_begin(sched_job); - if (!IS_ERR_OR_NULL(fence)) { - /* Drop for original kref_init of the fence */ - dma_fence_put(fence); + trace_drm_run_job(sched_job, entity); + fence = sched->ops->run_job(sched_job); + complete_all(&entity->entity_idle); + drm_sched_fence_scheduled(s_fence, fence); - r = dma_fence_add_callback(fence, &sched_job->cb, - drm_sched_job_done_cb); - if (r == -ENOENT) - drm_sched_job_done(sched_job, fence->error); - else if (r) - DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", - r); - } else { - drm_sched_job_done(sched_job, IS_ERR(fence) ? - PTR_ERR(fence) : 0); - } + if (!IS_ERR_OR_NULL(fence)) { + /* Drop for original kref_init of the fence */ + dma_fence_put(fence); - wake_up(&sched->job_scheduled); + r = dma_fence_add_callback(fence, &sched_job->cb, + drm_sched_job_done_cb); + if (r == -ENOENT) + drm_sched_job_done(sched_job, fence->error); + else if (r) + DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r); + } else { + drm_sched_job_done(sched_job, IS_ERR(fence) ? 
+ PTR_ERR(fence) : 0); } -again: - drm_sched_run_job_queue(sched); + wake_up(&sched->job_scheduled); + drm_sched_run_job_queue_if_ready(sched); } /** @@ -1145,6 +1192,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, atomic_set(&sched->hw_rq_count, 0); INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); INIT_WORK(&sched->work_run_job, drm_sched_run_job_work); + INIT_WORK(&sched->work_free_job, drm_sched_free_job_work); atomic_set(&sched->_score, 0); atomic64_set(&sched->job_id_count, 0); sched->pause_submit = false; @@ -1274,6 +1322,7 @@ void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched) { WRITE_ONCE(sched->pause_submit, true); cancel_work_sync(&sched->work_run_job); + cancel_work_sync(&sched->work_free_job); } EXPORT_SYMBOL(drm_sched_wqueue_stop); @@ -1286,5 +1335,6 @@ void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched) { WRITE_ONCE(sched->pause_submit, false); queue_work(sched->submit_wq, &sched->work_run_job); + queue_work(sched->submit_wq, &sched->work_free_job); } EXPORT_SYMBOL(drm_sched_wqueue_start); diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 4bf94aca2631..6655aeeef9bb 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -480,9 +480,10 @@ struct drm_sched_backend_ops { * finished. * @hw_rq_count: the number of jobs currently in the hardware queue. * @job_id_count: used to assign unique id to the each job. - * @submit_wq: workqueue used to queue @work_run_job + * @submit_wq: workqueue used to queue @work_run_job and @work_free_job * @timeout_wq: workqueue used to queue @work_tdr * @work_run_job: work which calls run_job op of each scheduler. + * @work_free_job: work which calls free_job op of each scheduler. * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the * timeout interval is over. * @pending_list: the list of jobs which are currently in the job queue. @@ -512,6 +513,7 @@ struct drm_gpu_scheduler { struct workqueue_struct *submit_wq; struct workqueue_struct *timeout_wq; struct work_struct work_run_job; + struct work_struct work_free_job; struct delayed_work work_tdr; struct list_head pending_list; spinlock_t job_list_lock; -- cgit v1.2.3 From 3c6c7ca4508b6cb1a033ac954c50a1b2c97af883 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 30 Oct 2023 20:24:39 -0700 Subject: drm/sched: Add a helper to queue TDR immediately Add a helper whereby a driver can invoke TDR immediately. 
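A hedged usage sketch, assuming a hypothetical driver hang-detection path (example_* is a placeholder, not code from this series):

#include <drm/gpu_scheduler.h>

/* Hypothetical hook called when the driver's firmware reports a hang. */
static void example_report_hang(struct drm_gpu_scheduler *sched)
{
	/* Re-queues sched->work_tdr with a zero timeout. */
	drm_sched_tdr_queue_imm(sched);
}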
v2: - Drop timeout args, rename function, use mod delayed work (Luben) v3: - s/XE/Xe (Luben) - present tense in commit message (Luben) - Adjust comment for drm_sched_tdr_queue_imm (Luben) v4: - Adjust commit message (Luben) Cc: Luben Tuikov Signed-off-by: Matthew Brost Reviewed-by: Luben Tuikov Link: https://lore.kernel.org/r/20231031032439.1558703-6-matthew.brost@intel.com Signed-off-by: Luben Tuikov --- drivers/gpu/drm/scheduler/sched_main.c | 18 +++++++++++++++++- include/drm/gpu_scheduler.h | 1 + 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index fc387de5a0c7..98b2ad54fc70 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -338,7 +338,7 @@ static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched) if (sched->timeout != MAX_SCHEDULE_TIMEOUT && !list_empty(&sched->pending_list)) - queue_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout); + mod_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout); } static void drm_sched_start_timeout_unlocked(struct drm_gpu_scheduler *sched) @@ -348,6 +348,22 @@ static void drm_sched_start_timeout_unlocked(struct drm_gpu_scheduler *sched) spin_unlock(&sched->job_list_lock); } +/** + * drm_sched_tdr_queue_imm: - immediately start job timeout handler + * + * @sched: scheduler for which the timeout handling should be started. + * + * Start timeout handling immediately for the named scheduler. + */ +void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched) +{ + spin_lock(&sched->job_list_lock); + sched->timeout = 0; + drm_sched_start_timeout(sched); + spin_unlock(&sched->job_list_lock); +} +EXPORT_SYMBOL(drm_sched_tdr_queue_imm); + /** * drm_sched_fault - immediately start timeout handler * diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 6655aeeef9bb..9daa78d2c04c 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -557,6 +557,7 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity *entity, struct drm_gpu_scheduler **sched_list, unsigned int num_sched_list); +void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched); void drm_sched_job_cleanup(struct drm_sched_job *job); void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched); bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched); void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched); void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched); -- cgit v1.2.3 From f12af4c461fb6cd5ed7b48f8b4d09b22eb19fcc5 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 2 Nov 2023 10:55:38 +0000 Subject: drm/sched: Drop suffix from drm_sched_wakeup_if_can_queue Because a) the helper is exported to other parts of the scheduler and b) there isn't a plain drm_sched_wakeup to begin with, I think we can drop the suffix and by doing so separate the intimate knowledge between the scheduler components a bit better.
Signed-off-by: Tvrtko Ursulin Cc: Luben Tuikov Cc: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20231102105538.391648-6-tvrtko.ursulin@linux.intel.com Reviewed-by: Luben Tuikov Signed-off-by: Luben Tuikov --- drivers/gpu/drm/scheduler/sched_entity.c | 4 ++-- drivers/gpu/drm/scheduler/sched_main.c | 4 ++-- include/drm/gpu_scheduler.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 409e4256f6e7..f1db63cc8198 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -370,7 +370,7 @@ static void drm_sched_entity_wakeup(struct dma_fence *f, container_of(cb, struct drm_sched_entity, cb); drm_sched_entity_clear_dep(f, cb); - drm_sched_wakeup_if_can_queue(entity->rq->sched); + drm_sched_wakeup(entity->rq->sched); } /** @@ -602,7 +602,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) drm_sched_rq_update_fifo(entity, submit_ts); - drm_sched_wakeup_if_can_queue(entity->rq->sched); + drm_sched_wakeup(entity->rq->sched); } } EXPORT_SYMBOL(drm_sched_entity_push_job); diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index d5ddbce68fb7..27843e37d9b7 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -920,12 +920,12 @@ static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched) } /** - * drm_sched_wakeup_if_can_queue - Wake up the scheduler + * drm_sched_wakeup - Wake up the scheduler if it is ready to queue * @sched: scheduler instance * * Wake up the scheduler if we can queue jobs. */ -void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched) +void drm_sched_wakeup(struct drm_gpu_scheduler *sched) { if (drm_sched_can_queue(sched)) __drm_sched_run_job_queue(sched); diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 9daa78d2c04c..754fd2217334 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -559,7 +559,7 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity *entity, void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched); void drm_sched_job_cleanup(struct drm_sched_job *job); -void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched); +void drm_sched_wakeup(struct drm_gpu_scheduler *sched); bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched); void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched); void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched); -- cgit v1.2.3 From 7707dd6022593f3edd8e182e7935870cf326f874 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 24 Oct 2023 04:08:57 +0300 Subject: drm/dp_mst: Fix fractional DSC bpp handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current code does '(bpp << 4) / 16' in the MST PBN calculation, but that is just the same as 'bpp' so the DSC codepath achieves absolutely nothing. Fix it up so that the fractional part of the bpp value is actually used instead of truncated away. 64*1006 has enough zero lsbs that we can just shift that down in the dividend and thus still manage to stick to a 32bit divisor. And while touching this, let's just make the whole thing more straightforward by making the passed in bpp value .4 binary fixed point always, instead of having to pass in different things based on whether DSC is enabled or not. 
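A hedged caller sketch of the new convention (example_* names are placeholders): an integer bpp is converted to .4 fixed point with a left shift by four, while a DSC bits-per-pixel value already expressed in 1/16 bpp units is passed through unchanged.

#include <drm/display/drm_dp_mst_helper.h>

/* Uncompressed stream: integer bpp becomes .4 fixed point via << 4. */
static int example_pbn_uncompressed(int dotclock_khz, int bpp)
{
	return drm_dp_calc_pbn_mode(dotclock_khz, bpp << 4);
}

/* DSC stream: a bpp value in 1/16 units is already .4 fixed point. */
static int example_pbn_dsc(int dotclock_khz, int dsc_bpp_x16)
{
	return drm_dp_calc_pbn_mode(dotclock_khz, dsc_bpp_x16);
}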
v2: - Fix DSC kunit test cases. Cc: Manasi Navare Cc: Lyude Paul Cc: Harry Wentland Cc: David Francis Cc: Mikita Lipski Cc: Alex Deucher Fixes: dc48529fb14e ("drm/dp_mst: Add PBN calculation for DSC modes") Signed-off-by: Ville Syrjälä [Imre: Fix kunit test cases] Acked-by: Maxime Ripard Reviewed-by: Lyude Paul Acked-by: Harry Wentland Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-3-imre.deak@intel.com --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- .../drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 2 +- drivers/gpu/drm/display/drm_dp_mst_topology.c | 20 +++++--------------- drivers/gpu/drm/i915/display/intel_dp_mst.c | 5 ++--- drivers/gpu/drm/nouveau/dispnv50/disp.c | 3 +-- drivers/gpu/drm/tests/drm_dp_mst_helper_test.c | 6 +++--- include/drm/display/drm_dp_mst_helper.h | 2 +- 7 files changed, 14 insertions(+), 26 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index c6fd34bab358..df3c42bb9274 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6876,7 +6876,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder, max_bpc); bpp = convert_dc_color_depth_into_bpc(color_depth) * 3; clock = adjusted_mode->clock; - dm_new_connector_state->pbn = drm_dp_calc_pbn_mode(clock, bpp, false); + dm_new_connector_state->pbn = drm_dp_calc_pbn_mode(clock, bpp << 4); } dm_new_connector_state->vcpi_slots = diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 57230661132b..2afd1bc74978 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1636,7 +1636,7 @@ enum dc_status dm_dp_mst_is_port_support_mode( } else { /* check if mode could be supported within full_pbn */ bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3; - pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false); + pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp << 4); if (pbn > aconnector->mst_output_port->full_pbn) return DC_FAIL_BANDWIDTH_VALIDATE; diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index c490e8befc2f..bc2c9dfe229f 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -4718,13 +4718,12 @@ EXPORT_SYMBOL(drm_dp_check_act_status); /** * drm_dp_calc_pbn_mode() - Calculate the PBN for a mode. - * @clock: dot clock for the mode - * @bpp: bpp for the mode. - * @dsc: DSC mode. If true, bpp has units of 1/16 of a bit per pixel + * @clock: dot clock + * @bpp: bpp as .4 binary fixed point * * This uses the formula in the spec to calculate the PBN value for a mode. */ -int drm_dp_calc_pbn_mode(int clock, int bpp, bool dsc) +int drm_dp_calc_pbn_mode(int clock, int bpp) { /* * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006 @@ -4735,18 +4734,9 @@ int drm_dp_calc_pbn_mode(int clock, int bpp, bool dsc) * peak_kbps *= (1006/1000) * peak_kbps *= (64/54) * peak_kbps *= 8 convert to bytes - * - * If the bpp is in units of 1/16, further divide by 16. 
Put this - * factor in the numerator rather than the denominator to avoid - * integer overflow */ - - if (dsc) - return DIV_ROUND_UP_ULL(mul_u32_u32(clock * (bpp / 16), 64 * 1006), - 8 * 54 * 1000 * 1000); - - return DIV_ROUND_UP_ULL(mul_u32_u32(clock * bpp, 64 * 1006), - 8 * 54 * 1000 * 1000); + return DIV_ROUND_UP_ULL(mul_u32_u32(clock * bpp, 64 * 1006 >> 4), + 1000 * 8 * 54 * 1000); } EXPORT_SYMBOL(drm_dp_calc_pbn_mode); diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 4625900416e9..6f9c9f3b5b12 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -107,8 +107,7 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder, continue; crtc_state->pbn = drm_dp_calc_pbn_mode(adjusted_mode->crtc_clock, - dsc ? bpp << 4 : bpp, - dsc); + bpp << 4); slots = drm_dp_atomic_find_time_slots(state, &intel_dp->mst_mgr, connector->port, @@ -982,7 +981,7 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector, return ret; if (mode_rate > max_rate || mode->clock > max_dotclk || - drm_dp_calc_pbn_mode(mode->clock, min_bpp, false) > port->full_pbn) { + drm_dp_calc_pbn_mode(mode->clock, min_bpp << 4) > port->full_pbn) { *status = MODE_CLOCK_HIGH; return 0; } diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 52f1569ee37c..3d390f5029f6 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -982,8 +982,7 @@ nv50_msto_atomic_check(struct drm_encoder *encoder, const int clock = crtc_state->adjusted_mode.clock; asyh->or.bpc = connector->display_info.bpc; - asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, asyh->or.bpc * 3, - false); + asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, asyh->or.bpc * 3 << 4); } mst_state = drm_atomic_get_mst_topology_state(state, &mstm->mgr); diff --git a/drivers/gpu/drm/tests/drm_dp_mst_helper_test.c b/drivers/gpu/drm/tests/drm_dp_mst_helper_test.c index 545beea33e8c..e3c818dfc0e6 100644 --- a/drivers/gpu/drm/tests/drm_dp_mst_helper_test.c +++ b/drivers/gpu/drm/tests/drm_dp_mst_helper_test.c @@ -42,13 +42,13 @@ static const struct drm_dp_mst_calc_pbn_mode_test drm_dp_mst_calc_pbn_mode_cases .clock = 332880, .bpp = 24, .dsc = true, - .expected = 50 + .expected = 1191 }, { .clock = 324540, .bpp = 24, .dsc = true, - .expected = 49 + .expected = 1161 }, }; @@ -56,7 +56,7 @@ static void drm_test_dp_mst_calc_pbn_mode(struct kunit *test) { const struct drm_dp_mst_calc_pbn_mode_test *params = test->param_value; - KUNIT_EXPECT_EQ(test, drm_dp_calc_pbn_mode(params->clock, params->bpp, params->dsc), + KUNIT_EXPECT_EQ(test, drm_dp_calc_pbn_mode(params->clock, params->bpp << 4), params->expected); } diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h index 4429d3b1745b..655862b3d2a4 100644 --- a/include/drm/display/drm_dp_mst_helper.h +++ b/include/drm/display/drm_dp_mst_helper.h @@ -842,7 +842,7 @@ struct edid *drm_dp_mst_get_edid(struct drm_connector *connector, int drm_dp_get_vc_payload_bw(const struct drm_dp_mst_topology_mgr *mgr, int link_rate, int link_lane_count); -int drm_dp_calc_pbn_mode(int clock, int bpp, bool dsc); +int drm_dp_calc_pbn_mode(int clock, int bpp); void drm_dp_mst_update_slots(struct drm_dp_mst_topology_state *mst_state, uint8_t link_encoding_cap); -- cgit v1.2.3 From 9dcf67deeab6fbc4984175278b1b2c59881dca52 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 24 Oct 2023 04:08:58 +0300 Subject: 
drm/dp_mst: Add helper to determine if an MST port is downstream of another port Add drm_dp_mst_port_downstream_of_parent() required by the i915 driver in a follow-up patch to resolve a BW overallocation of MST streams going through a given MST port. Cc: Lyude Paul Cc: dri-devel@lists.freedesktop.org Reviewed-by: Lyude Paul Acked-by: Maxime Ripard Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-4-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 52 +++++++++++++++++++++++++++ include/drm/display/drm_dp_mst_helper.h | 3 ++ 2 files changed, 55 insertions(+) (limited to 'include/drm') diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index bc2c9dfe229f..fa0049572f22 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -5126,6 +5126,58 @@ static bool drm_dp_mst_port_downstream_of_branch(struct drm_dp_mst_port *port, return false; } +static bool +drm_dp_mst_port_downstream_of_parent_locked(struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_mst_port *port, + struct drm_dp_mst_port *parent) +{ + if (!mgr->mst_primary) + return false; + + port = drm_dp_mst_topology_get_port_validated_locked(mgr->mst_primary, + port); + if (!port) + return false; + + if (!parent) + return true; + + parent = drm_dp_mst_topology_get_port_validated_locked(mgr->mst_primary, + parent); + if (!parent) + return false; + + if (!parent->mstb) + return false; + + return drm_dp_mst_port_downstream_of_branch(port, parent->mstb); +} + +/** + * drm_dp_mst_port_downstream_of_parent - check if a port is downstream of a parent port + * @mgr: MST topology manager + * @port: the port being looked up + * @parent: the parent port + * + * The function returns %true if @port is downstream of @parent. If @parent is + * %NULL - denoting the root port - the function returns %true if @port is in + * @mgr's topology. 
+ */ +bool +drm_dp_mst_port_downstream_of_parent(struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_mst_port *port, + struct drm_dp_mst_port *parent) +{ + bool ret; + + mutex_lock(&mgr->lock); + ret = drm_dp_mst_port_downstream_of_parent_locked(mgr, port, parent); + mutex_unlock(&mgr->lock); + + return ret; +} +EXPORT_SYMBOL(drm_dp_mst_port_downstream_of_parent); + static int drm_dp_mst_atomic_check_port_bw_limit(struct drm_dp_mst_port *port, struct drm_dp_mst_topology_state *state); diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h index 655862b3d2a4..e44485aa74e9 100644 --- a/include/drm/display/drm_dp_mst_helper.h +++ b/include/drm/display/drm_dp_mst_helper.h @@ -892,6 +892,9 @@ drm_atomic_get_new_mst_topology_state(struct drm_atomic_state *state, struct drm_dp_mst_atomic_payload * drm_atomic_get_mst_payload_state(struct drm_dp_mst_topology_state *state, struct drm_dp_mst_port *port); +bool drm_dp_mst_port_downstream_of_parent(struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_mst_port *port, + struct drm_dp_mst_port *parent); int __must_check drm_dp_atomic_find_time_slots(struct drm_atomic_state *state, struct drm_dp_mst_topology_mgr *mgr, -- cgit v1.2.3 From 1cd0a5ea427931016c3e95b20dc20f17604937cc Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 24 Oct 2023 04:08:59 +0300 Subject: drm/dp_mst: Factor out a helper to check the atomic state of a topology manager Factor out a helper to check the atomic state for one MST topology manager, returning the MST port where the BW limit check has failed. This will be used in a follow-up patch by the i915 driver to improve the BW sharing between MST streams. Cc: Lyude Paul Cc: dri-devel@lists.freedesktop.org Reviewed-by: Lyude Paul Acked-by: Maxime Ripard Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-5-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 93 +++++++++++++++++++++------ include/drm/display/drm_dp_mst_helper.h | 4 ++ 2 files changed, 78 insertions(+), 19 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index fa0049572f22..0fbfdd23ef4e 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -5180,11 +5180,13 @@ EXPORT_SYMBOL(drm_dp_mst_port_downstream_of_parent); static int drm_dp_mst_atomic_check_port_bw_limit(struct drm_dp_mst_port *port, - struct drm_dp_mst_topology_state *state); + struct drm_dp_mst_topology_state *state, + struct drm_dp_mst_port **failing_port); static int drm_dp_mst_atomic_check_mstb_bw_limit(struct drm_dp_mst_branch *mstb, - struct drm_dp_mst_topology_state *state) + struct drm_dp_mst_topology_state *state, + struct drm_dp_mst_port **failing_port) { struct drm_dp_mst_atomic_payload *payload; struct drm_dp_mst_port *port; @@ -5213,7 +5215,7 @@ drm_dp_mst_atomic_check_mstb_bw_limit(struct drm_dp_mst_branch *mstb, drm_dbg_atomic(mstb->mgr->dev, "[MSTB:%p] Checking bandwidth limits\n", mstb); list_for_each_entry(port, &mstb->ports, next) { - ret = drm_dp_mst_atomic_check_port_bw_limit(port, state); + ret = drm_dp_mst_atomic_check_port_bw_limit(port, state, failing_port); if (ret < 0) return ret; @@ -5225,7 +5227,8 @@ drm_dp_mst_atomic_check_mstb_bw_limit(struct drm_dp_mst_branch *mstb, static int drm_dp_mst_atomic_check_port_bw_limit(struct drm_dp_mst_port *port, - struct drm_dp_mst_topology_state *state) + struct 
drm_dp_mst_topology_state *state, + struct drm_dp_mst_port **failing_port) { struct drm_dp_mst_atomic_payload *payload; int pbn_used = 0; @@ -5246,13 +5249,15 @@ drm_dp_mst_atomic_check_port_bw_limit(struct drm_dp_mst_port *port, drm_dbg_atomic(port->mgr->dev, "[MSTB:%p] [MST PORT:%p] no BW available for the port\n", port->parent, port); + *failing_port = port; return -EINVAL; } pbn_used = payload->pbn; } else { pbn_used = drm_dp_mst_atomic_check_mstb_bw_limit(port->mstb, - state); + state, + failing_port); if (pbn_used <= 0) return pbn_used; } @@ -5261,6 +5266,7 @@ drm_dp_mst_atomic_check_port_bw_limit(struct drm_dp_mst_port *port, drm_dbg_atomic(port->mgr->dev, "[MSTB:%p] [MST PORT:%p] required PBN of %d exceeds port limit of %d\n", port->parent, port, pbn_used, port->full_pbn); + *failing_port = port; return -ENOSPC; } @@ -5438,20 +5444,79 @@ int drm_dp_mst_atomic_enable_dsc(struct drm_atomic_state *state, } EXPORT_SYMBOL(drm_dp_mst_atomic_enable_dsc); +/** + * drm_dp_mst_atomic_check_mgr - Check the atomic state of an MST topology manager + * @state: The global atomic state + * @mgr: Manager to check + * @mst_state: The MST atomic state for @mgr + * @failing_port: Returns the port with a BW limitation + * + * Checks the given MST manager's topology state for an atomic update to ensure + * that it's valid. This includes checking whether there's enough bandwidth to + * support the new timeslot allocations in the atomic update. + * + * Any atomic drivers supporting DP MST must make sure to call this or + * the drm_dp_mst_atomic_check() function after checking the rest of their state + * in their &drm_mode_config_funcs.atomic_check() callback. + * + * See also: + * drm_dp_mst_atomic_check() + * drm_dp_atomic_find_time_slots() + * drm_dp_atomic_release_time_slots() + * + * Returns: + * - 0 if the new state is valid + * - %-ENOSPC, if the new state is invalid, because of BW limitation + * @failing_port is set to: + * - The non-root port where a BW limit check failed + * The returned port pointer is valid until at least + * one payload downstream of it exists. + * - %NULL if the BW limit check failed at the root port + * - %-EINVAL, if the new state is invalid, because the root port has + * too many payloads. + */ +int drm_dp_mst_atomic_check_mgr(struct drm_atomic_state *state, + struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_mst_topology_state *mst_state, + struct drm_dp_mst_port **failing_port) +{ + int ret; + + *failing_port = NULL; + + if (!mgr->mst_state) + return 0; + + ret = drm_dp_mst_atomic_check_payload_alloc_limits(mgr, mst_state); + if (ret) + return ret; + + mutex_lock(&mgr->lock); + ret = drm_dp_mst_atomic_check_mstb_bw_limit(mgr->mst_primary, + mst_state, + failing_port); + mutex_unlock(&mgr->lock); + + return ret < 0 ? ret : 0; +} +EXPORT_SYMBOL(drm_dp_mst_atomic_check_mgr); + /** * drm_dp_mst_atomic_check - Check that the new state of an MST topology in an * atomic update is valid * @state: Pointer to the new &struct drm_dp_mst_topology_state * * Checks the given topology state for an atomic update to ensure that it's - * valid. This includes checking whether there's enough bandwidth to support - * the new timeslot allocations in the atomic update. + * valid, calling drm_dp_mst_atomic_check_mgr() for all MST manager in the + * atomic state. This includes checking whether there's enough bandwidth to + * support the new timeslot allocations in the atomic update. 
* * Any atomic drivers supporting DP MST must make sure to call this after * checking the rest of their state in their * &drm_mode_config_funcs.atomic_check() callback. * * See also: + * drm_dp_mst_atomic_check_mgr() * drm_dp_atomic_find_time_slots() * drm_dp_atomic_release_time_slots() * @@ -5466,21 +5531,11 @@ int drm_dp_mst_atomic_check(struct drm_atomic_state *state) int i, ret = 0; for_each_new_mst_mgr_in_state(state, mgr, mst_state, i) { - if (!mgr->mst_state) - continue; + struct drm_dp_mst_port *tmp_port; - ret = drm_dp_mst_atomic_check_payload_alloc_limits(mgr, mst_state); + ret = drm_dp_mst_atomic_check_mgr(state, mgr, mst_state, &tmp_port); if (ret) break; - - mutex_lock(&mgr->lock); - ret = drm_dp_mst_atomic_check_mstb_bw_limit(mgr->mst_primary, - mst_state); - mutex_unlock(&mgr->lock); - if (ret < 0) - break; - else - ret = 0; } return ret; diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h index e44485aa74e9..a4aad6df71f1 100644 --- a/include/drm/display/drm_dp_mst_helper.h +++ b/include/drm/display/drm_dp_mst_helper.h @@ -916,6 +916,10 @@ int drm_dp_send_power_updown_phy(struct drm_dp_mst_topology_mgr *mgr, int drm_dp_send_query_stream_enc_status(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port, struct drm_dp_query_stream_enc_status_ack_reply *status); +int __must_check drm_dp_mst_atomic_check_mgr(struct drm_atomic_state *state, + struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_mst_topology_state *mst_state, + struct drm_dp_mst_port **failing_port); int __must_check drm_dp_mst_atomic_check(struct drm_atomic_state *state); int __must_check drm_dp_mst_root_conn_atomic_check(struct drm_connector_state *new_conn_state, struct drm_dp_mst_topology_mgr *mgr); -- cgit v1.2.3 From b348150406564595cf6c1be388e9797fa97c2a5d Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 24 Oct 2023 04:09:02 +0300 Subject: drm/dp: Add DP_HBLANK_EXPANSION_CAPABLE and DSC_PASSTHROUGH_EN DPCD flags Add the DPCD flag to enable DSC passthrough in a last branch device, used in a follow-up i915 patch. Also add a flag to detect HBLANK expansion support in a branch device, used by a workaround in a follow-up i915 patch. Cc: Lyude Paul Cc: dri-devel@lists.freedesktop.org Reviewed-by: Stanislav Lisovskiy Acked-by: Maxime Ripard Reviewed-by: Lyude Paul Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-8-imre.deak@intel.com --- include/drm/display/drm_dp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/drm') diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h index e69cece404b3..763d45a612f3 100644 --- a/include/drm/display/drm_dp.h +++ b/include/drm/display/drm_dp.h @@ -148,6 +148,7 @@ #define DP_RECEIVE_PORT_0_CAP_0 0x008 # define DP_LOCAL_EDID_PRESENT (1 << 1) # define DP_ASSOCIATED_TO_PRECEDING_PORT (1 << 2) +# define DP_HBLANK_EXPANSION_CAPABLE (1 << 3) #define DP_RECEIVE_PORT_0_BUFFER_SIZE 0x009 @@ -699,6 +700,7 @@ #define DP_DSC_ENABLE 0x160 /* DP 1.4 */ # define DP_DECOMPRESSION_EN (1 << 0) +# define DP_DSC_PASSTHROUGH_EN (1 << 1) #define DP_DSC_CONFIGURATION 0x161 /* DP 2.0 */ #define DP_PSR_EN_CFG 0x170 /* XXX 1.2? 
*/ -- cgit v1.2.3 From a6315ec25eed0e9a70cb1cfc43cf694911546a5c Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 24 Oct 2023 13:22:16 +0300 Subject: drm/dp_mst: Add HBLANK expansion quirk for Synaptics MST hubs Add a quirk for Synaptics MST hubs, which require a workaround - at least on i915 - for some modes, on which the hub applies HBLANK expansion. These modes will only work by enabling DSC decompression for them; a follow-up patch will do this in i915. v2: - Fix the quirk name in its DocBook description. Cc: Lyude Paul Cc: dri-devel@lists.freedesktop.org Reviewed-by: Stanislav Lisovskiy Acked-by: Maxime Ripard Reviewed-by: Lyude Paul Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-9-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_helper.c | 2 ++ include/drm/display/drm_dp_helper.h | 7 +++++++ 2 files changed, 9 insertions(+) (limited to 'include/drm') diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index 8a1b64c57dfd..2a95a05f0437 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -2242,6 +2242,8 @@ static const struct dpcd_quirk dpcd_quirk_list[] = { { OUI(0x00, 0x00, 0x00), DEVICE_ID('C', 'H', '7', '5', '1', '1'), false, BIT(DP_DPCD_QUIRK_NO_SINK_COUNT) }, /* Synaptics DP1.4 MST hubs can support DSC without virtual DPCD */ { OUI(0x90, 0xCC, 0x24), DEVICE_ID_ANY, true, BIT(DP_DPCD_QUIRK_DSC_WITHOUT_VIRTUAL_DPCD) }, + /* Synaptics DP1.4 MST hubs require DSC for some modes on which it applies HBLANK expansion. */ + { OUI(0x90, 0xCC, 0x24), DEVICE_ID_ANY, true, BIT(DP_DPCD_QUIRK_HBLANK_EXPANSION_REQUIRES_DSC) }, /* Apple MacBookPro 2017 15 inch eDP Retina panel reports too low DP_MAX_LINK_RATE */ { OUI(0x00, 0x10, 0xfa), DEVICE_ID(101, 68, 21, 101, 98, 97), false, BIT(DP_DPCD_QUIRK_CAN_DO_MAX_LINK_RATE_3_24_GBPS) }, }; diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index 3369104e2d25..01636a679ee6 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -632,6 +632,13 @@ enum drm_dp_quirk { * the DP_MAX_LINK_RATE register reporting a lower max multiplier. */ DP_DPCD_QUIRK_CAN_DO_MAX_LINK_RATE_3_24_GBPS, + /** + * @DP_DPCD_QUIRK_HBLANK_EXPANSION_REQUIRES_DSC: + * + * The device applies HBLANK expansion for some modes, but this + * requires enabling DSC. + */ + DP_DPCD_QUIRK_HBLANK_EXPANSION_REQUIRES_DSC, }; /** -- cgit v1.2.3 From c1d6a22b7219bd52c66e9e038a282ba79f04be1f Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 24 Oct 2023 13:22:17 +0300 Subject: drm/dp: Add helpers to calculate the link BW overhead MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add helpers drivers can use to calculate the BW allocation overhead - due to SSC, FEC, DSC and data alignment on symbol cycles - and the channel coding efficiency - due to the 8b/10b, 128b/132b encoding. On 128b/132b links the FEC overhead is part of the coding efficiency, so not accounted for in the BW allocation overhead. The drivers can use these functions to calculate a ratio, controlling the stream symbol insertion rate of the source device in each SST TU or MST MTP frame.
Drivers can calculate this m/n = (pixel_data_rate * drm_dp_bw_overhead()) / (link_data_rate * drm_dp_bw_channel_coding_efficiency()) ratio for a given link and pixel stream and with that the slots_per_mtp = CEIL(64 * m / n) allocated slots per MTP for the stream in a link frame and with that the pbn = slots_per_mtp * drm_mst_get_pbn_divider() allocated PBNs for the stream on the MST link path. Take drm_dp_bw_overhead() into use in drm_dp_calc_pbn_mode(), for drivers calculating the PBN value directly. v2: - Add dockbook description to drm_dp_bw_channel_coding_efficiency(). (LKP). - Clarify the way m/n ratio is calculated in the commit log. v3: - Fix compile breakage for !CONFIG_BACKLIGHT_CLASS_DEVICE. (LKP) - Account for FEC_PM overhead (+ 0.0015625 %), add comment with the formula to calculate the total FEC overhead. (Ville) v4: - Rename DRM_DP_OVERHEAD_SSC to DRM_DP_OVERHEAD_SSC_REF_CLK. (Ville) v5: - Clarify in the commit log what MTP means. - Simplify the commit log's formula to calculate PBN. Cc: Lyude Paul Cc: Ville Syrjälä Cc: kernel test robot Cc: dri-devel@lists.freedesktop.org Reviewed-by: Stanislav Lisovskiy (v2) Acked-by: Maxime Ripard Reviewed-by: Lyude Paul Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20231107001505.3370108-1-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_helper.c | 132 ++++++++++++++++++++++++++ drivers/gpu/drm/display/drm_dp_mst_topology.c | 23 +++-- include/drm/display/drm_dp_helper.h | 11 +++ 3 files changed, 160 insertions(+), 6 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index 2a95a05f0437..64c151e4f78c 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -3897,3 +3897,135 @@ int drm_panel_dp_aux_backlight(struct drm_panel *panel, struct drm_dp_aux *aux) EXPORT_SYMBOL(drm_panel_dp_aux_backlight); #endif + +/* See DP Standard v2.1 2.6.4.4.1.1, 2.8.4.4, 2.8.7 */ +static int drm_dp_link_symbol_cycles(int lane_count, int pixels, int bpp_x16, + int symbol_size, bool is_mst) +{ + int cycles = DIV_ROUND_UP(pixels * bpp_x16, 16 * symbol_size * lane_count); + int align = is_mst ? 4 / lane_count : 1; + + return ALIGN(cycles, align); +} + +static int drm_dp_link_dsc_symbol_cycles(int lane_count, int pixels, int slice_count, + int bpp_x16, int symbol_size, bool is_mst) +{ + int slice_pixels = DIV_ROUND_UP(pixels, slice_count); + int slice_data_cycles = drm_dp_link_symbol_cycles(lane_count, slice_pixels, + bpp_x16, symbol_size, is_mst); + int slice_eoc_cycles = is_mst ? 4 / lane_count : 1; + + return slice_count * (slice_data_cycles + slice_eoc_cycles); +} + +/** + * drm_dp_bw_overhead - Calculate the BW overhead of a DP link stream + * @lane_count: DP link lane count + * @hactive: pixel count of the active period in one scanline of the stream + * @dsc_slice_count: DSC slice count if @flags/DRM_DP_LINK_BW_OVERHEAD_DSC is set + * @bpp_x16: bits per pixel in .4 binary fixed point + * @flags: DRM_DP_OVERHEAD_x flags + * + * Calculate the BW allocation overhead of a DP link stream, depending + * on the link's + * - @lane_count + * - SST/MST mode (@flags / %DRM_DP_OVERHEAD_MST) + * - symbol size (@flags / %DRM_DP_OVERHEAD_UHBR) + * - FEC mode (@flags / %DRM_DP_OVERHEAD_FEC) + * - SSC/REF_CLK mode (@flags / %DRM_DP_OVERHEAD_SSC_REF_CLK) + * as well as the stream's + * - @hactive timing + * - @bpp_x16 color depth + * - compression mode (@flags / %DRM_DP_OVERHEAD_DSC). 
+ * Note that this overhead doesn't account for the 8b/10b, 128b/132b + * channel coding efficiency, for that see + * @drm_dp_link_bw_channel_coding_efficiency(). + * + * Returns the overhead as 100% + overhead% in 1ppm units. + */ +int drm_dp_bw_overhead(int lane_count, int hactive, + int dsc_slice_count, + int bpp_x16, unsigned long flags) +{ + int symbol_size = flags & DRM_DP_BW_OVERHEAD_UHBR ? 32 : 8; + bool is_mst = flags & DRM_DP_BW_OVERHEAD_MST; + u32 overhead = 1000000; + int symbol_cycles; + + /* + * DP Standard v2.1 2.6.4.1 + * SSC downspread and ref clock variation margin: + * 5300ppm + 300ppm ~ 0.6% + */ + if (flags & DRM_DP_BW_OVERHEAD_SSC_REF_CLK) + overhead += 6000; + + /* + * DP Standard v2.1 2.6.4.1.1, 3.5.1.5.4: + * FEC symbol insertions for 8b/10b channel coding: + * After each 250 data symbols on 2-4 lanes: + * 250 LL + 5 FEC_PARITY_PH + 1 CD_ADJ (256 byte FEC block) + * After each 2 x 250 data symbols on 1 lane: + * 2 * 250 LL + 11 FEC_PARITY_PH + 1 CD_ADJ (512 byte FEC block) + * After 256 (2-4 lanes) or 128 (1 lane) FEC blocks: + * 256 * 256 bytes + 1 FEC_PM + * or + * 128 * 512 bytes + 1 FEC_PM + * (256 * 6 + 1) / (256 * 250) = 2.4015625 % + */ + if (flags & DRM_DP_BW_OVERHEAD_FEC) + overhead += 24016; + + /* + * DP Standard v2.1 2.7.9, 5.9.7 + * The FEC overhead for UHBR is accounted for in its 96.71% channel + * coding efficiency. + */ + WARN_ON((flags & DRM_DP_BW_OVERHEAD_UHBR) && + (flags & DRM_DP_BW_OVERHEAD_FEC)); + + if (flags & DRM_DP_BW_OVERHEAD_DSC) + symbol_cycles = drm_dp_link_dsc_symbol_cycles(lane_count, hactive, + dsc_slice_count, + bpp_x16, symbol_size, + is_mst); + else + symbol_cycles = drm_dp_link_symbol_cycles(lane_count, hactive, + bpp_x16, symbol_size, + is_mst); + + return DIV_ROUND_UP_ULL(mul_u32_u32(symbol_cycles * symbol_size * lane_count, + overhead * 16), + hactive * bpp_x16); +} +EXPORT_SYMBOL(drm_dp_bw_overhead); + +/** + * drm_dp_bw_channel_coding_efficiency - Get a DP link's channel coding efficiency + * @is_uhbr: Whether the link has a 128b/132b channel coding + * + * Return the channel coding efficiency of the given DP link type, which is + * either 8b/10b or 128b/132b (aka UHBR). The corresponding overhead includes + * the 8b -> 10b, 128b -> 132b pixel data to link symbol conversion overhead + * and for 128b/132b any link or PHY level control symbol insertion overhead + * (LLCP, FEC, PHY sync, see DP Standard v2.1 3.5.2.18). For 8b/10b the + * corresponding FEC overhead is BW allocation specific, included in the value + * returned by drm_dp_bw_overhead(). + * + * Returns the efficiency in the 100%/coding-overhead% ratio in + * 1ppm units. + */ +int drm_dp_bw_channel_coding_efficiency(bool is_uhbr) +{ + if (is_uhbr) + return 967100; + else + /* + * Note that on 8b/10b MST the efficiency is only + * 78.75% due to the 1 out of 64 MTPH packet overhead, + * not accounted for here. 
+ */ + return 800000; +} +EXPORT_SYMBOL(drm_dp_bw_channel_coding_efficiency); diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index a462229908fc..0854fe428f17 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -4726,17 +4726,28 @@ EXPORT_SYMBOL(drm_dp_check_act_status); int drm_dp_calc_pbn_mode(int clock, int bpp) { /* - * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006 * The unit of 54/64Mbytes/sec is an arbitrary unit chosen based on * common multiplier to render an integer PBN for all link rate/lane * counts combinations * calculate - * peak_kbps *= (1006/1000) - * peak_kbps *= (64/54) - * peak_kbps *= 8 convert to bytes + * peak_kbps = clock * bpp / 16 + * peak_kbps *= SSC overhead / 1000000 + * peak_kbps /= 8 convert to Kbytes + * peak_kBps *= (64/54) / 1000 convert to PBN */ - return DIV_ROUND_UP_ULL(mul_u32_u32(clock * bpp, 64 * 1006 >> 4), - 1000 * 8 * 54 * 1000); + /* + * TODO: Use the actual link and mode parameters to calculate + * the overhead. For now it's assumed that these are + * 4 link lanes, 4096 hactive pixels, which don't add any + * significant data padding overhead and that there is no DSC + * or FEC overhead. + */ + int overhead = drm_dp_bw_overhead(4, 4096, 0, bpp, + DRM_DP_BW_OVERHEAD_MST | + DRM_DP_BW_OVERHEAD_SSC_REF_CLK); + + return DIV64_U64_ROUND_UP(mul_u32_u32(clock * bpp, 64 * overhead >> 4), + 1000000ULL * 8 * 54 * 1000); } EXPORT_SYMBOL(drm_dp_calc_pbn_mode); diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index 01636a679ee6..3c59f64d4a69 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -788,4 +788,15 @@ bool drm_dp_downstream_rgb_to_ycbcr_conversion(const u8 dpcd[DP_RECEIVER_CAP_SIZ const u8 port_cap[4], u8 color_spc); int drm_dp_pcon_convert_rgb_to_ycbcr(struct drm_dp_aux *aux, u8 color_spc); +#define DRM_DP_BW_OVERHEAD_MST BIT(0) +#define DRM_DP_BW_OVERHEAD_UHBR BIT(1) +#define DRM_DP_BW_OVERHEAD_SSC_REF_CLK BIT(2) +#define DRM_DP_BW_OVERHEAD_FEC BIT(3) +#define DRM_DP_BW_OVERHEAD_DSC BIT(4) + +int drm_dp_bw_overhead(int lane_count, int hactive, + int dsc_slice_count, + int bpp_x16, unsigned long flags); +int drm_dp_bw_channel_coding_efficiency(bool is_uhbr); + #endif /* _DRM_DP_HELPER_H_ */ -- cgit v1.2.3 From 8eb80946ab0c18a853be5f90d6b6ccbe3fd42989 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 31 Oct 2023 12:16:38 +0200 Subject: drm/edid: split out drm_eld.h from drm_edid.h The drm_edid.[ch] files are starting to be a bit crowded, and with plans to add more ELD related functionality, it's perhaps cleanest to split the ELD code out to a header of its own. Include drm_eld.h from drm_edid.h for starters, and leave it to follow-up work to only include drm_eld.h where needed. 
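As a rough usage sketch (hypothetical consumer code, not part of this patch), a user of the ELD helpers can now pull in just the new header:

#include <linux/printk.h>
#include <drm/drm_eld.h>

/* Hypothetical helper: log a few basic properties of a connector's ELD buffer. */
static void example_log_eld(const u8 *eld)
{
	pr_info("ELD: %d bytes, %d SADs, speaker allocation 0x%02x, %s sink\n",
		drm_eld_size(eld),
		drm_eld_sad_count(eld),
		drm_eld_get_spk_alloc(eld),
		drm_eld_get_conn_type(eld) == DRM_ELD_CONN_TYPE_DP ? "DP" : "HDMI");
}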
Cc: Mitul Golani Reviewed-by: Mitul Golani Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/0c6d631fa1058036d72dd25d1cabc90a7c52490e.1698747331.git.jani.nikula@intel.com --- Documentation/gpu/drm-kms-helpers.rst | 3 + include/drm/drm_edid.h | 149 +------------------------------ include/drm/drm_eld.h | 159 ++++++++++++++++++++++++++++++++++ 3 files changed, 163 insertions(+), 148 deletions(-) create mode 100644 include/drm/drm_eld.h (limited to 'include/drm') diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst index b748b8ae70b2..cfa8e6c399b6 100644 --- a/Documentation/gpu/drm-kms-helpers.rst +++ b/Documentation/gpu/drm-kms-helpers.rst @@ -363,6 +363,9 @@ EDID Helper Functions Reference .. kernel-doc:: drivers/gpu/drm/drm_edid.c :export: +.. kernel-doc:: include/drm/drm_eld.h + :internal: + SCDC Helper Functions Reference =============================== diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index 882d2638708e..1ff52f57ab9c 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -25,6 +25,7 @@ #include #include +#include /* FIXME: remove this, include directly where needed */ #include struct drm_device; @@ -269,64 +270,6 @@ struct detailed_timing { #define DRM_EDID_DSC_MAX_SLICES 0xf #define DRM_EDID_DSC_TOTAL_CHUNK_KBYTES 0x3f -/* ELD Header Block */ -#define DRM_ELD_HEADER_BLOCK_SIZE 4 - -#define DRM_ELD_VER 0 -# define DRM_ELD_VER_SHIFT 3 -# define DRM_ELD_VER_MASK (0x1f << 3) -# define DRM_ELD_VER_CEA861D (2 << 3) /* supports 861D or below */ -# define DRM_ELD_VER_CANNED (0x1f << 3) - -#define DRM_ELD_BASELINE_ELD_LEN 2 /* in dwords! */ - -/* ELD Baseline Block for ELD_Ver == 2 */ -#define DRM_ELD_CEA_EDID_VER_MNL 4 -# define DRM_ELD_CEA_EDID_VER_SHIFT 5 -# define DRM_ELD_CEA_EDID_VER_MASK (7 << 5) -# define DRM_ELD_CEA_EDID_VER_NONE (0 << 5) -# define DRM_ELD_CEA_EDID_VER_CEA861 (1 << 5) -# define DRM_ELD_CEA_EDID_VER_CEA861A (2 << 5) -# define DRM_ELD_CEA_EDID_VER_CEA861BCD (3 << 5) -# define DRM_ELD_MNL_SHIFT 0 -# define DRM_ELD_MNL_MASK (0x1f << 0) - -#define DRM_ELD_SAD_COUNT_CONN_TYPE 5 -# define DRM_ELD_SAD_COUNT_SHIFT 4 -# define DRM_ELD_SAD_COUNT_MASK (0xf << 4) -# define DRM_ELD_CONN_TYPE_SHIFT 2 -# define DRM_ELD_CONN_TYPE_MASK (3 << 2) -# define DRM_ELD_CONN_TYPE_HDMI (0 << 2) -# define DRM_ELD_CONN_TYPE_DP (1 << 2) -# define DRM_ELD_SUPPORTS_AI (1 << 1) -# define DRM_ELD_SUPPORTS_HDCP (1 << 0) - -#define DRM_ELD_AUD_SYNCH_DELAY 6 /* in units of 2 ms */ -# define DRM_ELD_AUD_SYNCH_DELAY_MAX 0xfa /* 500 ms */ - -#define DRM_ELD_SPEAKER 7 -# define DRM_ELD_SPEAKER_MASK 0x7f -# define DRM_ELD_SPEAKER_RLRC (1 << 6) -# define DRM_ELD_SPEAKER_FLRC (1 << 5) -# define DRM_ELD_SPEAKER_RC (1 << 4) -# define DRM_ELD_SPEAKER_RLR (1 << 3) -# define DRM_ELD_SPEAKER_FC (1 << 2) -# define DRM_ELD_SPEAKER_LFE (1 << 1) -# define DRM_ELD_SPEAKER_FLR (1 << 0) - -#define DRM_ELD_PORT_ID 8 /* offsets 8..15 inclusive */ -# define DRM_ELD_PORT_ID_LEN 8 - -#define DRM_ELD_MANUFACTURER_NAME0 16 -#define DRM_ELD_MANUFACTURER_NAME1 17 - -#define DRM_ELD_PRODUCT_CODE0 18 -#define DRM_ELD_PRODUCT_CODE1 19 - -#define DRM_ELD_MONITOR_NAME_STRING 20 /* offsets 20..(20+mnl-1) inclusive */ - -#define DRM_ELD_CEA_SAD(mnl, sad) (20 + (mnl) + 3 * (sad)) - struct edid { u8 header[8]; /* Vendor & product info */ @@ -409,96 +352,6 @@ drm_hdmi_avi_infoframe_quant_range(struct hdmi_avi_infoframe *frame, const struct drm_display_mode *mode, enum hdmi_quantization_range rgb_quant_range); -/** - * drm_eld_mnl - Get ELD 
monitor name length in bytes. - * @eld: pointer to an eld memory structure with mnl set - */ -static inline int drm_eld_mnl(const uint8_t *eld) -{ - return (eld[DRM_ELD_CEA_EDID_VER_MNL] & DRM_ELD_MNL_MASK) >> DRM_ELD_MNL_SHIFT; -} - -/** - * drm_eld_sad - Get ELD SAD structures. - * @eld: pointer to an eld memory structure with sad_count set - */ -static inline const uint8_t *drm_eld_sad(const uint8_t *eld) -{ - unsigned int ver, mnl; - - ver = (eld[DRM_ELD_VER] & DRM_ELD_VER_MASK) >> DRM_ELD_VER_SHIFT; - if (ver != 2 && ver != 31) - return NULL; - - mnl = drm_eld_mnl(eld); - if (mnl > 16) - return NULL; - - return eld + DRM_ELD_CEA_SAD(mnl, 0); -} - -/** - * drm_eld_sad_count - Get ELD SAD count. - * @eld: pointer to an eld memory structure with sad_count set - */ -static inline int drm_eld_sad_count(const uint8_t *eld) -{ - return (eld[DRM_ELD_SAD_COUNT_CONN_TYPE] & DRM_ELD_SAD_COUNT_MASK) >> - DRM_ELD_SAD_COUNT_SHIFT; -} - -/** - * drm_eld_calc_baseline_block_size - Calculate baseline block size in bytes - * @eld: pointer to an eld memory structure with mnl and sad_count set - * - * This is a helper for determining the payload size of the baseline block, in - * bytes, for e.g. setting the Baseline_ELD_Len field in the ELD header block. - */ -static inline int drm_eld_calc_baseline_block_size(const uint8_t *eld) -{ - return DRM_ELD_MONITOR_NAME_STRING - DRM_ELD_HEADER_BLOCK_SIZE + - drm_eld_mnl(eld) + drm_eld_sad_count(eld) * 3; -} - -/** - * drm_eld_size - Get ELD size in bytes - * @eld: pointer to a complete eld memory structure - * - * The returned value does not include the vendor block. It's vendor specific, - * and comprises of the remaining bytes in the ELD memory buffer after - * drm_eld_size() bytes of header and baseline block. - * - * The returned value is guaranteed to be a multiple of 4. - */ -static inline int drm_eld_size(const uint8_t *eld) -{ - return DRM_ELD_HEADER_BLOCK_SIZE + eld[DRM_ELD_BASELINE_ELD_LEN] * 4; -} - -/** - * drm_eld_get_spk_alloc - Get speaker allocation - * @eld: pointer to an ELD memory structure - * - * The returned value is the speakers mask. User has to use %DRM_ELD_SPEAKER - * field definitions to identify speakers. - */ -static inline u8 drm_eld_get_spk_alloc(const uint8_t *eld) -{ - return eld[DRM_ELD_SPEAKER] & DRM_ELD_SPEAKER_MASK; -} - -/** - * drm_eld_get_conn_type - Get device type hdmi/dp connected - * @eld: pointer to an ELD memory structure - * - * The caller need to use %DRM_ELD_CONN_TYPE_HDMI or %DRM_ELD_CONN_TYPE_DP to - * identify the display type connected. - */ -static inline u8 drm_eld_get_conn_type(const uint8_t *eld) -{ - return eld[DRM_ELD_SAD_COUNT_CONN_TYPE] & DRM_ELD_CONN_TYPE_MASK; -} - /** * drm_edid_decode_mfg_id - Decode the manufacturer ID * @mfg_id: The manufacturer ID diff --git a/include/drm/drm_eld.h b/include/drm/drm_eld.h new file mode 100644 index 000000000000..9bde89bd96ea --- /dev/null +++ b/include/drm/drm_eld.h @@ -0,0 +1,159 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __DRM_ELD_H__ +#define __DRM_ELD_H__ + +#include + +/* ELD Header Block */ +#define DRM_ELD_HEADER_BLOCK_SIZE 4 + +#define DRM_ELD_VER 0 +# define DRM_ELD_VER_SHIFT 3 +# define DRM_ELD_VER_MASK (0x1f << 3) +# define DRM_ELD_VER_CEA861D (2 << 3) /* supports 861D or below */ +# define DRM_ELD_VER_CANNED (0x1f << 3) + +#define DRM_ELD_BASELINE_ELD_LEN 2 /* in dwords! 
*/ + +/* ELD Baseline Block for ELD_Ver == 2 */ +#define DRM_ELD_CEA_EDID_VER_MNL 4 +# define DRM_ELD_CEA_EDID_VER_SHIFT 5 +# define DRM_ELD_CEA_EDID_VER_MASK (7 << 5) +# define DRM_ELD_CEA_EDID_VER_NONE (0 << 5) +# define DRM_ELD_CEA_EDID_VER_CEA861 (1 << 5) +# define DRM_ELD_CEA_EDID_VER_CEA861A (2 << 5) +# define DRM_ELD_CEA_EDID_VER_CEA861BCD (3 << 5) +# define DRM_ELD_MNL_SHIFT 0 +# define DRM_ELD_MNL_MASK (0x1f << 0) + +#define DRM_ELD_SAD_COUNT_CONN_TYPE 5 +# define DRM_ELD_SAD_COUNT_SHIFT 4 +# define DRM_ELD_SAD_COUNT_MASK (0xf << 4) +# define DRM_ELD_CONN_TYPE_SHIFT 2 +# define DRM_ELD_CONN_TYPE_MASK (3 << 2) +# define DRM_ELD_CONN_TYPE_HDMI (0 << 2) +# define DRM_ELD_CONN_TYPE_DP (1 << 2) +# define DRM_ELD_SUPPORTS_AI (1 << 1) +# define DRM_ELD_SUPPORTS_HDCP (1 << 0) + +#define DRM_ELD_AUD_SYNCH_DELAY 6 /* in units of 2 ms */ +# define DRM_ELD_AUD_SYNCH_DELAY_MAX 0xfa /* 500 ms */ + +#define DRM_ELD_SPEAKER 7 +# define DRM_ELD_SPEAKER_MASK 0x7f +# define DRM_ELD_SPEAKER_RLRC (1 << 6) +# define DRM_ELD_SPEAKER_FLRC (1 << 5) +# define DRM_ELD_SPEAKER_RC (1 << 4) +# define DRM_ELD_SPEAKER_RLR (1 << 3) +# define DRM_ELD_SPEAKER_FC (1 << 2) +# define DRM_ELD_SPEAKER_LFE (1 << 1) +# define DRM_ELD_SPEAKER_FLR (1 << 0) + +#define DRM_ELD_PORT_ID 8 /* offsets 8..15 inclusive */ +# define DRM_ELD_PORT_ID_LEN 8 + +#define DRM_ELD_MANUFACTURER_NAME0 16 +#define DRM_ELD_MANUFACTURER_NAME1 17 + +#define DRM_ELD_PRODUCT_CODE0 18 +#define DRM_ELD_PRODUCT_CODE1 19 + +#define DRM_ELD_MONITOR_NAME_STRING 20 /* offsets 20..(20+mnl-1) inclusive */ + +#define DRM_ELD_CEA_SAD(mnl, sad) (20 + (mnl) + 3 * (sad)) + +/** + * drm_eld_mnl - Get ELD monitor name length in bytes. + * @eld: pointer to an eld memory structure with mnl set + */ +static inline int drm_eld_mnl(const uint8_t *eld) +{ + return (eld[DRM_ELD_CEA_EDID_VER_MNL] & DRM_ELD_MNL_MASK) >> DRM_ELD_MNL_SHIFT; +} + +/** + * drm_eld_sad - Get ELD SAD structures. + * @eld: pointer to an eld memory structure with sad_count set + */ +static inline const uint8_t *drm_eld_sad(const uint8_t *eld) +{ + unsigned int ver, mnl; + + ver = (eld[DRM_ELD_VER] & DRM_ELD_VER_MASK) >> DRM_ELD_VER_SHIFT; + if (ver != 2 && ver != 31) + return NULL; + + mnl = drm_eld_mnl(eld); + if (mnl > 16) + return NULL; + + return eld + DRM_ELD_CEA_SAD(mnl, 0); +} + +/** + * drm_eld_sad_count - Get ELD SAD count. + * @eld: pointer to an eld memory structure with sad_count set + */ +static inline int drm_eld_sad_count(const uint8_t *eld) +{ + return (eld[DRM_ELD_SAD_COUNT_CONN_TYPE] & DRM_ELD_SAD_COUNT_MASK) >> + DRM_ELD_SAD_COUNT_SHIFT; +} + +/** + * drm_eld_calc_baseline_block_size - Calculate baseline block size in bytes + * @eld: pointer to an eld memory structure with mnl and sad_count set + * + * This is a helper for determining the payload size of the baseline block, in + * bytes, for e.g. setting the Baseline_ELD_Len field in the ELD header block. + */ +static inline int drm_eld_calc_baseline_block_size(const uint8_t *eld) +{ + return DRM_ELD_MONITOR_NAME_STRING - DRM_ELD_HEADER_BLOCK_SIZE + + drm_eld_mnl(eld) + drm_eld_sad_count(eld) * 3; +} + +/** + * drm_eld_size - Get ELD size in bytes + * @eld: pointer to a complete eld memory structure + * + * The returned value does not include the vendor block. It's vendor specific, + * and comprises of the remaining bytes in the ELD memory buffer after + * drm_eld_size() bytes of header and baseline block. + * + * The returned value is guaranteed to be a multiple of 4. 
+ */ +static inline int drm_eld_size(const uint8_t *eld) +{ + return DRM_ELD_HEADER_BLOCK_SIZE + eld[DRM_ELD_BASELINE_ELD_LEN] * 4; +} + +/** + * drm_eld_get_spk_alloc - Get speaker allocation + * @eld: pointer to an ELD memory structure + * + * The returned value is the speakers mask. User has to use %DRM_ELD_SPEAKER + * field definitions to identify speakers. + */ +static inline u8 drm_eld_get_spk_alloc(const uint8_t *eld) +{ + return eld[DRM_ELD_SPEAKER] & DRM_ELD_SPEAKER_MASK; +} + +/** + * drm_eld_get_conn_type - Get device type hdmi/dp connected + * @eld: pointer to an ELD memory structure + * + * The caller need to use %DRM_ELD_CONN_TYPE_HDMI or %DRM_ELD_CONN_TYPE_DP to + * identify the display type connected. + */ +static inline u8 drm_eld_get_conn_type(const uint8_t *eld) +{ + return eld[DRM_ELD_SAD_COUNT_CONN_TYPE] & DRM_ELD_CONN_TYPE_MASK; +} + +#endif /* __DRM_ELD_H__ */ -- cgit v1.2.3 From 533914536bf5cb5984755244f5aa13cf93cc84d3 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 31 Oct 2023 12:16:39 +0200 Subject: drm/eld: replace uint8_t with u8 Unify on kernel types. Cc: Mitul Golani Reviewed-by: Chaitanya Kumar Borah Reviewed-by: Mitul Golani Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/6e048fc4c8a3ebec638ce27b0b8b969a3d2fa8bc.1698747331.git.jani.nikula@intel.com --- include/drm/drm_eld.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/drm') diff --git a/include/drm/drm_eld.h b/include/drm/drm_eld.h index 9bde89bd96ea..7b674256b9aa 100644 --- a/include/drm/drm_eld.h +++ b/include/drm/drm_eld.h @@ -70,7 +70,7 @@ * drm_eld_mnl - Get ELD monitor name length in bytes. * @eld: pointer to an eld memory structure with mnl set */ -static inline int drm_eld_mnl(const uint8_t *eld) +static inline int drm_eld_mnl(const u8 *eld) { return (eld[DRM_ELD_CEA_EDID_VER_MNL] & DRM_ELD_MNL_MASK) >> DRM_ELD_MNL_SHIFT; } @@ -79,7 +79,7 @@ static inline int drm_eld_mnl(const uint8_t *eld) * drm_eld_sad - Get ELD SAD structures. * @eld: pointer to an eld memory structure with sad_count set */ -static inline const uint8_t *drm_eld_sad(const uint8_t *eld) +static inline const u8 *drm_eld_sad(const u8 *eld) { unsigned int ver, mnl; @@ -98,7 +98,7 @@ static inline const uint8_t *drm_eld_sad(const uint8_t *eld) * drm_eld_sad_count - Get ELD SAD count. * @eld: pointer to an eld memory structure with sad_count set */ -static inline int drm_eld_sad_count(const uint8_t *eld) +static inline int drm_eld_sad_count(const u8 *eld) { return (eld[DRM_ELD_SAD_COUNT_CONN_TYPE] & DRM_ELD_SAD_COUNT_MASK) >> DRM_ELD_SAD_COUNT_SHIFT; @@ -111,7 +111,7 @@ static inline int drm_eld_sad_count(const uint8_t *eld) * This is a helper for determining the payload size of the baseline block, in * bytes, for e.g. setting the Baseline_ELD_Len field in the ELD header block. */ -static inline int drm_eld_calc_baseline_block_size(const uint8_t *eld) +static inline int drm_eld_calc_baseline_block_size(const u8 *eld) { return DRM_ELD_MONITOR_NAME_STRING - DRM_ELD_HEADER_BLOCK_SIZE + drm_eld_mnl(eld) + drm_eld_sad_count(eld) * 3; @@ -127,7 +127,7 @@ static inline int drm_eld_calc_baseline_block_size(const uint8_t *eld) * * The returned value is guaranteed to be a multiple of 4. 
*/ -static inline int drm_eld_size(const uint8_t *eld) +static inline int drm_eld_size(const u8 *eld) { return DRM_ELD_HEADER_BLOCK_SIZE + eld[DRM_ELD_BASELINE_ELD_LEN] * 4; } @@ -139,7 +139,7 @@ static inline int drm_eld_size(const uint8_t *eld) * The returned value is the speakers mask. User has to use %DRM_ELD_SPEAKER * field definitions to identify speakers. */ -static inline u8 drm_eld_get_spk_alloc(const uint8_t *eld) +static inline u8 drm_eld_get_spk_alloc(const u8 *eld) { return eld[DRM_ELD_SPEAKER] & DRM_ELD_SPEAKER_MASK; } @@ -151,7 +151,7 @@ static inline u8 drm_eld_get_spk_alloc(const uint8_t *eld) * The caller need to use %DRM_ELD_CONN_TYPE_HDMI or %DRM_ELD_CONN_TYPE_DP to * identify the display type connected. */ -static inline u8 drm_eld_get_conn_type(const uint8_t *eld) +static inline u8 drm_eld_get_conn_type(const u8 *eld) { return eld[DRM_ELD_SAD_COUNT_CONN_TYPE] & DRM_ELD_CONN_TYPE_MASK; } -- cgit v1.2.3 From 439590ace7755657523a1a0230c6099cb0a6e15f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 31 Oct 2023 12:16:40 +0200 Subject: drm/edid: include drm_eld.h only where required Reduce the dependencies on drm_eld.h. Some files might be able to drop the dependency on drm_edid.h too with the direct inclusion of drm_eld.h. Cc: Mitul Golani Reviewed-by: Mitul Golani Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/9f5963ce900d747f3279312c0cd1da599fd83f94.1698747331.git.jani.nikula@intel.com --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + drivers/gpu/drm/drm_edid.c | 1 + drivers/gpu/drm/i915/display/intel_audio.c | 1 + drivers/gpu/drm/i915/display/intel_crtc_state_dump.c | 1 + drivers/gpu/drm/i915/display/intel_sdvo.c | 1 + drivers/gpu/drm/nouveau/dispnv50/disp.c | 1 + drivers/gpu/drm/radeon/radeon_audio.c | 1 + drivers/gpu/drm/tegra/hdmi.c | 1 + drivers/gpu/drm/tegra/sor.c | 1 + include/drm/drm_edid.h | 1 - sound/core/pcm_drm_eld.c | 1 + sound/soc/codecs/hdac_hdmi.c | 1 + sound/soc/codecs/hdmi-codec.c | 1 + sound/x86/intel_hdmi_audio.c | 1 + 14 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index c6fd34bab358..42fa2e142712 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -87,6 +87,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index ec1cb4890acb..85f719f67b50 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index 19605264a35c..39f5b698e08a 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -25,6 +25,7 @@ #include #include +#include #include #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c index 66fe880af8f3..2d15e82c0b3d 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c +++ b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c @@ -4,6 +4,7 @@ */ #include +#include #include "i915_drv.h" #include "intel_crtc_state_dump.h" diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 950ba0431f5f..f967fe06f549 100644 --- 
a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "i915_drv.h" #include "i915_reg.h" diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index a0ac8c258d9f..334ca7f919f2 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index d6ccaf24ee0c..279bf130a18c 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -26,6 +26,7 @@ #include #include +#include #include "dce6_afmt.h" #include "evergreen_hdmi.h" #include "radeon.h" diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c index 0ba3ca3ac509..a1fcee665023 100644 --- a/drivers/gpu/drm/tegra/hdmi.c +++ b/drivers/gpu/drm/tegra/hdmi.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index d5a3d3f4fece..83341576630d 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index 1ff52f57ab9c..e98aa6818700 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -25,7 +25,6 @@ #include #include -#include /* FIXME: remove this, include directly where needed */ #include struct drm_device; diff --git a/sound/core/pcm_drm_eld.c b/sound/core/pcm_drm_eld.c index 07075071972d..1cdca4d4fc9c 100644 --- a/sound/core/pcm_drm_eld.c +++ b/sound/core/pcm_drm_eld.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index 8b6b76029694..d1b53fc1efb6 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index 13689e718d36..9b4e9192cd79 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -17,6 +17,7 @@ #include #include /* This is only to get MAX_ELD_BYTES */ +#include #define HDMI_CODEC_CHMAP_IDX_UNKNOWN -1 diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index ab95fb34a635..02f5a7f9b728 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "intel_hdmi_audio.h" -- cgit v1.2.3 From f415a6078f640ab15bae34d3c6a1d8e6071363de Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 31 Oct 2023 12:16:43 +0200 Subject: drm/eld: add helpers to modify the SADs of an ELD Occasionally it's necessary for drivers to modify the SADs of an ELD, but it's not so cool to have drivers poke at the ELD buffer directly. Using the helpers to translate between 3-byte SAD and struct cea_sad, add ELD helpers to get/set the SADs from/to an ELD. 
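A hedged sketch of how a driver might use the new accessors added below (hypothetical logic; the clamping rule is only an example, not something this patch prescribes):

#include <drm/drm_edid.h>	/* struct cea_sad */
#include <drm/drm_eld.h>

/* Hypothetical example: rewrite every SAD of an ELD in place, here by
 * clamping the channel count field to a made-up limit of 2.
 */
static void example_clamp_eld_sads(u8 *eld)
{
	struct cea_sad sad;
	int i;

	for (i = 0; i < drm_eld_sad_count(eld); i++) {
		if (drm_eld_sad_get(eld, i, &sad) < 0)
			break;

		if (sad.channels > 2)
			sad.channels = 2;

		drm_eld_sad_set(eld, i, &sad);
	}
}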
v2: s/i/sad_index/ (Mitul) Cc: Mitul Golani Signed-off-by: Jani Nikula Reviewed-by: Mitul Golani Link: https://patchwork.freedesktop.org/patch/msgid/8e9a05f2b1e0dd184132d636e1e778e8917ec25d.1698747331.git.jani.nikula@intel.com --- Documentation/gpu/drm-kms-helpers.rst | 3 ++ drivers/gpu/drm/Makefile | 1 + drivers/gpu/drm/drm_eld.c | 55 +++++++++++++++++++++++++++++++++++ include/drm/drm_eld.h | 5 ++++ 4 files changed, 64 insertions(+) create mode 100644 drivers/gpu/drm/drm_eld.c (limited to 'include/drm') diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst index cfa8e6c399b6..59cfe8a7a8ba 100644 --- a/Documentation/gpu/drm-kms-helpers.rst +++ b/Documentation/gpu/drm-kms-helpers.rst @@ -366,6 +366,9 @@ EDID Helper Functions Reference .. kernel-doc:: include/drm/drm_eld.h :internal: +.. kernel-doc:: drivers/gpu/drm/drm_eld.c + :export: + SCDC Helper Functions Reference =============================== diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 8e1bde059170..cdbe91ac0bfc 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -22,6 +22,7 @@ drm-y := \ drm_drv.o \ drm_dumb_buffers.o \ drm_edid.o \ + drm_eld.o \ drm_encoder.o \ drm_file.o \ drm_fourcc.o \ diff --git a/drivers/gpu/drm/drm_eld.c b/drivers/gpu/drm/drm_eld.c new file mode 100644 index 000000000000..5177991aa272 --- /dev/null +++ b/drivers/gpu/drm/drm_eld.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include +#include + +#include "drm_internal.h" + +/** + * drm_eld_sad_get - get SAD from ELD to struct cea_sad + * @eld: ELD buffer + * @sad_index: SAD index + * @cta_sad: destination struct cea_sad + * + * @return: 0 on success, or negative on errors + */ +int drm_eld_sad_get(const u8 *eld, int sad_index, struct cea_sad *cta_sad) +{ + const u8 *sad; + + if (sad_index >= drm_eld_sad_count(eld)) + return -EINVAL; + + sad = eld + DRM_ELD_CEA_SAD(drm_eld_mnl(eld), sad_index); + + drm_edid_cta_sad_set(cta_sad, sad); + + return 0; +} +EXPORT_SYMBOL(drm_eld_sad_get); + +/** + * drm_eld_sad_set - set SAD to ELD from struct cea_sad + * @eld: ELD buffer + * @sad_index: SAD index + * @cta_sad: source struct cea_sad + * + * @return: 0 on success, or negative on errors + */ +int drm_eld_sad_set(u8 *eld, int sad_index, const struct cea_sad *cta_sad) +{ + u8 *sad; + + if (sad_index >= drm_eld_sad_count(eld)) + return -EINVAL; + + sad = eld + DRM_ELD_CEA_SAD(drm_eld_mnl(eld), sad_index); + + drm_edid_cta_sad_get(cta_sad, sad); + + return 0; +} +EXPORT_SYMBOL(drm_eld_sad_set); diff --git a/include/drm/drm_eld.h b/include/drm/drm_eld.h index 7b674256b9aa..0a88d10b28b0 100644 --- a/include/drm/drm_eld.h +++ b/include/drm/drm_eld.h @@ -8,6 +8,8 @@ #include +struct cea_sad; + /* ELD Header Block */ #define DRM_ELD_HEADER_BLOCK_SIZE 4 @@ -75,6 +77,9 @@ static inline int drm_eld_mnl(const u8 *eld) return (eld[DRM_ELD_CEA_EDID_VER_MNL] & DRM_ELD_MNL_MASK) >> DRM_ELD_MNL_SHIFT; } +int drm_eld_sad_get(const u8 *eld, int sad_index, struct cea_sad *cta_sad); +int drm_eld_sad_set(u8 *eld, int sad_index, const struct cea_sad *cta_sad); + /** * drm_eld_sad - Get ELD SAD structures. 
* @eld: pointer to an eld memory structure with sad_count set -- cgit v1.2.3 From f3123c2590005c5ff631653d31428e40cd10c618 Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Thu, 9 Nov 2023 18:53:26 -0500 Subject: drm/sched: Qualify drm_sched_wakeup() by drm_sched_entity_is_ready() Don't "wake up" the GPU scheduler unless the entity is ready, as well as we can queue to the scheduler, i.e. there is no point in waking up the scheduler for the entity unless the entity is ready. Signed-off-by: Luben Tuikov Fixes: bc8d6a9df99038 ("drm/sched: Don't disturb the entity when in RR-mode scheduling") Reviewed-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231110000123.72565-2-ltuikov89@gmail.com --- drivers/gpu/drm/scheduler/sched_entity.c | 4 ++-- drivers/gpu/drm/scheduler/sched_main.c | 8 +++++--- include/drm/gpu_scheduler.h | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index f1db63cc8198..4d42b1e4daa6 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -370,7 +370,7 @@ static void drm_sched_entity_wakeup(struct dma_fence *f, container_of(cb, struct drm_sched_entity, cb); drm_sched_entity_clear_dep(f, cb); - drm_sched_wakeup(entity->rq->sched); + drm_sched_wakeup(entity->rq->sched, entity); } /** @@ -602,7 +602,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) drm_sched_rq_update_fifo(entity, submit_ts); - drm_sched_wakeup(entity->rq->sched); + drm_sched_wakeup(entity->rq->sched, entity); } } EXPORT_SYMBOL(drm_sched_entity_push_job); diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index cd0dc3f81d05..8f5e466bd582 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -925,10 +925,12 @@ static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched) * * Wake up the scheduler if we can queue jobs. */ -void drm_sched_wakeup(struct drm_gpu_scheduler *sched) +void drm_sched_wakeup(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity) { - if (drm_sched_can_queue(sched)) - drm_sched_run_job_queue(sched); + if (drm_sched_entity_is_ready(entity)) + if (drm_sched_can_queue(sched)) + drm_sched_run_job_queue(sched); } /** diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 754fd2217334..09916c84703f 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -559,7 +559,7 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity *entity, void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched); void drm_sched_job_cleanup(struct drm_sched_job *job); -void drm_sched_wakeup(struct drm_gpu_scheduler *sched); +void drm_sched_wakeup(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity); bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched); void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched); void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched); -- cgit v1.2.3 From 36245bd02e88e68ac5955c2958c968879d7b75a9 Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Thu, 9 Nov 2023 19:11:46 -0500 Subject: drm/sched: Define pr_fmt() for DRM using pr_*() Define pr_fmt() as "[drm] " for DRM code using pr_*() facilities, especially when no devices are available. This makes it easier to browse kernel logs. 
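A hedged sketch of the effect (hypothetical file, not from this patch): any DRM source file that does not define its own pr_fmt() before including drm_print.h now picks up the prefix automatically, e.g.:

#include <linux/errno.h>
#include <drm/drm_print.h>	/* supplies pr_fmt() = "[drm] " unless already defined */

static int example_probe(void)
{
	/* logs: "[drm] *ERROR* no usable device found" */
	pr_err("*ERROR* no usable device found\n");
	return -ENODEV;
}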
Signed-off-by: Luben Tuikov Acked-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231110002659.113208-2-ltuikov89@gmail.com --- include/drm/drm_print.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/drm') diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index a93a387f8a1a..e8fe60d0eb87 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -26,6 +26,20 @@ #ifndef DRM_PRINT_H_ #define DRM_PRINT_H_ +/* Define this before including linux/printk.h, so that the format + * string in pr_*() macros is correctly set for DRM. If a file wants + * to define this to something else, it should do so before including + * this header file. + * + * It is encouraged code using pr_err() to prefix their format with + * the string "*ERROR* ", to make it easier to scan kernel logs. For + * instance, + * pr_err("*ERROR* ", args). + */ +#ifndef pr_fmt +#define pr_fmt(fmt) "[drm] " fmt +#endif + #include #include #include -- cgit v1.2.3 From a78422e9dff366b3a46ae44caf6ec8ded9c9fc2f Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 10 Nov 2023 01:16:33 +0100 Subject: drm/sched: implement dynamic job-flow control Currently, job flow control is implemented simply by limiting the number of jobs in flight. Therefore, a scheduler is initialized with a credit limit that corresponds to the number of jobs which can be sent to the hardware. This implies that for each job, drivers need to account for the maximum job size possible in order to not overflow the ring buffer. However, there are drivers, such as Nouveau, where the job size has a rather large range. For such drivers it can easily happen that job submissions not even filling the ring by 1% can block subsequent submissions, which, in the worst case, can lead to the ring run dry. In order to overcome this issue, allow for tracking the actual job size instead of the number of jobs. Therefore, add a field to track a job's credit count, which represents the number of credits a job contributes to the scheduler's credit limit. Signed-off-by: Danilo Krummrich Reviewed-by: Luben Tuikov Link: https://patchwork.freedesktop.org/patch/msgid/20231110001638.71750-1-dakr@redhat.com --- Documentation/gpu/drm-mm.rst | 6 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 2 +- drivers/gpu/drm/lima/lima_device.c | 2 +- drivers/gpu/drm/lima/lima_sched.c | 2 +- drivers/gpu/drm/msm/msm_gem_submit.c | 2 +- drivers/gpu/drm/nouveau/nouveau_sched.c | 2 +- drivers/gpu/drm/panfrost/panfrost_drv.c | 2 +- drivers/gpu/drm/panfrost/panfrost_job.c | 2 +- drivers/gpu/drm/scheduler/gpu_scheduler_trace.h | 2 +- drivers/gpu/drm/scheduler/sched_main.c | 170 +++++++++++++++++++----- drivers/gpu/drm/v3d/v3d_gem.c | 2 +- include/drm/gpu_scheduler.h | 28 +++- 14 files changed, 175 insertions(+), 51 deletions(-) (limited to 'include/drm') diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst index 602010cb6894..acc5901ac840 100644 --- a/Documentation/gpu/drm-mm.rst +++ b/Documentation/gpu/drm-mm.rst @@ -552,6 +552,12 @@ Overview .. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c :doc: Overview +Flow Control +------------ + +.. 
kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c + :doc: Flow Control + Scheduler Function References ----------------------------- diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 1f357198533f..62bb7fc7448a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -115,7 +115,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (!entity) return 0; - return drm_sched_job_init(&(*job)->base, entity, owner); + return drm_sched_job_init(&(*job)->base, entity, 1, owner); } int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index 2416c526f9b0..3d0f8d182506 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -535,7 +535,7 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, ret = drm_sched_job_init(&submit->sched_job, &ctx->sched_entity[args->pipe], - submit->ctx); + 1, submit->ctx); if (ret) goto err_submit_put; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 9276756e1397..5105d290e72e 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1917,7 +1917,7 @@ static int etnaviv_gpu_rpm_suspend(struct device *dev) u32 idle, mask; /* If there are any jobs in the HW queue, we're not idle */ - if (atomic_read(&gpu->sched.hw_rq_count)) + if (atomic_read(&gpu->sched.credit_count)) return -EBUSY; /* Check whether the hardware (except FE and MC) is idle */ diff --git a/drivers/gpu/drm/lima/lima_device.c b/drivers/gpu/drm/lima/lima_device.c index 02cef0cea657..0bf7105c8748 100644 --- a/drivers/gpu/drm/lima/lima_device.c +++ b/drivers/gpu/drm/lima/lima_device.c @@ -514,7 +514,7 @@ int lima_device_suspend(struct device *dev) /* check any task running */ for (i = 0; i < lima_pipe_num; i++) { - if (atomic_read(&ldev->pipe[i].base.hw_rq_count)) + if (atomic_read(&ldev->pipe[i].base.credit_count)) return -EBUSY; } diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index aa030e1f7cda..c3bf8cda8498 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -123,7 +123,7 @@ int lima_sched_task_init(struct lima_sched_task *task, for (i = 0; i < num_bos; i++) drm_gem_object_get(&bos[i]->base.base); - err = drm_sched_job_init(&task->base, &context->base, vm); + err = drm_sched_job_init(&task->base, &context->base, 1, vm); if (err) { kfree(task->bos); return err; diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 99744de6c05a..c002cabe7b9c 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -48,7 +48,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, return ERR_PTR(ret); } - ret = drm_sched_job_init(&submit->base, queue->entity, queue); + ret = drm_sched_job_init(&submit->base, queue->entity, 1, queue); if (ret) { kfree(submit->hw_fence); kfree(submit); diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c index 7e64b5ef90fb..1b2cc3f2e1c7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.c +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c @@ -89,7 +89,7 @@ nouveau_job_init(struct nouveau_job *job, } - ret = drm_sched_job_init(&job->base, &entity->base, NULL); + ret = drm_sched_job_init(&job->base, &entity->base, 1, 
NULL); if (ret) goto err_free_chains; diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index b834777b409b..54d1c19bea84 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -274,7 +274,7 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data, ret = drm_sched_job_init(&job->base, &file_priv->sched_entity[slot], - NULL); + 1, NULL); if (ret) goto out_put_job; diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index 6d89e24322db..f9446e197428 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -963,7 +963,7 @@ int panfrost_job_is_idle(struct panfrost_device *pfdev) for (i = 0; i < NUM_JOB_SLOTS; i++) { /* If there are any jobs in the HW queue, we're not idle */ - if (atomic_read(&js->queue[i].sched.hw_rq_count)) + if (atomic_read(&js->queue[i].sched.credit_count)) return false; } diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h index 3143ecaaff86..f8ed093b7356 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h @@ -51,7 +51,7 @@ DECLARE_EVENT_CLASS(drm_sched_job, __assign_str(name, sched_job->sched->name); __entry->job_count = spsc_queue_count(&entity->job_queue); __entry->hw_job_count = atomic_read( - &sched_job->sched->hw_rq_count); + &sched_job->sched->credit_count); ), TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d", __entry->entity, __entry->id, diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 8f5e466bd582..044a8c4875ba 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -48,6 +48,30 @@ * through the jobs entity pointer. */ +/** + * DOC: Flow Control + * + * The DRM GPU scheduler provides a flow control mechanism to regulate the rate + * in which the jobs fetched from scheduler entities are executed. + * + * In this context the &drm_gpu_scheduler keeps track of a driver specified + * credit limit representing the capacity of this scheduler and a credit count; + * every &drm_sched_job carries a driver specified number of credits. + * + * Once a job is executed (but not yet finished), the job's credits contribute + * to the scheduler's credit count until the job is finished. If by executing + * one more job the scheduler's credit count would exceed the scheduler's + * credit limit, the job won't be executed. Instead, the scheduler will wait + * until the credit count has decreased enough to not overflow its credit limit. + * This implies waiting for previously executed jobs. + * + * Optionally, drivers may register a callback (update_job_credits) provided by + * struct drm_sched_backend_ops to update the job's credits dynamically. The + * scheduler executes this callback every time the scheduler considers a job for + * execution and subsequently checks whether the job fits the scheduler's credit + * limit. 
+ */ + #include #include #include @@ -75,6 +99,51 @@ int drm_sched_policy = DRM_SCHED_POLICY_FIFO; MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default)."); module_param_named(sched_policy, drm_sched_policy, int, 0444); +static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched) +{ + u32 credits; + + drm_WARN_ON(sched, check_sub_overflow(sched->credit_limit, + atomic_read(&sched->credit_count), + &credits)); + + return credits; +} + +/** + * drm_sched_can_queue -- Can we queue more to the hardware? + * @sched: scheduler instance + * @entity: the scheduler entity + * + * Return true if we can push at least one more job from @entity, false + * otherwise. + */ +static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity) +{ + struct drm_sched_job *s_job; + + s_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); + if (!s_job) + return false; + + if (sched->ops->update_job_credits) { + s_job->credits = sched->ops->update_job_credits(s_job); + + drm_WARN(sched, !s_job->credits, + "Jobs with zero credits bypass job-flow control.\n"); + } + + /* If a job exceeds the credit limit, truncate it to the credit limit + * itself to guarantee forward progress. + */ + if (drm_WARN(sched, s_job->credits > sched->credit_limit, + "Jobs may not exceed the credit limit, truncate.\n")) + s_job->credits = sched->credit_limit; + + return drm_sched_available_credits(sched) >= s_job->credits; +} + static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a, const struct rb_node *b) { @@ -186,12 +255,18 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, /** * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run * + * @sched: the gpu scheduler * @rq: scheduler run queue to check. * - * Try to find a ready entity, returns NULL if none found. + * Try to find the next ready entity. + * + * Return an entity if one is found; return an error-pointer (!NULL) if an + * entity was ready, but the scheduler had insufficient credits to accommodate + * its job; return NULL, if no ready entity was found. */ static struct drm_sched_entity * -drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq) +drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched, + struct drm_sched_rq *rq) { struct drm_sched_entity *entity; @@ -201,6 +276,14 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq) if (entity) { list_for_each_entry_continue(entity, &rq->entities, list) { if (drm_sched_entity_is_ready(entity)) { + /* If we can't queue yet, preserve the current + * entity in terms of fairness. + */ + if (!drm_sched_can_queue(sched, entity)) { + spin_unlock(&rq->lock); + return ERR_PTR(-ENOSPC); + } + rq->current_entity = entity; reinit_completion(&entity->entity_idle); spin_unlock(&rq->lock); @@ -210,8 +293,15 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq) } list_for_each_entry(entity, &rq->entities, list) { - if (drm_sched_entity_is_ready(entity)) { + /* If we can't queue yet, preserve the current entity in + * terms of fairness. 
+ */ + if (!drm_sched_can_queue(sched, entity)) { + spin_unlock(&rq->lock); + return ERR_PTR(-ENOSPC); + } + rq->current_entity = entity; reinit_completion(&entity->entity_idle); spin_unlock(&rq->lock); @@ -230,12 +320,18 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq) /** * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run * + * @sched: the gpu scheduler * @rq: scheduler run queue to check. * - * Find oldest waiting ready entity, returns NULL if none found. + * Find oldest waiting ready entity. + * + * Return an entity if one is found; return an error-pointer (!NULL) if an + * entity was ready, but the scheduler had insufficient credits to accommodate + * its job; return NULL, if no ready entity was found. */ static struct drm_sched_entity * -drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq) +drm_sched_rq_select_entity_fifo(struct drm_gpu_scheduler *sched, + struct drm_sched_rq *rq) { struct rb_node *rb; @@ -245,6 +341,14 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq) entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node); if (drm_sched_entity_is_ready(entity)) { + /* If we can't queue yet, preserve the current entity in + * terms of fairness. + */ + if (!drm_sched_can_queue(sched, entity)) { + spin_unlock(&rq->lock); + return ERR_PTR(-ENOSPC); + } + rq->current_entity = entity; reinit_completion(&entity->entity_idle); break; @@ -302,7 +406,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result) struct drm_sched_fence *s_fence = s_job->s_fence; struct drm_gpu_scheduler *sched = s_fence->sched; - atomic_dec(&sched->hw_rq_count); + atomic_sub(s_job->credits, &sched->credit_count); atomic_dec(sched->score); trace_drm_sched_process_job(s_fence); @@ -525,7 +629,7 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad) &s_job->cb)) { dma_fence_put(s_job->s_fence->parent); s_job->s_fence->parent = NULL; - atomic_dec(&sched->hw_rq_count); + atomic_sub(s_job->credits, &sched->credit_count); } else { /* * remove job from pending_list. @@ -586,7 +690,7 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery) list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) { struct dma_fence *fence = s_job->s_fence->parent; - atomic_inc(&sched->hw_rq_count); + atomic_add(s_job->credits, &sched->credit_count); if (!full_recovery) continue; @@ -667,6 +771,8 @@ EXPORT_SYMBOL(drm_sched_resubmit_jobs); * drm_sched_job_init - init a scheduler job * @job: scheduler job to init * @entity: scheduler entity to use + * @credits: the number of credits this job contributes to the schedulers + * credit limit * @owner: job owner for debugging * * Refer to drm_sched_entity_push_job() documentation @@ -684,7 +790,7 @@ EXPORT_SYMBOL(drm_sched_resubmit_jobs); */ int drm_sched_job_init(struct drm_sched_job *job, struct drm_sched_entity *entity, - void *owner) + u32 credits, void *owner) { if (!entity->rq) { /* This will most likely be followed by missing frames @@ -695,7 +801,13 @@ int drm_sched_job_init(struct drm_sched_job *job, return -ENOENT; } + if (unlikely(!credits)) { + pr_err("*ERROR* %s: credits cannot be 0!\n", __func__); + return -EINVAL; + } + job->entity = entity; + job->credits = credits; job->s_fence = drm_sched_fence_alloc(entity, owner); if (!job->s_fence) return -ENOMEM; @@ -907,21 +1019,10 @@ void drm_sched_job_cleanup(struct drm_sched_job *job) } EXPORT_SYMBOL(drm_sched_job_cleanup); -/** - * drm_sched_can_queue -- Can we queue more to the hardware? 
- * @sched: scheduler instance - * - * Return true if we can push more jobs to the hw, otherwise false. - */ -static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched) -{ - return atomic_read(&sched->hw_rq_count) < - sched->hw_submission_limit; -} - /** * drm_sched_wakeup - Wake up the scheduler if it is ready to queue * @sched: scheduler instance + * @entity: the scheduler entity * * Wake up the scheduler if we can queue jobs. */ @@ -929,7 +1030,7 @@ void drm_sched_wakeup(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity) { if (drm_sched_entity_is_ready(entity)) - if (drm_sched_can_queue(sched)) + if (drm_sched_can_queue(sched, entity)) drm_sched_run_job_queue(sched); } @@ -938,7 +1039,11 @@ void drm_sched_wakeup(struct drm_gpu_scheduler *sched, * * @sched: scheduler instance * - * Returns the entity to process or NULL if none are found. + * Return an entity to process or NULL if none are found. + * + * Note, that we break out of the for-loop when "entity" is non-null, which can + * also be an error-pointer--this assures we don't process lower priority + * run-queues. See comments in the respectively called functions. */ static struct drm_sched_entity * drm_sched_select_entity(struct drm_gpu_scheduler *sched) @@ -946,19 +1051,16 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched) struct drm_sched_entity *entity; int i; - if (!drm_sched_can_queue(sched)) - return NULL; - /* Kernel run queue has higher priority than normal run queue*/ for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ? - drm_sched_rq_select_entity_fifo(sched->sched_rq[i]) : - drm_sched_rq_select_entity_rr(sched->sched_rq[i]); + drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) : + drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]); if (entity) break; } - return entity; + return IS_ERR(entity) ? NULL : entity; } /** @@ -1094,7 +1196,7 @@ static void drm_sched_run_job_work(struct work_struct *w) s_fence = sched_job->s_fence; - atomic_inc(&sched->hw_rq_count); + atomic_add(sched_job->credits, &sched->credit_count); drm_sched_job_begin(sched_job); trace_drm_run_job(sched_job, entity); @@ -1129,7 +1231,7 @@ static void drm_sched_run_job_work(struct work_struct *w) * @submit_wq: workqueue to use for submission. If NULL, an ordered wq is * allocated and used * @num_rqs: number of runqueues, one for each priority, up to DRM_SCHED_PRIORITY_COUNT - * @hw_submission: number of hw submissions that can be in flight + * @credit_limit: the number of credits this scheduler can hold from all jobs * @hang_limit: number of times to allow a job to hang before dropping it * @timeout: timeout value in jiffies for the scheduler * @timeout_wq: workqueue to use for timeout work. If NULL, the system_wq is @@ -1143,14 +1245,14 @@ static void drm_sched_run_job_work(struct work_struct *w) int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_backend_ops *ops, struct workqueue_struct *submit_wq, - u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit, + u32 num_rqs, u32 credit_limit, unsigned int hang_limit, long timeout, struct workqueue_struct *timeout_wq, atomic_t *score, const char *name, struct device *dev) { int i, ret; sched->ops = ops; - sched->hw_submission_limit = hw_submission; + sched->credit_limit = credit_limit; sched->name = name; sched->timeout = timeout; sched->timeout_wq = timeout_wq ? 
: system_wq; @@ -1199,7 +1301,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, init_waitqueue_head(&sched->job_scheduled); INIT_LIST_HEAD(&sched->pending_list); spin_lock_init(&sched->job_list_lock); - atomic_set(&sched->hw_rq_count, 0); + atomic_set(&sched->credit_count, 0); INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); INIT_WORK(&sched->work_run_job, drm_sched_run_job_work); INIT_WORK(&sched->work_free_job, drm_sched_free_job_work); diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 712675134c04..9d2ac23c29e3 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -418,7 +418,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, job->file = file_priv; ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue], - v3d_priv); + 1, v3d_priv); if (ret) goto fail; diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 09916c84703f..1d60eab747de 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -321,6 +321,7 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); * @sched: the scheduler instance on which this job is scheduled. * @s_fence: contains the fences for the scheduling of job. * @finish_cb: the callback for the finished fence. + * @credits: the number of credits this job contributes to the scheduler * @work: Helper to reschdeule job kill to different context. * @id: a unique id assigned to each job scheduled on the scheduler. * @karma: increment on every hang caused by this job. If this exceeds the hang @@ -340,6 +341,8 @@ struct drm_sched_job { struct drm_gpu_scheduler *sched; struct drm_sched_fence *s_fence; + u32 credits; + /* * work is used only after finish_cb has been used and will not be * accessed anymore. @@ -463,13 +466,27 @@ struct drm_sched_backend_ops { * and it's time to clean it up. */ void (*free_job)(struct drm_sched_job *sched_job); + + /** + * @update_job_credits: Called when the scheduler is considering this + * job for execution. + * + * This callback returns the number of credits the job would take if + * pushed to the hardware. Drivers may use this to dynamically update + * the job's credit count. For instance, deduct the number of credits + * for already signalled native fences. + * + * This callback is optional. + */ + u32 (*update_job_credits)(struct drm_sched_job *sched_job); }; /** * struct drm_gpu_scheduler - scheduler instance-specific data * * @ops: backend operations provided by the driver. - * @hw_submission_limit: the max size of the hardware queue. + * @credit_limit: the credit limit of this scheduler + * @credit_count: the current credit count of this scheduler * @timeout: the time after which a job is removed from the scheduler. * @name: name of the ring for which this scheduler is being used. * @num_rqs: Number of run-queues. This is at most DRM_SCHED_PRIORITY_COUNT, @@ -478,7 +495,6 @@ struct drm_sched_backend_ops { * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler * waits on this wait queue until all the scheduled jobs are * finished. - * @hw_rq_count: the number of jobs currently in the hardware queue. * @job_id_count: used to assign unique id to the each job. 
* @submit_wq: workqueue used to queue @work_run_job and @work_free_job * @timeout_wq: workqueue used to queue @work_tdr @@ -502,13 +518,13 @@ struct drm_sched_backend_ops { */ struct drm_gpu_scheduler { const struct drm_sched_backend_ops *ops; - uint32_t hw_submission_limit; + u32 credit_limit; + atomic_t credit_count; long timeout; const char *name; u32 num_rqs; struct drm_sched_rq **sched_rq; wait_queue_head_t job_scheduled; - atomic_t hw_rq_count; atomic64_t job_id_count; struct workqueue_struct *submit_wq; struct workqueue_struct *timeout_wq; @@ -530,14 +546,14 @@ struct drm_gpu_scheduler { int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_backend_ops *ops, struct workqueue_struct *submit_wq, - u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit, + u32 num_rqs, u32 credit_limit, unsigned int hang_limit, long timeout, struct workqueue_struct *timeout_wq, atomic_t *score, const char *name, struct device *dev); void drm_sched_fini(struct drm_gpu_scheduler *sched); int drm_sched_job_init(struct drm_sched_job *job, struct drm_sched_entity *entity, - void *owner); + u32 credits, void *owner); void drm_sched_job_arm(struct drm_sched_job *job); int drm_sched_job_add_dependency(struct drm_sched_job *job, struct dma_fence *fence); -- cgit v1.2.3 From 48d054c2d34cdc67acb8cc9cfac326d91f1470ed Mon Sep 17 00:00:00 2001 From: Animesh Manna Date: Wed, 8 Nov 2023 12:52:58 +0530 Subject: drm/panelreplay: dpcd register definition for panelreplay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add DPCD register definition for discovering, enabling and checking status of panel replay of the sink. Cc: Jouni Högander Cc: Arun R Murthy Cc: Jani Nikula Acked-by: Maxime Ripard Reviewed-by: Arun R Murthy Signed-off-by: Animesh Manna Link: https://patchwork.freedesktop.org/patch/msgid/20231108072303.3414118-2-animesh.manna@intel.com --- include/drm/display/drm_dp.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/drm') diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h index 763d45a612f3..83d2039c018b 100644 --- a/include/drm/display/drm_dp.h +++ b/include/drm/display/drm_dp.h @@ -544,6 +544,10 @@ /* DFP Capability Extension */ #define DP_DFP_CAPABILITY_EXTENSION_SUPPORT 0x0a3 /* 2.0 */ +#define DP_PANEL_REPLAY_CAP 0x0b0 /* DP 2.0 */ +# define DP_PANEL_REPLAY_SUPPORT (1 << 0) +# define DP_PANEL_REPLAY_SU_SUPPORT (1 << 1) + /* Link Configuration */ #define DP_LINK_BW_SET 0x100 # define DP_LINK_RATE_TABLE 0x00 /* eDP 1.4 */ @@ -718,6 +722,13 @@ #define DP_BRANCH_DEVICE_CTRL 0x1a1 # define DP_BRANCH_DEVICE_IRQ_HPD (1 << 0) +#define PANEL_REPLAY_CONFIG 0x1b0 /* DP 2.0 */ +# define DP_PANEL_REPLAY_ENABLE (1 << 0) +# define DP_PANEL_REPLAY_UNRECOVERABLE_ERROR_EN (1 << 3) +# define DP_PANEL_REPLAY_RFB_STORAGE_ERROR_EN (1 << 4) +# define DP_PANEL_REPLAY_ACTIVE_FRAME_CRC_ERROR_EN (1 << 5) +# define DP_PANEL_REPLAY_SU_ENABLE (1 << 6) + #define DP_PAYLOAD_ALLOCATE_SET 0x1c0 #define DP_PAYLOAD_ALLOCATE_START_TIME_SLOT 0x1c1 #define DP_PAYLOAD_ALLOCATE_TIME_SLOT_COUNT 0x1c2 @@ -1107,6 +1118,18 @@ #define DP_LANE_ALIGN_STATUS_UPDATED_ESI 0x200e /* status same as 0x204 */ #define DP_SINK_STATUS_ESI 0x200f /* status same as 0x205 */ +#define DP_PANEL_REPLAY_ERROR_STATUS 0x2020 /* DP 2.1*/ +# define DP_PANEL_REPLAY_LINK_CRC_ERROR (1 << 0) +# define DP_PANEL_REPLAY_RFB_STORAGE_ERROR (1 << 1) +# define DP_PANEL_REPLAY_VSC_SDP_UNCORRECTABLE_ERROR (1 << 2) + +#define 
DP_SINK_DEVICE_PR_AND_FRAME_LOCK_STATUS 0x2022 /* DP 2.1 */ +# define DP_SINK_DEVICE_PANEL_REPLAY_STATUS_MASK (7 << 0) +# define DP_SINK_FRAME_LOCKED_SHIFT 3 +# define DP_SINK_FRAME_LOCKED_MASK (3 << 3) +# define DP_SINK_FRAME_LOCKED_STATUS_VALID_SHIFT 5 +# define DP_SINK_FRAME_LOCKED_STATUS_VALID_MASK (1 << 5) + /* Extended Receiver Capability: See DP_DPCD_REV for definitions */ #define DP_DP13_DPCD_REV 0x2200 -- cgit v1.2.3 From 546ca4d35dccaca6613766ed36ccfb2b5bd63bfe Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 8 Nov 2023 01:12:31 +0100 Subject: drm/gpuvm: convert WARN() to drm_WARN() variants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use drm_WARN() and drm_WARN_ON() variants to indicate drivers the context the failing VM resides in. Acked-by: Christian König Reviewed-by: Boris Brezillon Reviewed-by: Thomas Hellström Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-2-dakr@redhat.com --- drivers/gpu/drm/drm_gpuvm.c | 32 +++++++++++++++++--------------- drivers/gpu/drm/nouveau/nouveau_uvmm.c | 3 ++- include/drm/drm_gpuvm.h | 7 +++++++ 3 files changed, 26 insertions(+), 16 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index 08c088319652..d7367a202fee 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -614,12 +614,12 @@ static int __drm_gpuva_insert(struct drm_gpuvm *gpuvm, static void __drm_gpuva_remove(struct drm_gpuva *va); static bool -drm_gpuvm_check_overflow(u64 addr, u64 range) +drm_gpuvm_check_overflow(struct drm_gpuvm *gpuvm, u64 addr, u64 range) { u64 end; - return WARN(check_add_overflow(addr, range, &end), - "GPUVA address limited to %zu bytes.\n", sizeof(end)); + return drm_WARN(gpuvm->drm, check_add_overflow(addr, range, &end), + "GPUVA address limited to %zu bytes.\n", sizeof(end)); } static bool @@ -647,7 +647,7 @@ static bool drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, u64 addr, u64 range) { - return !drm_gpuvm_check_overflow(addr, range) && + return !drm_gpuvm_check_overflow(gpuvm, addr, range) && drm_gpuvm_in_mm_range(gpuvm, addr, range) && !drm_gpuvm_in_kernel_node(gpuvm, addr, range); } @@ -656,6 +656,7 @@ drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, * drm_gpuvm_init() - initialize a &drm_gpuvm * @gpuvm: pointer to the &drm_gpuvm to initialize * @name: the name of the GPU VA space + * @drm: the &drm_device this VM resides in * @start_offset: the start offset of the GPU VA space * @range: the size of the GPU VA space * @reserve_offset: the start of the kernel reserved GPU VA area @@ -668,8 +669,8 @@ drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, * &name is expected to be managed by the surrounding driver structures. */ void -drm_gpuvm_init(struct drm_gpuvm *gpuvm, - const char *name, +drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, + struct drm_device *drm, u64 start_offset, u64 range, u64 reserve_offset, u64 reserve_range, const struct drm_gpuvm_ops *ops) @@ -677,20 +678,20 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, gpuvm->rb.tree = RB_ROOT_CACHED; INIT_LIST_HEAD(&gpuvm->rb.list); - drm_gpuvm_check_overflow(start_offset, range); - gpuvm->mm_start = start_offset; - gpuvm->mm_range = range; - gpuvm->name = name ? 
name : "unknown"; gpuvm->ops = ops; + gpuvm->drm = drm; - memset(&gpuvm->kernel_alloc_node, 0, sizeof(struct drm_gpuva)); + drm_gpuvm_check_overflow(gpuvm, start_offset, range); + gpuvm->mm_start = start_offset; + gpuvm->mm_range = range; + memset(&gpuvm->kernel_alloc_node, 0, sizeof(struct drm_gpuva)); if (reserve_range) { gpuvm->kernel_alloc_node.va.addr = reserve_offset; gpuvm->kernel_alloc_node.va.range = reserve_range; - if (likely(!drm_gpuvm_check_overflow(reserve_offset, + if (likely(!drm_gpuvm_check_overflow(gpuvm, reserve_offset, reserve_range))) __drm_gpuva_insert(gpuvm, &gpuvm->kernel_alloc_node); } @@ -712,8 +713,8 @@ drm_gpuvm_destroy(struct drm_gpuvm *gpuvm) if (gpuvm->kernel_alloc_node.va.range) __drm_gpuva_remove(&gpuvm->kernel_alloc_node); - WARN(!RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root), - "GPUVA tree is not empty, potentially leaking memory."); + drm_WARN(gpuvm->drm, !RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root), + "GPUVA tree is not empty, potentially leaking memory.\n"); } EXPORT_SYMBOL_GPL(drm_gpuvm_destroy); @@ -795,7 +796,8 @@ drm_gpuva_remove(struct drm_gpuva *va) struct drm_gpuvm *gpuvm = va->vm; if (unlikely(va == &gpuvm->kernel_alloc_node)) { - WARN(1, "Can't destroy kernel reserved node.\n"); + drm_WARN(gpuvm->drm, 1, + "Can't destroy kernel reserved node.\n"); return; } diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index 5cf892c50f43..aaf5d28bd587 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -1808,6 +1808,7 @@ int nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli, u64 kernel_managed_addr, u64 kernel_managed_size) { + struct drm_device *drm = cli->drm->dev; int ret; u64 kernel_managed_end = kernel_managed_addr + kernel_managed_size; @@ -1836,7 +1837,7 @@ nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli, uvmm->kernel_managed_addr = kernel_managed_addr; uvmm->kernel_managed_size = kernel_managed_size; - drm_gpuvm_init(&uvmm->base, cli->name, + drm_gpuvm_init(&uvmm->base, cli->name, drm, NOUVEAU_VA_SPACE_START, NOUVEAU_VA_SPACE_END, kernel_managed_addr, kernel_managed_size, diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index bdfafc4a7705..687fd5893624 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -29,6 +29,7 @@ #include #include +#include #include struct drm_gpuvm; @@ -201,6 +202,11 @@ struct drm_gpuvm { */ const char *name; + /** + * @drm: the &drm_device this VM lives in + */ + struct drm_device *drm; + /** * @mm_start: start of the VA space */ @@ -241,6 +247,7 @@ struct drm_gpuvm { }; void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, + struct drm_device *drm, u64 start_offset, u64 range, u64 reserve_offset, u64 reserve_range, const struct drm_gpuvm_ops *ops); -- cgit v1.2.3 From 9297cfc9405bc6b60540b8b8aaf930b7e449e15a Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 8 Nov 2023 01:12:33 +0100 Subject: drm/gpuvm: export drm_gpuvm_range_valid() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drivers may use this function to validate userspace requests in advance, hence export it. 
Acked-by: Christian König Reviewed-by: Thomas Hellström Reviewed-by: Boris Brezillon Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-4-dakr@redhat.com --- drivers/gpu/drm/drm_gpuvm.c | 14 +++++++++++++- include/drm/drm_gpuvm.h | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index 445767f8fbc4..2669f9bbc377 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -649,7 +649,18 @@ drm_gpuvm_in_kernel_node(struct drm_gpuvm *gpuvm, u64 addr, u64 range) return krange && addr < kend && kstart < end; } -static bool +/** + * drm_gpuvm_range_valid() - checks whether the given range is valid for the + * given &drm_gpuvm + * @gpuvm: the GPUVM to check the range for + * @addr: the base address + * @range: the range starting from the base address + * + * Checks whether the range is within the GPUVM's managed boundaries. + * + * Returns: true for a valid range, false otherwise + */ +bool drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, u64 addr, u64 range) { @@ -657,6 +668,7 @@ drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, drm_gpuvm_in_mm_range(gpuvm, addr, range) && !drm_gpuvm_in_kernel_node(gpuvm, addr, range); } +EXPORT_SYMBOL_GPL(drm_gpuvm_range_valid); /** * drm_gpuvm_init() - initialize a &drm_gpuvm diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index 687fd5893624..13eac6f70061 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -253,6 +253,7 @@ void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, const struct drm_gpuvm_ops *ops); void drm_gpuvm_destroy(struct drm_gpuvm *gpuvm); +bool drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, u64 addr, u64 range); bool drm_gpuvm_interval_empty(struct drm_gpuvm *gpuvm, u64 addr, u64 range); static inline struct drm_gpuva * -- cgit v1.2.3 From bbe8458037e74b9887ba2f0f0b8084a13ade3a90 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 8 Nov 2023 01:12:35 +0100 Subject: drm/gpuvm: add common dma-resv per struct drm_gpuvm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide a common dma-resv for GEM objects not being used outside of this GPU-VM. This is used in a subsequent patch to generalize dma-resv, external and evicted object handling and GEM validation. Acked-by: Christian König Reviewed-by: Boris Brezillon Reviewed-by: Thomas Hellström Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-6-dakr@redhat.com --- drivers/gpu/drm/drm_gpuvm.c | 53 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/nouveau/nouveau_uvmm.c | 13 +++++++-- include/drm/drm_gpuvm.h | 33 +++++++++++++++++++++ 3 files changed, 97 insertions(+), 2 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index 2669f9bbc377..af5805e4d7c9 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -61,6 +61,15 @@ * contained within struct drm_gpuva already. Hence, for inserting &drm_gpuva * entries from within dma-fence signalling critical sections it is enough to * pre-allocate the &drm_gpuva structures. + * + * &drm_gem_objects which are private to a single VM can share a common + * &dma_resv in order to improve locking efficiency (e.g. with &drm_exec). 
+ * For this purpose drivers must pass a &drm_gem_object to drm_gpuvm_init(), in + * the following called 'resv object', which serves as the container of the + * GPUVM's shared &dma_resv. This resv object can be a driver specific + * &drm_gem_object, such as the &drm_gem_object containing the root page table, + * but it can also be a 'dummy' object, which can be allocated with + * drm_gpuvm_resv_object_alloc(). */ /** @@ -670,11 +679,49 @@ drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, } EXPORT_SYMBOL_GPL(drm_gpuvm_range_valid); +static void +drm_gpuvm_gem_object_free(struct drm_gem_object *obj) +{ + drm_gem_object_release(obj); + kfree(obj); +} + +static const struct drm_gem_object_funcs drm_gpuvm_object_funcs = { + .free = drm_gpuvm_gem_object_free, +}; + +/** + * drm_gpuvm_resv_object_alloc() - allocate a dummy &drm_gem_object + * @drm: the drivers &drm_device + * + * Allocates a dummy &drm_gem_object which can be passed to drm_gpuvm_init() in + * order to serve as root GEM object providing the &drm_resv shared across + * &drm_gem_objects local to a single GPUVM. + * + * Returns: the &drm_gem_object on success, NULL on failure + */ +struct drm_gem_object * +drm_gpuvm_resv_object_alloc(struct drm_device *drm) +{ + struct drm_gem_object *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return NULL; + + obj->funcs = &drm_gpuvm_object_funcs; + drm_gem_private_object_init(drm, obj, 0); + + return obj; +} +EXPORT_SYMBOL_GPL(drm_gpuvm_resv_object_alloc); + /** * drm_gpuvm_init() - initialize a &drm_gpuvm * @gpuvm: pointer to the &drm_gpuvm to initialize * @name: the name of the GPU VA space * @drm: the &drm_device this VM resides in + * @r_obj: the resv &drm_gem_object providing the GPUVM's common &dma_resv * @start_offset: the start offset of the GPU VA space * @range: the size of the GPU VA space * @reserve_offset: the start of the kernel reserved GPU VA area @@ -689,6 +736,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_range_valid); void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, struct drm_device *drm, + struct drm_gem_object *r_obj, u64 start_offset, u64 range, u64 reserve_offset, u64 reserve_range, const struct drm_gpuvm_ops *ops) @@ -699,6 +747,9 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, gpuvm->name = name ? 
name : "unknown"; gpuvm->ops = ops; gpuvm->drm = drm; + gpuvm->r_obj = r_obj; + + drm_gem_object_get(r_obj); drm_gpuvm_warn_check_overflow(gpuvm, start_offset, range); gpuvm->mm_start = start_offset; @@ -733,6 +784,8 @@ drm_gpuvm_destroy(struct drm_gpuvm *gpuvm) drm_WARN(gpuvm->drm, !RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root), "GPUVA tree is not empty, potentially leaking memory.\n"); + + drm_gem_object_put(gpuvm->r_obj); } EXPORT_SYMBOL_GPL(drm_gpuvm_destroy); diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index 641a911528db..f74bf30bc683 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -1797,8 +1797,9 @@ nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli, u64 kernel_managed_addr, u64 kernel_managed_size) { struct drm_device *drm = cli->drm->dev; - int ret; + struct drm_gem_object *r_obj; u64 kernel_managed_end = kernel_managed_addr + kernel_managed_size; + int ret; mutex_init(&uvmm->mutex); dma_resv_init(&uvmm->resv); @@ -1822,11 +1823,19 @@ nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli, goto out_unlock; } - drm_gpuvm_init(&uvmm->base, cli->name, drm, + r_obj = drm_gpuvm_resv_object_alloc(drm); + if (!r_obj) { + ret = -ENOMEM; + goto out_unlock; + } + + drm_gpuvm_init(&uvmm->base, cli->name, drm, r_obj, NOUVEAU_VA_SPACE_START, NOUVEAU_VA_SPACE_END, kernel_managed_addr, kernel_managed_size, NULL); + /* GPUVM takes care from here on. */ + drm_gem_object_put(r_obj); ret = nvif_vmm_ctor(&cli->mmu, "uvmm", cli->vmm.vmm.object.oclass, RAW, diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index 13eac6f70061..ff3377cbfe52 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -244,10 +244,16 @@ struct drm_gpuvm { * @ops: &drm_gpuvm_ops providing the split/merge steps to drivers */ const struct drm_gpuvm_ops *ops; + + /** + * @r_obj: Resv GEM object; representing the GPUVM's common &dma_resv. 
+ */ + struct drm_gem_object *r_obj; }; void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, struct drm_device *drm, + struct drm_gem_object *r_obj, u64 start_offset, u64 range, u64 reserve_offset, u64 reserve_range, const struct drm_gpuvm_ops *ops); @@ -256,6 +262,33 @@ void drm_gpuvm_destroy(struct drm_gpuvm *gpuvm); bool drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, u64 addr, u64 range); bool drm_gpuvm_interval_empty(struct drm_gpuvm *gpuvm, u64 addr, u64 range); +struct drm_gem_object * +drm_gpuvm_resv_object_alloc(struct drm_device *drm); + +/** + * drm_gpuvm_resv() - returns the &drm_gpuvm's &dma_resv + * @gpuvm__: the &drm_gpuvm + * + * Returns: a pointer to the &drm_gpuvm's shared &dma_resv + */ +#define drm_gpuvm_resv(gpuvm__) ((gpuvm__)->r_obj->resv) + +/** + * drm_gpuvm_resv_obj() - returns the &drm_gem_object holding the &drm_gpuvm's + * &dma_resv + * @gpuvm__: the &drm_gpuvm + * + * Returns: a pointer to the &drm_gem_object holding the &drm_gpuvm's shared + * &dma_resv + */ +#define drm_gpuvm_resv_obj(gpuvm__) ((gpuvm__)->r_obj) + +#define drm_gpuvm_resv_held(gpuvm__) \ + dma_resv_held(drm_gpuvm_resv(gpuvm__)) + +#define drm_gpuvm_resv_assert_held(gpuvm__) \ + dma_resv_assert_held(drm_gpuvm_resv(gpuvm__)) + static inline struct drm_gpuva * __drm_gpuva_next(struct drm_gpuva *va) { -- cgit v1.2.3 From 809ef191ee600e8bcbe2f8a769e00d2d54c16094 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 8 Nov 2023 01:12:37 +0100 Subject: drm/gpuvm: add drm_gpuvm_flags to drm_gpuvm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce flags for struct drm_gpuvm, this required by subsequent commits. Acked-by: Christian König Reviewed-by: Boris Brezillon Reviewed-by: Thomas Hellström Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-8-dakr@redhat.com --- drivers/gpu/drm/drm_gpuvm.c | 3 +++ drivers/gpu/drm/nouveau/nouveau_uvmm.c | 2 +- include/drm/drm_gpuvm.h | 16 ++++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index af5805e4d7c9..53e2c406fb04 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -720,6 +720,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_resv_object_alloc); * drm_gpuvm_init() - initialize a &drm_gpuvm * @gpuvm: pointer to the &drm_gpuvm to initialize * @name: the name of the GPU VA space + * @flags: the &drm_gpuvm_flags for this GPUVM * @drm: the &drm_device this VM resides in * @r_obj: the resv &drm_gem_object providing the GPUVM's common &dma_resv * @start_offset: the start offset of the GPU VA space @@ -735,6 +736,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_resv_object_alloc); */ void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, + enum drm_gpuvm_flags flags, struct drm_device *drm, struct drm_gem_object *r_obj, u64 start_offset, u64 range, @@ -745,6 +747,7 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, INIT_LIST_HEAD(&gpuvm->rb.list); gpuvm->name = name ? 
name : "unknown"; + gpuvm->flags = flags; gpuvm->ops = ops; gpuvm->drm = drm; gpuvm->r_obj = r_obj; diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index 8977a518de96..f765e3835306 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -1828,7 +1828,7 @@ nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli, goto out_unlock; } - drm_gpuvm_init(&uvmm->base, cli->name, drm, r_obj, + drm_gpuvm_init(&uvmm->base, cli->name, 0, drm, r_obj, NOUVEAU_VA_SPACE_START, NOUVEAU_VA_SPACE_END, kernel_managed_addr, kernel_managed_size, diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index ff3377cbfe52..0c2e24155a93 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -184,6 +184,16 @@ static inline bool drm_gpuva_invalidated(struct drm_gpuva *va) return va->flags & DRM_GPUVA_INVALIDATED; } +/** + * enum drm_gpuvm_flags - flags for struct drm_gpuvm + */ +enum drm_gpuvm_flags { + /** + * @DRM_GPUVM_USERBITS: user defined bits + */ + DRM_GPUVM_USERBITS = BIT(0), +}; + /** * struct drm_gpuvm - DRM GPU VA Manager * @@ -202,6 +212,11 @@ struct drm_gpuvm { */ const char *name; + /** + * @flags: the &drm_gpuvm_flags of this GPUVM + */ + enum drm_gpuvm_flags flags; + /** * @drm: the &drm_device this VM lives in */ @@ -252,6 +267,7 @@ struct drm_gpuvm { }; void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, + enum drm_gpuvm_flags flags, struct drm_device *drm, struct drm_gem_object *r_obj, u64 start_offset, u64 range, -- cgit v1.2.3 From 8af72338dd81d1f8667e0240bd28f5fc98b3f20d Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 8 Nov 2023 01:12:39 +0100 Subject: drm/gpuvm: reference count drm_gpuvm structures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement reference counting for struct drm_gpuvm. Acked-by: Christian König Reviewed-by: Thomas Hellström Reviewed-by: Boris Brezillon Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-10-dakr@redhat.com --- drivers/gpu/drm/drm_gpuvm.c | 56 +++++++++++++++++++++++++++------- drivers/gpu/drm/nouveau/nouveau_uvmm.c | 20 +++++++++--- include/drm/drm_gpuvm.h | 31 ++++++++++++++++++- 3 files changed, 90 insertions(+), 17 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index 53e2c406fb04..ef968eba6fe6 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -746,6 +746,8 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, gpuvm->rb.tree = RB_ROOT_CACHED; INIT_LIST_HEAD(&gpuvm->rb.list); + kref_init(&gpuvm->kref); + gpuvm->name = name ? name : "unknown"; gpuvm->flags = flags; gpuvm->ops = ops; @@ -770,15 +772,8 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, } EXPORT_SYMBOL_GPL(drm_gpuvm_init); -/** - * drm_gpuvm_destroy() - cleanup a &drm_gpuvm - * @gpuvm: pointer to the &drm_gpuvm to clean up - * - * Note that it is a bug to call this function on a manager that still - * holds GPU VA mappings. 
- */ -void -drm_gpuvm_destroy(struct drm_gpuvm *gpuvm) +static void +drm_gpuvm_fini(struct drm_gpuvm *gpuvm) { gpuvm->name = NULL; @@ -790,7 +785,35 @@ drm_gpuvm_destroy(struct drm_gpuvm *gpuvm) drm_gem_object_put(gpuvm->r_obj); } -EXPORT_SYMBOL_GPL(drm_gpuvm_destroy); + +static void +drm_gpuvm_free(struct kref *kref) +{ + struct drm_gpuvm *gpuvm = container_of(kref, struct drm_gpuvm, kref); + + drm_gpuvm_fini(gpuvm); + + if (drm_WARN_ON(gpuvm->drm, !gpuvm->ops->vm_free)) + return; + + gpuvm->ops->vm_free(gpuvm); +} + +/** + * drm_gpuvm_put() - drop a struct drm_gpuvm reference + * @gpuvm: the &drm_gpuvm to release the reference of + * + * This releases a reference to @gpuvm. + * + * This function may be called from atomic context. + */ +void +drm_gpuvm_put(struct drm_gpuvm *gpuvm) +{ + if (gpuvm) + kref_put(&gpuvm->kref, drm_gpuvm_free); +} +EXPORT_SYMBOL_GPL(drm_gpuvm_put); static int __drm_gpuva_insert(struct drm_gpuvm *gpuvm, @@ -839,11 +862,21 @@ drm_gpuva_insert(struct drm_gpuvm *gpuvm, { u64 addr = va->va.addr; u64 range = va->va.range; + int ret; if (unlikely(!drm_gpuvm_range_valid(gpuvm, addr, range))) return -EINVAL; - return __drm_gpuva_insert(gpuvm, va); + ret = __drm_gpuva_insert(gpuvm, va); + if (likely(!ret)) + /* Take a reference of the GPUVM for the successfully inserted + * drm_gpuva. We can't take the reference in + * __drm_gpuva_insert() itself, since we don't want to increse + * the reference count for the GPUVM's kernel_alloc_node. + */ + drm_gpuvm_get(gpuvm); + + return ret; } EXPORT_SYMBOL_GPL(drm_gpuva_insert); @@ -876,6 +909,7 @@ drm_gpuva_remove(struct drm_gpuva *va) } __drm_gpuva_remove(va); + drm_gpuvm_put(va->vm); } EXPORT_SYMBOL_GPL(drm_gpuva_remove); diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index 54be12c1272f..cb2f06565c46 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -1780,6 +1780,18 @@ nouveau_uvmm_bo_unmap_all(struct nouveau_bo *nvbo) } } +static void +nouveau_uvmm_free(struct drm_gpuvm *gpuvm) +{ + struct nouveau_uvmm *uvmm = uvmm_from_gpuvm(gpuvm); + + kfree(uvmm); +} + +static const struct drm_gpuvm_ops gpuvm_ops = { + .vm_free = nouveau_uvmm_free, +}; + int nouveau_uvmm_ioctl_vm_init(struct drm_device *dev, void *data, @@ -1830,7 +1842,7 @@ nouveau_uvmm_ioctl_vm_init(struct drm_device *dev, NOUVEAU_VA_SPACE_END, init->kernel_managed_addr, init->kernel_managed_size, - NULL); + &gpuvm_ops); /* GPUVM takes care from here on. 
*/ drm_gem_object_put(r_obj); @@ -1849,8 +1861,7 @@ nouveau_uvmm_ioctl_vm_init(struct drm_device *dev, return 0; out_gpuvm_fini: - drm_gpuvm_destroy(&uvmm->base); - kfree(uvmm); + drm_gpuvm_put(&uvmm->base); out_unlock: mutex_unlock(&cli->mutex); return ret; @@ -1902,7 +1913,6 @@ nouveau_uvmm_fini(struct nouveau_uvmm *uvmm) mutex_lock(&cli->mutex); nouveau_vmm_fini(&uvmm->vmm); - drm_gpuvm_destroy(&uvmm->base); - kfree(uvmm); + drm_gpuvm_put(&uvmm->base); mutex_unlock(&cli->mutex); } diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index 0c2e24155a93..4e6e1fd3485a 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -247,6 +247,11 @@ struct drm_gpuvm { struct list_head list; } rb; + /** + * @kref: reference count of this object + */ + struct kref kref; + /** * @kernel_alloc_node: * @@ -273,7 +278,23 @@ void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, u64 start_offset, u64 range, u64 reserve_offset, u64 reserve_range, const struct drm_gpuvm_ops *ops); -void drm_gpuvm_destroy(struct drm_gpuvm *gpuvm); + +/** + * drm_gpuvm_get() - acquire a struct drm_gpuvm reference + * @gpuvm: the &drm_gpuvm to acquire the reference of + * + * This function acquires an additional reference to @gpuvm. It is illegal to + * call this without already holding a reference. No locks required. + */ +static inline struct drm_gpuvm * +drm_gpuvm_get(struct drm_gpuvm *gpuvm) +{ + kref_get(&gpuvm->kref); + + return gpuvm; +} + +void drm_gpuvm_put(struct drm_gpuvm *gpuvm); bool drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, u64 addr, u64 range); bool drm_gpuvm_interval_empty(struct drm_gpuvm *gpuvm, u64 addr, u64 range); @@ -673,6 +694,14 @@ static inline void drm_gpuva_init_from_op(struct drm_gpuva *va, * operations to drivers. */ struct drm_gpuvm_ops { + /** + * @vm_free: called when the last reference of a struct drm_gpuvm is + * dropped + * + * This callback is mandatory. + */ + void (*vm_free)(struct drm_gpuvm *gpuvm); + /** * @op_alloc: called when the &drm_gpuvm allocates * a struct drm_gpuva_op -- cgit v1.2.3 From 94bc2249f08e141fb4aa120bfdc392c7a5e78211 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 8 Nov 2023 01:12:40 +0100 Subject: drm/gpuvm: add an abstraction for a VM / BO combination MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an abstraction layer between the drm_gpuva mappings of a particular drm_gem_object and this GEM object itself. The abstraction represents a combination of a drm_gem_object and drm_gpuvm. The drm_gem_object holds a list of drm_gpuvm_bo structures (the structure representing this abstraction), while each drm_gpuvm_bo contains list of mappings of this GEM object. This has multiple advantages: 1) We can use the drm_gpuvm_bo structure to attach it to various lists of the drm_gpuvm. This is useful for tracking external and evicted objects per VM, which is introduced in subsequent patches. 2) Finding mappings of a certain drm_gem_object mapped in a certain drm_gpuvm becomes much cheaper. 3) Drivers can derive and extend the structure to easily represent driver specific states of a BO for a certain GPUVM. The idea of this abstraction was taken from amdgpu, hence the credit for this idea goes to the developers of amdgpu. 
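To illustrate 3) above: a driver can embed the new structure in its own per
VM / BO state and let GPUVM allocate and free it through the optional
vm_bo_alloc() / vm_bo_free() callbacks added to struct drm_gpuvm_ops by this
patch. A rough sketch; the driver_vm_bo type and its extra field are made up
for the example:

    struct driver_vm_bo {
            struct drm_gpuvm_bo base;
            struct list_head evict_entry;   /* driver specific per VM / BO state */
    };

    static struct drm_gpuvm_bo *driver_vm_bo_alloc(void)
    {
            struct driver_vm_bo *vbo = kzalloc(sizeof(*vbo), GFP_KERNEL);

            return vbo ? &vbo->base : NULL;
    }

    static void driver_vm_bo_free(struct drm_gpuvm_bo *vm_bo)
    {
            kfree(container_of(vm_bo, struct driver_vm_bo, base));
    }

    static const struct drm_gpuvm_ops driver_gpuvm_ops = {
            .vm_bo_alloc = driver_vm_bo_alloc,
            .vm_bo_free = driver_vm_bo_free,
            /* .vm_free and further callbacks omitted for brevity */
    };
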
Cc: Christian König Acked-by: Christian König Reviewed-by: Thomas Hellström Reviewed-by: Boris Brezillon Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-11-dakr@redhat.com --- drivers/gpu/drm/drm_gpuvm.c | 340 ++++++++++++++++++++++++++++----- drivers/gpu/drm/nouveau/nouveau_uvmm.c | 63 ++++-- include/drm/drm_gem.h | 32 ++-- include/drm/drm_gpuvm.h | 185 +++++++++++++++++- 4 files changed, 534 insertions(+), 86 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index ef968eba6fe6..7e193bca8bab 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -70,6 +70,18 @@ * &drm_gem_object, such as the &drm_gem_object containing the root page table, * but it can also be a 'dummy' object, which can be allocated with * drm_gpuvm_resv_object_alloc(). + * + * In order to connect a struct drm_gpuva its backing &drm_gem_object each + * &drm_gem_object maintains a list of &drm_gpuvm_bo structures, and each + * &drm_gpuvm_bo contains a list of &drm_gpuva structures. + * + * A &drm_gpuvm_bo is an abstraction that represents a combination of a + * &drm_gpuvm and a &drm_gem_object. Every such combination should be unique. + * This is ensured by the API through drm_gpuvm_bo_obtain() and + * drm_gpuvm_bo_obtain_prealloc() which first look into the corresponding + * &drm_gem_object list of &drm_gpuvm_bos for an existing instance of this + * particular combination. If not existent a new instance is created and linked + * to the &drm_gem_object. */ /** @@ -395,21 +407,28 @@ /** * DOC: Locking * - * Generally, the GPU VA manager does not take care of locking itself, it is - * the drivers responsibility to take care about locking. Drivers might want to - * protect the following operations: inserting, removing and iterating - * &drm_gpuva objects as well as generating all kinds of operations, such as - * split / merge or prefetch. - * - * The GPU VA manager also does not take care of the locking of the backing - * &drm_gem_object buffers GPU VA lists by itself; drivers are responsible to - * enforce mutual exclusion using either the GEMs dma_resv lock or alternatively - * a driver specific external lock. For the latter see also - * drm_gem_gpuva_set_lock(). - * - * However, the GPU VA manager contains lockdep checks to ensure callers of its - * API hold the corresponding lock whenever the &drm_gem_objects GPU VA list is - * accessed by functions such as drm_gpuva_link() or drm_gpuva_unlink(). + * In terms of managing &drm_gpuva entries DRM GPUVM does not take care of + * locking itself, it is the drivers responsibility to take care about locking. + * Drivers might want to protect the following operations: inserting, removing + * and iterating &drm_gpuva objects as well as generating all kinds of + * operations, such as split / merge or prefetch. + * + * DRM GPUVM also does not take care of the locking of the backing + * &drm_gem_object buffers GPU VA lists and &drm_gpuvm_bo abstractions by + * itself; drivers are responsible to enforce mutual exclusion using either the + * GEMs dma_resv lock or alternatively a driver specific external lock. For the + * latter see also drm_gem_gpuva_set_lock(). + * + * However, DRM GPUVM contains lockdep checks to ensure callers of its API hold + * the corresponding lock whenever the &drm_gem_objects GPU VA list is accessed + * by functions such as drm_gpuva_link() or drm_gpuva_unlink(), but also + * drm_gpuvm_bo_obtain() and drm_gpuvm_bo_put(). 
+ * + * The latter is required since on creation and destruction of a &drm_gpuvm_bo + * the &drm_gpuvm_bo is attached / removed from the &drm_gem_objects gpuva list. + * Subsequent calls to drm_gpuvm_bo_obtain() for the same &drm_gpuvm and + * &drm_gem_object must be able to observe previous creations and destructions + * of &drm_gpuvm_bos in order to keep instances unique. */ /** @@ -439,6 +458,7 @@ * { * struct drm_gpuva_ops *ops; * struct drm_gpuva_op *op + * struct drm_gpuvm_bo *vm_bo; * * driver_lock_va_space(); * ops = drm_gpuvm_sm_map_ops_create(gpuvm, addr, range, @@ -446,6 +466,10 @@ * if (IS_ERR(ops)) * return PTR_ERR(ops); * + * vm_bo = drm_gpuvm_bo_obtain(gpuvm, obj); + * if (IS_ERR(vm_bo)) + * return PTR_ERR(vm_bo); + * * drm_gpuva_for_each_op(op, ops) { * struct drm_gpuva *va; * @@ -458,7 +482,7 @@ * * driver_vm_map(); * drm_gpuva_map(gpuvm, va, &op->map); - * drm_gpuva_link(va); + * drm_gpuva_link(va, vm_bo); * * break; * case DRM_GPUVA_OP_REMAP: { @@ -485,11 +509,11 @@ * driver_vm_remap(); * drm_gpuva_remap(prev, next, &op->remap); * - * drm_gpuva_unlink(va); * if (prev) - * drm_gpuva_link(prev); + * drm_gpuva_link(prev, va->vm_bo); * if (next) - * drm_gpuva_link(next); + * drm_gpuva_link(next, va->vm_bo); + * drm_gpuva_unlink(va); * * break; * } @@ -505,6 +529,7 @@ * break; * } * } + * drm_gpuvm_bo_put(vm_bo); * driver_unlock_va_space(); * * return 0; @@ -514,6 +539,7 @@ * * struct driver_context { * struct drm_gpuvm *gpuvm; + * struct drm_gpuvm_bo *vm_bo; * struct drm_gpuva *new_va; * struct drm_gpuva *prev_va; * struct drm_gpuva *next_va; @@ -534,6 +560,7 @@ * struct drm_gem_object *obj, u64 offset) * { * struct driver_context ctx; + * struct drm_gpuvm_bo *vm_bo; * struct drm_gpuva_ops *ops; * struct drm_gpuva_op *op; * int ret = 0; @@ -543,16 +570,23 @@ * ctx.new_va = kzalloc(sizeof(*ctx.new_va), GFP_KERNEL); * ctx.prev_va = kzalloc(sizeof(*ctx.prev_va), GFP_KERNEL); * ctx.next_va = kzalloc(sizeof(*ctx.next_va), GFP_KERNEL); - * if (!ctx.new_va || !ctx.prev_va || !ctx.next_va) { + * ctx.vm_bo = drm_gpuvm_bo_create(gpuvm, obj); + * if (!ctx.new_va || !ctx.prev_va || !ctx.next_va || !vm_bo) { * ret = -ENOMEM; * goto out; * } * + * // Typically protected with a driver specific GEM gpuva lock + * // used in the fence signaling path for drm_gpuva_link() and + * // drm_gpuva_unlink(), hence pre-allocate. 
+ * ctx.vm_bo = drm_gpuvm_bo_obtain_prealloc(ctx.vm_bo); + * * driver_lock_va_space(); * ret = drm_gpuvm_sm_map(gpuvm, &ctx, addr, range, obj, offset); * driver_unlock_va_space(); * * out: + * drm_gpuvm_bo_put(ctx.vm_bo); * kfree(ctx.new_va); * kfree(ctx.prev_va); * kfree(ctx.next_va); @@ -565,7 +599,7 @@ * * drm_gpuva_map(ctx->vm, ctx->new_va, &op->map); * - * drm_gpuva_link(ctx->new_va); + * drm_gpuva_link(ctx->new_va, ctx->vm_bo); * * // prevent the new GPUVA from being freed in * // driver_mapping_create() @@ -577,22 +611,23 @@ * int driver_gpuva_remap(struct drm_gpuva_op *op, void *__ctx) * { * struct driver_context *ctx = __ctx; + * struct drm_gpuva *va = op->remap.unmap->va; * * drm_gpuva_remap(ctx->prev_va, ctx->next_va, &op->remap); * - * drm_gpuva_unlink(op->remap.unmap->va); - * kfree(op->remap.unmap->va); - * * if (op->remap.prev) { - * drm_gpuva_link(ctx->prev_va); + * drm_gpuva_link(ctx->prev_va, va->vm_bo); * ctx->prev_va = NULL; * } * * if (op->remap.next) { - * drm_gpuva_link(ctx->next_va); + * drm_gpuva_link(ctx->next_va, va->vm_bo); * ctx->next_va = NULL; * } * + * drm_gpuva_unlink(va); + * kfree(va); + * * return 0; * } * @@ -815,6 +850,199 @@ drm_gpuvm_put(struct drm_gpuvm *gpuvm) } EXPORT_SYMBOL_GPL(drm_gpuvm_put); +/** + * drm_gpuvm_bo_create() - create a new instance of struct drm_gpuvm_bo + * @gpuvm: The &drm_gpuvm the @obj is mapped in. + * @obj: The &drm_gem_object being mapped in the @gpuvm. + * + * If provided by the driver, this function uses the &drm_gpuvm_ops + * vm_bo_alloc() callback to allocate. + * + * Returns: a pointer to the &drm_gpuvm_bo on success, NULL on failure + */ +struct drm_gpuvm_bo * +drm_gpuvm_bo_create(struct drm_gpuvm *gpuvm, + struct drm_gem_object *obj) +{ + const struct drm_gpuvm_ops *ops = gpuvm->ops; + struct drm_gpuvm_bo *vm_bo; + + if (ops && ops->vm_bo_alloc) + vm_bo = ops->vm_bo_alloc(); + else + vm_bo = kzalloc(sizeof(*vm_bo), GFP_KERNEL); + + if (unlikely(!vm_bo)) + return NULL; + + vm_bo->vm = drm_gpuvm_get(gpuvm); + vm_bo->obj = obj; + drm_gem_object_get(obj); + + kref_init(&vm_bo->kref); + INIT_LIST_HEAD(&vm_bo->list.gpuva); + INIT_LIST_HEAD(&vm_bo->list.entry.gem); + + return vm_bo; +} +EXPORT_SYMBOL_GPL(drm_gpuvm_bo_create); + +static void +drm_gpuvm_bo_destroy(struct kref *kref) +{ + struct drm_gpuvm_bo *vm_bo = container_of(kref, struct drm_gpuvm_bo, + kref); + struct drm_gpuvm *gpuvm = vm_bo->vm; + const struct drm_gpuvm_ops *ops = gpuvm->ops; + struct drm_gem_object *obj = vm_bo->obj; + bool lock = !drm_gpuvm_resv_protected(gpuvm); + + if (!lock) + drm_gpuvm_resv_assert_held(gpuvm); + + drm_gem_gpuva_assert_lock_held(obj); + list_del(&vm_bo->list.entry.gem); + + if (ops && ops->vm_bo_free) + ops->vm_bo_free(vm_bo); + else + kfree(vm_bo); + + drm_gpuvm_put(gpuvm); + drm_gem_object_put(obj); +} + +/** + * drm_gpuvm_bo_put() - drop a struct drm_gpuvm_bo reference + * @vm_bo: the &drm_gpuvm_bo to release the reference of + * + * This releases a reference to @vm_bo. + * + * If the reference count drops to zero, the &gpuvm_bo is destroyed, which + * includes removing it from the GEMs gpuva list. Hence, if a call to this + * function can potentially let the reference count drop to zero the caller must + * hold the dma-resv or driver specific GEM gpuva lock. + * + * This function may only be called from non-atomic context. 
+ */ +void +drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo) +{ + might_sleep(); + + if (vm_bo) + kref_put(&vm_bo->kref, drm_gpuvm_bo_destroy); +} +EXPORT_SYMBOL_GPL(drm_gpuvm_bo_put); + +static struct drm_gpuvm_bo * +__drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, + struct drm_gem_object *obj) +{ + struct drm_gpuvm_bo *vm_bo; + + drm_gem_gpuva_assert_lock_held(obj); + drm_gem_for_each_gpuvm_bo(vm_bo, obj) + if (vm_bo->vm == gpuvm) + return vm_bo; + + return NULL; +} + +/** + * drm_gpuvm_bo_find() - find the &drm_gpuvm_bo for the given + * &drm_gpuvm and &drm_gem_object + * @gpuvm: The &drm_gpuvm the @obj is mapped in. + * @obj: The &drm_gem_object being mapped in the @gpuvm. + * + * Find the &drm_gpuvm_bo representing the combination of the given + * &drm_gpuvm and &drm_gem_object. If found, increases the reference + * count of the &drm_gpuvm_bo accordingly. + * + * Returns: a pointer to the &drm_gpuvm_bo on success, NULL on failure + */ +struct drm_gpuvm_bo * +drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, + struct drm_gem_object *obj) +{ + struct drm_gpuvm_bo *vm_bo = __drm_gpuvm_bo_find(gpuvm, obj); + + return vm_bo ? drm_gpuvm_bo_get(vm_bo) : NULL; +} +EXPORT_SYMBOL_GPL(drm_gpuvm_bo_find); + +/** + * drm_gpuvm_bo_obtain() - obtains and instance of the &drm_gpuvm_bo for the + * given &drm_gpuvm and &drm_gem_object + * @gpuvm: The &drm_gpuvm the @obj is mapped in. + * @obj: The &drm_gem_object being mapped in the @gpuvm. + * + * Find the &drm_gpuvm_bo representing the combination of the given + * &drm_gpuvm and &drm_gem_object. If found, increases the reference + * count of the &drm_gpuvm_bo accordingly. If not found, allocates a new + * &drm_gpuvm_bo. + * + * A new &drm_gpuvm_bo is added to the GEMs gpuva list. + * + * Returns: a pointer to the &drm_gpuvm_bo on success, an ERR_PTR on failure + */ +struct drm_gpuvm_bo * +drm_gpuvm_bo_obtain(struct drm_gpuvm *gpuvm, + struct drm_gem_object *obj) +{ + struct drm_gpuvm_bo *vm_bo; + + vm_bo = drm_gpuvm_bo_find(gpuvm, obj); + if (vm_bo) + return vm_bo; + + vm_bo = drm_gpuvm_bo_create(gpuvm, obj); + if (!vm_bo) + return ERR_PTR(-ENOMEM); + + drm_gem_gpuva_assert_lock_held(obj); + list_add_tail(&vm_bo->list.entry.gem, &obj->gpuva.list); + + return vm_bo; +} +EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain); + +/** + * drm_gpuvm_bo_obtain_prealloc() - obtains and instance of the &drm_gpuvm_bo + * for the given &drm_gpuvm and &drm_gem_object + * @__vm_bo: A pre-allocated struct drm_gpuvm_bo. + * + * Find the &drm_gpuvm_bo representing the combination of the given + * &drm_gpuvm and &drm_gem_object. If found, increases the reference + * count of the found &drm_gpuvm_bo accordingly, while the @__vm_bo reference + * count is decreased. If not found @__vm_bo is returned without further + * increase of the reference count. + * + * A new &drm_gpuvm_bo is added to the GEMs gpuva list. 
+ * + * Returns: a pointer to the found &drm_gpuvm_bo or @__vm_bo if no existing + * &drm_gpuvm_bo was found + */ +struct drm_gpuvm_bo * +drm_gpuvm_bo_obtain_prealloc(struct drm_gpuvm_bo *__vm_bo) +{ + struct drm_gpuvm *gpuvm = __vm_bo->vm; + struct drm_gem_object *obj = __vm_bo->obj; + struct drm_gpuvm_bo *vm_bo; + + vm_bo = drm_gpuvm_bo_find(gpuvm, obj); + if (vm_bo) { + drm_gpuvm_bo_put(__vm_bo); + return vm_bo; + } + + drm_gem_gpuva_assert_lock_held(obj); + list_add_tail(&__vm_bo->list.entry.gem, &obj->gpuva.list); + + return __vm_bo; +} +EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain_prealloc); + static int __drm_gpuva_insert(struct drm_gpuvm *gpuvm, struct drm_gpuva *va) @@ -916,24 +1144,33 @@ EXPORT_SYMBOL_GPL(drm_gpuva_remove); /** * drm_gpuva_link() - link a &drm_gpuva * @va: the &drm_gpuva to link + * @vm_bo: the &drm_gpuvm_bo to add the &drm_gpuva to * - * This adds the given &va to the GPU VA list of the &drm_gem_object it is - * associated with. + * This adds the given &va to the GPU VA list of the &drm_gpuvm_bo and the + * &drm_gpuvm_bo to the &drm_gem_object it is associated with. + * + * For every &drm_gpuva entry added to the &drm_gpuvm_bo an additional + * reference of the latter is taken. * * This function expects the caller to protect the GEM's GPUVA list against - * concurrent access using the GEMs dma_resv lock. + * concurrent access using either the GEMs dma_resv lock or a driver specific + * lock set through drm_gem_gpuva_set_lock(). */ void -drm_gpuva_link(struct drm_gpuva *va) +drm_gpuva_link(struct drm_gpuva *va, struct drm_gpuvm_bo *vm_bo) { struct drm_gem_object *obj = va->gem.obj; + struct drm_gpuvm *gpuvm = va->vm; if (unlikely(!obj)) return; - drm_gem_gpuva_assert_lock_held(obj); + drm_WARN_ON(gpuvm->drm, obj != vm_bo->obj); - list_add_tail(&va->gem.entry, &obj->gpuva.list); + va->vm_bo = drm_gpuvm_bo_get(vm_bo); + + drm_gem_gpuva_assert_lock_held(obj); + list_add_tail(&va->gem.entry, &vm_bo->list.gpuva); } EXPORT_SYMBOL_GPL(drm_gpuva_link); @@ -944,20 +1181,31 @@ EXPORT_SYMBOL_GPL(drm_gpuva_link); * This removes the given &va from the GPU VA list of the &drm_gem_object it is * associated with. * + * This removes the given &va from the GPU VA list of the &drm_gpuvm_bo and + * the &drm_gpuvm_bo from the &drm_gem_object it is associated with in case + * this call unlinks the last &drm_gpuva from the &drm_gpuvm_bo. + * + * For every &drm_gpuva entry removed from the &drm_gpuvm_bo a reference of + * the latter is dropped. + * * This function expects the caller to protect the GEM's GPUVA list against - * concurrent access using the GEMs dma_resv lock. + * concurrent access using either the GEMs dma_resv lock or a driver specific + * lock set through drm_gem_gpuva_set_lock(). 
*/ void drm_gpuva_unlink(struct drm_gpuva *va) { struct drm_gem_object *obj = va->gem.obj; + struct drm_gpuvm_bo *vm_bo = va->vm_bo; if (unlikely(!obj)) return; drm_gem_gpuva_assert_lock_held(obj); - list_del_init(&va->gem.entry); + + va->vm_bo = NULL; + drm_gpuvm_bo_put(vm_bo); } EXPORT_SYMBOL_GPL(drm_gpuva_unlink); @@ -1102,10 +1350,10 @@ drm_gpuva_remap(struct drm_gpuva *prev, struct drm_gpuva *next, struct drm_gpuva_op_remap *op) { - struct drm_gpuva *curr = op->unmap->va; - struct drm_gpuvm *gpuvm = curr->vm; + struct drm_gpuva *va = op->unmap->va; + struct drm_gpuvm *gpuvm = va->vm; - drm_gpuva_remove(curr); + drm_gpuva_remove(va); if (op->prev) { drm_gpuva_init_from_op(prev, op->prev); @@ -1747,9 +1995,8 @@ err_free_ops: EXPORT_SYMBOL_GPL(drm_gpuvm_prefetch_ops_create); /** - * drm_gpuvm_gem_unmap_ops_create() - creates the &drm_gpuva_ops to unmap a GEM - * @gpuvm: the &drm_gpuvm representing the GPU VA space - * @obj: the &drm_gem_object to unmap + * drm_gpuvm_bo_unmap_ops_create() - creates the &drm_gpuva_ops to unmap a GEM + * @vm_bo: the &drm_gpuvm_bo abstraction * * This function creates a list of operations to perform unmapping for every * GPUVA attached to a GEM. @@ -1766,15 +2013,14 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_prefetch_ops_create); * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure */ struct drm_gpuva_ops * -drm_gpuvm_gem_unmap_ops_create(struct drm_gpuvm *gpuvm, - struct drm_gem_object *obj) +drm_gpuvm_bo_unmap_ops_create(struct drm_gpuvm_bo *vm_bo) { struct drm_gpuva_ops *ops; struct drm_gpuva_op *op; struct drm_gpuva *va; int ret; - drm_gem_gpuva_assert_lock_held(obj); + drm_gem_gpuva_assert_lock_held(vm_bo->obj); ops = kzalloc(sizeof(*ops), GFP_KERNEL); if (!ops) @@ -1782,8 +2028,8 @@ drm_gpuvm_gem_unmap_ops_create(struct drm_gpuvm *gpuvm, INIT_LIST_HEAD(&ops->list); - drm_gem_for_each_gpuva(va, obj) { - op = gpuva_op_alloc(gpuvm); + drm_gpuvm_bo_for_each_va(va, vm_bo) { + op = gpuva_op_alloc(vm_bo->vm); if (!op) { ret = -ENOMEM; goto err_free_ops; @@ -1797,10 +2043,10 @@ drm_gpuvm_gem_unmap_ops_create(struct drm_gpuvm *gpuvm, return ops; err_free_ops: - drm_gpuva_ops_free(gpuvm, ops); + drm_gpuva_ops_free(vm_bo->vm, ops); return ERR_PTR(ret); } -EXPORT_SYMBOL_GPL(drm_gpuvm_gem_unmap_ops_create); +EXPORT_SYMBOL_GPL(drm_gpuvm_bo_unmap_ops_create); /** * drm_gpuva_ops_free() - free the given &drm_gpuva_ops diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index cb2f06565c46..eda7bb8624f1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -62,6 +62,8 @@ struct bind_job_op { enum vm_bind_op op; u32 flags; + struct drm_gpuvm_bo *vm_bo; + struct { u64 addr; u64 range; @@ -1101,22 +1103,28 @@ bind_validate_region(struct nouveau_job *job) } static void -bind_link_gpuvas(struct drm_gpuva_ops *ops, struct nouveau_uvma_prealloc *new) +bind_link_gpuvas(struct bind_job_op *bop) { + struct nouveau_uvma_prealloc *new = &bop->new; + struct drm_gpuvm_bo *vm_bo = bop->vm_bo; + struct drm_gpuva_ops *ops = bop->ops; struct drm_gpuva_op *op; drm_gpuva_for_each_op(op, ops) { switch (op->op) { case DRM_GPUVA_OP_MAP: - drm_gpuva_link(&new->map->va); + drm_gpuva_link(&new->map->va, vm_bo); break; - case DRM_GPUVA_OP_REMAP: + case DRM_GPUVA_OP_REMAP: { + struct drm_gpuva *va = op->remap.unmap->va; + if (op->remap.prev) - drm_gpuva_link(&new->prev->va); + drm_gpuva_link(&new->prev->va, va->vm_bo); if (op->remap.next) - drm_gpuva_link(&new->next->va); - 
drm_gpuva_unlink(op->remap.unmap->va); + drm_gpuva_link(&new->next->va, va->vm_bo); + drm_gpuva_unlink(va); break; + } case DRM_GPUVA_OP_UNMAP: drm_gpuva_unlink(op->unmap.va); break; @@ -1138,10 +1146,17 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job) list_for_each_op(op, &bind_job->ops) { if (op->op == OP_MAP) { - op->gem.obj = drm_gem_object_lookup(job->file_priv, - op->gem.handle); - if (!op->gem.obj) + struct drm_gem_object *obj = op->gem.obj = + drm_gem_object_lookup(job->file_priv, + op->gem.handle); + if (!obj) return -ENOENT; + + dma_resv_lock(obj->resv, NULL); + op->vm_bo = drm_gpuvm_bo_obtain(&uvmm->base, obj); + dma_resv_unlock(obj->resv); + if (IS_ERR(op->vm_bo)) + return PTR_ERR(op->vm_bo); } ret = bind_validate_op(job, op); @@ -1352,7 +1367,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job) case OP_UNMAP_SPARSE: case OP_MAP: case OP_UNMAP: - bind_link_gpuvas(op->ops, &op->new); + bind_link_gpuvas(op); break; default: break; @@ -1499,6 +1514,12 @@ nouveau_uvmm_bind_job_free_work_fn(struct work_struct *work) if (!IS_ERR_OR_NULL(op->ops)) drm_gpuva_ops_free(&uvmm->base, op->ops); + if (!IS_ERR_OR_NULL(op->vm_bo)) { + dma_resv_lock(obj->resv, NULL); + drm_gpuvm_bo_put(op->vm_bo); + dma_resv_unlock(obj->resv); + } + if (obj) drm_gem_object_put(obj); } @@ -1752,15 +1773,18 @@ void nouveau_uvmm_bo_map_all(struct nouveau_bo *nvbo, struct nouveau_mem *mem) { struct drm_gem_object *obj = &nvbo->bo.base; + struct drm_gpuvm_bo *vm_bo; struct drm_gpuva *va; dma_resv_assert_held(obj->resv); - drm_gem_for_each_gpuva(va, obj) { - struct nouveau_uvma *uvma = uvma_from_va(va); + drm_gem_for_each_gpuvm_bo(vm_bo, obj) { + drm_gpuvm_bo_for_each_va(va, vm_bo) { + struct nouveau_uvma *uvma = uvma_from_va(va); - nouveau_uvma_map(uvma, mem); - drm_gpuva_invalidate(va, false); + nouveau_uvma_map(uvma, mem); + drm_gpuva_invalidate(va, false); + } } } @@ -1768,15 +1792,18 @@ void nouveau_uvmm_bo_unmap_all(struct nouveau_bo *nvbo) { struct drm_gem_object *obj = &nvbo->bo.base; + struct drm_gpuvm_bo *vm_bo; struct drm_gpuva *va; dma_resv_assert_held(obj->resv); - drm_gem_for_each_gpuva(va, obj) { - struct nouveau_uvma *uvma = uvma_from_va(va); + drm_gem_for_each_gpuvm_bo(vm_bo, obj) { + drm_gpuvm_bo_for_each_va(va, vm_bo) { + struct nouveau_uvma *uvma = uvma_from_va(va); - nouveau_uvma_unmap(uvma); - drm_gpuva_invalidate(va, true); + nouveau_uvma_unmap(uvma); + drm_gpuva_invalidate(va, true); + } } } diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 16364487fde9..369505447acd 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -580,7 +580,7 @@ int drm_gem_evict(struct drm_gem_object *obj); * drm_gem_gpuva_init() - initialize the gpuva list of a GEM object * @obj: the &drm_gem_object * - * This initializes the &drm_gem_object's &drm_gpuva list. + * This initializes the &drm_gem_object's &drm_gpuvm_bo list. * * Calling this function is only necessary for drivers intending to support the * &drm_driver_feature DRIVER_GEM_GPUVA. 
@@ -593,28 +593,28 @@ static inline void drm_gem_gpuva_init(struct drm_gem_object *obj) } /** - * drm_gem_for_each_gpuva() - iternator to walk over a list of gpuvas - * @entry__: &drm_gpuva structure to assign to in each iteration step - * @obj__: the &drm_gem_object the &drm_gpuvas to walk are associated with + * drm_gem_for_each_gpuvm_bo() - iterator to walk over a list of &drm_gpuvm_bo + * @entry__: &drm_gpuvm_bo structure to assign to in each iteration step + * @obj__: the &drm_gem_object the &drm_gpuvm_bo to walk are associated with * - * This iterator walks over all &drm_gpuva structures associated with the - * &drm_gpuva_manager. + * This iterator walks over all &drm_gpuvm_bo structures associated with the + * &drm_gem_object. */ -#define drm_gem_for_each_gpuva(entry__, obj__) \ - list_for_each_entry(entry__, &(obj__)->gpuva.list, gem.entry) +#define drm_gem_for_each_gpuvm_bo(entry__, obj__) \ + list_for_each_entry(entry__, &(obj__)->gpuva.list, list.entry.gem) /** - * drm_gem_for_each_gpuva_safe() - iternator to safely walk over a list of - * gpuvas - * @entry__: &drm_gpuva structure to assign to in each iteration step - * @next__: &next &drm_gpuva to store the next step - * @obj__: the &drm_gem_object the &drm_gpuvas to walk are associated with + * drm_gem_for_each_gpuvm_bo_safe() - iterator to safely walk over a list of + * &drm_gpuvm_bo + * @entry__: &drm_gpuvm_bostructure to assign to in each iteration step + * @next__: &next &drm_gpuvm_bo to store the next step + * @obj__: the &drm_gem_object the &drm_gpuvm_bo to walk are associated with * - * This iterator walks over all &drm_gpuva structures associated with the + * This iterator walks over all &drm_gpuvm_bo structures associated with the * &drm_gem_object. It is implemented with list_for_each_entry_safe(), hence * it is save against removal of elements. */ -#define drm_gem_for_each_gpuva_safe(entry__, next__, obj__) \ - list_for_each_entry_safe(entry__, next__, &(obj__)->gpuva.list, gem.entry) +#define drm_gem_for_each_gpuvm_bo_safe(entry__, next__, obj__) \ + list_for_each_entry_safe(entry__, next__, &(obj__)->gpuva.list, list.entry.gem) #endif /* __DRM_GEM_H__ */ diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index 4e6e1fd3485a..b12fb22b0e22 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -25,6 +25,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ +#include #include #include #include @@ -33,6 +34,7 @@ #include struct drm_gpuvm; +struct drm_gpuvm_bo; struct drm_gpuvm_ops; /** @@ -73,6 +75,12 @@ struct drm_gpuva { */ struct drm_gpuvm *vm; + /** + * @vm_bo: the &drm_gpuvm_bo abstraction for the mapped + * &drm_gem_object + */ + struct drm_gpuvm_bo *vm_bo; + /** * @flags: the &drm_gpuva_flags for this mapping */ @@ -108,7 +116,7 @@ struct drm_gpuva { struct drm_gem_object *obj; /** - * @entry: the &list_head to attach this object to a &drm_gem_object + * @entry: the &list_head to attach this object to a &drm_gpuvm_bo */ struct list_head entry; } gem; @@ -141,7 +149,7 @@ struct drm_gpuva { int drm_gpuva_insert(struct drm_gpuvm *gpuvm, struct drm_gpuva *va); void drm_gpuva_remove(struct drm_gpuva *va); -void drm_gpuva_link(struct drm_gpuva *va); +void drm_gpuva_link(struct drm_gpuva *va, struct drm_gpuvm_bo *vm_bo); void drm_gpuva_unlink(struct drm_gpuva *va); struct drm_gpuva *drm_gpuva_find(struct drm_gpuvm *gpuvm, @@ -188,10 +196,16 @@ static inline bool drm_gpuva_invalidated(struct drm_gpuva *va) * enum drm_gpuvm_flags - flags for struct drm_gpuvm */ enum drm_gpuvm_flags { + /** + * @DRM_GPUVM_RESV_PROTECTED: GPUVM is protected externally by the + * GPUVM's &dma_resv lock + */ + DRM_GPUVM_RESV_PROTECTED = BIT(0), + /** * @DRM_GPUVM_USERBITS: user defined bits */ - DRM_GPUVM_USERBITS = BIT(0), + DRM_GPUVM_USERBITS = BIT(1), }; /** @@ -302,6 +316,19 @@ bool drm_gpuvm_interval_empty(struct drm_gpuvm *gpuvm, u64 addr, u64 range); struct drm_gem_object * drm_gpuvm_resv_object_alloc(struct drm_device *drm); +/** + * drm_gpuvm_resv_protected() - indicates whether &DRM_GPUVM_RESV_PROTECTED is + * set + * @gpuvm: the &drm_gpuvm + * + * Returns: true if &DRM_GPUVM_RESV_PROTECTED is set, false otherwise. + */ +static inline bool +drm_gpuvm_resv_protected(struct drm_gpuvm *gpuvm) +{ + return gpuvm->flags & DRM_GPUVM_RESV_PROTECTED; +} + /** * drm_gpuvm_resv() - returns the &drm_gpuvm's &dma_resv * @gpuvm__: the &drm_gpuvm @@ -320,6 +347,12 @@ drm_gpuvm_resv_object_alloc(struct drm_device *drm); */ #define drm_gpuvm_resv_obj(gpuvm__) ((gpuvm__)->r_obj) +#define drm_gpuvm_resv_held(gpuvm__) \ + dma_resv_held(drm_gpuvm_resv(gpuvm__)) + +#define drm_gpuvm_resv_assert_held(gpuvm__) \ + dma_resv_assert_held(drm_gpuvm_resv(gpuvm__)) + #define drm_gpuvm_resv_held(gpuvm__) \ dma_resv_held(drm_gpuvm_resv(gpuvm__)) @@ -404,6 +437,125 @@ __drm_gpuva_next(struct drm_gpuva *va) #define drm_gpuvm_for_each_va_safe(va__, next__, gpuvm__) \ list_for_each_entry_safe(va__, next__, &(gpuvm__)->rb.list, rb.entry) +/** + * struct drm_gpuvm_bo - structure representing a &drm_gpuvm and + * &drm_gem_object combination + * + * This structure is an abstraction representing a &drm_gpuvm and + * &drm_gem_object combination. It serves as an indirection to accelerate + * iterating all &drm_gpuvas within a &drm_gpuvm backed by the same + * &drm_gem_object. + * + * Furthermore it is used cache evicted GEM objects for a certain GPU-VM to + * accelerate validation. + * + * Typically, drivers want to create an instance of a struct drm_gpuvm_bo once + * a GEM object is mapped first in a GPU-VM and release the instance once the + * last mapping of the GEM object in this GPU-VM is unmapped. + */ +struct drm_gpuvm_bo { + /** + * @vm: The &drm_gpuvm the @obj is mapped in. This is a reference + * counted pointer. + */ + struct drm_gpuvm *vm; + + /** + * @obj: The &drm_gem_object being mapped in @vm. This is a reference + * counted pointer. 
+ */ + struct drm_gem_object *obj; + + /** + * @kref: The reference count for this &drm_gpuvm_bo. + */ + struct kref kref; + + /** + * @list: Structure containing all &list_heads. + */ + struct { + /** + * @gpuva: The list of linked &drm_gpuvas. + * + * It is safe to access entries from this list as long as the + * GEM's gpuva lock is held. See also struct drm_gem_object. + */ + struct list_head gpuva; + + /** + * @entry: Structure containing all &list_heads serving as + * entry. + */ + struct { + /** + * @gem: List entry to attach to the &drm_gem_objects + * gpuva list. + */ + struct list_head gem; + } entry; + } list; +}; + +struct drm_gpuvm_bo * +drm_gpuvm_bo_create(struct drm_gpuvm *gpuvm, + struct drm_gem_object *obj); + +struct drm_gpuvm_bo * +drm_gpuvm_bo_obtain(struct drm_gpuvm *gpuvm, + struct drm_gem_object *obj); +struct drm_gpuvm_bo * +drm_gpuvm_bo_obtain_prealloc(struct drm_gpuvm_bo *vm_bo); + +/** + * drm_gpuvm_bo_get() - acquire a struct drm_gpuvm_bo reference + * @vm_bo: the &drm_gpuvm_bo to acquire the reference of + * + * This function acquires an additional reference to @vm_bo. It is illegal to + * call this without already holding a reference. No locks required. + */ +static inline struct drm_gpuvm_bo * +drm_gpuvm_bo_get(struct drm_gpuvm_bo *vm_bo) +{ + kref_get(&vm_bo->kref); + return vm_bo; +} + +void drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo); + +struct drm_gpuvm_bo * +drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, + struct drm_gem_object *obj); + +/** + * drm_gpuvm_bo_for_each_va() - iterator to walk over a list of &drm_gpuva + * @va__: &drm_gpuva structure to assign to in each iteration step + * @vm_bo__: the &drm_gpuvm_bo the &drm_gpuva to walk are associated with + * + * This iterator walks over all &drm_gpuva structures associated with the + * &drm_gpuvm_bo. + * + * The caller must hold the GEM's gpuva lock. + */ +#define drm_gpuvm_bo_for_each_va(va__, vm_bo__) \ + list_for_each_entry(va__, &(vm_bo)->list.gpuva, gem.entry) + +/** + * drm_gpuvm_bo_for_each_va_safe() - iterator to safely walk over a list of + * &drm_gpuva + * @va__: &drm_gpuva structure to assign to in each iteration step + * @next__: &next &drm_gpuva to store the next step + * @vm_bo__: the &drm_gpuvm_bo the &drm_gpuva to walk are associated with + * + * This iterator walks over all &drm_gpuva structures associated with the + * &drm_gpuvm_bo. It is implemented with list_for_each_entry_safe(), hence + * it is save against removal of elements. + * + * The caller must hold the GEM's gpuva lock. + */ +#define drm_gpuvm_bo_for_each_va_safe(va__, next__, vm_bo__) \ + list_for_each_entry_safe(va__, next__, &(vm_bo)->list.gpuva, gem.entry) + /** * enum drm_gpuva_op_type - GPU VA operation type * @@ -673,8 +825,7 @@ drm_gpuvm_prefetch_ops_create(struct drm_gpuvm *gpuvm, u64 addr, u64 range); struct drm_gpuva_ops * -drm_gpuvm_gem_unmap_ops_create(struct drm_gpuvm *gpuvm, - struct drm_gem_object *obj); +drm_gpuvm_bo_unmap_ops_create(struct drm_gpuvm_bo *vm_bo); void drm_gpuva_ops_free(struct drm_gpuvm *gpuvm, struct drm_gpuva_ops *ops); @@ -726,6 +877,30 @@ struct drm_gpuvm_ops { */ void (*op_free)(struct drm_gpuva_op *op); + /** + * @vm_bo_alloc: called when the &drm_gpuvm allocates + * a struct drm_gpuvm_bo + * + * Some drivers may want to embed struct drm_gpuvm_bo into driver + * specific structures. By implementing this callback drivers can + * allocate memory accordingly. + * + * This callback is optional. 
+ */ + struct drm_gpuvm_bo *(*vm_bo_alloc)(void); + + /** + * @vm_bo_free: called when the &drm_gpuvm frees a + * struct drm_gpuvm_bo + * + * Some drivers may want to embed struct drm_gpuvm_bo into driver + * specific structures. By implementing this callback drivers can + * free the previously allocated memory accordingly. + * + * This callback is optional. + */ + void (*vm_bo_free)(struct drm_gpuvm_bo *vm_bo); + /** * @sm_step_map: called from &drm_gpuvm_sm_map to finally insert the * mapping once all previous steps were completed -- cgit v1.2.3 From 50c1a36f594bb3dd33f3f9386c5d960cd12327d8 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 8 Nov 2023 01:12:41 +0100 Subject: drm/gpuvm: track/lock/validate external/evicted objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the DRM GPUVM offers common infrastructure to track GPU VA allocations and mappings, generically connect GPU VA mappings to their backing buffers and perform more complex mapping operations on the GPU VA space. However, there are more design patterns commonly used by drivers, which can potentially be generalized in order to make the DRM GPUVM represent a basis for GPU-VM implementations. In this context, this patch aims at generalizing the following elements. 1) Provide a common dma-resv for GEM objects not being used outside of this GPU-VM. 2) Provide tracking of external GEM objects (GEM objects which are shared with other GPU-VMs). 3) Provide functions to efficiently lock all GEM objects dma-resv the GPU-VM contains mappings of. 4) Provide tracking of evicted GEM objects the GPU-VM contains mappings of, such that validation of evicted GEM objects is accelerated. 5) Provide some convinience functions for common patterns. Big thanks to Boris Brezillon for his help to figure out locking for drivers updating the GPU VA space within the fence signalling path. Acked-by: Christian König Reviewed-by: Boris Brezillon Reviewed-by: Thomas Hellström Suggested-by: Matthew Brost Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-12-dakr@redhat.com --- drivers/gpu/drm/drm_gpuvm.c | 633 ++++++++++++++++++++++++++++++++++++++++++++ include/drm/drm_gpuvm.h | 250 +++++++++++++++++ 2 files changed, 883 insertions(+) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index 7e193bca8bab..54f5e8851de5 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -82,6 +82,21 @@ * &drm_gem_object list of &drm_gpuvm_bos for an existing instance of this * particular combination. If not existent a new instance is created and linked * to the &drm_gem_object. + * + * &drm_gpuvm_bo structures, since unique for a given &drm_gpuvm, are also used + * as entry for the &drm_gpuvm's lists of external and evicted objects. Those + * lists are maintained in order to accelerate locking of dma-resv locks and + * validation of evicted objects bound in a &drm_gpuvm. For instance, all + * &drm_gem_object's &dma_resv of a given &drm_gpuvm can be locked by calling + * drm_gpuvm_exec_lock(). Once locked drivers can call drm_gpuvm_validate() in + * order to validate all evicted &drm_gem_objects. It is also possible to lock + * additional &drm_gem_objects by providing the corresponding parameters to + * drm_gpuvm_exec_lock() as well as open code the &drm_exec loop while making + * use of helper functions such as drm_gpuvm_prepare_range() or + * drm_gpuvm_prepare_objects(). 
+ * + * Every bound &drm_gem_object is treated as external object when its &dma_resv + * structure is different than the &drm_gpuvm's common &dma_resv structure. */ /** @@ -429,6 +444,20 @@ * Subsequent calls to drm_gpuvm_bo_obtain() for the same &drm_gpuvm and * &drm_gem_object must be able to observe previous creations and destructions * of &drm_gpuvm_bos in order to keep instances unique. + * + * The &drm_gpuvm's lists for keeping track of external and evicted objects are + * protected against concurrent insertion / removal and iteration internally. + * + * However, drivers still need ensure to protect concurrent calls to functions + * iterating those lists, namely drm_gpuvm_prepare_objects() and + * drm_gpuvm_validate(). + * + * Alternatively, drivers can set the &DRM_GPUVM_RESV_PROTECTED flag to indicate + * that the corresponding &dma_resv locks are held in order to protect the + * lists. If &DRM_GPUVM_RESV_PROTECTED is set, internal locking is disabled and + * the corresponding lockdep checks are enabled. This is an optimization for + * drivers which are capable of taking the corresponding &dma_resv locks and + * hence do not require internal locking. */ /** @@ -641,6 +670,201 @@ * } */ +/** + * get_next_vm_bo_from_list() - get the next vm_bo element + * @__gpuvm: the &drm_gpuvm + * @__list_name: the name of the list we're iterating on + * @__local_list: a pointer to the local list used to store already iterated items + * @__prev_vm_bo: the previous element we got from get_next_vm_bo_from_list() + * + * This helper is here to provide lockless list iteration. Lockless as in, the + * iterator releases the lock immediately after picking the first element from + * the list, so list insertion deletion can happen concurrently. + * + * Elements popped from the original list are kept in a local list, so removal + * and is_empty checks can still happen while we're iterating the list. + */ +#define get_next_vm_bo_from_list(__gpuvm, __list_name, __local_list, __prev_vm_bo) \ + ({ \ + struct drm_gpuvm_bo *__vm_bo = NULL; \ + \ + drm_gpuvm_bo_put(__prev_vm_bo); \ + \ + spin_lock(&(__gpuvm)->__list_name.lock); \ + if (!(__gpuvm)->__list_name.local_list) \ + (__gpuvm)->__list_name.local_list = __local_list; \ + else \ + drm_WARN_ON((__gpuvm)->drm, \ + (__gpuvm)->__list_name.local_list != __local_list); \ + \ + while (!list_empty(&(__gpuvm)->__list_name.list)) { \ + __vm_bo = list_first_entry(&(__gpuvm)->__list_name.list, \ + struct drm_gpuvm_bo, \ + list.entry.__list_name); \ + if (kref_get_unless_zero(&__vm_bo->kref)) { \ + list_move_tail(&(__vm_bo)->list.entry.__list_name, \ + __local_list); \ + break; \ + } else { \ + list_del_init(&(__vm_bo)->list.entry.__list_name); \ + __vm_bo = NULL; \ + } \ + } \ + spin_unlock(&(__gpuvm)->__list_name.lock); \ + \ + __vm_bo; \ + }) + +/** + * for_each_vm_bo_in_list() - internal vm_bo list iterator + * @__gpuvm: the &drm_gpuvm + * @__list_name: the name of the list we're iterating on + * @__local_list: a pointer to the local list used to store already iterated items + * @__vm_bo: the struct drm_gpuvm_bo to assign in each iteration step + * + * This helper is here to provide lockless list iteration. Lockless as in, the + * iterator releases the lock immediately after picking the first element from the + * list, hence list insertion and deletion can happen concurrently. + * + * It is not allowed to re-assign the vm_bo pointer from inside this loop. 
+ * + * Typical use: + * + * struct drm_gpuvm_bo *vm_bo; + * LIST_HEAD(my_local_list); + * + * ret = 0; + * for_each_vm_bo_in_list(gpuvm, , &my_local_list, vm_bo) { + * ret = do_something_with_vm_bo(..., vm_bo); + * if (ret) + * break; + * } + * // Drop ref in case we break out of the loop. + * drm_gpuvm_bo_put(vm_bo); + * restore_vm_bo_list(gpuvm, , &my_local_list); + * + * + * Only used for internal list iterations, not meant to be exposed to the outside + * world. + */ +#define for_each_vm_bo_in_list(__gpuvm, __list_name, __local_list, __vm_bo) \ + for (__vm_bo = get_next_vm_bo_from_list(__gpuvm, __list_name, \ + __local_list, NULL); \ + __vm_bo; \ + __vm_bo = get_next_vm_bo_from_list(__gpuvm, __list_name, \ + __local_list, __vm_bo)) + +static void +__restore_vm_bo_list(struct drm_gpuvm *gpuvm, spinlock_t *lock, + struct list_head *list, struct list_head **local_list) +{ + /* Merge back the two lists, moving local list elements to the + * head to preserve previous ordering, in case it matters. + */ + spin_lock(lock); + if (*local_list) { + list_splice(*local_list, list); + *local_list = NULL; + } + spin_unlock(lock); +} + +/** + * restore_vm_bo_list() - move vm_bo elements back to their original list + * @__gpuvm: the &drm_gpuvm + * @__list_name: the name of the list we're iterating on + * + * When we're done iterating a vm_bo list, we should call restore_vm_bo_list() + * to restore the original state and let new iterations take place. + */ +#define restore_vm_bo_list(__gpuvm, __list_name) \ + __restore_vm_bo_list((__gpuvm), &(__gpuvm)->__list_name.lock, \ + &(__gpuvm)->__list_name.list, \ + &(__gpuvm)->__list_name.local_list) + +static void +cond_spin_lock(spinlock_t *lock, bool cond) +{ + if (cond) + spin_lock(lock); +} + +static void +cond_spin_unlock(spinlock_t *lock, bool cond) +{ + if (cond) + spin_unlock(lock); +} + +static void +__drm_gpuvm_bo_list_add(struct drm_gpuvm *gpuvm, spinlock_t *lock, + struct list_head *entry, struct list_head *list) +{ + cond_spin_lock(lock, !!lock); + if (list_empty(entry)) + list_add_tail(entry, list); + cond_spin_unlock(lock, !!lock); +} + +/** + * drm_gpuvm_bo_list_add() - insert a vm_bo into the given list + * @__vm_bo: the &drm_gpuvm_bo + * @__list_name: the name of the list to insert into + * @__lock: whether to lock with the internal spinlock + * + * Inserts the given @__vm_bo into the list specified by @__list_name. + */ +#define drm_gpuvm_bo_list_add(__vm_bo, __list_name, __lock) \ + __drm_gpuvm_bo_list_add((__vm_bo)->vm, \ + __lock ? &(__vm_bo)->vm->__list_name.lock : \ + NULL, \ + &(__vm_bo)->list.entry.__list_name, \ + &(__vm_bo)->vm->__list_name.list) + +static void +__drm_gpuvm_bo_list_del(struct drm_gpuvm *gpuvm, spinlock_t *lock, + struct list_head *entry, bool init) +{ + cond_spin_lock(lock, !!lock); + if (init) { + if (!list_empty(entry)) + list_del_init(entry); + } else { + list_del(entry); + } + cond_spin_unlock(lock, !!lock); +} + +/** + * drm_gpuvm_bo_list_del_init() - remove a vm_bo from the given list + * @__vm_bo: the &drm_gpuvm_bo + * @__list_name: the name of the list to insert into + * @__lock: whether to lock with the internal spinlock + * + * Removes the given @__vm_bo from the list specified by @__list_name. + */ +#define drm_gpuvm_bo_list_del_init(__vm_bo, __list_name, __lock) \ + __drm_gpuvm_bo_list_del((__vm_bo)->vm, \ + __lock ? 
&(__vm_bo)->vm->__list_name.lock : \ + NULL, \ + &(__vm_bo)->list.entry.__list_name, \ + true) + +/** + * drm_gpuvm_bo_list_del() - remove a vm_bo from the given list + * @__vm_bo: the &drm_gpuvm_bo + * @__list_name: the name of the list to insert into + * @__lock: whether to lock with the internal spinlock + * + * Removes the given @__vm_bo from the list specified by @__list_name. + */ +#define drm_gpuvm_bo_list_del(__vm_bo, __list_name, __lock) \ + __drm_gpuvm_bo_list_del((__vm_bo)->vm, \ + __lock ? &(__vm_bo)->vm->__list_name.lock : \ + NULL, \ + &(__vm_bo)->list.entry.__list_name, \ + false) + #define to_drm_gpuva(__node) container_of((__node), struct drm_gpuva, rb.node) #define GPUVA_START(node) ((node)->va.addr) @@ -781,6 +1005,12 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, gpuvm->rb.tree = RB_ROOT_CACHED; INIT_LIST_HEAD(&gpuvm->rb.list); + INIT_LIST_HEAD(&gpuvm->extobj.list); + spin_lock_init(&gpuvm->extobj.lock); + + INIT_LIST_HEAD(&gpuvm->evict.list); + spin_lock_init(&gpuvm->evict.lock); + kref_init(&gpuvm->kref); gpuvm->name = name ? name : "unknown"; @@ -818,6 +1048,11 @@ drm_gpuvm_fini(struct drm_gpuvm *gpuvm) drm_WARN(gpuvm->drm, !RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root), "GPUVA tree is not empty, potentially leaking memory.\n"); + drm_WARN(gpuvm->drm, !list_empty(&gpuvm->extobj.list), + "Extobj list should be empty.\n"); + drm_WARN(gpuvm->drm, !list_empty(&gpuvm->evict.list), + "Evict list should be empty.\n"); + drm_gem_object_put(gpuvm->r_obj); } @@ -850,6 +1085,343 @@ drm_gpuvm_put(struct drm_gpuvm *gpuvm) } EXPORT_SYMBOL_GPL(drm_gpuvm_put); +static int +__drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm, + struct drm_exec *exec, + unsigned int num_fences) +{ + struct drm_gpuvm_bo *vm_bo; + LIST_HEAD(extobjs); + int ret = 0; + + for_each_vm_bo_in_list(gpuvm, extobj, &extobjs, vm_bo) { + ret = drm_exec_prepare_obj(exec, vm_bo->obj, num_fences); + if (ret) + break; + } + /* Drop ref in case we break out of the loop. */ + drm_gpuvm_bo_put(vm_bo); + restore_vm_bo_list(gpuvm, extobj); + + return ret; +} + +static int +drm_gpuvm_prepare_objects_locked(struct drm_gpuvm *gpuvm, + struct drm_exec *exec, + unsigned int num_fences) +{ + struct drm_gpuvm_bo *vm_bo; + int ret = 0; + + drm_gpuvm_resv_assert_held(gpuvm); + list_for_each_entry(vm_bo, &gpuvm->extobj.list, list.entry.extobj) { + ret = drm_exec_prepare_obj(exec, vm_bo->obj, num_fences); + if (ret) + break; + + if (vm_bo->evicted) + drm_gpuvm_bo_list_add(vm_bo, evict, false); + } + + return ret; +} + +/** + * drm_gpuvm_prepare_objects() - prepare all assoiciated BOs + * @gpuvm: the &drm_gpuvm + * @exec: the &drm_exec locking context + * @num_fences: the amount of &dma_fences to reserve + * + * Calls drm_exec_prepare_obj() for all &drm_gem_objects the given + * &drm_gpuvm contains mappings of. + * + * Using this function directly, it is the drivers responsibility to call + * drm_exec_init() and drm_exec_fini() accordingly. + * + * Note: This function is safe against concurrent insertion and removal of + * external objects, however it is not safe against concurrent usage itself. + * + * Drivers need to make sure to protect this case with either an outer VM lock + * or by calling drm_gpuvm_prepare_vm() before this function within the + * drm_exec_until_all_locked() loop, such that the GPUVM's dma-resv lock ensures + * mutual exclusion. + * + * Returns: 0 on success, negative error code on failure. 
+ */ +int +drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm, + struct drm_exec *exec, + unsigned int num_fences) +{ + if (drm_gpuvm_resv_protected(gpuvm)) + return drm_gpuvm_prepare_objects_locked(gpuvm, exec, + num_fences); + else + return __drm_gpuvm_prepare_objects(gpuvm, exec, num_fences); +} +EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_objects); + +/** + * drm_gpuvm_prepare_range() - prepare all BOs mapped within a given range + * @gpuvm: the &drm_gpuvm + * @exec: the &drm_exec locking context + * @addr: the start address within the VA space + * @range: the range to iterate within the VA space + * @num_fences: the amount of &dma_fences to reserve + * + * Calls drm_exec_prepare_obj() for all &drm_gem_objects mapped between @addr + * and @addr + @range. + * + * Returns: 0 on success, negative error code on failure. + */ +int +drm_gpuvm_prepare_range(struct drm_gpuvm *gpuvm, struct drm_exec *exec, + u64 addr, u64 range, unsigned int num_fences) +{ + struct drm_gpuva *va; + u64 end = addr + range; + int ret; + + drm_gpuvm_for_each_va_range(va, gpuvm, addr, end) { + struct drm_gem_object *obj = va->gem.obj; + + ret = drm_exec_prepare_obj(exec, obj, num_fences); + if (ret) + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_range); + +/** + * drm_gpuvm_exec_lock() - lock all dma-resv of all assoiciated BOs + * @vm_exec: the &drm_gpuvm_exec wrapper + * + * Acquires all dma-resv locks of all &drm_gem_objects the given + * &drm_gpuvm contains mappings of. + * + * Addionally, when calling this function with struct drm_gpuvm_exec::extra + * being set the driver receives the given @fn callback to lock additional + * dma-resv in the context of the &drm_gpuvm_exec instance. Typically, drivers + * would call drm_exec_prepare_obj() from within this callback. + * + * Returns: 0 on success, negative error code on failure. + */ +int +drm_gpuvm_exec_lock(struct drm_gpuvm_exec *vm_exec) +{ + struct drm_gpuvm *gpuvm = vm_exec->vm; + struct drm_exec *exec = &vm_exec->exec; + unsigned int num_fences = vm_exec->num_fences; + int ret; + + drm_exec_init(exec, vm_exec->flags); + + drm_exec_until_all_locked(exec) { + ret = drm_gpuvm_prepare_vm(gpuvm, exec, num_fences); + drm_exec_retry_on_contention(exec); + if (ret) + goto err; + + ret = drm_gpuvm_prepare_objects(gpuvm, exec, num_fences); + drm_exec_retry_on_contention(exec); + if (ret) + goto err; + + if (vm_exec->extra.fn) { + ret = vm_exec->extra.fn(vm_exec); + drm_exec_retry_on_contention(exec); + if (ret) + goto err; + } + } + + return 0; + +err: + drm_exec_fini(exec); + return ret; +} +EXPORT_SYMBOL_GPL(drm_gpuvm_exec_lock); + +static int +fn_lock_array(struct drm_gpuvm_exec *vm_exec) +{ + struct { + struct drm_gem_object **objs; + unsigned int num_objs; + } *args = vm_exec->extra.priv; + + return drm_exec_prepare_array(&vm_exec->exec, args->objs, + args->num_objs, vm_exec->num_fences); +} + +/** + * drm_gpuvm_exec_lock_array() - lock all dma-resv of all assoiciated BOs + * @vm_exec: the &drm_gpuvm_exec wrapper + * @objs: additional &drm_gem_objects to lock + * @num_objs: the number of additional &drm_gem_objects to lock + * + * Acquires all dma-resv locks of all &drm_gem_objects the given &drm_gpuvm + * contains mappings of, plus the ones given through @objs. + * + * Returns: 0 on success, negative error code on failure. 
+ */ +int +drm_gpuvm_exec_lock_array(struct drm_gpuvm_exec *vm_exec, + struct drm_gem_object **objs, + unsigned int num_objs) +{ + struct { + struct drm_gem_object **objs; + unsigned int num_objs; + } args; + + args.objs = objs; + args.num_objs = num_objs; + + vm_exec->extra.fn = fn_lock_array; + vm_exec->extra.priv = &args; + + return drm_gpuvm_exec_lock(vm_exec); +} +EXPORT_SYMBOL_GPL(drm_gpuvm_exec_lock_array); + +/** + * drm_gpuvm_exec_lock_range() - prepare all BOs mapped within a given range + * @vm_exec: the &drm_gpuvm_exec wrapper + * @addr: the start address within the VA space + * @range: the range to iterate within the VA space + * + * Acquires all dma-resv locks of all &drm_gem_objects mapped between @addr and + * @addr + @range. + * + * Returns: 0 on success, negative error code on failure. + */ +int +drm_gpuvm_exec_lock_range(struct drm_gpuvm_exec *vm_exec, + u64 addr, u64 range) +{ + struct drm_gpuvm *gpuvm = vm_exec->vm; + struct drm_exec *exec = &vm_exec->exec; + int ret; + + drm_exec_init(exec, vm_exec->flags); + + drm_exec_until_all_locked(exec) { + ret = drm_gpuvm_prepare_range(gpuvm, exec, addr, range, + vm_exec->num_fences); + drm_exec_retry_on_contention(exec); + if (ret) + goto err; + } + + return ret; + +err: + drm_exec_fini(exec); + return ret; +} +EXPORT_SYMBOL_GPL(drm_gpuvm_exec_lock_range); + +static int +__drm_gpuvm_validate(struct drm_gpuvm *gpuvm, struct drm_exec *exec) +{ + const struct drm_gpuvm_ops *ops = gpuvm->ops; + struct drm_gpuvm_bo *vm_bo; + LIST_HEAD(evict); + int ret = 0; + + for_each_vm_bo_in_list(gpuvm, evict, &evict, vm_bo) { + ret = ops->vm_bo_validate(vm_bo, exec); + if (ret) + break; + } + /* Drop ref in case we break out of the loop. */ + drm_gpuvm_bo_put(vm_bo); + restore_vm_bo_list(gpuvm, evict); + + return ret; +} + +static int +drm_gpuvm_validate_locked(struct drm_gpuvm *gpuvm, struct drm_exec *exec) +{ + const struct drm_gpuvm_ops *ops = gpuvm->ops; + struct drm_gpuvm_bo *vm_bo, *next; + int ret = 0; + + drm_gpuvm_resv_assert_held(gpuvm); + + list_for_each_entry_safe(vm_bo, next, &gpuvm->evict.list, + list.entry.evict) { + ret = ops->vm_bo_validate(vm_bo, exec); + if (ret) + break; + + dma_resv_assert_held(vm_bo->obj->resv); + if (!vm_bo->evicted) + drm_gpuvm_bo_list_del_init(vm_bo, evict, false); + } + + return ret; +} + +/** + * drm_gpuvm_validate() - validate all BOs marked as evicted + * @gpuvm: the &drm_gpuvm to validate evicted BOs + * @exec: the &drm_exec instance used for locking the GPUVM + * + * Calls the &drm_gpuvm_ops::vm_bo_validate callback for all evicted buffer + * objects being mapped in the given &drm_gpuvm. + * + * Returns: 0 on success, negative error code on failure. 
+ */ +int +drm_gpuvm_validate(struct drm_gpuvm *gpuvm, struct drm_exec *exec) +{ + const struct drm_gpuvm_ops *ops = gpuvm->ops; + + if (unlikely(!ops || !ops->vm_bo_validate)) + return -EOPNOTSUPP; + + if (drm_gpuvm_resv_protected(gpuvm)) + return drm_gpuvm_validate_locked(gpuvm, exec); + else + return __drm_gpuvm_validate(gpuvm, exec); +} +EXPORT_SYMBOL_GPL(drm_gpuvm_validate); + +/** + * drm_gpuvm_resv_add_fence - add fence to private and all extobj + * dma-resv + * @gpuvm: the &drm_gpuvm to add a fence to + * @exec: the &drm_exec locking context + * @fence: fence to add + * @private_usage: private dma-resv usage + * @extobj_usage: extobj dma-resv usage + */ +void +drm_gpuvm_resv_add_fence(struct drm_gpuvm *gpuvm, + struct drm_exec *exec, + struct dma_fence *fence, + enum dma_resv_usage private_usage, + enum dma_resv_usage extobj_usage) +{ + struct drm_gem_object *obj; + unsigned long index; + + drm_exec_for_each_locked_object(exec, index, obj) { + dma_resv_assert_held(obj->resv); + dma_resv_add_fence(obj->resv, fence, + drm_gpuvm_is_extobj(gpuvm, obj) ? + extobj_usage : private_usage); + } +} +EXPORT_SYMBOL_GPL(drm_gpuvm_resv_add_fence); + /** * drm_gpuvm_bo_create() - create a new instance of struct drm_gpuvm_bo * @gpuvm: The &drm_gpuvm the @obj is mapped in. @@ -883,6 +1455,9 @@ drm_gpuvm_bo_create(struct drm_gpuvm *gpuvm, INIT_LIST_HEAD(&vm_bo->list.gpuva); INIT_LIST_HEAD(&vm_bo->list.entry.gem); + INIT_LIST_HEAD(&vm_bo->list.entry.extobj); + INIT_LIST_HEAD(&vm_bo->list.entry.evict); + return vm_bo; } EXPORT_SYMBOL_GPL(drm_gpuvm_bo_create); @@ -900,6 +1475,9 @@ drm_gpuvm_bo_destroy(struct kref *kref) if (!lock) drm_gpuvm_resv_assert_held(gpuvm); + drm_gpuvm_bo_list_del(vm_bo, extobj, lock); + drm_gpuvm_bo_list_del(vm_bo, evict, lock); + drm_gem_gpuva_assert_lock_held(obj); list_del(&vm_bo->list.entry.gem); @@ -1043,6 +1621,61 @@ drm_gpuvm_bo_obtain_prealloc(struct drm_gpuvm_bo *__vm_bo) } EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain_prealloc); +/** + * drm_gpuvm_bo_extobj_add() - adds the &drm_gpuvm_bo to its &drm_gpuvm's + * extobj list + * @vm_bo: The &drm_gpuvm_bo to add to its &drm_gpuvm's the extobj list. + * + * Adds the given @vm_bo to its &drm_gpuvm's extobj list if not on the list + * already and if the corresponding &drm_gem_object is an external object, + * actually. + */ +void +drm_gpuvm_bo_extobj_add(struct drm_gpuvm_bo *vm_bo) +{ + struct drm_gpuvm *gpuvm = vm_bo->vm; + bool lock = !drm_gpuvm_resv_protected(gpuvm); + + if (!lock) + drm_gpuvm_resv_assert_held(gpuvm); + + if (drm_gpuvm_is_extobj(gpuvm, vm_bo->obj)) + drm_gpuvm_bo_list_add(vm_bo, extobj, lock); +} +EXPORT_SYMBOL_GPL(drm_gpuvm_bo_extobj_add); + +/** + * drm_gpuvm_bo_evict() - add / remove a &drm_gpuvm_bo to / from the &drm_gpuvms + * evicted list + * @vm_bo: the &drm_gpuvm_bo to add or remove + * @evict: indicates whether the object is evicted + * + * Adds a &drm_gpuvm_bo to or removes it from the &drm_gpuvms evicted list. + */ +void +drm_gpuvm_bo_evict(struct drm_gpuvm_bo *vm_bo, bool evict) +{ + struct drm_gpuvm *gpuvm = vm_bo->vm; + struct drm_gem_object *obj = vm_bo->obj; + bool lock = !drm_gpuvm_resv_protected(gpuvm); + + dma_resv_assert_held(obj->resv); + vm_bo->evicted = evict; + + /* Can't add external objects to the evicted list directly if not using + * internal spinlocks, since in this case the evicted list is protected + * with the VM's common dma-resv lock. 
+ */ + if (drm_gpuvm_is_extobj(gpuvm, obj) && !lock) + return; + + if (evict) + drm_gpuvm_bo_list_add(vm_bo, evict, lock); + else + drm_gpuvm_bo_list_del_init(vm_bo, evict, lock); +} +EXPORT_SYMBOL_GPL(drm_gpuvm_bo_evict); + static int __drm_gpuva_insert(struct drm_gpuvm *gpuvm, struct drm_gpuva *va) diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index b12fb22b0e22..8ca10461d8ac 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -32,6 +32,7 @@ #include #include +#include struct drm_gpuvm; struct drm_gpuvm_bo; @@ -283,6 +284,50 @@ struct drm_gpuvm { * @r_obj: Resv GEM object; representing the GPUVM's common &dma_resv. */ struct drm_gem_object *r_obj; + + /** + * @extobj: structure holding the extobj list + */ + struct { + /** + * @list: &list_head storing &drm_gpuvm_bos serving as + * external object + */ + struct list_head list; + + /** + * @local_list: pointer to the local list temporarily storing + * entries from the external object list + */ + struct list_head *local_list; + + /** + * @lock: spinlock to protect the extobj list + */ + spinlock_t lock; + } extobj; + + /** + * @evict: structure holding the evict list and evict list lock + */ + struct { + /** + * @list: &list_head storing &drm_gpuvm_bos currently being + * evicted + */ + struct list_head list; + + /** + * @local_list: pointer to the local list temporarily storing + * entries from the evicted object list + */ + struct list_head *local_list; + + /** + * @lock: spinlock to protect the evict list + */ + spinlock_t lock; + } evict; }; void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, @@ -359,6 +404,22 @@ drm_gpuvm_resv_protected(struct drm_gpuvm *gpuvm) #define drm_gpuvm_resv_assert_held(gpuvm__) \ dma_resv_assert_held(drm_gpuvm_resv(gpuvm__)) +/** + * drm_gpuvm_is_extobj() - indicates whether the given &drm_gem_object is an + * external object + * @gpuvm: the &drm_gpuvm to check + * @obj: the &drm_gem_object to check + * + * Returns: true if the &drm_gem_object &dma_resv differs from the + * &drm_gpuvms &dma_resv, false otherwise + */ +static inline bool +drm_gpuvm_is_extobj(struct drm_gpuvm *gpuvm, + struct drm_gem_object *obj) +{ + return obj && obj->resv != drm_gpuvm_resv(gpuvm); +} + static inline struct drm_gpuva * __drm_gpuva_next(struct drm_gpuva *va) { @@ -437,6 +498,144 @@ __drm_gpuva_next(struct drm_gpuva *va) #define drm_gpuvm_for_each_va_safe(va__, next__, gpuvm__) \ list_for_each_entry_safe(va__, next__, &(gpuvm__)->rb.list, rb.entry) +/** + * struct drm_gpuvm_exec - &drm_gpuvm abstraction of &drm_exec + * + * This structure should be created on the stack as &drm_exec should be. + * + * Optionally, @extra can be set in order to lock additional &drm_gem_objects. + */ +struct drm_gpuvm_exec { + /** + * @exec: the &drm_exec structure + */ + struct drm_exec exec; + + /** + * @flags: the flags for the struct drm_exec + */ + uint32_t flags; + + /** + * @vm: the &drm_gpuvm to lock its DMA reservations + */ + struct drm_gpuvm *vm; + + /** + * @num_fences: the number of fences to reserve for the &dma_resv of the + * locked &drm_gem_objects + */ + unsigned int num_fences; + + /** + * @extra: Callback and corresponding private data for the driver to + * lock arbitrary additional &drm_gem_objects. + */ + struct { + /** + * @fn: The driver callback to lock additional &drm_gem_objects. 
+ */ + int (*fn)(struct drm_gpuvm_exec *vm_exec); + + /** + * @priv: driver private data for the @fn callback + */ + void *priv; + } extra; +}; + +/** + * drm_gpuvm_prepare_vm() - prepare the GPUVMs common dma-resv + * @gpuvm: the &drm_gpuvm + * @exec: the &drm_exec context + * @num_fences: the amount of &dma_fences to reserve + * + * Calls drm_exec_prepare_obj() for the GPUVMs dummy &drm_gem_object. + * + * Using this function directly, it is the drivers responsibility to call + * drm_exec_init() and drm_exec_fini() accordingly. + * + * Returns: 0 on success, negative error code on failure. + */ +static inline int +drm_gpuvm_prepare_vm(struct drm_gpuvm *gpuvm, + struct drm_exec *exec, + unsigned int num_fences) +{ + return drm_exec_prepare_obj(exec, gpuvm->r_obj, num_fences); +} + +int drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm, + struct drm_exec *exec, + unsigned int num_fences); + +int drm_gpuvm_prepare_range(struct drm_gpuvm *gpuvm, + struct drm_exec *exec, + u64 addr, u64 range, + unsigned int num_fences); + +int drm_gpuvm_exec_lock(struct drm_gpuvm_exec *vm_exec); + +int drm_gpuvm_exec_lock_array(struct drm_gpuvm_exec *vm_exec, + struct drm_gem_object **objs, + unsigned int num_objs); + +int drm_gpuvm_exec_lock_range(struct drm_gpuvm_exec *vm_exec, + u64 addr, u64 range); + +/** + * drm_gpuvm_exec_unlock() - lock all dma-resv of all assoiciated BOs + * @vm_exec: the &drm_gpuvm_exec wrapper + * + * Releases all dma-resv locks of all &drm_gem_objects previously acquired + * through drm_gpuvm_exec_lock() or its variants. + * + * Returns: 0 on success, negative error code on failure. + */ +static inline void +drm_gpuvm_exec_unlock(struct drm_gpuvm_exec *vm_exec) +{ + drm_exec_fini(&vm_exec->exec); +} + +int drm_gpuvm_validate(struct drm_gpuvm *gpuvm, struct drm_exec *exec); +void drm_gpuvm_resv_add_fence(struct drm_gpuvm *gpuvm, + struct drm_exec *exec, + struct dma_fence *fence, + enum dma_resv_usage private_usage, + enum dma_resv_usage extobj_usage); + +/** + * drm_gpuvm_exec_resv_add_fence() + * @vm_exec: the &drm_gpuvm_exec wrapper + * @fence: fence to add + * @private_usage: private dma-resv usage + * @extobj_usage: extobj dma-resv usage + * + * See drm_gpuvm_resv_add_fence(). + */ +static inline void +drm_gpuvm_exec_resv_add_fence(struct drm_gpuvm_exec *vm_exec, + struct dma_fence *fence, + enum dma_resv_usage private_usage, + enum dma_resv_usage extobj_usage) +{ + drm_gpuvm_resv_add_fence(vm_exec->vm, &vm_exec->exec, fence, + private_usage, extobj_usage); +} + +/** + * drm_gpuvm_exec_validate() + * @vm_exec: the &drm_gpuvm_exec wrapper + * + * See drm_gpuvm_validate(). + */ +static inline int +drm_gpuvm_exec_validate(struct drm_gpuvm_exec *vm_exec) +{ + return drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec); +} + /** * struct drm_gpuvm_bo - structure representing a &drm_gpuvm and * &drm_gem_object combination @@ -466,6 +665,12 @@ struct drm_gpuvm_bo { */ struct drm_gem_object *obj; + /** + * @evicted: Indicates whether the &drm_gem_object is evicted; field + * protected by the &drm_gem_object's dma-resv lock. + */ + bool evicted; + /** * @kref: The reference count for this &drm_gpuvm_bo. */ @@ -493,6 +698,18 @@ struct drm_gpuvm_bo { * gpuva list. */ struct list_head gem; + + /** + * @evict: List entry to attach to the &drm_gpuvms + * extobj list. + */ + struct list_head extobj; + + /** + * @evict: List entry to attach to the &drm_gpuvms evict + * list. 
+ */ + struct list_head evict; } entry; } list; }; @@ -527,6 +744,27 @@ struct drm_gpuvm_bo * drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, struct drm_gem_object *obj); +void drm_gpuvm_bo_evict(struct drm_gpuvm_bo *vm_bo, bool evict); + +/** + * drm_gpuvm_bo_gem_evict() + * @obj: the &drm_gem_object + * @evict: indicates whether @obj is evicted + * + * See drm_gpuvm_bo_evict(). + */ +static inline void +drm_gpuvm_bo_gem_evict(struct drm_gem_object *obj, bool evict) +{ + struct drm_gpuvm_bo *vm_bo; + + drm_gem_gpuva_assert_lock_held(obj); + drm_gem_for_each_gpuvm_bo(vm_bo, obj) + drm_gpuvm_bo_evict(vm_bo, evict); +} + +void drm_gpuvm_bo_extobj_add(struct drm_gpuvm_bo *vm_bo); + /** * drm_gpuvm_bo_for_each_va() - iterator to walk over a list of &drm_gpuva * @va__: &drm_gpuva structure to assign to in each iteration step @@ -901,6 +1139,18 @@ struct drm_gpuvm_ops { */ void (*vm_bo_free)(struct drm_gpuvm_bo *vm_bo); + /** + * @vm_bo_validate: called from drm_gpuvm_validate() + * + * Drivers receive this callback for every evicted &drm_gem_object being + * mapped in the corresponding &drm_gpuvm. + * + * Typically, drivers would call their driver specific variant of + * ttm_bo_validate() from within this callback. + */ + int (*vm_bo_validate)(struct drm_gpuvm_bo *vm_bo, + struct drm_exec *exec); + /** * @sm_step_map: called from &drm_gpuvm_sm_map to finally insert the * mapping once all previous steps were completed -- cgit v1.2.3 From b0e396d68fef9c9c050dfbb590cc0066441f65c7 Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Fri, 10 Nov 2023 20:01:57 -0500 Subject: Revert "drm/sched: Define pr_fmt() for DRM using pr_*()" From Jani: The drm_print.[ch] facilities use very few pr_*() calls directly. The users of pr_*() calls do not necessarily include at all, and really don't have to. Even the ones that do include it, usually have includes first, and includes next. Notably, includes . And, of course, defines pr_fmt() itself if not already defined. No, it's encouraged not to use pr_*() at all, and prefer drm device based logging, or device based logging. This reverts commit 36245bd02e88e68ac5955c2958c968879d7b75a9. Signed-off-by: Luben Tuikov Link: https://patchwork.freedesktop.org/patch/msgid/878r75wzm9.fsf@intel.com Acked-by: Javier Martinez Canillas Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231111024130.11464-2-ltuikov89@gmail.com --- include/drm/drm_print.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/drm') diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index e8fe60d0eb87..a93a387f8a1a 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -26,20 +26,6 @@ #ifndef DRM_PRINT_H_ #define DRM_PRINT_H_ -/* Define this before including linux/printk.h, so that the format - * string in pr_*() macros is correctly set for DRM. If a file wants - * to define this to something else, it should do so before including - * this header file. - * - * It is encouraged code using pr_err() to prefix their format with - * the string "*ERROR* ", to make it easier to scan kernel logs. For - * instance, - * pr_err("*ERROR* ", args). 
- */ -#ifndef pr_fmt -#define pr_fmt(fmt) "[drm] " fmt -#endif - #include #include #include -- cgit v1.2.3 From 38b2d9d385102f430eb023aee1ed0ed37d9173f5 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 9 Oct 2023 16:06:30 +0200 Subject: drm/format-helper: Cache buffers with struct drm_format_conv_state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hold temporary memory for format conversion in an instance of struct drm_format_conv_state. Update internal helpers of DRM's format-conversion code accordingly. Drivers will later be able to maintain this cache by themselves. Besides caching, struct drm_format_conv_state will be useful to hold additional information for format conversion, such as palette data or foreground/background colors. This will enable conversion from indexed color formats to component-based formats. v5: * improve documentation (Javier, Noralf) v3: * rename struct drm_xfrm_buf to struct drm_format_conv_state (Javier) * remove managed cleanup * add drm_format_conv_state_copy() for shadow-plane support Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Acked-by: Noralf Trønnes Tested-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20231009141018.11291-2-tzimmermann@suse.de --- drivers/gpu/drm/drm_format_helper.c | 118 ++++++++++++++++++++++++++++++++---- include/drm/drm_format_helper.h | 51 ++++++++++++++++ 2 files changed, 158 insertions(+), 11 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c index f93a4efcee90..28d013d42778 100644 --- a/drivers/gpu/drm/drm_format_helper.c +++ b/drivers/gpu/drm/drm_format_helper.c @@ -20,6 +20,97 @@ #include #include +/** + * drm_format_conv_state_init - Initialize format-conversion state + * @state: The state to initialize + * + * Clears all fields in struct drm_format_conv_state. The state will + * be empty with no preallocated resources. + */ +void drm_format_conv_state_init(struct drm_format_conv_state *state) +{ + state->tmp.mem = NULL; + state->tmp.size = 0; + state->tmp.preallocated = false; +} +EXPORT_SYMBOL(drm_format_conv_state_init); + +/** + * drm_format_conv_state_copy - Copy format-conversion state + * @state: Destination state + * @old_state: Source state + * + * Copies format-conversion state from @old_state to @state; except for + * temporary storage. + */ +void drm_format_conv_state_copy(struct drm_format_conv_state *state, + const struct drm_format_conv_state *old_state) +{ + /* + * So far, there's only temporary storage here, which we don't + * duplicate. Just clear the fields. + */ + state->tmp.mem = NULL; + state->tmp.size = 0; + state->tmp.preallocated = false; +} +EXPORT_SYMBOL(drm_format_conv_state_copy); + +/** + * drm_format_conv_state_reserve - Allocates storage for format conversion + * @state: The format-conversion state + * @new_size: The minimum allocation size + * @flags: Flags for kmalloc() + * + * Allocates at least @new_size bytes and returns a pointer to the memory + * range. After calling this function, previously returned memory blocks + * are invalid. It's best to collect all memory requirements of a format + * conversion and call this function once to allocate the range. + * + * Returns: + * A pointer to the allocated memory range, or NULL otherwise. 
+ */ +void *drm_format_conv_state_reserve(struct drm_format_conv_state *state, + size_t new_size, gfp_t flags) +{ + void *mem; + + if (new_size <= state->tmp.size) + goto out; + else if (state->tmp.preallocated) + return NULL; + + mem = krealloc(state->tmp.mem, new_size, flags); + if (!mem) + return NULL; + + state->tmp.mem = mem; + state->tmp.size = new_size; + +out: + return state->tmp.mem; +} +EXPORT_SYMBOL(drm_format_conv_state_reserve); + +/** + * drm_format_conv_state_release - Releases an format-conversion storage + * @state: The format-conversion state + * + * Releases the memory range references by the format-conversion state. + * After this call, all pointers to the memory are invalid. Prefer + * drm_format_conv_state_init() for cleaning up and unloading a driver. + */ +void drm_format_conv_state_release(struct drm_format_conv_state *state) +{ + if (state->tmp.preallocated) + return; + + kfree(state->tmp.mem); + state->tmp.mem = NULL; + state->tmp.size = 0; +} +EXPORT_SYMBOL(drm_format_conv_state_release); + static unsigned int clip_offset(const struct drm_rect *clip, unsigned int pitch, unsigned int cpp) { return clip->y1 * pitch + clip->x1 * cpp; @@ -45,6 +136,7 @@ EXPORT_SYMBOL(drm_fb_clip_offset); static int __drm_fb_xfrm(void *dst, unsigned long dst_pitch, unsigned long dst_pixsize, const void *vaddr, const struct drm_framebuffer *fb, const struct drm_rect *clip, bool vaddr_cached_hint, + struct drm_format_conv_state *state, void (*xfrm_line)(void *dbuf, const void *sbuf, unsigned int npixels)) { unsigned long linepixels = drm_rect_width(clip); @@ -60,7 +152,7 @@ static int __drm_fb_xfrm(void *dst, unsigned long dst_pitch, unsigned long dst_p * one line at a time. */ if (!vaddr_cached_hint) { - stmp = kmalloc(sbuf_len, GFP_KERNEL); + stmp = drm_format_conv_state_reserve(state, sbuf_len, GFP_KERNEL); if (!stmp) return -ENOMEM; } @@ -79,8 +171,6 @@ static int __drm_fb_xfrm(void *dst, unsigned long dst_pitch, unsigned long dst_p dst += dst_pitch; } - kfree(stmp); - return 0; } @@ -88,6 +178,7 @@ static int __drm_fb_xfrm(void *dst, unsigned long dst_pitch, unsigned long dst_p static int __drm_fb_xfrm_toio(void __iomem *dst, unsigned long dst_pitch, unsigned long dst_pixsize, const void *vaddr, const struct drm_framebuffer *fb, const struct drm_rect *clip, bool vaddr_cached_hint, + struct drm_format_conv_state *state, void (*xfrm_line)(void *dbuf, const void *sbuf, unsigned int npixels)) { unsigned long linepixels = drm_rect_width(clip); @@ -101,9 +192,9 @@ static int __drm_fb_xfrm_toio(void __iomem *dst, unsigned long dst_pitch, unsign void *dbuf; if (vaddr_cached_hint) { - dbuf = kmalloc(dbuf_len, GFP_KERNEL); + dbuf = drm_format_conv_state_reserve(state, dbuf_len, GFP_KERNEL); } else { - dbuf = kmalloc(stmp_off + sbuf_len, GFP_KERNEL); + dbuf = drm_format_conv_state_reserve(state, stmp_off + sbuf_len, GFP_KERNEL); stmp = dbuf + stmp_off; } if (!dbuf) @@ -124,8 +215,6 @@ static int __drm_fb_xfrm_toio(void __iomem *dst, unsigned long dst_pitch, unsign dst += dst_pitch; } - kfree(dbuf); - return 0; } @@ -139,17 +228,24 @@ static int drm_fb_xfrm(struct iosys_map *dst, static const unsigned int default_dst_pitch[DRM_FORMAT_MAX_PLANES] = { 0, 0, 0, 0 }; + struct drm_format_conv_state fmtcnv_state = DRM_FORMAT_CONV_STATE_INIT; + int ret; if (!dst_pitch) dst_pitch = default_dst_pitch; /* TODO: handle src in I/O memory here */ if (dst[0].is_iomem) - return __drm_fb_xfrm_toio(dst[0].vaddr_iomem, dst_pitch[0], dst_pixsize[0], - src[0].vaddr, fb, clip, vaddr_cached_hint, xfrm_line); + ret 
= __drm_fb_xfrm_toio(dst[0].vaddr_iomem, dst_pitch[0], dst_pixsize[0], + src[0].vaddr, fb, clip, vaddr_cached_hint, &fmtcnv_state, + xfrm_line); else - return __drm_fb_xfrm(dst[0].vaddr, dst_pitch[0], dst_pixsize[0], - src[0].vaddr, fb, clip, vaddr_cached_hint, xfrm_line); + ret = __drm_fb_xfrm(dst[0].vaddr, dst_pitch[0], dst_pixsize[0], + src[0].vaddr, fb, clip, vaddr_cached_hint, &fmtcnv_state, + xfrm_line); + drm_format_conv_state_release(&fmtcnv_state); + + return ret; } /** diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h index 291deb09475b..724a9baf7315 100644 --- a/include/drm/drm_format_helper.h +++ b/include/drm/drm_format_helper.h @@ -15,6 +15,57 @@ struct drm_rect; struct iosys_map; +/** + * struct drm_format_conv_state - Stores format-conversion state + * + * DRM helpers for format conversion store temporary state in + * struct drm_xfrm_buf. The buffer's resources can be reused + * among multiple conversion operations. + * + * All fields are considered private. + */ +struct drm_format_conv_state { + struct { + void *mem; + size_t size; + bool preallocated; + } tmp; +}; + +#define __DRM_FORMAT_CONV_STATE_INIT(_mem, _size, _preallocated) { \ + .tmp = { \ + .mem = (_mem), \ + .size = (_size), \ + .preallocated = (_preallocated), \ + } \ + } + +/** + * DRM_FORMAT_CONV_STATE_INIT - Initializer for struct drm_format_conv_state + * + * Initializes an instance of struct drm_format_conv_state to default values. + */ +#define DRM_FORMAT_CONV_STATE_INIT \ + __DRM_FORMAT_CONV_STATE_INIT(NULL, 0, false) + +/** + * DRM_FORMAT_CONV_STATE_INIT_PREALLOCATED - Initializer for struct drm_format_conv_state + * @_mem: The preallocated memory area + * @_size: The number of bytes in _mem + * + * Initializes an instance of struct drm_format_conv_state to preallocated + * storage. The caller is responsible for releasing the provided memory range. + */ +#define DRM_FORMAT_CONV_STATE_INIT_PREALLOCATED(_mem, _size) \ + __DRM_FORMAT_CONV_STATE_INIT(_mem, _size, true) + +void drm_format_conv_state_init(struct drm_format_conv_state *state); +void drm_format_conv_state_copy(struct drm_format_conv_state *state, + const struct drm_format_conv_state *old_state); +void *drm_format_conv_state_reserve(struct drm_format_conv_state *state, + size_t new_size, gfp_t flags); +void drm_format_conv_state_release(struct drm_format_conv_state *state); + unsigned int drm_fb_clip_offset(unsigned int pitch, const struct drm_format_info *format, const struct drm_rect *clip); -- cgit v1.2.3 From 903674588a48df25bb79b1bedbfc48450f1d5d8f Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 9 Oct 2023 16:06:31 +0200 Subject: drm/atomic-helper: Add format-conversion state to shadow-plane state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Store an instance of struct drm_format_conv_state in the shadow-plane state struct drm_shadow_plane_state. Many drivers with shadow planes use DRM's format helpers to copy or convert the framebuffer data to backing storage in the scanout buffer. The shadow plane provides the necessary state and manages the conversion's intermediate buffer memory. 
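As a rough sketch (the foo_ prefix and the surrounding plane code are placeholders, not part of this patch; helpers that actually consume the state follow in a later patch), a driver's plane update can pick up the per-plane conversion state from its shadow-plane state instead of allocating temporary buffers itself:

	static void foo_plane_atomic_update(struct drm_plane *plane,
					    struct drm_atomic_state *state)
	{
		struct drm_plane_state *plane_state =
			drm_atomic_get_new_plane_state(state, plane);
		struct drm_shadow_plane_state *shadow_plane_state =
			to_drm_shadow_plane_state(plane_state);
		struct drm_format_conv_state *fmtcnv_state =
			&shadow_plane_state->fmtcnv_state;

		/*
		 * The shadow-plane reset, duplicate and destroy helpers
		 * manage the lifetime of fmtcnv_state; the driver only
		 * borrows it here.
		 */

		/* ... format conversion using fmtcnv_state goes here ... */
	}
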
Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Acked-by: Noralf Trønnes Tested-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20231009141018.11291-3-tzimmermann@suse.de --- drivers/gpu/drm/drm_gem_atomic_helper.c | 9 +++++++++ include/drm/drm_gem_atomic_helper.h | 10 ++++++++++ 2 files changed, 19 insertions(+) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c index 5d4b9cd077f7..e440f458b663 100644 --- a/drivers/gpu/drm/drm_gem_atomic_helper.c +++ b/drivers/gpu/drm/drm_gem_atomic_helper.c @@ -218,7 +218,14 @@ void __drm_gem_duplicate_shadow_plane_state(struct drm_plane *plane, struct drm_shadow_plane_state *new_shadow_plane_state) { + struct drm_plane_state *plane_state = plane->state; + struct drm_shadow_plane_state *shadow_plane_state = + to_drm_shadow_plane_state(plane_state); + __drm_atomic_helper_plane_duplicate_state(plane, &new_shadow_plane_state->base); + + drm_format_conv_state_copy(&shadow_plane_state->fmtcnv_state, + &new_shadow_plane_state->fmtcnv_state); } EXPORT_SYMBOL(__drm_gem_duplicate_shadow_plane_state); @@ -266,6 +273,7 @@ EXPORT_SYMBOL(drm_gem_duplicate_shadow_plane_state); */ void __drm_gem_destroy_shadow_plane_state(struct drm_shadow_plane_state *shadow_plane_state) { + drm_format_conv_state_release(&shadow_plane_state->fmtcnv_state); __drm_atomic_helper_plane_destroy_state(&shadow_plane_state->base); } EXPORT_SYMBOL(__drm_gem_destroy_shadow_plane_state); @@ -302,6 +310,7 @@ void __drm_gem_reset_shadow_plane(struct drm_plane *plane, struct drm_shadow_plane_state *shadow_plane_state) { __drm_atomic_helper_plane_reset(plane, &shadow_plane_state->base); + drm_format_conv_state_init(&shadow_plane_state->fmtcnv_state); } EXPORT_SYMBOL(__drm_gem_reset_shadow_plane); diff --git a/include/drm/drm_gem_atomic_helper.h b/include/drm/drm_gem_atomic_helper.h index 40b8b039518e..3e01c619a25e 100644 --- a/include/drm/drm_gem_atomic_helper.h +++ b/include/drm/drm_gem_atomic_helper.h @@ -5,6 +5,7 @@ #include +#include #include #include @@ -49,6 +50,15 @@ struct drm_shadow_plane_state { /** @base: plane state */ struct drm_plane_state base; + /** + * @fmtcnv_state: Format-conversion state + * + * Per-plane state for format conversion. + * Flags for copying shadow buffers into backend storage. Also holds + * temporary storage for format conversion. + */ + struct drm_format_conv_state fmtcnv_state; + /* Transitional state - do not export or duplicate */ /** -- cgit v1.2.3 From 4cd24d4b1a9548f42cdb7f449edc6f869a8ae730 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 9 Oct 2023 16:06:32 +0200 Subject: drm/format-helper: Pass format-conversion state to helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass an instance of struct drm_format_conv_state to DRM's format conversion helpers. Update all callers. Most drivers can use the format-conversion state from their shadow- plane state. The shadow plane's destroy function releases the allocated buffer. Drivers will later be able to allocate a buffer of appropriate size in their plane's atomic_check code. The gud driver uses a separate thread for committing updates. For now, the update worker contains its own format-conversion state. Images in the format-helper tests are small. The tests preallocate a static page for the temporary buffer. Unloading the module releases the memory. 
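Call sites change in a uniform way. A minimal before/after sketch for one of the converted helpers (dst, src, fb and clip stand for a driver's existing destination mapping, source mapping, framebuffer and damage clip; the conversion state comes from the shadow-plane state added in the previous patch):

	/* before */
	drm_fb_xrgb8888_to_rgb565(&dst, NULL, &src, fb, &clip, false);

	/* after */
	drm_fb_xrgb8888_to_rgb565(&dst, NULL, &src, fb, &clip,
				  &shadow_plane_state->fmtcnv_state, false);

Passing the state explicitly keeps the helpers free of hidden allocations and lets callers reuse a single temporary buffer across many conversions.
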
v6: * update patch for ssd132x support v5: * avoid using unusupported shadow-plane state in repaper (Noralf) * fix documentation (Noralf, kernel test robot) v3: * store buffer in shadow-plane state (Javier, Maxime) * replace ARRAY_SIZE() with sizeof() (Jani) Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Tested-by: Javier Martinez Canillas # ssd130x Cc: Noralf Trønnes Cc: Javier Martinez Canillas Cc: Gerd Hoffmann Cc: David Lechner Link: https://patchwork.freedesktop.org/patch/msgid/20231009141018.11291-4-tzimmermann@suse.de --- drivers/gpu/drm/drm_format_helper.c | 123 ++++++++++++++----------- drivers/gpu/drm/drm_mipi_dbi.c | 19 ++-- drivers/gpu/drm/gud/gud_pipe.c | 30 +++--- drivers/gpu/drm/solomon/ssd130x.c | 16 ++-- drivers/gpu/drm/tests/drm_format_helper_test.c | 72 +++++++++------ drivers/gpu/drm/tiny/cirrus.c | 3 +- drivers/gpu/drm/tiny/ili9225.c | 10 +- drivers/gpu/drm/tiny/ofdrm.c | 2 +- drivers/gpu/drm/tiny/repaper.c | 10 +- drivers/gpu/drm/tiny/simpledrm.c | 2 +- drivers/gpu/drm/tiny/st7586.c | 19 ++-- include/drm/drm_format_helper.h | 30 +++--- include/drm/drm_mipi_dbi.h | 4 +- 13 files changed, 200 insertions(+), 140 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c index 28d013d42778..b1be458ed4dd 100644 --- a/drivers/gpu/drm/drm_format_helper.c +++ b/drivers/gpu/drm/drm_format_helper.c @@ -223,29 +223,25 @@ static int drm_fb_xfrm(struct iosys_map *dst, const unsigned int *dst_pitch, const u8 *dst_pixsize, const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, bool vaddr_cached_hint, + struct drm_format_conv_state *state, void (*xfrm_line)(void *dbuf, const void *sbuf, unsigned int npixels)) { static const unsigned int default_dst_pitch[DRM_FORMAT_MAX_PLANES] = { 0, 0, 0, 0 }; - struct drm_format_conv_state fmtcnv_state = DRM_FORMAT_CONV_STATE_INIT; - int ret; if (!dst_pitch) dst_pitch = default_dst_pitch; /* TODO: handle src in I/O memory here */ if (dst[0].is_iomem) - ret = __drm_fb_xfrm_toio(dst[0].vaddr_iomem, dst_pitch[0], dst_pixsize[0], - src[0].vaddr, fb, clip, vaddr_cached_hint, &fmtcnv_state, - xfrm_line); + return __drm_fb_xfrm_toio(dst[0].vaddr_iomem, dst_pitch[0], dst_pixsize[0], + src[0].vaddr, fb, clip, vaddr_cached_hint, state, + xfrm_line); else - ret = __drm_fb_xfrm(dst[0].vaddr, dst_pitch[0], dst_pixsize[0], - src[0].vaddr, fb, clip, vaddr_cached_hint, &fmtcnv_state, - xfrm_line); - drm_format_conv_state_release(&fmtcnv_state); - - return ret; + return __drm_fb_xfrm(dst[0].vaddr, dst_pitch[0], dst_pixsize[0], + src[0].vaddr, fb, clip, vaddr_cached_hint, state, + xfrm_line); } /** @@ -331,6 +327,7 @@ static void drm_fb_swab32_line(void *dbuf, const void *sbuf, unsigned int pixels * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @cached: Source buffer is mapped cached (eg. not write-combined) + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and swaps per-pixel * bytes during the process. Destination and framebuffer formats must match. 
The @@ -345,7 +342,8 @@ static void drm_fb_swab32_line(void *dbuf, const void *sbuf, unsigned int pixels */ void drm_fb_swab(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip, bool cached) + const struct drm_rect *clip, bool cached, + struct drm_format_conv_state *state) { const struct drm_format_info *format = fb->format; u8 cpp = DIV_ROUND_UP(drm_format_info_bpp(format, 0), 8); @@ -364,7 +362,7 @@ void drm_fb_swab(struct iosys_map *dst, const unsigned int *dst_pitch, return; } - drm_fb_xfrm(dst, dst_pitch, &cpp, src, fb, clip, cached, swab_line); + drm_fb_xfrm(dst, dst_pitch, &cpp, src, fb, clip, cached, state, swab_line); } EXPORT_SYMBOL(drm_fb_swab); @@ -391,6 +389,7 @@ static void drm_fb_xrgb8888_to_rgb332_line(void *dbuf, const void *sbuf, unsigne * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. The @@ -405,13 +404,13 @@ static void drm_fb_xrgb8888_to_rgb332_line(void *dbuf, const void *sbuf, unsigne */ void drm_fb_xrgb8888_to_rgb332(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 1, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_rgb332_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb332); @@ -460,6 +459,7 @@ static void drm_fb_xrgb8888_to_rgb565_swab_line(void *dbuf, const void *sbuf, * @src: Array of XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * @swab: Swap bytes * * This function copies parts of a framebuffer to display memory and converts the @@ -475,7 +475,8 @@ static void drm_fb_xrgb8888_to_rgb565_swab_line(void *dbuf, const void *sbuf, */ void drm_fb_xrgb8888_to_rgb565(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip, bool swab) + const struct drm_rect *clip, struct drm_format_conv_state *state, + bool swab) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 2, @@ -488,7 +489,7 @@ void drm_fb_xrgb8888_to_rgb565(struct iosys_map *dst, const unsigned int *dst_pi else xfrm_line = drm_fb_xrgb8888_to_rgb565_line; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, xfrm_line); + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, xfrm_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb565); @@ -517,6 +518,7 @@ static void drm_fb_xrgb8888_to_xrgb1555_line(void *dbuf, const void *sbuf, unsig * @src: Array of XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts * the color format during the process. 
The parameters @dst, @dst_pitch and @@ -532,13 +534,13 @@ static void drm_fb_xrgb8888_to_xrgb1555_line(void *dbuf, const void *sbuf, unsig */ void drm_fb_xrgb8888_to_xrgb1555(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 2, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_xrgb1555_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_xrgb1555); @@ -569,6 +571,7 @@ static void drm_fb_xrgb8888_to_argb1555_line(void *dbuf, const void *sbuf, unsig * @src: Array of XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts * the color format during the process. The parameters @dst, @dst_pitch and @@ -584,13 +587,13 @@ static void drm_fb_xrgb8888_to_argb1555_line(void *dbuf, const void *sbuf, unsig */ void drm_fb_xrgb8888_to_argb1555(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 2, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_argb1555_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_argb1555); @@ -621,6 +624,7 @@ static void drm_fb_xrgb8888_to_rgba5551_line(void *dbuf, const void *sbuf, unsig * @src: Array of XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts * the color format during the process. The parameters @dst, @dst_pitch and @@ -636,13 +640,13 @@ static void drm_fb_xrgb8888_to_rgba5551_line(void *dbuf, const void *sbuf, unsig */ void drm_fb_xrgb8888_to_rgba5551(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 2, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_rgba5551_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgba5551); @@ -671,6 +675,7 @@ static void drm_fb_xrgb8888_to_rgb888_line(void *dbuf, const void *sbuf, unsigne * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. 
The @@ -686,13 +691,13 @@ static void drm_fb_xrgb8888_to_rgb888_line(void *dbuf, const void *sbuf, unsigne */ void drm_fb_xrgb8888_to_rgb888(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 3, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_rgb888_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb888); @@ -719,6 +724,7 @@ static void drm_fb_xrgb8888_to_argb8888_line(void *dbuf, const void *sbuf, unsig * @src: Array of XRGB8888 source buffer * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. The parameters @dst, @dst_pitch and @src refer @@ -734,13 +740,13 @@ static void drm_fb_xrgb8888_to_argb8888_line(void *dbuf, const void *sbuf, unsig */ void drm_fb_xrgb8888_to_argb8888(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 4, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_argb8888_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_argb8888); @@ -765,13 +771,14 @@ static void drm_fb_xrgb8888_to_abgr8888_line(void *dbuf, const void *sbuf, unsig static void drm_fb_xrgb8888_to_abgr8888(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, + struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 4, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_abgr8888_line); } @@ -795,13 +802,14 @@ static void drm_fb_xrgb8888_to_xbgr8888_line(void *dbuf, const void *sbuf, unsig static void drm_fb_xrgb8888_to_xbgr8888(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, + struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 4, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_xbgr8888_line); } @@ -831,6 +839,7 @@ static void drm_fb_xrgb8888_to_xrgb2101010_line(void *dbuf, const void *sbuf, un * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. 
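Callers that have no long-lived plane state can keep the conversion state on the stack for one update and release it afterwards, which is the pattern the gud and repaper hunks further below adopt. A short sketch under that assumption; the function name and the choice of RGB888 are illustrative only.

#include <drm/drm_format_helper.h>

static void example_flush_clip(struct iosys_map *dst, const struct iosys_map *src,
			       const struct drm_framebuffer *fb,
			       const struct drm_rect *clip)
{
	/* Scratch memory, if any, is allocated on demand and freed by the release call. */
	struct drm_format_conv_state fmtcnv_state = DRM_FORMAT_CONV_STATE_INIT;

	drm_fb_xrgb8888_to_rgb888(dst, NULL, src, fb, clip, &fmtcnv_state);

	drm_format_conv_state_release(&fmtcnv_state);
}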
The @@ -846,13 +855,14 @@ static void drm_fb_xrgb8888_to_xrgb2101010_line(void *dbuf, const void *sbuf, un */ void drm_fb_xrgb8888_to_xrgb2101010(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, + struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 4, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_xrgb2101010_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_xrgb2101010); @@ -884,6 +894,7 @@ static void drm_fb_xrgb8888_to_argb2101010_line(void *dbuf, const void *sbuf, un * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts * the color format during the process. The parameters @dst, @dst_pitch and @@ -899,13 +910,14 @@ static void drm_fb_xrgb8888_to_argb2101010_line(void *dbuf, const void *sbuf, un */ void drm_fb_xrgb8888_to_argb2101010(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, + struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 4, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_argb2101010_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_argb2101010); @@ -935,6 +947,7 @@ static void drm_fb_xrgb8888_to_gray8_line(void *dbuf, const void *sbuf, unsigned * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. The @@ -954,13 +967,13 @@ static void drm_fb_xrgb8888_to_gray8_line(void *dbuf, const void *sbuf, unsigned */ void drm_fb_xrgb8888_to_gray8(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 1, }; - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, drm_fb_xrgb8888_to_gray8_line); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_gray8); @@ -974,6 +987,7 @@ EXPORT_SYMBOL(drm_fb_xrgb8888_to_gray8); * @src: The framebuffer memory to copy from * @fb: The framebuffer to copy from * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory. 
If the * formats of the display and the framebuffer mismatch, the blit function @@ -992,7 +1006,7 @@ EXPORT_SYMBOL(drm_fb_xrgb8888_to_gray8); */ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t dst_format, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, struct drm_format_conv_state *state) { uint32_t fb_format = fb->format->format; @@ -1000,44 +1014,44 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t d drm_fb_memcpy(dst, dst_pitch, src, fb, clip); return 0; } else if (fb_format == (dst_format | DRM_FORMAT_BIG_ENDIAN)) { - drm_fb_swab(dst, dst_pitch, src, fb, clip, false); + drm_fb_swab(dst, dst_pitch, src, fb, clip, false, state); return 0; } else if (fb_format == (dst_format & ~DRM_FORMAT_BIG_ENDIAN)) { - drm_fb_swab(dst, dst_pitch, src, fb, clip, false); + drm_fb_swab(dst, dst_pitch, src, fb, clip, false, state); return 0; } else if (fb_format == DRM_FORMAT_XRGB8888) { if (dst_format == DRM_FORMAT_RGB565) { - drm_fb_xrgb8888_to_rgb565(dst, dst_pitch, src, fb, clip, false); + drm_fb_xrgb8888_to_rgb565(dst, dst_pitch, src, fb, clip, state, false); return 0; } else if (dst_format == DRM_FORMAT_XRGB1555) { - drm_fb_xrgb8888_to_xrgb1555(dst, dst_pitch, src, fb, clip); + drm_fb_xrgb8888_to_xrgb1555(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_ARGB1555) { - drm_fb_xrgb8888_to_argb1555(dst, dst_pitch, src, fb, clip); + drm_fb_xrgb8888_to_argb1555(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_RGBA5551) { - drm_fb_xrgb8888_to_rgba5551(dst, dst_pitch, src, fb, clip); + drm_fb_xrgb8888_to_rgba5551(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_RGB888) { - drm_fb_xrgb8888_to_rgb888(dst, dst_pitch, src, fb, clip); + drm_fb_xrgb8888_to_rgb888(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_ARGB8888) { - drm_fb_xrgb8888_to_argb8888(dst, dst_pitch, src, fb, clip); + drm_fb_xrgb8888_to_argb8888(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_XBGR8888) { - drm_fb_xrgb8888_to_xbgr8888(dst, dst_pitch, src, fb, clip); + drm_fb_xrgb8888_to_xbgr8888(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_ABGR8888) { - drm_fb_xrgb8888_to_abgr8888(dst, dst_pitch, src, fb, clip); + drm_fb_xrgb8888_to_abgr8888(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_XRGB2101010) { - drm_fb_xrgb8888_to_xrgb2101010(dst, dst_pitch, src, fb, clip); + drm_fb_xrgb8888_to_xrgb2101010(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_ARGB2101010) { - drm_fb_xrgb8888_to_argb2101010(dst, dst_pitch, src, fb, clip); + drm_fb_xrgb8888_to_argb2101010(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_BGRX8888) { - drm_fb_swab(dst, dst_pitch, src, fb, clip, false); + drm_fb_swab(dst, dst_pitch, src, fb, clip, false, state); return 0; } } @@ -1074,6 +1088,7 @@ static void drm_fb_gray8_to_mono_line(void *dbuf, const void *sbuf, unsigned int * @src: Array of XRGB8888 source buffers * @fb: DRM framebuffer * @clip: Clip rectangle area to copy + * @state: Transform and conversion state * * This function copies parts of a framebuffer to display memory and converts the * color format during the process. Destination and framebuffer formats must match. 
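drm_fb_blit() dispatches to the per-format helpers above and forwards the same conversion state. Drivers built on shadow-buffered planes can simply pass the state embedded in struct drm_shadow_plane_state, as the cirrus and simpledrm hunks below do; the fragment here is an illustrative sketch, with the destination mapping, pitch and format taken as given.

#include <drm/drm_format_helper.h>
#include <drm/drm_gem_atomic_helper.h>

/* Illustrative: blit one damage rectangle from a shadow-buffered plane. */
static void example_blit_damage(struct iosys_map *dst, unsigned int dst_pitch, u32 dst_format,
				struct drm_shadow_plane_state *shadow_plane_state,
				struct drm_framebuffer *fb, struct drm_rect *damage)
{
	/* Conversion scratch memory lives in the plane state and follows its lifetime. */
	drm_fb_blit(dst, &dst_pitch, dst_format, shadow_plane_state->data, fb,
		    damage, &shadow_plane_state->fmtcnv_state);
}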
The @@ -1098,7 +1113,7 @@ static void drm_fb_gray8_to_mono_line(void *dbuf, const void *sbuf, unsigned int */ void drm_fb_xrgb8888_to_mono(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip) + const struct drm_rect *clip, struct drm_format_conv_state *state) { static const unsigned int default_dst_pitch[DRM_FORMAT_MAX_PLANES] = { 0, 0, 0, 0 @@ -1138,7 +1153,7 @@ void drm_fb_xrgb8888_to_mono(struct iosys_map *dst, const unsigned int *dst_pitc * Allocate a buffer to be used for both copying from the cma * memory and to store the intermediate grayscale line pixels. */ - src32 = kmalloc(len_src32 + linepixels, GFP_KERNEL); + src32 = drm_format_conv_state_reserve(state, len_src32 + linepixels, GFP_KERNEL); if (!src32) return; @@ -1152,8 +1167,6 @@ void drm_fb_xrgb8888_to_mono(struct iosys_map *dst, const unsigned int *dst_pitc vaddr += fb->pitches[0]; mono += dst_pitch_0; } - - kfree(src32); } EXPORT_SYMBOL(drm_fb_xrgb8888_to_mono); diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c index e90f0bf895b3..daac649aabdb 100644 --- a/drivers/gpu/drm/drm_mipi_dbi.c +++ b/drivers/gpu/drm/drm_mipi_dbi.c @@ -197,12 +197,14 @@ EXPORT_SYMBOL(mipi_dbi_command_stackbuf); * @fb: The source framebuffer * @clip: Clipping rectangle of the area to be copied * @swap: When true, swap MSB/LSB of 16-bit values + * @fmtcnv_state: Format-conversion state * * Returns: * Zero on success, negative error code on failure. */ int mipi_dbi_buf_copy(void *dst, struct iosys_map *src, struct drm_framebuffer *fb, - struct drm_rect *clip, bool swap) + struct drm_rect *clip, bool swap, + struct drm_format_conv_state *fmtcnv_state) { struct drm_gem_object *gem = drm_gem_fb_get_obj(fb, 0); struct iosys_map dst_map = IOSYS_MAP_INIT_VADDR(dst); @@ -215,12 +217,13 @@ int mipi_dbi_buf_copy(void *dst, struct iosys_map *src, struct drm_framebuffer * switch (fb->format->format) { case DRM_FORMAT_RGB565: if (swap) - drm_fb_swab(&dst_map, NULL, src, fb, clip, !gem->import_attach); + drm_fb_swab(&dst_map, NULL, src, fb, clip, !gem->import_attach, + fmtcnv_state); else drm_fb_memcpy(&dst_map, NULL, src, fb, clip); break; case DRM_FORMAT_XRGB8888: - drm_fb_xrgb8888_to_rgb565(&dst_map, NULL, src, fb, clip, swap); + drm_fb_xrgb8888_to_rgb565(&dst_map, NULL, src, fb, clip, fmtcnv_state, swap); break; default: drm_err_once(fb->dev, "Format is not supported: %p4cc\n", @@ -252,7 +255,7 @@ static void mipi_dbi_set_window_address(struct mipi_dbi_dev *dbidev, } static void mipi_dbi_fb_dirty(struct iosys_map *src, struct drm_framebuffer *fb, - struct drm_rect *rect) + struct drm_rect *rect, struct drm_format_conv_state *fmtcnv_state) { struct mipi_dbi_dev *dbidev = drm_to_mipi_dbi_dev(fb->dev); unsigned int height = rect->y2 - rect->y1; @@ -270,7 +273,7 @@ static void mipi_dbi_fb_dirty(struct iosys_map *src, struct drm_framebuffer *fb, if (!dbi->dc || !full || swap || fb->format->format == DRM_FORMAT_XRGB8888) { tr = dbidev->tx_buf; - ret = mipi_dbi_buf_copy(tr, src, fb, rect, swap); + ret = mipi_dbi_buf_copy(tr, src, fb, rect, swap, fmtcnv_state); if (ret) goto err_msg; } else { @@ -332,7 +335,8 @@ void mipi_dbi_pipe_update(struct drm_simple_display_pipe *pipe, return; if (drm_atomic_helper_damage_merged(old_state, state, &rect)) - mipi_dbi_fb_dirty(&shadow_plane_state->data[0], fb, &rect); + mipi_dbi_fb_dirty(&shadow_plane_state->data[0], fb, &rect, + &shadow_plane_state->fmtcnv_state); drm_dev_exit(idx); } @@ -368,7 +372,8 @@ void 
mipi_dbi_enable_flush(struct mipi_dbi_dev *dbidev, if (!drm_dev_enter(&dbidev->drm, &idx)) return; - mipi_dbi_fb_dirty(&shadow_plane_state->data[0], fb, &rect); + mipi_dbi_fb_dirty(&shadow_plane_state->data[0], fb, &rect, + &shadow_plane_state->fmtcnv_state); backlight_enable(dbidev->backlight); drm_dev_exit(idx); diff --git a/drivers/gpu/drm/gud/gud_pipe.c b/drivers/gpu/drm/gud/gud_pipe.c index d2f199ea3c11..28171de84718 100644 --- a/drivers/gpu/drm/gud/gud_pipe.c +++ b/drivers/gpu/drm/gud/gud_pipe.c @@ -51,7 +51,8 @@ static bool gud_is_big_endian(void) static size_t gud_xrgb8888_to_r124(u8 *dst, const struct drm_format_info *format, void *src, struct drm_framebuffer *fb, - struct drm_rect *rect) + struct drm_rect *rect, + struct drm_format_conv_state *fmtcnv_state) { unsigned int block_width = drm_format_info_block_width(format, 0); unsigned int bits_per_pixel = 8 / block_width; @@ -75,7 +76,7 @@ static size_t gud_xrgb8888_to_r124(u8 *dst, const struct drm_format_info *format iosys_map_set_vaddr(&dst_map, buf); iosys_map_set_vaddr(&vmap, src); - drm_fb_xrgb8888_to_gray8(&dst_map, NULL, &vmap, fb, rect); + drm_fb_xrgb8888_to_gray8(&dst_map, NULL, &vmap, fb, rect, fmtcnv_state); pix8 = buf; for (y = 0; y < height; y++) { @@ -152,7 +153,8 @@ static size_t gud_xrgb8888_to_color(u8 *dst, const struct drm_format_info *forma static int gud_prep_flush(struct gud_device *gdrm, struct drm_framebuffer *fb, const struct iosys_map *src, bool cached_reads, const struct drm_format_info *format, struct drm_rect *rect, - struct gud_set_buffer_req *req) + struct gud_set_buffer_req *req, + struct drm_format_conv_state *fmtcnv_state) { u8 compression = gdrm->compression; struct iosys_map dst; @@ -178,23 +180,23 @@ retry: */ if (format != fb->format) { if (format->format == GUD_DRM_FORMAT_R1) { - len = gud_xrgb8888_to_r124(buf, format, vaddr, fb, rect); + len = gud_xrgb8888_to_r124(buf, format, vaddr, fb, rect, fmtcnv_state); if (!len) return -ENOMEM; } else if (format->format == DRM_FORMAT_R8) { - drm_fb_xrgb8888_to_gray8(&dst, NULL, src, fb, rect); + drm_fb_xrgb8888_to_gray8(&dst, NULL, src, fb, rect, fmtcnv_state); } else if (format->format == DRM_FORMAT_RGB332) { - drm_fb_xrgb8888_to_rgb332(&dst, NULL, src, fb, rect); + drm_fb_xrgb8888_to_rgb332(&dst, NULL, src, fb, rect, fmtcnv_state); } else if (format->format == DRM_FORMAT_RGB565) { - drm_fb_xrgb8888_to_rgb565(&dst, NULL, src, fb, rect, + drm_fb_xrgb8888_to_rgb565(&dst, NULL, src, fb, rect, fmtcnv_state, gud_is_big_endian()); } else if (format->format == DRM_FORMAT_RGB888) { - drm_fb_xrgb8888_to_rgb888(&dst, NULL, src, fb, rect); + drm_fb_xrgb8888_to_rgb888(&dst, NULL, src, fb, rect, fmtcnv_state); } else { len = gud_xrgb8888_to_color(buf, format, vaddr, fb, rect); } } else if (gud_is_big_endian() && format->cpp[0] > 1) { - drm_fb_swab(&dst, NULL, src, fb, rect, cached_reads); + drm_fb_swab(&dst, NULL, src, fb, rect, cached_reads, fmtcnv_state); } else if (compression && cached_reads && pitch == fb->pitches[0]) { /* can compress directly from the framebuffer */ buf = vaddr + rect->y1 * pitch; @@ -266,7 +268,8 @@ static int gud_usb_bulk(struct gud_device *gdrm, size_t len) static int gud_flush_rect(struct gud_device *gdrm, struct drm_framebuffer *fb, const struct iosys_map *src, bool cached_reads, - const struct drm_format_info *format, struct drm_rect *rect) + const struct drm_format_info *format, struct drm_rect *rect, + struct drm_format_conv_state *fmtcnv_state) { struct gud_set_buffer_req req; size_t len, trlen; @@ -274,7 +277,7 @@ static int 
gud_flush_rect(struct gud_device *gdrm, struct drm_framebuffer *fb, drm_dbg(&gdrm->drm, "Flushing [FB:%d] " DRM_RECT_FMT "\n", fb->base.id, DRM_RECT_ARG(rect)); - ret = gud_prep_flush(gdrm, fb, src, cached_reads, format, rect, &req); + ret = gud_prep_flush(gdrm, fb, src, cached_reads, format, rect, &req, fmtcnv_state); if (ret) return ret; @@ -318,6 +321,7 @@ static void gud_flush_damage(struct gud_device *gdrm, struct drm_framebuffer *fb const struct iosys_map *src, bool cached_reads, struct drm_rect *damage) { + struct drm_format_conv_state fmtcnv_state = DRM_FORMAT_CONV_STATE_INIT; const struct drm_format_info *format; unsigned int i, lines; size_t pitch; @@ -340,7 +344,7 @@ static void gud_flush_damage(struct gud_device *gdrm, struct drm_framebuffer *fb rect.y1 += i * lines; rect.y2 = min_t(u32, rect.y1 + lines, damage->y2); - ret = gud_flush_rect(gdrm, fb, src, cached_reads, format, &rect); + ret = gud_flush_rect(gdrm, fb, src, cached_reads, format, &rect, &fmtcnv_state); if (ret) { if (ret != -ENODEV && ret != -ECONNRESET && ret != -ESHUTDOWN && ret != -EPROTO) @@ -350,6 +354,8 @@ static void gud_flush_damage(struct gud_device *gdrm, struct drm_framebuffer *fb break; } } + + drm_format_conv_state_release(&fmtcnv_state); } void gud_flush_work(struct work_struct *work) diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c index e0174f82e353..ee0a06c50692 100644 --- a/drivers/gpu/drm/solomon/ssd130x.c +++ b/drivers/gpu/drm/solomon/ssd130x.c @@ -808,7 +808,8 @@ static void ssd132x_clear_screen(struct ssd130x_device *ssd130x, u8 *data_array) static int ssd130x_fb_blit_rect(struct drm_framebuffer *fb, const struct iosys_map *vmap, struct drm_rect *rect, - u8 *buf, u8 *data_array) + u8 *buf, u8 *data_array, + struct drm_format_conv_state *fmtcnv_state) { struct ssd130x_device *ssd130x = drm_to_ssd130x(fb->dev); struct iosys_map dst; @@ -826,7 +827,7 @@ static int ssd130x_fb_blit_rect(struct drm_framebuffer *fb, return ret; iosys_map_set_vaddr(&dst, buf); - drm_fb_xrgb8888_to_mono(&dst, &dst_pitch, vmap, fb, rect); + drm_fb_xrgb8888_to_mono(&dst, &dst_pitch, vmap, fb, rect, fmtcnv_state); drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); @@ -838,7 +839,8 @@ static int ssd130x_fb_blit_rect(struct drm_framebuffer *fb, static int ssd132x_fb_blit_rect(struct drm_framebuffer *fb, const struct iosys_map *vmap, struct drm_rect *rect, u8 *buf, - u8 *data_array) + u8 *data_array, + struct drm_format_conv_state *fmtcnv_state) { struct ssd130x_device *ssd130x = drm_to_ssd130x(fb->dev); unsigned int dst_pitch = drm_rect_width(rect); @@ -855,7 +857,7 @@ static int ssd132x_fb_blit_rect(struct drm_framebuffer *fb, return ret; iosys_map_set_vaddr(&dst, buf); - drm_fb_xrgb8888_to_gray8(&dst, &dst_pitch, vmap, fb, rect); + drm_fb_xrgb8888_to_gray8(&dst, &dst_pitch, vmap, fb, rect, fmtcnv_state); drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); @@ -968,7 +970,8 @@ static void ssd130x_primary_plane_atomic_update(struct drm_plane *plane, ssd130x_fb_blit_rect(fb, &shadow_plane_state->data[0], &dst_clip, ssd130x_plane_state->buffer, - ssd130x_crtc_state->data_array); + ssd130x_crtc_state->data_array, + &shadow_plane_state->fmtcnv_state); } drm_dev_exit(idx); @@ -1002,7 +1005,8 @@ static void ssd132x_primary_plane_atomic_update(struct drm_plane *plane, ssd132x_fb_blit_rect(fb, &shadow_plane_state->data[0], &dst_clip, ssd130x_plane_state->buffer, - ssd130x_crtc_state->data_array); + ssd130x_crtc_state->data_array, + &shadow_plane_state->fmtcnv_state); } drm_dev_exit(idx); diff --git 
a/drivers/gpu/drm/tests/drm_format_helper_test.c b/drivers/gpu/drm/tests/drm_format_helper_test.c index f6408e56f786..08992636ec05 100644 --- a/drivers/gpu/drm/tests/drm_format_helper_test.c +++ b/drivers/gpu/drm/tests/drm_format_helper_test.c @@ -20,6 +20,10 @@ #define TEST_USE_DEFAULT_PITCH 0 +static unsigned char fmtcnv_state_mem[PAGE_SIZE]; +static struct drm_format_conv_state fmtcnv_state = + DRM_FORMAT_CONV_STATE_INIT_PREALLOCATED(fmtcnv_state_mem, sizeof(fmtcnv_state_mem)); + struct convert_to_gray8_result { unsigned int dst_pitch; const u8 expected[TEST_BUF_SIZE]; @@ -630,8 +634,7 @@ static void drm_test_fb_xrgb8888_to_gray8(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_gray8(&dst, dst_pitch, &src, &fb, ¶ms->clip); - + drm_fb_xrgb8888_to_gray8(&dst, dst_pitch, &src, &fb, ¶ms->clip, &fmtcnv_state); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); } @@ -664,7 +667,7 @@ static void drm_test_fb_xrgb8888_to_rgb332(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_rgb332(&dst, dst_pitch, &src, &fb, ¶ms->clip); + drm_fb_xrgb8888_to_rgb332(&dst, dst_pitch, &src, &fb, ¶ms->clip, &fmtcnv_state); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); } @@ -697,12 +700,14 @@ static void drm_test_fb_xrgb8888_to_rgb565(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_rgb565(&dst, dst_pitch, &src, &fb, ¶ms->clip, false); + drm_fb_xrgb8888_to_rgb565(&dst, dst_pitch, &src, &fb, ¶ms->clip, + &fmtcnv_state, false); buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); buf = dst.vaddr; /* restore original value of buf */ - drm_fb_xrgb8888_to_rgb565(&dst, &result->dst_pitch, &src, &fb, ¶ms->clip, true); + drm_fb_xrgb8888_to_rgb565(&dst, &result->dst_pitch, &src, &fb, ¶ms->clip, + &fmtcnv_state, true); buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected_swab, dst_size); @@ -711,7 +716,8 @@ static void drm_test_fb_xrgb8888_to_rgb565(struct kunit *test) int blit_result = 0; - blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_RGB565, &src, &fb, ¶ms->clip); + blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_RGB565, &src, &fb, ¶ms->clip, + &fmtcnv_state); buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16)); @@ -748,7 +754,7 @@ static void drm_test_fb_xrgb8888_to_xrgb1555(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? 
NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_xrgb1555(&dst, dst_pitch, &src, &fb, ¶ms->clip); + drm_fb_xrgb8888_to_xrgb1555(&dst, dst_pitch, &src, &fb, ¶ms->clip, &fmtcnv_state); buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); @@ -757,7 +763,8 @@ static void drm_test_fb_xrgb8888_to_xrgb1555(struct kunit *test) int blit_result = 0; - blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XRGB1555, &src, &fb, ¶ms->clip); + blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XRGB1555, &src, &fb, ¶ms->clip, + &fmtcnv_state); buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16)); @@ -794,7 +801,7 @@ static void drm_test_fb_xrgb8888_to_argb1555(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_argb1555(&dst, dst_pitch, &src, &fb, ¶ms->clip); + drm_fb_xrgb8888_to_argb1555(&dst, dst_pitch, &src, &fb, ¶ms->clip, &fmtcnv_state); buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); @@ -803,7 +810,8 @@ static void drm_test_fb_xrgb8888_to_argb1555(struct kunit *test) int blit_result = 0; - blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_ARGB1555, &src, &fb, ¶ms->clip); + blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_ARGB1555, &src, &fb, ¶ms->clip, + &fmtcnv_state); buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16)); @@ -840,7 +848,7 @@ static void drm_test_fb_xrgb8888_to_rgba5551(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_rgba5551(&dst, dst_pitch, &src, &fb, ¶ms->clip); + drm_fb_xrgb8888_to_rgba5551(&dst, dst_pitch, &src, &fb, ¶ms->clip, &fmtcnv_state); buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); @@ -849,7 +857,8 @@ static void drm_test_fb_xrgb8888_to_rgba5551(struct kunit *test) int blit_result = 0; - blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_RGBA5551, &src, &fb, ¶ms->clip); + blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_RGBA5551, &src, &fb, ¶ms->clip, + &fmtcnv_state); buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16)); @@ -890,7 +899,7 @@ static void drm_test_fb_xrgb8888_to_rgb888(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_rgb888(&dst, dst_pitch, &src, &fb, ¶ms->clip); + drm_fb_xrgb8888_to_rgb888(&dst, dst_pitch, &src, &fb, ¶ms->clip, &fmtcnv_state); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); buf = dst.vaddr; /* restore original value of buf */ @@ -898,7 +907,8 @@ static void drm_test_fb_xrgb8888_to_rgb888(struct kunit *test) int blit_result = 0; - blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_RGB888, &src, &fb, ¶ms->clip); + blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_RGB888, &src, &fb, ¶ms->clip, + &fmtcnv_state); KUNIT_EXPECT_FALSE(test, blit_result); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); @@ -933,7 +943,7 @@ static void drm_test_fb_xrgb8888_to_argb8888(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? 
NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_argb8888(&dst, dst_pitch, &src, &fb, ¶ms->clip); + drm_fb_xrgb8888_to_argb8888(&dst, dst_pitch, &src, &fb, ¶ms->clip, &fmtcnv_state); buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); @@ -942,7 +952,8 @@ static void drm_test_fb_xrgb8888_to_argb8888(struct kunit *test) int blit_result = 0; - blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_ARGB8888, &src, &fb, ¶ms->clip); + blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_ARGB8888, &src, &fb, ¶ms->clip, + &fmtcnv_state); buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); @@ -979,7 +990,7 @@ static void drm_test_fb_xrgb8888_to_xrgb2101010(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_xrgb2101010(&dst, dst_pitch, &src, &fb, ¶ms->clip); + drm_fb_xrgb8888_to_xrgb2101010(&dst, dst_pitch, &src, &fb, ¶ms->clip, &fmtcnv_state); buf = le32buf_to_cpu(test, buf, dst_size / sizeof(u32)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); @@ -989,7 +1000,7 @@ static void drm_test_fb_xrgb8888_to_xrgb2101010(struct kunit *test) int blit_result = 0; blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XRGB2101010, &src, &fb, - ¶ms->clip); + ¶ms->clip, &fmtcnv_state); KUNIT_EXPECT_FALSE(test, blit_result); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); @@ -1024,7 +1035,7 @@ static void drm_test_fb_xrgb8888_to_argb2101010(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_argb2101010(&dst, dst_pitch, &src, &fb, ¶ms->clip); + drm_fb_xrgb8888_to_argb2101010(&dst, dst_pitch, &src, &fb, ¶ms->clip, &fmtcnv_state); buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); @@ -1034,7 +1045,7 @@ static void drm_test_fb_xrgb8888_to_argb2101010(struct kunit *test) int blit_result = 0; blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_ARGB2101010, &src, &fb, - ¶ms->clip); + ¶ms->clip, &fmtcnv_state); buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); @@ -1071,7 +1082,7 @@ static void drm_test_fb_xrgb8888_to_mono(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? NULL : &result->dst_pitch; - drm_fb_xrgb8888_to_mono(&dst, dst_pitch, &src, &fb, ¶ms->clip); + drm_fb_xrgb8888_to_mono(&dst, dst_pitch, &src, &fb, ¶ms->clip, &fmtcnv_state); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); } @@ -1104,7 +1115,7 @@ static void drm_test_fb_swab(struct kunit *test) const unsigned int *dst_pitch = (result->dst_pitch == TEST_USE_DEFAULT_PITCH) ? 
NULL : &result->dst_pitch; - drm_fb_swab(&dst, dst_pitch, &src, &fb, ¶ms->clip, false); + drm_fb_swab(&dst, dst_pitch, &src, &fb, ¶ms->clip, false, &fmtcnv_state); buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); @@ -1114,7 +1125,7 @@ static void drm_test_fb_swab(struct kunit *test) int blit_result; blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XRGB8888 | DRM_FORMAT_BIG_ENDIAN, - &src, &fb, ¶ms->clip); + &src, &fb, ¶ms->clip, &fmtcnv_state); buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); KUNIT_EXPECT_FALSE(test, blit_result); @@ -1123,7 +1134,8 @@ static void drm_test_fb_swab(struct kunit *test) buf = dst.vaddr; memset(buf, 0, dst_size); - blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_BGRX8888, &src, &fb, ¶ms->clip); + blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_BGRX8888, &src, &fb, ¶ms->clip, + &fmtcnv_state); buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); KUNIT_EXPECT_FALSE(test, blit_result); @@ -1137,7 +1149,8 @@ static void drm_test_fb_swab(struct kunit *test) mock_format.format |= DRM_FORMAT_BIG_ENDIAN; fb.format = &mock_format; - blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XRGB8888, &src, &fb, ¶ms->clip); + blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XRGB8888, &src, &fb, ¶ms->clip, + &fmtcnv_state); buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); KUNIT_EXPECT_FALSE(test, blit_result); @@ -1175,7 +1188,8 @@ static void drm_test_fb_xrgb8888_to_abgr8888(struct kunit *test) int blit_result = 0; - blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_ABGR8888, &src, &fb, ¶ms->clip); + blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_ABGR8888, &src, &fb, ¶ms->clip, + &fmtcnv_state); buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); @@ -1214,7 +1228,8 @@ static void drm_test_fb_xrgb8888_to_xbgr8888(struct kunit *test) int blit_result = 0; - blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XBGR8888, &src, &fb, ¶ms->clip); + blit_result = drm_fb_blit(&dst, dst_pitch, DRM_FORMAT_XBGR8888, &src, &fb, ¶ms->clip, + &fmtcnv_state); buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); @@ -1817,7 +1832,8 @@ static void drm_test_fb_memcpy(struct kunit *test) int blit_result; - blit_result = drm_fb_blit(dst, dst_pitches, params->format, src, &fb, ¶ms->clip); + blit_result = drm_fb_blit(dst, dst_pitches, params->format, src, &fb, ¶ms->clip, + &fmtcnv_state); KUNIT_EXPECT_FALSE(test, blit_result); for (size_t i = 0; i < fb.format->num_planes; i++) { diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c index c5c34cd2edc1..4e3a152f897a 100644 --- a/drivers/gpu/drm/tiny/cirrus.c +++ b/drivers/gpu/drm/tiny/cirrus.c @@ -411,7 +411,8 @@ static void cirrus_primary_plane_helper_atomic_update(struct drm_plane *plane, unsigned int offset = drm_fb_clip_offset(pitch, format, &damage); struct iosys_map dst = IOSYS_MAP_INIT_OFFSET(&vaddr, offset); - drm_fb_blit(&dst, &pitch, format->format, shadow_plane_state->data, fb, &damage); + drm_fb_blit(&dst, &pitch, format->format, shadow_plane_state->data, fb, + &damage, &shadow_plane_state->fmtcnv_state); } drm_dev_exit(idx); diff --git a/drivers/gpu/drm/tiny/ili9225.c b/drivers/gpu/drm/tiny/ili9225.c index 4ceb68ffac4b..dd8b0a181be9 100644 --- a/drivers/gpu/drm/tiny/ili9225.c +++ b/drivers/gpu/drm/tiny/ili9225.c @@ -78,7 +78,7 @@ static inline int 
ili9225_command(struct mipi_dbi *dbi, u8 cmd, u16 data) } static void ili9225_fb_dirty(struct iosys_map *src, struct drm_framebuffer *fb, - struct drm_rect *rect) + struct drm_rect *rect, struct drm_format_conv_state *fmtcnv_state) { struct mipi_dbi_dev *dbidev = drm_to_mipi_dbi_dev(fb->dev); unsigned int height = rect->y2 - rect->y1; @@ -98,7 +98,7 @@ static void ili9225_fb_dirty(struct iosys_map *src, struct drm_framebuffer *fb, if (!dbi->dc || !full || swap || fb->format->format == DRM_FORMAT_XRGB8888) { tr = dbidev->tx_buf; - ret = mipi_dbi_buf_copy(tr, src, fb, rect, swap); + ret = mipi_dbi_buf_copy(tr, src, fb, rect, swap, fmtcnv_state); if (ret) goto err_msg; } else { @@ -171,7 +171,8 @@ static void ili9225_pipe_update(struct drm_simple_display_pipe *pipe, return; if (drm_atomic_helper_damage_merged(old_state, state, &rect)) - ili9225_fb_dirty(&shadow_plane_state->data[0], fb, &rect); + ili9225_fb_dirty(&shadow_plane_state->data[0], fb, &rect, + &shadow_plane_state->fmtcnv_state); drm_dev_exit(idx); } @@ -281,7 +282,8 @@ static void ili9225_pipe_enable(struct drm_simple_display_pipe *pipe, ili9225_command(dbi, ILI9225_DISPLAY_CONTROL_1, 0x1017); - ili9225_fb_dirty(&shadow_plane_state->data[0], fb, &rect); + ili9225_fb_dirty(&shadow_plane_state->data[0], fb, &rect, + &shadow_plane_state->fmtcnv_state); out_exit: drm_dev_exit(idx); diff --git a/drivers/gpu/drm/tiny/ofdrm.c b/drivers/gpu/drm/tiny/ofdrm.c index 2d999a0facde..404c83032cec 100644 --- a/drivers/gpu/drm/tiny/ofdrm.c +++ b/drivers/gpu/drm/tiny/ofdrm.c @@ -817,7 +817,7 @@ static void ofdrm_primary_plane_helper_atomic_update(struct drm_plane *plane, iosys_map_incr(&dst, drm_fb_clip_offset(dst_pitch, dst_format, &dst_clip)); drm_fb_blit(&dst, &dst_pitch, dst_format->format, shadow_plane_state->data, fb, - &damage); + &damage, &shadow_plane_state->fmtcnv_state); } drm_dev_exit(idx); diff --git a/drivers/gpu/drm/tiny/repaper.c b/drivers/gpu/drm/tiny/repaper.c index 73dd4f4289c2..8fd6758f5725 100644 --- a/drivers/gpu/drm/tiny/repaper.c +++ b/drivers/gpu/drm/tiny/repaper.c @@ -509,7 +509,8 @@ static void repaper_get_temperature(struct repaper_epd *epd) epd->factored_stage_time = epd->stage_time * factor10x / 10; } -static int repaper_fb_dirty(struct drm_framebuffer *fb) +static int repaper_fb_dirty(struct drm_framebuffer *fb, + struct drm_format_conv_state *fmtcnv_state) { struct drm_gem_dma_object *dma_obj = drm_fb_dma_get_gem_obj(fb, 0); struct repaper_epd *epd = drm_to_epd(fb->dev); @@ -545,7 +546,7 @@ static int repaper_fb_dirty(struct drm_framebuffer *fb) iosys_map_set_vaddr(&dst, buf); iosys_map_set_vaddr(&vmap, dma_obj->vaddr); - drm_fb_xrgb8888_to_mono(&dst, &dst_pitch, &vmap, fb, &clip); + drm_fb_xrgb8888_to_mono(&dst, &dst_pitch, &vmap, fb, &clip, fmtcnv_state); drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); @@ -830,13 +831,16 @@ static void repaper_pipe_update(struct drm_simple_display_pipe *pipe, struct drm_plane_state *old_state) { struct drm_plane_state *state = pipe->plane.state; + struct drm_format_conv_state fmtcnv_state = DRM_FORMAT_CONV_STATE_INIT; struct drm_rect rect; if (!pipe->crtc.state->active) return; if (drm_atomic_helper_damage_merged(old_state, state, &rect)) - repaper_fb_dirty(state->fb); + repaper_fb_dirty(state->fb, &fmtcnv_state); + + drm_format_conv_state_release(&fmtcnv_state); } static const struct drm_simple_display_pipe_funcs repaper_pipe_funcs = { diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c index 8bdaf66044fc..4916e9b0d46a 100644 --- 
a/drivers/gpu/drm/tiny/simpledrm.c +++ b/drivers/gpu/drm/tiny/simpledrm.c @@ -609,7 +609,7 @@ static void simpledrm_primary_plane_helper_atomic_update(struct drm_plane *plane iosys_map_incr(&dst, drm_fb_clip_offset(sdev->pitch, sdev->format, &dst_clip)); drm_fb_blit(&dst, &sdev->pitch, sdev->format->format, shadow_plane_state->data, - fb, &damage); + fb, &damage, &shadow_plane_state->fmtcnv_state); } drm_dev_exit(idx); diff --git a/drivers/gpu/drm/tiny/st7586.c b/drivers/gpu/drm/tiny/st7586.c index 3cf4eec16a81..7336fa1ddaed 100644 --- a/drivers/gpu/drm/tiny/st7586.c +++ b/drivers/gpu/drm/tiny/st7586.c @@ -64,7 +64,8 @@ static const u8 st7586_lookup[] = { 0x7, 0x4, 0x2, 0x0 }; static void st7586_xrgb8888_to_gray332(u8 *dst, void *vaddr, struct drm_framebuffer *fb, - struct drm_rect *clip) + struct drm_rect *clip, + struct drm_format_conv_state *fmtcnv_state) { size_t len = (clip->x2 - clip->x1) * (clip->y2 - clip->y1); unsigned int x, y; @@ -77,7 +78,7 @@ static void st7586_xrgb8888_to_gray332(u8 *dst, void *vaddr, iosys_map_set_vaddr(&dst_map, buf); iosys_map_set_vaddr(&vmap, vaddr); - drm_fb_xrgb8888_to_gray8(&dst_map, NULL, &vmap, fb, clip); + drm_fb_xrgb8888_to_gray8(&dst_map, NULL, &vmap, fb, clip, fmtcnv_state); src = buf; for (y = clip->y1; y < clip->y2; y++) { @@ -93,7 +94,7 @@ static void st7586_xrgb8888_to_gray332(u8 *dst, void *vaddr, } static int st7586_buf_copy(void *dst, struct iosys_map *src, struct drm_framebuffer *fb, - struct drm_rect *clip) + struct drm_rect *clip, struct drm_format_conv_state *fmtcnv_state) { int ret; @@ -101,7 +102,7 @@ static int st7586_buf_copy(void *dst, struct iosys_map *src, struct drm_framebuf if (ret) return ret; - st7586_xrgb8888_to_gray332(dst, src->vaddr, fb, clip); + st7586_xrgb8888_to_gray332(dst, src->vaddr, fb, clip, fmtcnv_state); drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); @@ -109,7 +110,7 @@ static int st7586_buf_copy(void *dst, struct iosys_map *src, struct drm_framebuf } static void st7586_fb_dirty(struct iosys_map *src, struct drm_framebuffer *fb, - struct drm_rect *rect) + struct drm_rect *rect, struct drm_format_conv_state *fmtcnv_state) { struct mipi_dbi_dev *dbidev = drm_to_mipi_dbi_dev(fb->dev); struct mipi_dbi *dbi = &dbidev->dbi; @@ -121,7 +122,7 @@ static void st7586_fb_dirty(struct iosys_map *src, struct drm_framebuffer *fb, DRM_DEBUG_KMS("Flushing [FB:%d] " DRM_RECT_FMT "\n", fb->base.id, DRM_RECT_ARG(rect)); - ret = st7586_buf_copy(dbidev->tx_buf, src, fb, rect); + ret = st7586_buf_copy(dbidev->tx_buf, src, fb, rect, fmtcnv_state); if (ret) goto err_msg; @@ -160,7 +161,8 @@ static void st7586_pipe_update(struct drm_simple_display_pipe *pipe, return; if (drm_atomic_helper_damage_merged(old_state, state, &rect)) - st7586_fb_dirty(&shadow_plane_state->data[0], fb, &rect); + st7586_fb_dirty(&shadow_plane_state->data[0], fb, &rect, + &shadow_plane_state->fmtcnv_state); drm_dev_exit(idx); } @@ -238,7 +240,8 @@ static void st7586_pipe_enable(struct drm_simple_display_pipe *pipe, msleep(100); - st7586_fb_dirty(&shadow_plane_state->data[0], fb, &rect); + st7586_fb_dirty(&shadow_plane_state->data[0], fb, &rect, + &shadow_plane_state->fmtcnv_state); mipi_dbi_command(dbi, MIPI_DCS_SET_DISPLAY_ON); out_exit: diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h index 724a9baf7315..f13b34e0b752 100644 --- a/include/drm/drm_format_helper.h +++ b/include/drm/drm_format_helper.h @@ -74,45 +74,49 @@ void drm_fb_memcpy(struct iosys_map *dst, const unsigned int *dst_pitch, const struct drm_rect *clip); void 
drm_fb_swab(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip, bool cached); + const struct drm_rect *clip, bool cached, + struct drm_format_conv_state *state); void drm_fb_xrgb8888_to_rgb332(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip); + const struct drm_rect *clip, struct drm_format_conv_state *state); void drm_fb_xrgb8888_to_rgb565(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip, bool swab); + const struct drm_rect *clip, struct drm_format_conv_state *state, + bool swab); void drm_fb_xrgb8888_to_xrgb1555(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip); + const struct drm_rect *clip, struct drm_format_conv_state *state); void drm_fb_xrgb8888_to_argb1555(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip); + const struct drm_rect *clip, struct drm_format_conv_state *state); void drm_fb_xrgb8888_to_rgba5551(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip); + const struct drm_rect *clip, struct drm_format_conv_state *state); void drm_fb_xrgb8888_to_rgb888(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip); + const struct drm_rect *clip, struct drm_format_conv_state *state); void drm_fb_xrgb8888_to_argb8888(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip); + const struct drm_rect *clip, struct drm_format_conv_state *state); void drm_fb_xrgb8888_to_xrgb2101010(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip); + const struct drm_rect *clip, + struct drm_format_conv_state *state); void drm_fb_xrgb8888_to_argb2101010(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip); + const struct drm_rect *clip, + struct drm_format_conv_state *state); void drm_fb_xrgb8888_to_gray8(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip); + const struct drm_rect *clip, struct drm_format_conv_state *state); int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t dst_format, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *rect); + const struct drm_rect *clip, struct drm_format_conv_state *state); void drm_fb_xrgb8888_to_mono(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip); + const struct drm_rect *clip, struct drm_format_conv_state *state); size_t drm_fb_build_fourcc_list(struct drm_device *dev, const u32 *native_fourccs, size_t native_nfourccs, diff --git a/include/drm/drm_mipi_dbi.h b/include/drm/drm_mipi_dbi.h index 816f196b3d4c..e8e0f8d39f3a 100644 --- a/include/drm/drm_mipi_dbi.h +++ 
b/include/drm/drm_mipi_dbi.h @@ -12,6 +12,7 @@ #include #include +struct drm_format_conv_state; struct drm_rect; struct gpio_desc; struct iosys_map; @@ -192,7 +193,8 @@ int mipi_dbi_command_buf(struct mipi_dbi *dbi, u8 cmd, u8 *data, size_t len); int mipi_dbi_command_stackbuf(struct mipi_dbi *dbi, u8 cmd, const u8 *data, size_t len); int mipi_dbi_buf_copy(void *dst, struct iosys_map *src, struct drm_framebuffer *fb, - struct drm_rect *clip, bool swap); + struct drm_rect *clip, bool swap, + struct drm_format_conv_state *fmtcnv_state); /** * mipi_dbi_command - MIPI DCS command with optional parameter(s) -- cgit v1.2.3 From 78dfe8a0ef779159a6ff51231d71b3a65c55ccf5 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 1 Nov 2023 11:35:42 +0100 Subject: drm: Remove struct drm_flip_task from DRM interfaces Contain struct drm_flip_task and its helper functions drm_flip_work_allocate_task() and drm_flip_work_queue_task() within drm_flip_work.c There are no callers outside of the flip-work code. Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20231101103618.23806-2-tzimmermann@suse.de --- drivers/gpu/drm/drm_flip_work.c | 27 +++++++-------------------- include/drm/drm_flip_work.h | 18 ++---------------- 2 files changed, 9 insertions(+), 36 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_flip_work.c b/drivers/gpu/drm/drm_flip_work.c index 060b753881a2..8c6090a90d56 100644 --- a/drivers/gpu/drm/drm_flip_work.c +++ b/drivers/gpu/drm/drm_flip_work.c @@ -27,14 +27,12 @@ #include #include -/** - * drm_flip_work_allocate_task - allocate a flip-work task - * @data: data associated to the task - * @flags: allocator flags - * - * Allocate a drm_flip_task object and attach private data to it. - */ -struct drm_flip_task *drm_flip_work_allocate_task(void *data, gfp_t flags) +struct drm_flip_task { + struct list_head node; + void *data; +}; + +static struct drm_flip_task *drm_flip_work_allocate_task(void *data, gfp_t flags) { struct drm_flip_task *task; @@ -44,18 +42,8 @@ struct drm_flip_task *drm_flip_work_allocate_task(void *data, gfp_t flags) return task; } -EXPORT_SYMBOL(drm_flip_work_allocate_task); -/** - * drm_flip_work_queue_task - queue a specific task - * @work: the flip-work - * @task: the task to handle - * - * Queues task, that will later be run (passed back to drm_flip_func_t - * func) on a work queue after drm_flip_work_commit() is called. - */ -void drm_flip_work_queue_task(struct drm_flip_work *work, - struct drm_flip_task *task) +static void drm_flip_work_queue_task(struct drm_flip_work *work, struct drm_flip_task *task) { unsigned long flags; @@ -63,7 +51,6 @@ void drm_flip_work_queue_task(struct drm_flip_work *work, list_add_tail(&task->node, &work->queued); spin_unlock_irqrestore(&work->lock, flags); } -EXPORT_SYMBOL(drm_flip_work_queue_task); /** * drm_flip_work_queue - queue work diff --git a/include/drm/drm_flip_work.h b/include/drm/drm_flip_work.h index 21c3d512d25c..6be4ba6f3514 100644 --- a/include/drm/drm_flip_work.h +++ b/include/drm/drm_flip_work.h @@ -33,9 +33,8 @@ * * Util to queue up work to run from work-queue context after flip/vblank. * Typically this can be used to defer unref of framebuffer's, cursor - * bo's, etc until after vblank. The APIs are all thread-safe. - * Moreover, drm_flip_work_queue_task and drm_flip_work_queue can be called - * in atomic context. + * bo's, etc until after vblank. The APIs are all thread-safe. 
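With struct drm_flip_task now private to drm_flip_work.c, drivers interact with flip work only through drm_flip_work_queue() and drm_flip_work_commit(), plus init/cleanup. A hedged sketch of the usual deferred-unref pattern follows; the work item placement, names and the use of system_unbound_wq are assumptions, not taken from this series.

#include <linux/workqueue.h>
#include <drm/drm_flip_work.h>
#include <drm/drm_framebuffer.h>

static struct drm_flip_work unref_work;	/* normally embedded in a CRTC structure */

static void example_unref_cb(struct drm_flip_work *work, void *val)
{
	/* Runs later from work-queue context. */
	drm_framebuffer_put(val);
}

static void example_init(void)
{
	drm_flip_work_init(&unref_work, "fb unref", example_unref_cb);
}

static void example_flip_done(struct drm_framebuffer *old_fb)
{
	/* Queue the old framebuffer and kick the work queue after the flip. */
	drm_flip_work_queue(&unref_work, old_fb);
	drm_flip_work_commit(&unref_work, system_unbound_wq);
}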
Moreover, + * drm_flip_work_queue can be called in atomic context. */ struct drm_flip_work; @@ -51,16 +50,6 @@ struct drm_flip_work; */ typedef void (*drm_flip_func_t)(struct drm_flip_work *work, void *val); -/** - * struct drm_flip_task - flip work task - * @node: list entry element - * @data: data to pass to &drm_flip_work.func - */ -struct drm_flip_task { - struct list_head node; - void *data; -}; - /** * struct drm_flip_work - flip work queue * @name: debug name @@ -79,9 +68,6 @@ struct drm_flip_work { spinlock_t lock; }; -struct drm_flip_task *drm_flip_work_allocate_task(void *data, gfp_t flags); -void drm_flip_work_queue_task(struct drm_flip_work *work, - struct drm_flip_task *task); void drm_flip_work_queue(struct drm_flip_work *work, void *val); void drm_flip_work_commit(struct drm_flip_work *work, struct workqueue_struct *wq); -- cgit v1.2.3 From ce64630dca7026ed9dc880dcd005977f662c99fe Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 1 Nov 2023 11:35:43 +0100 Subject: drm: Fix flip-task docs Say that drm_flip_work_commit() is safe to call in atomic context. Turn the name into a hyperlink. Suggested-by: Daniel Vetter Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20231101103618.23806-3-tzimmermann@suse.de --- include/drm/drm_flip_work.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/drm') diff --git a/include/drm/drm_flip_work.h b/include/drm/drm_flip_work.h index 6be4ba6f3514..1eef3283a109 100644 --- a/include/drm/drm_flip_work.h +++ b/include/drm/drm_flip_work.h @@ -31,10 +31,10 @@ /** * DOC: flip utils * - * Util to queue up work to run from work-queue context after flip/vblank. + * Utility to queue up work to run from work-queue context after flip/vblank. * Typically this can be used to defer unref of framebuffer's, cursor - * bo's, etc until after vblank. The APIs are all thread-safe. Moreover, - * drm_flip_work_queue can be called in atomic context. + * bo's, etc until after vblank. The APIs are all thread-safe. Moreover, + * drm_flip_work_commit() can be called in atomic context. */ struct drm_flip_work; -- cgit v1.2.3 From 0c2287c9652150cf659408b66c1789830822132f Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Fri, 10 Nov 2023 15:40:10 +0530 Subject: drm/display/dp: Add helper function to get DSC bpp precision Add helper to get the DSC bits_per_pixel precision for the DP sink. Signed-off-by: Ankit Nautiyal Reviewed-by: Suraj Kandpal Reviewed-by: Sui Jingfeng Acked-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20231110101020.4067342-2-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/display/drm_dp_helper.c | 27 +++++++++++++++++++++++++++ include/drm/display/drm_dp_helper.h | 1 + 2 files changed, 28 insertions(+) (limited to 'include/drm') diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index 64c151e4f78c..ff8d67d0e9dc 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -2325,6 +2325,33 @@ int drm_dp_read_desc(struct drm_dp_aux *aux, struct drm_dp_desc *desc, } EXPORT_SYMBOL(drm_dp_read_desc); +/** + * drm_dp_dsc_sink_bpp_incr() - Get bits per pixel increment + * @dsc_dpcd: DSC capabilities from DPCD + * + * Returns the bpp precision supported by the DP sink. 
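drm_dp_dsc_sink_bpp_incr() returns the denominator of the sink's bits-per-pixel precision (16 means 1/16 bpp steps, 1 means whole bpp only), or 0 if the DPCD value is unrecognised. One way a caller might use it, sketched here with an assumed 1/16-bpp fixed-point representation and helper name:

#include <linux/kernel.h>
#include <drm/display/drm_dp_helper.h>

/* Illustrative: clamp a compressed-bpp target (in 1/16 bpp units) to sink precision. */
static int example_align_dsc_bpp_x16(int bpp_x16, const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE])
{
	u8 incr = drm_dp_dsc_sink_bpp_incr(dsc_dpcd);

	if (!incr)	/* precision not reported; leave the value untouched */
		return bpp_x16;

	/* incr == 16 allows 1/16 bpp steps, incr == 1 only whole bpp values. */
	return rounddown(bpp_x16, 16 / incr);
}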
+ */ +u8 drm_dp_dsc_sink_bpp_incr(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE]) +{ + u8 bpp_increment_dpcd = dsc_dpcd[DP_DSC_BITS_PER_PIXEL_INC - DP_DSC_SUPPORT]; + + switch (bpp_increment_dpcd) { + case DP_DSC_BITS_PER_PIXEL_1_16: + return 16; + case DP_DSC_BITS_PER_PIXEL_1_8: + return 8; + case DP_DSC_BITS_PER_PIXEL_1_4: + return 4; + case DP_DSC_BITS_PER_PIXEL_1_2: + return 2; + case DP_DSC_BITS_PER_PIXEL_1_1: + return 1; + } + + return 0; +} +EXPORT_SYMBOL(drm_dp_dsc_sink_bpp_incr); + /** * drm_dp_dsc_sink_max_slice_count() - Get the max slice count * supported by the DSC sink. diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index 3c59f64d4a69..194715083399 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -164,6 +164,7 @@ drm_dp_is_branch(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) } /* DP/eDP DSC support */ +u8 drm_dp_dsc_sink_bpp_incr(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE]); u8 drm_dp_dsc_sink_max_slice_count(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE], bool is_edp); u8 drm_dp_dsc_sink_line_buf_depth(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE]); -- cgit v1.2.3 From e04d24c4e8062b5ed0bee7a871423a454d24ffed Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Thu, 16 Nov 2023 22:44:53 -0500 Subject: drm/print: Handle NULL drm device in __drm_printk() drm_{err,warn,...}() use __drm_printk() which takes a drm device pointer and uses the embedded device pointer to print the device. This facility handles NULL device pointer, but not NULL drm device pointer. This patch makes __drm_printk() also handle a NULL drm device pointer. The printed output is identical to if drm->dev had been NULL. Signed-off-by: Luben Tuikov Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231117035427.68125-2-ltuikov89@gmail.com --- include/drm/drm_print.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/drm') diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index a93a387f8a1a..dd4883df876a 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -453,7 +453,7 @@ void __drm_dev_dbg(struct _ddebug *desc, const struct device *dev, /* Helper for struct drm_device based logging. */ #define __drm_printk(drm, level, type, fmt, ...) \ - dev_##level##type((drm)->dev, "[drm] " fmt, ##__VA_ARGS__) + dev_##level##type((drm) ? (drm)->dev : NULL, "[drm] " fmt, ##__VA_ARGS__) #define drm_info(drm, fmt, ...) \ -- cgit v1.2.3 From a4dea9a06f72c7885f8d4dccedec7e477878d798 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 14 Nov 2023 17:14:06 +0200 Subject: drm/edid/firmware: drop drm_kms_helper.edid_firmware backward compat Since the edid_firmware module parameter was moved from drm_kms_helper.ko to drm.ko in v4.15, we've had a backwards compatibility helper in place, with a DRM_NOTE() suggesting to migrate to drm.edid_firmware. This was added in commit ac6c35a4d8c7 ("drm: add backwards compatibility support for drm_kms_helper.edid_firmware"). More than five years and 30+ kernel releases later, drop the backward compatibility. 
v2: Drop the warnings too Acked-by: Daniel Vetter Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231114151406.61230-1-jani.nikula@intel.com --- drivers/gpu/drm/drm_edid_load.c | 16 ---------------- drivers/gpu/drm/drm_kms_helper_common.c | 32 -------------------------------- include/drm/drm_edid.h | 5 ----- 3 files changed, 53 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_edid_load.c b/drivers/gpu/drm/drm_edid_load.c index 5d9ef267ebb3..60fcb80bce61 100644 --- a/drivers/gpu/drm/drm_edid_load.c +++ b/drivers/gpu/drm/drm_edid_load.c @@ -23,22 +23,6 @@ module_param_string(edid_firmware, edid_firmware, sizeof(edid_firmware), 0644); MODULE_PARM_DESC(edid_firmware, "Do not probe monitor, use specified EDID blob " "from built-in data or /lib/firmware instead. "); -/* Use only for backward compatibility with drm_kms_helper.edid_firmware */ -int __drm_set_edid_firmware_path(const char *path) -{ - scnprintf(edid_firmware, sizeof(edid_firmware), "%s", path); - - return 0; -} -EXPORT_SYMBOL(__drm_set_edid_firmware_path); - -/* Use only for backward compatibility with drm_kms_helper.edid_firmware */ -int __drm_get_edid_firmware_path(char *buf, size_t bufsize) -{ - return scnprintf(buf, bufsize, "%s", edid_firmware); -} -EXPORT_SYMBOL(__drm_get_edid_firmware_path); - #define GENERIC_EDIDS 6 static const char * const generic_edid_name[GENERIC_EDIDS] = { "edid/800x600.bin", diff --git a/drivers/gpu/drm/drm_kms_helper_common.c b/drivers/gpu/drm/drm_kms_helper_common.c index 0bf0fc1abf54..0c7550c0462b 100644 --- a/drivers/gpu/drm/drm_kms_helper_common.c +++ b/drivers/gpu/drm/drm_kms_helper_common.c @@ -27,38 +27,6 @@ #include -#include -#include - -#include "drm_crtc_helper_internal.h" - MODULE_AUTHOR("David Airlie, Jesse Barnes"); MODULE_DESCRIPTION("DRM KMS helper"); MODULE_LICENSE("GPL and additional rights"); - -#if IS_ENABLED(CONFIG_DRM_LOAD_EDID_FIRMWARE) - -/* Backward compatibility for drm_kms_helper.edid_firmware */ -static int edid_firmware_set(const char *val, const struct kernel_param *kp) -{ - DRM_NOTE("drm_kms_helper.edid_firmware is deprecated, please use drm.edid_firmware instead.\n"); - - return __drm_set_edid_firmware_path(val); -} - -static int edid_firmware_get(char *buffer, const struct kernel_param *kp) -{ - return __drm_get_edid_firmware_path(buffer, PAGE_SIZE); -} - -static const struct kernel_param_ops edid_firmware_ops = { - .set = edid_firmware_set, - .get = edid_firmware_get, -}; - -module_param_cb(edid_firmware, &edid_firmware_ops, NULL, 0644); -__MODULE_PARM_TYPE(edid_firmware, "charp"); -MODULE_PARM_DESC(edid_firmware, - "DEPRECATED. 
Use drm.edid_firmware module parameter instead."); - -#endif diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index e98aa6818700..518d1b8106c7 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -329,11 +329,6 @@ int drm_edid_to_speaker_allocation(const struct edid *edid, u8 **sadb); int drm_av_sync_delay(struct drm_connector *connector, const struct drm_display_mode *mode); -#ifdef CONFIG_DRM_LOAD_EDID_FIRMWARE -int __drm_set_edid_firmware_path(const char *path); -int __drm_get_edid_firmware_path(char *buf, size_t bufsize); -#endif - bool drm_edid_are_equal(const struct edid *edid1, const struct edid *edid2); int -- cgit v1.2.3 From 191dc43935d1ece82bc6c9653463b3b1cd8198fb Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 16 Nov 2023 15:18:31 +0200 Subject: drm/dp_mst: Store the MST PBN divider value in fixed point format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On UHBR links the PBN divider is a fractional number, accordingly store it in fixed point format. For now drm_dp_get_vc_payload_bw() always returns a whole number and all callers will use only the integer part of it which should preserve the current behavior. The next patch will fix drm_dp_get_vc_payload_bw() for UHBR rates returning a fractional number for those (also accounting for the channel coding efficiency correctly). Cc: Lyude Paul Cc: Harry Wentland Cc: Alex Deucher Cc: Wayne Lin Cc: amd-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Reviewed-by: Ville Syrjälä Acked-by: Daniel Vetter Acked-by: Maarten Lankhorst [Rebased changes in dm_helpers_construct_old_payload() on drm-intel-next] Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20231116131841.1588781-2-imre.deak@intel.com --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 +++-- .../drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 4 +++- .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 5 +++-- drivers/gpu/drm/display/drm_dp_mst_topology.c | 22 +++++++++++++++------- drivers/gpu/drm/i915/display/intel_dp_mst.c | 3 ++- drivers/gpu/drm/nouveau/dispnv50/disp.c | 6 ++++-- include/drm/display/drm_dp_mst_helper.h | 7 ++++--- 7 files changed, 34 insertions(+), 18 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index df3c42bb9274..2253e8fd2d18 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -85,6 +85,7 @@ #include #include #include +#include #include #include #include @@ -6863,8 +6864,8 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder, if (IS_ERR(mst_state)) return PTR_ERR(mst_state); - if (!mst_state->pbn_div) - mst_state->pbn_div = dm_mst_get_pbn_divider(aconnector->mst_root->dc_link); + if (!mst_state->pbn_div.full) + mst_state->pbn_div.full = dfixed_const(dm_mst_get_pbn_divider(aconnector->mst_root->dc_link)); if (!state->duplicated) { int max_bpc = conn_state->max_requested_bpc; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index baf7e5254fb3..53e323b71d26 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "dm_services.h" #include "amdgpu.h" @@ -205,13 +206,14 @@ void dm_helpers_dp_update_branch_info( static void 
dm_helpers_construct_old_payload( struct dc_link *link, - int pbn_per_slot, + fixed20_12 pbn_per_slot_fp, struct drm_dp_mst_atomic_payload *new_payload, struct drm_dp_mst_atomic_payload *old_payload) { struct link_mst_stream_allocation_table current_link_table = link->mst_stream_alloc_table; struct link_mst_stream_allocation *dc_alloc; + int pbn_per_slot = dfixed_trunc(pbn_per_slot_fp); int i; *old_payload = *new_payload; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 2afd1bc74978..d80f19e442bf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "dm_services.h" #include "amdgpu.h" #include "amdgpu_dm.h" @@ -935,10 +936,10 @@ static int increase_dsc_bpp(struct drm_atomic_state *state, link_timeslots_used = 0; for (i = 0; i < count; i++) - link_timeslots_used += DIV_ROUND_UP(vars[i + k].pbn, mst_state->pbn_div); + link_timeslots_used += DIV_ROUND_UP(vars[i + k].pbn, dfixed_trunc(mst_state->pbn_div)); fair_pbn_alloc = - (63 - link_timeslots_used) / remaining_to_increase * mst_state->pbn_div; + (63 - link_timeslots_used) / remaining_to_increase * dfixed_trunc(mst_state->pbn_div); if (initial_slack[next_index] > fair_pbn_alloc) { vars[next_index].pbn += fair_pbn_alloc; diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index 0854fe428f17..7efbd8ef664f 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -3578,16 +3579,22 @@ static int drm_dp_send_up_ack_reply(struct drm_dp_mst_topology_mgr *mgr, * value is in units of PBNs/(timeslots/1 MTP). This value can be used to * convert the number of PBNs required for a given stream to the number of * timeslots this stream requires in each MTP. + * + * Returns the BW / timeslot value in 20.12 fixed point format. 
*/ -int drm_dp_get_vc_payload_bw(const struct drm_dp_mst_topology_mgr *mgr, - int link_rate, int link_lane_count) +fixed20_12 drm_dp_get_vc_payload_bw(const struct drm_dp_mst_topology_mgr *mgr, + int link_rate, int link_lane_count) { + fixed20_12 ret; + if (link_rate == 0 || link_lane_count == 0) drm_dbg_kms(mgr->dev, "invalid link rate/lane count: (%d / %d)\n", link_rate, link_lane_count); /* See DP v2.0 2.6.4.2, VCPayload_Bandwidth_for_OneTimeSlotPer_MTP_Allocation */ - return link_rate * link_lane_count / 54000; + ret.full = dfixed_const(link_rate * link_lane_count / 54000); + + return ret; } EXPORT_SYMBOL(drm_dp_get_vc_payload_bw); @@ -4335,7 +4342,7 @@ int drm_dp_atomic_find_time_slots(struct drm_atomic_state *state, } } - req_slots = DIV_ROUND_UP(pbn, topology_state->pbn_div); + req_slots = DIV_ROUND_UP(pbn, dfixed_trunc(topology_state->pbn_div)); drm_dbg_atomic(mgr->dev, "[CONNECTOR:%d:%s] [MST PORT:%p] TU %d -> %d\n", port->connector->base.id, port->connector->name, @@ -4872,7 +4879,8 @@ void drm_dp_mst_dump_topology(struct seq_file *m, state = to_drm_dp_mst_topology_state(mgr->base.state); seq_printf(m, "\n*** Atomic state info ***\n"); seq_printf(m, "payload_mask: %x, max_payloads: %d, start_slot: %u, pbn_div: %d\n", - state->payload_mask, mgr->max_payloads, state->start_slot, state->pbn_div); + state->payload_mask, mgr->max_payloads, state->start_slot, + dfixed_trunc(state->pbn_div)); seq_printf(m, "\n| idx | port | vcpi | slots | pbn | dsc | status | sink name |\n"); for (i = 0; i < mgr->max_payloads; i++) { @@ -5330,10 +5338,10 @@ drm_dp_mst_atomic_check_payload_alloc_limits(struct drm_dp_mst_topology_mgr *mgr } if (!payload_count) - mst_state->pbn_div = 0; + mst_state->pbn_div.full = dfixed_const(0); drm_dbg_atomic(mgr->dev, "[MST MGR:%p] mst state %p TU pbn_div=%d avail=%d used=%d\n", - mgr, mst_state, mst_state->pbn_div, avail_slots, + mgr, mst_state, dfixed_trunc(mst_state->pbn_div), avail_slots, mst_state->total_avail_slots - avail_slots); return 0; diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 0cb9405f59ea..e5d6b811c22e 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "i915_drv.h" @@ -202,7 +203,7 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder, */ drm_WARN_ON(&i915->drm, remote_m_n.tu < crtc_state->dp_m_n.tu); crtc_state->dp_m_n.tu = remote_m_n.tu; - crtc_state->pbn = remote_m_n.tu * mst_state->pbn_div; + crtc_state->pbn = remote_m_n.tu * dfixed_trunc(mst_state->pbn_div); slots = drm_dp_atomic_find_time_slots(state, &intel_dp->mst_mgr, connector->port, diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 3d390f5029f6..95fb097e5453 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -945,7 +946,8 @@ nv50_msto_prepare(struct drm_atomic_state *state, if (ret == 0) { nvif_outp_dp_mst_vcpi(&mstm->outp->outp, msto->head->base.index, payload->vc_start_slot, payload->time_slots, - payload->pbn, payload->time_slots * mst_state->pbn_div); + payload->pbn, + payload->time_slots * dfixed_trunc(mst_state->pbn_div)); } else { nvif_outp_dp_mst_vcpi(&mstm->outp->outp, msto->head->base.index, 0, 0, 0, 0); } @@ -989,7 +991,7 @@ nv50_msto_atomic_check(struct drm_encoder *encoder, if 
(IS_ERR(mst_state)) return PTR_ERR(mst_state); - if (!mst_state->pbn_div) { + if (!mst_state->pbn_div.full) { struct nouveau_encoder *outp = mstc->mstm->outp; mst_state->pbn_div = drm_dp_get_vc_payload_bw(&mstm->mgr, diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h index a4aad6df71f1..9b19d8bd520a 100644 --- a/include/drm/display/drm_dp_mst_helper.h +++ b/include/drm/display/drm_dp_mst_helper.h @@ -25,6 +25,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS) #include @@ -617,7 +618,7 @@ struct drm_dp_mst_topology_state { * @pbn_div: The current PBN divisor for this topology. The driver is expected to fill this * out itself. */ - int pbn_div; + fixed20_12 pbn_div; }; #define to_dp_mst_topology_mgr(x) container_of(x, struct drm_dp_mst_topology_mgr, base) @@ -839,8 +840,8 @@ struct edid *drm_dp_mst_get_edid(struct drm_connector *connector, struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port); -int drm_dp_get_vc_payload_bw(const struct drm_dp_mst_topology_mgr *mgr, - int link_rate, int link_lane_count); +fixed20_12 drm_dp_get_vc_payload_bw(const struct drm_dp_mst_topology_mgr *mgr, + int link_rate, int link_lane_count); int drm_dp_calc_pbn_mode(int clock, int bpp); -- cgit v1.2.3 From d389989ed530b3d8944974b7ee866b089720bc9c Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 17 Nov 2023 17:09:27 +0200 Subject: drm/dp_mst: Fix PBN divider calculation for UHBR rates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current way of calculating the pbn_div value, the link BW per each MTP slot, worked only for DP 1.4 link rates. Fix things up for UHBR rates calculating with the correct channel coding efficiency based on the link rate. v2: - Return the fractional pbn_div value from drm_dp_get_vc_payload_bw(). v3: - Fix rounding up quotient while calculating req_slots. 
(Ville) Cc: Ville Syrjälä Cc: Lyude Paul Cc: dri-devel@lists.freedesktop.org Reviewed-by: Ville Syrjälä Acked-by: Daniel Vetter Acked-by: Maarten Lankhorst Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20231117150929.1767227-1-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 10 +++++++--- include/drm/display/drm_dp_helper.h | 13 +++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index 7efbd8ef664f..ab39c7ad9b89 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -3585,14 +3585,18 @@ static int drm_dp_send_up_ack_reply(struct drm_dp_mst_topology_mgr *mgr, fixed20_12 drm_dp_get_vc_payload_bw(const struct drm_dp_mst_topology_mgr *mgr, int link_rate, int link_lane_count) { + int ch_coding_efficiency = + drm_dp_bw_channel_coding_efficiency(drm_dp_is_uhbr_rate(link_rate)); fixed20_12 ret; if (link_rate == 0 || link_lane_count == 0) drm_dbg_kms(mgr->dev, "invalid link rate/lane count: (%d / %d)\n", link_rate, link_lane_count); - /* See DP v2.0 2.6.4.2, VCPayload_Bandwidth_for_OneTimeSlotPer_MTP_Allocation */ - ret.full = dfixed_const(link_rate * link_lane_count / 54000); + /* See DP v2.0 2.6.4.2, 2.7.6.3 VCPayload_Bandwidth_for_OneTimeSlotPer_MTP_Allocation */ + ret.full = DIV_ROUND_DOWN_ULL(mul_u32_u32(link_rate * link_lane_count, + ch_coding_efficiency), + (1000000ULL * 8 * 5400) >> 12); return ret; } @@ -4342,7 +4346,7 @@ int drm_dp_atomic_find_time_slots(struct drm_atomic_state *state, } } - req_slots = DIV_ROUND_UP(pbn, dfixed_trunc(topology_state->pbn_div)); + req_slots = DIV_ROUND_UP(dfixed_const(pbn), topology_state->pbn_div.full); drm_dbg_atomic(mgr->dev, "[CONNECTOR:%d:%s] [MST PORT:%p] TU %d -> %d\n", port->connector->base.id, port->connector->name, diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index 194715083399..b88cc53425e0 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -252,6 +252,19 @@ drm_edp_backlight_supported(const u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE]) return !!(edp_dpcd[1] & DP_EDP_TCON_BACKLIGHT_ADJUSTMENT_CAP); } +/** + * drm_dp_is_uhbr_rate - Determine if a link rate is UHBR + * @link_rate: link rate in 10kbits/s units + * + * Determine if the provided link rate is an UHBR rate. + * + * Returns: %True if @link_rate is an UHBR rate. + */ +static inline bool drm_dp_is_uhbr_rate(int link_rate) +{ + return link_rate >= 1000000; +} + /* * DisplayPort AUX channel */ -- cgit v1.2.3 From 5032c607e886e0c40749a05d37b835c1757d38ff Mon Sep 17 00:00:00 2001 From: Haridhar Kalvala Date: Mon, 20 Nov 2023 17:07:31 +0530 Subject: drm/i915: ATS-M device ID update ATS-M device ID update. 
BSpec: 44477 Signed-off-by: Haridhar Kalvala Reviewed-by: Matt Roper Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20231120113731.1570589-1-haridhar.kalvala@intel.com --- include/drm/i915_pciids.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/drm') diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 1c9ea6ab3eb9..fcf1849aa47c 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -738,7 +738,8 @@ INTEL_DG2_G12_IDS(info) #define INTEL_ATS_M150_IDS(info) \ - INTEL_VGA_DEVICE(0x56C0, info) + INTEL_VGA_DEVICE(0x56C0, info), \ + INTEL_VGA_DEVICE(0x56C2, info) #define INTEL_ATS_M75_IDS(info) \ INTEL_VGA_DEVICE(0x56C1, info) -- cgit v1.2.3 From a191f73d85484f804284674c14f2d9f572c18adb Mon Sep 17 00:00:00 2001 From: Donald Robson Date: Wed, 22 Nov 2023 16:34:23 +0000 Subject: drm/gpuvm: Helper to get range of unmap from a remap op. Determining the start and range of the unmap stage of a remap op is a common piece of code currently implemented by multiple drivers. Add a helper for this. Changes since v7: - Renamed helper to drm_gpuva_op_remap_to_unmap_range() - Improved documentation Changes since v6: - Remove use of __always_inline Signed-off-by: Donald Robson Signed-off-by: Sarah Walker Reviewed-by: Danilo Krummrich Link: https://lore.kernel.org/r/8a0a5b5eeec459d3c60fcdaa5a638ad14a18a59e.1700668843.git.donald.robson@imgtec.com Signed-off-by: Maxime Ripard --- include/drm/drm_gpuvm.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/drm') diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index 8ca10461d8ac..f94fec9a8517 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -1213,4 +1213,32 @@ void drm_gpuva_remap(struct drm_gpuva *prev, void drm_gpuva_unmap(struct drm_gpuva_op_unmap *op); +/** + * drm_gpuva_op_remap_to_unmap_range() - Helper to get the start and range of + * the unmap stage of a remap op. + * @op: Remap op. + * @start_addr: Output pointer for the start of the required unmap. + * @range: Output pointer for the length of the required unmap. + * + * The given start address and range will be set such that they represent the + * range of the address space that was previously covered by the mapping being + * re-mapped, but is now empty. + */ +static inline void +drm_gpuva_op_remap_to_unmap_range(const struct drm_gpuva_op_remap *op, + u64 *start_addr, u64 *range) +{ + const u64 va_start = op->prev ? + op->prev->va.addr + op->prev->va.range : + op->unmap->va->va.addr; + const u64 va_end = op->next ? + op->next->va.addr : + op->unmap->va->va.addr + op->unmap->va->va.range; + + if (start_addr) + *start_addr = va_start; + if (range) + *range = va_end - va_start; +} + #endif /* __DRM_GPUVM_H__ */ -- cgit v1.2.3 From c6fbb6bca10838485b820e8a26c23996f77ce580 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 13 Oct 2023 16:13:59 +0300 Subject: drm: Fix color LUT rounding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current implementation of drm_color_lut_extract() generates weird results. Eg. if we go through all the values for 16->8bpc conversion we see the following pattern: in out (count) 0 - 7f -> 0 (128) 80 - 17f -> 1 (256) 180 - 27f -> 2 (256) 280 - 37f -> 3 (256) ... fb80 - fc7f -> fc (256) fc80 - fd7f -> fd (256) fd80 - fe7f -> fe (256) fe80 - ffff -> ff (384) So less values map to 0 and more values map 0xff, which doesn't seem particularly great. 
To get just the same number of input values to map to the same output values we'd just need to drop the rounding entrirely. But perhaps a better idea would be to follow the OpenGL int<->float conversion rules, in which case we get the following results: in out (count) 0 - 80 -> 0 (129) 81 - 181 -> 1 (257) 182 - 282 -> 2 (257) 283 - 383 -> 3 (257) ... fc7c - fd7c -> fc (257) fd7d - fe7d -> fd (257) fe7e - ff7e -> fe (257) ff7f - ffff -> ff (129) Note that since the divisor is constant the compiler is able to optimize away the integer division in most cases. The only exception is the _ULL() case on 32bit architectures since that gets emitted as inline asm via do_div() and thus the compiler doesn't get to optimize it. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231013131402.24072-2-ville.syrjala@linux.intel.com Reviewed-by: Chaitanya Kumar Borah Reviewed-by: Jani Nikula Acked-by: Maxime Ripard --- include/drm/drm_color_mgmt.h | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'include/drm') diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index 81c298488b0c..54b2b2467bfd 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -36,20 +36,17 @@ struct drm_plane; * * Extract a degamma/gamma LUT value provided by user (in the form of * &drm_color_lut entries) and round it to the precision supported by the - * hardware. + * hardware, following OpenGL int<->float conversion rules + * (see eg. OpenGL 4.6 specification - 2.3.5 Fixed-Point Data Conversions). */ static inline u32 drm_color_lut_extract(u32 user_input, int bit_precision) { - u32 val = user_input; - u32 max = 0xffff >> (16 - bit_precision); - - /* Round only if we're not using full precision. */ - if (bit_precision < 16) { - val += 1UL << (16 - bit_precision - 1); - val >>= 16 - bit_precision; - } - - return clamp_val(val, 0, max); + if (bit_precision > 16) + return DIV_ROUND_CLOSEST_ULL(mul_u32_u32(user_input, (1 << bit_precision) - 1), + (1 << 16) - 1); + else + return DIV_ROUND_CLOSEST(user_input * ((1 << bit_precision) - 1), + (1 << 16) - 1); } u64 drm_color_ctm_s31_32_to_qm_n(u64 user_input, u32 m, u32 n); -- cgit v1.2.3 From 4e3b70da64a53784683cfcbac2deda5d6e540407 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 23 Oct 2023 09:46:05 +0200 Subject: drm: Disable the cursor plane on atomic contexts with virtualized drivers Cursor planes on virtualized drivers have special meaning and require that the clients handle them in specific ways, e.g. the cursor plane should react to the mouse movement the way a mouse cursor would be expected to and the client is required to set hotspot properties on it in order for the mouse events to be routed correctly. This breaks the contract as specified by the "universal planes". Fix it by disabling the cursor planes on virtualized drivers while adding a foundation on top of which it's possible to special case mouse cursor planes for clients that want it. Disabling the cursor planes makes some kms compositors which were broken, e.g. Weston, fallback to software cursor which works fine or at least better than currently while having no effect on others, e.g. gnome-shell or kwin, which put virtualized drivers on a deny-list when running in atomic context to make them fallback to legacy kms and avoid this issue. 
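For illustration, a minimal userspace sketch of the plane-enumeration path this change affects. The libdrm calls are standard; the helper below and its use are not part of the patch, and the opt-in client cap that re-enables cursor planes for hotspot-aware clients is added elsewhere in this series and is not shown in this excerpt.

#include <stdio.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

/* Sketch only: enumerate planes the way an atomic compositor does. */
static void list_planes(int fd)
{
	drmModePlaneResPtr res;
	uint32_t i;

	drmSetClientCap(fd, DRM_CLIENT_CAP_UNIVERSAL_PLANES, 1);
	drmSetClientCap(fd, DRM_CLIENT_CAP_ATOMIC, 1);

	res = drmModeGetPlaneResources(fd);
	if (!res)
		return;

	/*
	 * With this patch, qxl/vbox/virtio/vmwgfx leave their cursor plane
	 * out of this list for atomic clients that have not opted in, so
	 * such compositors fall back to a software cursor.
	 */
	for (i = 0; i < res->count_planes; i++)
		printf("plane %u\n", res->planes[i]);

	drmModeFreePlaneResources(res);
}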
Signed-off-by: Zack Rusin Fixes: 681e7ec73044 ("drm: Allow userspace to ask for universal plane list (v2)") Cc: # v5.4+ Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Cc: Dave Airlie Cc: Gerd Hoffmann Cc: Hans de Goede Cc: Gurchetan Singh Cc: Chia-I Wu Cc: dri-devel@lists.freedesktop.org Cc: virtualization@lists.linux-foundation.org Cc: spice-devel@lists.freedesktop.org Acked-by: Pekka Paalanen Reviewed-by: Javier Martinez Canillas Acked-by: Simon Ser Signed-off-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20231023074613.41327-2-aesteve@redhat.com --- drivers/gpu/drm/drm_plane.c | 13 +++++++++++++ drivers/gpu/drm/qxl/qxl_drv.c | 2 +- drivers/gpu/drm/vboxvideo/vbox_drv.c | 2 +- drivers/gpu/drm/virtio/virtgpu_drv.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- include/drm/drm_drv.h | 9 +++++++++ include/drm/drm_file.h | 12 ++++++++++++ 7 files changed, 38 insertions(+), 4 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 24e7998d1731..c6bbb0c209f4 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -678,6 +678,19 @@ int drm_mode_getplane_res(struct drm_device *dev, void *data, !file_priv->universal_planes) continue; + /* + * If we're running on a virtualized driver then, + * unless userspace advertizes support for the + * virtualized cursor plane, disable cursor planes + * because they'll be broken due to missing cursor + * hotspot info. + */ + if (plane->type == DRM_PLANE_TYPE_CURSOR && + drm_core_check_feature(dev, DRIVER_CURSOR_HOTSPOT) && + file_priv->atomic && + !file_priv->supports_virtualized_cursor_plane) + continue; + if (drm_lease_held(file_priv, plane->base.id)) { if (count < plane_resp->count_planes && put_user(plane->base.id, plane_ptr + count)) diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c index 46de4f171970..beee5563031a 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.c +++ b/drivers/gpu/drm/qxl/qxl_drv.c @@ -285,7 +285,7 @@ static const struct drm_ioctl_desc qxl_ioctls[] = { }; static struct drm_driver qxl_driver = { - .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, + .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_CURSOR_HOTSPOT, .dumb_create = qxl_mode_dumb_create, .dumb_map_offset = drm_gem_ttm_dumb_map_offset, diff --git a/drivers/gpu/drm/vboxvideo/vbox_drv.c b/drivers/gpu/drm/vboxvideo/vbox_drv.c index 047b95812334..cd9e66a06596 100644 --- a/drivers/gpu/drm/vboxvideo/vbox_drv.c +++ b/drivers/gpu/drm/vboxvideo/vbox_drv.c @@ -182,7 +182,7 @@ DEFINE_DRM_GEM_FOPS(vbox_fops); static const struct drm_driver driver = { .driver_features = - DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC, + DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC | DRIVER_CURSOR_HOTSPOT, .fops = &vbox_fops, .name = DRIVER_NAME, diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index 4334c7608408..f8e9abe647b9 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -177,7 +177,7 @@ static const struct drm_driver driver = { * out via drm_device::driver_features: */ .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_RENDER | DRIVER_ATOMIC | - DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE, + DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE | DRIVER_CURSOR_HOTSPOT, .open = virtio_gpu_driver_open, .postclose = virtio_gpu_driver_postclose, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 8b24ecf60e3e..d3e308fdfd5b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1611,7 +1611,7 @@ static const struct file_operations vmwgfx_driver_fops = { static const struct drm_driver driver = { .driver_features = - DRIVER_MODESET | DRIVER_RENDER | DRIVER_ATOMIC | DRIVER_GEM, + DRIVER_MODESET | DRIVER_RENDER | DRIVER_ATOMIC | DRIVER_GEM | DRIVER_CURSOR_HOTSPOT, .ioctls = vmw_ioctls, .num_ioctls = ARRAY_SIZE(vmw_ioctls), .master_set = vmw_master_set, diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index e2640dc64e08..ea36aa79dca2 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -110,6 +110,15 @@ enum drm_driver_feature { * Driver supports user defined GPU VA bindings for GEM objects. */ DRIVER_GEM_GPUVA = BIT(8), + /** + * @DRIVER_CURSOR_HOTSPOT: + * + * Driver supports and requires cursor hotspot information in the + * cursor plane (e.g. cursor plane has to actually track the mouse + * cursor and the clients are required to set hotspot in order for + * the cursor planes to work correctly). + */ + DRIVER_CURSOR_HOTSPOT = BIT(9), /* IMPORTANT: Below are all the legacy flags, add new ones above. */ diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index e1b5b4282f75..8f35dcea82d3 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -226,6 +226,18 @@ struct drm_file { */ bool is_master; + /** + * @supports_virtualized_cursor_plane: + * + * This client is capable of handling the cursor plane with the + * restrictions imposed on it by the virtualized drivers. + * + * This implies that the cursor plane has to behave like a cursor + * i.e. track cursor movement. It also requires setting of the + * hotspot properties by the client on the cursor plane. + */ + bool supports_virtualized_cursor_plane; + /** * @master: * -- cgit v1.2.3 From 8f7179a1027d89bf949b0b80c388a544a5e096f2 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 23 Oct 2023 09:46:06 +0200 Subject: drm/atomic: Add support for mouse hotspots Atomic modesetting code lacked support for specifying mouse cursor hotspots. The legacy kms DRM_IOCTL_MODE_CURSOR2 had support for setting the hotspot but the functionality was not implemented in the new atomic paths. Due to the lack of hotspots in the atomic paths userspace compositors completely disable atomic modesetting for drivers that require it (i.e. all paravirtualized drivers). This change adds hotspot properties to the atomic codepaths throughtout the DRM core and will allow enabling atomic modesetting for virtualized drivers in the userspace. 
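To make the new plane properties concrete, a minimal userspace sketch of programming the hotspot through the atomic interface. The property names HOTSPOT_X/HOTSPOT_Y come from the plane code added later in this series; the lookup helper and the libdrm atomic calls are assumptions of this sketch, not part of the patch.

#include <string.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

/* Find a plane property id by name (illustrative helper). */
static uint32_t find_prop(int fd, uint32_t plane_id, const char *name)
{
	drmModeObjectPropertiesPtr props;
	uint32_t id = 0, i;

	props = drmModeObjectGetProperties(fd, plane_id, DRM_MODE_OBJECT_PLANE);
	if (!props)
		return 0;

	for (i = 0; i < props->count_props; i++) {
		drmModePropertyPtr p = drmModeGetProperty(fd, props->props[i]);

		if (p && !strcmp(p->name, name))
			id = p->prop_id;
		drmModeFreeProperty(p);
	}
	drmModeFreeObjectProperties(props);
	return id;
}

/* Set the cursor hotspot atomically instead of via DRM_IOCTL_MODE_CURSOR2. */
static int set_hotspot(int fd, uint32_t cursor_plane, int32_t hot_x, int32_t hot_y)
{
	drmModeAtomicReqPtr req = drmModeAtomicAlloc();
	int ret;

	if (!req)
		return -1;

	drmModeAtomicAddProperty(req, cursor_plane,
				 find_prop(fd, cursor_plane, "HOTSPOT_X"), hot_x);
	drmModeAtomicAddProperty(req, cursor_plane,
				 find_prop(fd, cursor_plane, "HOTSPOT_Y"), hot_y);
	ret = drmModeAtomicCommit(fd, req, 0, NULL);
	drmModeAtomicFree(req);
	return ret;
}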
Signed-off-by: Zack Rusin Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Reviewed-by: Javier Martinez Canillas Signed-off-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20231023074613.41327-3-aesteve@redhat.com --- drivers/gpu/drm/drm_atomic_state_helper.c | 14 +++++++++ drivers/gpu/drm/drm_atomic_uapi.c | 20 +++++++++++++ drivers/gpu/drm/drm_plane.c | 50 +++++++++++++++++++++++++++++++ include/drm/drm_plane.h | 14 +++++++++ 4 files changed, 98 insertions(+) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c index 784e63d70a42..54975de44a0e 100644 --- a/drivers/gpu/drm/drm_atomic_state_helper.c +++ b/drivers/gpu/drm/drm_atomic_state_helper.c @@ -275,6 +275,20 @@ void __drm_atomic_helper_plane_state_reset(struct drm_plane_state *plane_state, plane_state->normalized_zpos = val; } } + + if (plane->hotspot_x_property) { + if (!drm_object_property_get_default_value(&plane->base, + plane->hotspot_x_property, + &val)) + plane_state->hotspot_x = val; + } + + if (plane->hotspot_y_property) { + if (!drm_object_property_get_default_value(&plane->base, + plane->hotspot_y_property, + &val)) + plane_state->hotspot_y = val; + } } EXPORT_SYMBOL(__drm_atomic_helper_plane_state_reset); diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 116407002269..aee4a65d4959 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -593,6 +593,22 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane, } else if (plane->funcs->atomic_set_property) { return plane->funcs->atomic_set_property(plane, state, property, val); + } else if (property == plane->hotspot_x_property) { + if (plane->type != DRM_PLANE_TYPE_CURSOR) { + drm_dbg_atomic(plane->dev, + "[PLANE:%d:%s] is not a cursor plane: 0x%llx\n", + plane->base.id, plane->name, val); + return -EINVAL; + } + state->hotspot_x = val; + } else if (property == plane->hotspot_y_property) { + if (plane->type != DRM_PLANE_TYPE_CURSOR) { + drm_dbg_atomic(plane->dev, + "[PLANE:%d:%s] is not a cursor plane: 0x%llx\n", + plane->base.id, plane->name, val); + return -EINVAL; + } + state->hotspot_y = val; } else { drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] unknown property [PROP:%d:%s]\n", @@ -653,6 +669,10 @@ drm_atomic_plane_get_property(struct drm_plane *plane, *val = state->scaling_filter; } else if (plane->funcs->atomic_get_property) { return plane->funcs->atomic_get_property(plane, state, property, val); + } else if (property == plane->hotspot_x_property) { + *val = state->hotspot_x; + } else if (property == plane->hotspot_y_property) { + *val = state->hotspot_y; } else { drm_dbg_atomic(dev, "[PLANE:%d:%s] unknown property [PROP:%d:%s]\n", diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index c6bbb0c209f4..eaca367bdc7e 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -230,6 +230,47 @@ static int create_in_format_blob(struct drm_device *dev, struct drm_plane *plane return 0; } +/** + * drm_plane_create_hotspot_properties - creates the mouse hotspot + * properties and attaches them to the given cursor plane + * + * @plane: drm cursor plane + * + * This function enables the mouse hotspot property on a given + * cursor plane. 
+ * + * RETURNS: + * Zero for success or -errno + */ +static int drm_plane_create_hotspot_properties(struct drm_plane *plane) +{ + struct drm_property *prop_x; + struct drm_property *prop_y; + + drm_WARN_ON(plane->dev, + !drm_core_check_feature(plane->dev, + DRIVER_CURSOR_HOTSPOT)); + + prop_x = drm_property_create_signed_range(plane->dev, 0, "HOTSPOT_X", + INT_MIN, INT_MAX); + if (IS_ERR(prop_x)) + return PTR_ERR(prop_x); + + prop_y = drm_property_create_signed_range(plane->dev, 0, "HOTSPOT_Y", + INT_MIN, INT_MAX); + if (IS_ERR(prop_y)) { + drm_property_destroy(plane->dev, prop_x); + return PTR_ERR(prop_y); + } + + drm_object_attach_property(&plane->base, prop_x, 0); + drm_object_attach_property(&plane->base, prop_y, 0); + plane->hotspot_x_property = prop_x; + plane->hotspot_y_property = prop_y; + + return 0; +} + __printf(9, 0) static int __drm_universal_plane_init(struct drm_device *dev, struct drm_plane *plane, @@ -348,6 +389,10 @@ static int __drm_universal_plane_init(struct drm_device *dev, drm_object_attach_property(&plane->base, config->prop_src_w, 0); drm_object_attach_property(&plane->base, config->prop_src_h, 0); } + if (drm_core_check_feature(dev, DRIVER_CURSOR_HOTSPOT) && + type == DRM_PLANE_TYPE_CURSOR) { + drm_plane_create_hotspot_properties(plane); + } if (format_modifier_count) create_in_format_blob(dev, plane); @@ -1067,6 +1112,11 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc, fb->hot_x = req->hot_x; fb->hot_y = req->hot_y; + + if (plane->hotspot_x_property && plane->state) + plane->state->hotspot_x = req->hot_x; + if (plane->hotspot_y_property && plane->state) + plane->state->hotspot_y = req->hot_y; } else { fb = NULL; } diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index 79d62856defb..e2c671585775 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -116,6 +116,10 @@ struct drm_plane_state { /** @src_h: height of visible portion of plane (in 16.16) */ uint32_t src_h, src_w; + /** @hotspot_x: x offset to mouse cursor hotspot */ + /** @hotspot_y: y offset to mouse cursor hotspot */ + int32_t hotspot_x, hotspot_y; + /** * @alpha: * Opacity of the plane with 0 as completely transparent and 0xffff as @@ -748,6 +752,16 @@ struct drm_plane { * scaling. */ struct drm_property *scaling_filter_property; + + /** + * @hotspot_x_property: property to set mouse hotspot x offset. + */ + struct drm_property *hotspot_x_property; + + /** + * @hotspot_y_property: property to set mouse hotspot y offset. + */ + struct drm_property *hotspot_y_property; }; #define obj_to_plane(x) container_of(x, struct drm_plane, base) -- cgit v1.2.3 From bce3dab7eb6ee596388699e8a052a7d58954c472 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 23 Oct 2023 09:46:11 +0200 Subject: drm: Remove legacy cursor hotspot code Atomic modesetting supports mouse cursor offsets via the hotspot properties that are created on cursor planes. All drivers which support hotspots are atomic and the legacy code has been implemented in terms of the atomic properties as well. Due to the above the lagacy cursor hotspot code is no longer used or needed and can be removed. 
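For context, a hedged sketch of the legacy entry point whose hotspot handling this series rewires; with the drm_plane.c change above, the hotspot passed through drmModeSetCursor2() now lands in plane->state->hotspot_x/y rather than in struct drm_framebuffer. The wrapper is illustrative only; the libdrm call is standard.

#include <xf86drm.h>
#include <xf86drmMode.h>

static int legacy_set_cursor(int fd, uint32_t crtc_id, uint32_t bo_handle,
			     uint32_t w, uint32_t h, int32_t hot_x, int32_t hot_y)
{
	/* hot_x/hot_y flow into the cursor plane state, not the framebuffer. */
	return drmModeSetCursor2(fd, crtc_id, bo_handle, w, h, hot_x, hot_y);
}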
Signed-off-by: Zack Rusin Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Reviewed-by: Javier Martinez Canillas Signed-off-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20231023074613.41327-8-aesteve@redhat.com --- drivers/gpu/drm/drm_plane.c | 3 --- include/drm/drm_framebuffer.h | 12 ------------ 2 files changed, 15 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index eaca367bdc7e..1dc00ad4c33c 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -1110,9 +1110,6 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc, return PTR_ERR(fb); } - fb->hot_x = req->hot_x; - fb->hot_y = req->hot_y; - if (plane->hotspot_x_property && plane->state) plane->state->hotspot_x = req->hot_x; if (plane->hotspot_y_property && plane->state) diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h index 80ece7b6dd9b..668077009fce 100644 --- a/include/drm/drm_framebuffer.h +++ b/include/drm/drm_framebuffer.h @@ -188,18 +188,6 @@ struct drm_framebuffer { * DRM_MODE_FB_MODIFIERS. */ int flags; - /** - * @hot_x: X coordinate of the cursor hotspot. Used by the legacy cursor - * IOCTL when the driver supports cursor through a DRM_PLANE_TYPE_CURSOR - * universal plane. - */ - int hot_x; - /** - * @hot_y: Y coordinate of the cursor hotspot. Used by the legacy cursor - * IOCTL when the driver supports cursor through a DRM_PLANE_TYPE_CURSOR - * universal plane. - */ - int hot_y; /** * @filp_head: Placed on &drm_file.fbs, protected by &drm_file.fbs_lock. */ -- cgit v1.2.3 From 35ed38d58257336c1df26b14fd5110b026e2adde Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 23 Nov 2023 23:13:00 +0100 Subject: drm: Allow drivers to indicate the damage helpers to ignore damage clips It allows drivers to set a struct drm_plane_state .ignore_damage_clips in their plane's .atomic_check callback, as an indication to damage helpers such as drm_atomic_helper_damage_iter_init() that the damage clips should be ignored. To be used by drivers that do per-buffer (e.g: virtio-gpu) uploads (rather than per-plane uploads), since these type of drivers need to handle buffer damages instead of frame damages. That way, these drivers could force a full plane update if the framebuffer attached to a plane's state has changed since the last update (page-flip). Fixes: 01f05940a9a7 ("drm/virtio: Enable fb damage clips property for the primary plane") Cc: # v6.4+ Reported-by: nerdopolis Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218115 Suggested-by: Thomas Zimmermann Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Reviewed-by: Zack Rusin Acked-by: Sima Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20231123221315.3579454-2-javierm@redhat.com --- Documentation/gpu/drm-kms.rst | 2 ++ drivers/gpu/drm/drm_damage_helper.c | 3 ++- include/drm/drm_plane.h | 10 ++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/drm') diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst index 4a112b8cc7fb..13d3627d8bc0 100644 --- a/Documentation/gpu/drm-kms.rst +++ b/Documentation/gpu/drm-kms.rst @@ -548,6 +548,8 @@ Plane Composition Properties .. kernel-doc:: drivers/gpu/drm/drm_blend.c :doc: overview +.. 
_damage_tracking_properties: + Damage Tracking Properties -------------------------- diff --git a/drivers/gpu/drm/drm_damage_helper.c b/drivers/gpu/drm/drm_damage_helper.c index d8b2955e88fd..afb02aae707b 100644 --- a/drivers/gpu/drm/drm_damage_helper.c +++ b/drivers/gpu/drm/drm_damage_helper.c @@ -241,7 +241,8 @@ drm_atomic_helper_damage_iter_init(struct drm_atomic_helper_damage_iter *iter, iter->plane_src.x2 = (src.x2 >> 16) + !!(src.x2 & 0xFFFF); iter->plane_src.y2 = (src.y2 >> 16) + !!(src.y2 & 0xFFFF); - if (!iter->clips || !drm_rect_equals(&state->src, &old_state->src)) { + if (!iter->clips || state->ignore_damage_clips || + !drm_rect_equals(&state->src, &old_state->src)) { iter->clips = NULL; iter->num_clips = 0; iter->full_update = true; diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index e2c671585775..c6565a6f9324 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -194,6 +194,16 @@ struct drm_plane_state { */ struct drm_property_blob *fb_damage_clips; + /** + * @ignore_damage_clips: + * + * Set by drivers to indicate the drm_atomic_helper_damage_iter_init() + * helper that the @fb_damage_clips blob property should be ignored. + * + * See :ref:`damage_tracking_properties` for more information. + */ + bool ignore_damage_clips; + /** * @src: * -- cgit v1.2.3 From fe375c74806dbd30b00ec038a80a5b7bf4653ab7 Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Tue, 14 Nov 2023 20:40:58 -0500 Subject: drm/sched: Rename priority MIN to LOW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename DRM_SCHED_PRIORITY_MIN to DRM_SCHED_PRIORITY_LOW. This mirrors DRM_SCHED_PRIORITY_HIGH, for a list of DRM scheduler priorities in ascending order, DRM_SCHED_PRIORITY_LOW, DRM_SCHED_PRIORITY_NORMAL, DRM_SCHED_PRIORITY_HIGH, DRM_SCHED_PRIORITY_KERNEL. 
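A hedged driver-side sketch of what the rename looks like at a call site; the wrapper function is illustrative and assumes the existing drm_sched_entity_init() signature, with the ordering and numeric values of the priorities unchanged at this point in the series.

#include <drm/gpu_scheduler.h>

static int example_entity_init(struct drm_sched_entity *entity,
			       struct drm_gpu_scheduler **sched_list,
			       unsigned int num_sched_list)
{
	/* Was DRM_SCHED_PRIORITY_MIN before this patch. */
	return drm_sched_entity_init(entity, DRM_SCHED_PRIORITY_LOW,
				     sched_list, num_sched_list, NULL);
}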
Cc: Rob Clark Cc: Abhinav Kumar Cc: Dmitry Baryshkov Cc: Danilo Krummrich Cc: Alex Deucher Cc: Christian König Cc: linux-arm-msm@vger.kernel.org Cc: freedreno@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Luben Tuikov Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20231124052752.6915-5-ltuikov89@gmail.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +- drivers/gpu/drm/msm/msm_gpu.h | 2 +- drivers/gpu/drm/scheduler/sched_entity.c | 2 +- drivers/gpu/drm/scheduler/sched_main.c | 10 +++++----- include/drm/gpu_scheduler.h | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index e2ae9ba147ba..5cb33ac99f70 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -73,10 +73,10 @@ amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio) return DRM_SCHED_PRIORITY_NORMAL; case AMDGPU_CTX_PRIORITY_VERY_LOW: - return DRM_SCHED_PRIORITY_MIN; + return DRM_SCHED_PRIORITY_LOW; case AMDGPU_CTX_PRIORITY_LOW: - return DRM_SCHED_PRIORITY_MIN; + return DRM_SCHED_PRIORITY_LOW; case AMDGPU_CTX_PRIORITY_NORMAL: return DRM_SCHED_PRIORITY_NORMAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 62bb7fc7448a..1a25931607c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -325,7 +325,7 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) int i; /* Signal all jobs not yet scheduled */ - for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { + for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_LOW; i--) { struct drm_sched_rq *rq = sched->sched_rq[i]; spin_lock(&rq->lock); list_for_each_entry(s_entity, &rq->entities, list) { diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 4252e3839fbc..eb0c97433e5f 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -347,7 +347,7 @@ struct msm_gpu_perfcntr { * DRM_SCHED_PRIORITY_KERNEL priority level is treated specially in some * cases, so we don't use it (no need for kernel generated jobs). 
*/ -#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - DRM_SCHED_PRIORITY_MIN) +#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - DRM_SCHED_PRIORITY_LOW) /** * struct msm_file_private - per-drm_file context diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index ee645d38e98d..dd2b8f777f51 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -88,7 +88,7 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, drm_err(sched_list[0], "entity with out-of-bounds priority:%u num_rqs:%u\n", entity->priority, sched_list[0]->num_rqs); entity->priority = max_t(s32, (s32) sched_list[0]->num_rqs - 1, - (s32) DRM_SCHED_PRIORITY_MIN); + (s32) DRM_SCHED_PRIORITY_LOW); } entity->rq = sched_list[0]->sched_rq[entity->priority]; } diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 044a8c4875ba..b6d7bc49ff6e 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -1052,7 +1052,7 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched) int i; /* Kernel run queue has higher priority than normal run queue*/ - for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { + for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_LOW; i--) { entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ? drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) : drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]); @@ -1291,7 +1291,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, if (!sched->sched_rq) goto Out_free; sched->num_rqs = num_rqs; - for (i = DRM_SCHED_PRIORITY_MIN; i < sched->num_rqs; i++) { + for (i = DRM_SCHED_PRIORITY_LOW; i < sched->num_rqs; i++) { sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL); if (!sched->sched_rq[i]) goto Out_unroll; @@ -1312,7 +1312,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, sched->ready = true; return 0; Out_unroll: - for (--i ; i >= DRM_SCHED_PRIORITY_MIN; i--) + for (--i ; i >= DRM_SCHED_PRIORITY_LOW; i--) kfree(sched->sched_rq[i]); Out_free: kfree(sched->sched_rq); @@ -1338,7 +1338,7 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched) drm_sched_wqueue_stop(sched); - for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { + for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_LOW; i--) { struct drm_sched_rq *rq = sched->sched_rq[i]; spin_lock(&rq->lock); @@ -1390,7 +1390,7 @@ void drm_sched_increase_karma(struct drm_sched_job *bad) if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) { atomic_inc(&bad->karma); - for (i = DRM_SCHED_PRIORITY_MIN; + for (i = DRM_SCHED_PRIORITY_LOW; i < min_t(typeof(sched->num_rqs), sched->num_rqs, DRM_SCHED_PRIORITY_KERNEL); i++) { struct drm_sched_rq *rq = sched->sched_rq[i]; diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 9a50348bd5c0..d8e2d84d9223 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -63,7 +63,7 @@ struct drm_file; * to an array, and as such should start at 0. 
*/ enum drm_sched_priority { - DRM_SCHED_PRIORITY_MIN, + DRM_SCHED_PRIORITY_LOW, DRM_SCHED_PRIORITY_NORMAL, DRM_SCHED_PRIORITY_HIGH, DRM_SCHED_PRIORITY_KERNEL, -- cgit v1.2.3 From 38f922a563aac3148ac73e73689805917f034cb5 Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Wed, 22 Nov 2023 22:08:48 -0500 Subject: drm/sched: Reverse run-queue priority enumeration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reverse run-queue priority enumeration such that the higest priority is now 0, and for each consecutive integer the prioirty diminishes. Run-queues correspond to priorities. To an external observer a scheduler created with a single run-queue, and another created with DRM_SCHED_PRIORITY_COUNT number of run-queues, should always schedule sched->sched_rq[0] with the same "priority", as that index run-queue exists in both schedulers, i.e. a scheduler with one run-queue or many. This patch makes it so. In other words, the "priority" of sched->sched_rq[n], n >= 0, is the same for any scheduler created with any allowable number of run-queues (priorities), 0 to DRM_SCHED_PRIORITY_COUNT. Cc: Rob Clark Cc: Abhinav Kumar Cc: Dmitry Baryshkov Cc: Danilo Krummrich Cc: Alex Deucher Cc: Christian König Cc: linux-arm-msm@vger.kernel.org Cc: freedreno@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Luben Tuikov Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20231124052752.6915-6-ltuikov89@gmail.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +- drivers/gpu/drm/msm/msm_gpu.h | 2 +- drivers/gpu/drm/scheduler/sched_entity.c | 5 +++-- drivers/gpu/drm/scheduler/sched_main.c | 15 +++++++-------- include/drm/gpu_scheduler.h | 6 +++--- 5 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 1a25931607c5..71a5cf37b472 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -325,7 +325,7 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) int i; /* Signal all jobs not yet scheduled */ - for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_LOW; i--) { + for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { struct drm_sched_rq *rq = sched->sched_rq[i]; spin_lock(&rq->lock); list_for_each_entry(s_entity, &rq->entities, list) { diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index eb0c97433e5f..2bfcb222e353 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -347,7 +347,7 @@ struct msm_gpu_perfcntr { * DRM_SCHED_PRIORITY_KERNEL priority level is treated specially in some * cases, so we don't use it (no need for kernel generated jobs). */ -#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - DRM_SCHED_PRIORITY_LOW) +#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_LOW - DRM_SCHED_PRIORITY_HIGH) /** * struct msm_file_private - per-drm_file context diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index dd2b8f777f51..3c4f5a392b06 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -82,13 +82,14 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, pr_warn("%s: called with uninitialized scheduler\n", __func__); } else if (num_sched_list) { /* The "priority" of an entity cannot exceed the number of run-queues of a - * scheduler. 
Protect against num_rqs being 0, by converting to signed. + * scheduler. Protect against num_rqs being 0, by converting to signed. Choose + * the lowest priority available. */ if (entity->priority >= sched_list[0]->num_rqs) { drm_err(sched_list[0], "entity with out-of-bounds priority:%u num_rqs:%u\n", entity->priority, sched_list[0]->num_rqs); entity->priority = max_t(s32, (s32) sched_list[0]->num_rqs - 1, - (s32) DRM_SCHED_PRIORITY_LOW); + (s32) DRM_SCHED_PRIORITY_KERNEL); } entity->rq = sched_list[0]->sched_rq[entity->priority]; } diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index b6d7bc49ff6e..682aebe96db7 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -1051,8 +1051,9 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched) struct drm_sched_entity *entity; int i; - /* Kernel run queue has higher priority than normal run queue*/ - for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_LOW; i--) { + /* Start with the highest priority. + */ + for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ? drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) : drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]); @@ -1291,7 +1292,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, if (!sched->sched_rq) goto Out_free; sched->num_rqs = num_rqs; - for (i = DRM_SCHED_PRIORITY_LOW; i < sched->num_rqs; i++) { + for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL); if (!sched->sched_rq[i]) goto Out_unroll; @@ -1312,7 +1313,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, sched->ready = true; return 0; Out_unroll: - for (--i ; i >= DRM_SCHED_PRIORITY_LOW; i--) + for (--i ; i >= DRM_SCHED_PRIORITY_KERNEL; i--) kfree(sched->sched_rq[i]); Out_free: kfree(sched->sched_rq); @@ -1338,7 +1339,7 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched) drm_sched_wqueue_stop(sched); - for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_LOW; i--) { + for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { struct drm_sched_rq *rq = sched->sched_rq[i]; spin_lock(&rq->lock); @@ -1390,9 +1391,7 @@ void drm_sched_increase_karma(struct drm_sched_job *bad) if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) { atomic_inc(&bad->karma); - for (i = DRM_SCHED_PRIORITY_LOW; - i < min_t(typeof(sched->num_rqs), sched->num_rqs, DRM_SCHED_PRIORITY_KERNEL); - i++) { + for (i = DRM_SCHED_PRIORITY_HIGH; i < sched->num_rqs; i++) { struct drm_sched_rq *rq = sched->sched_rq[i]; spin_lock(&rq->lock); diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index d8e2d84d9223..5acc64954a88 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -63,10 +63,10 @@ struct drm_file; * to an array, and as such should start at 0. 
*/ enum drm_sched_priority { - DRM_SCHED_PRIORITY_LOW, - DRM_SCHED_PRIORITY_NORMAL, - DRM_SCHED_PRIORITY_HIGH, DRM_SCHED_PRIORITY_KERNEL, + DRM_SCHED_PRIORITY_HIGH, + DRM_SCHED_PRIORITY_NORMAL, + DRM_SCHED_PRIORITY_LOW, DRM_SCHED_PRIORITY_COUNT }; -- cgit v1.2.3 From 288b039db225676e0c520c981a1b5a2562d893a3 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Fri, 24 Nov 2023 10:42:30 +0100 Subject: drm/bridge: Fix typo in post_disable() description s/singals/signals/ Fixes: 199e4e967af4 ("drm: Extract drm_bridge.h") Signed-off-by: Dario Binacchi Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20231124094253.658064-1-dario.binacchi@amarulasolutions.com --- include/drm/drm_bridge.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/drm') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index cfb7dcdb66c4..9ef461aa9b9e 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -194,7 +194,7 @@ struct drm_bridge_funcs { * or &drm_encoder_helper_funcs.dpms hook. * * The bridge must assume that the display pipe (i.e. clocks and timing - * singals) feeding it is no longer running when this callback is + * signals) feeding it is no longer running when this callback is * called. * * The @post_disable callback is optional. -- cgit v1.2.3 From e50e5fed41c7eed2db4119645bf3480ec43fec11 Mon Sep 17 00:00:00 2001 From: Jessica Zhang Date: Fri, 27 Oct 2023 15:32:51 -0700 Subject: drm: Introduce pixel_source DRM plane property Add support for pixel_source property to drm_plane and related documentation. In addition, force pixel_source to DRM_PLANE_PIXEL_SOURCE_FB in DRM_IOCTL_MODE_SETPLANE as to not break legacy userspace. This enum property will allow user to specify a pixel source for the plane. Possible pixel sources will be defined in the drm_plane_pixel_source enum. Currently, the only pixel sources are DRM_PLANE_PIXEL_SOURCE_FB (the default value) and DRM_PLANE_PIXEL_SOURCE_NONE. 
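A hedged sketch of the driver-side hookup for the new property; drm_plane_create_pixel_source_property() is the helper added in the diff that follows, while the wrapper function and where it is called from are illustrative only.

#include <drm/drm_blend.h>
#include <drm/drm_plane.h>

static int example_plane_create_props(struct drm_plane *plane)
{
	/*
	 * FB and NONE are always registered by the helper; an extra_sources
	 * mask of 0 exposes "pixel_source" with just those defaults.
	 */
	return drm_plane_create_pixel_source_property(plane, 0);
}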
Acked-by: Dmitry Baryshkov Acked-by: Pekka Paalanen Acked-by: Harry Wentland Acked-by: Sebastian Wick Acked-by: Simon Ser Signed-off-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20231027-solid-fill-v7-1-780188bfa7b2@quicinc.com --- drivers/gpu/drm/drm_atomic_state_helper.c | 1 + drivers/gpu/drm/drm_atomic_uapi.c | 4 ++ drivers/gpu/drm/drm_blend.c | 94 +++++++++++++++++++++++++++++++ drivers/gpu/drm/drm_plane.c | 19 +++++-- include/drm/drm_blend.h | 2 + include/drm/drm_plane.h | 21 +++++++ 6 files changed, 137 insertions(+), 4 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c index 54975de44a0e..311b04edf742 100644 --- a/drivers/gpu/drm/drm_atomic_state_helper.c +++ b/drivers/gpu/drm/drm_atomic_state_helper.c @@ -252,6 +252,7 @@ void __drm_atomic_helper_plane_state_reset(struct drm_plane_state *plane_state, plane_state->alpha = DRM_BLEND_ALPHA_OPAQUE; plane_state->pixel_blend_mode = DRM_MODE_BLEND_PREMULTI; + plane_state->pixel_source = DRM_PLANE_PIXEL_SOURCE_FB; if (plane->color_encoding_property) { if (!drm_object_property_get_default_value(&plane->base, diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index aee4a65d4959..bd7140531948 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -562,6 +562,8 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane, state->src_w = val; } else if (property == config->prop_src_h) { state->src_h = val; + } else if (property == plane->pixel_source_property) { + state->pixel_source = val; } else if (property == plane->alpha_property) { state->alpha = val; } else if (property == plane->blend_mode_property) { @@ -650,6 +652,8 @@ drm_atomic_plane_get_property(struct drm_plane *plane, *val = state->src_w; } else if (property == config->prop_src_h) { *val = state->src_h; + } else if (property == plane->pixel_source_property) { + *val = state->pixel_source; } else if (property == plane->alpha_property) { *val = state->alpha; } else if (property == plane->blend_mode_property) { diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c index 6e74de833466..fce734cdb85b 100644 --- a/drivers/gpu/drm/drm_blend.c +++ b/drivers/gpu/drm/drm_blend.c @@ -185,6 +185,25 @@ * plane does not expose the "alpha" property, then this is * assumed to be 1.0 * + * pixel_source: + * pixel_source is set up with drm_plane_create_pixel_source_property(). + * It is used to toggle the active source of pixel data for the plane. + * The plane will only display data from the set pixel_source -- any + * data from other sources will be ignored. + * + * For non-framebuffer sources, if pixel_source is set to a non-framebuffer + * and non-NONE source, and the corresponding source property is NULL, then + * the atomic commit should return an error. + * + * Possible values: + * + * "NONE": + * No active pixel source. + * Committing with a NONE pixel source will disable the plane. + * + * "FB": + * Framebuffer source set by the "FB_ID" property. + * * Note that all the property extensions described here apply either to the * plane or the CRTC (e.g. for the background color, which currently is not * exposed and assumed to be black). 
@@ -615,3 +634,78 @@ int drm_plane_create_blend_mode_property(struct drm_plane *plane, return 0; } EXPORT_SYMBOL(drm_plane_create_blend_mode_property); + +static const struct drm_prop_enum_list drm_pixel_source_enum_list[] = { + { DRM_PLANE_PIXEL_SOURCE_NONE, "NONE" }, + { DRM_PLANE_PIXEL_SOURCE_FB, "FB" }, +}; + +/** + * drm_plane_create_pixel_source_property - create a new pixel source property + * @plane: DRM plane + * @extra_sources: Bitmask of additional supported pixel_sources for the driver. + * DRM_PLANE_PIXEL_SOURCE_FB and DRM_PLANE_PIXEL_SOURCE_NONE will + * always be enabled as supported sources. + * + * This creates a new property describing the current source of pixel data for the + * plane. The pixel_source will be initialized as DRM_PLANE_PIXEL_SOURCE_FB by default. + * + * Drivers can set a custom default source by overriding the pixel_source value in + * drm_plane_funcs.reset() + * + * The property is exposed to userspace as an enumeration property called + * "pixel_source" and has the following enumeration values: + * + * "NONE": + * No active pixel source + * + * "FB": + * Framebuffer pixel source + * + * Returns: + * Zero on success, negative errno on failure. + */ +int drm_plane_create_pixel_source_property(struct drm_plane *plane, + unsigned long extra_sources) +{ + struct drm_device *dev = plane->dev; + struct drm_property *prop; + static const unsigned int valid_source_mask = BIT(DRM_PLANE_PIXEL_SOURCE_FB) | + BIT(DRM_PLANE_PIXEL_SOURCE_NONE); + int i; + + /* FB is supported by default */ + unsigned long supported_sources = extra_sources | + BIT(DRM_PLANE_PIXEL_SOURCE_FB) | + BIT(DRM_PLANE_PIXEL_SOURCE_NONE); + + if (WARN_ON(supported_sources & ~valid_source_mask)) + return -EINVAL; + + prop = drm_property_create(dev, DRM_MODE_PROP_ENUM | DRM_MODE_PROP_ATOMIC, "pixel_source", + hweight32(supported_sources)); + + if (!prop) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(drm_pixel_source_enum_list); i++) { + int ret; + + if (!test_bit(drm_pixel_source_enum_list[i].type, &supported_sources)) + continue; + + ret = drm_property_add_enum(prop, drm_pixel_source_enum_list[i].type, + drm_pixel_source_enum_list[i].name); + if (ret) { + drm_property_destroy(dev, prop); + + return ret; + } + } + + drm_object_attach_property(&plane->base, prop, DRM_PLANE_PIXEL_SOURCE_FB); + plane->pixel_source_property = prop; + + return 0; +} +EXPORT_SYMBOL(drm_plane_create_pixel_source_property); diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 9e8e4c60983d..c8dbe5ae60cc 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -953,6 +953,14 @@ bool drm_any_plane_has_format(struct drm_device *dev, } EXPORT_SYMBOL(drm_any_plane_has_format); +static bool drm_plane_needs_disable(struct drm_plane_state *state, struct drm_framebuffer *fb) +{ + if (state->pixel_source == DRM_PLANE_PIXEL_SOURCE_NONE) + return true; + + return state->pixel_source == DRM_PLANE_PIXEL_SOURCE_FB && fb == NULL; +} + /* * __setplane_internal - setplane handler for internal callers * @@ -975,8 +983,8 @@ static int __setplane_internal(struct drm_plane *plane, WARN_ON(drm_drv_uses_atomic_modeset(plane->dev)); - /* No fb means shut it down */ - if (!fb) { + /* No visible data means shut it down */ + if (drm_plane_needs_disable(plane->state, fb)) { plane->old_fb = plane->fb; ret = plane->funcs->disable_plane(plane, ctx); if (!ret) { @@ -1027,8 +1035,8 @@ static int __setplane_atomic(struct drm_plane *plane, WARN_ON(!drm_drv_uses_atomic_modeset(plane->dev)); - /* No fb means shut 
it down */ - if (!fb) + /* No visible data means shut it down */ + if (drm_plane_needs_disable(plane->state, fb)) return plane->funcs->disable_plane(plane, ctx); /* @@ -1101,6 +1109,9 @@ int drm_mode_setplane(struct drm_device *dev, void *data, return -ENOENT; } + if (plane->state) + plane->state->pixel_source = DRM_PLANE_PIXEL_SOURCE_FB; + if (plane_req->fb_id) { fb = drm_framebuffer_lookup(dev, file_priv, plane_req->fb_id); if (!fb) { diff --git a/include/drm/drm_blend.h b/include/drm/drm_blend.h index 88bdfec3bd88..122bbfbaae33 100644 --- a/include/drm/drm_blend.h +++ b/include/drm/drm_blend.h @@ -58,4 +58,6 @@ int drm_atomic_normalize_zpos(struct drm_device *dev, struct drm_atomic_state *state); int drm_plane_create_blend_mode_property(struct drm_plane *plane, unsigned int supported_modes); +int drm_plane_create_pixel_source_property(struct drm_plane *plane, + unsigned long extra_sources); #endif diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index c6565a6f9324..bc0176ba25be 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -40,6 +40,12 @@ enum drm_scaling_filter { DRM_SCALING_FILTER_NEAREST_NEIGHBOR, }; +enum drm_plane_pixel_source { + DRM_PLANE_PIXEL_SOURCE_NONE, + DRM_PLANE_PIXEL_SOURCE_FB, + DRM_PLANE_PIXEL_SOURCE_MAX +}; + /** * struct drm_plane_state - mutable plane state * @@ -120,6 +126,14 @@ struct drm_plane_state { /** @hotspot_y: y offset to mouse cursor hotspot */ int32_t hotspot_x, hotspot_y; + /** + * @pixel_source: + * + * Source of pixel information for the plane. See + * drm_plane_create_pixel_source_property() for more details. + */ + enum drm_plane_pixel_source pixel_source; + /** * @alpha: * Opacity of the plane with 0 as completely transparent and 0xffff as @@ -713,6 +727,13 @@ struct drm_plane { */ struct drm_plane_state *state; + /* + * @pixel_source_property: + * Optional pixel_source property for this plane. See + * drm_plane_create_pixel_source_property(). + */ + struct drm_property *pixel_source_property; + /** * @alpha_property: * Optional alpha property for this plane. See -- cgit v1.2.3 From 85863a4e16e77079ee14865905ddc3ef9483a640 Mon Sep 17 00:00:00 2001 From: Jessica Zhang Date: Fri, 27 Oct 2023 15:32:52 -0700 Subject: drm: Introduce solid fill DRM plane property Document and add support for solid_fill property to drm_plane. In addition, add support for setting and getting the values for solid_fill. 
To enable solid fill planes, userspace must assign a property blob to the "solid_fill" plane property containing the following information: struct drm_mode_solid_fill { u32 r, g, b, pad; }; Acked-by: Harry Wentland Acked-by: Sebastian Wick Signed-off-by: Jessica Zhang Reviewed-by: Dmitry Baryshkov Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20231027-solid-fill-v7-2-780188bfa7b2@quicinc.com --- drivers/gpu/drm/drm_atomic_state_helper.c | 9 ++++++++ drivers/gpu/drm/drm_atomic_uapi.c | 26 ++++++++++++++++++++++ drivers/gpu/drm/drm_blend.c | 30 ++++++++++++++++++++++++++ include/drm/drm_blend.h | 1 + include/drm/drm_plane.h | 36 +++++++++++++++++++++++++++++++ include/uapi/drm/drm_mode.h | 24 +++++++++++++++++++++ 6 files changed, 126 insertions(+) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c index 311b04edf742..f6c76cea8be4 100644 --- a/drivers/gpu/drm/drm_atomic_state_helper.c +++ b/drivers/gpu/drm/drm_atomic_state_helper.c @@ -254,6 +254,11 @@ void __drm_atomic_helper_plane_state_reset(struct drm_plane_state *plane_state, plane_state->pixel_blend_mode = DRM_MODE_BLEND_PREMULTI; plane_state->pixel_source = DRM_PLANE_PIXEL_SOURCE_FB; + if (plane_state->solid_fill_blob) { + drm_property_blob_put(plane_state->solid_fill_blob); + plane_state->solid_fill_blob = NULL; + } + if (plane->color_encoding_property) { if (!drm_object_property_get_default_value(&plane->base, plane->color_encoding_property, @@ -350,6 +355,9 @@ void __drm_atomic_helper_plane_duplicate_state(struct drm_plane *plane, if (state->fb) drm_framebuffer_get(state->fb); + if (state->solid_fill_blob) + drm_property_blob_get(state->solid_fill_blob); + state->fence = NULL; state->commit = NULL; state->fb_damage_clips = NULL; @@ -399,6 +407,7 @@ void __drm_atomic_helper_plane_destroy_state(struct drm_plane_state *state) drm_crtc_commit_put(state->commit); drm_property_blob_put(state->fb_damage_clips); + drm_property_blob_put(state->solid_fill_blob); } EXPORT_SYMBOL(__drm_atomic_helper_plane_destroy_state); diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index bd7140531948..d7ae8e2c0265 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -316,6 +316,20 @@ drm_atomic_set_crtc_for_connector(struct drm_connector_state *conn_state, } EXPORT_SYMBOL(drm_atomic_set_crtc_for_connector); +static void drm_atomic_set_solid_fill_prop(struct drm_plane_state *state) +{ + struct drm_mode_solid_fill *user_info; + + if (!state->solid_fill_blob) + return; + + user_info = (struct drm_mode_solid_fill *)state->solid_fill_blob->data; + + state->solid_fill.r = user_info->r; + state->solid_fill.g = user_info->g; + state->solid_fill.b = user_info->b; +} + static void set_out_fence_for_crtc(struct drm_atomic_state *state, struct drm_crtc *crtc, s32 __user *fence_ptr) { @@ -564,6 +578,15 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane, state->src_h = val; } else if (property == plane->pixel_source_property) { state->pixel_source = val; + } else if (property == plane->solid_fill_property) { + ret = drm_atomic_replace_property_blob_from_id(dev, + &state->solid_fill_blob, + val, sizeof(struct drm_mode_solid_fill), + -1, &replaced); + if (ret) + return ret; + + drm_atomic_set_solid_fill_prop(state); } else if (property == plane->alpha_property) { state->alpha = val; } else if (property == plane->blend_mode_property) { @@ -654,6 +677,9 @@ 
drm_atomic_plane_get_property(struct drm_plane *plane, *val = state->src_h; } else if (property == plane->pixel_source_property) { *val = state->pixel_source; + } else if (property == plane->solid_fill_property) { + *val = state->solid_fill_blob ? + state->solid_fill_blob->base.id : 0; } else if (property == plane->alpha_property) { *val = state->alpha; } else if (property == plane->blend_mode_property) { diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c index fce734cdb85b..665c5d9b056f 100644 --- a/drivers/gpu/drm/drm_blend.c +++ b/drivers/gpu/drm/drm_blend.c @@ -204,6 +204,10 @@ * "FB": * Framebuffer source set by the "FB_ID" property. * + * solid_fill: + * solid_fill is set up with drm_plane_create_solid_fill_property(). It + * contains pixel data that drivers can use to fill a plane. + * * Note that all the property extensions described here apply either to the * plane or the CRTC (e.g. for the background color, which currently is not * exposed and assumed to be black). @@ -709,3 +713,29 @@ int drm_plane_create_pixel_source_property(struct drm_plane *plane, return 0; } EXPORT_SYMBOL(drm_plane_create_pixel_source_property); + +/** + * drm_plane_create_solid_fill_property - create a new solid_fill property + * @plane: drm plane + * + * This creates a new property blob that holds pixel data for solid fill planes. + * The property is exposed to userspace as a property blob called "solid_fill". + * + * For information on what the blob contains, see `drm_mode_solid_fill`. + */ +int drm_plane_create_solid_fill_property(struct drm_plane *plane) +{ + struct drm_property *prop; + + prop = drm_property_create(plane->dev, + DRM_MODE_PROP_ATOMIC | DRM_MODE_PROP_BLOB, + "solid_fill", 0); + if (!prop) + return -ENOMEM; + + drm_object_attach_property(&plane->base, prop, 0); + plane->solid_fill_property = prop; + + return 0; +} +EXPORT_SYMBOL(drm_plane_create_solid_fill_property); diff --git a/include/drm/drm_blend.h b/include/drm/drm_blend.h index 122bbfbaae33..e7158fbee389 100644 --- a/include/drm/drm_blend.h +++ b/include/drm/drm_blend.h @@ -60,4 +60,5 @@ int drm_plane_create_blend_mode_property(struct drm_plane *plane, unsigned int supported_modes); int drm_plane_create_pixel_source_property(struct drm_plane *plane, unsigned long extra_sources); +int drm_plane_create_solid_fill_property(struct drm_plane *plane); #endif diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index bc0176ba25be..5bac644d4cc3 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -46,6 +46,18 @@ enum drm_plane_pixel_source { DRM_PLANE_PIXEL_SOURCE_MAX }; +/** + * struct solid_fill_property - RGB values for solid fill plane + * + * TODO: Add solid fill source and corresponding pixel source + * that supports RGBA color + */ +struct drm_solid_fill { + uint32_t r; + uint32_t g; + uint32_t b; +}; + /** * struct drm_plane_state - mutable plane state * @@ -134,6 +146,23 @@ struct drm_plane_state { */ enum drm_plane_pixel_source pixel_source; + /** + * @solid_fill_blob: + * + * Blob containing relevant information for a solid fill plane + * including RGB color values. See + * drm_plane_create_solid_fill_property() for more details. + */ + struct drm_property_blob *solid_fill_blob; + + /** + * @solid_fill: + * + * Pixel data for solid fill planes. See + * drm_plane_create_solid_fill_property() for more details. 
+ */ + struct drm_solid_fill solid_fill; + /** * @alpha: * Opacity of the plane with 0 as completely transparent and 0xffff as @@ -734,6 +763,13 @@ struct drm_plane { */ struct drm_property *pixel_source_property; + /** + * @solid_fill_property: + * Optional solid_fill property for this plane. See + * drm_plane_create_solid_fill_property(). + */ + struct drm_property *solid_fill_property; + /** * @alpha_property: * Optional alpha property for this plane. See diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 95630f170110..cbd943a8c88b 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -259,6 +259,30 @@ struct drm_mode_modeinfo { char name[DRM_DISPLAY_MODE_LEN]; }; +/** + * struct drm_mode_solid_fill - User info for solid fill planes + * + * This is the userspace API solid fill information structure. + * + * Userspace can enable solid fill planes by assigning the plane "solid_fill" + * property to a blob containing a single drm_mode_solid_fill struct populated with an RGB323232 + * color and setting the pixel source to "SOLID_FILL". + * + * For information on the plane property, see drm_plane_create_solid_fill_property() + * + * @r: Red color value of single pixel + * @g: Green color value of single pixel + * @b: Blue color value of single pixel + * @pad: padding, must be zero + */ +struct drm_mode_solid_fill { + __u32 r; + __u32 g; + __u32 b; + __u32 pad; +}; + + struct drm_mode_card_res { __u64 fb_id_ptr; __u64 crtc_id_ptr; -- cgit v1.2.3 From 4b64167042927531f4cfaf035b8f88c2f7a05f06 Mon Sep 17 00:00:00 2001 From: Jessica Zhang Date: Fri, 27 Oct 2023 15:32:53 -0700 Subject: drm: Add solid fill pixel source Add "SOLID_FILL" as a valid pixel source. If the pixel_source property is set to "SOLID_FILL", it will display data from the drm_plane "solid_fill" blob property. Reviewed-by: Dmitry Baryshkov Acked-by: Pekka Paalanen Acked-by: Harry Wentland Acked-by: Sebastian Wick Signed-off-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20231027-solid-fill-v7-3-780188bfa7b2@quicinc.com --- drivers/gpu/drm/drm_blend.c | 10 +++++++++- include/drm/drm_plane.h | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c index 665c5d9b056f..37b31b7e5ce5 100644 --- a/drivers/gpu/drm/drm_blend.c +++ b/drivers/gpu/drm/drm_blend.c @@ -204,6 +204,9 @@ * "FB": * Framebuffer source set by the "FB_ID" property. * + * "SOLID_FILL": + * Solid fill color source set by the "solid_fill" property. + * * solid_fill: * solid_fill is set up with drm_plane_create_solid_fill_property(). It * contains pixel data that drivers can use to fill a plane. @@ -642,6 +645,7 @@ EXPORT_SYMBOL(drm_plane_create_blend_mode_property); static const struct drm_prop_enum_list drm_pixel_source_enum_list[] = { { DRM_PLANE_PIXEL_SOURCE_NONE, "NONE" }, { DRM_PLANE_PIXEL_SOURCE_FB, "FB" }, + { DRM_PLANE_PIXEL_SOURCE_SOLID_FILL, "SOLID_FILL" }, }; /** @@ -666,6 +670,9 @@ static const struct drm_prop_enum_list drm_pixel_source_enum_list[] = { * "FB": * Framebuffer pixel source * + * "SOLID_FILL": + * Solid fill color pixel source + * * Returns: * Zero on success, negative errno on failure. 
*/ @@ -675,7 +682,8 @@ int drm_plane_create_pixel_source_property(struct drm_plane *plane, struct drm_device *dev = plane->dev; struct drm_property *prop; static const unsigned int valid_source_mask = BIT(DRM_PLANE_PIXEL_SOURCE_FB) | - BIT(DRM_PLANE_PIXEL_SOURCE_NONE); + BIT(DRM_PLANE_PIXEL_SOURCE_NONE) | + BIT(DRM_PLANE_PIXEL_SOURCE_SOLID_FILL); int i; /* FB is supported by default */ diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index 5bac644d4cc3..4b7af4381bbe 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -43,6 +43,7 @@ enum drm_scaling_filter { enum drm_plane_pixel_source { DRM_PLANE_PIXEL_SOURCE_NONE, DRM_PLANE_PIXEL_SOURCE_FB, + DRM_PLANE_PIXEL_SOURCE_SOLID_FILL, DRM_PLANE_PIXEL_SOURCE_MAX }; -- cgit v1.2.3 From e86413f5442ee094e66b3e75f2d3419ed0df9520 Mon Sep 17 00:00:00 2001 From: Jessica Zhang Date: Fri, 27 Oct 2023 15:32:55 -0700 Subject: drm/atomic: Add solid fill data to plane state dump Add solid_fill property data to the atomic plane state dump. Reviewed-by: Dmitry Baryshkov Acked-by: Harry Wentland Acked-by: Sebastian Wick Signed-off-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20231027-solid-fill-v7-5-780188bfa7b2@quicinc.com --- drivers/gpu/drm/drm_atomic.c | 4 ++++ drivers/gpu/drm/drm_plane.c | 8 ++++++++ include/drm/drm_plane.h | 3 +++ 3 files changed, 15 insertions(+) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 02aa7df832cc..1339785fbe80 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -726,6 +726,10 @@ static void drm_atomic_plane_print_state(struct drm_printer *p, drm_printf(p, "\tfb=%u\n", state->fb ? state->fb->base.id : 0); if (state->fb) drm_framebuffer_print_info(p, 2, state->fb); + drm_printf(p, "\tsolid_fill=%u\n", + state->solid_fill_blob ? state->solid_fill_blob->base.id : 0); + if (state->solid_fill_blob) + drm_plane_solid_fill_print_info(p, 2, state); drm_printf(p, "\tcrtc-pos=" DRM_RECT_FMT "\n", DRM_RECT_ARG(&dest)); drm_printf(p, "\tsrc-pos=" DRM_RECT_FP_FMT "\n", DRM_RECT_FP_ARG(&src)); drm_printf(p, "\trotation=%x\n", state->rotation); diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index c8dbe5ae60cc..c65c72701dd2 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -1641,6 +1641,14 @@ __drm_plane_get_damage_clips(const struct drm_plane_state *state) state->fb_damage_clips->data : NULL); } +void drm_plane_solid_fill_print_info(struct drm_printer *p, unsigned int indent, + const struct drm_plane_state *state) +{ + drm_printf_indent(p, indent, "r=0x%08x\n", state->solid_fill.r); + drm_printf_indent(p, indent, "g=0x%08x\n", state->solid_fill.g); + drm_printf_indent(p, indent, "b=0x%08x\n", state->solid_fill.b); +} + /** * drm_plane_get_damage_clips - Returns damage clips. * @state: Plane state. 
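With the "solid_fill" blob, the "SOLID_FILL" pixel source and the state-dump output above in place, the intended userspace flow looks roughly as follows; the remaining header hunk of this patch continues below. This is a hedged libdrm sketch, not taken from the series: the file descriptor, plane id, property ids and the enum value matching the "SOLID_FILL" name are assumed to have been looked up beforehand via drmModeObjectGetProperties()/drmModeGetProperty(), and the local struct merely mirrors the new uAPI definition for illustration.

#include <stdint.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

/* Mirrors the struct added to include/uapi/drm/drm_mode.h by this series;
 * only needed until updated uAPI headers are installed. */
struct drm_mode_solid_fill {
	uint32_t r, g, b, pad;
};

/*
 * Hypothetical compositor helper: switch an already-enabled plane to a solid
 * red fill.  Error handling of drmModeAtomicAddProperty() is trimmed for
 * brevity.
 */
static int set_solid_red(int fd, uint32_t plane_id,
			 uint32_t pixel_source_prop, uint64_t solid_fill_enum,
			 uint32_t solid_fill_prop)
{
	struct drm_mode_solid_fill fill = { .r = 0xffffffff, .g = 0, .b = 0 };
	drmModeAtomicReq *req;
	uint32_t blob_id = 0;
	int ret;

	ret = drmModeCreatePropertyBlob(fd, &fill, sizeof(fill), &blob_id);
	if (ret)
		return ret;

	req = drmModeAtomicAlloc();
	if (!req)
		return -1;

	drmModeAtomicAddProperty(req, plane_id, solid_fill_prop, blob_id);
	drmModeAtomicAddProperty(req, plane_id, pixel_source_prop, solid_fill_enum);

	ret = drmModeAtomicCommit(fd, req, DRM_MODE_ATOMIC_ALLOW_MODESET, NULL);
	drmModeAtomicFree(req);
	/* blob_id may be destroyed once it is no longer referenced. */
	return ret;
}

One apparent advantage of carrying the color in a blob rather than in three scalar properties is that the fill is replaced atomically, which is consistent with how the series copies it into drm_plane_state.
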
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index 4b7af4381bbe..d14e2f1db234 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -1025,6 +1025,9 @@ drm_plane_get_damage_clips_count(const struct drm_plane_state *state); struct drm_mode_rect * drm_plane_get_damage_clips(const struct drm_plane_state *state); +void drm_plane_solid_fill_print_info(struct drm_printer *p, unsigned int indent, + const struct drm_plane_state *state); + int drm_plane_create_scaling_filter_property(struct drm_plane *plane, unsigned int supported_filters); -- cgit v1.2.3 From f1e75da5364e780905d9cd6043f9c74cdcf84073 Mon Sep 17 00:00:00 2001 From: Jessica Zhang Date: Fri, 27 Oct 2023 15:32:57 -0700 Subject: drm/atomic: Loosen FB atomic checks Loosen the requirements for atomic and legacy commit so that, in cases where pixel_source != FB, the commit can still go through. This includes adding framebuffer NULL checks in other areas to account for FB being NULL when non-FB pixel sources are enabled. To disable a plane, the pixel_source must be NONE or the FB must be NULL if pixel_source == FB. Signed-off-by: Jessica Zhang Reviewed-by: Dmitry Baryshkov Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20231027-solid-fill-v7-7-780188bfa7b2@quicinc.com --- drivers/gpu/drm/drm_atomic.c | 21 ++++++++++---------- drivers/gpu/drm/drm_atomic_helper.c | 39 +++++++++++++++++++++---------------- include/drm/drm_atomic_helper.h | 4 ++-- include/drm/drm_plane.h | 29 +++++++++++++++++++++++++++ 4 files changed, 64 insertions(+), 29 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index c6f2b86c48ae..aed0a694c74c 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -674,17 +674,16 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state, { struct drm_plane *plane = new_plane_state->plane; struct drm_crtc *crtc = new_plane_state->crtc; - const struct drm_framebuffer *fb = new_plane_state->fb; int ret; - /* either *both* CRTC and FB must be set, or neither */ - if (crtc && !fb) { - drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] CRTC set but no FB\n", + /* either *both* CRTC and pixel source must be set, or neither */ + if (crtc && !drm_plane_has_visible_data(new_plane_state)) { + drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] CRTC set but no visible data\n", plane->base.id, plane->name); return -EINVAL; - } else if (fb && !crtc) { - drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] FB set but no CRTC\n", - plane->base.id, plane->name); + } else if (drm_plane_has_visible_data(new_plane_state) && !crtc) { + drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] Source %d has visible data but no CRTC\n", + plane->base.id, plane->name, new_plane_state->pixel_source); return -EINVAL; } @@ -715,9 +714,11 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state, } - ret = drm_atomic_plane_check_fb(new_plane_state); - if (ret) - return ret; + if (new_plane_state->pixel_source == DRM_PLANE_PIXEL_SOURCE_FB && new_plane_state->fb) { + ret = drm_atomic_plane_check_fb(new_plane_state); + if (ret) + return ret; + } if (plane_switching_crtc(old_plane_state, new_plane_state)) { drm_dbg_atomic(plane->dev, diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index c3f677130def..dc048988e3f3 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -861,7 +861,6 @@ int 
drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state, bool can_position, bool can_update_disabled) { - struct drm_framebuffer *fb = plane_state->fb; struct drm_rect *src = &plane_state->src; struct drm_rect *dst = &plane_state->dst; unsigned int rotation = plane_state->rotation; @@ -873,7 +872,7 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state, *src = drm_plane_state_src(plane_state); *dst = drm_plane_state_dest(plane_state); - if (!fb) { + if (!drm_plane_has_visible_data(plane_state)) { plane_state->visible = false; return 0; } @@ -890,25 +889,31 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state, return -EINVAL; } - drm_rect_rotate(src, fb->width << 16, fb->height << 16, rotation); + /* Check that selected pixel source is valid */ + if (plane_state->pixel_source == DRM_PLANE_PIXEL_SOURCE_FB && plane_state->fb) { + struct drm_framebuffer *fb = plane_state->fb; + drm_rect_rotate(src, fb->width << 16, fb->height << 16, rotation); - /* Check scaling */ - hscale = drm_rect_calc_hscale(src, dst, min_scale, max_scale); - vscale = drm_rect_calc_vscale(src, dst, min_scale, max_scale); - if (hscale < 0 || vscale < 0) { - drm_dbg_kms(plane_state->plane->dev, - "Invalid scaling of plane\n"); - drm_rect_debug_print("src: ", &plane_state->src, true); - drm_rect_debug_print("dst: ", &plane_state->dst, false); - return -ERANGE; - } + /* Check scaling */ + hscale = drm_rect_calc_hscale(src, dst, min_scale, max_scale); + vscale = drm_rect_calc_vscale(src, dst, min_scale, max_scale); - if (crtc_state->enable) - drm_mode_get_hv_timing(&crtc_state->mode, &clip.x2, &clip.y2); + if (hscale < 0 || vscale < 0) { + drm_dbg_kms(plane_state->plane->dev, + "Invalid scaling of plane\n"); + drm_rect_debug_print("src: ", &plane_state->src, true); + drm_rect_debug_print("dst: ", &plane_state->dst, false); + return -ERANGE; + } - plane_state->visible = drm_rect_clip_scaled(src, dst, &clip); + if (crtc_state->enable) + drm_mode_get_hv_timing(&crtc_state->mode, &clip.x2, &clip.y2); - drm_rect_rotate_inv(src, fb->width << 16, fb->height << 16, rotation); + plane_state->visible = drm_rect_clip_scaled(src, dst, &clip); + drm_rect_rotate_inv(src, fb->width << 16, fb->height << 16, rotation); + } else if (drm_plane_solid_fill_enabled(plane_state)) { + plane_state->visible = true; + } if (!plane_state->visible) /* diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h index 536a0b0091c3..6d97f38ac1f6 100644 --- a/include/drm/drm_atomic_helper.h +++ b/include/drm/drm_atomic_helper.h @@ -256,8 +256,8 @@ drm_atomic_plane_disabling(struct drm_plane_state *old_plane_state, * Anything else should be considered a bug in the atomic core, so we * gently warn about it. 
*/ - WARN_ON((new_plane_state->crtc == NULL && new_plane_state->fb != NULL) || - (new_plane_state->crtc != NULL && new_plane_state->fb == NULL)); + WARN_ON((new_plane_state->crtc == NULL && drm_plane_has_visible_data(new_plane_state)) || + (new_plane_state->crtc != NULL && !drm_plane_has_visible_data(new_plane_state))); return old_plane_state->crtc && !new_plane_state->crtc; } diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index d14e2f1db234..3b187f3f5466 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -1016,6 +1016,35 @@ static inline struct drm_plane *drm_plane_find(struct drm_device *dev, #define drm_for_each_plane(plane, dev) \ list_for_each_entry(plane, &(dev)->mode_config.plane_list, head) +/** + * drm_plane_solid_fill_enabled - Check if solid fill is enabled on plane + * @state: plane state + * + * Returns: + * Whether the plane has been assigned a solid_fill_blob + */ +static inline bool drm_plane_solid_fill_enabled(struct drm_plane_state *state) +{ + if (!state) + return false; + return state->pixel_source == DRM_PLANE_PIXEL_SOURCE_SOLID_FILL && state->solid_fill_blob; +} + +static inline bool drm_plane_has_visible_data(const struct drm_plane_state *state) +{ + switch (state->pixel_source) { + case DRM_PLANE_PIXEL_SOURCE_NONE: + return false; + case DRM_PLANE_PIXEL_SOURCE_SOLID_FILL: + return state->solid_fill_blob != NULL; + case DRM_PLANE_PIXEL_SOURCE_FB: + default: + WARN_ON(state->pixel_source != DRM_PLANE_PIXEL_SOURCE_FB); + } + + return state->fb != NULL; +} + bool drm_any_plane_has_format(struct drm_device *dev, u32 format, u64 modifier); -- cgit v1.2.3 From a5b2dcb96d6acb286459612a142371b0d74543bf Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Wed, 20 Sep 2023 13:13:58 -0700 Subject: drm: improve the documentation of connector hpd ops While making the changes in [1], it was noted that the documentation of the enable_hpd() and disable_hpd() does not make it clear that these ops should not try to do hpd state maintenance and should only enable/disable hpd related hardware for the connector. The state management of these calls to make sure these calls are balanced is handled by the DRM core and we should keep it that way to minimize the overhead in the drivers which implement these ops. [1]: https://patchwork.freedesktop.org/patch/558387/ Signed-off-by: Abhinav Kumar Suggested-by: Laurent Pinchart Reviewed-by: Laurent Pinchart Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20230920201358.27597-1-quic_abhinavk@quicinc.com --- include/drm/drm_modeset_helper_vtables.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/drm') diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h index e3c3ac615909..a33cf7488737 100644 --- a/include/drm/drm_modeset_helper_vtables.h +++ b/include/drm/drm_modeset_helper_vtables.h @@ -1154,6 +1154,11 @@ struct drm_connector_helper_funcs { * This operation is optional. * * This callback is used by the drm_kms_helper_poll_enable() helpers. + * + * This operation does not need to perform any hpd state tracking as + * the DRM core handles that maintenance and ensures the calls to enable + * and disable hpd are balanced. + * */ void (*enable_hpd)(struct drm_connector *connector); @@ -1165,6 +1170,11 @@ struct drm_connector_helper_funcs { * This operation is optional. * * This callback is used by the drm_kms_helper_poll_disable() helpers. 
+ * + * This operation does not need to perform any hpd state tracking as + * the DRM core handles that maintenance and ensures the calls to enable + * and disable hpd are balanced. + * */ void (*disable_hpd)(struct drm_connector *connector); }; -- cgit v1.2.3 From e4256751df4a0a3860f181588ee730dd19cb0c30 Mon Sep 17 00:00:00 2001 From: Khaled Almahallawy Date: Thu, 30 Nov 2023 15:15:10 -0800 Subject: drm/display/dp: Add the remaining Square PHY patterns DPCD register definitions DP2.1 Specs added new DPCDs definitions for square pattern configs[1] These new definitions are used for UHBR Source Transmitter Equalizations tests[2]. Add the 3 new values for square pattern. v2: rebase [1]: DP2.1 Specs - 2.12.3.6.5 Square Pattern [2]: DP2.1 PHY CTS specs - 4.3 UHBR Source Transmitter Equalization Cc: Jani Nikula Cc: Imre Deak Cc: Lee Shawn C Signed-off-by: Khaled Almahallawy Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231130231510.221143-1-khaled.almahallawy@intel.com --- include/drm/display/drm_dp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/drm') diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h index 83d2039c018b..3731828825bd 100644 --- a/include/drm/display/drm_dp.h +++ b/include/drm/display/drm_dp.h @@ -651,6 +651,9 @@ # define DP_LINK_QUAL_PATTERN_PRSBS31 0x38 # define DP_LINK_QUAL_PATTERN_CUSTOM 0x40 # define DP_LINK_QUAL_PATTERN_SQUARE 0x48 +# define DP_LINK_QUAL_PATTERN_SQUARE_PRESHOOT_DISABLED 0x49 +# define DP_LINK_QUAL_PATTERN_SQUARE_DEEMPHASIS_DISABLED 0x4a +# define DP_LINK_QUAL_PATTERN_SQUARE_PRESHOOT_DEEMPHASIS_DISABLED 0x4b #define DP_TRAINING_LANE0_1_SET2 0x10f #define DP_TRAINING_LANE2_3_SET2 0x110 -- cgit v1.2.3 From 2a04739139b2b2761571e18937e2400e71eff664 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 3 Dec 2023 14:43:28 +0300 Subject: drm/bridge: add transparent bridge helper Define a helper for creating simple transparent bridges which serve the only purpose of linking devices into the bridge chain up to the last bridge representing the connector. This is especially useful for DP/USB-C bridge chains, which can span across several devices, but do not require any additional functionality from the intermediate bridges. Reviewed-by: Neil Armstrong Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20231203114333.1305826-2-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/bridge/Kconfig | 9 +++ drivers/gpu/drm/bridge/Makefile | 1 + drivers/gpu/drm/bridge/aux-bridge.c | 140 ++++++++++++++++++++++++++++++++++++ include/drm/bridge/aux-bridge.h | 19 +++++ 4 files changed, 169 insertions(+) create mode 100644 drivers/gpu/drm/bridge/aux-bridge.c create mode 100644 include/drm/bridge/aux-bridge.h (limited to 'include/drm') diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index ba82a1142adf..f12eab62799f 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -12,6 +12,15 @@ config DRM_PANEL_BRIDGE help DRM bridge wrapper of DRM panels +config DRM_AUX_BRIDGE + tristate + depends on DRM_BRIDGE && OF + select AUXILIARY_BUS + select DRM_PANEL_BRIDGE + help + Simple transparent bridge that is used by several non-DRM drivers to + build bridges chain. 
+ menu "Display Interface Bridges" depends on DRM && DRM_BRIDGE diff --git a/drivers/gpu/drm/bridge/Makefile b/drivers/gpu/drm/bridge/Makefile index 2b892b7ed59e..918e3bfff079 100644 --- a/drivers/gpu/drm/bridge/Makefile +++ b/drivers/gpu/drm/bridge/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_DRM_AUX_BRIDGE) += aux-bridge.o obj-$(CONFIG_DRM_CHIPONE_ICN6211) += chipone-icn6211.o obj-$(CONFIG_DRM_CHRONTEL_CH7033) += chrontel-ch7033.o obj-$(CONFIG_DRM_CROS_EC_ANX7688) += cros-ec-anx7688.o diff --git a/drivers/gpu/drm/bridge/aux-bridge.c b/drivers/gpu/drm/bridge/aux-bridge.c new file mode 100644 index 000000000000..49d7c2ab1ecc --- /dev/null +++ b/drivers/gpu/drm/bridge/aux-bridge.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2023 Linaro Ltd. + * + * Author: Dmitry Baryshkov + */ +#include +#include + +#include +#include + +static DEFINE_IDA(drm_aux_bridge_ida); + +static void drm_aux_bridge_release(struct device *dev) +{ + struct auxiliary_device *adev = to_auxiliary_dev(dev); + + ida_free(&drm_aux_bridge_ida, adev->id); + + kfree(adev); +} + +static void drm_aux_bridge_unregister_adev(void *_adev) +{ + struct auxiliary_device *adev = _adev; + + auxiliary_device_delete(adev); + auxiliary_device_uninit(adev); +} + +/** + * drm_aux_bridge_register - Create a simple bridge device to link the chain + * @parent: device instance providing this bridge + * + * Creates a simple DRM bridge that doesn't implement any drm_bridge + * operations. Such bridges merely fill a place in the bridge chain linking + * surrounding DRM bridges. + * + * Return: zero on success, negative error code on failure + */ +int drm_aux_bridge_register(struct device *parent) +{ + struct auxiliary_device *adev; + int ret; + + adev = kzalloc(sizeof(*adev), GFP_KERNEL); + if (!adev) + return -ENOMEM; + + ret = ida_alloc(&drm_aux_bridge_ida, GFP_KERNEL); + if (ret < 0) { + kfree(adev); + return ret; + } + + adev->id = ret; + adev->name = "aux_bridge"; + adev->dev.parent = parent; + adev->dev.of_node = parent->of_node; + adev->dev.release = drm_aux_bridge_release; + + ret = auxiliary_device_init(adev); + if (ret) { + ida_free(&drm_aux_bridge_ida, adev->id); + kfree(adev); + return ret; + } + + ret = auxiliary_device_add(adev); + if (ret) { + auxiliary_device_uninit(adev); + return ret; + } + + return devm_add_action_or_reset(parent, drm_aux_bridge_unregister_adev, adev); +} +EXPORT_SYMBOL_GPL(drm_aux_bridge_register); + +struct drm_aux_bridge_data { + struct drm_bridge bridge; + struct drm_bridge *next_bridge; + struct device *dev; +}; + +static int drm_aux_bridge_attach(struct drm_bridge *bridge, + enum drm_bridge_attach_flags flags) +{ + struct drm_aux_bridge_data *data; + + if (!(flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR)) + return -EINVAL; + + data = container_of(bridge, struct drm_aux_bridge_data, bridge); + + return drm_bridge_attach(bridge->encoder, data->next_bridge, bridge, + DRM_BRIDGE_ATTACH_NO_CONNECTOR); +} + +static const struct drm_bridge_funcs drm_aux_bridge_funcs = { + .attach = drm_aux_bridge_attach, +}; + +static int drm_aux_bridge_probe(struct auxiliary_device *auxdev, + const struct auxiliary_device_id *id) +{ + struct drm_aux_bridge_data *data; + + data = devm_kzalloc(&auxdev->dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->dev = &auxdev->dev; + data->next_bridge = devm_drm_of_get_bridge(&auxdev->dev, auxdev->dev.of_node, 0, 0); + if (IS_ERR(data->next_bridge)) + return dev_err_probe(&auxdev->dev, PTR_ERR(data->next_bridge), 
+ "failed to acquire drm_bridge\n"); + + data->bridge.funcs = &drm_aux_bridge_funcs; + data->bridge.of_node = data->dev->of_node; + + return devm_drm_bridge_add(data->dev, &data->bridge); +} + +static const struct auxiliary_device_id drm_aux_bridge_table[] = { + { .name = KBUILD_MODNAME ".aux_bridge" }, + {}, +}; +MODULE_DEVICE_TABLE(auxiliary, drm_aux_bridge_table); + +static struct auxiliary_driver drm_aux_bridge_drv = { + .name = "aux_bridge", + .id_table = drm_aux_bridge_table, + .probe = drm_aux_bridge_probe, +}; +module_auxiliary_driver(drm_aux_bridge_drv); + +MODULE_AUTHOR("Dmitry Baryshkov "); +MODULE_DESCRIPTION("DRM transparent bridge"); +MODULE_LICENSE("GPL"); diff --git a/include/drm/bridge/aux-bridge.h b/include/drm/bridge/aux-bridge.h new file mode 100644 index 000000000000..b3a9cc9c862f --- /dev/null +++ b/include/drm/bridge/aux-bridge.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (C) 2023 Linaro Ltd. + * + * Author: Dmitry Baryshkov + */ +#ifndef DRM_AUX_BRIDGE_H +#define DRM_AUX_BRIDGE_H + +#if IS_ENABLED(CONFIG_DRM_AUX_BRIDGE) +int drm_aux_bridge_register(struct device *parent); +#else +static inline int drm_aux_bridge_register(struct device *parent) +{ + return 0; +} +#endif + +#endif -- cgit v1.2.3 From e560518a6c2e60f1566473c146fddcff3281f617 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 3 Dec 2023 14:43:31 +0300 Subject: drm/bridge: implement generic DP HPD bridge Several USB-C controllers implement a pretty simple DRM bridge which implements just the HPD notification operations. Add special helper for creating such simple bridges. Acked-by: Neil Armstrong Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20231203114333.1305826-5-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/bridge/Kconfig | 8 ++ drivers/gpu/drm/bridge/Makefile | 1 + drivers/gpu/drm/bridge/aux-hpd-bridge.c | 163 ++++++++++++++++++++++++++++++++ include/drm/bridge/aux-bridge.h | 18 ++++ 4 files changed, 190 insertions(+) create mode 100644 drivers/gpu/drm/bridge/aux-hpd-bridge.c (limited to 'include/drm') diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index f12eab62799f..19d2dc05c397 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -21,6 +21,14 @@ config DRM_AUX_BRIDGE Simple transparent bridge that is used by several non-DRM drivers to build bridges chain. +config DRM_AUX_HPD_BRIDGE + tristate + depends on DRM_BRIDGE && OF + select AUXILIARY_BUS + help + Simple bridge that terminates the bridge chain and provides HPD + support. + menu "Display Interface Bridges" depends on DRM && DRM_BRIDGE diff --git a/drivers/gpu/drm/bridge/Makefile b/drivers/gpu/drm/bridge/Makefile index 918e3bfff079..017b5832733b 100644 --- a/drivers/gpu/drm/bridge/Makefile +++ b/drivers/gpu/drm/bridge/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DRM_AUX_BRIDGE) += aux-bridge.o +obj-$(CONFIG_DRM_AUX_HPD_BRIDGE) += aux-hpd-bridge.o obj-$(CONFIG_DRM_CHIPONE_ICN6211) += chipone-icn6211.o obj-$(CONFIG_DRM_CHRONTEL_CH7033) += chrontel-ch7033.o obj-$(CONFIG_DRM_CROS_EC_ANX7688) += cros-ec-anx7688.o diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c new file mode 100644 index 000000000000..5d2ab3a715f9 --- /dev/null +++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2023 Linaro Ltd. 
+ * + * Author: Dmitry Baryshkov + */ +#include +#include +#include + +#include +#include + +static DEFINE_IDA(drm_aux_hpd_bridge_ida); + +struct drm_aux_hpd_bridge_data { + struct drm_bridge bridge; + struct device *dev; +}; + +static void drm_aux_hpd_bridge_release(struct device *dev) +{ + struct auxiliary_device *adev = to_auxiliary_dev(dev); + + ida_free(&drm_aux_hpd_bridge_ida, adev->id); + + of_node_put(adev->dev.platform_data); + + kfree(adev); +} + +static void drm_aux_hpd_bridge_unregister_adev(void *_adev) +{ + struct auxiliary_device *adev = _adev; + + auxiliary_device_delete(adev); + auxiliary_device_uninit(adev); +} + +/** + * drm_dp_hpd_bridge_register - Create a simple HPD DisplayPort bridge + * @parent: device instance providing this bridge + * @np: device node pointer corresponding to this bridge instance + * + * Creates a simple DRM bridge with the type set to + * DRM_MODE_CONNECTOR_DisplayPort, which terminates the bridge chain and is + * able to send the HPD events. + * + * Return: device instance that will handle created bridge or an error code + * encoded into the pointer. + */ +struct device *drm_dp_hpd_bridge_register(struct device *parent, + struct device_node *np) +{ + struct auxiliary_device *adev; + int ret; + + adev = kzalloc(sizeof(*adev), GFP_KERNEL); + if (!adev) + return ERR_PTR(-ENOMEM); + + ret = ida_alloc(&drm_aux_hpd_bridge_ida, GFP_KERNEL); + if (ret < 0) { + kfree(adev); + return ERR_PTR(ret); + } + + adev->id = ret; + adev->name = "dp_hpd_bridge"; + adev->dev.parent = parent; + adev->dev.of_node = parent->of_node; + adev->dev.release = drm_aux_hpd_bridge_release; + adev->dev.platform_data = np; + + ret = auxiliary_device_init(adev); + if (ret) { + ida_free(&drm_aux_hpd_bridge_ida, adev->id); + kfree(adev); + return ERR_PTR(ret); + } + + ret = auxiliary_device_add(adev); + if (ret) { + auxiliary_device_uninit(adev); + return ERR_PTR(ret); + } + + ret = devm_add_action_or_reset(parent, drm_aux_hpd_bridge_unregister_adev, adev); + if (ret) + return ERR_PTR(ret); + + return &adev->dev; +} +EXPORT_SYMBOL_GPL(drm_dp_hpd_bridge_register); + +/** + * drm_aux_hpd_bridge_notify - notify hot plug detection events + * @dev: device created for the HPD bridge + * @status: output connection status + * + * A wrapper around drm_bridge_hpd_notify() that is used to report hot plug + * detection events for bridges created via drm_dp_hpd_bridge_register(). + * + * This function shall be called in a context that can sleep. + */ +void drm_aux_hpd_bridge_notify(struct device *dev, enum drm_connector_status status) +{ + struct auxiliary_device *adev = to_auxiliary_dev(dev); + struct drm_aux_hpd_bridge_data *data = auxiliary_get_drvdata(adev); + + if (!data) + return; + + drm_bridge_hpd_notify(&data->bridge, status); +} +EXPORT_SYMBOL_GPL(drm_aux_hpd_bridge_notify); + +static int drm_aux_hpd_bridge_attach(struct drm_bridge *bridge, + enum drm_bridge_attach_flags flags) +{ + return flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR ? 
0 : -EINVAL; +} + +static const struct drm_bridge_funcs drm_aux_hpd_bridge_funcs = { + .attach = drm_aux_hpd_bridge_attach, +}; + +static int drm_aux_hpd_bridge_probe(struct auxiliary_device *auxdev, + const struct auxiliary_device_id *id) +{ + struct drm_aux_hpd_bridge_data *data; + + data = devm_kzalloc(&auxdev->dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->dev = &auxdev->dev; + data->bridge.funcs = &drm_aux_hpd_bridge_funcs; + data->bridge.of_node = dev_get_platdata(data->dev); + data->bridge.ops = DRM_BRIDGE_OP_HPD; + data->bridge.type = id->driver_data; + + auxiliary_set_drvdata(auxdev, data); + + return devm_drm_bridge_add(data->dev, &data->bridge); +} + +static const struct auxiliary_device_id drm_aux_hpd_bridge_table[] = { + { .name = KBUILD_MODNAME ".dp_hpd_bridge", .driver_data = DRM_MODE_CONNECTOR_DisplayPort, }, + {}, +}; +MODULE_DEVICE_TABLE(auxiliary, drm_aux_hpd_bridge_table); + +static struct auxiliary_driver drm_aux_hpd_bridge_drv = { + .name = "aux_hpd_bridge", + .id_table = drm_aux_hpd_bridge_table, + .probe = drm_aux_hpd_bridge_probe, +}; +module_auxiliary_driver(drm_aux_hpd_bridge_drv); + +MODULE_AUTHOR("Dmitry Baryshkov "); +MODULE_DESCRIPTION("DRM HPD bridge"); +MODULE_LICENSE("GPL"); diff --git a/include/drm/bridge/aux-bridge.h b/include/drm/bridge/aux-bridge.h index b3a9cc9c862f..66249ff0858e 100644 --- a/include/drm/bridge/aux-bridge.h +++ b/include/drm/bridge/aux-bridge.h @@ -7,6 +7,8 @@ #ifndef DRM_AUX_BRIDGE_H #define DRM_AUX_BRIDGE_H +#include + #if IS_ENABLED(CONFIG_DRM_AUX_BRIDGE) int drm_aux_bridge_register(struct device *parent); #else @@ -16,4 +18,20 @@ static inline int drm_aux_bridge_register(struct device *parent) } #endif +#if IS_ENABLED(CONFIG_DRM_AUX_HPD_BRIDGE) +struct device *drm_dp_hpd_bridge_register(struct device *parent, + struct device_node *np); +void drm_aux_hpd_bridge_notify(struct device *dev, enum drm_connector_status status); +#else +static inline struct device *drm_dp_hpd_bridge_register(struct device *parent, + struct device_node *np) +{ + return 0; +} + +static inline void drm_aux_hpd_bridge_notify(struct device *dev, enum drm_connector_status status) +{ +} +#endif + #endif -- cgit v1.2.3 From caf525ed45b4960b450cbd4e811d9b247bc2586c Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 3 Dec 2023 14:53:13 +0300 Subject: drm/encoder: register per-encoder debugfs dir Each of connectors and CRTCs used by the DRM device provides debugfs directory, which is used by several standard debugfs files and can further be extended by the driver. Add such generic debugfs directories for encoder. 
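As a hedged illustration of what this enables (the foo_ names and the "state" file are made up), a driver could register its own file in the per-encoder directory through the new drm_encoder_funcs.debugfs_init callback introduced by this patch:

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include <drm/drm_encoder.h>
#include <drm/drm_print.h>

/* Hypothetical file exposed as dri/<minor>/encoder-<idx>/state. */
static int foo_encoder_state_show(struct seq_file *m, void *data)
{
	struct drm_encoder *encoder = m->private;
	struct drm_printer p = drm_seq_file_printer(m);

	drm_printf(&p, "encoder-%d: possible_crtcs=0x%x\n",
		   encoder->index, encoder->possible_crtcs);

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(foo_encoder_state);

static void foo_encoder_debugfs_init(struct drm_encoder *encoder,
				     struct dentry *root)
{
	debugfs_create_file("state", 0444, root, encoder,
			    &foo_encoder_state_fops);
}

static const struct drm_encoder_funcs foo_encoder_funcs = {
	.destroy = drm_encoder_cleanup,
	.debugfs_init = foo_encoder_debugfs_init,
};

Because the core creates and removes the encoder-%d directory around late_register/early_unregister, the driver only has to populate files under the dentry it is handed.
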
Reviewed-by: Neil Armstrong Acked-by: Maxime Ripard Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20231203115315.1306124-2-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/drm_debugfs.c | 25 +++++++++++++++++++++++++ drivers/gpu/drm/drm_encoder.c | 4 ++++ drivers/gpu/drm/drm_internal.h | 10 ++++++++++ include/drm/drm_encoder.h | 16 +++++++++++++++- 4 files changed, 54 insertions(+), 1 deletion(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c index f291fb4b359f..00406b4f3235 100644 --- a/drivers/gpu/drm/drm_debugfs.c +++ b/drivers/gpu/drm/drm_debugfs.c @@ -589,4 +589,29 @@ void drm_debugfs_crtc_remove(struct drm_crtc *crtc) crtc->debugfs_entry = NULL; } +void drm_debugfs_encoder_add(struct drm_encoder *encoder) +{ + struct drm_minor *minor = encoder->dev->primary; + struct dentry *root; + char *name; + + name = kasprintf(GFP_KERNEL, "encoder-%d", encoder->index); + if (!name) + return; + + root = debugfs_create_dir(name, minor->debugfs_root); + kfree(name); + + encoder->debugfs_entry = root; + + if (encoder->funcs->debugfs_init) + encoder->funcs->debugfs_init(encoder, root); +} + +void drm_debugfs_encoder_remove(struct drm_encoder *encoder) +{ + debugfs_remove_recursive(encoder->debugfs_entry); + encoder->debugfs_entry = NULL; +} + #endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/drm/drm_encoder.c b/drivers/gpu/drm/drm_encoder.c index 1143bc7f3252..8f2bc6a28482 100644 --- a/drivers/gpu/drm/drm_encoder.c +++ b/drivers/gpu/drm/drm_encoder.c @@ -30,6 +30,7 @@ #include #include "drm_crtc_internal.h" +#include "drm_internal.h" /** * DOC: overview @@ -74,6 +75,8 @@ int drm_encoder_register_all(struct drm_device *dev) int ret = 0; drm_for_each_encoder(encoder, dev) { + drm_debugfs_encoder_add(encoder); + if (encoder->funcs && encoder->funcs->late_register) ret = encoder->funcs->late_register(encoder); if (ret) @@ -90,6 +93,7 @@ void drm_encoder_unregister_all(struct drm_device *dev) drm_for_each_encoder(encoder, dev) { if (encoder->funcs && encoder->funcs->early_unregister) encoder->funcs->early_unregister(encoder); + drm_debugfs_encoder_remove(encoder); } } diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index b12c463bc460..b7a311efa2b1 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -194,6 +194,8 @@ void drm_debugfs_connector_remove(struct drm_connector *connector); void drm_debugfs_crtc_add(struct drm_crtc *crtc); void drm_debugfs_crtc_remove(struct drm_crtc *crtc); void drm_debugfs_crtc_crc_add(struct drm_crtc *crtc); +void drm_debugfs_encoder_add(struct drm_encoder *encoder); +void drm_debugfs_encoder_remove(struct drm_encoder *encoder); #else static inline void drm_debugfs_dev_fini(struct drm_device *dev) { @@ -231,6 +233,14 @@ static inline void drm_debugfs_crtc_crc_add(struct drm_crtc *crtc) { } +static inline void drm_debugfs_encoder_add(struct drm_encoder *encoder) +{ +} + +static inline void drm_debugfs_encoder_remove(struct drm_encoder *encoder) +{ +} + #endif drm_ioctl_t drm_version; diff --git a/include/drm/drm_encoder.h b/include/drm/drm_encoder.h index 3a09682af685..977a9381c8ba 100644 --- a/include/drm/drm_encoder.h +++ b/include/drm/drm_encoder.h @@ -60,7 +60,7 @@ struct drm_encoder_funcs { * @late_register: * * This optional hook can be used to register additional userspace - * interfaces attached to the encoder like debugfs interfaces. + * interfaces attached to the encoder. 
* It is called late in the driver load sequence from drm_dev_register(). * Everything added from this callback should be unregistered in * the early_unregister callback. @@ -81,6 +81,13 @@ struct drm_encoder_funcs { * before data structures are torndown. */ void (*early_unregister)(struct drm_encoder *encoder); + + /** + * @debugfs_init: + * + * Allows encoders to create encoder-specific debugfs files. + */ + void (*debugfs_init)(struct drm_encoder *encoder, struct dentry *root); }; /** @@ -184,6 +191,13 @@ struct drm_encoder { const struct drm_encoder_funcs *funcs; const struct drm_encoder_helper_funcs *helper_private; + + /** + * @debugfs_entry: + * + * Debugfs directory for this CRTC. + */ + struct dentry *debugfs_entry; }; #define obj_to_encoder(x) container_of(x, struct drm_encoder, base) -- cgit v1.2.3 From d0b3c318e04cc6c4e2a3c30ee0f6f619aa8d0db5 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 3 Dec 2023 14:53:14 +0300 Subject: drm/bridge: migrate bridge_chains to per-encoder file Instead of having a single file with all bridge chains, list bridges under a corresponding per-encoder debugfs directory. While we are at it, also slightly improve the formatting of the bridge data: split a single line entry into multiple lines, include the symbol name of the bridge funcs and add the textual representation of the bridge ops. Example of the listing: $ cat /sys/kernel/debug/dri/0/encoder-0/bridges bridge[0]: dsi_mgr_bridge_funcs type: [0] Unknown ops: [0] bridge[1]: lt9611uxc_bridge_funcs type: [11] HDMI-A OF: /soc@0/geniqup@9c0000/i2c@994000/hdmi-bridge@2b:lontium,lt9611uxc ops: [7] detect edid hpd Reviewed-by: Neil Armstrong Reviewed-by: Tomi Valkeinen Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20231203115315.1306124-3-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/drm_bridge.c | 44 ------------------------------------------- drivers/gpu/drm/drm_debugfs.c | 40 ++++++++++++++++++++++++++++++++++++--- include/drm/drm_bridge.h | 2 -- 3 files changed, 37 insertions(+), 49 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c index 30d66bee0ec6..cee3188adf3d 100644 --- a/drivers/gpu/drm/drm_bridge.c +++ b/drivers/gpu/drm/drm_bridge.c @@ -1347,50 +1347,6 @@ struct drm_bridge *of_drm_find_bridge(struct device_node *np) EXPORT_SYMBOL(of_drm_find_bridge); #endif -#ifdef CONFIG_DEBUG_FS -static int drm_bridge_chains_info(struct seq_file *m, void *data) -{ - struct drm_debugfs_entry *entry = m->private; - struct drm_device *dev = entry->dev; - struct drm_printer p = drm_seq_file_printer(m); - struct drm_mode_config *config = &dev->mode_config; - struct drm_encoder *encoder; - unsigned int bridge_idx = 0; - - list_for_each_entry(encoder, &config->encoder_list, head) { - struct drm_bridge *bridge; - - drm_printf(&p, "encoder[%u]\n", encoder->base.id); - - drm_for_each_bridge_in_chain(encoder, bridge) { - drm_printf(&p, "\tbridge[%u] type: %u, ops: %#x", - bridge_idx, bridge->type, bridge->ops); - -#ifdef CONFIG_OF - if (bridge->of_node) - drm_printf(&p, ", OF: %pOFfc", bridge->of_node); -#endif - - drm_printf(&p, "\n"); - - bridge_idx++; - } - } - - return 0; -} - -static const struct drm_debugfs_info drm_bridge_debugfs_list[] = { - { "bridge_chains", drm_bridge_chains_info, 0 }, -}; - -void drm_bridge_debugfs_init(struct drm_device *dev) -{ - drm_debugfs_add_files(dev, drm_bridge_debugfs_list, - ARRAY_SIZE(drm_bridge_debugfs_list)); -} -#endif - MODULE_AUTHOR("Ajay Kumar "); 
MODULE_DESCRIPTION("DRM bridge infrastructure"); MODULE_LICENSE("GPL and additional rights"); diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c index 00406b4f3235..02e7481758c0 100644 --- a/drivers/gpu/drm/drm_debugfs.c +++ b/drivers/gpu/drm/drm_debugfs.c @@ -314,10 +314,8 @@ void drm_debugfs_dev_register(struct drm_device *dev) drm_framebuffer_debugfs_init(dev); drm_client_debugfs_init(dev); } - if (drm_drv_uses_atomic_modeset(dev)) { + if (drm_drv_uses_atomic_modeset(dev)) drm_atomic_debugfs_init(dev); - drm_bridge_debugfs_init(dev); - } } int drm_debugfs_register(struct drm_minor *minor, int minor_id, @@ -589,6 +587,38 @@ void drm_debugfs_crtc_remove(struct drm_crtc *crtc) crtc->debugfs_entry = NULL; } +static int bridges_show(struct seq_file *m, void *data) +{ + struct drm_encoder *encoder = m->private; + struct drm_printer p = drm_seq_file_printer(m); + struct drm_bridge *bridge; + unsigned int idx = 0; + + drm_for_each_bridge_in_chain(encoder, bridge) { + drm_printf(&p, "bridge[%d]: %ps\n", idx++, bridge->funcs); + drm_printf(&p, "\ttype: [%d] %s\n", + bridge->type, + drm_get_connector_type_name(bridge->type)); +#ifdef CONFIG_OF + if (bridge->of_node) + drm_printf(&p, "\tOF: %pOFfc\n", bridge->of_node); +#endif + drm_printf(&p, "\tops: [0x%x]", bridge->ops); + if (bridge->ops & DRM_BRIDGE_OP_DETECT) + drm_puts(&p, " detect"); + if (bridge->ops & DRM_BRIDGE_OP_EDID) + drm_puts(&p, " edid"); + if (bridge->ops & DRM_BRIDGE_OP_HPD) + drm_puts(&p, " hpd"); + if (bridge->ops & DRM_BRIDGE_OP_MODES) + drm_puts(&p, " modes"); + drm_puts(&p, "\n"); + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(bridges); + void drm_debugfs_encoder_add(struct drm_encoder *encoder) { struct drm_minor *minor = encoder->dev->primary; @@ -604,6 +634,10 @@ void drm_debugfs_encoder_add(struct drm_encoder *encoder) encoder->debugfs_entry = root; + /* bridges list */ + debugfs_create_file("bridges", 0444, root, encoder, + &bridges_fops); + if (encoder->funcs->debugfs_init) encoder->funcs->debugfs_init(encoder, root); } diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 9ef461aa9b9e..e39da5807ba7 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -950,6 +950,4 @@ static inline struct drm_bridge *drmm_of_get_bridge(struct drm_device *drm, } #endif -void drm_bridge_debugfs_init(struct drm_device *dev); - #endif -- cgit v1.2.3 From 1c0a80f160965c88f16e73ff69015db2f044c486 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 4 Dec 2023 15:13:48 +0200 Subject: Revert "drm/atomic: Loosen FB atomic checks" This reverts commit f1e75da5364e780905d9cd6043f9c74cdcf84073. Although the Solid Fill planes patchset got all reviews and acknowledgements, it doesn't fulfill requirements for the new uABI. It has neither corresponding open-source userspace implementation nor the IGT tests coverage. Reverting this patchset until userspace obligations are fulfilled. 
Acked-by: Simon Ser Acked-by: Maxime Ripard Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20231204131455.19023-2-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/drm_atomic.c | 21 ++++++++++---------- drivers/gpu/drm/drm_atomic_helper.c | 39 ++++++++++++++++--------------------- include/drm/drm_atomic_helper.h | 4 ++-- include/drm/drm_plane.h | 29 --------------------------- 4 files changed, 29 insertions(+), 64 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index aed0a694c74c..c6f2b86c48ae 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -674,16 +674,17 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state, { struct drm_plane *plane = new_plane_state->plane; struct drm_crtc *crtc = new_plane_state->crtc; + const struct drm_framebuffer *fb = new_plane_state->fb; int ret; - /* either *both* CRTC and pixel source must be set, or neither */ - if (crtc && !drm_plane_has_visible_data(new_plane_state)) { - drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] CRTC set but no visible data\n", + /* either *both* CRTC and FB must be set, or neither */ + if (crtc && !fb) { + drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] CRTC set but no FB\n", plane->base.id, plane->name); return -EINVAL; - } else if (drm_plane_has_visible_data(new_plane_state) && !crtc) { - drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] Source %d has visible data but no CRTC\n", - plane->base.id, plane->name, new_plane_state->pixel_source); + } else if (fb && !crtc) { + drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] FB set but no CRTC\n", + plane->base.id, plane->name); return -EINVAL; } @@ -714,11 +715,9 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state, } - if (new_plane_state->pixel_source == DRM_PLANE_PIXEL_SOURCE_FB && new_plane_state->fb) { - ret = drm_atomic_plane_check_fb(new_plane_state); - if (ret) - return ret; - } + ret = drm_atomic_plane_check_fb(new_plane_state); + if (ret) + return ret; if (plane_switching_crtc(old_plane_state, new_plane_state)) { drm_dbg_atomic(plane->dev, diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index dc048988e3f3..c3f677130def 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -861,6 +861,7 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state, bool can_position, bool can_update_disabled) { + struct drm_framebuffer *fb = plane_state->fb; struct drm_rect *src = &plane_state->src; struct drm_rect *dst = &plane_state->dst; unsigned int rotation = plane_state->rotation; @@ -872,7 +873,7 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state, *src = drm_plane_state_src(plane_state); *dst = drm_plane_state_dest(plane_state); - if (!drm_plane_has_visible_data(plane_state)) { + if (!fb) { plane_state->visible = false; return 0; } @@ -889,31 +890,25 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state, return -EINVAL; } - /* Check that selected pixel source is valid */ - if (plane_state->pixel_source == DRM_PLANE_PIXEL_SOURCE_FB && plane_state->fb) { - struct drm_framebuffer *fb = plane_state->fb; - drm_rect_rotate(src, fb->width << 16, fb->height << 16, rotation); + drm_rect_rotate(src, fb->width << 16, fb->height << 16, rotation); - /* Check scaling */ - hscale = drm_rect_calc_hscale(src, dst, min_scale, max_scale); - vscale = drm_rect_calc_vscale(src, dst, min_scale, 
max_scale); + /* Check scaling */ + hscale = drm_rect_calc_hscale(src, dst, min_scale, max_scale); + vscale = drm_rect_calc_vscale(src, dst, min_scale, max_scale); + if (hscale < 0 || vscale < 0) { + drm_dbg_kms(plane_state->plane->dev, + "Invalid scaling of plane\n"); + drm_rect_debug_print("src: ", &plane_state->src, true); + drm_rect_debug_print("dst: ", &plane_state->dst, false); + return -ERANGE; + } - if (hscale < 0 || vscale < 0) { - drm_dbg_kms(plane_state->plane->dev, - "Invalid scaling of plane\n"); - drm_rect_debug_print("src: ", &plane_state->src, true); - drm_rect_debug_print("dst: ", &plane_state->dst, false); - return -ERANGE; - } + if (crtc_state->enable) + drm_mode_get_hv_timing(&crtc_state->mode, &clip.x2, &clip.y2); - if (crtc_state->enable) - drm_mode_get_hv_timing(&crtc_state->mode, &clip.x2, &clip.y2); + plane_state->visible = drm_rect_clip_scaled(src, dst, &clip); - plane_state->visible = drm_rect_clip_scaled(src, dst, &clip); - drm_rect_rotate_inv(src, fb->width << 16, fb->height << 16, rotation); - } else if (drm_plane_solid_fill_enabled(plane_state)) { - plane_state->visible = true; - } + drm_rect_rotate_inv(src, fb->width << 16, fb->height << 16, rotation); if (!plane_state->visible) /* diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h index 6d97f38ac1f6..536a0b0091c3 100644 --- a/include/drm/drm_atomic_helper.h +++ b/include/drm/drm_atomic_helper.h @@ -256,8 +256,8 @@ drm_atomic_plane_disabling(struct drm_plane_state *old_plane_state, * Anything else should be considered a bug in the atomic core, so we * gently warn about it. */ - WARN_ON((new_plane_state->crtc == NULL && drm_plane_has_visible_data(new_plane_state)) || - (new_plane_state->crtc != NULL && !drm_plane_has_visible_data(new_plane_state))); + WARN_ON((new_plane_state->crtc == NULL && new_plane_state->fb != NULL) || + (new_plane_state->crtc != NULL && new_plane_state->fb == NULL)); return old_plane_state->crtc && !new_plane_state->crtc; } diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index 3b187f3f5466..d14e2f1db234 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -1016,35 +1016,6 @@ static inline struct drm_plane *drm_plane_find(struct drm_device *dev, #define drm_for_each_plane(plane, dev) \ list_for_each_entry(plane, &(dev)->mode_config.plane_list, head) -/** - * drm_plane_solid_fill_enabled - Check if solid fill is enabled on plane - * @state: plane state - * - * Returns: - * Whether the plane has been assigned a solid_fill_blob - */ -static inline bool drm_plane_solid_fill_enabled(struct drm_plane_state *state) -{ - if (!state) - return false; - return state->pixel_source == DRM_PLANE_PIXEL_SOURCE_SOLID_FILL && state->solid_fill_blob; -} - -static inline bool drm_plane_has_visible_data(const struct drm_plane_state *state) -{ - switch (state->pixel_source) { - case DRM_PLANE_PIXEL_SOURCE_NONE: - return false; - case DRM_PLANE_PIXEL_SOURCE_SOLID_FILL: - return state->solid_fill_blob != NULL; - case DRM_PLANE_PIXEL_SOURCE_FB: - default: - WARN_ON(state->pixel_source != DRM_PLANE_PIXEL_SOURCE_FB); - } - - return state->fb != NULL; -} - bool drm_any_plane_has_format(struct drm_device *dev, u32 format, u64 modifier); -- cgit v1.2.3 From a513f095b941e9e96196f04f11f253d763310c08 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 4 Dec 2023 15:13:50 +0200 Subject: Revert "drm/atomic: Add solid fill data to plane state dump" This reverts commit e86413f5442ee094e66b3e75f2d3419ed0df9520. 
Although the Solid Fill planes patchset got all reviews and acknowledgements, it doesn't fulfill requirements for the new uABI. It has neither corresponding open-source userspace implementation nor the IGT tests coverage. Reverting this patchset until userspace obligations are fulfilled. Acked-by: Simon Ser Acked-by: Maxime Ripard Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20231204131455.19023-4-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/drm_atomic.c | 4 ---- drivers/gpu/drm/drm_plane.c | 8 -------- include/drm/drm_plane.h | 3 --- 3 files changed, 15 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 1339785fbe80..02aa7df832cc 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -726,10 +726,6 @@ static void drm_atomic_plane_print_state(struct drm_printer *p, drm_printf(p, "\tfb=%u\n", state->fb ? state->fb->base.id : 0); if (state->fb) drm_framebuffer_print_info(p, 2, state->fb); - drm_printf(p, "\tsolid_fill=%u\n", - state->solid_fill_blob ? state->solid_fill_blob->base.id : 0); - if (state->solid_fill_blob) - drm_plane_solid_fill_print_info(p, 2, state); drm_printf(p, "\tcrtc-pos=" DRM_RECT_FMT "\n", DRM_RECT_ARG(&dest)); drm_printf(p, "\tsrc-pos=" DRM_RECT_FP_FMT "\n", DRM_RECT_FP_ARG(&src)); drm_printf(p, "\trotation=%x\n", state->rotation); diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index c65c72701dd2..c8dbe5ae60cc 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -1641,14 +1641,6 @@ __drm_plane_get_damage_clips(const struct drm_plane_state *state) state->fb_damage_clips->data : NULL); } -void drm_plane_solid_fill_print_info(struct drm_printer *p, unsigned int indent, - const struct drm_plane_state *state) -{ - drm_printf_indent(p, indent, "r=0x%08x\n", state->solid_fill.r); - drm_printf_indent(p, indent, "g=0x%08x\n", state->solid_fill.g); - drm_printf_indent(p, indent, "b=0x%08x\n", state->solid_fill.b); -} - /** * drm_plane_get_damage_clips - Returns damage clips. * @state: Plane state. diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index d14e2f1db234..4b7af4381bbe 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -1025,9 +1025,6 @@ drm_plane_get_damage_clips_count(const struct drm_plane_state *state); struct drm_mode_rect * drm_plane_get_damage_clips(const struct drm_plane_state *state); -void drm_plane_solid_fill_print_info(struct drm_printer *p, unsigned int indent, - const struct drm_plane_state *state); - int drm_plane_create_scaling_filter_property(struct drm_plane *plane, unsigned int supported_filters); -- cgit v1.2.3 From 5fb1ad3f5725c5c4d1a0c24ba4f82f239dc6878d Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 4 Dec 2023 15:13:52 +0200 Subject: Revert "drm: Add solid fill pixel source" This reverts commit 4b64167042927531f4cfaf035b8f88c2f7a05f06. Although the Solid Fill planes patchset got all reviews and acknowledgements, it doesn't fulfill requirements for the new uABI. It has neither corresponding open-source userspace implementation nor the IGT tests coverage. Reverting this patchset until userspace obligations are fulfilled. 
Acked-by: Simon Ser Acked-by: Maxime Ripard Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20231204131455.19023-6-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/drm_blend.c | 10 +--------- include/drm/drm_plane.h | 1 - 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c index 37b31b7e5ce5..665c5d9b056f 100644 --- a/drivers/gpu/drm/drm_blend.c +++ b/drivers/gpu/drm/drm_blend.c @@ -204,9 +204,6 @@ * "FB": * Framebuffer source set by the "FB_ID" property. * - * "SOLID_FILL": - * Solid fill color source set by the "solid_fill" property. - * * solid_fill: * solid_fill is set up with drm_plane_create_solid_fill_property(). It * contains pixel data that drivers can use to fill a plane. @@ -645,7 +642,6 @@ EXPORT_SYMBOL(drm_plane_create_blend_mode_property); static const struct drm_prop_enum_list drm_pixel_source_enum_list[] = { { DRM_PLANE_PIXEL_SOURCE_NONE, "NONE" }, { DRM_PLANE_PIXEL_SOURCE_FB, "FB" }, - { DRM_PLANE_PIXEL_SOURCE_SOLID_FILL, "SOLID_FILL" }, }; /** @@ -670,9 +666,6 @@ static const struct drm_prop_enum_list drm_pixel_source_enum_list[] = { * "FB": * Framebuffer pixel source * - * "SOLID_FILL": - * Solid fill color pixel source - * * Returns: * Zero on success, negative errno on failure. */ @@ -682,8 +675,7 @@ int drm_plane_create_pixel_source_property(struct drm_plane *plane, struct drm_device *dev = plane->dev; struct drm_property *prop; static const unsigned int valid_source_mask = BIT(DRM_PLANE_PIXEL_SOURCE_FB) | - BIT(DRM_PLANE_PIXEL_SOURCE_NONE) | - BIT(DRM_PLANE_PIXEL_SOURCE_SOLID_FILL); + BIT(DRM_PLANE_PIXEL_SOURCE_NONE); int i; /* FB is supported by default */ diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index 4b7af4381bbe..5bac644d4cc3 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -43,7 +43,6 @@ enum drm_scaling_filter { enum drm_plane_pixel_source { DRM_PLANE_PIXEL_SOURCE_NONE, DRM_PLANE_PIXEL_SOURCE_FB, - DRM_PLANE_PIXEL_SOURCE_SOLID_FILL, DRM_PLANE_PIXEL_SOURCE_MAX }; -- cgit v1.2.3 From e5fba1ada1c1d676438138d815acd8f427a1eaf0 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 4 Dec 2023 15:13:53 +0200 Subject: Revert "drm: Introduce solid fill DRM plane property" This reverts commit 85863a4e16e77079ee14865905ddc3ef9483a640. Although the Solid Fill planes patchset got all reviews and acknowledgements, it doesn't fulfill requirements for the new uABI. It has neither corresponding open-source userspace implementation nor the IGT tests coverage. Reverting this patchset until userspace obligations are fulfilled. 
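
[The drm_blend.c hunk above trims the "pixel_source" enum property down to NONE and FB. For reference, the generic pattern for exposing a driver-specific enum property on a plane looks roughly like the sketch below; the "dither" property, its values, and the "foo" names are made up for illustration.]

#include <drm/drm_plane.h>
#include <drm/drm_property.h>

static const struct drm_prop_enum_list foo_dither_list[] = {
        { 0, "off" },
        { 1, "on" },
};

static int foo_plane_create_dither_property(struct drm_plane *plane)
{
        struct drm_property *prop;

        prop = drm_property_create_enum(plane->dev, DRM_MODE_PROP_ATOMIC,
                                        "dither", foo_dither_list,
                                        ARRAY_SIZE(foo_dither_list));
        if (!prop)
                return -ENOMEM;

        /* Attach with "off" as the default value. */
        drm_object_attach_property(&plane->base, prop, 0);
        return 0;
}

[A driver-private property like this still has to be handled in the plane's ->atomic_set_property and ->atomic_get_property callbacks.]
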
Acked-by: Simon Ser Acked-by: Maxime Ripard Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20231204131455.19023-7-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/drm_atomic_state_helper.c | 9 -------- drivers/gpu/drm/drm_atomic_uapi.c | 26 ---------------------- drivers/gpu/drm/drm_blend.c | 30 -------------------------- include/drm/drm_blend.h | 1 - include/drm/drm_plane.h | 36 ------------------------------- include/uapi/drm/drm_mode.h | 24 --------------------- 6 files changed, 126 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c index f6c76cea8be4..311b04edf742 100644 --- a/drivers/gpu/drm/drm_atomic_state_helper.c +++ b/drivers/gpu/drm/drm_atomic_state_helper.c @@ -254,11 +254,6 @@ void __drm_atomic_helper_plane_state_reset(struct drm_plane_state *plane_state, plane_state->pixel_blend_mode = DRM_MODE_BLEND_PREMULTI; plane_state->pixel_source = DRM_PLANE_PIXEL_SOURCE_FB; - if (plane_state->solid_fill_blob) { - drm_property_blob_put(plane_state->solid_fill_blob); - plane_state->solid_fill_blob = NULL; - } - if (plane->color_encoding_property) { if (!drm_object_property_get_default_value(&plane->base, plane->color_encoding_property, @@ -355,9 +350,6 @@ void __drm_atomic_helper_plane_duplicate_state(struct drm_plane *plane, if (state->fb) drm_framebuffer_get(state->fb); - if (state->solid_fill_blob) - drm_property_blob_get(state->solid_fill_blob); - state->fence = NULL; state->commit = NULL; state->fb_damage_clips = NULL; @@ -407,7 +399,6 @@ void __drm_atomic_helper_plane_destroy_state(struct drm_plane_state *state) drm_crtc_commit_put(state->commit); drm_property_blob_put(state->fb_damage_clips); - drm_property_blob_put(state->solid_fill_blob); } EXPORT_SYMBOL(__drm_atomic_helper_plane_destroy_state); diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index d7ae8e2c0265..bd7140531948 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -316,20 +316,6 @@ drm_atomic_set_crtc_for_connector(struct drm_connector_state *conn_state, } EXPORT_SYMBOL(drm_atomic_set_crtc_for_connector); -static void drm_atomic_set_solid_fill_prop(struct drm_plane_state *state) -{ - struct drm_mode_solid_fill *user_info; - - if (!state->solid_fill_blob) - return; - - user_info = (struct drm_mode_solid_fill *)state->solid_fill_blob->data; - - state->solid_fill.r = user_info->r; - state->solid_fill.g = user_info->g; - state->solid_fill.b = user_info->b; -} - static void set_out_fence_for_crtc(struct drm_atomic_state *state, struct drm_crtc *crtc, s32 __user *fence_ptr) { @@ -578,15 +564,6 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane, state->src_h = val; } else if (property == plane->pixel_source_property) { state->pixel_source = val; - } else if (property == plane->solid_fill_property) { - ret = drm_atomic_replace_property_blob_from_id(dev, - &state->solid_fill_blob, - val, sizeof(struct drm_mode_solid_fill), - -1, &replaced); - if (ret) - return ret; - - drm_atomic_set_solid_fill_prop(state); } else if (property == plane->alpha_property) { state->alpha = val; } else if (property == plane->blend_mode_property) { @@ -677,9 +654,6 @@ drm_atomic_plane_get_property(struct drm_plane *plane, *val = state->src_h; } else if (property == plane->pixel_source_property) { *val = state->pixel_source; - } else if (property == plane->solid_fill_property) { - *val = state->solid_fill_blob ? 
- state->solid_fill_blob->base.id : 0; } else if (property == plane->alpha_property) { *val = state->alpha; } else if (property == plane->blend_mode_property) { diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c index 665c5d9b056f..fce734cdb85b 100644 --- a/drivers/gpu/drm/drm_blend.c +++ b/drivers/gpu/drm/drm_blend.c @@ -204,10 +204,6 @@ * "FB": * Framebuffer source set by the "FB_ID" property. * - * solid_fill: - * solid_fill is set up with drm_plane_create_solid_fill_property(). It - * contains pixel data that drivers can use to fill a plane. - * * Note that all the property extensions described here apply either to the * plane or the CRTC (e.g. for the background color, which currently is not * exposed and assumed to be black). @@ -713,29 +709,3 @@ int drm_plane_create_pixel_source_property(struct drm_plane *plane, return 0; } EXPORT_SYMBOL(drm_plane_create_pixel_source_property); - -/** - * drm_plane_create_solid_fill_property - create a new solid_fill property - * @plane: drm plane - * - * This creates a new property blob that holds pixel data for solid fill planes. - * The property is exposed to userspace as a property blob called "solid_fill". - * - * For information on what the blob contains, see `drm_mode_solid_fill`. - */ -int drm_plane_create_solid_fill_property(struct drm_plane *plane) -{ - struct drm_property *prop; - - prop = drm_property_create(plane->dev, - DRM_MODE_PROP_ATOMIC | DRM_MODE_PROP_BLOB, - "solid_fill", 0); - if (!prop) - return -ENOMEM; - - drm_object_attach_property(&plane->base, prop, 0); - plane->solid_fill_property = prop; - - return 0; -} -EXPORT_SYMBOL(drm_plane_create_solid_fill_property); diff --git a/include/drm/drm_blend.h b/include/drm/drm_blend.h index e7158fbee389..122bbfbaae33 100644 --- a/include/drm/drm_blend.h +++ b/include/drm/drm_blend.h @@ -60,5 +60,4 @@ int drm_plane_create_blend_mode_property(struct drm_plane *plane, unsigned int supported_modes); int drm_plane_create_pixel_source_property(struct drm_plane *plane, unsigned long extra_sources); -int drm_plane_create_solid_fill_property(struct drm_plane *plane); #endif diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index 5bac644d4cc3..bc0176ba25be 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -46,18 +46,6 @@ enum drm_plane_pixel_source { DRM_PLANE_PIXEL_SOURCE_MAX }; -/** - * struct solid_fill_property - RGB values for solid fill plane - * - * TODO: Add solid fill source and corresponding pixel source - * that supports RGBA color - */ -struct drm_solid_fill { - uint32_t r; - uint32_t g; - uint32_t b; -}; - /** * struct drm_plane_state - mutable plane state * @@ -146,23 +134,6 @@ struct drm_plane_state { */ enum drm_plane_pixel_source pixel_source; - /** - * @solid_fill_blob: - * - * Blob containing relevant information for a solid fill plane - * including RGB color values. See - * drm_plane_create_solid_fill_property() for more details. - */ - struct drm_property_blob *solid_fill_blob; - - /** - * @solid_fill: - * - * Pixel data for solid fill planes. See - * drm_plane_create_solid_fill_property() for more details. - */ - struct drm_solid_fill solid_fill; - /** * @alpha: * Opacity of the plane with 0 as completely transparent and 0xffff as @@ -763,13 +734,6 @@ struct drm_plane { */ struct drm_property *pixel_source_property; - /** - * @solid_fill_property: - * Optional solid_fill property for this plane. See - * drm_plane_create_solid_fill_property(). 
- */ - struct drm_property *solid_fill_property; - /** * @alpha_property: * Optional alpha property for this plane. See diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index cbd943a8c88b..95630f170110 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -259,30 +259,6 @@ struct drm_mode_modeinfo { char name[DRM_DISPLAY_MODE_LEN]; }; -/** - * struct drm_mode_solid_fill - User info for solid fill planes - * - * This is the userspace API solid fill information structure. - * - * Userspace can enable solid fill planes by assigning the plane "solid_fill" - * property to a blob containing a single drm_mode_solid_fill struct populated with an RGB323232 - * color and setting the pixel source to "SOLID_FILL". - * - * For information on the plane property, see drm_plane_create_solid_fill_property() - * - * @r: Red color value of single pixel - * @g: Green color value of single pixel - * @b: Blue color value of single pixel - * @pad: padding, must be zero - */ -struct drm_mode_solid_fill { - __u32 r; - __u32 g; - __u32 b; - __u32 pad; -}; - - struct drm_mode_card_res { __u64 fb_id_ptr; __u64 crtc_id_ptr; -- cgit v1.2.3 From 90422201f8f2b4e26ab7bd43b92786a11c1ffebf Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 4 Dec 2023 15:13:54 +0200 Subject: Revert "drm: Introduce pixel_source DRM plane property" This reverts commit e50e5fed41c7eed2db4119645bf3480ec43fec11. Although the Solid Fill planes patchset got all reviews and acknowledgements, it doesn't fulfill requirements for the new uABI. It has neither corresponding open-source userspace implementation nor the IGT tests coverage. Reverting this patchset until userspace obligations are fulfilled. Acked-by: Simon Ser Acked-by: Maxime Ripard Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20231204131455.19023-8-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/drm_atomic_state_helper.c | 1 - drivers/gpu/drm/drm_atomic_uapi.c | 4 -- drivers/gpu/drm/drm_blend.c | 94 ------------------------------- drivers/gpu/drm/drm_plane.c | 19 ++----- include/drm/drm_blend.h | 2 - include/drm/drm_plane.h | 21 ------- 6 files changed, 4 insertions(+), 137 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c index 311b04edf742..54975de44a0e 100644 --- a/drivers/gpu/drm/drm_atomic_state_helper.c +++ b/drivers/gpu/drm/drm_atomic_state_helper.c @@ -252,7 +252,6 @@ void __drm_atomic_helper_plane_state_reset(struct drm_plane_state *plane_state, plane_state->alpha = DRM_BLEND_ALPHA_OPAQUE; plane_state->pixel_blend_mode = DRM_MODE_BLEND_PREMULTI; - plane_state->pixel_source = DRM_PLANE_PIXEL_SOURCE_FB; if (plane->color_encoding_property) { if (!drm_object_property_get_default_value(&plane->base, diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index bd7140531948..aee4a65d4959 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -562,8 +562,6 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane, state->src_w = val; } else if (property == config->prop_src_h) { state->src_h = val; - } else if (property == plane->pixel_source_property) { - state->pixel_source = val; } else if (property == plane->alpha_property) { state->alpha = val; } else if (property == plane->blend_mode_property) { @@ -652,8 +650,6 @@ drm_atomic_plane_get_property(struct drm_plane *plane, *val = state->src_w; } else if (property == config->prop_src_h) { 
*val = state->src_h; - } else if (property == plane->pixel_source_property) { - *val = state->pixel_source; } else if (property == plane->alpha_property) { *val = state->alpha; } else if (property == plane->blend_mode_property) { diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c index fce734cdb85b..6e74de833466 100644 --- a/drivers/gpu/drm/drm_blend.c +++ b/drivers/gpu/drm/drm_blend.c @@ -185,25 +185,6 @@ * plane does not expose the "alpha" property, then this is * assumed to be 1.0 * - * pixel_source: - * pixel_source is set up with drm_plane_create_pixel_source_property(). - * It is used to toggle the active source of pixel data for the plane. - * The plane will only display data from the set pixel_source -- any - * data from other sources will be ignored. - * - * For non-framebuffer sources, if pixel_source is set to a non-framebuffer - * and non-NONE source, and the corresponding source property is NULL, then - * the atomic commit should return an error. - * - * Possible values: - * - * "NONE": - * No active pixel source. - * Committing with a NONE pixel source will disable the plane. - * - * "FB": - * Framebuffer source set by the "FB_ID" property. - * * Note that all the property extensions described here apply either to the * plane or the CRTC (e.g. for the background color, which currently is not * exposed and assumed to be black). @@ -634,78 +615,3 @@ int drm_plane_create_blend_mode_property(struct drm_plane *plane, return 0; } EXPORT_SYMBOL(drm_plane_create_blend_mode_property); - -static const struct drm_prop_enum_list drm_pixel_source_enum_list[] = { - { DRM_PLANE_PIXEL_SOURCE_NONE, "NONE" }, - { DRM_PLANE_PIXEL_SOURCE_FB, "FB" }, -}; - -/** - * drm_plane_create_pixel_source_property - create a new pixel source property - * @plane: DRM plane - * @extra_sources: Bitmask of additional supported pixel_sources for the driver. - * DRM_PLANE_PIXEL_SOURCE_FB and DRM_PLANE_PIXEL_SOURCE_NONE will - * always be enabled as supported sources. - * - * This creates a new property describing the current source of pixel data for the - * plane. The pixel_source will be initialized as DRM_PLANE_PIXEL_SOURCE_FB by default. - * - * Drivers can set a custom default source by overriding the pixel_source value in - * drm_plane_funcs.reset() - * - * The property is exposed to userspace as an enumeration property called - * "pixel_source" and has the following enumeration values: - * - * "NONE": - * No active pixel source - * - * "FB": - * Framebuffer pixel source - * - * Returns: - * Zero on success, negative errno on failure. 
- */ -int drm_plane_create_pixel_source_property(struct drm_plane *plane, - unsigned long extra_sources) -{ - struct drm_device *dev = plane->dev; - struct drm_property *prop; - static const unsigned int valid_source_mask = BIT(DRM_PLANE_PIXEL_SOURCE_FB) | - BIT(DRM_PLANE_PIXEL_SOURCE_NONE); - int i; - - /* FB is supported by default */ - unsigned long supported_sources = extra_sources | - BIT(DRM_PLANE_PIXEL_SOURCE_FB) | - BIT(DRM_PLANE_PIXEL_SOURCE_NONE); - - if (WARN_ON(supported_sources & ~valid_source_mask)) - return -EINVAL; - - prop = drm_property_create(dev, DRM_MODE_PROP_ENUM | DRM_MODE_PROP_ATOMIC, "pixel_source", - hweight32(supported_sources)); - - if (!prop) - return -ENOMEM; - - for (i = 0; i < ARRAY_SIZE(drm_pixel_source_enum_list); i++) { - int ret; - - if (!test_bit(drm_pixel_source_enum_list[i].type, &supported_sources)) - continue; - - ret = drm_property_add_enum(prop, drm_pixel_source_enum_list[i].type, - drm_pixel_source_enum_list[i].name); - if (ret) { - drm_property_destroy(dev, prop); - - return ret; - } - } - - drm_object_attach_property(&plane->base, prop, DRM_PLANE_PIXEL_SOURCE_FB); - plane->pixel_source_property = prop; - - return 0; -} -EXPORT_SYMBOL(drm_plane_create_pixel_source_property); diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index c8dbe5ae60cc..9e8e4c60983d 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -953,14 +953,6 @@ bool drm_any_plane_has_format(struct drm_device *dev, } EXPORT_SYMBOL(drm_any_plane_has_format); -static bool drm_plane_needs_disable(struct drm_plane_state *state, struct drm_framebuffer *fb) -{ - if (state->pixel_source == DRM_PLANE_PIXEL_SOURCE_NONE) - return true; - - return state->pixel_source == DRM_PLANE_PIXEL_SOURCE_FB && fb == NULL; -} - /* * __setplane_internal - setplane handler for internal callers * @@ -983,8 +975,8 @@ static int __setplane_internal(struct drm_plane *plane, WARN_ON(drm_drv_uses_atomic_modeset(plane->dev)); - /* No visible data means shut it down */ - if (drm_plane_needs_disable(plane->state, fb)) { + /* No fb means shut it down */ + if (!fb) { plane->old_fb = plane->fb; ret = plane->funcs->disable_plane(plane, ctx); if (!ret) { @@ -1035,8 +1027,8 @@ static int __setplane_atomic(struct drm_plane *plane, WARN_ON(!drm_drv_uses_atomic_modeset(plane->dev)); - /* No visible data means shut it down */ - if (drm_plane_needs_disable(plane->state, fb)) + /* No fb means shut it down */ + if (!fb) return plane->funcs->disable_plane(plane, ctx); /* @@ -1109,9 +1101,6 @@ int drm_mode_setplane(struct drm_device *dev, void *data, return -ENOENT; } - if (plane->state) - plane->state->pixel_source = DRM_PLANE_PIXEL_SOURCE_FB; - if (plane_req->fb_id) { fb = drm_framebuffer_lookup(dev, file_priv, plane_req->fb_id); if (!fb) { diff --git a/include/drm/drm_blend.h b/include/drm/drm_blend.h index 122bbfbaae33..88bdfec3bd88 100644 --- a/include/drm/drm_blend.h +++ b/include/drm/drm_blend.h @@ -58,6 +58,4 @@ int drm_atomic_normalize_zpos(struct drm_device *dev, struct drm_atomic_state *state); int drm_plane_create_blend_mode_property(struct drm_plane *plane, unsigned int supported_modes); -int drm_plane_create_pixel_source_property(struct drm_plane *plane, - unsigned long extra_sources); #endif diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index bc0176ba25be..c6565a6f9324 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -40,12 +40,6 @@ enum drm_scaling_filter { DRM_SCALING_FILTER_NEAREST_NEIGHBOR, }; -enum drm_plane_pixel_source { - 
DRM_PLANE_PIXEL_SOURCE_NONE, - DRM_PLANE_PIXEL_SOURCE_FB, - DRM_PLANE_PIXEL_SOURCE_MAX -}; - /** * struct drm_plane_state - mutable plane state * @@ -126,14 +120,6 @@ struct drm_plane_state { /** @hotspot_y: y offset to mouse cursor hotspot */ int32_t hotspot_x, hotspot_y; - /** - * @pixel_source: - * - * Source of pixel information for the plane. See - * drm_plane_create_pixel_source_property() for more details. - */ - enum drm_plane_pixel_source pixel_source; - /** * @alpha: * Opacity of the plane with 0 as completely transparent and 0xffff as @@ -727,13 +713,6 @@ struct drm_plane { */ struct drm_plane_state *state; - /* - * @pixel_source_property: - * Optional pixel_source property for this plane. See - * drm_plane_create_pixel_source_property(). - */ - struct drm_property *pixel_source_property; - /** * @alpha_property: * Optional alpha property for this plane. See -- cgit v1.2.3 From e759f2ca29d918d3db57a61cdf838025beb03465 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 29 Nov 2023 23:08:00 +0100 Subject: drm/gpuvm: fall back to drm_exec_lock_obj() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fall back to drm_exec_lock_obj() if num_fences is zero for the drm_gpuvm_prepare_* function family. Otherwise dma_resv_reserve_fences() would actually allocate slots even though num_fences is zero. Cc: Christian König Acked-by: Donald Robson Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231129220835.297885-2-dakr@redhat.com --- drivers/gpu/drm/drm_gpuvm.c | 43 ++++++++++++++++++++++++++++++++++++++----- include/drm/drm_gpuvm.h | 23 +++-------------------- 2 files changed, 41 insertions(+), 25 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index 54f5e8851de5..07a6676bc4f9 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -1085,6 +1085,37 @@ drm_gpuvm_put(struct drm_gpuvm *gpuvm) } EXPORT_SYMBOL_GPL(drm_gpuvm_put); +static int +exec_prepare_obj(struct drm_exec *exec, struct drm_gem_object *obj, + unsigned int num_fences) +{ + return num_fences ? drm_exec_prepare_obj(exec, obj, num_fences) : + drm_exec_lock_obj(exec, obj); +} + +/** + * drm_gpuvm_prepare_vm() - prepare the GPUVMs common dma-resv + * @gpuvm: the &drm_gpuvm + * @exec: the &drm_exec context + * @num_fences: the amount of &dma_fences to reserve + * + * Calls drm_exec_prepare_obj() for the GPUVMs dummy &drm_gem_object; if + * @num_fences is zero drm_exec_lock_obj() is called instead. + * + * Using this function directly, it is the drivers responsibility to call + * drm_exec_init() and drm_exec_fini() accordingly. + * + * Returns: 0 on success, negative error code on failure. 
+ */ +int +drm_gpuvm_prepare_vm(struct drm_gpuvm *gpuvm, + struct drm_exec *exec, + unsigned int num_fences) +{ + return exec_prepare_obj(exec, gpuvm->r_obj, num_fences); +} +EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_vm); + static int __drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm, struct drm_exec *exec, @@ -1095,7 +1126,7 @@ __drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm, int ret = 0; for_each_vm_bo_in_list(gpuvm, extobj, &extobjs, vm_bo) { - ret = drm_exec_prepare_obj(exec, vm_bo->obj, num_fences); + ret = exec_prepare_obj(exec, vm_bo->obj, num_fences); if (ret) break; } @@ -1116,7 +1147,7 @@ drm_gpuvm_prepare_objects_locked(struct drm_gpuvm *gpuvm, drm_gpuvm_resv_assert_held(gpuvm); list_for_each_entry(vm_bo, &gpuvm->extobj.list, list.entry.extobj) { - ret = drm_exec_prepare_obj(exec, vm_bo->obj, num_fences); + ret = exec_prepare_obj(exec, vm_bo->obj, num_fences); if (ret) break; @@ -1134,7 +1165,8 @@ drm_gpuvm_prepare_objects_locked(struct drm_gpuvm *gpuvm, * @num_fences: the amount of &dma_fences to reserve * * Calls drm_exec_prepare_obj() for all &drm_gem_objects the given - * &drm_gpuvm contains mappings of. + * &drm_gpuvm contains mappings of; if @num_fences is zero drm_exec_lock_obj() + * is called instead. * * Using this function directly, it is the drivers responsibility to call * drm_exec_init() and drm_exec_fini() accordingly. @@ -1171,7 +1203,8 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_objects); * @num_fences: the amount of &dma_fences to reserve * * Calls drm_exec_prepare_obj() for all &drm_gem_objects mapped between @addr - * and @addr + @range. + * and @addr + @range; if @num_fences is zero drm_exec_lock_obj() is called + * instead. * * Returns: 0 on success, negative error code on failure. */ @@ -1186,7 +1219,7 @@ drm_gpuvm_prepare_range(struct drm_gpuvm *gpuvm, struct drm_exec *exec, drm_gpuvm_for_each_va_range(va, gpuvm, addr, end) { struct drm_gem_object *obj = va->gem.obj; - ret = drm_exec_prepare_obj(exec, obj, num_fences); + ret = exec_prepare_obj(exec, obj, num_fences); if (ret) return ret; } diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index f94fec9a8517..b3f82ec7fb17 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -544,26 +544,9 @@ struct drm_gpuvm_exec { } extra; }; -/** - * drm_gpuvm_prepare_vm() - prepare the GPUVMs common dma-resv - * @gpuvm: the &drm_gpuvm - * @exec: the &drm_exec context - * @num_fences: the amount of &dma_fences to reserve - * - * Calls drm_exec_prepare_obj() for the GPUVMs dummy &drm_gem_object. - * - * Using this function directly, it is the drivers responsibility to call - * drm_exec_init() and drm_exec_fini() accordingly. - * - * Returns: 0 on success, negative error code on failure. - */ -static inline int -drm_gpuvm_prepare_vm(struct drm_gpuvm *gpuvm, - struct drm_exec *exec, - unsigned int num_fences) -{ - return drm_exec_prepare_obj(exec, gpuvm->r_obj, num_fences); -} +int drm_gpuvm_prepare_vm(struct drm_gpuvm *gpuvm, + struct drm_exec *exec, + unsigned int num_fences); int drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm, struct drm_exec *exec, -- cgit v1.2.3 From c50a291d621aa7abaa27b05f56d450a388b64948 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 4 Dec 2023 16:14:06 +0100 Subject: drm/gpuvm: Let drm_gpuvm_bo_put() report when the vm_bo object is destroyed Some users need to release resources attached to the vm_bo object when it's destroyed. In Panthor's case, we need to release the pin ref so BO pages can be returned to the system when all GPU mappings are gone. 
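
[Picking up the drm_gpuvm_prepare_*() fallback introduced just above: a driver that only needs the dma-resv locks, not fence slots, can now pass num_fences == 0. A minimal sketch, assuming the drm_gpuvm_exec helpers from the same series and a hypothetical "foo" driver.]

#include <drm/drm_exec.h>
#include <drm/drm_gpuvm.h>

static int foo_vm_lock_all(struct drm_gpuvm *gpuvm)
{
        struct drm_gpuvm_exec vm_exec = {
                .vm = gpuvm,
                .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
                .num_fences = 0,    /* lock only, no slot reservation */
        };
        int ret;

        ret = drm_gpuvm_exec_lock(&vm_exec);
        if (ret)
                return ret;

        /* ... inspect or validate mappings while everything is locked ... */

        drm_gpuvm_exec_unlock(&vm_exec);
        return 0;
}
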
This could be done through a custom drm_gpuvm::vm_bo_free() hook, but this has all sort of locking implications that would force us to expose a drm_gem_shmem_unpin_locked() helper, not to mention the fact that having a ::vm_bo_free() implementation without a ::vm_bo_alloc() one seems odd. So let's keep things simple, and extend drm_gpuvm_bo_put() to report when the object is destroyed. Signed-off-by: Boris Brezillon Reviewed-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231204151406.1977285-1-boris.brezillon@collabora.com --- drivers/gpu/drm/drm_gpuvm.c | 8 ++++++-- include/drm/drm_gpuvm.h | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index 07a6676bc4f9..9c0922c1a5a2 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -1535,14 +1535,18 @@ drm_gpuvm_bo_destroy(struct kref *kref) * hold the dma-resv or driver specific GEM gpuva lock. * * This function may only be called from non-atomic context. + * + * Returns: true if vm_bo was destroyed, false otherwise. */ -void +bool drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo) { might_sleep(); if (vm_bo) - kref_put(&vm_bo->kref, drm_gpuvm_bo_destroy); + return !!kref_put(&vm_bo->kref, drm_gpuvm_bo_destroy); + + return false; } EXPORT_SYMBOL_GPL(drm_gpuvm_bo_put); diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index b3f82ec7fb17..6258849382e1 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -721,7 +721,7 @@ drm_gpuvm_bo_get(struct drm_gpuvm_bo *vm_bo) return vm_bo; } -void drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo); +bool drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo); struct drm_gpuvm_bo * drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, -- cgit v1.2.3 From 2798ffcc1d6a788b5769b1fbcf0750dfc06ae98a Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 22 Nov 2023 13:09:40 +0100 Subject: drm: Remove locking for legacy ioctls and DRM_UNLOCKED Modern DRM drivers acquire ioctl locks by themselves. Legacy ioctls for user-space mode setting used to acquire drm_global_mutex. After removing the ioctl entry points, also remove the locking code. The only legacy ioctl without global locking was VBLANK_WAIT, which has been removed as well. Hence remove the related DRM_UNLOCKED flag. 
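
[With the flag removed, ioctl table entries simply carry the remaining permission flags and any locking lives in the handler itself. An illustrative driver-private entry follows; FOO_SUBMIT, foo_submit_ioctl and the matching DRM_IOCTL_FOO_SUBMIT uapi definition are hypothetical.]

#include <drm/drm_device.h>
#include <drm/drm_file.h>
#include <drm/drm_ioctl.h>

static int foo_submit_ioctl(struct drm_device *dev, void *data,
                            struct drm_file *file_priv);

/* No DRM_UNLOCKED: the handler takes whatever driver locks it needs. */
static const struct drm_ioctl_desc foo_driver_ioctls[] = {
        DRM_IOCTL_DEF_DRV(FOO_SUBMIT, foo_submit_ioctl,
                          DRM_RENDER_ALLOW),
};
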
Signed-off-by: Thomas Zimmermann Reviewed-by: David Airlie Reviewed-by: Daniel Vetter Acked-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20231122122449.11588-12-tzimmermann@suse.de --- drivers/gpu/drm/drm_ioc32.c | 2 +- drivers/gpu/drm/drm_ioctl.c | 23 +++++++---------------- include/drm/drm_ioctl.h | 11 ----------- 3 files changed, 8 insertions(+), 28 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c index 910cadf14756..129e2b91dbfe 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -273,7 +273,7 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd, req.request.type = req32.request.type; req.request.sequence = req32.request.sequence; req.request.signal = req32.request.signal; - err = drm_ioctl_kernel(file, drm_wait_vblank_ioctl, &req, DRM_UNLOCKED); + err = drm_ioctl_kernel(file, drm_wait_vblank_ioctl, &req, 0); req32.reply.type = req.reply.type; req32.reply.sequence = req.reply.sequence; diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index ebba34ba734e..e368fc084c77 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -596,7 +596,7 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_FINISH, drm_noop, DRM_AUTH), - DRM_IOCTL_DEF(DRM_IOCTL_WAIT_VBLANK, drm_wait_vblank_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_WAIT_VBLANK, drm_wait_vblank_ioctl, 0), DRM_IOCTL_DEF(DRM_IOCTL_UPDATE_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), @@ -729,7 +729,7 @@ long drm_ioctl_kernel(struct file *file, drm_ioctl_t *func, void *kdata, { struct drm_file *file_priv = file->private_data; struct drm_device *dev = file_priv->minor->dev; - int retcode; + int ret; /* Update drm_file owner if fd was passed along. */ drm_file_update_pid(file_priv); @@ -737,20 +737,11 @@ long drm_ioctl_kernel(struct file *file, drm_ioctl_t *func, void *kdata, if (drm_dev_is_unplugged(dev)) return -ENODEV; - retcode = drm_ioctl_permit(flags, file_priv); - if (unlikely(retcode)) - return retcode; - - /* Enforce sane locking for modern driver ioctls. */ - if (likely(!drm_core_check_feature(dev, DRIVER_LEGACY)) || - (flags & DRM_UNLOCKED)) - retcode = func(dev, kdata, file_priv); - else { - mutex_lock(&drm_global_mutex); - retcode = func(dev, kdata, file_priv); - mutex_unlock(&drm_global_mutex); - } - return retcode; + ret = drm_ioctl_permit(flags, file_priv); + if (unlikely(ret)) + return ret; + + return func(dev, kdata, file_priv); } EXPORT_SYMBOL(drm_ioctl_kernel); diff --git a/include/drm/drm_ioctl.h b/include/drm/drm_ioctl.h index 6ed61c371f6c..171760b6c4a1 100644 --- a/include/drm/drm_ioctl.h +++ b/include/drm/drm_ioctl.h @@ -109,17 +109,6 @@ enum drm_ioctl_flags { * This is equivalent to callers with the SYSADMIN capability. */ DRM_ROOT_ONLY = BIT(2), - /** - * @DRM_UNLOCKED: - * - * Whether &drm_ioctl_desc.func should be called with the DRM BKL held - * or not. Enforced as the default for all modern drivers, hence there - * should never be a need to set this flag. - * - * Do not use anywhere else than for the VBLANK_WAIT IOCTL, which is the - * only legacy IOCTL which needs this. - */ - DRM_UNLOCKED = BIT(4), /** * @DRM_RENDER_ALLOW: * -- cgit v1.2.3 From 2504c7ec728b7a2b6ca067e2a908fd1af2aad57c Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 22 Nov 2023 13:09:41 +0100 Subject: drm: Remove source code for non-KMS drivers Remove all remaining source code for non-KMS drivers. 
These drivers have been removed in v6.3 and won't comeback. Signed-off-by: Thomas Zimmermann Reviewed-by: David Airlie Reviewed-by: Daniel Vetter Acked-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20231122122449.11588-13-tzimmermann@suse.de --- drivers/gpu/drm/Makefile | 12 - drivers/gpu/drm/drm_agpsupport.c | 451 ---------- drivers/gpu/drm/drm_bufs.c | 1627 ------------------------------------- drivers/gpu/drm/drm_context.c | 513 ------------ drivers/gpu/drm/drm_dma.c | 178 ---- drivers/gpu/drm/drm_hashtab.c | 203 ----- drivers/gpu/drm/drm_internal.h | 5 - drivers/gpu/drm/drm_irq.c | 204 ----- drivers/gpu/drm/drm_legacy.h | 290 ------- drivers/gpu/drm/drm_legacy_misc.c | 105 --- drivers/gpu/drm/drm_lock.c | 373 --------- drivers/gpu/drm/drm_memory.c | 138 ---- drivers/gpu/drm/drm_pci.c | 203 ----- drivers/gpu/drm/drm_scatter.c | 220 ----- drivers/gpu/drm/drm_vm.c | 665 --------------- include/drm/drm_auth.h | 22 - include/drm/drm_device.h | 71 +- include/drm/drm_drv.h | 19 - include/drm/drm_file.h | 5 - include/drm/drm_legacy.h | 331 -------- 20 files changed, 2 insertions(+), 5633 deletions(-) delete mode 100644 drivers/gpu/drm/drm_agpsupport.c delete mode 100644 drivers/gpu/drm/drm_bufs.c delete mode 100644 drivers/gpu/drm/drm_context.c delete mode 100644 drivers/gpu/drm/drm_dma.c delete mode 100644 drivers/gpu/drm/drm_hashtab.c delete mode 100644 drivers/gpu/drm/drm_irq.c delete mode 100644 drivers/gpu/drm/drm_legacy.h delete mode 100644 drivers/gpu/drm/drm_legacy_misc.c delete mode 100644 drivers/gpu/drm/drm_lock.c delete mode 100644 drivers/gpu/drm/drm_memory.c delete mode 100644 drivers/gpu/drm/drm_scatter.c delete mode 100644 drivers/gpu/drm/drm_vm.c delete mode 100644 include/drm/drm_legacy.h (limited to 'include/drm') diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index b4cb0835620a..8ac6f4b9546e 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -47,18 +47,6 @@ drm-y := \ drm_vblank_work.o \ drm_vma_manager.o \ drm_writeback.o -drm-$(CONFIG_DRM_LEGACY) += \ - drm_agpsupport.o \ - drm_bufs.o \ - drm_context.o \ - drm_dma.o \ - drm_hashtab.o \ - drm_irq.o \ - drm_legacy_misc.o \ - drm_lock.o \ - drm_memory.o \ - drm_scatter.o \ - drm_vm.o drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o drm-$(CONFIG_COMPAT) += drm_ioc32.o drm-$(CONFIG_DRM_PANEL) += drm_panel.o diff --git a/drivers/gpu/drm/drm_agpsupport.c b/drivers/gpu/drm/drm_agpsupport.c deleted file mode 100644 index a4ad6fd13abc..000000000000 --- a/drivers/gpu/drm/drm_agpsupport.c +++ /dev/null @@ -1,451 +0,0 @@ -/* - * \file drm_agpsupport.c - * DRM support for AGP/GART backend - * - * \author Rickard E. (Rik) Faith - * \author Gareth Hughes - */ - -/* - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include -#include -#include - -#if IS_ENABLED(CONFIG_AGP) -#include -#endif - -#include -#include -#include -#include - -#include "drm_legacy.h" - -#if IS_ENABLED(CONFIG_AGP) - -/* - * Get AGP information. - * - * \return zero on success or a negative number on failure. - * - * Verifies the AGP device has been initialized and acquired and fills in the - * drm_agp_info structure with the information in drm_agp_head::agp_info. - */ -int drm_legacy_agp_info(struct drm_device *dev, struct drm_agp_info *info) -{ - struct agp_kern_info *kern; - - if (!dev->agp || !dev->agp->acquired) - return -EINVAL; - - kern = &dev->agp->agp_info; - info->agp_version_major = kern->version.major; - info->agp_version_minor = kern->version.minor; - info->mode = kern->mode; - info->aperture_base = kern->aper_base; - info->aperture_size = kern->aper_size * 1024 * 1024; - info->memory_allowed = kern->max_memory << PAGE_SHIFT; - info->memory_used = kern->current_memory << PAGE_SHIFT; - info->id_vendor = kern->device->vendor; - info->id_device = kern->device->device; - - return 0; -} -EXPORT_SYMBOL(drm_legacy_agp_info); - -int drm_legacy_agp_info_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_agp_info *info = data; - int err; - - err = drm_legacy_agp_info(dev, info); - if (err) - return err; - - return 0; -} - -/* - * Acquire the AGP device. - * - * \param dev DRM device that is to acquire AGP. - * \return zero on success or a negative number on failure. - * - * Verifies the AGP device hasn't been acquired before and calls - * \c agp_backend_acquire. - */ -int drm_legacy_agp_acquire(struct drm_device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev->dev); - - if (!dev->agp) - return -ENODEV; - if (dev->agp->acquired) - return -EBUSY; - dev->agp->bridge = agp_backend_acquire(pdev); - if (!dev->agp->bridge) - return -ENODEV; - dev->agp->acquired = 1; - return 0; -} -EXPORT_SYMBOL(drm_legacy_agp_acquire); - -/* - * Acquire the AGP device (ioctl). - * - * \return zero on success or a negative number on failure. - * - * Verifies the AGP device hasn't been acquired before and calls - * \c agp_backend_acquire. - */ -int drm_legacy_agp_acquire_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - return drm_legacy_agp_acquire((struct drm_device *)file_priv->minor->dev); -} - -/* - * Release the AGP device. - * - * \param dev DRM device that is to release AGP. - * \return zero on success or a negative number on failure. - * - * Verifies the AGP device has been acquired and calls \c agp_backend_release. - */ -int drm_legacy_agp_release(struct drm_device *dev) -{ - if (!dev->agp || !dev->agp->acquired) - return -EINVAL; - agp_backend_release(dev->agp->bridge); - dev->agp->acquired = 0; - return 0; -} -EXPORT_SYMBOL(drm_legacy_agp_release); - -int drm_legacy_agp_release_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - return drm_legacy_agp_release(dev); -} - -/* - * Enable the AGP bus. 
- * - * \param dev DRM device that has previously acquired AGP. - * \param mode Requested AGP mode. - * \return zero on success or a negative number on failure. - * - * Verifies the AGP device has been acquired but not enabled, and calls - * \c agp_enable. - */ -int drm_legacy_agp_enable(struct drm_device *dev, struct drm_agp_mode mode) -{ - if (!dev->agp || !dev->agp->acquired) - return -EINVAL; - - dev->agp->mode = mode.mode; - agp_enable(dev->agp->bridge, mode.mode); - dev->agp->enabled = 1; - return 0; -} -EXPORT_SYMBOL(drm_legacy_agp_enable); - -int drm_legacy_agp_enable_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_agp_mode *mode = data; - - return drm_legacy_agp_enable(dev, *mode); -} - -/* - * Allocate AGP memory. - * - * \return zero on success or a negative number on failure. - * - * Verifies the AGP device is present and has been acquired, allocates the - * memory via agp_allocate_memory() and creates a drm_agp_mem entry for it. - */ -int drm_legacy_agp_alloc(struct drm_device *dev, struct drm_agp_buffer *request) -{ - struct drm_agp_mem *entry; - struct agp_memory *memory; - unsigned long pages; - u32 type; - - if (!dev->agp || !dev->agp->acquired) - return -EINVAL; - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return -ENOMEM; - - pages = DIV_ROUND_UP(request->size, PAGE_SIZE); - type = (u32) request->type; - memory = agp_allocate_memory(dev->agp->bridge, pages, type); - if (!memory) { - kfree(entry); - return -ENOMEM; - } - - entry->handle = (unsigned long)memory->key + 1; - entry->memory = memory; - entry->bound = 0; - entry->pages = pages; - list_add(&entry->head, &dev->agp->memory); - - request->handle = entry->handle; - request->physical = memory->physical; - - return 0; -} -EXPORT_SYMBOL(drm_legacy_agp_alloc); - - -int drm_legacy_agp_alloc_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_agp_buffer *request = data; - - return drm_legacy_agp_alloc(dev, request); -} - -/* - * Search for the AGP memory entry associated with a handle. - * - * \param dev DRM device structure. - * \param handle AGP memory handle. - * \return pointer to the drm_agp_mem structure associated with \p handle. - * - * Walks through drm_agp_head::memory until finding a matching handle. - */ -static struct drm_agp_mem *drm_legacy_agp_lookup_entry(struct drm_device *dev, - unsigned long handle) -{ - struct drm_agp_mem *entry; - - list_for_each_entry(entry, &dev->agp->memory, head) { - if (entry->handle == handle) - return entry; - } - return NULL; -} - -/* - * Unbind AGP memory from the GATT (ioctl). - * - * \return zero on success or a negative number on failure. - * - * Verifies the AGP device is present and acquired, looks-up the AGP memory - * entry and passes it to the unbind_agp() function. 
- */ -int drm_legacy_agp_unbind(struct drm_device *dev, struct drm_agp_binding *request) -{ - struct drm_agp_mem *entry; - int ret; - - if (!dev->agp || !dev->agp->acquired) - return -EINVAL; - entry = drm_legacy_agp_lookup_entry(dev, request->handle); - if (!entry || !entry->bound) - return -EINVAL; - ret = agp_unbind_memory(entry->memory); - if (ret == 0) - entry->bound = 0; - return ret; -} -EXPORT_SYMBOL(drm_legacy_agp_unbind); - - -int drm_legacy_agp_unbind_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_agp_binding *request = data; - - return drm_legacy_agp_unbind(dev, request); -} - -/* - * Bind AGP memory into the GATT (ioctl) - * - * \return zero on success or a negative number on failure. - * - * Verifies the AGP device is present and has been acquired and that no memory - * is currently bound into the GATT. Looks-up the AGP memory entry and passes - * it to bind_agp() function. - */ -int drm_legacy_agp_bind(struct drm_device *dev, struct drm_agp_binding *request) -{ - struct drm_agp_mem *entry; - int retcode; - int page; - - if (!dev->agp || !dev->agp->acquired) - return -EINVAL; - entry = drm_legacy_agp_lookup_entry(dev, request->handle); - if (!entry || entry->bound) - return -EINVAL; - page = DIV_ROUND_UP(request->offset, PAGE_SIZE); - retcode = agp_bind_memory(entry->memory, page); - if (retcode) - return retcode; - entry->bound = dev->agp->base + (page << PAGE_SHIFT); - DRM_DEBUG("base = 0x%lx entry->bound = 0x%lx\n", - dev->agp->base, entry->bound); - return 0; -} -EXPORT_SYMBOL(drm_legacy_agp_bind); - - -int drm_legacy_agp_bind_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_agp_binding *request = data; - - return drm_legacy_agp_bind(dev, request); -} - -/* - * Free AGP memory (ioctl). - * - * \return zero on success or a negative number on failure. - * - * Verifies the AGP device is present and has been acquired and looks up the - * AGP memory entry. If the memory is currently bound, unbind it via - * unbind_agp(). Frees it via free_agp() as well as the entry itself - * and unlinks from the doubly linked list it's inserted in. - */ -int drm_legacy_agp_free(struct drm_device *dev, struct drm_agp_buffer *request) -{ - struct drm_agp_mem *entry; - - if (!dev->agp || !dev->agp->acquired) - return -EINVAL; - entry = drm_legacy_agp_lookup_entry(dev, request->handle); - if (!entry) - return -EINVAL; - if (entry->bound) - agp_unbind_memory(entry->memory); - - list_del(&entry->head); - - agp_free_memory(entry->memory); - kfree(entry); - return 0; -} -EXPORT_SYMBOL(drm_legacy_agp_free); - - -int drm_legacy_agp_free_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_agp_buffer *request = data; - - return drm_legacy_agp_free(dev, request); -} - -/* - * Initialize the AGP resources. - * - * \return pointer to a drm_agp_head structure. - * - * Gets the drm_agp_t structure which is made available by the agpgart module - * via the inter_module_* functions. Creates and initializes a drm_agp_head - * structure. - * - * Note that final cleanup of the kmalloced structure is directly done in - * drm_pci_agp_destroy. 
- */ -struct drm_agp_head *drm_legacy_agp_init(struct drm_device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev->dev); - struct drm_agp_head *head = NULL; - - head = kzalloc(sizeof(*head), GFP_KERNEL); - if (!head) - return NULL; - head->bridge = agp_find_bridge(pdev); - if (!head->bridge) { - head->bridge = agp_backend_acquire(pdev); - if (!head->bridge) { - kfree(head); - return NULL; - } - agp_copy_info(head->bridge, &head->agp_info); - agp_backend_release(head->bridge); - } else { - agp_copy_info(head->bridge, &head->agp_info); - } - if (head->agp_info.chipset == NOT_SUPPORTED) { - kfree(head); - return NULL; - } - INIT_LIST_HEAD(&head->memory); - head->cant_use_aperture = head->agp_info.cant_use_aperture; - head->page_mask = head->agp_info.page_mask; - head->base = head->agp_info.aper_base; - return head; -} -/* Only exported for i810.ko */ -EXPORT_SYMBOL(drm_legacy_agp_init); - -/** - * drm_legacy_agp_clear - Clear AGP resource list - * @dev: DRM device - * - * Iterate over all AGP resources and remove them. But keep the AGP head - * intact so it can still be used. It is safe to call this if AGP is disabled or - * was already removed. - * - * Cleanup is only done for drivers who have DRIVER_LEGACY set. - */ -void drm_legacy_agp_clear(struct drm_device *dev) -{ - struct drm_agp_mem *entry, *tempe; - - if (!dev->agp) - return; - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return; - - list_for_each_entry_safe(entry, tempe, &dev->agp->memory, head) { - if (entry->bound) - agp_unbind_memory(entry->memory); - agp_free_memory(entry->memory); - kfree(entry); - } - INIT_LIST_HEAD(&dev->agp->memory); - - if (dev->agp->acquired) - drm_legacy_agp_release(dev); - - dev->agp->acquired = 0; - dev->agp->enabled = 0; -} - -#endif diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c deleted file mode 100644 index 86700560fea2..000000000000 --- a/drivers/gpu/drm/drm_bufs.c +++ /dev/null @@ -1,1627 +0,0 @@ -/* - * Legacy: Generic DRM Buffer Management - * - * Copyright 1999, 2000 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. - * - * Author: Rickard E. (Rik) Faith - * Author: Gareth Hughes - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include - -#include "drm_legacy.h" - - -static struct drm_map_list *drm_find_matching_map(struct drm_device *dev, - struct drm_local_map *map) -{ - struct drm_map_list *entry; - - list_for_each_entry(entry, &dev->maplist, head) { - /* - * Because the kernel-userspace ABI is fixed at a 32-bit offset - * while PCI resources may live above that, we only compare the - * lower 32 bits of the map offset for maps of type - * _DRM_FRAMEBUFFER or _DRM_REGISTERS. - * It is assumed that if a driver have more than one resource - * of each type, the lower 32 bits are different. - */ - if (!entry->map || - map->type != entry->map->type || - entry->master != dev->master) - continue; - switch (map->type) { - case _DRM_SHM: - if (map->flags != _DRM_CONTAINS_LOCK) - break; - return entry; - case _DRM_REGISTERS: - case _DRM_FRAME_BUFFER: - if ((entry->map->offset & 0xffffffff) == - (map->offset & 0xffffffff)) - return entry; - break; - default: /* Make gcc happy */ - break; - } - if (entry->map->offset == map->offset) - return entry; - } - - return NULL; -} - -static int drm_map_handle(struct drm_device *dev, struct drm_hash_item *hash, - unsigned long user_token, int hashed_handle, int shm) -{ - int use_hashed_handle, shift; - unsigned long add; - -#if (BITS_PER_LONG == 64) - use_hashed_handle = ((user_token & 0xFFFFFFFF00000000UL) || hashed_handle); -#elif (BITS_PER_LONG == 32) - use_hashed_handle = hashed_handle; -#else -#error Unsupported long size. Neither 64 nor 32 bits. -#endif - - if (!use_hashed_handle) { - int ret; - - hash->key = user_token >> PAGE_SHIFT; - ret = drm_ht_insert_item(&dev->map_hash, hash); - if (ret != -EINVAL) - return ret; - } - - shift = 0; - add = DRM_MAP_HASH_OFFSET >> PAGE_SHIFT; - if (shm && (SHMLBA > PAGE_SIZE)) { - int bits = ilog2(SHMLBA >> PAGE_SHIFT) + 1; - - /* For shared memory, we have to preserve the SHMLBA - * bits of the eventual vma->vm_pgoff value during - * mmap(). Otherwise we run into cache aliasing problems - * on some platforms. On these platforms, the pgoff of - * a mmap() request is used to pick a suitable virtual - * address for the mmap() region such that it will not - * cause cache aliasing problems. - * - * Therefore, make sure the SHMLBA relevant bits of the - * hash value we use are equal to those in the original - * kernel virtual address. - */ - shift = bits; - add |= ((user_token >> PAGE_SHIFT) & ((1UL << bits) - 1UL)); - } - - return drm_ht_just_insert_please(&dev->map_hash, hash, - user_token, 32 - PAGE_SHIFT - 3, - shift, add); -} - -/* - * Core function to create a range of memory available for mapping by a - * non-root process. - * - * Adjusts the memory offset to its absolute value according to the mapping - * type. Adds the map to the map list drm_device::maplist. Adds MTRR's where - * applicable and if supported by the kernel. 
- */ -static int drm_addmap_core(struct drm_device *dev, resource_size_t offset, - unsigned int size, enum drm_map_type type, - enum drm_map_flags flags, - struct drm_map_list **maplist) -{ - struct drm_local_map *map; - struct drm_map_list *list; - unsigned long user_token; - int ret; - - map = kmalloc(sizeof(*map), GFP_KERNEL); - if (!map) - return -ENOMEM; - - map->offset = offset; - map->size = size; - map->flags = flags; - map->type = type; - - /* Only allow shared memory to be removable since we only keep enough - * book keeping information about shared memory to allow for removal - * when processes fork. - */ - if ((map->flags & _DRM_REMOVABLE) && map->type != _DRM_SHM) { - kfree(map); - return -EINVAL; - } - DRM_DEBUG("offset = 0x%08llx, size = 0x%08lx, type = %d\n", - (unsigned long long)map->offset, map->size, map->type); - - /* page-align _DRM_SHM maps. They are allocated here so there is no security - * hole created by that and it works around various broken drivers that use - * a non-aligned quantity to map the SAREA. --BenH - */ - if (map->type == _DRM_SHM) - map->size = PAGE_ALIGN(map->size); - - if ((map->offset & (~(resource_size_t)PAGE_MASK)) || (map->size & (~PAGE_MASK))) { - kfree(map); - return -EINVAL; - } - map->mtrr = -1; - map->handle = NULL; - - switch (map->type) { - case _DRM_REGISTERS: - case _DRM_FRAME_BUFFER: -#if !defined(__sparc__) && !defined(__alpha__) && !defined(__ia64__) && !defined(__powerpc64__) && !defined(__x86_64__) && !defined(__arm__) - if (map->offset + (map->size-1) < map->offset || - map->offset < virt_to_phys(high_memory)) { - kfree(map); - return -EINVAL; - } -#endif - /* Some drivers preinitialize some maps, without the X Server - * needing to be aware of it. Therefore, we just return success - * when the server tries to create a duplicate map. 
- */ - list = drm_find_matching_map(dev, map); - if (list != NULL) { - if (list->map->size != map->size) { - DRM_DEBUG("Matching maps of type %d with " - "mismatched sizes, (%ld vs %ld)\n", - map->type, map->size, - list->map->size); - list->map->size = map->size; - } - - kfree(map); - *maplist = list; - return 0; - } - - if (map->type == _DRM_FRAME_BUFFER || - (map->flags & _DRM_WRITE_COMBINING)) { - map->mtrr = - arch_phys_wc_add(map->offset, map->size); - } - if (map->type == _DRM_REGISTERS) { - if (map->flags & _DRM_WRITE_COMBINING) - map->handle = ioremap_wc(map->offset, - map->size); - else - map->handle = ioremap(map->offset, map->size); - if (!map->handle) { - kfree(map); - return -ENOMEM; - } - } - - break; - case _DRM_SHM: - list = drm_find_matching_map(dev, map); - if (list != NULL) { - if (list->map->size != map->size) { - DRM_DEBUG("Matching maps of type %d with " - "mismatched sizes, (%ld vs %ld)\n", - map->type, map->size, list->map->size); - list->map->size = map->size; - } - - kfree(map); - *maplist = list; - return 0; - } - map->handle = vmalloc_user(map->size); - DRM_DEBUG("%lu %d %p\n", - map->size, order_base_2(map->size), map->handle); - if (!map->handle) { - kfree(map); - return -ENOMEM; - } - map->offset = (unsigned long)map->handle; - if (map->flags & _DRM_CONTAINS_LOCK) { - /* Prevent a 2nd X Server from creating a 2nd lock */ - if (dev->master->lock.hw_lock != NULL) { - vfree(map->handle); - kfree(map); - return -EBUSY; - } - dev->sigdata.lock = dev->master->lock.hw_lock = map->handle; /* Pointer to lock */ - } - break; - case _DRM_AGP: { - struct drm_agp_mem *entry; - int valid = 0; - - if (!dev->agp) { - kfree(map); - return -EINVAL; - } -#ifdef __alpha__ - map->offset += dev->hose->mem_space->start; -#endif - /* In some cases (i810 driver), user space may have already - * added the AGP base itself, because dev->agp->base previously - * only got set during AGP enable. So, only add the base - * address if the map's offset isn't already within the - * aperture. - */ - if (map->offset < dev->agp->base || - map->offset > dev->agp->base + - dev->agp->agp_info.aper_size * 1024 * 1024 - 1) { - map->offset += dev->agp->base; - } - map->mtrr = dev->agp->agp_mtrr; /* for getmap */ - - /* This assumes the DRM is in total control of AGP space. - * It's not always the case as AGP can be in the control - * of user space (i.e. i810 driver). So this loop will get - * skipped and we double check that dev->agp->memory is - * actually set as well as being invalid before EPERM'ing - */ - list_for_each_entry(entry, &dev->agp->memory, head) { - if ((map->offset >= entry->bound) && - (map->offset + map->size <= entry->bound + entry->pages * PAGE_SIZE)) { - valid = 1; - break; - } - } - if (!list_empty(&dev->agp->memory) && !valid) { - kfree(map); - return -EPERM; - } - DRM_DEBUG("AGP offset = 0x%08llx, size = 0x%08lx\n", - (unsigned long long)map->offset, map->size); - - break; - } - case _DRM_SCATTER_GATHER: - if (!dev->sg) { - kfree(map); - return -EINVAL; - } - map->offset += (unsigned long)dev->sg->virtual; - break; - case _DRM_CONSISTENT: - /* dma_addr_t is 64bit on i386 with CONFIG_HIGHMEM64G, - * As we're limiting the address to 2^32-1 (or less), - * casting it down to 32 bits is no problem, but we - * need to point to a 64bit variable first. 
- */ - map->handle = dma_alloc_coherent(dev->dev, - map->size, - &map->offset, - GFP_KERNEL); - if (!map->handle) { - kfree(map); - return -ENOMEM; - } - break; - default: - kfree(map); - return -EINVAL; - } - - list = kzalloc(sizeof(*list), GFP_KERNEL); - if (!list) { - if (map->type == _DRM_REGISTERS) - iounmap(map->handle); - kfree(map); - return -EINVAL; - } - list->map = map; - - mutex_lock(&dev->struct_mutex); - list_add(&list->head, &dev->maplist); - - /* Assign a 32-bit handle */ - /* We do it here so that dev->struct_mutex protects the increment */ - user_token = (map->type == _DRM_SHM) ? (unsigned long)map->handle : - map->offset; - ret = drm_map_handle(dev, &list->hash, user_token, 0, - (map->type == _DRM_SHM)); - if (ret) { - if (map->type == _DRM_REGISTERS) - iounmap(map->handle); - kfree(map); - kfree(list); - mutex_unlock(&dev->struct_mutex); - return ret; - } - - list->user_token = list->hash.key << PAGE_SHIFT; - mutex_unlock(&dev->struct_mutex); - - if (!(map->flags & _DRM_DRIVER)) - list->master = dev->master; - *maplist = list; - return 0; -} - -int drm_legacy_addmap(struct drm_device *dev, resource_size_t offset, - unsigned int size, enum drm_map_type type, - enum drm_map_flags flags, struct drm_local_map **map_ptr) -{ - struct drm_map_list *list; - int rc; - - rc = drm_addmap_core(dev, offset, size, type, flags, &list); - if (!rc) - *map_ptr = list->map; - return rc; -} -EXPORT_SYMBOL(drm_legacy_addmap); - -struct drm_local_map *drm_legacy_findmap(struct drm_device *dev, - unsigned int token) -{ - struct drm_map_list *_entry; - - list_for_each_entry(_entry, &dev->maplist, head) - if (_entry->user_token == token) - return _entry->map; - return NULL; -} -EXPORT_SYMBOL(drm_legacy_findmap); - -/* - * Ioctl to specify a range of memory that is available for mapping by a - * non-root process. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg pointer to a drm_map structure. - * \return zero on success or a negative value on error. - * - */ -int drm_legacy_addmap_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_map *map = data; - struct drm_map_list *maplist; - int err; - - if (!(capable(CAP_SYS_ADMIN) || map->type == _DRM_AGP || map->type == _DRM_SHM)) - return -EPERM; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - err = drm_addmap_core(dev, map->offset, map->size, map->type, - map->flags, &maplist); - - if (err) - return err; - - /* avoid a warning on 64-bit, this casting isn't very nice, but the API is set so too late */ - map->handle = (void *)(unsigned long)maplist->user_token; - - /* - * It appears that there are no users of this value whatsoever -- - * drmAddMap just discards it. Let's not encourage its use. - * (Keeping drm_addmap_core's returned mtrr value would be wrong -- - * it's not a real mtrr index anymore.) - */ - map->mtrr = -1; - - return 0; -} - -/* - * Get a mapping information. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument, pointing to a drm_map structure. - * - * \return zero on success or a negative number on failure. 
- * - * Searches for the mapping with the specified offset and copies its information - * into userspace - */ -int drm_legacy_getmap_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_map *map = data; - struct drm_map_list *r_list = NULL; - struct list_head *list; - int idx; - int i; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - idx = map->offset; - if (idx < 0) - return -EINVAL; - - i = 0; - mutex_lock(&dev->struct_mutex); - list_for_each(list, &dev->maplist) { - if (i == idx) { - r_list = list_entry(list, struct drm_map_list, head); - break; - } - i++; - } - if (!r_list || !r_list->map) { - mutex_unlock(&dev->struct_mutex); - return -EINVAL; - } - - map->offset = r_list->map->offset; - map->size = r_list->map->size; - map->type = r_list->map->type; - map->flags = r_list->map->flags; - map->handle = (void *)(unsigned long) r_list->user_token; - map->mtrr = arch_phys_wc_index(r_list->map->mtrr); - - mutex_unlock(&dev->struct_mutex); - - return 0; -} - -/* - * Remove a map private from list and deallocate resources if the mapping - * isn't in use. - * - * Searches the map on drm_device::maplist, removes it from the list, see if - * it's being used, and free any associated resource (such as MTRR's) if it's not - * being on use. - * - * \sa drm_legacy_addmap - */ -int drm_legacy_rmmap_locked(struct drm_device *dev, struct drm_local_map *map) -{ - struct drm_map_list *r_list = NULL, *list_t; - int found = 0; - struct drm_master *master; - - /* Find the list entry for the map and remove it */ - list_for_each_entry_safe(r_list, list_t, &dev->maplist, head) { - if (r_list->map == map) { - master = r_list->master; - list_del(&r_list->head); - drm_ht_remove_key(&dev->map_hash, - r_list->user_token >> PAGE_SHIFT); - kfree(r_list); - found = 1; - break; - } - } - - if (!found) - return -EINVAL; - - switch (map->type) { - case _DRM_REGISTERS: - iounmap(map->handle); - fallthrough; - case _DRM_FRAME_BUFFER: - arch_phys_wc_del(map->mtrr); - break; - case _DRM_SHM: - vfree(map->handle); - if (master) { - if (dev->sigdata.lock == master->lock.hw_lock) - dev->sigdata.lock = NULL; - master->lock.hw_lock = NULL; /* SHM removed */ - master->lock.file_priv = NULL; - wake_up_interruptible_all(&master->lock.lock_queue); - } - break; - case _DRM_AGP: - case _DRM_SCATTER_GATHER: - break; - case _DRM_CONSISTENT: - dma_free_coherent(dev->dev, - map->size, - map->handle, - map->offset); - break; - } - kfree(map); - - return 0; -} -EXPORT_SYMBOL(drm_legacy_rmmap_locked); - -void drm_legacy_rmmap(struct drm_device *dev, struct drm_local_map *map) -{ - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return; - - mutex_lock(&dev->struct_mutex); - drm_legacy_rmmap_locked(dev, map); - mutex_unlock(&dev->struct_mutex); -} -EXPORT_SYMBOL(drm_legacy_rmmap); - -void drm_legacy_master_rmmaps(struct drm_device *dev, struct drm_master *master) -{ - struct drm_map_list *r_list, *list_temp; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return; - - mutex_lock(&dev->struct_mutex); - list_for_each_entry_safe(r_list, list_temp, &dev->maplist, head) { - if (r_list->master == master) { - drm_legacy_rmmap_locked(dev, r_list->map); - r_list = NULL; - } - } - mutex_unlock(&dev->struct_mutex); -} - -void drm_legacy_rmmaps(struct drm_device *dev) -{ - struct drm_map_list *r_list, *list_temp; - - list_for_each_entry_safe(r_list, list_temp, &dev->maplist, head) - drm_legacy_rmmap(dev, r_list->map); -} - -/* The rmmap ioctl appears to be unnecessary. 
All mappings are torn down on - * the last close of the device, and this is necessary for cleanup when things - * exit uncleanly. Therefore, having userland manually remove mappings seems - * like a pointless exercise since they're going away anyway. - * - * One use case might be after addmap is allowed for normal users for SHM and - * gets used by drivers that the server doesn't need to care about. This seems - * unlikely. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg pointer to a struct drm_map structure. - * \return zero on success or a negative value on error. - */ -int drm_legacy_rmmap_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_map *request = data; - struct drm_local_map *map = NULL; - struct drm_map_list *r_list; - int ret; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - mutex_lock(&dev->struct_mutex); - list_for_each_entry(r_list, &dev->maplist, head) { - if (r_list->map && - r_list->user_token == (unsigned long)request->handle && - r_list->map->flags & _DRM_REMOVABLE) { - map = r_list->map; - break; - } - } - - /* List has wrapped around to the head pointer, or it's empty we didn't - * find anything. - */ - if (list_empty(&dev->maplist) || !map) { - mutex_unlock(&dev->struct_mutex); - return -EINVAL; - } - - /* Register and framebuffer maps are permanent */ - if ((map->type == _DRM_REGISTERS) || (map->type == _DRM_FRAME_BUFFER)) { - mutex_unlock(&dev->struct_mutex); - return 0; - } - - ret = drm_legacy_rmmap_locked(dev, map); - - mutex_unlock(&dev->struct_mutex); - - return ret; -} - -/* - * Cleanup after an error on one of the addbufs() functions. - * - * \param dev DRM device. - * \param entry buffer entry where the error occurred. - * - * Frees any pages and buffers associated with the given entry. - */ -static void drm_cleanup_buf_error(struct drm_device *dev, - struct drm_buf_entry *entry) -{ - drm_dma_handle_t *dmah; - int i; - - if (entry->seg_count) { - for (i = 0; i < entry->seg_count; i++) { - if (entry->seglist[i]) { - dmah = entry->seglist[i]; - dma_free_coherent(dev->dev, - dmah->size, - dmah->vaddr, - dmah->busaddr); - kfree(dmah); - } - } - kfree(entry->seglist); - - entry->seg_count = 0; - } - - if (entry->buf_count) { - for (i = 0; i < entry->buf_count; i++) { - kfree(entry->buflist[i].dev_private); - } - kfree(entry->buflist); - - entry->buf_count = 0; - } -} - -#if IS_ENABLED(CONFIG_AGP) -/* - * Add AGP buffers for DMA transfers. - * - * \param dev struct drm_device to which the buffers are to be added. - * \param request pointer to a struct drm_buf_desc describing the request. - * \return zero on success or a negative number on failure. - * - * After some sanity checks creates a drm_buf structure for each buffer and - * reallocates the buffer list of the same size order to accommodate the new - * buffers. - */ -int drm_legacy_addbufs_agp(struct drm_device *dev, - struct drm_buf_desc *request) -{ - struct drm_device_dma *dma = dev->dma; - struct drm_buf_entry *entry; - struct drm_agp_mem *agp_entry; - struct drm_buf *buf; - unsigned long offset; - unsigned long agp_offset; - int count; - int order; - int size; - int alignment; - int page_order; - int total; - int byte_count; - int i, valid; - struct drm_buf **temp_buflist; - - if (!dma) - return -EINVAL; - - count = request->count; - order = order_base_2(request->size); - size = 1 << order; - - alignment = (request->flags & _DRM_PAGE_ALIGN) - ? 
PAGE_ALIGN(size) : size; - page_order = order - PAGE_SHIFT > 0 ? order - PAGE_SHIFT : 0; - total = PAGE_SIZE << page_order; - - byte_count = 0; - agp_offset = dev->agp->base + request->agp_start; - - DRM_DEBUG("count: %d\n", count); - DRM_DEBUG("order: %d\n", order); - DRM_DEBUG("size: %d\n", size); - DRM_DEBUG("agp_offset: %lx\n", agp_offset); - DRM_DEBUG("alignment: %d\n", alignment); - DRM_DEBUG("page_order: %d\n", page_order); - DRM_DEBUG("total: %d\n", total); - - if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER) - return -EINVAL; - - /* Make sure buffers are located in AGP memory that we own */ - valid = 0; - list_for_each_entry(agp_entry, &dev->agp->memory, head) { - if ((agp_offset >= agp_entry->bound) && - (agp_offset + total * count <= agp_entry->bound + agp_entry->pages * PAGE_SIZE)) { - valid = 1; - break; - } - } - if (!list_empty(&dev->agp->memory) && !valid) { - DRM_DEBUG("zone invalid\n"); - return -EINVAL; - } - spin_lock(&dev->buf_lock); - if (dev->buf_use) { - spin_unlock(&dev->buf_lock); - return -EBUSY; - } - atomic_inc(&dev->buf_alloc); - spin_unlock(&dev->buf_lock); - - mutex_lock(&dev->struct_mutex); - entry = &dma->bufs[order]; - if (entry->buf_count) { - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; /* May only call once for each order */ - } - - if (count < 0 || count > 4096) { - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -EINVAL; - } - - entry->buflist = kcalloc(count, sizeof(*entry->buflist), GFP_KERNEL); - if (!entry->buflist) { - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - - entry->buf_size = size; - entry->page_order = page_order; - - offset = 0; - - while (entry->buf_count < count) { - buf = &entry->buflist[entry->buf_count]; - buf->idx = dma->buf_count + entry->buf_count; - buf->total = alignment; - buf->order = order; - buf->used = 0; - - buf->offset = (dma->byte_count + offset); - buf->bus_address = agp_offset + offset; - buf->address = (void *)(agp_offset + offset); - buf->next = NULL; - buf->waiting = 0; - buf->pending = 0; - buf->file_priv = NULL; - - buf->dev_priv_size = dev->driver->dev_priv_size; - buf->dev_private = kzalloc(buf->dev_priv_size, GFP_KERNEL); - if (!buf->dev_private) { - /* Set count correctly so we free the proper amount. 
*/ - entry->buf_count = count; - drm_cleanup_buf_error(dev, entry); - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - - DRM_DEBUG("buffer %d @ %p\n", entry->buf_count, buf->address); - - offset += alignment; - entry->buf_count++; - byte_count += PAGE_SIZE << page_order; - } - - DRM_DEBUG("byte_count: %d\n", byte_count); - - temp_buflist = krealloc(dma->buflist, - (dma->buf_count + entry->buf_count) * - sizeof(*dma->buflist), GFP_KERNEL); - if (!temp_buflist) { - /* Free the entry because it isn't valid */ - drm_cleanup_buf_error(dev, entry); - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - dma->buflist = temp_buflist; - - for (i = 0; i < entry->buf_count; i++) { - dma->buflist[i + dma->buf_count] = &entry->buflist[i]; - } - - dma->buf_count += entry->buf_count; - dma->seg_count += entry->seg_count; - dma->page_count += byte_count >> PAGE_SHIFT; - dma->byte_count += byte_count; - - DRM_DEBUG("dma->buf_count : %d\n", dma->buf_count); - DRM_DEBUG("entry->buf_count : %d\n", entry->buf_count); - - mutex_unlock(&dev->struct_mutex); - - request->count = entry->buf_count; - request->size = size; - - dma->flags = _DRM_DMA_USE_AGP; - - atomic_dec(&dev->buf_alloc); - return 0; -} -EXPORT_SYMBOL(drm_legacy_addbufs_agp); -#endif /* CONFIG_AGP */ - -int drm_legacy_addbufs_pci(struct drm_device *dev, - struct drm_buf_desc *request) -{ - struct drm_device_dma *dma = dev->dma; - int count; - int order; - int size; - int total; - int page_order; - struct drm_buf_entry *entry; - drm_dma_handle_t *dmah; - struct drm_buf *buf; - int alignment; - unsigned long offset; - int i; - int byte_count; - int page_count; - unsigned long *temp_pagelist; - struct drm_buf **temp_buflist; - - if (!drm_core_check_feature(dev, DRIVER_PCI_DMA)) - return -EOPNOTSUPP; - - if (!dma) - return -EINVAL; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - count = request->count; - order = order_base_2(request->size); - size = 1 << order; - - DRM_DEBUG("count=%d, size=%d (%d), order=%d\n", - request->count, request->size, size, order); - - if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER) - return -EINVAL; - - alignment = (request->flags & _DRM_PAGE_ALIGN) - ? PAGE_ALIGN(size) : size; - page_order = order - PAGE_SHIFT > 0 ? 
order - PAGE_SHIFT : 0; - total = PAGE_SIZE << page_order; - - spin_lock(&dev->buf_lock); - if (dev->buf_use) { - spin_unlock(&dev->buf_lock); - return -EBUSY; - } - atomic_inc(&dev->buf_alloc); - spin_unlock(&dev->buf_lock); - - mutex_lock(&dev->struct_mutex); - entry = &dma->bufs[order]; - if (entry->buf_count) { - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; /* May only call once for each order */ - } - - if (count < 0 || count > 4096) { - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -EINVAL; - } - - entry->buflist = kcalloc(count, sizeof(*entry->buflist), GFP_KERNEL); - if (!entry->buflist) { - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - - entry->seglist = kcalloc(count, sizeof(*entry->seglist), GFP_KERNEL); - if (!entry->seglist) { - kfree(entry->buflist); - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - - /* Keep the original pagelist until we know all the allocations - * have succeeded - */ - temp_pagelist = kmalloc_array(dma->page_count + (count << page_order), - sizeof(*dma->pagelist), - GFP_KERNEL); - if (!temp_pagelist) { - kfree(entry->buflist); - kfree(entry->seglist); - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - memcpy(temp_pagelist, - dma->pagelist, dma->page_count * sizeof(*dma->pagelist)); - DRM_DEBUG("pagelist: %d entries\n", - dma->page_count + (count << page_order)); - - entry->buf_size = size; - entry->page_order = page_order; - byte_count = 0; - page_count = 0; - - while (entry->buf_count < count) { - dmah = kmalloc(sizeof(drm_dma_handle_t), GFP_KERNEL); - if (!dmah) { - /* Set count correctly so we free the proper amount. */ - entry->buf_count = count; - entry->seg_count = count; - drm_cleanup_buf_error(dev, entry); - kfree(temp_pagelist); - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - - dmah->size = total; - dmah->vaddr = dma_alloc_coherent(dev->dev, - dmah->size, - &dmah->busaddr, - GFP_KERNEL); - if (!dmah->vaddr) { - kfree(dmah); - - /* Set count correctly so we free the proper amount. */ - entry->buf_count = count; - entry->seg_count = count; - drm_cleanup_buf_error(dev, entry); - kfree(temp_pagelist); - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - entry->seglist[entry->seg_count++] = dmah; - for (i = 0; i < (1 << page_order); i++) { - DRM_DEBUG("page %d @ 0x%08lx\n", - dma->page_count + page_count, - (unsigned long)dmah->vaddr + PAGE_SIZE * i); - temp_pagelist[dma->page_count + page_count++] - = (unsigned long)dmah->vaddr + PAGE_SIZE * i; - } - for (offset = 0; - offset + size <= total && entry->buf_count < count; - offset += alignment, ++entry->buf_count) { - buf = &entry->buflist[entry->buf_count]; - buf->idx = dma->buf_count + entry->buf_count; - buf->total = alignment; - buf->order = order; - buf->used = 0; - buf->offset = (dma->byte_count + byte_count + offset); - buf->address = (void *)(dmah->vaddr + offset); - buf->bus_address = dmah->busaddr + offset; - buf->next = NULL; - buf->waiting = 0; - buf->pending = 0; - buf->file_priv = NULL; - - buf->dev_priv_size = dev->driver->dev_priv_size; - buf->dev_private = kzalloc(buf->dev_priv_size, - GFP_KERNEL); - if (!buf->dev_private) { - /* Set count correctly so we free the proper amount. 
*/ - entry->buf_count = count; - entry->seg_count = count; - drm_cleanup_buf_error(dev, entry); - kfree(temp_pagelist); - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - - DRM_DEBUG("buffer %d @ %p\n", - entry->buf_count, buf->address); - } - byte_count += PAGE_SIZE << page_order; - } - - temp_buflist = krealloc(dma->buflist, - (dma->buf_count + entry->buf_count) * - sizeof(*dma->buflist), GFP_KERNEL); - if (!temp_buflist) { - /* Free the entry because it isn't valid */ - drm_cleanup_buf_error(dev, entry); - kfree(temp_pagelist); - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - dma->buflist = temp_buflist; - - for (i = 0; i < entry->buf_count; i++) { - dma->buflist[i + dma->buf_count] = &entry->buflist[i]; - } - - /* No allocations failed, so now we can replace the original pagelist - * with the new one. - */ - if (dma->page_count) { - kfree(dma->pagelist); - } - dma->pagelist = temp_pagelist; - - dma->buf_count += entry->buf_count; - dma->seg_count += entry->seg_count; - dma->page_count += entry->seg_count << page_order; - dma->byte_count += PAGE_SIZE * (entry->seg_count << page_order); - - mutex_unlock(&dev->struct_mutex); - - request->count = entry->buf_count; - request->size = size; - - if (request->flags & _DRM_PCI_BUFFER_RO) - dma->flags = _DRM_DMA_USE_PCI_RO; - - atomic_dec(&dev->buf_alloc); - return 0; - -} -EXPORT_SYMBOL(drm_legacy_addbufs_pci); - -static int drm_legacy_addbufs_sg(struct drm_device *dev, - struct drm_buf_desc *request) -{ - struct drm_device_dma *dma = dev->dma; - struct drm_buf_entry *entry; - struct drm_buf *buf; - unsigned long offset; - unsigned long agp_offset; - int count; - int order; - int size; - int alignment; - int page_order; - int total; - int byte_count; - int i; - struct drm_buf **temp_buflist; - - if (!drm_core_check_feature(dev, DRIVER_SG)) - return -EOPNOTSUPP; - - if (!dma) - return -EINVAL; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - count = request->count; - order = order_base_2(request->size); - size = 1 << order; - - alignment = (request->flags & _DRM_PAGE_ALIGN) - ? PAGE_ALIGN(size) : size; - page_order = order - PAGE_SHIFT > 0 ? 
order - PAGE_SHIFT : 0; - total = PAGE_SIZE << page_order; - - byte_count = 0; - agp_offset = request->agp_start; - - DRM_DEBUG("count: %d\n", count); - DRM_DEBUG("order: %d\n", order); - DRM_DEBUG("size: %d\n", size); - DRM_DEBUG("agp_offset: %lu\n", agp_offset); - DRM_DEBUG("alignment: %d\n", alignment); - DRM_DEBUG("page_order: %d\n", page_order); - DRM_DEBUG("total: %d\n", total); - - if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER) - return -EINVAL; - - spin_lock(&dev->buf_lock); - if (dev->buf_use) { - spin_unlock(&dev->buf_lock); - return -EBUSY; - } - atomic_inc(&dev->buf_alloc); - spin_unlock(&dev->buf_lock); - - mutex_lock(&dev->struct_mutex); - entry = &dma->bufs[order]; - if (entry->buf_count) { - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; /* May only call once for each order */ - } - - if (count < 0 || count > 4096) { - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -EINVAL; - } - - entry->buflist = kcalloc(count, sizeof(*entry->buflist), GFP_KERNEL); - if (!entry->buflist) { - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - - entry->buf_size = size; - entry->page_order = page_order; - - offset = 0; - - while (entry->buf_count < count) { - buf = &entry->buflist[entry->buf_count]; - buf->idx = dma->buf_count + entry->buf_count; - buf->total = alignment; - buf->order = order; - buf->used = 0; - - buf->offset = (dma->byte_count + offset); - buf->bus_address = agp_offset + offset; - buf->address = (void *)(agp_offset + offset - + (unsigned long)dev->sg->virtual); - buf->next = NULL; - buf->waiting = 0; - buf->pending = 0; - buf->file_priv = NULL; - - buf->dev_priv_size = dev->driver->dev_priv_size; - buf->dev_private = kzalloc(buf->dev_priv_size, GFP_KERNEL); - if (!buf->dev_private) { - /* Set count correctly so we free the proper amount. */ - entry->buf_count = count; - drm_cleanup_buf_error(dev, entry); - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - - DRM_DEBUG("buffer %d @ %p\n", entry->buf_count, buf->address); - - offset += alignment; - entry->buf_count++; - byte_count += PAGE_SIZE << page_order; - } - - DRM_DEBUG("byte_count: %d\n", byte_count); - - temp_buflist = krealloc(dma->buflist, - (dma->buf_count + entry->buf_count) * - sizeof(*dma->buflist), GFP_KERNEL); - if (!temp_buflist) { - /* Free the entry because it isn't valid */ - drm_cleanup_buf_error(dev, entry); - mutex_unlock(&dev->struct_mutex); - atomic_dec(&dev->buf_alloc); - return -ENOMEM; - } - dma->buflist = temp_buflist; - - for (i = 0; i < entry->buf_count; i++) { - dma->buflist[i + dma->buf_count] = &entry->buflist[i]; - } - - dma->buf_count += entry->buf_count; - dma->seg_count += entry->seg_count; - dma->page_count += byte_count >> PAGE_SHIFT; - dma->byte_count += byte_count; - - DRM_DEBUG("dma->buf_count : %d\n", dma->buf_count); - DRM_DEBUG("entry->buf_count : %d\n", entry->buf_count); - - mutex_unlock(&dev->struct_mutex); - - request->count = entry->buf_count; - request->size = size; - - dma->flags = _DRM_DMA_USE_SG; - - atomic_dec(&dev->buf_alloc); - return 0; -} - -/* - * Add buffers for DMA transfers (ioctl). - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg pointer to a struct drm_buf_desc request. - * \return zero on success or a negative number on failure. 
- * - * According with the memory type specified in drm_buf_desc::flags and the - * build options, it dispatches the call either to addbufs_agp(), - * addbufs_sg() or addbufs_pci() for AGP, scatter-gather or consistent - * PCI memory respectively. - */ -int drm_legacy_addbufs(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_buf_desc *request = data; - int ret; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA)) - return -EOPNOTSUPP; - -#if IS_ENABLED(CONFIG_AGP) - if (request->flags & _DRM_AGP_BUFFER) - ret = drm_legacy_addbufs_agp(dev, request); - else -#endif - if (request->flags & _DRM_SG_BUFFER) - ret = drm_legacy_addbufs_sg(dev, request); - else if (request->flags & _DRM_FB_BUFFER) - ret = -EINVAL; - else - ret = drm_legacy_addbufs_pci(dev, request); - - return ret; -} - -/* - * Get information about the buffer mappings. - * - * This was originally mean for debugging purposes, or by a sophisticated - * client library to determine how best to use the available buffers (e.g., - * large buffers can be used for image transfer). - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg pointer to a drm_buf_info structure. - * \return zero on success or a negative number on failure. - * - * Increments drm_device::buf_use while holding the drm_device::buf_lock - * lock, preventing of allocating more buffers after this call. Information - * about each requested buffer is then copied into user space. - */ -int __drm_legacy_infobufs(struct drm_device *dev, - void *data, int *p, - int (*f)(void *, int, struct drm_buf_entry *)) -{ - struct drm_device_dma *dma = dev->dma; - int i; - int count; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA)) - return -EOPNOTSUPP; - - if (!dma) - return -EINVAL; - - spin_lock(&dev->buf_lock); - if (atomic_read(&dev->buf_alloc)) { - spin_unlock(&dev->buf_lock); - return -EBUSY; - } - ++dev->buf_use; /* Can't allocate more after this call */ - spin_unlock(&dev->buf_lock); - - for (i = 0, count = 0; i < DRM_MAX_ORDER + 1; i++) { - if (dma->bufs[i].buf_count) - ++count; - } - - DRM_DEBUG("count = %d\n", count); - - if (*p >= count) { - for (i = 0, count = 0; i < DRM_MAX_ORDER + 1; i++) { - struct drm_buf_entry *from = &dma->bufs[i]; - - if (from->buf_count) { - if (f(data, count, from) < 0) - return -EFAULT; - DRM_DEBUG("%d %d %d %d %d\n", - i, - dma->bufs[i].buf_count, - dma->bufs[i].buf_size, - dma->bufs[i].low_mark, - dma->bufs[i].high_mark); - ++count; - } - } - } - *p = count; - - return 0; -} - -static int copy_one_buf(void *data, int count, struct drm_buf_entry *from) -{ - struct drm_buf_info *request = data; - struct drm_buf_desc __user *to = &request->list[count]; - struct drm_buf_desc v = {.count = from->buf_count, - .size = from->buf_size, - .low_mark = from->low_mark, - .high_mark = from->high_mark}; - - if (copy_to_user(to, &v, offsetof(struct drm_buf_desc, flags))) - return -EFAULT; - return 0; -} - -int drm_legacy_infobufs(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_buf_info *request = data; - - return __drm_legacy_infobufs(dev, data, &request->count, copy_one_buf); -} - -/* - * Specifies a low and high water mark for buffer allocation - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. 
- * \param arg a pointer to a drm_buf_desc structure. - * \return zero on success or a negative number on failure. - * - * Verifies that the size order is bounded between the admissible orders and - * updates the respective drm_device_dma::bufs entry low and high water mark. - * - * \note This ioctl is deprecated and mostly never used. - */ -int drm_legacy_markbufs(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_device_dma *dma = dev->dma; - struct drm_buf_desc *request = data; - int order; - struct drm_buf_entry *entry; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA)) - return -EOPNOTSUPP; - - if (!dma) - return -EINVAL; - - DRM_DEBUG("%d, %d, %d\n", - request->size, request->low_mark, request->high_mark); - order = order_base_2(request->size); - if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER) - return -EINVAL; - entry = &dma->bufs[order]; - - if (request->low_mark < 0 || request->low_mark > entry->buf_count) - return -EINVAL; - if (request->high_mark < 0 || request->high_mark > entry->buf_count) - return -EINVAL; - - entry->low_mark = request->low_mark; - entry->high_mark = request->high_mark; - - return 0; -} - -/* - * Unreserve the buffers in list, previously reserved using drmDMA. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg pointer to a drm_buf_free structure. - * \return zero on success or a negative number on failure. - * - * Calls free_buffer() for each used buffer. - * This function is primarily used for debugging. - */ -int drm_legacy_freebufs(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_device_dma *dma = dev->dma; - struct drm_buf_free *request = data; - int i; - int idx; - struct drm_buf *buf; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA)) - return -EOPNOTSUPP; - - if (!dma) - return -EINVAL; - - DRM_DEBUG("%d\n", request->count); - for (i = 0; i < request->count; i++) { - if (copy_from_user(&idx, &request->list[i], sizeof(idx))) - return -EFAULT; - if (idx < 0 || idx >= dma->buf_count) { - DRM_ERROR("Index %d (of %d max)\n", - idx, dma->buf_count - 1); - return -EINVAL; - } - idx = array_index_nospec(idx, dma->buf_count); - buf = dma->buflist[idx]; - if (buf->file_priv != file_priv) { - DRM_ERROR("Process %d freeing buffer not owned\n", - task_pid_nr(current)); - return -EINVAL; - } - drm_legacy_free_buffer(dev, buf); - } - - return 0; -} - -/* - * Maps all of the DMA buffers into client-virtual space (ioctl). - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg pointer to a drm_buf_map structure. - * \return zero on success or a negative number on failure. - * - * Maps the AGP, SG or PCI buffer region with vm_mmap(), and copies information - * about each buffer into user space. For PCI buffers, it calls vm_mmap() with - * offset equal to 0, which drm_mmap() interprets as PCI buffers and calls - * drm_mmap_dma(). 
- */ -int __drm_legacy_mapbufs(struct drm_device *dev, void *data, int *p, - void __user **v, - int (*f)(void *, int, unsigned long, - struct drm_buf *), - struct drm_file *file_priv) -{ - struct drm_device_dma *dma = dev->dma; - int retcode = 0; - unsigned long virtual; - int i; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA)) - return -EOPNOTSUPP; - - if (!dma) - return -EINVAL; - - spin_lock(&dev->buf_lock); - if (atomic_read(&dev->buf_alloc)) { - spin_unlock(&dev->buf_lock); - return -EBUSY; - } - dev->buf_use++; /* Can't allocate more after this call */ - spin_unlock(&dev->buf_lock); - - if (*p >= dma->buf_count) { - if ((dev->agp && (dma->flags & _DRM_DMA_USE_AGP)) - || (drm_core_check_feature(dev, DRIVER_SG) - && (dma->flags & _DRM_DMA_USE_SG))) { - struct drm_local_map *map = dev->agp_buffer_map; - unsigned long token = dev->agp_buffer_token; - - if (!map) { - retcode = -EINVAL; - goto done; - } - virtual = vm_mmap(file_priv->filp, 0, map->size, - PROT_READ | PROT_WRITE, - MAP_SHARED, - token); - } else { - virtual = vm_mmap(file_priv->filp, 0, dma->byte_count, - PROT_READ | PROT_WRITE, - MAP_SHARED, 0); - } - if (virtual > -1024UL) { - /* Real error */ - retcode = (signed long)virtual; - goto done; - } - *v = (void __user *)virtual; - - for (i = 0; i < dma->buf_count; i++) { - if (f(data, i, virtual, dma->buflist[i]) < 0) { - retcode = -EFAULT; - goto done; - } - } - } - done: - *p = dma->buf_count; - DRM_DEBUG("%d buffers, retcode = %d\n", *p, retcode); - - return retcode; -} - -static int map_one_buf(void *data, int idx, unsigned long virtual, - struct drm_buf *buf) -{ - struct drm_buf_map *request = data; - unsigned long address = virtual + buf->offset; /* *** */ - - if (copy_to_user(&request->list[idx].idx, &buf->idx, - sizeof(request->list[0].idx))) - return -EFAULT; - if (copy_to_user(&request->list[idx].total, &buf->total, - sizeof(request->list[0].total))) - return -EFAULT; - if (clear_user(&request->list[idx].used, sizeof(int))) - return -EFAULT; - if (copy_to_user(&request->list[idx].address, &address, - sizeof(address))) - return -EFAULT; - return 0; -} - -int drm_legacy_mapbufs(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_buf_map *request = data; - - return __drm_legacy_mapbufs(dev, data, &request->count, - &request->virtual, map_one_buf, - file_priv); -} - -int drm_legacy_dma_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - if (dev->driver->dma_ioctl) - return dev->driver->dma_ioctl(dev, data, file_priv); - else - return -EINVAL; -} - -struct drm_local_map *drm_legacy_getsarea(struct drm_device *dev) -{ - struct drm_map_list *entry; - - list_for_each_entry(entry, &dev->maplist, head) { - if (entry->map && entry->map->type == _DRM_SHM && - (entry->map->flags & _DRM_CONTAINS_LOCK)) { - return entry->map; - } - } - return NULL; -} -EXPORT_SYMBOL(drm_legacy_getsarea); diff --git a/drivers/gpu/drm/drm_context.c b/drivers/gpu/drm/drm_context.c deleted file mode 100644 index a0fc779e5e1e..000000000000 --- a/drivers/gpu/drm/drm_context.c +++ /dev/null @@ -1,513 +0,0 @@ -/* - * Legacy: Generic DRM Contexts - * - * Copyright 1999, 2000 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. - * - * Author: Rickard E. 
(Rik) Faith - * Author: Gareth Hughes - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include -#include - -#include -#include -#include - -#include "drm_legacy.h" - -struct drm_ctx_list { - struct list_head head; - drm_context_t handle; - struct drm_file *tag; -}; - -/******************************************************************/ -/** \name Context bitmap support */ -/*@{*/ - -/* - * Free a handle from the context bitmap. - * - * \param dev DRM device. - * \param ctx_handle context handle. - * - * Clears the bit specified by \p ctx_handle in drm_device::ctx_bitmap and the entry - * in drm_device::ctx_idr, while holding the drm_device::struct_mutex - * lock. - */ -void drm_legacy_ctxbitmap_free(struct drm_device * dev, int ctx_handle) -{ - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return; - - mutex_lock(&dev->struct_mutex); - idr_remove(&dev->ctx_idr, ctx_handle); - mutex_unlock(&dev->struct_mutex); -} - -/* - * Context bitmap allocation. - * - * \param dev DRM device. - * \return (non-negative) context handle on success or a negative number on failure. - * - * Allocate a new idr from drm_device::ctx_idr while holding the - * drm_device::struct_mutex lock. - */ -static int drm_legacy_ctxbitmap_next(struct drm_device * dev) -{ - int ret; - - mutex_lock(&dev->struct_mutex); - ret = idr_alloc(&dev->ctx_idr, NULL, DRM_RESERVED_CONTEXTS, 0, - GFP_KERNEL); - mutex_unlock(&dev->struct_mutex); - return ret; -} - -/* - * Context bitmap initialization. - * - * \param dev DRM device. - * - * Initialise the drm_device::ctx_idr - */ -void drm_legacy_ctxbitmap_init(struct drm_device * dev) -{ - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return; - - idr_init(&dev->ctx_idr); -} - -/* - * Context bitmap cleanup. - * - * \param dev DRM device. - * - * Free all idr members using drm_ctx_sarea_free helper function - * while holding the drm_device::struct_mutex lock. - */ -void drm_legacy_ctxbitmap_cleanup(struct drm_device * dev) -{ - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return; - - mutex_lock(&dev->struct_mutex); - idr_destroy(&dev->ctx_idr); - mutex_unlock(&dev->struct_mutex); -} - -/** - * drm_legacy_ctxbitmap_flush() - Flush all contexts owned by a file - * @dev: DRM device to operate on - * @file: Open file to flush contexts for - * - * This iterates over all contexts on @dev and drops them if they're owned by - * @file. 
Note that after this call returns, new contexts might be added if - * the file is still alive. - */ -void drm_legacy_ctxbitmap_flush(struct drm_device *dev, struct drm_file *file) -{ - struct drm_ctx_list *pos, *tmp; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return; - - mutex_lock(&dev->ctxlist_mutex); - - list_for_each_entry_safe(pos, tmp, &dev->ctxlist, head) { - if (pos->tag == file && - pos->handle != DRM_KERNEL_CONTEXT) { - if (dev->driver->context_dtor) - dev->driver->context_dtor(dev, pos->handle); - - drm_legacy_ctxbitmap_free(dev, pos->handle); - list_del(&pos->head); - kfree(pos); - } - } - - mutex_unlock(&dev->ctxlist_mutex); -} - -/*@}*/ - -/******************************************************************/ -/** \name Per Context SAREA Support */ -/*@{*/ - -/* - * Get per-context SAREA. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument pointing to a drm_ctx_priv_map structure. - * \return zero on success or a negative number on failure. - * - * Gets the map from drm_device::ctx_idr with the handle specified and - * returns its handle. - */ -int drm_legacy_getsareactx(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_ctx_priv_map *request = data; - struct drm_local_map *map; - struct drm_map_list *_entry; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - mutex_lock(&dev->struct_mutex); - - map = idr_find(&dev->ctx_idr, request->ctx_id); - if (!map) { - mutex_unlock(&dev->struct_mutex); - return -EINVAL; - } - - request->handle = NULL; - list_for_each_entry(_entry, &dev->maplist, head) { - if (_entry->map == map) { - request->handle = - (void *)(unsigned long)_entry->user_token; - break; - } - } - - mutex_unlock(&dev->struct_mutex); - - if (request->handle == NULL) - return -EINVAL; - - return 0; -} - -/* - * Set per-context SAREA. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument pointing to a drm_ctx_priv_map structure. - * \return zero on success or a negative number on failure. - * - * Searches the mapping specified in \p arg and update the entry in - * drm_device::ctx_idr with it. - */ -int drm_legacy_setsareactx(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_ctx_priv_map *request = data; - struct drm_local_map *map = NULL; - struct drm_map_list *r_list = NULL; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - mutex_lock(&dev->struct_mutex); - list_for_each_entry(r_list, &dev->maplist, head) { - if (r_list->map - && r_list->user_token == (unsigned long) request->handle) - goto found; - } - bad: - mutex_unlock(&dev->struct_mutex); - return -EINVAL; - - found: - map = r_list->map; - if (!map) - goto bad; - - if (IS_ERR(idr_replace(&dev->ctx_idr, map, request->ctx_id))) - goto bad; - - mutex_unlock(&dev->struct_mutex); - - return 0; -} - -/*@}*/ - -/******************************************************************/ -/** \name The actual DRM context handling routines */ -/*@{*/ - -/* - * Switch context. - * - * \param dev DRM device. - * \param old old context handle. - * \param new new context handle. - * \return zero on success or a negative number on failure. - * - * Attempt to set drm_device::context_flag. 
- */ -static int drm_context_switch(struct drm_device * dev, int old, int new) -{ - if (test_and_set_bit(0, &dev->context_flag)) { - DRM_ERROR("Reentering -- FIXME\n"); - return -EBUSY; - } - - DRM_DEBUG("Context switch from %d to %d\n", old, new); - - if (new == dev->last_context) { - clear_bit(0, &dev->context_flag); - return 0; - } - - return 0; -} - -/* - * Complete context switch. - * - * \param dev DRM device. - * \param new new context handle. - * \return zero on success or a negative number on failure. - * - * Updates drm_device::last_context and drm_device::last_switch. Verifies the - * hardware lock is held, clears the drm_device::context_flag and wakes up - * drm_device::context_wait. - */ -static int drm_context_switch_complete(struct drm_device *dev, - struct drm_file *file_priv, int new) -{ - dev->last_context = new; /* PRE/POST: This is the _only_ writer. */ - - if (!_DRM_LOCK_IS_HELD(file_priv->master->lock.hw_lock->lock)) { - DRM_ERROR("Lock isn't held after context switch\n"); - } - - /* If a context switch is ever initiated - when the kernel holds the lock, release - that lock here. - */ - clear_bit(0, &dev->context_flag); - - return 0; -} - -/* - * Reserve contexts. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument pointing to a drm_ctx_res structure. - * \return zero on success or a negative number on failure. - */ -int drm_legacy_resctx(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_ctx_res *res = data; - struct drm_ctx ctx; - int i; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - if (res->count >= DRM_RESERVED_CONTEXTS) { - memset(&ctx, 0, sizeof(ctx)); - for (i = 0; i < DRM_RESERVED_CONTEXTS; i++) { - ctx.handle = i; - if (copy_to_user(&res->contexts[i], &ctx, sizeof(ctx))) - return -EFAULT; - } - } - res->count = DRM_RESERVED_CONTEXTS; - - return 0; -} - -/* - * Add context. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument pointing to a drm_ctx structure. - * \return zero on success or a negative number on failure. - * - * Get a new handle for the context and copy to userspace. - */ -int drm_legacy_addctx(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_ctx_list *ctx_entry; - struct drm_ctx *ctx = data; - int tmp_handle; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - tmp_handle = drm_legacy_ctxbitmap_next(dev); - if (tmp_handle == DRM_KERNEL_CONTEXT) { - /* Skip kernel's context and get a new one. */ - tmp_handle = drm_legacy_ctxbitmap_next(dev); - } - DRM_DEBUG("%d\n", tmp_handle); - if (tmp_handle < 0) { - DRM_DEBUG("Not enough free contexts.\n"); - /* Should this return -EBUSY instead? */ - return tmp_handle; - } - - ctx->handle = tmp_handle; - - ctx_entry = kmalloc(sizeof(*ctx_entry), GFP_KERNEL); - if (!ctx_entry) { - DRM_DEBUG("out of memory\n"); - return -ENOMEM; - } - - INIT_LIST_HEAD(&ctx_entry->head); - ctx_entry->handle = ctx->handle; - ctx_entry->tag = file_priv; - - mutex_lock(&dev->ctxlist_mutex); - list_add(&ctx_entry->head, &dev->ctxlist); - mutex_unlock(&dev->ctxlist_mutex); - - return 0; -} - -/* - * Get context. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument pointing to a drm_ctx structure. - * \return zero on success or a negative number on failure. 
- */ -int drm_legacy_getctx(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_ctx *ctx = data; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - /* This is 0, because we don't handle any context flags */ - ctx->flags = 0; - - return 0; -} - -/* - * Switch context. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument pointing to a drm_ctx structure. - * \return zero on success or a negative number on failure. - * - * Calls context_switch(). - */ -int drm_legacy_switchctx(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_ctx *ctx = data; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - DRM_DEBUG("%d\n", ctx->handle); - return drm_context_switch(dev, dev->last_context, ctx->handle); -} - -/* - * New context. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument pointing to a drm_ctx structure. - * \return zero on success or a negative number on failure. - * - * Calls context_switch_complete(). - */ -int drm_legacy_newctx(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_ctx *ctx = data; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - DRM_DEBUG("%d\n", ctx->handle); - drm_context_switch_complete(dev, file_priv, ctx->handle); - - return 0; -} - -/* - * Remove context. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument pointing to a drm_ctx structure. - * \return zero on success or a negative number on failure. - * - * If not the special kernel context, calls ctxbitmap_free() to free the specified context. - */ -int drm_legacy_rmctx(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_ctx *ctx = data; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - DRM_DEBUG("%d\n", ctx->handle); - if (ctx->handle != DRM_KERNEL_CONTEXT) { - if (dev->driver->context_dtor) - dev->driver->context_dtor(dev, ctx->handle); - drm_legacy_ctxbitmap_free(dev, ctx->handle); - } - - mutex_lock(&dev->ctxlist_mutex); - if (!list_empty(&dev->ctxlist)) { - struct drm_ctx_list *pos, *n; - - list_for_each_entry_safe(pos, n, &dev->ctxlist, head) { - if (pos->handle == ctx->handle) { - list_del(&pos->head); - kfree(pos); - } - } - } - mutex_unlock(&dev->ctxlist_mutex); - - return 0; -} - -/*@}*/ diff --git a/drivers/gpu/drm/drm_dma.c b/drivers/gpu/drm/drm_dma.c deleted file mode 100644 index eb6b741a6f99..000000000000 --- a/drivers/gpu/drm/drm_dma.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * \file drm_dma.c - * DMA IOCTL and function support - * - * \author Rickard E. (Rik) Faith - * \author Gareth Hughes - */ - -/* - * Created: Fri Mar 19 14:30:16 1999 by faith@valinux.com - * - * Copyright 1999, 2000 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include -#include - -#include -#include - -#include "drm_legacy.h" - -/** - * drm_legacy_dma_setup() - Initialize the DMA data. - * - * @dev: DRM device. - * Return: zero on success or a negative value on failure. - * - * Allocate and initialize a drm_device_dma structure. - */ -int drm_legacy_dma_setup(struct drm_device *dev) -{ - int i; - - if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA) || - !drm_core_check_feature(dev, DRIVER_LEGACY)) - return 0; - - dev->buf_use = 0; - atomic_set(&dev->buf_alloc, 0); - - dev->dma = kzalloc(sizeof(*dev->dma), GFP_KERNEL); - if (!dev->dma) - return -ENOMEM; - - for (i = 0; i <= DRM_MAX_ORDER; i++) - memset(&dev->dma->bufs[i], 0, sizeof(dev->dma->bufs[0])); - - return 0; -} - -/** - * drm_legacy_dma_takedown() - Cleanup the DMA resources. - * - * @dev: DRM device. - * - * Free all pages associated with DMA buffers, the buffers and pages lists, and - * finally the drm_device::dma structure itself. - */ -void drm_legacy_dma_takedown(struct drm_device *dev) -{ - struct drm_device_dma *dma = dev->dma; - drm_dma_handle_t *dmah; - int i, j; - - if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA) || - !drm_core_check_feature(dev, DRIVER_LEGACY)) - return; - - if (!dma) - return; - - /* Clear dma buffers */ - for (i = 0; i <= DRM_MAX_ORDER; i++) { - if (dma->bufs[i].seg_count) { - DRM_DEBUG("order %d: buf_count = %d," - " seg_count = %d\n", - i, - dma->bufs[i].buf_count, - dma->bufs[i].seg_count); - for (j = 0; j < dma->bufs[i].seg_count; j++) { - if (dma->bufs[i].seglist[j]) { - dmah = dma->bufs[i].seglist[j]; - dma_free_coherent(dev->dev, - dmah->size, - dmah->vaddr, - dmah->busaddr); - kfree(dmah); - } - } - kfree(dma->bufs[i].seglist); - } - if (dma->bufs[i].buf_count) { - for (j = 0; j < dma->bufs[i].buf_count; j++) { - kfree(dma->bufs[i].buflist[j].dev_private); - } - kfree(dma->bufs[i].buflist); - } - } - - kfree(dma->buflist); - kfree(dma->pagelist); - kfree(dev->dma); - dev->dma = NULL; -} - -/** - * drm_legacy_free_buffer() - Free a buffer. - * - * @dev: DRM device. - * @buf: buffer to free. - * - * Resets the fields of \p buf. - */ -void drm_legacy_free_buffer(struct drm_device *dev, struct drm_buf * buf) -{ - if (!buf) - return; - - buf->waiting = 0; - buf->pending = 0; - buf->file_priv = NULL; - buf->used = 0; -} - -/** - * drm_legacy_reclaim_buffers() - Reclaim the buffers. - * - * @dev: DRM device. 
- * @file_priv: DRM file private. - * - * Frees each buffer associated with \p file_priv not already on the hardware. - */ -void drm_legacy_reclaim_buffers(struct drm_device *dev, - struct drm_file *file_priv) -{ - struct drm_device_dma *dma = dev->dma; - int i; - - if (!dma) - return; - for (i = 0; i < dma->buf_count; i++) { - if (dma->buflist[i]->file_priv == file_priv) { - switch (dma->buflist[i]->list) { - case DRM_LIST_NONE: - drm_legacy_free_buffer(dev, dma->buflist[i]); - break; - case DRM_LIST_WAIT: - dma->buflist[i]->list = DRM_LIST_RECLAIM; - break; - default: - /* Buffer already on hardware. */ - break; - } - } - } -} diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c deleted file mode 100644 index 60afa1865559..000000000000 --- a/drivers/gpu/drm/drm_hashtab.c +++ /dev/null @@ -1,203 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND. USA. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * - **************************************************************************/ -/* - * Simple open hash tab implementation. 
- * - * Authors: - * Thomas Hellström - */ - -#include -#include -#include -#include -#include - -#include - -#include "drm_legacy.h" - -int drm_ht_create(struct drm_open_hash *ht, unsigned int order) -{ - unsigned int size = 1 << order; - - ht->order = order; - ht->table = NULL; - if (size <= PAGE_SIZE / sizeof(*ht->table)) - ht->table = kcalloc(size, sizeof(*ht->table), GFP_KERNEL); - else - ht->table = vzalloc(array_size(size, sizeof(*ht->table))); - if (!ht->table) { - DRM_ERROR("Out of memory for hash table\n"); - return -ENOMEM; - } - return 0; -} - -void drm_ht_verbose_list(struct drm_open_hash *ht, unsigned long key) -{ - struct drm_hash_item *entry; - struct hlist_head *h_list; - unsigned int hashed_key; - int count = 0; - - hashed_key = hash_long(key, ht->order); - DRM_DEBUG("Key is 0x%08lx, Hashed key is 0x%08x\n", key, hashed_key); - h_list = &ht->table[hashed_key]; - hlist_for_each_entry(entry, h_list, head) - DRM_DEBUG("count %d, key: 0x%08lx\n", count++, entry->key); -} - -static struct hlist_node *drm_ht_find_key(struct drm_open_hash *ht, - unsigned long key) -{ - struct drm_hash_item *entry; - struct hlist_head *h_list; - unsigned int hashed_key; - - hashed_key = hash_long(key, ht->order); - h_list = &ht->table[hashed_key]; - hlist_for_each_entry(entry, h_list, head) { - if (entry->key == key) - return &entry->head; - if (entry->key > key) - break; - } - return NULL; -} - -static struct hlist_node *drm_ht_find_key_rcu(struct drm_open_hash *ht, - unsigned long key) -{ - struct drm_hash_item *entry; - struct hlist_head *h_list; - unsigned int hashed_key; - - hashed_key = hash_long(key, ht->order); - h_list = &ht->table[hashed_key]; - hlist_for_each_entry_rcu(entry, h_list, head) { - if (entry->key == key) - return &entry->head; - if (entry->key > key) - break; - } - return NULL; -} - -int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item) -{ - struct drm_hash_item *entry; - struct hlist_head *h_list; - struct hlist_node *parent; - unsigned int hashed_key; - unsigned long key = item->key; - - hashed_key = hash_long(key, ht->order); - h_list = &ht->table[hashed_key]; - parent = NULL; - hlist_for_each_entry(entry, h_list, head) { - if (entry->key == key) - return -EINVAL; - if (entry->key > key) - break; - parent = &entry->head; - } - if (parent) { - hlist_add_behind_rcu(&item->head, parent); - } else { - hlist_add_head_rcu(&item->head, h_list); - } - return 0; -} - -/* - * Just insert an item and return any "bits" bit key that hasn't been - * used before. 
- */ -int drm_ht_just_insert_please(struct drm_open_hash *ht, struct drm_hash_item *item, - unsigned long seed, int bits, int shift, - unsigned long add) -{ - int ret; - unsigned long mask = (1UL << bits) - 1; - unsigned long first, unshifted_key; - - unshifted_key = hash_long(seed, bits); - first = unshifted_key; - do { - item->key = (unshifted_key << shift) + add; - ret = drm_ht_insert_item(ht, item); - if (ret) - unshifted_key = (unshifted_key + 1) & mask; - } while(ret && (unshifted_key != first)); - - if (ret) { - DRM_ERROR("Available key bit space exhausted\n"); - return -EINVAL; - } - return 0; -} - -int drm_ht_find_item(struct drm_open_hash *ht, unsigned long key, - struct drm_hash_item **item) -{ - struct hlist_node *list; - - list = drm_ht_find_key_rcu(ht, key); - if (!list) - return -EINVAL; - - *item = hlist_entry(list, struct drm_hash_item, head); - return 0; -} - -int drm_ht_remove_key(struct drm_open_hash *ht, unsigned long key) -{ - struct hlist_node *list; - - list = drm_ht_find_key(ht, key); - if (list) { - hlist_del_init_rcu(list); - return 0; - } - return -EINVAL; -} - -int drm_ht_remove_item(struct drm_open_hash *ht, struct drm_hash_item *item) -{ - hlist_del_init_rcu(&item->head); - return 0; -} - -void drm_ht_remove(struct drm_open_hash *ht) -{ - if (ht->table) { - kvfree(ht->table); - ht->table = NULL; - } -} diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index fa08a313127e..8e4faf0a28e6 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -121,11 +121,6 @@ int drm_wait_vblank_ioctl(struct drm_device *dev, void *data, /* drm_irq.c */ /* IOCTLS */ -#if IS_ENABLED(CONFIG_DRM_LEGACY) -int drm_legacy_irq_control(struct drm_device *dev, void *data, - struct drm_file *file_priv); -#endif - int drm_crtc_get_sequence_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c deleted file mode 100644 index d327638e15ee..000000000000 --- a/drivers/gpu/drm/drm_irq.c +++ /dev/null @@ -1,204 +0,0 @@ -/* - * drm_irq.c IRQ and vblank support - * - * \author Rickard E. (Rik) Faith - * \author Gareth Hughes - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * Created: Fri Mar 19 14:30:16 1999 by faith@valinux.com - * - * Copyright 1999, 2000 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - - -#include -#include /* For task queue support */ -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "drm_internal.h" - -static int drm_legacy_irq_install(struct drm_device *dev, int irq) -{ - int ret; - unsigned long sh_flags = 0; - - if (irq == 0) - return -EINVAL; - - if (dev->irq_enabled) - return -EBUSY; - dev->irq_enabled = true; - - DRM_DEBUG("irq=%d\n", irq); - - /* Before installing handler */ - if (dev->driver->irq_preinstall) - dev->driver->irq_preinstall(dev); - - /* PCI devices require shared interrupts. */ - if (dev_is_pci(dev->dev)) - sh_flags = IRQF_SHARED; - - ret = request_irq(irq, dev->driver->irq_handler, - sh_flags, dev->driver->name, dev); - - if (ret < 0) { - dev->irq_enabled = false; - return ret; - } - - /* After installing handler */ - if (dev->driver->irq_postinstall) - ret = dev->driver->irq_postinstall(dev); - - if (ret < 0) { - dev->irq_enabled = false; - if (drm_core_check_feature(dev, DRIVER_LEGACY)) - vga_client_unregister(to_pci_dev(dev->dev)); - free_irq(irq, dev); - } else { - dev->irq = irq; - } - - return ret; -} - -int drm_legacy_irq_uninstall(struct drm_device *dev) -{ - unsigned long irqflags; - bool irq_enabled; - int i; - - irq_enabled = dev->irq_enabled; - dev->irq_enabled = false; - - /* - * Wake up any waiters so they don't hang. This is just to paper over - * issues for UMS drivers which aren't in full control of their - * vblank/irq handling. KMS drivers must ensure that vblanks are all - * disabled when uninstalling the irq handler. 
- */ - if (drm_dev_has_vblank(dev)) { - spin_lock_irqsave(&dev->vbl_lock, irqflags); - for (i = 0; i < dev->num_crtcs; i++) { - struct drm_vblank_crtc *vblank = &dev->vblank[i]; - - if (!vblank->enabled) - continue; - - WARN_ON(drm_core_check_feature(dev, DRIVER_MODESET)); - - drm_vblank_disable_and_save(dev, i); - wake_up(&vblank->queue); - } - spin_unlock_irqrestore(&dev->vbl_lock, irqflags); - } - - if (!irq_enabled) - return -EINVAL; - - DRM_DEBUG("irq=%d\n", dev->irq); - - if (drm_core_check_feature(dev, DRIVER_LEGACY)) - vga_client_unregister(to_pci_dev(dev->dev)); - - if (dev->driver->irq_uninstall) - dev->driver->irq_uninstall(dev); - - free_irq(dev->irq, dev); - - return 0; -} -EXPORT_SYMBOL(drm_legacy_irq_uninstall); - -int drm_legacy_irq_control(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_control *ctl = data; - int ret = 0, irq; - struct pci_dev *pdev; - - /* if we haven't irq we fallback for compatibility reasons - - * this used to be a separate function in drm_dma.h - */ - - if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ)) - return 0; - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return 0; - /* UMS was only ever supported on pci devices. */ - if (WARN_ON(!dev_is_pci(dev->dev))) - return -EINVAL; - - switch (ctl->func) { - case DRM_INST_HANDLER: - pdev = to_pci_dev(dev->dev); - irq = pdev->irq; - - if (dev->if_version < DRM_IF_VERSION(1, 2) && - ctl->irq != irq) - return -EINVAL; - mutex_lock(&dev->struct_mutex); - ret = drm_legacy_irq_install(dev, irq); - mutex_unlock(&dev->struct_mutex); - - return ret; - case DRM_UNINST_HANDLER: - mutex_lock(&dev->struct_mutex); - ret = drm_legacy_irq_uninstall(dev); - mutex_unlock(&dev->struct_mutex); - - return ret; - default: - return -EINVAL; - } -} diff --git a/drivers/gpu/drm/drm_legacy.h b/drivers/gpu/drm/drm_legacy.h deleted file mode 100644 index 70c9dba114a6..000000000000 --- a/drivers/gpu/drm/drm_legacy.h +++ /dev/null @@ -1,290 +0,0 @@ -#ifndef __DRM_LEGACY_H__ -#define __DRM_LEGACY_H__ - -/* - * Copyright (c) 2014 David Herrmann - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * This file contains legacy interfaces that modern drm drivers - * should no longer be using. They cannot be removed as legacy - * drivers use them, and removing them are API breaks. 
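
The drm_legacy_irq_control() dispatcher removed above boils down to simple enable/disable bookkeeping: DRM_INST_HANDLER installs the handler only while dev->irq_enabled is still false, and DRM_UNINST_HANDLER reports -EINVAL when nothing was installed. A tiny sketch of that state handling (invented names, no real interrupts):

#include <stdio.h>
#include <stdbool.h>
#include <errno.h>

struct fake_dev {
	bool irq_enabled;
	int irq;
};

static int fake_irq_install(struct fake_dev *dev, int irq)
{
	if (irq == 0)
		return -EINVAL;
	if (dev->irq_enabled)
		return -EBUSY;		/* double install is rejected */
	dev->irq_enabled = true;
	dev->irq = irq;
	return 0;
}

static int fake_irq_uninstall(struct fake_dev *dev)
{
	if (!dev->irq_enabled)
		return -EINVAL;		/* nothing to tear down */
	dev->irq_enabled = false;
	return 0;
}

int main(void)
{
	struct fake_dev dev = { 0 };

	printf("install:   %d\n", fake_irq_install(&dev, 12));
	printf("reinstall: %d\n", fake_irq_install(&dev, 12));
	printf("uninstall: %d\n", fake_irq_uninstall(&dev));
	printf("again:     %d\n", fake_irq_uninstall(&dev));
	return 0;
}
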
- */ -#include - -#include -#include -#include - -struct agp_memory; -struct drm_buf_desc; -struct drm_device; -struct drm_file; -struct drm_hash_item; -struct drm_open_hash; - -/* - * Hash-table Support - */ - -#define drm_hash_entry(_ptr, _type, _member) container_of(_ptr, _type, _member) - -/* drm_hashtab.c */ -#if IS_ENABLED(CONFIG_DRM_LEGACY) -int drm_ht_create(struct drm_open_hash *ht, unsigned int order); -int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item); -int drm_ht_just_insert_please(struct drm_open_hash *ht, struct drm_hash_item *item, - unsigned long seed, int bits, int shift, - unsigned long add); -int drm_ht_find_item(struct drm_open_hash *ht, unsigned long key, struct drm_hash_item **item); - -void drm_ht_verbose_list(struct drm_open_hash *ht, unsigned long key); -int drm_ht_remove_key(struct drm_open_hash *ht, unsigned long key); -int drm_ht_remove_item(struct drm_open_hash *ht, struct drm_hash_item *item); -void drm_ht_remove(struct drm_open_hash *ht); -#endif - -/* - * RCU-safe interface - * - * The user of this API needs to make sure that two or more instances of the - * hash table manipulation functions are never run simultaneously. - * The lookup function drm_ht_find_item_rcu may, however, run simultaneously - * with any of the manipulation functions as long as it's called from within - * an RCU read-locked section. - */ -#define drm_ht_insert_item_rcu drm_ht_insert_item -#define drm_ht_just_insert_please_rcu drm_ht_just_insert_please -#define drm_ht_remove_key_rcu drm_ht_remove_key -#define drm_ht_remove_item_rcu drm_ht_remove_item -#define drm_ht_find_item_rcu drm_ht_find_item - -/* - * Generic DRM Contexts - */ - -#define DRM_KERNEL_CONTEXT 0 -#define DRM_RESERVED_CONTEXTS 1 - -#if IS_ENABLED(CONFIG_DRM_LEGACY) -void drm_legacy_ctxbitmap_init(struct drm_device *dev); -void drm_legacy_ctxbitmap_cleanup(struct drm_device *dev); -void drm_legacy_ctxbitmap_flush(struct drm_device *dev, struct drm_file *file); -#else -static inline void drm_legacy_ctxbitmap_init(struct drm_device *dev) {} -static inline void drm_legacy_ctxbitmap_cleanup(struct drm_device *dev) {} -static inline void drm_legacy_ctxbitmap_flush(struct drm_device *dev, struct drm_file *file) {} -#endif - -void drm_legacy_ctxbitmap_free(struct drm_device *dev, int ctx_handle); - -#if IS_ENABLED(CONFIG_DRM_LEGACY) -int drm_legacy_resctx(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_addctx(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_getctx(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_switchctx(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_newctx(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_rmctx(struct drm_device *d, void *v, struct drm_file *f); - -int drm_legacy_setsareactx(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_getsareactx(struct drm_device *d, void *v, struct drm_file *f); -#endif - -/* - * Generic Buffer Management - */ - -#define DRM_MAP_HASH_OFFSET 0x10000000 - -#if IS_ENABLED(CONFIG_DRM_LEGACY) -static inline int drm_legacy_create_map_hash(struct drm_device *dev) -{ - return drm_ht_create(&dev->map_hash, 12); -} - -static inline void drm_legacy_remove_map_hash(struct drm_device *dev) -{ - drm_ht_remove(&dev->map_hash); -} -#else -static inline int drm_legacy_create_map_hash(struct drm_device *dev) -{ - return 0; -} - -static inline void drm_legacy_remove_map_hash(struct drm_device *dev) {} -#endif - - -#if IS_ENABLED(CONFIG_DRM_LEGACY) 
-int drm_legacy_getmap_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int drm_legacy_addmap_ioctl(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_rmmap_ioctl(struct drm_device *d, void *v, struct drm_file *f); - -int drm_legacy_addbufs(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_infobufs(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_markbufs(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_freebufs(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_mapbufs(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_dma_ioctl(struct drm_device *d, void *v, struct drm_file *f); -#endif - -int __drm_legacy_infobufs(struct drm_device *, void *, int *, - int (*)(void *, int, struct drm_buf_entry *)); -int __drm_legacy_mapbufs(struct drm_device *, void *, int *, - void __user **, - int (*)(void *, int, unsigned long, struct drm_buf *), - struct drm_file *); - -#if IS_ENABLED(CONFIG_DRM_LEGACY) -void drm_legacy_master_rmmaps(struct drm_device *dev, - struct drm_master *master); -void drm_legacy_rmmaps(struct drm_device *dev); -#else -static inline void drm_legacy_master_rmmaps(struct drm_device *dev, - struct drm_master *master) {} -static inline void drm_legacy_rmmaps(struct drm_device *dev) {} -#endif - -#if IS_ENABLED(CONFIG_DRM_LEGACY) -void drm_legacy_vma_flush(struct drm_device *d); -#else -static inline void drm_legacy_vma_flush(struct drm_device *d) -{ - /* do nothing */ -} -#endif - -/* - * AGP Support - */ - -struct drm_agp_mem { - unsigned long handle; - struct agp_memory *memory; - unsigned long bound; - int pages; - struct list_head head; -}; - -/* drm_agpsupport.c */ -#if IS_ENABLED(CONFIG_DRM_LEGACY) && IS_ENABLED(CONFIG_AGP) -void drm_legacy_agp_clear(struct drm_device *dev); - -int drm_legacy_agp_acquire_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int drm_legacy_agp_release_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int drm_legacy_agp_enable_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int drm_legacy_agp_info_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int drm_legacy_agp_alloc_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int drm_legacy_agp_free_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int drm_legacy_agp_unbind_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int drm_legacy_agp_bind_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -#else -static inline void drm_legacy_agp_clear(struct drm_device *dev) {} -#endif - -/* drm_lock.c */ -#if IS_ENABLED(CONFIG_DRM_LEGACY) -int drm_legacy_lock(struct drm_device *d, void *v, struct drm_file *f); -int drm_legacy_unlock(struct drm_device *d, void *v, struct drm_file *f); -void drm_legacy_lock_release(struct drm_device *dev, struct file *filp); -#else -static inline void drm_legacy_lock_release(struct drm_device *dev, struct file *filp) {} -#endif - -/* DMA support */ -#if IS_ENABLED(CONFIG_DRM_LEGACY) -int drm_legacy_dma_setup(struct drm_device *dev); -void drm_legacy_dma_takedown(struct drm_device *dev); -#else -static inline int drm_legacy_dma_setup(struct drm_device *dev) -{ - return 0; -} -#endif - -void drm_legacy_free_buffer(struct drm_device *dev, - struct drm_buf * buf); -#if IS_ENABLED(CONFIG_DRM_LEGACY) -void drm_legacy_reclaim_buffers(struct drm_device *dev, - struct 
drm_file *filp); -#else -static inline void drm_legacy_reclaim_buffers(struct drm_device *dev, - struct drm_file *filp) {} -#endif - -/* Scatter Gather Support */ -#if IS_ENABLED(CONFIG_DRM_LEGACY) -void drm_legacy_sg_cleanup(struct drm_device *dev); -int drm_legacy_sg_alloc(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int drm_legacy_sg_free(struct drm_device *dev, void *data, - struct drm_file *file_priv); -#endif - -#if IS_ENABLED(CONFIG_DRM_LEGACY) -void drm_legacy_init_members(struct drm_device *dev); -void drm_legacy_destroy_members(struct drm_device *dev); -void drm_legacy_dev_reinit(struct drm_device *dev); -int drm_legacy_setup(struct drm_device * dev); -#else -static inline void drm_legacy_init_members(struct drm_device *dev) {} -static inline void drm_legacy_destroy_members(struct drm_device *dev) {} -static inline void drm_legacy_dev_reinit(struct drm_device *dev) {} -static inline int drm_legacy_setup(struct drm_device * dev) { return 0; } -#endif - -#if IS_ENABLED(CONFIG_DRM_LEGACY) -void drm_legacy_lock_master_cleanup(struct drm_device *dev, struct drm_master *master); -#else -static inline void drm_legacy_lock_master_cleanup(struct drm_device *dev, struct drm_master *master) {} -#endif - -#if IS_ENABLED(CONFIG_DRM_LEGACY) -void drm_master_legacy_init(struct drm_master *master); -#else -static inline void drm_master_legacy_init(struct drm_master *master) {} -#endif - -/* drm_pci.c */ -#if IS_ENABLED(CONFIG_DRM_LEGACY) && IS_ENABLED(CONFIG_PCI) -int drm_legacy_irq_by_busid(struct drm_device *dev, void *data, struct drm_file *file_priv); -void drm_legacy_pci_agp_destroy(struct drm_device *dev); -#else -static inline int drm_legacy_irq_by_busid(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - return -EINVAL; -} - -static inline void drm_legacy_pci_agp_destroy(struct drm_device *dev) {} -#endif - -#endif /* __DRM_LEGACY_H__ */ diff --git a/drivers/gpu/drm/drm_legacy_misc.c b/drivers/gpu/drm/drm_legacy_misc.c deleted file mode 100644 index d4c5434062d7..000000000000 --- a/drivers/gpu/drm/drm_legacy_misc.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * \file drm_legacy_misc.c - * Misc legacy support functions. - * - * \author Rickard E. (Rik) Faith - * \author Gareth Hughes - */ - -/* - * Created: Tue Feb 2 08:37:54 1999 by faith@valinux.com - * - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
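
The drm_legacy.h declarations removed above lean on one pattern throughout: each group of legacy entry points is declared only under IS_ENABLED(CONFIG_DRM_LEGACY), with static inline no-op stubs in the #else branch so core code can call them unconditionally. A self-contained sketch of that pattern (made-up names, an ordinary macro in place of the Kconfig switch):

#include <stdio.h>

/* Made-up switch standing in for IS_ENABLED(CONFIG_DRM_LEGACY). */
#define CONFIG_EXAMPLE_LEGACY 0

struct example_device {
	int id;
};

#if CONFIG_EXAMPLE_LEGACY
/* The real implementation would live in a legacy-only compilation unit. */
void example_legacy_setup(struct example_device *dev);
#else
/* Feature compiled out: the call collapses to a no-op inline, so the
 * caller below needs no #ifdef of its own. */
static inline void example_legacy_setup(struct example_device *dev)
{
	(void)dev;
}
#endif

int main(void)
{
	struct example_device dev = { .id = 1 };

	example_legacy_setup(&dev);
	printf("device %d set up (legacy support %s)\n", dev.id,
	       CONFIG_EXAMPLE_LEGACY ? "built in" : "compiled out");
	return 0;
}
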
IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include -#include -#include - -#include "drm_internal.h" -#include "drm_legacy.h" - -void drm_legacy_init_members(struct drm_device *dev) -{ - INIT_LIST_HEAD(&dev->ctxlist); - INIT_LIST_HEAD(&dev->vmalist); - INIT_LIST_HEAD(&dev->maplist); - spin_lock_init(&dev->buf_lock); - mutex_init(&dev->ctxlist_mutex); -} - -void drm_legacy_destroy_members(struct drm_device *dev) -{ - mutex_destroy(&dev->ctxlist_mutex); -} - -int drm_legacy_setup(struct drm_device * dev) -{ - int ret; - - if (dev->driver->firstopen && - drm_core_check_feature(dev, DRIVER_LEGACY)) { - ret = dev->driver->firstopen(dev); - if (ret != 0) - return ret; - } - - ret = drm_legacy_dma_setup(dev); - if (ret < 0) - return ret; - - - DRM_DEBUG("\n"); - return 0; -} - -void drm_legacy_dev_reinit(struct drm_device *dev) -{ - if (dev->irq_enabled) - drm_legacy_irq_uninstall(dev); - - mutex_lock(&dev->struct_mutex); - - drm_legacy_agp_clear(dev); - - drm_legacy_sg_cleanup(dev); - drm_legacy_vma_flush(dev); - drm_legacy_dma_takedown(dev); - - mutex_unlock(&dev->struct_mutex); - - dev->sigdata.lock = NULL; - - dev->context_flag = 0; - dev->last_context = 0; - dev->if_version = 0; - - DRM_DEBUG("lastclose completed\n"); -} - -void drm_master_legacy_init(struct drm_master *master) -{ - spin_lock_init(&master->lock.spinlock); - init_waitqueue_head(&master->lock.lock_queue); -} diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c deleted file mode 100644 index 1efbd5389d89..000000000000 --- a/drivers/gpu/drm/drm_lock.c +++ /dev/null @@ -1,373 +0,0 @@ -/* - * \file drm_lock.c - * IOCTLs for locking - * - * \author Rickard E. (Rik) Faith - * \author Gareth Hughes - */ - -/* - * Created: Tue Feb 2 08:37:54 1999 by faith@valinux.com - * - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include -#include - -#include -#include -#include -#include - -#include "drm_internal.h" -#include "drm_legacy.h" - -static int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context); - -/* - * Take the heavyweight lock. 
- * - * \param lock lock pointer. - * \param context locking context. - * \return one if the lock is held, or zero otherwise. - * - * Attempt to mark the lock as held by the given context, via the \p cmpxchg instruction. - */ -static -int drm_lock_take(struct drm_lock_data *lock_data, - unsigned int context) -{ - unsigned int old, new, prev; - volatile unsigned int *lock = &lock_data->hw_lock->lock; - - spin_lock_bh(&lock_data->spinlock); - do { - old = *lock; - if (old & _DRM_LOCK_HELD) - new = old | _DRM_LOCK_CONT; - else { - new = context | _DRM_LOCK_HELD | - ((lock_data->user_waiters + lock_data->kernel_waiters > 1) ? - _DRM_LOCK_CONT : 0); - } - prev = cmpxchg(lock, old, new); - } while (prev != old); - spin_unlock_bh(&lock_data->spinlock); - - if (_DRM_LOCKING_CONTEXT(old) == context) { - if (old & _DRM_LOCK_HELD) { - if (context != DRM_KERNEL_CONTEXT) { - DRM_ERROR("%d holds heavyweight lock\n", - context); - } - return 0; - } - } - - if ((_DRM_LOCKING_CONTEXT(new)) == context && (new & _DRM_LOCK_HELD)) { - /* Have lock */ - return 1; - } - return 0; -} - -/* - * This takes a lock forcibly and hands it to context. Should ONLY be used - * inside *_unlock to give lock to kernel before calling *_dma_schedule. - * - * \param dev DRM device. - * \param lock lock pointer. - * \param context locking context. - * \return always one. - * - * Resets the lock file pointer. - * Marks the lock as held by the given context, via the \p cmpxchg instruction. - */ -static int drm_lock_transfer(struct drm_lock_data *lock_data, - unsigned int context) -{ - unsigned int old, new, prev; - volatile unsigned int *lock = &lock_data->hw_lock->lock; - - lock_data->file_priv = NULL; - do { - old = *lock; - new = context | _DRM_LOCK_HELD; - prev = cmpxchg(lock, old, new); - } while (prev != old); - return 1; -} - -static int drm_legacy_lock_free(struct drm_lock_data *lock_data, - unsigned int context) -{ - unsigned int old, new, prev; - volatile unsigned int *lock = &lock_data->hw_lock->lock; - - spin_lock_bh(&lock_data->spinlock); - if (lock_data->kernel_waiters != 0) { - drm_lock_transfer(lock_data, 0); - lock_data->idle_has_lock = 1; - spin_unlock_bh(&lock_data->spinlock); - return 1; - } - spin_unlock_bh(&lock_data->spinlock); - - do { - old = *lock; - new = _DRM_LOCKING_CONTEXT(old); - prev = cmpxchg(lock, old, new); - } while (prev != old); - - if (_DRM_LOCK_IS_HELD(old) && _DRM_LOCKING_CONTEXT(old) != context) { - DRM_ERROR("%d freed heavyweight lock held by %d\n", - context, _DRM_LOCKING_CONTEXT(old)); - return 1; - } - wake_up_interruptible(&lock_data->lock_queue); - return 0; -} - -/* - * Lock ioctl. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument, pointing to a drm_lock structure. - * \return zero on success or negative number on failure. - * - * Add the current task to the lock wait queue, and attempt to take to lock. 
- */ -int drm_legacy_lock(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - DECLARE_WAITQUEUE(entry, current); - struct drm_lock *lock = data; - struct drm_master *master = file_priv->master; - int ret = 0; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - ++file_priv->lock_count; - - if (lock->context == DRM_KERNEL_CONTEXT) { - DRM_ERROR("Process %d using kernel context %d\n", - task_pid_nr(current), lock->context); - return -EINVAL; - } - - DRM_DEBUG("%d (pid %d) requests lock (0x%08x), flags = 0x%08x\n", - lock->context, task_pid_nr(current), - master->lock.hw_lock ? master->lock.hw_lock->lock : -1, - lock->flags); - - add_wait_queue(&master->lock.lock_queue, &entry); - spin_lock_bh(&master->lock.spinlock); - master->lock.user_waiters++; - spin_unlock_bh(&master->lock.spinlock); - - for (;;) { - __set_current_state(TASK_INTERRUPTIBLE); - if (!master->lock.hw_lock) { - /* Device has been unregistered */ - send_sig(SIGTERM, current, 0); - ret = -EINTR; - break; - } - if (drm_lock_take(&master->lock, lock->context)) { - master->lock.file_priv = file_priv; - master->lock.lock_time = jiffies; - break; /* Got lock */ - } - - /* Contention */ - mutex_unlock(&drm_global_mutex); - schedule(); - mutex_lock(&drm_global_mutex); - if (signal_pending(current)) { - ret = -EINTR; - break; - } - } - spin_lock_bh(&master->lock.spinlock); - master->lock.user_waiters--; - spin_unlock_bh(&master->lock.spinlock); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&master->lock.lock_queue, &entry); - - DRM_DEBUG("%d %s\n", lock->context, - ret ? "interrupted" : "has lock"); - if (ret) return ret; - - /* don't set the block all signals on the master process for now - * really probably not the correct answer but lets us debug xkb - * xserver for now */ - if (!drm_is_current_master(file_priv)) { - dev->sigdata.context = lock->context; - dev->sigdata.lock = master->lock.hw_lock; - } - - if (dev->driver->dma_quiescent && (lock->flags & _DRM_LOCK_QUIESCENT)) - { - if (dev->driver->dma_quiescent(dev)) { - DRM_DEBUG("%d waiting for DMA quiescent\n", - lock->context); - return -EBUSY; - } - } - - return 0; -} - -/* - * Unlock ioctl. - * - * \param inode device inode. - * \param file_priv DRM file private. - * \param cmd command. - * \param arg user argument, pointing to a drm_lock structure. - * \return zero on success or negative number on failure. - * - * Transfer and free the lock. - */ -int drm_legacy_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv) -{ - struct drm_lock *lock = data; - struct drm_master *master = file_priv->master; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - if (lock->context == DRM_KERNEL_CONTEXT) { - DRM_ERROR("Process %d using kernel context %d\n", - task_pid_nr(current), lock->context); - return -EINVAL; - } - - if (drm_legacy_lock_free(&master->lock, lock->context)) { - /* FIXME: Should really bail out here. */ - } - - return 0; -} - -/* - * This function returns immediately and takes the hw lock - * with the kernel context if it is free, otherwise it gets the highest priority when and if - * it is eventually released. - * - * This guarantees that the kernel will _eventually_ have the lock _unless_ it is held - * by a blocked process. (In the latter case an explicit wait for the hardware lock would cause - * a deadlock, which is why the "idlelock" was invented). - * - * This should be sufficient to wait for GPU idle without - * having to worry about starvation. 
- */ -void drm_legacy_idlelock_take(struct drm_lock_data *lock_data) -{ - int ret; - - spin_lock_bh(&lock_data->spinlock); - lock_data->kernel_waiters++; - if (!lock_data->idle_has_lock) { - - spin_unlock_bh(&lock_data->spinlock); - ret = drm_lock_take(lock_data, DRM_KERNEL_CONTEXT); - spin_lock_bh(&lock_data->spinlock); - - if (ret == 1) - lock_data->idle_has_lock = 1; - } - spin_unlock_bh(&lock_data->spinlock); -} -EXPORT_SYMBOL(drm_legacy_idlelock_take); - -void drm_legacy_idlelock_release(struct drm_lock_data *lock_data) -{ - unsigned int old, prev; - volatile unsigned int *lock = &lock_data->hw_lock->lock; - - spin_lock_bh(&lock_data->spinlock); - if (--lock_data->kernel_waiters == 0) { - if (lock_data->idle_has_lock) { - do { - old = *lock; - prev = cmpxchg(lock, old, DRM_KERNEL_CONTEXT); - } while (prev != old); - wake_up_interruptible(&lock_data->lock_queue); - lock_data->idle_has_lock = 0; - } - } - spin_unlock_bh(&lock_data->spinlock); -} -EXPORT_SYMBOL(drm_legacy_idlelock_release); - -static int drm_legacy_i_have_hw_lock(struct drm_device *dev, - struct drm_file *file_priv) -{ - struct drm_master *master = file_priv->master; - - return (file_priv->lock_count && master->lock.hw_lock && - _DRM_LOCK_IS_HELD(master->lock.hw_lock->lock) && - master->lock.file_priv == file_priv); -} - -void drm_legacy_lock_release(struct drm_device *dev, struct file *filp) -{ - struct drm_file *file_priv = filp->private_data; - - /* if the master has gone away we can't do anything with the lock */ - if (!dev->master) - return; - - if (drm_legacy_i_have_hw_lock(dev, file_priv)) { - DRM_DEBUG("File %p released, freeing lock for context %d\n", - filp, _DRM_LOCKING_CONTEXT(file_priv->master->lock.hw_lock->lock)); - drm_legacy_lock_free(&file_priv->master->lock, - _DRM_LOCKING_CONTEXT(file_priv->master->lock.hw_lock->lock)); - } -} - -void drm_legacy_lock_master_cleanup(struct drm_device *dev, struct drm_master *master) -{ - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return; - - /* - * Since the master is disappearing, so is the - * possibility to lock. - */ - mutex_lock(&dev->struct_mutex); - if (master->lock.hw_lock) { - if (dev->sigdata.lock == master->lock.hw_lock) - dev->sigdata.lock = NULL; - master->lock.hw_lock = NULL; - master->lock.file_priv = NULL; - wake_up_interruptible_all(&master->lock.lock_queue); - } - mutex_unlock(&dev->struct_mutex); -} diff --git a/drivers/gpu/drm/drm_memory.c b/drivers/gpu/drm/drm_memory.c deleted file mode 100644 index d2e1dccd8113..000000000000 --- a/drivers/gpu/drm/drm_memory.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - * \file drm_memory.c - * Memory management wrappers for DRM - * - * \author Rickard E. (Rik) Faith - * \author Gareth Hughes - */ - -/* - * Created: Thu Feb 4 14:00:34 1999 by faith@valinux.com - * - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. 
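
The heavyweight lock being deleted in drm_lock.c is a single 32-bit word updated with cmpxchg(): the low bits carry the owning context, one bit marks the lock as held, and another records contention. A compact userspace sketch of drm_lock_take()'s update loop, using C11 atomics in place of the kernel's cmpxchg() (the bit layout and constants below are illustrative, not the real _DRM_* values):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define LOCK_HELD 0x80000000u	/* illustrative, not the real _DRM_LOCK_HELD */
#define LOCK_CONT 0x40000000u
#define CTX_MASK  0x3fffffffu

static _Atomic unsigned int hw_lock;

/* Mark the lock word as held by @ctx; if it is already held, only set
 * the contention bit so the current holder knows a waiter exists. */
static bool lock_take(unsigned int ctx)
{
	unsigned int old, new_val;

	do {
		old = atomic_load(&hw_lock);
		if (old & LOCK_HELD)
			new_val = old | LOCK_CONT;
		else
			new_val = (ctx & CTX_MASK) | LOCK_HELD;
	} while (!atomic_compare_exchange_weak(&hw_lock, &old, new_val));

	return !(old & LOCK_HELD);	/* true only if we took a free lock */
}

int main(void)
{
	printf("ctx 1 takes lock: %d\n", lock_take(1));	/* 1: acquired  */
	printf("ctx 2 takes lock: %d\n", lock_take(2));	/* 0: contended */
	return 0;
}
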
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include -#include -#include -#include - -#include -#include - -#include "drm_legacy.h" - -#if IS_ENABLED(CONFIG_AGP) - -#ifdef HAVE_PAGE_AGP -# include -#else -# ifdef __powerpc__ -# define PAGE_AGP pgprot_noncached_wc(PAGE_KERNEL) -# else -# define PAGE_AGP PAGE_KERNEL -# endif -#endif - -static void *agp_remap(unsigned long offset, unsigned long size, - struct drm_device *dev) -{ - unsigned long i, num_pages = - PAGE_ALIGN(size) / PAGE_SIZE; - struct drm_agp_mem *agpmem; - struct page **page_map; - struct page **phys_page_map; - void *addr; - - size = PAGE_ALIGN(size); - -#ifdef __alpha__ - offset -= dev->hose->mem_space->start; -#endif - - list_for_each_entry(agpmem, &dev->agp->memory, head) - if (agpmem->bound <= offset - && (agpmem->bound + (agpmem->pages << PAGE_SHIFT)) >= - (offset + size)) - break; - if (&agpmem->head == &dev->agp->memory) - return NULL; - - /* - * OK, we're mapping AGP space on a chipset/platform on which memory accesses by - * the CPU do not get remapped by the GART. We fix this by using the kernel's - * page-table instead (that's probably faster anyhow...). - */ - /* note: use vmalloc() because num_pages could be large... 
*/ - page_map = vmalloc(array_size(num_pages, sizeof(struct page *))); - if (!page_map) - return NULL; - - phys_page_map = (agpmem->memory->pages + (offset - agpmem->bound) / PAGE_SIZE); - for (i = 0; i < num_pages; ++i) - page_map[i] = phys_page_map[i]; - addr = vmap(page_map, num_pages, VM_IOREMAP, PAGE_AGP); - vfree(page_map); - - return addr; -} - -#else /* CONFIG_AGP */ -static inline void *agp_remap(unsigned long offset, unsigned long size, - struct drm_device *dev) -{ - return NULL; -} - -#endif /* CONFIG_AGP */ - -void drm_legacy_ioremap(struct drm_local_map *map, struct drm_device *dev) -{ - if (dev->agp && dev->agp->cant_use_aperture && map->type == _DRM_AGP) - map->handle = agp_remap(map->offset, map->size, dev); - else - map->handle = ioremap(map->offset, map->size); -} -EXPORT_SYMBOL(drm_legacy_ioremap); - -void drm_legacy_ioremap_wc(struct drm_local_map *map, struct drm_device *dev) -{ - if (dev->agp && dev->agp->cant_use_aperture && map->type == _DRM_AGP) - map->handle = agp_remap(map->offset, map->size, dev); - else - map->handle = ioremap_wc(map->offset, map->size); -} -EXPORT_SYMBOL(drm_legacy_ioremap_wc); - -void drm_legacy_ioremapfree(struct drm_local_map *map, struct drm_device *dev) -{ - if (!map->handle || !map->size) - return; - - if (dev->agp && dev->agp->cant_use_aperture && map->type == _DRM_AGP) - vunmap(map->handle); - else - iounmap(map->handle); -} -EXPORT_SYMBOL(drm_legacy_ioremapfree); diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c index 67907dd42721..c585f1e8803e 100644 --- a/drivers/gpu/drm/drm_pci.c +++ b/drivers/gpu/drm/drm_pci.c @@ -35,13 +35,6 @@ #include #include "drm_internal.h" -#include "drm_legacy.h" - -#ifdef CONFIG_DRM_LEGACY -/* List of devices hanging off drivers with stealth attach. */ -static LIST_HEAD(legacy_dev_list); -static DEFINE_MUTEX(legacy_dev_list_lock); -#endif static int drm_get_pci_domain(struct drm_device *dev) { @@ -72,199 +65,3 @@ int drm_pci_set_busid(struct drm_device *dev, struct drm_master *master) master->unique_len = strlen(master->unique); return 0; } - -#ifdef CONFIG_DRM_LEGACY - -static int drm_legacy_pci_irq_by_busid(struct drm_device *dev, struct drm_irq_busid *p) -{ - struct pci_dev *pdev = to_pci_dev(dev->dev); - - if ((p->busnum >> 8) != drm_get_pci_domain(dev) || - (p->busnum & 0xff) != pdev->bus->number || - p->devnum != PCI_SLOT(pdev->devfn) || p->funcnum != PCI_FUNC(pdev->devfn)) - return -EINVAL; - - p->irq = pdev->irq; - - DRM_DEBUG("%d:%d:%d => IRQ %d\n", p->busnum, p->devnum, p->funcnum, - p->irq); - return 0; -} - -/** - * drm_legacy_irq_by_busid - Get interrupt from bus ID - * @dev: DRM device - * @data: IOCTL parameter pointing to a drm_irq_busid structure - * @file_priv: DRM file private. - * - * Finds the PCI device with the specified bus id and gets its IRQ number. - * This IOCTL is deprecated, and will now return EINVAL for any busid not equal - * to that of the device that this DRM instance attached to. - * - * Return: 0 on success or a negative error code on failure. - */ -int drm_legacy_irq_by_busid(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_irq_busid *p = data; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - /* UMS was only ever support on PCI devices. 
*/ - if (WARN_ON(!dev_is_pci(dev->dev))) - return -EINVAL; - - if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ)) - return -EOPNOTSUPP; - - return drm_legacy_pci_irq_by_busid(dev, p); -} - -void drm_legacy_pci_agp_destroy(struct drm_device *dev) -{ - if (dev->agp) { - arch_phys_wc_del(dev->agp->agp_mtrr); - drm_legacy_agp_clear(dev); - kfree(dev->agp); - dev->agp = NULL; - } -} - -static void drm_legacy_pci_agp_init(struct drm_device *dev) -{ - if (drm_core_check_feature(dev, DRIVER_USE_AGP)) { - if (pci_find_capability(to_pci_dev(dev->dev), PCI_CAP_ID_AGP)) - dev->agp = drm_legacy_agp_init(dev); - if (dev->agp) { - dev->agp->agp_mtrr = arch_phys_wc_add( - dev->agp->agp_info.aper_base, - dev->agp->agp_info.aper_size * - 1024 * 1024); - } - } -} - -static int drm_legacy_get_pci_dev(struct pci_dev *pdev, - const struct pci_device_id *ent, - const struct drm_driver *driver) -{ - struct drm_device *dev; - int ret; - - DRM_DEBUG("\n"); - - dev = drm_dev_alloc(driver, &pdev->dev); - if (IS_ERR(dev)) - return PTR_ERR(dev); - - ret = pci_enable_device(pdev); - if (ret) - goto err_free; - -#ifdef __alpha__ - dev->hose = pdev->sysdata; -#endif - - drm_legacy_pci_agp_init(dev); - - ret = drm_dev_register(dev, ent->driver_data); - if (ret) - goto err_agp; - - if (drm_core_check_feature(dev, DRIVER_LEGACY)) { - mutex_lock(&legacy_dev_list_lock); - list_add_tail(&dev->legacy_dev_list, &legacy_dev_list); - mutex_unlock(&legacy_dev_list_lock); - } - - return 0; - -err_agp: - drm_legacy_pci_agp_destroy(dev); - pci_disable_device(pdev); -err_free: - drm_dev_put(dev); - return ret; -} - -/** - * drm_legacy_pci_init - shadow-attach a legacy DRM PCI driver - * @driver: DRM device driver - * @pdriver: PCI device driver - * - * This is only used by legacy dri1 drivers and deprecated. - * - * Return: 0 on success or a negative error code on failure. - */ -int drm_legacy_pci_init(const struct drm_driver *driver, - struct pci_driver *pdriver) -{ - struct pci_dev *pdev = NULL; - const struct pci_device_id *pid; - int i; - - DRM_DEBUG("\n"); - - if (WARN_ON(!(driver->driver_features & DRIVER_LEGACY))) - return -EINVAL; - - /* If not using KMS, fall back to stealth mode manual scanning. */ - for (i = 0; pdriver->id_table[i].vendor != 0; i++) { - pid = &pdriver->id_table[i]; - - /* Loop around setting up a DRM device for each PCI device - * matching our ID and device class. If we had the internal - * function that pci_get_subsys and pci_get_class used, we'd - * be able to just pass pid in instead of doing a two-stage - * thing. - */ - pdev = NULL; - while ((pdev = - pci_get_subsys(pid->vendor, pid->device, pid->subvendor, - pid->subdevice, pdev)) != NULL) { - if ((pdev->class & pid->class_mask) != pid->class) - continue; - - /* stealth mode requires a manual probe */ - pci_dev_get(pdev); - drm_legacy_get_pci_dev(pdev, pid, driver); - } - } - return 0; -} -EXPORT_SYMBOL(drm_legacy_pci_init); - -/** - * drm_legacy_pci_exit - unregister shadow-attach legacy DRM driver - * @driver: DRM device driver - * @pdriver: PCI device driver - * - * Unregister a DRM driver shadow-attached through drm_legacy_pci_init(). This - * is deprecated and only used by dri1 drivers. 
- */ -void drm_legacy_pci_exit(const struct drm_driver *driver, - struct pci_driver *pdriver) -{ - struct drm_device *dev, *tmp; - - DRM_DEBUG("\n"); - - if (!(driver->driver_features & DRIVER_LEGACY)) { - WARN_ON(1); - } else { - mutex_lock(&legacy_dev_list_lock); - list_for_each_entry_safe(dev, tmp, &legacy_dev_list, - legacy_dev_list) { - if (dev->driver == driver) { - list_del(&dev->legacy_dev_list); - drm_put_dev(dev); - } - } - mutex_unlock(&legacy_dev_list_lock); - } - DRM_INFO("Module unloaded\n"); -} -EXPORT_SYMBOL(drm_legacy_pci_exit); - -#endif diff --git a/drivers/gpu/drm/drm_scatter.c b/drivers/gpu/drm/drm_scatter.c deleted file mode 100644 index f4e6184d1877..000000000000 --- a/drivers/gpu/drm/drm_scatter.c +++ /dev/null @@ -1,220 +0,0 @@ -/* - * \file drm_scatter.c - * IOCTLs to manage scatter/gather memory - * - * \author Gareth Hughes - */ - -/* - * Created: Mon Dec 18 23:20:54 2000 by gareth@valinux.com - * - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
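
The stealth-attach loop in drm_legacy_pci_init() above cannot rely on the PCI core binding the driver for it: it walks the driver's id_table, lets pci_get_subsys() match vendor/device/subvendor/subdevice, and applies the class/class_mask filter by hand. A self-contained sketch of that matching (made-up table entries and devices, no PCI core involved):

#include <stdio.h>
#include <stdbool.h>

struct dev_id {
	unsigned int vendor, device;
	unsigned int class, class_mask;
};

struct found_dev {
	unsigned int vendor, device, class;
};

/* Mirror of the two-stage match: exact vendor/device, then the manual
 * class/class_mask comparison from the stealth-attach loop. */
static bool id_matches(const struct dev_id *id, const struct found_dev *d)
{
	if (id->vendor != d->vendor || id->device != d->device)
		return false;
	return (d->class & id->class_mask) == id->class;
}

int main(void)
{
	/* Made-up IDs: one display-class entry, vendor 0 terminates. */
	static const struct dev_id table[] = {
		{ 0x1002, 0x5159, 0x030000, 0xff0000 },
		{ 0 }
	};
	static const struct found_dev devs[] = {
		{ 0x1002, 0x5159, 0x030000 },	/* matches             */
		{ 0x1002, 0x5159, 0x040000 },	/* wrong class         */
		{ 0x8086, 0x1234, 0x030000 },	/* wrong vendor/device */
	};

	for (int i = 0; table[i].vendor != 0; i++)
		for (unsigned int j = 0; j < sizeof(devs) / sizeof(devs[0]); j++)
			printf("table[%d] vs dev[%u]: %s\n", i, j,
			       id_matches(&table[i], &devs[j]) ? "match" : "no match");
	return 0;
}
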
- */ - -#include -#include -#include - -#include -#include -#include - -#include "drm_legacy.h" - -#define DEBUG_SCATTER 0 - -static void drm_sg_cleanup(struct drm_sg_mem * entry) -{ - struct page *page; - int i; - - for (i = 0; i < entry->pages; i++) { - page = entry->pagelist[i]; - if (page) - ClearPageReserved(page); - } - - vfree(entry->virtual); - - kfree(entry->busaddr); - kfree(entry->pagelist); - kfree(entry); -} - -void drm_legacy_sg_cleanup(struct drm_device *dev) -{ - if (drm_core_check_feature(dev, DRIVER_SG) && dev->sg && - drm_core_check_feature(dev, DRIVER_LEGACY)) { - drm_sg_cleanup(dev->sg); - dev->sg = NULL; - } -} -#ifdef _LP64 -# define ScatterHandle(x) (unsigned int)((x >> 32) + (x & ((1L << 32) - 1))) -#else -# define ScatterHandle(x) (unsigned int)(x) -#endif - -int drm_legacy_sg_alloc(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_scatter_gather *request = data; - struct drm_sg_mem *entry; - unsigned long pages, i, j; - - DRM_DEBUG("\n"); - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - if (!drm_core_check_feature(dev, DRIVER_SG)) - return -EOPNOTSUPP; - - if (request->size > SIZE_MAX - PAGE_SIZE) - return -EINVAL; - - if (dev->sg) - return -EINVAL; - - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return -ENOMEM; - - pages = (request->size + PAGE_SIZE - 1) / PAGE_SIZE; - DRM_DEBUG("size=%ld pages=%ld\n", request->size, pages); - - entry->pages = pages; - entry->pagelist = kcalloc(pages, sizeof(*entry->pagelist), GFP_KERNEL); - if (!entry->pagelist) { - kfree(entry); - return -ENOMEM; - } - - entry->busaddr = kcalloc(pages, sizeof(*entry->busaddr), GFP_KERNEL); - if (!entry->busaddr) { - kfree(entry->pagelist); - kfree(entry); - return -ENOMEM; - } - - entry->virtual = vmalloc_32(pages << PAGE_SHIFT); - if (!entry->virtual) { - kfree(entry->busaddr); - kfree(entry->pagelist); - kfree(entry); - return -ENOMEM; - } - - /* This also forces the mapping of COW pages, so our page list - * will be valid. Please don't remove it... - */ - memset(entry->virtual, 0, pages << PAGE_SHIFT); - - entry->handle = ScatterHandle((unsigned long)entry->virtual); - - DRM_DEBUG("handle = %08lx\n", entry->handle); - DRM_DEBUG("virtual = %p\n", entry->virtual); - - for (i = (unsigned long)entry->virtual, j = 0; j < pages; - i += PAGE_SIZE, j++) { - entry->pagelist[j] = vmalloc_to_page((void *)i); - if (!entry->pagelist[j]) - goto failed; - SetPageReserved(entry->pagelist[j]); - } - - request->handle = entry->handle; - - dev->sg = entry; - -#if DEBUG_SCATTER - /* Verify that each page points to its virtual address, and vice - * versa. 
- */ - { - int error = 0; - - for (i = 0; i < pages; i++) { - unsigned long *tmp; - - tmp = page_address(entry->pagelist[i]); - for (j = 0; - j < PAGE_SIZE / sizeof(unsigned long); - j++, tmp++) { - *tmp = 0xcafebabe; - } - tmp = (unsigned long *)((u8 *) entry->virtual + - (PAGE_SIZE * i)); - for (j = 0; - j < PAGE_SIZE / sizeof(unsigned long); - j++, tmp++) { - if (*tmp != 0xcafebabe && error == 0) { - error = 1; - DRM_ERROR("Scatter allocation error, " - "pagelist does not match " - "virtual mapping\n"); - } - } - tmp = page_address(entry->pagelist[i]); - for (j = 0; - j < PAGE_SIZE / sizeof(unsigned long); - j++, tmp++) { - *tmp = 0; - } - } - if (error == 0) - DRM_ERROR("Scatter allocation matches pagelist\n"); - } -#endif - - return 0; - - failed: - drm_sg_cleanup(entry); - return -ENOMEM; -} - -int drm_legacy_sg_free(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_scatter_gather *request = data; - struct drm_sg_mem *entry; - - if (!drm_core_check_feature(dev, DRIVER_LEGACY)) - return -EOPNOTSUPP; - - if (!drm_core_check_feature(dev, DRIVER_SG)) - return -EOPNOTSUPP; - - entry = dev->sg; - dev->sg = NULL; - - if (!entry || entry->handle != request->handle) - return -EINVAL; - - DRM_DEBUG("virtual = %p\n", entry->virtual); - - drm_sg_cleanup(entry); - - return 0; -} diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c deleted file mode 100644 index 87c9fe55dec7..000000000000 --- a/drivers/gpu/drm/drm_vm.c +++ /dev/null @@ -1,665 +0,0 @@ -/* - * \file drm_vm.c - * Memory mapping for DRM - * - * \author Rickard E. (Rik) Faith - * \author Gareth Hughes - */ - -/* - * Created: Mon Jan 4 08:58:31 1999 by faith@valinux.com - * - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
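
In drm_legacy_sg_alloc() above, the handle reported back to userspace is produced by ScatterHandle(), which on 64-bit kernels folds the upper and lower 32-bit halves of the vmalloc address into an unsigned int. A one-function sketch of that folding (example address only):

#include <stdio.h>
#include <stdint.h>

/* Fold a possibly 64-bit kernel virtual address into the 32-bit handle
 * value the legacy scatter/gather ioctl exposes, the same way the
 * ScatterHandle() macro adds the high and low 32-bit halves. */
static unsigned int scatter_handle(uint64_t addr)
{
	return (unsigned int)((addr >> 32) + (addr & 0xffffffffULL));
}

int main(void)
{
	uint64_t addr = 0xffffc90001234000ULL;	/* made-up vmalloc-style address */

	printf("addr 0x%llx -> handle 0x%08x\n",
	       (unsigned long long)addr, scatter_handle(addr));
	return 0;
}
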
- */ - -#include -#include -#include -#include -#include - -#if defined(__ia64__) -#include -#include -#endif -#include - -#include -#include -#include -#include -#include - -#include "drm_internal.h" -#include "drm_legacy.h" - -struct drm_vma_entry { - struct list_head head; - struct vm_area_struct *vma; - pid_t pid; -}; - -static void drm_vm_open(struct vm_area_struct *vma); -static void drm_vm_close(struct vm_area_struct *vma); - -static pgprot_t drm_io_prot(struct drm_local_map *map, - struct vm_area_struct *vma) -{ - pgprot_t tmp = vm_get_page_prot(vma->vm_flags); - -#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || \ - defined(__mips__) || defined(__loongarch__) - if (map->type == _DRM_REGISTERS && !(map->flags & _DRM_WRITE_COMBINING)) - tmp = pgprot_noncached(tmp); - else - tmp = pgprot_writecombine(tmp); -#elif defined(__ia64__) - if (efi_range_is_wc(vma->vm_start, vma->vm_end - - vma->vm_start)) - tmp = pgprot_writecombine(tmp); - else - tmp = pgprot_noncached(tmp); -#elif defined(__sparc__) || defined(__arm__) - tmp = pgprot_noncached(tmp); -#endif - return tmp; -} - -static pgprot_t drm_dma_prot(uint32_t map_type, struct vm_area_struct *vma) -{ - pgprot_t tmp = vm_get_page_prot(vma->vm_flags); - -#if defined(__powerpc__) && defined(CONFIG_NOT_COHERENT_CACHE) - tmp = pgprot_noncached_wc(tmp); -#endif - return tmp; -} - -/* - * \c fault method for AGP virtual memory. - * - * \param vma virtual memory area. - * \param address access address. - * \return pointer to the page structure. - * - * Find the right map and if it's AGP memory find the real physical page to - * map, get the page, increment the use count and return it. - */ -#if IS_ENABLED(CONFIG_AGP) -static vm_fault_t drm_vm_fault(struct vm_fault *vmf) -{ - struct vm_area_struct *vma = vmf->vma; - struct drm_file *priv = vma->vm_file->private_data; - struct drm_device *dev = priv->minor->dev; - struct drm_local_map *map = NULL; - struct drm_map_list *r_list; - struct drm_hash_item *hash; - - /* - * Find the right map - */ - if (!dev->agp) - goto vm_fault_error; - - if (!dev->agp || !dev->agp->cant_use_aperture) - goto vm_fault_error; - - if (drm_ht_find_item(&dev->map_hash, vma->vm_pgoff, &hash)) - goto vm_fault_error; - - r_list = drm_hash_entry(hash, struct drm_map_list, hash); - map = r_list->map; - - if (map && map->type == _DRM_AGP) { - /* - * Using vm_pgoff as a selector forces us to use this unusual - * addressing scheme. 
- */ - resource_size_t offset = vmf->address - vma->vm_start; - resource_size_t baddr = map->offset + offset; - struct drm_agp_mem *agpmem; - struct page *page; - -#ifdef __alpha__ - /* - * Adjust to a bus-relative address - */ - baddr -= dev->hose->mem_space->start; -#endif - - /* - * It's AGP memory - find the real physical page to map - */ - list_for_each_entry(agpmem, &dev->agp->memory, head) { - if (agpmem->bound <= baddr && - agpmem->bound + agpmem->pages * PAGE_SIZE > baddr) - break; - } - - if (&agpmem->head == &dev->agp->memory) - goto vm_fault_error; - - /* - * Get the page, inc the use count, and return it - */ - offset = (baddr - agpmem->bound) >> PAGE_SHIFT; - page = agpmem->memory->pages[offset]; - get_page(page); - vmf->page = page; - - DRM_DEBUG - ("baddr = 0x%llx page = 0x%p, offset = 0x%llx, count=%d\n", - (unsigned long long)baddr, - agpmem->memory->pages[offset], - (unsigned long long)offset, - page_count(page)); - return 0; - } -vm_fault_error: - return VM_FAULT_SIGBUS; /* Disallow mremap */ -} -#else -static vm_fault_t drm_vm_fault(struct vm_fault *vmf) -{ - return VM_FAULT_SIGBUS; -} -#endif - -/* - * \c nopage method for shared virtual memory. - * - * \param vma virtual memory area. - * \param address access address. - * \return pointer to the page structure. - * - * Get the mapping, find the real physical page to map, get the page, and - * return it. - */ -static vm_fault_t drm_vm_shm_fault(struct vm_fault *vmf) -{ - struct vm_area_struct *vma = vmf->vma; - struct drm_local_map *map = vma->vm_private_data; - unsigned long offset; - unsigned long i; - struct page *page; - - if (!map) - return VM_FAULT_SIGBUS; /* Nothing allocated */ - - offset = vmf->address - vma->vm_start; - i = (unsigned long)map->handle + offset; - page = vmalloc_to_page((void *)i); - if (!page) - return VM_FAULT_SIGBUS; - get_page(page); - vmf->page = page; - - DRM_DEBUG("shm_fault 0x%lx\n", offset); - return 0; -} - -/* - * \c close method for shared virtual memory. - * - * \param vma virtual memory area. - * - * Deletes map information if we are the last - * person to close a mapping and it's not in the global maplist. - */ -static void drm_vm_shm_close(struct vm_area_struct *vma) -{ - struct drm_file *priv = vma->vm_file->private_data; - struct drm_device *dev = priv->minor->dev; - struct drm_vma_entry *pt, *temp; - struct drm_local_map *map; - struct drm_map_list *r_list; - int found_maps = 0; - - DRM_DEBUG("0x%08lx,0x%08lx\n", - vma->vm_start, vma->vm_end - vma->vm_start); - - map = vma->vm_private_data; - - mutex_lock(&dev->struct_mutex); - list_for_each_entry_safe(pt, temp, &dev->vmalist, head) { - if (pt->vma->vm_private_data == map) - found_maps++; - if (pt->vma == vma) { - list_del(&pt->head); - kfree(pt); - } - } - - /* We were the only map that was found */ - if (found_maps == 1 && map->flags & _DRM_REMOVABLE) { - /* Check to see if we are in the maplist, if we are not, then - * we delete this mappings information. 
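
The AGP fault path above resolves a faulting address in two steps: find the drm_agp_mem whose [bound, bound + pages * PAGE_SIZE) range covers the bus address, then index into that region's page array. The range search on its own can be sketched like this (fixed page size, made-up bindings):

#include <stdio.h>
#include <stddef.h>

#define FAKE_PAGE_SHIFT 12UL

struct fake_agp_mem {
	unsigned long bound;	/* first bus address covered */
	unsigned long pages;	/* number of pages bound there */
};

/* Return the index of the region covering @baddr and the page offset
 * inside it, mirroring the list walk in the AGP fault handler. */
static long lookup_page(const struct fake_agp_mem *mem, size_t n,
			unsigned long baddr, unsigned long *page_off)
{
	for (size_t i = 0; i < n; i++) {
		unsigned long end = mem[i].bound + (mem[i].pages << FAKE_PAGE_SHIFT);

		if (mem[i].bound <= baddr && baddr < end) {
			*page_off = (baddr - mem[i].bound) >> FAKE_PAGE_SHIFT;
			return (long)i;
		}
	}
	return -1;	/* no bound region covers this address */
}

int main(void)
{
	static const struct fake_agp_mem mem[] = {
		{ 0x100000, 16 },	/* made-up bindings */
		{ 0x200000, 4 },
	};
	unsigned long off;
	long idx = lookup_page(mem, 2, 0x201000, &off);

	printf("region %ld, page %lu\n", idx, off);
	return 0;
}
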
- */ - found_maps = 0; - list_for_each_entry(r_list, &dev->maplist, head) { - if (r_list->map == map) - found_maps++; - } - - if (!found_maps) { - switch (map->type) { - case _DRM_REGISTERS: - case _DRM_FRAME_BUFFER: - arch_phys_wc_del(map->mtrr); - iounmap(map->handle); - break; - case _DRM_SHM: - vfree(map->handle); - break; - case _DRM_AGP: - case _DRM_SCATTER_GATHER: - break; - case _DRM_CONSISTENT: - dma_free_coherent(dev->dev, - map->size, - map->handle, - map->offset); - break; - } - kfree(map); - } - } - mutex_unlock(&dev->struct_mutex); -} - -/* - * \c fault method for DMA virtual memory. - * - * \param address access address. - * \return pointer to the page structure. - * - * Determine the page number from the page offset and get it from drm_device_dma::pagelist. - */ -static vm_fault_t drm_vm_dma_fault(struct vm_fault *vmf) -{ - struct vm_area_struct *vma = vmf->vma; - struct drm_file *priv = vma->vm_file->private_data; - struct drm_device *dev = priv->minor->dev; - struct drm_device_dma *dma = dev->dma; - unsigned long offset; - unsigned long page_nr; - struct page *page; - - if (!dma) - return VM_FAULT_SIGBUS; /* Error */ - if (!dma->pagelist) - return VM_FAULT_SIGBUS; /* Nothing allocated */ - - offset = vmf->address - vma->vm_start; - /* vm_[pg]off[set] should be 0 */ - page_nr = offset >> PAGE_SHIFT; /* page_nr could just be vmf->pgoff */ - page = virt_to_page((void *)dma->pagelist[page_nr]); - - get_page(page); - vmf->page = page; - - DRM_DEBUG("dma_fault 0x%lx (page %lu)\n", offset, page_nr); - return 0; -} - -/* - * \c fault method for scatter-gather virtual memory. - * - * \param address access address. - * \return pointer to the page structure. - * - * Determine the map offset from the page offset and get it from drm_sg_mem::pagelist. 
- */ -static vm_fault_t drm_vm_sg_fault(struct vm_fault *vmf) -{ - struct vm_area_struct *vma = vmf->vma; - struct drm_local_map *map = vma->vm_private_data; - struct drm_file *priv = vma->vm_file->private_data; - struct drm_device *dev = priv->minor->dev; - struct drm_sg_mem *entry = dev->sg; - unsigned long offset; - unsigned long map_offset; - unsigned long page_offset; - struct page *page; - - if (!entry) - return VM_FAULT_SIGBUS; /* Error */ - if (!entry->pagelist) - return VM_FAULT_SIGBUS; /* Nothing allocated */ - - offset = vmf->address - vma->vm_start; - map_offset = map->offset - (unsigned long)dev->sg->virtual; - page_offset = (offset >> PAGE_SHIFT) + (map_offset >> PAGE_SHIFT); - page = entry->pagelist[page_offset]; - get_page(page); - vmf->page = page; - - return 0; -} - -/** AGP virtual memory operations */ -static const struct vm_operations_struct drm_vm_ops = { - .fault = drm_vm_fault, - .open = drm_vm_open, - .close = drm_vm_close, -}; - -/** Shared virtual memory operations */ -static const struct vm_operations_struct drm_vm_shm_ops = { - .fault = drm_vm_shm_fault, - .open = drm_vm_open, - .close = drm_vm_shm_close, -}; - -/** DMA virtual memory operations */ -static const struct vm_operations_struct drm_vm_dma_ops = { - .fault = drm_vm_dma_fault, - .open = drm_vm_open, - .close = drm_vm_close, -}; - -/** Scatter-gather virtual memory operations */ -static const struct vm_operations_struct drm_vm_sg_ops = { - .fault = drm_vm_sg_fault, - .open = drm_vm_open, - .close = drm_vm_close, -}; - -static void drm_vm_open_locked(struct drm_device *dev, - struct vm_area_struct *vma) -{ - struct drm_vma_entry *vma_entry; - - DRM_DEBUG("0x%08lx,0x%08lx\n", - vma->vm_start, vma->vm_end - vma->vm_start); - - vma_entry = kmalloc(sizeof(*vma_entry), GFP_KERNEL); - if (vma_entry) { - vma_entry->vma = vma; - vma_entry->pid = current->pid; - list_add(&vma_entry->head, &dev->vmalist); - } -} - -static void drm_vm_open(struct vm_area_struct *vma) -{ - struct drm_file *priv = vma->vm_file->private_data; - struct drm_device *dev = priv->minor->dev; - - mutex_lock(&dev->struct_mutex); - drm_vm_open_locked(dev, vma); - mutex_unlock(&dev->struct_mutex); -} - -static void drm_vm_close_locked(struct drm_device *dev, - struct vm_area_struct *vma) -{ - struct drm_vma_entry *pt, *temp; - - DRM_DEBUG("0x%08lx,0x%08lx\n", - vma->vm_start, vma->vm_end - vma->vm_start); - - list_for_each_entry_safe(pt, temp, &dev->vmalist, head) { - if (pt->vma == vma) { - list_del(&pt->head); - kfree(pt); - break; - } - } -} - -/* - * \c close method for all virtual memory types. - * - * \param vma virtual memory area. - * - * Search the \p vma private data entry in drm_device::vmalist, unlink it, and - * free it. - */ -static void drm_vm_close(struct vm_area_struct *vma) -{ - struct drm_file *priv = vma->vm_file->private_data; - struct drm_device *dev = priv->minor->dev; - - mutex_lock(&dev->struct_mutex); - drm_vm_close_locked(dev, vma); - mutex_unlock(&dev->struct_mutex); -} - -/* - * mmap DMA memory. - * - * \param file_priv DRM file private. - * \param vma virtual memory area. - * \return zero on success or a negative number on failure. - * - * Sets the virtual memory area operations structure to vm_dma_ops, the file - * pointer, and calls vm_open(). 
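
The four vm_operations_struct tables listed above are the heart of the legacy mmap path: each map type gets its own fault handler, selected once when the mapping is set up by the mmap code that follows. A reduced userspace sketch of that dispatch (placeholder handlers, no struct vm_area_struct):

#include <stdio.h>

/* Simplified stand-ins for the legacy map types. */
enum map_type { MAP_REGISTERS, MAP_SHM, MAP_DMA, MAP_SG };

typedef int (*fault_fn)(unsigned long offset);

static int fault_io(unsigned long off)  { printf("io fault at 0x%lx\n", off);  return 0; }
static int fault_shm(unsigned long off) { printf("shm fault at 0x%lx\n", off); return 0; }
static int fault_dma(unsigned long off) { printf("dma fault at 0x%lx\n", off); return 0; }
static int fault_sg(unsigned long off)  { printf("sg fault at 0x%lx\n", off);  return 0; }

/* mmap-time dispatch: pick the fault handler once per mapping, the way
 * the legacy mmap code installs one of the vm_operations tables. */
static fault_fn select_fault_handler(enum map_type type)
{
	switch (type) {
	case MAP_REGISTERS:	return fault_io;
	case MAP_SHM:		return fault_shm;
	case MAP_DMA:		return fault_dma;
	case MAP_SG:		return fault_sg;
	}
	return NULL;
}

int main(void)
{
	fault_fn handler = select_fault_handler(MAP_SHM);

	return handler ? handler(0x1000) : 1;
}
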
- */ -static int drm_mmap_dma(struct file *filp, struct vm_area_struct *vma) -{ - struct drm_file *priv = filp->private_data; - struct drm_device *dev; - struct drm_device_dma *dma; - unsigned long length = vma->vm_end - vma->vm_start; - - dev = priv->minor->dev; - dma = dev->dma; - DRM_DEBUG("start = 0x%lx, end = 0x%lx, page offset = 0x%lx\n", - vma->vm_start, vma->vm_end, vma->vm_pgoff); - - /* Length must match exact page count */ - if (!dma || (length >> PAGE_SHIFT) != dma->page_count) { - return -EINVAL; - } - - if (!capable(CAP_SYS_ADMIN) && - (dma->flags & _DRM_DMA_USE_PCI_RO)) { - vm_flags_clear(vma, VM_WRITE | VM_MAYWRITE); -#if defined(__i386__) || defined(__x86_64__) - pgprot_val(vma->vm_page_prot) &= ~_PAGE_RW; -#else - /* Ye gads this is ugly. With more thought - we could move this up higher and use - `protection_map' instead. */ - vma->vm_page_prot = - __pgprot(pte_val - (pte_wrprotect - (__pte(pgprot_val(vma->vm_page_prot))))); -#endif - } - - vma->vm_ops = &drm_vm_dma_ops; - - vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP); - - drm_vm_open_locked(dev, vma); - return 0; -} - -static resource_size_t drm_core_get_reg_ofs(struct drm_device *dev) -{ -#ifdef __alpha__ - return dev->hose->dense_mem_base; -#else - return 0; -#endif -} - -/* - * mmap DMA memory. - * - * \param file_priv DRM file private. - * \param vma virtual memory area. - * \return zero on success or a negative number on failure. - * - * If the virtual memory area has no offset associated with it then it's a DMA - * area, so calls mmap_dma(). Otherwise searches the map in drm_device::maplist, - * checks that the restricted flag is not set, sets the virtual memory operations - * according to the mapping type and remaps the pages. Finally sets the file - * pointer and calls vm_open(). - */ -static int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma) -{ - struct drm_file *priv = filp->private_data; - struct drm_device *dev = priv->minor->dev; - struct drm_local_map *map = NULL; - resource_size_t offset = 0; - struct drm_hash_item *hash; - - DRM_DEBUG("start = 0x%lx, end = 0x%lx, page offset = 0x%lx\n", - vma->vm_start, vma->vm_end, vma->vm_pgoff); - - if (!priv->authenticated) - return -EACCES; - - /* We check for "dma". On Apple's UniNorth, it's valid to have - * the AGP mapped at physical address 0 - * --BenH. - */ - if (!vma->vm_pgoff -#if IS_ENABLED(CONFIG_AGP) - && (!dev->agp - || dev->agp->agp_info.device->vendor != PCI_VENDOR_ID_APPLE) -#endif - ) - return drm_mmap_dma(filp, vma); - - if (drm_ht_find_item(&dev->map_hash, vma->vm_pgoff, &hash)) { - DRM_ERROR("Could not find map\n"); - return -EINVAL; - } - - map = drm_hash_entry(hash, struct drm_map_list, hash)->map; - if (!map || ((map->flags & _DRM_RESTRICTED) && !capable(CAP_SYS_ADMIN))) - return -EPERM; - - /* Check for valid size. */ - if (map->size < vma->vm_end - vma->vm_start) - return -EINVAL; - - if (!capable(CAP_SYS_ADMIN) && (map->flags & _DRM_READ_ONLY)) { - vm_flags_clear(vma, VM_WRITE | VM_MAYWRITE); -#if defined(__i386__) || defined(__x86_64__) - pgprot_val(vma->vm_page_prot) &= ~_PAGE_RW; -#else - /* Ye gads this is ugly. With more thought - we could move this up higher and use - `protection_map' instead. 
*/ - vma->vm_page_prot = - __pgprot(pte_val - (pte_wrprotect - (__pte(pgprot_val(vma->vm_page_prot))))); -#endif - } - - switch (map->type) { -#if !defined(__arm__) - case _DRM_AGP: - if (dev->agp && dev->agp->cant_use_aperture) { - /* - * On some platforms we can't talk to bus dma address from the CPU, so for - * memory of type DRM_AGP, we'll deal with sorting out the real physical - * pages and mappings in fault() - */ -#if defined(__powerpc__) - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -#endif - vma->vm_ops = &drm_vm_ops; - break; - } - fallthrough; /* to _DRM_FRAME_BUFFER... */ -#endif - case _DRM_FRAME_BUFFER: - case _DRM_REGISTERS: - offset = drm_core_get_reg_ofs(dev); - vma->vm_page_prot = drm_io_prot(map, vma); - if (io_remap_pfn_range(vma, vma->vm_start, - (map->offset + offset) >> PAGE_SHIFT, - vma->vm_end - vma->vm_start, - vma->vm_page_prot)) - return -EAGAIN; - DRM_DEBUG(" Type = %d; start = 0x%lx, end = 0x%lx," - " offset = 0x%llx\n", - map->type, - vma->vm_start, vma->vm_end, (unsigned long long)(map->offset + offset)); - - vma->vm_ops = &drm_vm_ops; - break; - case _DRM_CONSISTENT: - /* Consistent memory is really like shared memory. But - * it's allocated in a different way, so avoid fault */ - if (remap_pfn_range(vma, vma->vm_start, - page_to_pfn(virt_to_page(map->handle)), - vma->vm_end - vma->vm_start, vma->vm_page_prot)) - return -EAGAIN; - vma->vm_page_prot = drm_dma_prot(map->type, vma); - fallthrough; /* to _DRM_SHM */ - case _DRM_SHM: - vma->vm_ops = &drm_vm_shm_ops; - vma->vm_private_data = (void *)map; - break; - case _DRM_SCATTER_GATHER: - vma->vm_ops = &drm_vm_sg_ops; - vma->vm_private_data = (void *)map; - vma->vm_page_prot = drm_dma_prot(map->type, vma); - break; - default: - return -EINVAL; /* This should never happen. */ - } - vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP); - - drm_vm_open_locked(dev, vma); - return 0; -} - -int drm_legacy_mmap(struct file *filp, struct vm_area_struct *vma) -{ - struct drm_file *priv = filp->private_data; - struct drm_device *dev = priv->minor->dev; - int ret; - - if (drm_dev_is_unplugged(dev)) - return -ENODEV; - - mutex_lock(&dev->struct_mutex); - ret = drm_mmap_locked(filp, vma); - mutex_unlock(&dev->struct_mutex); - - return ret; -} -EXPORT_SYMBOL(drm_legacy_mmap); - -#if IS_ENABLED(CONFIG_DRM_LEGACY) -void drm_legacy_vma_flush(struct drm_device *dev) -{ - struct drm_vma_entry *vma, *vma_temp; - - /* Clear vma list (only needed for legacy drivers) */ - list_for_each_entry_safe(vma, vma_temp, &dev->vmalist, head) { - list_del(&vma->head); - kfree(vma); - } -} -#endif diff --git a/include/drm/drm_auth.h b/include/drm/drm_auth.h index ba248ca8866f..50131383ed81 100644 --- a/include/drm/drm_auth.h +++ b/include/drm/drm_auth.h @@ -33,24 +33,6 @@ #include struct drm_file; -struct drm_hw_lock; - -/* - * Legacy DRI1 locking data structure. Only here instead of in drm_legacy.h for - * include ordering reasons. - * - * DO NOT USE. - */ -struct drm_lock_data { - struct drm_hw_lock *hw_lock; - struct drm_file *file_priv; - wait_queue_head_t lock_queue; - unsigned long lock_time; - spinlock_t spinlock; - uint32_t kernel_waiters; - uint32_t user_waiters; - int idle_has_lock; -}; /** * struct drm_master - drm master structure @@ -145,10 +127,6 @@ struct drm_master { * Protected by &drm_device.mode_config's &drm_mode_config.idr_mutex. 
*/ struct idr lessee_idr; - /* private: */ -#if IS_ENABLED(CONFIG_DRM_LEGACY) - struct drm_lock_data lock; -#endif }; struct drm_master *drm_master_get(struct drm_master *master); diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h index c490977ee250..63767cf24371 100644 --- a/include/drm/drm_device.h +++ b/include/drm/drm_device.h @@ -6,7 +6,6 @@ #include #include -#include #include struct drm_driver; @@ -153,8 +152,8 @@ struct drm_device { * * Lock for others (not &drm_minor.master and &drm_file.is_master) * - * WARNING: - * Only drivers annotated with DRIVER_LEGACY should be using this. + * TODO: This lock used to be the BKL of the DRM subsystem. Move the + * lock into i915, which is the only remaining user. */ struct mutex struct_mutex; @@ -317,72 +316,6 @@ struct drm_device { * Root directory for debugfs files. */ struct dentry *debugfs_root; - - /* Everything below here is for legacy driver, never use! */ - /* private: */ -#if IS_ENABLED(CONFIG_DRM_LEGACY) - /* List of devices per driver for stealth attach cleanup */ - struct list_head legacy_dev_list; - -#ifdef __alpha__ - /** @hose: PCI hose, only used on ALPHA platforms. */ - struct pci_controller *hose; -#endif - - /* AGP data */ - struct drm_agp_head *agp; - - /* Context handle management - linked list of context handles */ - struct list_head ctxlist; - - /* Context handle management - mutex for &ctxlist */ - struct mutex ctxlist_mutex; - - /* Context handle management */ - struct idr ctx_idr; - - /* Memory management - linked list of regions */ - struct list_head maplist; - - /* Memory management - user token hash table for maps */ - struct drm_open_hash map_hash; - - /* Context handle management - list of vmas (for debugging) */ - struct list_head vmalist; - - /* Optional pointer for DMA support */ - struct drm_device_dma *dma; - - /* Context swapping flag */ - __volatile__ long context_flag; - - /* Last current context */ - int last_context; - - /* Lock for &buf_use and a few other things. */ - spinlock_t buf_lock; - - /* Usage counter for buffers in use -- cannot alloc */ - int buf_use; - - /* Buffer allocation in progress */ - atomic_t buf_alloc; - - struct { - int context; - struct drm_hw_lock *lock; - } sigdata; - - struct drm_local_map *agp_buffer_map; - unsigned int agp_buffer_token; - - /* Scatter gather memory */ - struct drm_sg_mem *sg; - - /* IRQs */ - bool irq_enabled; - int irq; -#endif }; #endif diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index ea36aa79dca2..8878260d7529 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -442,25 +442,6 @@ struct drm_driver { * some examples. */ const struct file_operations *fops; - -#ifdef CONFIG_DRM_LEGACY - /* Everything below here is for legacy driver, never use! 
*/ - /* private: */ - - int (*firstopen) (struct drm_device *); - void (*preclose) (struct drm_device *, struct drm_file *file_priv); - int (*dma_ioctl) (struct drm_device *dev, void *data, struct drm_file *file_priv); - int (*dma_quiescent) (struct drm_device *); - int (*context_dtor) (struct drm_device *dev, int context); - irqreturn_t (*irq_handler)(int irq, void *arg); - void (*irq_preinstall)(struct drm_device *dev); - int (*irq_postinstall)(struct drm_device *dev); - void (*irq_uninstall)(struct drm_device *dev); - u32 (*get_vblank_counter)(struct drm_device *dev, unsigned int pipe); - int (*enable_vblank)(struct drm_device *dev, unsigned int pipe); - void (*disable_vblank)(struct drm_device *dev, unsigned int pipe); - int dev_priv_size; -#endif }; void *__devm_drm_dev_alloc(struct device *parent, diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index 8f35dcea82d3..ab230d3af138 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -386,11 +386,6 @@ struct drm_file { * Per-file buffer caches used by the PRIME buffer sharing code. */ struct drm_prime_file_private prime; - - /* private: */ -#if IS_ENABLED(CONFIG_DRM_LEGACY) - unsigned long lock_count; /* DRI1 legacy lock count */ -#endif }; /** diff --git a/include/drm/drm_legacy.h b/include/drm/drm_legacy.h deleted file mode 100644 index 0fc85418aad8..000000000000 --- a/include/drm/drm_legacy.h +++ /dev/null @@ -1,331 +0,0 @@ -#ifndef __DRM_DRM_LEGACY_H__ -#define __DRM_DRM_LEGACY_H__ -/* - * Legacy driver interfaces for the Direct Rendering Manager - * - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * Copyright (c) 2009-2010, Code Aurora Forum. - * All rights reserved. - * Copyright © 2014 Intel Corporation - * Daniel Vetter - * - * Author: Rickard E. (Rik) Faith - * Author: Gareth Hughes - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include - -#include -#include - -struct drm_device; -struct drm_driver; -struct file; -struct pci_driver; - -/* - * Legacy Support for palateontologic DRM drivers - * - * If you add a new driver and it uses any of these functions or structures, - * you're doing it terribly wrong. - */ - -/* - * Hash-table Support - */ - -struct drm_hash_item { - struct hlist_node head; - unsigned long key; -}; - -struct drm_open_hash { - struct hlist_head *table; - u8 order; -}; - -/** - * DMA buffer. 
- */ -struct drm_buf { - int idx; /**< Index into master buflist */ - int total; /**< Buffer size */ - int order; /**< log-base-2(total) */ - int used; /**< Amount of buffer in use (for DMA) */ - unsigned long offset; /**< Byte offset (used internally) */ - void *address; /**< Address of buffer */ - unsigned long bus_address; /**< Bus address of buffer */ - struct drm_buf *next; /**< Kernel-only: used for free list */ - __volatile__ int waiting; /**< On kernel DMA queue */ - __volatile__ int pending; /**< On hardware DMA queue */ - struct drm_file *file_priv; /**< Private of holding file descr */ - int context; /**< Kernel queue for this buffer */ - int while_locked; /**< Dispatch this buffer while locked */ - enum { - DRM_LIST_NONE = 0, - DRM_LIST_FREE = 1, - DRM_LIST_WAIT = 2, - DRM_LIST_PEND = 3, - DRM_LIST_PRIO = 4, - DRM_LIST_RECLAIM = 5 - } list; /**< Which list we're on */ - - int dev_priv_size; /**< Size of buffer private storage */ - void *dev_private; /**< Per-buffer private storage */ -}; - -typedef struct drm_dma_handle { - dma_addr_t busaddr; - void *vaddr; - size_t size; -} drm_dma_handle_t; - -/** - * Buffer entry. There is one of this for each buffer size order. - */ -struct drm_buf_entry { - int buf_size; /**< size */ - int buf_count; /**< number of buffers */ - struct drm_buf *buflist; /**< buffer list */ - int seg_count; - int page_order; - struct drm_dma_handle **seglist; - - int low_mark; /**< Low water mark */ - int high_mark; /**< High water mark */ -}; - -/** - * DMA data. - */ -struct drm_device_dma { - - struct drm_buf_entry bufs[DRM_MAX_ORDER + 1]; /**< buffers, grouped by their size order */ - int buf_count; /**< total number of buffers */ - struct drm_buf **buflist; /**< Vector of pointers into drm_device_dma::bufs */ - int seg_count; - int page_count; /**< number of pages */ - unsigned long *pagelist; /**< page list */ - unsigned long byte_count; - enum { - _DRM_DMA_USE_AGP = 0x01, - _DRM_DMA_USE_SG = 0x02, - _DRM_DMA_USE_FB = 0x04, - _DRM_DMA_USE_PCI_RO = 0x08 - } flags; - -}; - -/** - * Scatter-gather memory. 
- */ -struct drm_sg_mem { - unsigned long handle; - void *virtual; - int pages; - struct page **pagelist; - dma_addr_t *busaddr; -}; - -/** - * Kernel side of a mapping - */ -struct drm_local_map { - dma_addr_t offset; /**< Requested physical address (0 for SAREA)*/ - unsigned long size; /**< Requested physical size (bytes) */ - enum drm_map_type type; /**< Type of memory to map */ - enum drm_map_flags flags; /**< Flags */ - void *handle; /**< User-space: "Handle" to pass to mmap() */ - /**< Kernel-space: kernel-virtual address */ - int mtrr; /**< MTRR slot used */ -}; - -typedef struct drm_local_map drm_local_map_t; - -/** - * Mappings list - */ -struct drm_map_list { - struct list_head head; /**< list head */ - struct drm_hash_item hash; - struct drm_local_map *map; /**< mapping */ - uint64_t user_token; - struct drm_master *master; -}; - -int drm_legacy_addmap(struct drm_device *d, resource_size_t offset, - unsigned int size, enum drm_map_type type, - enum drm_map_flags flags, struct drm_local_map **map_p); -struct drm_local_map *drm_legacy_findmap(struct drm_device *dev, unsigned int token); -void drm_legacy_rmmap(struct drm_device *d, struct drm_local_map *map); -int drm_legacy_rmmap_locked(struct drm_device *d, struct drm_local_map *map); -struct drm_local_map *drm_legacy_getsarea(struct drm_device *dev); -int drm_legacy_mmap(struct file *filp, struct vm_area_struct *vma); - -int drm_legacy_addbufs_agp(struct drm_device *d, struct drm_buf_desc *req); -int drm_legacy_addbufs_pci(struct drm_device *d, struct drm_buf_desc *req); - -/** - * Test that the hardware lock is held by the caller, returning otherwise. - * - * \param dev DRM device. - * \param filp file pointer of the caller. - */ -#define LOCK_TEST_WITH_RETURN( dev, _file_priv ) \ -do { \ - if (!_DRM_LOCK_IS_HELD(_file_priv->master->lock.hw_lock->lock) || \ - _file_priv->master->lock.file_priv != _file_priv) { \ - DRM_ERROR( "%s called without lock held, held %d owner %p %p\n",\ - __func__, _DRM_LOCK_IS_HELD(_file_priv->master->lock.hw_lock->lock),\ - _file_priv->master->lock.file_priv, _file_priv); \ - return -EINVAL; \ - } \ -} while (0) - -void drm_legacy_idlelock_take(struct drm_lock_data *lock); -void drm_legacy_idlelock_release(struct drm_lock_data *lock); - -/* drm_irq.c */ -int drm_legacy_irq_uninstall(struct drm_device *dev); - -/* drm_pci.c */ - -#ifdef CONFIG_PCI - -int drm_legacy_pci_init(const struct drm_driver *driver, - struct pci_driver *pdriver); -void drm_legacy_pci_exit(const struct drm_driver *driver, - struct pci_driver *pdriver); - -#else - -static inline struct drm_dma_handle *drm_pci_alloc(struct drm_device *dev, - size_t size, size_t align) -{ - return NULL; -} - -static inline void drm_pci_free(struct drm_device *dev, - struct drm_dma_handle *dmah) -{ -} - -static inline int drm_legacy_pci_init(const struct drm_driver *driver, - struct pci_driver *pdriver) -{ - return -EINVAL; -} - -static inline void drm_legacy_pci_exit(const struct drm_driver *driver, - struct pci_driver *pdriver) -{ -} - -#endif - -/* - * AGP Support - */ - -struct drm_agp_head { - struct agp_kern_info agp_info; - struct list_head memory; - unsigned long mode; - struct agp_bridge_data *bridge; - int enabled; - int acquired; - unsigned long base; - int agp_mtrr; - int cant_use_aperture; - unsigned long page_mask; -}; - -#if IS_ENABLED(CONFIG_DRM_LEGACY) && IS_ENABLED(CONFIG_AGP) -struct drm_agp_head *drm_legacy_agp_init(struct drm_device *dev); -int drm_legacy_agp_acquire(struct drm_device *dev); -int drm_legacy_agp_release(struct 
drm_device *dev); -int drm_legacy_agp_enable(struct drm_device *dev, struct drm_agp_mode mode); -int drm_legacy_agp_info(struct drm_device *dev, struct drm_agp_info *info); -int drm_legacy_agp_alloc(struct drm_device *dev, struct drm_agp_buffer *request); -int drm_legacy_agp_free(struct drm_device *dev, struct drm_agp_buffer *request); -int drm_legacy_agp_unbind(struct drm_device *dev, struct drm_agp_binding *request); -int drm_legacy_agp_bind(struct drm_device *dev, struct drm_agp_binding *request); -#else -static inline struct drm_agp_head *drm_legacy_agp_init(struct drm_device *dev) -{ - return NULL; -} - -static inline int drm_legacy_agp_acquire(struct drm_device *dev) -{ - return -ENODEV; -} - -static inline int drm_legacy_agp_release(struct drm_device *dev) -{ - return -ENODEV; -} - -static inline int drm_legacy_agp_enable(struct drm_device *dev, - struct drm_agp_mode mode) -{ - return -ENODEV; -} - -static inline int drm_legacy_agp_info(struct drm_device *dev, - struct drm_agp_info *info) -{ - return -ENODEV; -} - -static inline int drm_legacy_agp_alloc(struct drm_device *dev, - struct drm_agp_buffer *request) -{ - return -ENODEV; -} - -static inline int drm_legacy_agp_free(struct drm_device *dev, - struct drm_agp_buffer *request) -{ - return -ENODEV; -} - -static inline int drm_legacy_agp_unbind(struct drm_device *dev, - struct drm_agp_binding *request) -{ - return -ENODEV; -} - -static inline int drm_legacy_agp_bind(struct drm_device *dev, - struct drm_agp_binding *request) -{ - return -ENODEV; -} -#endif - -/* drm_memory.c */ -void drm_legacy_ioremap(struct drm_local_map *map, struct drm_device *dev); -void drm_legacy_ioremap_wc(struct drm_local_map *map, struct drm_device *dev); -void drm_legacy_ioremapfree(struct drm_local_map *map, struct drm_device *dev); - -#endif /* __DRM_DRM_LEGACY_H__ */ -- cgit v1.2.3 From a0fce84cb1b3b88d3d5853f7ac5f1a3ef7e38620 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 4 Dec 2023 10:07:46 +0100 Subject: drm/plane-helper: Move drm_plane_helper_atomic_check() into udl The udl driver is the only caller of drm_plane_helper_atomic_check(). Move the function into the driver. No functional changes. v2: * fix documentation (Sui) Signed-off-by: Thomas Zimmermann Reviewed-by: Alex Deucher Acked-by: Sui Jingfeng Link: https://patchwork.freedesktop.org/patch/msgid/20231204090852.1650-2-tzimmermann@suse.de --- drivers/gpu/drm/drm_crtc_helper.c | 7 ++----- drivers/gpu/drm/drm_plane_helper.c | 32 -------------------------------- drivers/gpu/drm/udl/udl_modeset.c | 19 +++++++++++++++++-- include/drm/drm_plane_helper.h | 2 -- 4 files changed, 19 insertions(+), 41 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index a209659a996c..2dafc39a27cb 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -439,11 +439,8 @@ EXPORT_SYMBOL(drm_crtc_helper_set_mode); * @state: atomic state object * * Provides a default CRTC-state check handler for CRTCs that only have - * one primary plane attached to it. - * - * This is often the case for the CRTC of simple framebuffers. See also - * drm_plane_helper_atomic_check() for the respective plane-state check - * helper function. + * one primary plane attached to it. This is often the case for the CRTC + * of simple framebuffers. * * RETURNS: * Zero on success, or an errno code otherwise.
diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c index 5e95089676ff..7982be4b0306 100644 --- a/drivers/gpu/drm/drm_plane_helper.c +++ b/drivers/gpu/drm/drm_plane_helper.c @@ -279,35 +279,3 @@ void drm_plane_helper_destroy(struct drm_plane *plane) kfree(plane); } EXPORT_SYMBOL(drm_plane_helper_destroy); - -/** - * drm_plane_helper_atomic_check() - Helper to check plane atomic-state - * @plane: plane to check - * @state: atomic state object - * - * Provides a default plane-state check handler for planes whose atomic-state - * scale and positioning are not expected to change since the plane is always - * a fullscreen scanout buffer. - * - * This is often the case for the primary plane of simple framebuffers. See - * also drm_crtc_helper_atomic_check() for the respective CRTC-state check - * helper function. - * - * RETURNS: - * Zero on success, or an errno code otherwise. - */ -int drm_plane_helper_atomic_check(struct drm_plane *plane, struct drm_atomic_state *state) -{ - struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); - struct drm_crtc *new_crtc = new_plane_state->crtc; - struct drm_crtc_state *new_crtc_state = NULL; - - if (new_crtc) - new_crtc_state = drm_atomic_get_new_crtc_state(state, new_crtc); - - return drm_atomic_helper_check_plane_state(new_plane_state, new_crtc_state, - DRM_PLANE_NO_SCALING, - DRM_PLANE_NO_SCALING, - false, false); -} -EXPORT_SYMBOL(drm_plane_helper_atomic_check); diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c index 40876bcdd79a..7702359c90c2 100644 --- a/drivers/gpu/drm/udl/udl_modeset.c +++ b/drivers/gpu/drm/udl/udl_modeset.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include @@ -261,6 +260,22 @@ static const uint64_t udl_primary_plane_fmtmods[] = { DRM_FORMAT_MOD_INVALID }; +static int udl_primary_plane_helper_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); + struct drm_crtc *new_crtc = new_plane_state->crtc; + struct drm_crtc_state *new_crtc_state = NULL; + + if (new_crtc) + new_crtc_state = drm_atomic_get_new_crtc_state(state, new_crtc); + + return drm_atomic_helper_check_plane_state(new_plane_state, new_crtc_state, + DRM_PLANE_NO_SCALING, + DRM_PLANE_NO_SCALING, + false, false); +} + static void udl_primary_plane_helper_atomic_update(struct drm_plane *plane, struct drm_atomic_state *state) { @@ -296,7 +311,7 @@ out_drm_gem_fb_end_cpu_access: static const struct drm_plane_helper_funcs udl_primary_plane_helper_funcs = { DRM_GEM_SHADOW_PLANE_HELPER_FUNCS, - .atomic_check = drm_plane_helper_atomic_check, + .atomic_check = udl_primary_plane_helper_atomic_check, .atomic_update = udl_primary_plane_helper_atomic_update, }; diff --git a/include/drm/drm_plane_helper.h b/include/drm/drm_plane_helper.h index 3a574e8cd22f..75f9c4830564 100644 --- a/include/drm/drm_plane_helper.h +++ b/include/drm/drm_plane_helper.h @@ -26,7 +26,6 @@ #include -struct drm_atomic_state; struct drm_crtc; struct drm_framebuffer; struct drm_modeset_acquire_ctx; @@ -42,7 +41,6 @@ int drm_plane_helper_update_primary(struct drm_plane *plane, struct drm_crtc *cr int drm_plane_helper_disable_primary(struct drm_plane *plane, struct drm_modeset_acquire_ctx *ctx); void drm_plane_helper_destroy(struct drm_plane *plane); -int drm_plane_helper_atomic_check(struct drm_plane *plane, struct drm_atomic_state *state); /** * DRM_PLANE_NON_ATOMIC_FUNCS - Default 
plane functions for non-atomic drivers -- cgit v1.2.3 From 90d50b8d85834e73536fdccd5aa913b30494fef0 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 21 Sep 2023 13:50:32 +0300 Subject: drm/mipi-dsi: Fix detach call without attach It's been reported that DSI host driver's detach can be called without the attach ever happening: https://lore.kernel.org/all/20230412073954.20601-1-tony@atomide.com/ After reading the code, I think this is what happens: We have a DSI host defined in the device tree and a DSI peripheral under that host (i.e. an i2c device using the DSI as data bus doesn't exhibit this behavior). The host driver calls mipi_dsi_host_register(), which causes (via a few functions) mipi_dsi_device_add() to be called for the DSI peripheral. So now we have a DSI device under the host, but attach hasn't been called. Normally the probing of the devices continues, and eventually the DSI peripheral's driver will call mipi_dsi_attach(), attaching the peripheral. However, if the host driver's probe encounters an error after calling mipi_dsi_host_register(), and before the peripheral has called mipi_dsi_attach(), the host driver will do cleanups and return an error from its probe function. The cleanups include calling mipi_dsi_host_unregister(). mipi_dsi_host_unregister() will call two functions for all its DSI peripheral devices: mipi_dsi_detach() and mipi_dsi_device_unregister(). The latter makes sense, as the device exists, but the former may be wrong as attach has not necessarily been done. To fix this, track the attached state of the peripheral, and only detach from mipi_dsi_host_unregister() if the peripheral was attached. Note that I have only tested this with a board with an i2c DSI peripheral, not with a "pure" DSI peripheral. However, slightly related, the unregister machinery still seems broken. E.g. if the DSI host driver is unbound, it'll detach and unregister the DSI peripherals. After that, when the DSI peripheral driver is unbound it'll call detach either directly or using the devm variant, leading to a crash. And probably the driver will crash if it happens, for some reason, to try to send a message via the DSI bus. But that's another topic. Tested-by: H.
Nikolaus Schaller Acked-by: Maxime Ripard Reviewed-by: Sebastian Reichel Tested-by: Tony Lindgren Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20230921-dsi-detach-fix-v1-1-d0de2d1621d9@ideasonboard.com --- drivers/gpu/drm/drm_mipi_dsi.c | 17 +++++++++++++++-- include/drm/drm_mipi_dsi.h | 2 ++ 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c index 14201f73aab1..843a6dbda93a 100644 --- a/drivers/gpu/drm/drm_mipi_dsi.c +++ b/drivers/gpu/drm/drm_mipi_dsi.c @@ -347,7 +347,8 @@ static int mipi_dsi_remove_device_fn(struct device *dev, void *priv) { struct mipi_dsi_device *dsi = to_mipi_dsi_device(dev); - mipi_dsi_detach(dsi); + if (dsi->attached) + mipi_dsi_detach(dsi); mipi_dsi_device_unregister(dsi); return 0; @@ -370,11 +371,18 @@ EXPORT_SYMBOL(mipi_dsi_host_unregister); int mipi_dsi_attach(struct mipi_dsi_device *dsi) { const struct mipi_dsi_host_ops *ops = dsi->host->ops; + int ret; if (!ops || !ops->attach) return -ENOSYS; - return ops->attach(dsi->host, dsi); + ret = ops->attach(dsi->host, dsi); + if (ret) + return ret; + + dsi->attached = true; + + return 0; } EXPORT_SYMBOL(mipi_dsi_attach); @@ -386,9 +394,14 @@ int mipi_dsi_detach(struct mipi_dsi_device *dsi) { const struct mipi_dsi_host_ops *ops = dsi->host->ops; + if (WARN_ON(!dsi->attached)) + return -EINVAL; + if (!ops || !ops->detach) return -ENOSYS; + dsi->attached = false; + return ops->detach(dsi->host, dsi); } EXPORT_SYMBOL(mipi_dsi_detach); diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index c9df0407980c..c0aec0d4d664 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -168,6 +168,7 @@ struct mipi_dsi_device_info { * struct mipi_dsi_device - DSI peripheral device * @host: DSI host for this peripheral * @dev: driver model device node for this peripheral + * @attached: the DSI device has been successfully attached * @name: DSI peripheral chip type * @channel: virtual channel assigned to the peripheral * @format: pixel format for video mode @@ -184,6 +185,7 @@ struct mipi_dsi_device_info { struct mipi_dsi_device { struct mipi_dsi_host *host; struct device dev; + bool attached; char name[DSI_DEV_NAME_SIZE]; unsigned int channel; -- cgit v1.2.3 From 812cc1da7ffd9e178ef66b8a22113be10fba466c Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 5 Dec 2023 13:13:36 -0700 Subject: drm/bridge: Return NULL instead of plain 0 in drm_dp_hpd_bridge_register() stub sparse complains: drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c: note: in included file: include/drm/bridge/aux-bridge.h:29:16: sparse: sparse: Using plain integer as NULL pointer Return NULL to clear up the warning. 
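As a minimal illustrative sketch of the pattern being fixed (the names below are hypothetical, not taken from this patch): when a feature is compiled out, an inline stub for a function that returns a pointer should return NULL, not the plain integer 0 that sparse flags.

struct device;
struct device_node;

/* Hypothetical !CONFIG stub: return NULL, not 0, from a pointer-returning
 * function so sparse stays quiet. */
static inline struct device *example_hpd_bridge_register_stub(struct device *parent,
							       struct device_node *np)
{
	return NULL;	/* "return 0;" is what triggers the sparse warning */
}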
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312060025.BdeqZrWx-lkp@intel.com/ Fixes: e560518a6c2e ("drm/bridge: implement generic DP HPD bridge") Signed-off-by: Nathan Chancellor Reviewed-by: Bryan O'Donoghue Reviewed-by: Guenter Roeck Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20231205-drm_aux_bridge-fixes-v1-3-d242a0ae9df4@kernel.org --- include/drm/bridge/aux-bridge.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/drm') diff --git a/include/drm/bridge/aux-bridge.h b/include/drm/bridge/aux-bridge.h index 66249ff0858e..c4c423e97f06 100644 --- a/include/drm/bridge/aux-bridge.h +++ b/include/drm/bridge/aux-bridge.h @@ -26,7 +26,7 @@ void drm_aux_hpd_bridge_notify(struct device *dev, enum drm_connector_status sta static inline struct device *drm_dp_hpd_bridge_register(struct device *parent, struct device_node *np) { - return 0; + return NULL; } static inline void drm_aux_hpd_bridge_notify(struct device *dev, enum drm_connector_status status) -- cgit v1.2.3 From 66f011145b835f9a68af9d7156c8d84a6f29c331 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 8 Dec 2023 04:03:13 +0300 Subject: drm/atomic-helper: rename drm_atomic_helper_check_wb_encoder_state The drm_atomic_helper_check_wb_encoder_state() function doesn't use encoder for anything other than getting the drm_device instance. The function's description talks about checking the writeback connector state, not the encoder state. Moreover, there is no such thing as an encoder state, encoders generally do not have a state on their own. Rename the function to drm_atomic_helper_check_wb_connector_state() and change arguments to drm_writeback_connector and drm_atomic_state. Signed-off-by: Dmitry Baryshkov Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20231208010314.3395904-2-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/drm_atomic_helper.c | 16 +++++++++------- drivers/gpu/drm/vkms/vkms_writeback.c | 3 ++- include/drm/drm_atomic_helper.h | 5 ++--- 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index c3f677130def..c98a766ca3bd 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -795,9 +795,9 @@ drm_atomic_helper_check_modeset(struct drm_device *dev, EXPORT_SYMBOL(drm_atomic_helper_check_modeset); /** - * drm_atomic_helper_check_wb_encoder_state() - Check writeback encoder state - * @encoder: encoder state to check - * @conn_state: connector state to check + * drm_atomic_helper_check_wb_connector_state() - Check writeback connector state + * @connector: corresponding connector + * @state: the driver state object * * Checks if the writeback connector state is valid, and returns an error if it * isn't. 
@@ -806,9 +806,11 @@ EXPORT_SYMBOL(drm_atomic_helper_check_modeset); * Zero for success or -errno */ int -drm_atomic_helper_check_wb_encoder_state(struct drm_encoder *encoder, - struct drm_connector_state *conn_state) +drm_atomic_helper_check_wb_connector_state(struct drm_connector *connector, + struct drm_atomic_state *state) { + struct drm_connector_state *conn_state = + drm_atomic_get_new_connector_state(state, connector); struct drm_writeback_job *wb_job = conn_state->writeback_job; struct drm_property_blob *pixel_format_blob; struct drm_framebuffer *fb; @@ -827,11 +829,11 @@ drm_atomic_helper_check_wb_encoder_state(struct drm_encoder *encoder, if (fb->format->format == formats[i]) return 0; - drm_dbg_kms(encoder->dev, "Invalid pixel format %p4cc\n", &fb->format->format); + drm_dbg_kms(connector->dev, "Invalid pixel format %p4cc\n", &fb->format->format); return -EINVAL; } -EXPORT_SYMBOL(drm_atomic_helper_check_wb_encoder_state); +EXPORT_SYMBOL(drm_atomic_helper_check_wb_connector_state); /** * drm_atomic_helper_check_plane_state() - Check plane state for validity diff --git a/drivers/gpu/drm/vkms/vkms_writeback.c b/drivers/gpu/drm/vkms/vkms_writeback.c index d7e63aa14663..fef7f3daf2c9 100644 --- a/drivers/gpu/drm/vkms/vkms_writeback.c +++ b/drivers/gpu/drm/vkms/vkms_writeback.c @@ -34,6 +34,7 @@ static int vkms_wb_encoder_atomic_check(struct drm_encoder *encoder, struct drm_crtc_state *crtc_state, struct drm_connector_state *conn_state) { + struct drm_connector *connector = conn_state->connector; struct drm_framebuffer *fb; const struct drm_display_mode *mode = &crtc_state->mode; int ret; @@ -48,7 +49,7 @@ static int vkms_wb_encoder_atomic_check(struct drm_encoder *encoder, return -EINVAL; } - ret = drm_atomic_helper_check_wb_encoder_state(encoder, conn_state); + ret = drm_atomic_helper_check_wb_connector_state(connector, conn_state->state); if (ret < 0) return ret; diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h index 536a0b0091c3..fea528aacfe2 100644 --- a/include/drm/drm_atomic_helper.h +++ b/include/drm/drm_atomic_helper.h @@ -49,9 +49,8 @@ struct drm_private_state; int drm_atomic_helper_check_modeset(struct drm_device *dev, struct drm_atomic_state *state); -int -drm_atomic_helper_check_wb_encoder_state(struct drm_encoder *encoder, - struct drm_connector_state *conn_state); +int drm_atomic_helper_check_wb_connector_state(struct drm_connector *connector, + struct drm_atomic_state *state); int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state, const struct drm_crtc_state *crtc_state, int min_scale, -- cgit v1.2.3 From 05d249352f1ae909230c230767ca8f4e9fdf8e7b Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 20 Nov 2023 16:38:50 -0800 Subject: drm/exec: Pass in initial # of objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In cases where the # is known ahead of time, it is silly to do the table resize dance. 
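As a rough sketch under stated assumptions (the helper, bos[] and num_bos below are made up for illustration, not part of this patch), a caller that already knows how many GEM objects it will lock can pass that count to drm_exec_init() so the objects table is sized once:

#include <drm/drm_exec.h>
#include <drm/drm_gem.h>

/* Hypothetical helper: lock a known array of GEM objects. */
static int example_lock_known_bos(struct drm_gem_object **bos, unsigned int num_bos)
{
	struct drm_exec exec;
	unsigned int i;
	int ret = 0;

	/* num_bos is known up front, so size the table once instead of
	 * growing it on demand. */
	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, num_bos);
	drm_exec_until_all_locked(&exec) {
		for (i = 0; i < num_bos; i++) {
			ret = drm_exec_lock_obj(&exec, bos[i]);
			drm_exec_retry_on_contention(&exec);
			if (ret)
				break;
		}
	}

	/* ... use the locked objects here ... */

	drm_exec_fini(&exec);
	return ret;
}

Passing 0 keeps the old behavior: the table starts at PAGE_SIZE / sizeof(void *) entries and grows on demand, which is what the existing callers converted by this patch do.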
Signed-off-by: Rob Clark Reviewed-by: Christian König Patchwork: https://patchwork.freedesktop.org/patch/568338/ --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 4 ++-- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2 +- drivers/gpu/drm/drm_exec.c | 13 ++++++++++--- drivers/gpu/drm/drm_gpuvm.c | 4 ++-- drivers/gpu/drm/imagination/pvr_job.c | 2 +- drivers/gpu/drm/nouveau/nouveau_uvmm.c | 2 +- drivers/gpu/drm/tests/drm_exec_test.c | 16 ++++++++-------- include/drm/drm_exec.h | 2 +- 13 files changed, 37 insertions(+), 30 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 41fbc4fd0fac..0bd3c4a6267a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1137,7 +1137,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, ctx->n_vms = 1; ctx->sync = &mem->sync; - drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&ctx->exec) { ret = amdgpu_vm_lock_pd(vm, &ctx->exec, 2); drm_exec_retry_on_contention(&ctx->exec); @@ -1176,7 +1176,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, int ret; ctx->sync = &mem->sync; - drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&ctx->exec) { ctx->n_vms = 0; list_for_each_entry(entry, &mem->attachments, list) { @@ -2552,7 +2552,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) amdgpu_sync_create(&sync); - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); /* Reserve all BOs and page tables for validation */ drm_exec_until_all_locked(&exec) { /* Reserve all the page directories */ @@ -2793,7 +2793,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) mutex_lock(&process_info->lock); - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e50be6500030..440e9a6786fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -66,7 +66,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, amdgpu_sync_create(&p->sync); drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT | - DRM_EXEC_IGNORE_DUPLICATES); + DRM_EXEC_IGNORE_DUPLICATES, 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 720011019741..796fa6f1420b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -70,7 +70,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_exec exec; int r; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { r = amdgpu_vm_lock_pd(vm, &exec, 0); if (likely(!r)) @@ -110,7 +110,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_exec exec; int r; - drm_exec_init(&exec, 
DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { r = amdgpu_vm_lock_pd(vm, &exec, 0); if (likely(!r)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 84beeaa4d21c..49a5f1c73b3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -203,7 +203,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, struct drm_exec exec; long r; - drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES); + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1); drm_exec_retry_on_contention(&exec); @@ -739,7 +739,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, } drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | - DRM_EXEC_IGNORE_DUPLICATES); + DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { if (gobj) { r = drm_exec_lock_obj(&exec, gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 9ddbf1494326..abd0b9763904 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1122,7 +1122,7 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, amdgpu_sync_create(&sync); - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_lock_obj(&exec, &ctx_data->meta_data_obj->tbo.base); @@ -1193,7 +1193,7 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, struct drm_exec exec; long r; - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_lock_obj(&exec, &ctx_data->meta_data_obj->tbo.base); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index ca45ba8ac171..bfbf59326ee1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -86,7 +86,7 @@ static int map_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm, amdgpu_sync_create(&sync); - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_lock_obj(&exec, &bo->tbo.base); drm_exec_retry_on_contention(&exec); @@ -149,7 +149,7 @@ static int unmap_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_exec exec; long r; - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_lock_obj(&exec, &bo->tbo.base); drm_exec_retry_on_contention(&exec); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index f2f3c338fd94..76d9f14ccc7c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1529,7 +1529,7 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx, bool intr) uint32_t gpuidx; int r; - drm_exec_init(&ctx->exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT: 0); + drm_exec_init(&ctx->exec, intr ? 
DRM_EXEC_INTERRUPTIBLE_WAIT: 0, 0); drm_exec_until_all_locked(&ctx->exec) { for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) { pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx); diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c index 5d2809de4517..48ee851b61d9 100644 --- a/drivers/gpu/drm/drm_exec.c +++ b/drivers/gpu/drm/drm_exec.c @@ -69,16 +69,23 @@ static void drm_exec_unlock_all(struct drm_exec *exec) * drm_exec_init - initialize a drm_exec object * @exec: the drm_exec object to initialize * @flags: controls locking behavior, see DRM_EXEC_* defines + * @nr: the initial # of objects * * Initialize the object and make sure that we can track locked objects. + * + * If nr is non-zero then it is used as the initial objects table size. + * In either case, the table will grow (be re-allocated) on demand. */ -void drm_exec_init(struct drm_exec *exec, uint32_t flags) +void drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr) { + if (!nr) + nr = PAGE_SIZE / sizeof(void *); + exec->flags = flags; - exec->objects = kmalloc(PAGE_SIZE, GFP_KERNEL); + exec->objects = kvmalloc_array(nr, sizeof(void *), GFP_KERNEL); /* If allocation here fails, just delay that till the first use */ - exec->max_objects = exec->objects ? PAGE_SIZE / sizeof(void *) : 0; + exec->max_objects = exec->objects ? nr : 0; exec->num_objects = 0; exec->contended = DRM_EXEC_DUMMY; exec->prelocked = NULL; diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index 9c0922c1a5a2..dc8edca61764 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -1250,7 +1250,7 @@ drm_gpuvm_exec_lock(struct drm_gpuvm_exec *vm_exec) unsigned int num_fences = vm_exec->num_fences; int ret; - drm_exec_init(exec, vm_exec->flags); + drm_exec_init(exec, vm_exec->flags, 0); drm_exec_until_all_locked(exec) { ret = drm_gpuvm_prepare_vm(gpuvm, exec, num_fences); @@ -1341,7 +1341,7 @@ drm_gpuvm_exec_lock_range(struct drm_gpuvm_exec *vm_exec, struct drm_exec *exec = &vm_exec->exec; int ret; - drm_exec_init(exec, vm_exec->flags); + drm_exec_init(exec, vm_exec->flags, 0); drm_exec_until_all_locked(exec) { ret = drm_gpuvm_prepare_range(gpuvm, exec, addr, range, diff --git a/drivers/gpu/drm/imagination/pvr_job.c b/drivers/gpu/drm/imagination/pvr_job.c index 04139da6c04d..78c2f3c6dce0 100644 --- a/drivers/gpu/drm/imagination/pvr_job.c +++ b/drivers/gpu/drm/imagination/pvr_job.c @@ -746,7 +746,7 @@ pvr_submit_jobs(struct pvr_device *pvr_dev, struct pvr_file *pvr_file, if (err) goto out_job_data_cleanup; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES, 0); xa_init_flags(&signal_array, XA_FLAGS_ALLOC); diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index dae3baf707a0..4f223c972c6a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -1347,7 +1347,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job, } } - drm_exec_init(exec, vme->flags); + drm_exec_init(exec, vme->flags, 0); drm_exec_until_all_locked(exec) { ret = bind_lock_validate(job, exec, vme->num_fences); drm_exec_retry_on_contention(exec); diff --git a/drivers/gpu/drm/tests/drm_exec_test.c b/drivers/gpu/drm/tests/drm_exec_test.c index 563949d777dd..81f928a429ba 100644 --- a/drivers/gpu/drm/tests/drm_exec_test.c +++ b/drivers/gpu/drm/tests/drm_exec_test.c @@ -46,7 +46,7 @@ static void sanitycheck(struct kunit *test) { 
struct drm_exec exec; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_fini(&exec); KUNIT_SUCCEED(test); } @@ -60,7 +60,7 @@ static void test_lock(struct kunit *test) drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE); - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { ret = drm_exec_lock_obj(&exec, &gobj); drm_exec_retry_on_contention(&exec); @@ -80,7 +80,7 @@ static void test_lock_unlock(struct kunit *test) drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE); - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { ret = drm_exec_lock_obj(&exec, &gobj); drm_exec_retry_on_contention(&exec); @@ -107,7 +107,7 @@ static void test_duplicates(struct kunit *test) drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE); - drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES); + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { ret = drm_exec_lock_obj(&exec, &gobj); drm_exec_retry_on_contention(&exec); @@ -134,7 +134,7 @@ static void test_prepare(struct kunit *test) drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE); - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { ret = drm_exec_prepare_obj(&exec, &gobj, 1); drm_exec_retry_on_contention(&exec); @@ -159,7 +159,7 @@ static void test_prepare_array(struct kunit *test) drm_gem_private_object_init(priv->drm, &gobj1, PAGE_SIZE); drm_gem_private_object_init(priv->drm, &gobj2, PAGE_SIZE); - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) ret = drm_exec_prepare_array(&exec, array, ARRAY_SIZE(array), 1); @@ -174,14 +174,14 @@ static void test_multiple_loops(struct kunit *test) { struct drm_exec exec; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { break; } drm_exec_fini(&exec); - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { break; diff --git a/include/drm/drm_exec.h b/include/drm/drm_exec.h index b5bf0b6da791..f1a66c048721 100644 --- a/include/drm/drm_exec.h +++ b/include/drm/drm_exec.h @@ -135,7 +135,7 @@ static inline bool drm_exec_is_contended(struct drm_exec *exec) return !!exec->contended; } -void drm_exec_init(struct drm_exec *exec, uint32_t flags); +void drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr); void drm_exec_fini(struct drm_exec *exec); bool drm_exec_cleanup(struct drm_exec *exec); int drm_exec_lock_obj(struct drm_exec *exec, struct drm_gem_object *obj); -- cgit v1.2.3 From dd08ebf6c3525a7ea2186e636df064ea47281987 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 30 Mar 2023 17:31:57 -0400 Subject: drm/xe: Introduce a new DRM driver for Intel GPUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Xe, is a new driver for Intel GPUs that supports both integrated and discrete platforms starting with Tiger Lake (first Intel Xe Architecture). 
The code is at a stage where it is already functional and has experimental support for multiple platforms starting from Tiger Lake, with initial support implemented in Mesa (for Iris and Anv, our OpenGL and Vulkan drivers), as well as in NEO (for OpenCL and Level0). The new Xe driver leverages a lot from i915. As for display, the intent is to share the display code with the i915 driver so that there is maximum reuse there. But it is not added in this patch. This initial work is a collaboration of many people and unfortunately the big squashed patch won't fully honor the proper credits. But let's get some git quick stats so we can at least try to preserve some of the credits: Co-developed-by: Matthew Brost Co-developed-by: Matthew Auld Co-developed-by: Matt Roper Co-developed-by: Thomas Hellström Co-developed-by: Francois Dugast Co-developed-by: Lucas De Marchi Co-developed-by: Maarten Lankhorst Co-developed-by: Philippe Lecluse Co-developed-by: Nirmoy Das Co-developed-by: Jani Nikula Co-developed-by: José Roberto de Souza Co-developed-by: Rodrigo Vivi Co-developed-by: Dave Airlie Co-developed-by: Faith Ekstrand Co-developed-by: Daniel Vetter Co-developed-by: Mauro Carvalho Chehab Signed-off-by: Rodrigo Vivi Signed-off-by: Matthew Brost --- Documentation/gpu/drivers.rst | 1 + Documentation/gpu/xe/index.rst | 23 + Documentation/gpu/xe/xe_cs.rst | 8 + Documentation/gpu/xe/xe_firmware.rst | 34 + Documentation/gpu/xe/xe_gt_mcr.rst | 13 + Documentation/gpu/xe/xe_map.rst | 8 + Documentation/gpu/xe/xe_migrate.rst | 8 + Documentation/gpu/xe/xe_mm.rst | 14 + Documentation/gpu/xe/xe_pcode.rst | 14 + Documentation/gpu/xe/xe_pm.rst | 14 + Documentation/gpu/xe/xe_rtp.rst | 20 + Documentation/gpu/xe/xe_wa.rst | 14 + drivers/gpu/drm/Kconfig | 2 + drivers/gpu/drm/Makefile | 1 + drivers/gpu/drm/xe/.gitignore | 2 + drivers/gpu/drm/xe/Kconfig | 63 + drivers/gpu/drm/xe/Kconfig.debug | 96 + drivers/gpu/drm/xe/Makefile | 121 + drivers/gpu/drm/xe/abi/guc_actions_abi.h | 219 ++ drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h | 249 ++ drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h | 189 ++ .../gpu/drm/xe/abi/guc_communication_mmio_abi.h | 49 + drivers/gpu/drm/xe/abi/guc_errors_abi.h | 37 + drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 322 ++ drivers/gpu/drm/xe/abi/guc_messages_abi.h | 234 ++ drivers/gpu/drm/xe/tests/Makefile | 4 + drivers/gpu/drm/xe/tests/xe_bo.c | 303 ++ drivers/gpu/drm/xe/tests/xe_bo_test.c | 25 + drivers/gpu/drm/xe/tests/xe_dma_buf.c | 259 ++ drivers/gpu/drm/xe/tests/xe_dma_buf_test.c | 23 + drivers/gpu/drm/xe/tests/xe_migrate.c | 378 +++ drivers/gpu/drm/xe/tests/xe_migrate_test.c | 23 + drivers/gpu/drm/xe/tests/xe_test.h | 66 + drivers/gpu/drm/xe/xe_bb.c | 97 + drivers/gpu/drm/xe/xe_bb.h | 27 + drivers/gpu/drm/xe/xe_bb_types.h | 20 + drivers/gpu/drm/xe/xe_bo.c | 1698 ++++++++++ drivers/gpu/drm/xe/xe_bo.h | 290 ++ drivers/gpu/drm/xe/xe_bo_doc.h | 179 + drivers/gpu/drm/xe/xe_bo_evict.c | 225 ++ drivers/gpu/drm/xe/xe_bo_evict.h | 15 + drivers/gpu/drm/xe/xe_bo_types.h | 73 + drivers/gpu/drm/xe/xe_debugfs.c | 129 + drivers/gpu/drm/xe/xe_debugfs.h | 13 + drivers/gpu/drm/xe/xe_device.c | 359 +++ drivers/gpu/drm/xe/xe_device.h | 126 + drivers/gpu/drm/xe/xe_device_types.h | 214 ++ drivers/gpu/drm/xe/xe_dma_buf.c | 307 ++ drivers/gpu/drm/xe/xe_dma_buf.h | 15 + drivers/gpu/drm/xe/xe_drv.h | 24 + drivers/gpu/drm/xe/xe_engine.c | 734 +++++ drivers/gpu/drm/xe/xe_engine.h | 54 + drivers/gpu/drm/xe/xe_engine_types.h | 208 ++ drivers/gpu/drm/xe/xe_exec.c | 390 +++ drivers/gpu/drm/xe/xe_exec.h | 14 + 
drivers/gpu/drm/xe/xe_execlist.c | 489 +++ drivers/gpu/drm/xe/xe_execlist.h | 21 + drivers/gpu/drm/xe/xe_execlist_types.h | 49 + drivers/gpu/drm/xe/xe_force_wake.c | 203 ++ drivers/gpu/drm/xe/xe_force_wake.h | 40 + drivers/gpu/drm/xe/xe_force_wake_types.h | 84 + drivers/gpu/drm/xe/xe_ggtt.c | 304 ++ drivers/gpu/drm/xe/xe_ggtt.h | 28 + drivers/gpu/drm/xe/xe_ggtt_types.h | 28 + drivers/gpu/drm/xe/xe_gpu_scheduler.c | 101 + drivers/gpu/drm/xe/xe_gpu_scheduler.h | 73 + drivers/gpu/drm/xe/xe_gpu_scheduler_types.h | 57 + drivers/gpu/drm/xe/xe_gt.c | 830 +++++ drivers/gpu/drm/xe/xe_gt.h | 64 + drivers/gpu/drm/xe/xe_gt_clock.c | 83 + drivers/gpu/drm/xe/xe_gt_clock.h | 13 + drivers/gpu/drm/xe/xe_gt_debugfs.c | 160 + drivers/gpu/drm/xe/xe_gt_debugfs.h | 13 + drivers/gpu/drm/xe/xe_gt_mcr.c | 552 ++++ drivers/gpu/drm/xe/xe_gt_mcr.h | 26 + drivers/gpu/drm/xe/xe_gt_pagefault.c | 750 +++++ drivers/gpu/drm/xe/xe_gt_pagefault.h | 22 + drivers/gpu/drm/xe/xe_gt_sysfs.c | 55 + drivers/gpu/drm/xe/xe_gt_sysfs.h | 19 + drivers/gpu/drm/xe/xe_gt_sysfs_types.h | 26 + drivers/gpu/drm/xe/xe_gt_topology.c | 144 + drivers/gpu/drm/xe/xe_gt_topology.h | 20 + drivers/gpu/drm/xe/xe_gt_types.h | 320 ++ drivers/gpu/drm/xe/xe_guc.c | 875 +++++ drivers/gpu/drm/xe/xe_guc.h | 57 + drivers/gpu/drm/xe/xe_guc_ads.c | 676 ++++ drivers/gpu/drm/xe/xe_guc_ads.h | 17 + drivers/gpu/drm/xe/xe_guc_ads_types.h | 25 + drivers/gpu/drm/xe/xe_guc_ct.c | 1196 +++++++ drivers/gpu/drm/xe/xe_guc_ct.h | 62 + drivers/gpu/drm/xe/xe_guc_ct_types.h | 87 + drivers/gpu/drm/xe/xe_guc_debugfs.c | 105 + drivers/gpu/drm/xe/xe_guc_debugfs.h | 14 + drivers/gpu/drm/xe/xe_guc_engine_types.h | 52 + drivers/gpu/drm/xe/xe_guc_fwif.h | 392 +++ drivers/gpu/drm/xe/xe_guc_hwconfig.c | 125 + drivers/gpu/drm/xe/xe_guc_hwconfig.h | 17 + drivers/gpu/drm/xe/xe_guc_log.c | 109 + drivers/gpu/drm/xe/xe_guc_log.h | 48 + drivers/gpu/drm/xe/xe_guc_log_types.h | 23 + drivers/gpu/drm/xe/xe_guc_pc.c | 843 +++++ drivers/gpu/drm/xe/xe_guc_pc.h | 15 + drivers/gpu/drm/xe/xe_guc_pc_types.h | 34 + drivers/gpu/drm/xe/xe_guc_reg.h | 147 + drivers/gpu/drm/xe/xe_guc_submit.c | 1695 ++++++++++ drivers/gpu/drm/xe/xe_guc_submit.h | 30 + drivers/gpu/drm/xe/xe_guc_types.h | 71 + drivers/gpu/drm/xe/xe_huc.c | 131 + drivers/gpu/drm/xe/xe_huc.h | 19 + drivers/gpu/drm/xe/xe_huc_debugfs.c | 71 + drivers/gpu/drm/xe/xe_huc_debugfs.h | 14 + drivers/gpu/drm/xe/xe_huc_types.h | 19 + drivers/gpu/drm/xe/xe_hw_engine.c | 658 ++++ drivers/gpu/drm/xe/xe_hw_engine.h | 27 + drivers/gpu/drm/xe/xe_hw_engine_types.h | 107 + drivers/gpu/drm/xe/xe_hw_fence.c | 230 ++ drivers/gpu/drm/xe/xe_hw_fence.h | 27 + drivers/gpu/drm/xe/xe_hw_fence_types.h | 72 + drivers/gpu/drm/xe/xe_irq.c | 565 ++++ drivers/gpu/drm/xe/xe_irq.h | 18 + drivers/gpu/drm/xe/xe_lrc.c | 841 +++++ drivers/gpu/drm/xe/xe_lrc.h | 50 + drivers/gpu/drm/xe/xe_lrc_types.h | 47 + drivers/gpu/drm/xe/xe_macros.h | 20 + drivers/gpu/drm/xe/xe_map.h | 93 + drivers/gpu/drm/xe/xe_migrate.c | 1168 +++++++ drivers/gpu/drm/xe/xe_migrate.h | 88 + drivers/gpu/drm/xe/xe_migrate_doc.h | 88 + drivers/gpu/drm/xe/xe_mmio.c | 466 +++ drivers/gpu/drm/xe/xe_mmio.h | 110 + drivers/gpu/drm/xe/xe_mocs.c | 557 ++++ drivers/gpu/drm/xe/xe_mocs.h | 29 + drivers/gpu/drm/xe/xe_module.c | 76 + drivers/gpu/drm/xe/xe_module.h | 13 + drivers/gpu/drm/xe/xe_pci.c | 651 ++++ drivers/gpu/drm/xe/xe_pci.h | 21 + drivers/gpu/drm/xe/xe_pcode.c | 296 ++ drivers/gpu/drm/xe/xe_pcode.h | 25 + drivers/gpu/drm/xe/xe_pcode_api.h | 40 + drivers/gpu/drm/xe/xe_platform_types.h | 32 + drivers/gpu/drm/xe/xe_pm.c | 
207 ++ drivers/gpu/drm/xe/xe_pm.h | 24 + drivers/gpu/drm/xe/xe_preempt_fence.c | 157 + drivers/gpu/drm/xe/xe_preempt_fence.h | 61 + drivers/gpu/drm/xe/xe_preempt_fence_types.h | 33 + drivers/gpu/drm/xe/xe_pt.c | 1542 +++++++++ drivers/gpu/drm/xe/xe_pt.h | 54 + drivers/gpu/drm/xe/xe_pt_types.h | 57 + drivers/gpu/drm/xe/xe_pt_walk.c | 160 + drivers/gpu/drm/xe/xe_pt_walk.h | 161 + drivers/gpu/drm/xe/xe_query.c | 387 +++ drivers/gpu/drm/xe/xe_query.h | 14 + drivers/gpu/drm/xe/xe_reg_sr.c | 248 ++ drivers/gpu/drm/xe/xe_reg_sr.h | 28 + drivers/gpu/drm/xe/xe_reg_sr_types.h | 44 + drivers/gpu/drm/xe/xe_reg_whitelist.c | 73 + drivers/gpu/drm/xe/xe_reg_whitelist.h | 13 + drivers/gpu/drm/xe/xe_res_cursor.h | 226 ++ drivers/gpu/drm/xe/xe_ring_ops.c | 373 +++ drivers/gpu/drm/xe/xe_ring_ops.h | 17 + drivers/gpu/drm/xe/xe_ring_ops_types.h | 22 + drivers/gpu/drm/xe/xe_rtp.c | 144 + drivers/gpu/drm/xe/xe_rtp.h | 340 ++ drivers/gpu/drm/xe/xe_rtp_types.h | 105 + drivers/gpu/drm/xe/xe_sa.c | 96 + drivers/gpu/drm/xe/xe_sa.h | 42 + drivers/gpu/drm/xe/xe_sa_types.h | 19 + drivers/gpu/drm/xe/xe_sched_job.c | 246 ++ drivers/gpu/drm/xe/xe_sched_job.h | 76 + drivers/gpu/drm/xe/xe_sched_job_types.h | 46 + drivers/gpu/drm/xe/xe_step.c | 189 ++ drivers/gpu/drm/xe/xe_step.h | 18 + drivers/gpu/drm/xe/xe_step_types.h | 51 + drivers/gpu/drm/xe/xe_sync.c | 276 ++ drivers/gpu/drm/xe/xe_sync.h | 27 + drivers/gpu/drm/xe/xe_sync_types.h | 27 + drivers/gpu/drm/xe/xe_trace.c | 9 + drivers/gpu/drm/xe/xe_trace.h | 513 +++ drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c | 130 + drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h | 16 + drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h | 18 + drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 403 +++ drivers/gpu/drm/xe/xe_ttm_vram_mgr.h | 41 + drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h | 44 + drivers/gpu/drm/xe/xe_tuning.c | 39 + drivers/gpu/drm/xe/xe_tuning.h | 13 + drivers/gpu/drm/xe/xe_uc.c | 226 ++ drivers/gpu/drm/xe/xe_uc.h | 21 + drivers/gpu/drm/xe/xe_uc_debugfs.c | 26 + drivers/gpu/drm/xe/xe_uc_debugfs.h | 14 + drivers/gpu/drm/xe/xe_uc_fw.c | 406 +++ drivers/gpu/drm/xe/xe_uc_fw.h | 180 ++ drivers/gpu/drm/xe/xe_uc_fw_abi.h | 81 + drivers/gpu/drm/xe/xe_uc_fw_types.h | 112 + drivers/gpu/drm/xe/xe_uc_types.h | 25 + drivers/gpu/drm/xe/xe_vm.c | 3407 ++++++++++++++++++++ drivers/gpu/drm/xe/xe_vm.h | 141 + drivers/gpu/drm/xe/xe_vm_doc.h | 555 ++++ drivers/gpu/drm/xe/xe_vm_madvise.c | 347 ++ drivers/gpu/drm/xe/xe_vm_madvise.h | 15 + drivers/gpu/drm/xe/xe_vm_types.h | 337 ++ drivers/gpu/drm/xe/xe_wa.c | 326 ++ drivers/gpu/drm/xe/xe_wa.h | 18 + drivers/gpu/drm/xe/xe_wait_user_fence.c | 202 ++ drivers/gpu/drm/xe/xe_wait_user_fence.h | 15 + drivers/gpu/drm/xe/xe_wopcm.c | 263 ++ drivers/gpu/drm/xe/xe_wopcm.h | 16 + drivers/gpu/drm/xe/xe_wopcm_types.h | 26 + include/drm/xe_pciids.h | 195 ++ include/uapi/drm/xe_drm.h | 787 +++++ 210 files changed, 40575 insertions(+) create mode 100644 Documentation/gpu/xe/index.rst create mode 100644 Documentation/gpu/xe/xe_cs.rst create mode 100644 Documentation/gpu/xe/xe_firmware.rst create mode 100644 Documentation/gpu/xe/xe_gt_mcr.rst create mode 100644 Documentation/gpu/xe/xe_map.rst create mode 100644 Documentation/gpu/xe/xe_migrate.rst create mode 100644 Documentation/gpu/xe/xe_mm.rst create mode 100644 Documentation/gpu/xe/xe_pcode.rst create mode 100644 Documentation/gpu/xe/xe_pm.rst create mode 100644 Documentation/gpu/xe/xe_rtp.rst create mode 100644 Documentation/gpu/xe/xe_wa.rst create mode 100644 drivers/gpu/drm/xe/.gitignore create mode 100644 drivers/gpu/drm/xe/Kconfig create mode 100644 
drivers/gpu/drm/xe/Kconfig.debug create mode 100644 drivers/gpu/drm/xe/Makefile create mode 100644 drivers/gpu/drm/xe/abi/guc_actions_abi.h create mode 100644 drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h create mode 100644 drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h create mode 100644 drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h create mode 100644 drivers/gpu/drm/xe/abi/guc_errors_abi.h create mode 100644 drivers/gpu/drm/xe/abi/guc_klvs_abi.h create mode 100644 drivers/gpu/drm/xe/abi/guc_messages_abi.h create mode 100644 drivers/gpu/drm/xe/tests/Makefile create mode 100644 drivers/gpu/drm/xe/tests/xe_bo.c create mode 100644 drivers/gpu/drm/xe/tests/xe_bo_test.c create mode 100644 drivers/gpu/drm/xe/tests/xe_dma_buf.c create mode 100644 drivers/gpu/drm/xe/tests/xe_dma_buf_test.c create mode 100644 drivers/gpu/drm/xe/tests/xe_migrate.c create mode 100644 drivers/gpu/drm/xe/tests/xe_migrate_test.c create mode 100644 drivers/gpu/drm/xe/tests/xe_test.h create mode 100644 drivers/gpu/drm/xe/xe_bb.c create mode 100644 drivers/gpu/drm/xe/xe_bb.h create mode 100644 drivers/gpu/drm/xe/xe_bb_types.h create mode 100644 drivers/gpu/drm/xe/xe_bo.c create mode 100644 drivers/gpu/drm/xe/xe_bo.h create mode 100644 drivers/gpu/drm/xe/xe_bo_doc.h create mode 100644 drivers/gpu/drm/xe/xe_bo_evict.c create mode 100644 drivers/gpu/drm/xe/xe_bo_evict.h create mode 100644 drivers/gpu/drm/xe/xe_bo_types.h create mode 100644 drivers/gpu/drm/xe/xe_debugfs.c create mode 100644 drivers/gpu/drm/xe/xe_debugfs.h create mode 100644 drivers/gpu/drm/xe/xe_device.c create mode 100644 drivers/gpu/drm/xe/xe_device.h create mode 100644 drivers/gpu/drm/xe/xe_device_types.h create mode 100644 drivers/gpu/drm/xe/xe_dma_buf.c create mode 100644 drivers/gpu/drm/xe/xe_dma_buf.h create mode 100644 drivers/gpu/drm/xe/xe_drv.h create mode 100644 drivers/gpu/drm/xe/xe_engine.c create mode 100644 drivers/gpu/drm/xe/xe_engine.h create mode 100644 drivers/gpu/drm/xe/xe_engine_types.h create mode 100644 drivers/gpu/drm/xe/xe_exec.c create mode 100644 drivers/gpu/drm/xe/xe_exec.h create mode 100644 drivers/gpu/drm/xe/xe_execlist.c create mode 100644 drivers/gpu/drm/xe/xe_execlist.h create mode 100644 drivers/gpu/drm/xe/xe_execlist_types.h create mode 100644 drivers/gpu/drm/xe/xe_force_wake.c create mode 100644 drivers/gpu/drm/xe/xe_force_wake.h create mode 100644 drivers/gpu/drm/xe/xe_force_wake_types.h create mode 100644 drivers/gpu/drm/xe/xe_ggtt.c create mode 100644 drivers/gpu/drm/xe/xe_ggtt.h create mode 100644 drivers/gpu/drm/xe/xe_ggtt_types.h create mode 100644 drivers/gpu/drm/xe/xe_gpu_scheduler.c create mode 100644 drivers/gpu/drm/xe/xe_gpu_scheduler.h create mode 100644 drivers/gpu/drm/xe/xe_gpu_scheduler_types.h create mode 100644 drivers/gpu/drm/xe/xe_gt.c create mode 100644 drivers/gpu/drm/xe/xe_gt.h create mode 100644 drivers/gpu/drm/xe/xe_gt_clock.c create mode 100644 drivers/gpu/drm/xe/xe_gt_clock.h create mode 100644 drivers/gpu/drm/xe/xe_gt_debugfs.c create mode 100644 drivers/gpu/drm/xe/xe_gt_debugfs.h create mode 100644 drivers/gpu/drm/xe/xe_gt_mcr.c create mode 100644 drivers/gpu/drm/xe/xe_gt_mcr.h create mode 100644 drivers/gpu/drm/xe/xe_gt_pagefault.c create mode 100644 drivers/gpu/drm/xe/xe_gt_pagefault.h create mode 100644 drivers/gpu/drm/xe/xe_gt_sysfs.c create mode 100644 drivers/gpu/drm/xe/xe_gt_sysfs.h create mode 100644 drivers/gpu/drm/xe/xe_gt_sysfs_types.h create mode 100644 drivers/gpu/drm/xe/xe_gt_topology.c create mode 100644 drivers/gpu/drm/xe/xe_gt_topology.h create mode 100644 
drivers/gpu/drm/xe/xe_gt_types.h create mode 100644 drivers/gpu/drm/xe/xe_guc.c create mode 100644 drivers/gpu/drm/xe/xe_guc.h create mode 100644 drivers/gpu/drm/xe/xe_guc_ads.c create mode 100644 drivers/gpu/drm/xe/xe_guc_ads.h create mode 100644 drivers/gpu/drm/xe/xe_guc_ads_types.h create mode 100644 drivers/gpu/drm/xe/xe_guc_ct.c create mode 100644 drivers/gpu/drm/xe/xe_guc_ct.h create mode 100644 drivers/gpu/drm/xe/xe_guc_ct_types.h create mode 100644 drivers/gpu/drm/xe/xe_guc_debugfs.c create mode 100644 drivers/gpu/drm/xe/xe_guc_debugfs.h create mode 100644 drivers/gpu/drm/xe/xe_guc_engine_types.h create mode 100644 drivers/gpu/drm/xe/xe_guc_fwif.h create mode 100644 drivers/gpu/drm/xe/xe_guc_hwconfig.c create mode 100644 drivers/gpu/drm/xe/xe_guc_hwconfig.h create mode 100644 drivers/gpu/drm/xe/xe_guc_log.c create mode 100644 drivers/gpu/drm/xe/xe_guc_log.h create mode 100644 drivers/gpu/drm/xe/xe_guc_log_types.h create mode 100644 drivers/gpu/drm/xe/xe_guc_pc.c create mode 100644 drivers/gpu/drm/xe/xe_guc_pc.h create mode 100644 drivers/gpu/drm/xe/xe_guc_pc_types.h create mode 100644 drivers/gpu/drm/xe/xe_guc_reg.h create mode 100644 drivers/gpu/drm/xe/xe_guc_submit.c create mode 100644 drivers/gpu/drm/xe/xe_guc_submit.h create mode 100644 drivers/gpu/drm/xe/xe_guc_types.h create mode 100644 drivers/gpu/drm/xe/xe_huc.c create mode 100644 drivers/gpu/drm/xe/xe_huc.h create mode 100644 drivers/gpu/drm/xe/xe_huc_debugfs.c create mode 100644 drivers/gpu/drm/xe/xe_huc_debugfs.h create mode 100644 drivers/gpu/drm/xe/xe_huc_types.h create mode 100644 drivers/gpu/drm/xe/xe_hw_engine.c create mode 100644 drivers/gpu/drm/xe/xe_hw_engine.h create mode 100644 drivers/gpu/drm/xe/xe_hw_engine_types.h create mode 100644 drivers/gpu/drm/xe/xe_hw_fence.c create mode 100644 drivers/gpu/drm/xe/xe_hw_fence.h create mode 100644 drivers/gpu/drm/xe/xe_hw_fence_types.h create mode 100644 drivers/gpu/drm/xe/xe_irq.c create mode 100644 drivers/gpu/drm/xe/xe_irq.h create mode 100644 drivers/gpu/drm/xe/xe_lrc.c create mode 100644 drivers/gpu/drm/xe/xe_lrc.h create mode 100644 drivers/gpu/drm/xe/xe_lrc_types.h create mode 100644 drivers/gpu/drm/xe/xe_macros.h create mode 100644 drivers/gpu/drm/xe/xe_map.h create mode 100644 drivers/gpu/drm/xe/xe_migrate.c create mode 100644 drivers/gpu/drm/xe/xe_migrate.h create mode 100644 drivers/gpu/drm/xe/xe_migrate_doc.h create mode 100644 drivers/gpu/drm/xe/xe_mmio.c create mode 100644 drivers/gpu/drm/xe/xe_mmio.h create mode 100644 drivers/gpu/drm/xe/xe_mocs.c create mode 100644 drivers/gpu/drm/xe/xe_mocs.h create mode 100644 drivers/gpu/drm/xe/xe_module.c create mode 100644 drivers/gpu/drm/xe/xe_module.h create mode 100644 drivers/gpu/drm/xe/xe_pci.c create mode 100644 drivers/gpu/drm/xe/xe_pci.h create mode 100644 drivers/gpu/drm/xe/xe_pcode.c create mode 100644 drivers/gpu/drm/xe/xe_pcode.h create mode 100644 drivers/gpu/drm/xe/xe_pcode_api.h create mode 100644 drivers/gpu/drm/xe/xe_platform_types.h create mode 100644 drivers/gpu/drm/xe/xe_pm.c create mode 100644 drivers/gpu/drm/xe/xe_pm.h create mode 100644 drivers/gpu/drm/xe/xe_preempt_fence.c create mode 100644 drivers/gpu/drm/xe/xe_preempt_fence.h create mode 100644 drivers/gpu/drm/xe/xe_preempt_fence_types.h create mode 100644 drivers/gpu/drm/xe/xe_pt.c create mode 100644 drivers/gpu/drm/xe/xe_pt.h create mode 100644 drivers/gpu/drm/xe/xe_pt_types.h create mode 100644 drivers/gpu/drm/xe/xe_pt_walk.c create mode 100644 drivers/gpu/drm/xe/xe_pt_walk.h create mode 100644 drivers/gpu/drm/xe/xe_query.c create mode 
100644 drivers/gpu/drm/xe/xe_query.h create mode 100644 drivers/gpu/drm/xe/xe_reg_sr.c create mode 100644 drivers/gpu/drm/xe/xe_reg_sr.h create mode 100644 drivers/gpu/drm/xe/xe_reg_sr_types.h create mode 100644 drivers/gpu/drm/xe/xe_reg_whitelist.c create mode 100644 drivers/gpu/drm/xe/xe_reg_whitelist.h create mode 100644 drivers/gpu/drm/xe/xe_res_cursor.h create mode 100644 drivers/gpu/drm/xe/xe_ring_ops.c create mode 100644 drivers/gpu/drm/xe/xe_ring_ops.h create mode 100644 drivers/gpu/drm/xe/xe_ring_ops_types.h create mode 100644 drivers/gpu/drm/xe/xe_rtp.c create mode 100644 drivers/gpu/drm/xe/xe_rtp.h create mode 100644 drivers/gpu/drm/xe/xe_rtp_types.h create mode 100644 drivers/gpu/drm/xe/xe_sa.c create mode 100644 drivers/gpu/drm/xe/xe_sa.h create mode 100644 drivers/gpu/drm/xe/xe_sa_types.h create mode 100644 drivers/gpu/drm/xe/xe_sched_job.c create mode 100644 drivers/gpu/drm/xe/xe_sched_job.h create mode 100644 drivers/gpu/drm/xe/xe_sched_job_types.h create mode 100644 drivers/gpu/drm/xe/xe_step.c create mode 100644 drivers/gpu/drm/xe/xe_step.h create mode 100644 drivers/gpu/drm/xe/xe_step_types.h create mode 100644 drivers/gpu/drm/xe/xe_sync.c create mode 100644 drivers/gpu/drm/xe/xe_sync.h create mode 100644 drivers/gpu/drm/xe/xe_sync_types.h create mode 100644 drivers/gpu/drm/xe/xe_trace.c create mode 100644 drivers/gpu/drm/xe/xe_trace.h create mode 100644 drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c create mode 100644 drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h create mode 100644 drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h create mode 100644 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c create mode 100644 drivers/gpu/drm/xe/xe_ttm_vram_mgr.h create mode 100644 drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h create mode 100644 drivers/gpu/drm/xe/xe_tuning.c create mode 100644 drivers/gpu/drm/xe/xe_tuning.h create mode 100644 drivers/gpu/drm/xe/xe_uc.c create mode 100644 drivers/gpu/drm/xe/xe_uc.h create mode 100644 drivers/gpu/drm/xe/xe_uc_debugfs.c create mode 100644 drivers/gpu/drm/xe/xe_uc_debugfs.h create mode 100644 drivers/gpu/drm/xe/xe_uc_fw.c create mode 100644 drivers/gpu/drm/xe/xe_uc_fw.h create mode 100644 drivers/gpu/drm/xe/xe_uc_fw_abi.h create mode 100644 drivers/gpu/drm/xe/xe_uc_fw_types.h create mode 100644 drivers/gpu/drm/xe/xe_uc_types.h create mode 100644 drivers/gpu/drm/xe/xe_vm.c create mode 100644 drivers/gpu/drm/xe/xe_vm.h create mode 100644 drivers/gpu/drm/xe/xe_vm_doc.h create mode 100644 drivers/gpu/drm/xe/xe_vm_madvise.c create mode 100644 drivers/gpu/drm/xe/xe_vm_madvise.h create mode 100644 drivers/gpu/drm/xe/xe_vm_types.h create mode 100644 drivers/gpu/drm/xe/xe_wa.c create mode 100644 drivers/gpu/drm/xe/xe_wa.h create mode 100644 drivers/gpu/drm/xe/xe_wait_user_fence.c create mode 100644 drivers/gpu/drm/xe/xe_wait_user_fence.h create mode 100644 drivers/gpu/drm/xe/xe_wopcm.c create mode 100644 drivers/gpu/drm/xe/xe_wopcm.h create mode 100644 drivers/gpu/drm/xe/xe_wopcm_types.h create mode 100644 include/drm/xe_pciids.h create mode 100644 include/uapi/drm/xe_drm.h (limited to 'include/drm') diff --git a/Documentation/gpu/drivers.rst b/Documentation/gpu/drivers.rst index cc6535f5f28c..b899cbc5c2b4 100644 --- a/Documentation/gpu/drivers.rst +++ b/Documentation/gpu/drivers.rst @@ -18,6 +18,7 @@ GPU Driver Documentation vkms bridge/dw-hdmi xen-front + xe/index afbc komeda-kms panfrost diff --git a/Documentation/gpu/xe/index.rst b/Documentation/gpu/xe/index.rst new file mode 100644 index 000000000000..2fddf9ed251e --- /dev/null +++ b/Documentation/gpu/xe/index.rst @@ -0,0 +1,23 @@ 
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +======================= +drm/xe Intel GFX Driver +======================= + +The drm/xe driver supports some future GFX cards with rendering, display, +compute and media. Support for currently available platforms like TGL, ADL, +DG2, etc is provided to prototype the driver. + +.. toctree:: + :titlesonly: + + xe_mm + xe_map + xe_migrate + xe_cs + xe_pm + xe_pcode + xe_gt_mcr + xe_wa + xe_rtp + xe_firmware diff --git a/Documentation/gpu/xe/xe_cs.rst b/Documentation/gpu/xe/xe_cs.rst new file mode 100644 index 000000000000..e379aed4f5a8 --- /dev/null +++ b/Documentation/gpu/xe/xe_cs.rst @@ -0,0 +1,8 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +================== +Command submission +================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_exec.c + :doc: Execbuf (User GPU command submission) diff --git a/Documentation/gpu/xe/xe_firmware.rst b/Documentation/gpu/xe/xe_firmware.rst new file mode 100644 index 000000000000..c01246ae99f5 --- /dev/null +++ b/Documentation/gpu/xe/xe_firmware.rst @@ -0,0 +1,34 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +======== +Firmware +======== + +Firmware Layout +=============== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_uc_fw_abi.h + :doc: Firmware Layout + +Write Once Protected Content Memory (WOPCM) Layout +================================================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_wopcm.c + :doc: Write Once Protected Content Memory (WOPCM) Layout + +GuC CTB Blob +============ + +.. kernel-doc:: drivers/gpu/drm/xe/xe_guc_ct.c + :doc: GuC CTB Blob + +GuC Power Conservation (PC) +=========================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_guc_pc.c + :doc: GuC Power Conservation (PC) + +Internal API +============ + +TODO diff --git a/Documentation/gpu/xe/xe_gt_mcr.rst b/Documentation/gpu/xe/xe_gt_mcr.rst new file mode 100644 index 000000000000..848c07bc36d0 --- /dev/null +++ b/Documentation/gpu/xe/xe_gt_mcr.rst @@ -0,0 +1,13 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +============================================== +GT Multicast/Replicated (MCR) Register Support +============================================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_gt_mcr.c + :doc: GT Multicast/Replicated (MCR) Register Support + +Internal API +============ + +TODO diff --git a/Documentation/gpu/xe/xe_map.rst b/Documentation/gpu/xe/xe_map.rst new file mode 100644 index 000000000000..a098cfd2df04 --- /dev/null +++ b/Documentation/gpu/xe/xe_map.rst @@ -0,0 +1,8 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +========= +Map Layer +========= + +.. kernel-doc:: drivers/gpu/drm/xe/xe_map.h + :doc: Map layer diff --git a/Documentation/gpu/xe/xe_migrate.rst b/Documentation/gpu/xe/xe_migrate.rst new file mode 100644 index 000000000000..f92faec0ac94 --- /dev/null +++ b/Documentation/gpu/xe/xe_migrate.rst @@ -0,0 +1,8 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +============= +Migrate Layer +============= + +.. kernel-doc:: drivers/gpu/drm/xe/xe_migrate_doc.h + :doc: Migrate Layer diff --git a/Documentation/gpu/xe/xe_mm.rst b/Documentation/gpu/xe/xe_mm.rst new file mode 100644 index 000000000000..6c8fd8b4a466 --- /dev/null +++ b/Documentation/gpu/xe/xe_mm.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +================= +Memory Management +================= + +.. kernel-doc:: drivers/gpu/drm/xe/xe_bo_doc.h + :doc: Buffer Objects (BO) + +Pagetable building +================== + +.. 
kernel-doc:: drivers/gpu/drm/xe/xe_pt.c + :doc: Pagetable building diff --git a/Documentation/gpu/xe/xe_pcode.rst b/Documentation/gpu/xe/xe_pcode.rst new file mode 100644 index 000000000000..d2e22cc45061 --- /dev/null +++ b/Documentation/gpu/xe/xe_pcode.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +===== +Pcode +===== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c + :doc: PCODE + +Internal API +============ + +.. kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c + :internal: diff --git a/Documentation/gpu/xe/xe_pm.rst b/Documentation/gpu/xe/xe_pm.rst new file mode 100644 index 000000000000..6781cdfb24f6 --- /dev/null +++ b/Documentation/gpu/xe/xe_pm.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +======================== +Runtime Power Management +======================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_pm.c + :doc: Xe Power Management + +Internal API +============ + +.. kernel-doc:: drivers/gpu/drm/xe/xe_pm.c + :internal: diff --git a/Documentation/gpu/xe/xe_rtp.rst b/Documentation/gpu/xe/xe_rtp.rst new file mode 100644 index 000000000000..7fdf4b6c1a04 --- /dev/null +++ b/Documentation/gpu/xe/xe_rtp.rst @@ -0,0 +1,20 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +========================= +Register Table Processing +========================= + +.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.c + :doc: Register Table Processing + +Internal API +============ + +.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp_types.h + :internal: + +.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.h + :internal: + +.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.c + :internal: diff --git a/Documentation/gpu/xe/xe_wa.rst b/Documentation/gpu/xe/xe_wa.rst new file mode 100644 index 000000000000..f8811cc6adcc --- /dev/null +++ b/Documentation/gpu/xe/xe_wa.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +==================== +Hardware workarounds +==================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_wa.c + :doc: Hardware workarounds + +Internal API +============ + +.. 
kernel-doc:: drivers/gpu/drm/xe/xe_wa.c + :internal: diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 31cfe2c2a2af..2520db0b776e 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -276,6 +276,8 @@ source "drivers/gpu/drm/nouveau/Kconfig" source "drivers/gpu/drm/i915/Kconfig" +source "drivers/gpu/drm/xe/Kconfig" + source "drivers/gpu/drm/kmb/Kconfig" config DRM_VGEM diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 8ac6f4b9546e..104b42df2e95 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -134,6 +134,7 @@ obj-$(CONFIG_DRM_RADEON)+= radeon/ obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/ obj-$(CONFIG_DRM_AMDGPU)+= amd/amdxcp/ obj-$(CONFIG_DRM_I915) += i915/ +obj-$(CONFIG_DRM_XE) += xe/ obj-$(CONFIG_DRM_KMB_DISPLAY) += kmb/ obj-$(CONFIG_DRM_MGAG200) += mgag200/ obj-$(CONFIG_DRM_V3D) += v3d/ diff --git a/drivers/gpu/drm/xe/.gitignore b/drivers/gpu/drm/xe/.gitignore new file mode 100644 index 000000000000..81972dce1aff --- /dev/null +++ b/drivers/gpu/drm/xe/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +*.hdrtest diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig new file mode 100644 index 000000000000..62f54e6d62d9 --- /dev/null +++ b/drivers/gpu/drm/xe/Kconfig @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: GPL-2.0-only +config DRM_XE + tristate "Intel Xe Graphics" + depends on DRM && PCI && MMU + select INTERVAL_TREE + # we need shmfs for the swappable backing store, and in particular + # the shmem_readpage() which depends upon tmpfs + select SHMEM + select TMPFS + select DRM_BUDDY + select DRM_KMS_HELPER + select DRM_PANEL + select DRM_SUBALLOC_HELPER + select RELAY + select IRQ_WORK + select SYNC_FILE + select IOSF_MBI + select CRC32 + select SND_HDA_I915 if SND_HDA_CORE + select CEC_CORE if CEC_NOTIFIER + select VMAP_PFN + select DRM_TTM + select DRM_TTM_HELPER + select DRM_SCHED + select MMU_NOTIFIER + help + Experimental driver for Intel Xe series GPUs + + If "M" is selected, the module will be called xe. + +config DRM_XE_FORCE_PROBE + string "Force probe xe for selected Intel hardware IDs" + depends on DRM_XE + help + This is the default value for the xe.force_probe module + parameter. Using the module parameter overrides this option. + + Force probe the xe for Intel graphics devices that are + recognized but not properly supported by this kernel version. It is + recommended to upgrade to a kernel version with proper support as soon + as it is available. + + It can also be used to block the probe of recognized and fully + supported devices. + + Use "" to disable force probe. If in doubt, use this. + + Use "[,,...]" to force probe the xe for listed + devices. For example, "4500" or "4500,4571". + + Use "*" to force probe the driver for all known devices. + + Use "!" right before the ID to block the probe of the device. For + example, "4500,!4571" forces the probe of 4500 and blocks the probe of + 4571. + + Use "!*" to block the probe of the driver for all known devices. 
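+
+# Editor's note (illustrative only, not part of the original patch): the help
+# text above corresponds to the xe.force_probe module parameter it mentions,
+# so the same syntax can be exercised at runtime, for example:
+#
+#   xe.force_probe=4500,4571    force probe of device IDs 4500 and 4571
+#   xe.force_probe=4500,!4571   force probe of 4500, block the probe of 4571
+#   xe.force_probe=!*           block the probe of all known devices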
+ +menu "drm/Xe Debugging" +depends on DRM_XE +depends on EXPERT +source "drivers/gpu/drm/xe/Kconfig.debug" +endmenu diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug new file mode 100644 index 000000000000..b61fd43a76fe --- /dev/null +++ b/drivers/gpu/drm/xe/Kconfig.debug @@ -0,0 +1,96 @@ +# SPDX-License-Identifier: GPL-2.0-only +config DRM_XE_WERROR + bool "Force GCC to throw an error instead of a warning when compiling" + # As this may inadvertently break the build, only allow the user + # to shoot oneself in the foot iff they aim really hard + depends on EXPERT + # We use the dependency on !COMPILE_TEST to not be enabled in + # allmodconfig or allyesconfig configurations + depends on !COMPILE_TEST + default n + help + Add -Werror to the build flags for (and only for) xe.ko. + Do not enable this unless you are writing code for the xe.ko module. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_XE_DEBUG + bool "Enable additional driver debugging" + depends on DRM_XE + depends on EXPERT + depends on !COMPILE_TEST + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will catch some internal issues. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_XE_DEBUG_VM + bool "Enable extra VM debugging info" + default n + help + Enable extra VM debugging info + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_XE_DEBUG_MEM + bool "Enable passing SYS/LMEM addresses to user space" + default n + help + Pass object location trough uapi. Intended for extended + testing and development only. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_XE_SIMPLE_ERROR_CAPTURE + bool "Enable simple error capture to dmesg on job timeout" + default n + help + Choose this option when debugging an unexpected job timeout + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_XE_KUNIT_TEST + tristate "KUnit tests for the drm xe driver" if !KUNIT_ALL_TESTS + depends on DRM_XE && KUNIT + default KUNIT_ALL_TESTS + select DRM_EXPORT_FOR_TESTS if m + help + Choose this option to allow the driver to perform selftests under + the kunit framework + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_XE_LARGE_GUC_BUFFER + bool "Enable larger guc log buffer" + default n + help + Choose this option when debugging guc issues. + Buffer should be large enough for complex issues. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_XE_USERPTR_INVAL_INJECT + bool "Inject userptr invalidation -EINVAL errors" + default n + help + Choose this option when debugging error paths that + are hit during checks for userptr invalidations. + + Recomended for driver developers only. + If in doubt, say "N". diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile new file mode 100644 index 000000000000..228a87f2fe7b --- /dev/null +++ b/drivers/gpu/drm/xe/Makefile @@ -0,0 +1,121 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the drm device driver. This driver provides support for the +# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. + +# Add a set of useful warning flags and enable -Werror for CI to prevent +# trivial mistakes from creeping in. We have to do this piecemeal as we reject +# any patch that isn't warning clean, so turning on -Wall -Wextra (or W=1) we +# need to filter out dubious warnings. 
Still it is our interest +# to keep running locally with W=1 C=1 until we are completely clean. +# +# Note the danger in using -Wall -Wextra is that when CI updates gcc we +# will most likely get a sudden build breakage... Hopefully we will fix +# new warnings before CI updates! +subdir-ccflags-y := -Wall -Wextra +# making these call cc-disable-warning breaks when trying to build xe.mod.o +# by calling make M=drivers/gpu/drm/xe. This doesn't happen in upstream tree, +# so it was somehow fixed by the changes in the build system. Move it back to +# $(call cc-disable-warning, ...) after rebase. +subdir-ccflags-y += -Wno-unused-parameter +subdir-ccflags-y += -Wno-type-limits +#subdir-ccflags-y += $(call cc-disable-warning, unused-parameter) +#subdir-ccflags-y += $(call cc-disable-warning, type-limits) +subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers) +subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable) +# clang warnings +subdir-ccflags-y += $(call cc-disable-warning, sign-compare) +subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized) +subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides) +subdir-ccflags-y += $(call cc-disable-warning, frame-address) +subdir-ccflags-$(CONFIG_DRM_XE_WERROR) += -Werror + +# Fine grained warnings disable +CFLAGS_xe_pci.o = $(call cc-disable-warning, override-init) + +subdir-ccflags-y += -I$(srctree)/$(src) + +# Please keep these build lists sorted! + +# core driver code + +xe-y += xe_bb.o \ + xe_bo.o \ + xe_bo_evict.o \ + xe_debugfs.o \ + xe_device.o \ + xe_dma_buf.o \ + xe_engine.o \ + xe_exec.o \ + xe_execlist.o \ + xe_force_wake.o \ + xe_ggtt.o \ + xe_gpu_scheduler.o \ + xe_gt.o \ + xe_gt_clock.o \ + xe_gt_debugfs.o \ + xe_gt_mcr.o \ + xe_gt_pagefault.o \ + xe_gt_sysfs.o \ + xe_gt_topology.o \ + xe_guc.o \ + xe_guc_ads.o \ + xe_guc_ct.o \ + xe_guc_debugfs.o \ + xe_guc_hwconfig.o \ + xe_guc_log.o \ + xe_guc_pc.o \ + xe_guc_submit.o \ + xe_hw_engine.o \ + xe_hw_fence.o \ + xe_huc.o \ + xe_huc_debugfs.o \ + xe_irq.o \ + xe_lrc.o \ + xe_migrate.o \ + xe_mmio.o \ + xe_mocs.o \ + xe_module.o \ + xe_pci.o \ + xe_pcode.o \ + xe_pm.o \ + xe_preempt_fence.o \ + xe_pt.o \ + xe_pt_walk.o \ + xe_query.o \ + xe_reg_sr.o \ + xe_reg_whitelist.o \ + xe_rtp.o \ + xe_ring_ops.o \ + xe_sa.o \ + xe_sched_job.o \ + xe_step.o \ + xe_sync.o \ + xe_trace.o \ + xe_ttm_gtt_mgr.o \ + xe_ttm_vram_mgr.o \ + xe_tuning.o \ + xe_uc.o \ + xe_uc_debugfs.o \ + xe_uc_fw.o \ + xe_vm.o \ + xe_vm_madvise.o \ + xe_wait_user_fence.o \ + xe_wa.o \ + xe_wopcm.o + +# XXX: Needed for i915 register definitions. Will be removed after xe-regs. 
+subdir-ccflags-y += -I$(srctree)/drivers/gpu/drm/i915/ + +obj-$(CONFIG_DRM_XE) += xe.o +obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/ +\ +# header test +always-$(CONFIG_DRM_XE_WERROR) += \ + $(patsubst %.h,%.hdrtest, $(shell cd $(srctree)/$(src) && find * -name '*.h')) + +quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@) + cmd_hdrtest = $(CC) -DHDRTEST $(filter-out $(CFLAGS_GCOV), $(c_flags)) -S -o /dev/null -x c /dev/null -include $<; touch $@ + +$(obj)/%.hdrtest: $(src)/%.h FORCE + $(call if_changed_dep,hdrtest) diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h new file mode 100644 index 000000000000..3062e0e0d467 --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -0,0 +1,219 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2021 Intel Corporation + */ + +#ifndef _ABI_GUC_ACTIONS_ABI_H +#define _ABI_GUC_ACTIONS_ABI_H + +/** + * DOC: HOST2GUC_SELF_CFG + * + * This message is used by Host KMD to setup of the `GuC Self Config KLVs`_. + * + * This message must be sent as `MMIO HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_SELF_CFG` = 0x0508 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:16 | **KLV_KEY** - KLV key, see `GuC Self Config KLVs`_ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **KLV_LEN** - KLV length | + * | | | | + * | | | - 32 bit KLV = 1 | + * | | | - 64 bit KLV = 2 | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **VALUE32** - Bits 31-0 of the KLV value | + * +---+-------+--------------------------------------------------------------+ + * | 3 | 31:0 | **VALUE64** - Bits 63-32 of the KLV value (**KLV_LEN** = 2) | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = **NUM** - 1 if KLV was parsed, 0 if not recognized | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_HOST2GUC_SELF_CFG 0x0508 + +#define HOST2GUC_SELF_CFG_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY (0xffff << 16) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN (0xffff << 0) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32 GUC_HXG_REQUEST_MSG_n_DATAn +#define HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64 GUC_HXG_REQUEST_MSG_n_DATAn + +#define 
HOST2GUC_SELF_CFG_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define HOST2GUC_SELF_CFG_RESPONSE_MSG_0_NUM GUC_HXG_RESPONSE_MSG_0_DATA0 + +/** + * DOC: HOST2GUC_CONTROL_CTB + * + * This H2G action allows Vf Host to enable or disable H2G and G2H `CT Buffer`_. + * + * This message must be sent as `MMIO HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_CONTROL_CTB` = 0x4509 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **CONTROL** - control `CTB based communication`_ | + * | | | | + * | | | - _`GUC_CTB_CONTROL_DISABLE` = 0 | + * | | | - _`GUC_CTB_CONTROL_ENABLE` = 1 | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_HOST2GUC_CONTROL_CTB 0x4509 + +#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) +#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL GUC_HXG_REQUEST_MSG_n_DATAn +#define GUC_CTB_CONTROL_DISABLE 0u +#define GUC_CTB_CONTROL_ENABLE 1u + +#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 + +/* legacy definitions */ + +enum xe_guc_action { + XE_GUC_ACTION_DEFAULT = 0x0, + XE_GUC_ACTION_REQUEST_PREEMPTION = 0x2, + XE_GUC_ACTION_REQUEST_ENGINE_RESET = 0x3, + XE_GUC_ACTION_ALLOCATE_DOORBELL = 0x10, + XE_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20, + XE_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30, + XE_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x40, + XE_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302, + XE_GUC_ACTION_ENTER_S_STATE = 0x501, + XE_GUC_ACTION_EXIT_S_STATE = 0x502, + XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506, + XE_GUC_ACTION_SCHED_CONTEXT = 0x1000, + XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001, + XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002, + XE_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003, + XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004, + XE_GUC_ACTION_SET_CONTEXT_PRIORITY = 0x1005, + XE_GUC_ACTION_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006, + XE_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007, + XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008, + XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009, + XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B, + XE_GUC_ACTION_SETUP_PC_GUCRC = 0x3004, + XE_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, + 
XE_GUC_ACTION_GET_HWCONFIG = 0x4100, + XE_GUC_ACTION_REGISTER_CONTEXT = 0x4502, + XE_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503, + XE_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, + XE_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, + XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600, + XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601, + XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507, + XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, + XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000, + XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002, + XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003, + XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY = 0x6004, + XE_GUC_ACTION_TLB_INVALIDATION = 0x7000, + XE_GUC_ACTION_TLB_INVALIDATION_DONE = 0x7001, + XE_GUC_ACTION_TLB_INVALIDATION_ALL = 0x7002, + XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002, + XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003, + XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004, + XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005, + XE_GUC_ACTION_LIMIT +}; + +enum xe_guc_rc_options { + XE_GUCRC_HOST_CONTROL, + XE_GUCRC_FIRMWARE_CONTROL, +}; + +enum xe_guc_preempt_options { + XE_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4, + XE_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8, +}; + +enum xe_guc_report_status { + XE_GUC_REPORT_STATUS_UNKNOWN = 0x0, + XE_GUC_REPORT_STATUS_ACKED = 0x1, + XE_GUC_REPORT_STATUS_ERROR = 0x2, + XE_GUC_REPORT_STATUS_COMPLETE = 0x4, +}; + +enum xe_guc_sleep_state_status { + XE_GUC_SLEEP_STATE_SUCCESS = 0x1, + XE_GUC_SLEEP_STATE_PREEMPT_TO_IDLE_FAILED = 0x2, + XE_GUC_SLEEP_STATE_ENGINE_RESET_FAILED = 0x3 +#define XE_GUC_SLEEP_STATE_INVALID_MASK 0x80000000 +}; + +#define GUC_LOG_CONTROL_LOGGING_ENABLED (1 << 0) +#define GUC_LOG_CONTROL_VERBOSITY_SHIFT 4 +#define GUC_LOG_CONTROL_VERBOSITY_MASK (0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT) +#define GUC_LOG_CONTROL_DEFAULT_LOGGING (1 << 8) + +#define XE_GUC_TLB_INVAL_TYPE_SHIFT 0 +#define XE_GUC_TLB_INVAL_MODE_SHIFT 8 +/* Flush PPC or SMRO caches along with TLB invalidation request */ +#define XE_GUC_TLB_INVAL_FLUSH_CACHE (1 << 31) + +enum xe_guc_tlb_invalidation_type { + XE_GUC_TLB_INVAL_FULL = 0x0, + XE_GUC_TLB_INVAL_PAGE_SELECTIVE = 0x1, + XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX = 0x2, + XE_GUC_TLB_INVAL_GUC = 0x3, +}; + +/* + * 0: Heavy mode of Invalidation: + * The pipeline of the engine(s) for which the invalidation is targeted to is + * blocked, and all the in-flight transactions are guaranteed to be Globally + * Observed before completing the TLB invalidation + * 1: Lite mode of Invalidation: + * TLBs of the targeted engine(s) are immediately invalidated. + * In-flight transactions are NOT guaranteed to be Globally Observed before + * completing TLB invalidation. + * Light Invalidation Mode is to be used only when + * it can be guaranteed (by SW) that the address translations remain invariant + * for the in-flight transactions across the TLB invalidation. In other words, + * this mode can be used when the TLB invalidation is intended to clear out the + * stale cached translations that are no longer in use. Light Invalidation Mode + * is much faster than the Heavy Invalidation Mode, as it does not wait for the + * in-flight transactions to be GOd. 
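+ *
+ * Illustrative sketch (editor's addition, not part of the original patch):
+ * given the XE_GUC_TLB_INVAL_*_SHIFT definitions above, a page-selective
+ * invalidation requested in lite mode would presumably combine its fields
+ * along the lines of:
+ *
+ *   (XE_GUC_TLB_INVAL_PAGE_SELECTIVE << XE_GUC_TLB_INVAL_TYPE_SHIFT) |
+ *   (XE_GUC_TLB_INVAL_MODE_LITE << XE_GUC_TLB_INVAL_MODE_SHIFT) |
+ *   XE_GUC_TLB_INVAL_FLUSH_CACHE
+ *
+ * with XE_GUC_TLB_INVAL_FLUSH_CACHE set only when the PPC or SMRO caches
+ * should be flushed along with the TLB invalidation.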
+ */ +enum xe_guc_tlb_inval_mode { + XE_GUC_TLB_INVAL_MODE_HEAVY = 0x0, + XE_GUC_TLB_INVAL_MODE_LITE = 0x1, +}; + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h new file mode 100644 index 000000000000..811add10c30d --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h @@ -0,0 +1,249 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _GUC_ACTIONS_SLPC_ABI_H_ +#define _GUC_ACTIONS_SLPC_ABI_H_ + +#include + +/** + * DOC: SLPC SHARED DATA STRUCTURE + * + * +----+------+--------------------------------------------------------------+ + * | CL | Bytes| Description | + * +====+======+==============================================================+ + * | 1 | 0-3 | SHARED DATA SIZE | + * | +------+--------------------------------------------------------------+ + * | | 4-7 | GLOBAL STATE | + * | +------+--------------------------------------------------------------+ + * | | 8-11 | DISPLAY DATA ADDRESS | + * | +------+--------------------------------------------------------------+ + * | | 12:63| PADDING | + * +----+------+--------------------------------------------------------------+ + * | | 0:63 | PADDING(PLATFORM INFO) | + * +----+------+--------------------------------------------------------------+ + * | 3 | 0-3 | TASK STATE DATA | + * + +------+--------------------------------------------------------------+ + * | | 4:63 | PADDING | + * +----+------+--------------------------------------------------------------+ + * |4-21|0:1087| OVERRIDE PARAMS AND BIT FIELDS | + * +----+------+--------------------------------------------------------------+ + * | | | PADDING + EXTRA RESERVED PAGE | + * +----+------+--------------------------------------------------------------+ + */ + +/* + * SLPC exposes certain parameters for global configuration by the host. + * These are referred to as override parameters, because in most cases + * the host will not need to modify the default values used by SLPC. + * SLPC remembers the default values which allows the host to easily restore + * them by simply unsetting the override. The host can set or unset override + * parameters during SLPC (re-)initialization using the SLPC Reset event. 
+ * The host can also set or unset override parameters on the fly using the + * Parameter Set and Parameter Unset events + */ + +#define SLPC_MAX_OVERRIDE_PARAMETERS 256 +#define SLPC_OVERRIDE_BITFIELD_SIZE \ + (SLPC_MAX_OVERRIDE_PARAMETERS / 32) + +#define SLPC_PAGE_SIZE_BYTES 4096 +#define SLPC_CACHELINE_SIZE_BYTES 64 +#define SLPC_SHARED_DATA_SIZE_BYTE_HEADER SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE SLPC_PAGE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_MAX (2 * SLPC_PAGE_SIZE_BYTES) + +/* + * Cacheline size aligned (Total size needed for + * SLPM_KMD_MAX_OVERRIDE_PARAMETERS=256 is 1088 bytes) + */ +#define SLPC_OVERRIDE_PARAMS_TOTAL_BYTES (((((SLPC_MAX_OVERRIDE_PARAMETERS * 4) \ + + ((SLPC_MAX_OVERRIDE_PARAMETERS / 32) * 4)) \ + + (SLPC_CACHELINE_SIZE_BYTES - 1)) / SLPC_CACHELINE_SIZE_BYTES) * \ + SLPC_CACHELINE_SIZE_BYTES) + +#define SLPC_SHARED_DATA_SIZE_BYTE_OTHER (SLPC_SHARED_DATA_SIZE_BYTE_MAX - \ + (SLPC_SHARED_DATA_SIZE_BYTE_HEADER \ + + SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO \ + + SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE \ + + SLPC_OVERRIDE_PARAMS_TOTAL_BYTES \ + + SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE)) + +enum slpc_task_enable { + SLPC_PARAM_TASK_DEFAULT = 0, + SLPC_PARAM_TASK_ENABLED, + SLPC_PARAM_TASK_DISABLED, + SLPC_PARAM_TASK_UNKNOWN +}; + +enum slpc_global_state { + SLPC_GLOBAL_STATE_NOT_RUNNING = 0, + SLPC_GLOBAL_STATE_INITIALIZING = 1, + SLPC_GLOBAL_STATE_RESETTING = 2, + SLPC_GLOBAL_STATE_RUNNING = 3, + SLPC_GLOBAL_STATE_SHUTTING_DOWN = 4, + SLPC_GLOBAL_STATE_ERROR = 5 +}; + +enum slpc_param_id { + SLPC_PARAM_TASK_ENABLE_GTPERF = 0, + SLPC_PARAM_TASK_DISABLE_GTPERF = 1, + SLPC_PARAM_TASK_ENABLE_BALANCER = 2, + SLPC_PARAM_TASK_DISABLE_BALANCER = 3, + SLPC_PARAM_TASK_ENABLE_DCC = 4, + SLPC_PARAM_TASK_DISABLE_DCC = 5, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ = 6, + SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ = 7, + SLPC_PARAM_GLOBAL_MIN_GT_SLICE_FREQ_MHZ = 8, + SLPC_PARAM_GLOBAL_MAX_GT_SLICE_FREQ_MHZ = 9, + SLPC_PARAM_GTPERF_THRESHOLD_MAX_FPS = 10, + SLPC_PARAM_GLOBAL_DISABLE_GT_FREQ_MANAGEMENT = 11, + SLPC_PARAM_GTPERF_ENABLE_FRAMERATE_STALLING = 12, + SLPC_PARAM_GLOBAL_DISABLE_RC6_MODE_CHANGE = 13, + SLPC_PARAM_GLOBAL_OC_UNSLICE_FREQ_MHZ = 14, + SLPC_PARAM_GLOBAL_OC_SLICE_FREQ_MHZ = 15, + SLPC_PARAM_GLOBAL_ENABLE_IA_GT_BALANCING = 16, + SLPC_PARAM_GLOBAL_ENABLE_ADAPTIVE_BURST_TURBO = 17, + SLPC_PARAM_GLOBAL_ENABLE_EVAL_MODE = 18, + SLPC_PARAM_GLOBAL_ENABLE_BALANCER_IN_NON_GAMING_MODE = 19, + SLPC_PARAM_GLOBAL_RT_MODE_TURBO_FREQ_DELTA_MHZ = 20, + SLPC_PARAM_PWRGATE_RC_MODE = 21, + SLPC_PARAM_EDR_MODE_COMPUTE_TIMEOUT_MS = 22, + SLPC_PARAM_EDR_QOS_FREQ_MHZ = 23, + SLPC_PARAM_MEDIA_FF_RATIO_MODE = 24, + SLPC_PARAM_ENABLE_IA_FREQ_LIMITING = 25, + SLPC_PARAM_STRATEGIES = 26, + SLPC_PARAM_POWER_PROFILE = 27, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY = 28, + SLPC_MAX_PARAM = 32, +}; + +enum slpc_media_ratio_mode { + SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL = 0, + SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_ONE = 1, + SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO = 2, +}; + +enum slpc_gucrc_mode { + SLPC_GUCRC_MODE_HW = 0, + SLPC_GUCRC_MODE_GUCRC_NO_RC6 = 1, + SLPC_GUCRC_MODE_GUCRC_STATIC_TIMEOUT = 2, + SLPC_GUCRC_MODE_GUCRC_DYNAMIC_HYSTERESIS = 3, + + SLPC_GUCRC_MODE_MAX, +}; + +enum slpc_event_id { + SLPC_EVENT_RESET = 0, + SLPC_EVENT_SHUTDOWN = 1, + SLPC_EVENT_PLATFORM_INFO_CHANGE = 2, + 
SLPC_EVENT_DISPLAY_MODE_CHANGE = 3, + SLPC_EVENT_FLIP_COMPLETE = 4, + SLPC_EVENT_QUERY_TASK_STATE = 5, + SLPC_EVENT_PARAMETER_SET = 6, + SLPC_EVENT_PARAMETER_UNSET = 7, +}; + +struct slpc_task_state_data { + union { + u32 task_status_padding; + struct { + u32 status; +#define SLPC_GTPERF_TASK_ENABLED REG_BIT(0) +#define SLPC_DCC_TASK_ENABLED REG_BIT(11) +#define SLPC_IN_DCC REG_BIT(12) +#define SLPC_BALANCER_ENABLED REG_BIT(15) +#define SLPC_IBC_TASK_ENABLED REG_BIT(16) +#define SLPC_BALANCER_IA_LMT_ENABLED REG_BIT(17) +#define SLPC_BALANCER_IA_LMT_ACTIVE REG_BIT(18) + }; + }; + union { + u32 freq_padding; + struct { +#define SLPC_MAX_UNSLICE_FREQ_MASK REG_GENMASK(7, 0) +#define SLPC_MIN_UNSLICE_FREQ_MASK REG_GENMASK(15, 8) +#define SLPC_MAX_SLICE_FREQ_MASK REG_GENMASK(23, 16) +#define SLPC_MIN_SLICE_FREQ_MASK REG_GENMASK(31, 24) + u32 freq; + }; + }; +} __packed; + +struct slpc_shared_data_header { + /* Total size in bytes of this shared buffer. */ + u32 size; + u32 global_state; + u32 display_data_addr; +} __packed; + +struct slpc_override_params { + u32 bits[SLPC_OVERRIDE_BITFIELD_SIZE]; + u32 values[SLPC_MAX_OVERRIDE_PARAMETERS]; +} __packed; + +struct slpc_shared_data { + struct slpc_shared_data_header header; + u8 shared_data_header_pad[SLPC_SHARED_DATA_SIZE_BYTE_HEADER - + sizeof(struct slpc_shared_data_header)]; + + u8 platform_info_pad[SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO]; + + struct slpc_task_state_data task_state_data; + u8 task_state_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE - + sizeof(struct slpc_task_state_data)]; + + struct slpc_override_params override_params; + u8 override_params_pad[SLPC_OVERRIDE_PARAMS_TOTAL_BYTES - + sizeof(struct slpc_override_params)]; + + u8 shared_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_OTHER]; + + /* PAGE 2 (4096 bytes), mode based parameter will be removed soon */ + u8 reserved_mode_definition[4096]; +} __packed; + +/** + * DOC: SLPC H2G MESSAGE FORMAT + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_PC_SLPM_REQUEST` = 0x3003 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:8 | **EVENT_ID** | + * + +-------+--------------------------------------------------------------+ + * | | 7:0 | **EVENT_ARGC** - number of data arguments | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **EVENT_DATA1** | + * +---+-------+--------------------------------------------------------------+ + * |...| 31:0 | ... 
| + * +---+-------+--------------------------------------------------------------+ + * |2+n| 31:0 | **EVENT_DATAn** | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST 0x3003 + +#define HOST2GUC_PC_SLPC_REQUEST_MSG_MIN_LEN \ + (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) +#define HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS 9 +#define HOST2GUC_PC_SLPC_REQUEST_MSG_MAX_LEN \ + (HOST2GUC_PC_SLPC_REQUEST_REQUEST_MSG_MIN_LEN + \ + HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID (0xff << 8) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC (0xff << 0) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_N_EVENT_DATA_N GUC_HXG_REQUEST_MSG_n_DATAn + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h new file mode 100644 index 000000000000..41244055cc0c --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h @@ -0,0 +1,189 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2021 Intel Corporation + */ + +#ifndef _ABI_GUC_COMMUNICATION_CTB_ABI_H +#define _ABI_GUC_COMMUNICATION_CTB_ABI_H + +#include +#include + +#include "guc_messages_abi.h" + +/** + * DOC: CT Buffer + * + * Circular buffer used to send `CTB Message`_ + */ + +/** + * DOC: CTB Descriptor + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:0 | **HEAD** - offset (in dwords) to the last dword that was | + * | | | read from the `CT Buffer`_. | + * | | | It can only be updated by the receiver. | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **TAIL** - offset (in dwords) to the last dword that was | + * | | | written to the `CT Buffer`_. | + * | | | It can only be updated by the sender. 
| + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **STATUS** - status of the CTB | + * | | | | + * | | | - _`GUC_CTB_STATUS_NO_ERROR` = 0 (normal operation) | + * | | | - _`GUC_CTB_STATUS_OVERFLOW` = 1 (head/tail too large) | + * | | | - _`GUC_CTB_STATUS_UNDERFLOW` = 2 (truncated message) | + * | | | - _`GUC_CTB_STATUS_MISMATCH` = 4 (head/tail modified) | + * +---+-------+--------------------------------------------------------------+ + * |...| | RESERVED = MBZ | + * +---+-------+--------------------------------------------------------------+ + * | 15| 31:0 | RESERVED = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ + +struct guc_ct_buffer_desc { + u32 head; + u32 tail; + u32 status; +#define GUC_CTB_STATUS_NO_ERROR 0 +#define GUC_CTB_STATUS_OVERFLOW (1 << 0) +#define GUC_CTB_STATUS_UNDERFLOW (1 << 1) +#define GUC_CTB_STATUS_MISMATCH (1 << 2) + u32 reserved[13]; +} __packed; +static_assert(sizeof(struct guc_ct_buffer_desc) == 64); + +/** + * DOC: CTB Message + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:16 | **FENCE** - message identifier | + * | +-------+--------------------------------------------------------------+ + * | | 15:12 | **FORMAT** - format of the CTB message | + * | | | - _`GUC_CTB_FORMAT_HXG` = 0 - see `CTB HXG Message`_ | + * | +-------+--------------------------------------------------------------+ + * | | 11:8 | **RESERVED** | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | **NUM_DWORDS** - length of the CTB message (w/o header) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | optional (depends on FORMAT) | + * +---+-------+ | + * |...| | | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_CTB_HDR_LEN 1u +#define GUC_CTB_MSG_MIN_LEN GUC_CTB_HDR_LEN +#define GUC_CTB_MSG_MAX_LEN 256u +#define GUC_CTB_MSG_0_FENCE (0xffff << 16) +#define GUC_CTB_MSG_0_FORMAT (0xf << 12) +#define GUC_CTB_FORMAT_HXG 0u +#define GUC_CTB_MSG_0_RESERVED (0xf << 8) +#define GUC_CTB_MSG_0_NUM_DWORDS (0xff << 0) + +/** + * DOC: CTB HXG Message + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:16 | FENCE | + * | +-------+--------------------------------------------------------------+ + * | | 15:12 | FORMAT = GUC_CTB_FORMAT_HXG_ | + * | +-------+--------------------------------------------------------------+ + * | | 11:8 | RESERVED = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | NUM_DWORDS = length (in dwords) of the embedded HXG message | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | [Embedded `HXG Message`_] | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_CTB_HXG_MSG_MIN_LEN (GUC_CTB_MSG_MIN_LEN + GUC_HXG_MSG_MIN_LEN) +#define GUC_CTB_HXG_MSG_MAX_LEN GUC_CTB_MSG_MAX_LEN + +/** + * DOC: CTB based communication + * + * The CTB (command transport buffer) communication between Host and 
GuC + * is based on u32 data stream written to the shared buffer. One buffer can + * be used to transmit data only in one direction (one-directional channel). + * + * Current status of the each buffer is stored in the buffer descriptor. + * Buffer descriptor holds tail and head fields that represents active data + * stream. The tail field is updated by the data producer (sender), and head + * field is updated by the data consumer (receiver):: + * + * +------------+ + * | DESCRIPTOR | +=================+============+========+ + * +============+ | | MESSAGE(s) | | + * | address |--------->+=================+============+========+ + * +------------+ + * | head | ^-----head--------^ + * +------------+ + * | tail | ^---------tail-----------------^ + * +------------+ + * | size | ^---------------size--------------------^ + * +------------+ + * + * Each message in data stream starts with the single u32 treated as a header, + * followed by optional set of u32 data that makes message specific payload:: + * + * +------------+---------+---------+---------+ + * | MESSAGE | + * +------------+---------+---------+---------+ + * | msg[0] | [1] | ... | [n-1] | + * +------------+---------+---------+---------+ + * | MESSAGE | MESSAGE PAYLOAD | + * + HEADER +---------+---------+---------+ + * | | 0 | ... | n | + * +======+=====+=========+=========+=========+ + * | 31:16| code| | | | + * +------+-----+ | | | + * | 15:5|flags| | | | + * +------+-----+ | | | + * | 4:0| len| | | | + * +------+-----+---------+---------+---------+ + * + * ^-------------len-------------^ + * + * The message header consists of: + * + * - **len**, indicates length of the message payload (in u32) + * - **code**, indicates message code + * - **flags**, holds various bits to control message handling + */ + +/* + * Definition of the command transport message header (DW0) + * + * bit[4..0] message len (in dwords) + * bit[7..5] reserved + * bit[8] response (G2H only) + * bit[8] write fence to desc (H2G only) + * bit[9] write status to H2G buff (H2G only) + * bit[10] send status back via G2H (H2G only) + * bit[15..11] reserved + * bit[31..16] action code + */ +#define GUC_CT_MSG_LEN_SHIFT 0 +#define GUC_CT_MSG_LEN_MASK 0x1F +#define GUC_CT_MSG_IS_RESPONSE (1 << 8) +#define GUC_CT_MSG_WRITE_FENCE_TO_DESC (1 << 8) +#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF (1 << 9) +#define GUC_CT_MSG_SEND_STATUS (1 << 10) +#define GUC_CT_MSG_ACTION_SHIFT 16 +#define GUC_CT_MSG_ACTION_MASK 0xFFFF + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h new file mode 100644 index 000000000000..ef538e34f894 --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2021 Intel Corporation + */ + +#ifndef _ABI_GUC_COMMUNICATION_MMIO_ABI_H +#define _ABI_GUC_COMMUNICATION_MMIO_ABI_H + +/** + * DOC: GuC MMIO based communication + * + * The MMIO based communication between Host and GuC relies on special + * hardware registers which format could be defined by the software + * (so called scratch registers). + * + * Each MMIO based message, both Host to GuC (H2G) and GuC to Host (G2H) + * messages, which maximum length depends on number of available scratch + * registers, is directly written into those scratch registers. 
+ * + * For Gen9+, there are 16 software scratch registers 0xC180-0xC1B8, + * but no H2G command takes more than 4 parameters and the GuC firmware + * itself uses an 4-element array to store the H2G message. + * + * For Gen11+, there are additional 4 registers 0x190240-0x19024C, which + * are, regardless on lower count, preferred over legacy ones. + * + * The MMIO based communication is mainly used during driver initialization + * phase to setup the `CTB based communication`_ that will be used afterwards. + */ + +#define GUC_MAX_MMIO_MSG_LEN 4 + +/** + * DOC: MMIO HXG Message + * + * Format of the MMIO messages follows definitions of `HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:0 | | + * +---+-------+ | + * |...| | [Embedded `HXG Message`_] | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_errors_abi.h b/drivers/gpu/drm/xe/abi/guc_errors_abi.h new file mode 100644 index 000000000000..ec83551bf9c0 --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_errors_abi.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2021 Intel Corporation + */ + +#ifndef _ABI_GUC_ERRORS_ABI_H +#define _ABI_GUC_ERRORS_ABI_H + +enum xe_guc_response_status { + XE_GUC_RESPONSE_STATUS_SUCCESS = 0x0, + XE_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000, +}; + +enum xe_guc_load_status { + XE_GUC_LOAD_STATUS_DEFAULT = 0x00, + XE_GUC_LOAD_STATUS_START = 0x01, + XE_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH = 0x02, + XE_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH = 0x03, + XE_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE = 0x04, + XE_GUC_LOAD_STATUS_GDT_DONE = 0x10, + XE_GUC_LOAD_STATUS_IDT_DONE = 0x20, + XE_GUC_LOAD_STATUS_LAPIC_DONE = 0x30, + XE_GUC_LOAD_STATUS_GUCINT_DONE = 0x40, + XE_GUC_LOAD_STATUS_DPC_READY = 0x50, + XE_GUC_LOAD_STATUS_DPC_ERROR = 0x60, + XE_GUC_LOAD_STATUS_EXCEPTION = 0x70, + XE_GUC_LOAD_STATUS_INIT_DATA_INVALID = 0x71, + XE_GUC_LOAD_STATUS_PXP_TEARDOWN_CTRL_ENABLED = 0x72, + XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START, + XE_GUC_LOAD_STATUS_MPU_DATA_INVALID = 0x73, + XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID = 0x74, + XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END, + + XE_GUC_LOAD_STATUS_READY = 0xF0, +}; + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h new file mode 100644 index 000000000000..47094b9b044c --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -0,0 +1,322 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _ABI_GUC_KLVS_ABI_H +#define _ABI_GUC_KLVS_ABI_H + +#include + +/** + * DOC: GuC KLV + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:16 | **KEY** - KLV key identifier | + * | | | - `GuC Self Config KLVs`_ | + * | | | - `GuC VGT Policy KLVs`_ | + * | | | - `GuC VF Configuration KLVs`_ | + * | | | | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **LEN** - length of VALUE (in 32bit dwords) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **VALUE** - actual value of the KLV (format depends on KEY) | + * 
+ * +---+-------+                                                              |
+ * |...|       |                                                              |
+ * +---+-------+                                                              |
+ * | n |  31:0 |                                                              |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_KLV_LEN_MIN				1u
+#define GUC_KLV_0_KEY				(0xffff << 16)
+#define GUC_KLV_0_LEN				(0xffff << 0)
+#define GUC_KLV_n_VALUE				(0xffffffff << 0)
+
+/**
+ * DOC: GuC Self Config KLVs
+ *
+ * `GuC KLV`_ keys available for use with HOST2GUC_SELF_CFG_.
+ *
+ * _`GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR` : 0x0900
+ *      Refers to 64 bit Global Gfx address (in bytes) of memory based interrupts
+ *      status vector for use by the GuC.
+ *
+ * _`GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR` : 0x0901
+ *      Refers to 64 bit Global Gfx address (in bytes) of memory based interrupts
+ *      source vector for use by the GuC.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_ADDR` : 0x0902
+ *      Refers to 64 bit Global Gfx address of H2G `CT Buffer`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR` : 0x0903
+ *      Refers to 64 bit Global Gfx address of H2G `CTB Descriptor`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_SIZE` : 0x0904
+ *      Refers to size of H2G `CT Buffer`_ in bytes.
+ *      Should be a multiple of 4K.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_ADDR` : 0x0905
+ *      Refers to 64 bit Global Gfx address of G2H `CT Buffer`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR` : 0x0906
+ *      Refers to 64 bit Global Gfx address of G2H `CTB Descriptor`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_SIZE` : 0x0907
+ *      Refers to size of G2H `CT Buffer`_ in bytes.
+ *      Should be a multiple of 4K.
+ */
+
+#define GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_KEY		0x0900
+#define GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_KEY		0x0901
+#define GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY		0x0902
+#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY	0x0903
+#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_LEN	2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY		0x0904
+#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_LEN		1u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY		0x0905
+#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY	0x0906
+#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_LEN	2u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY		0x0907
+#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN		1u
+
+/*
+ * Per context scheduling policy update keys.
+ */
+enum {
+	GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM			= 0x2001,
+	GUC_CONTEXT_POLICIES_KLV_ID_PREEMPTION_TIMEOUT			= 0x2002,
+	GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY			= 0x2003,
+	GUC_CONTEXT_POLICIES_KLV_ID_PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY	= 0x2004,
+	GUC_CONTEXT_POLICIES_KLV_ID_SLPM_GT_FREQUENCY			= 0x2005,
+
+	GUC_CONTEXT_POLICIES_KLV_NUM_IDS = 5,
+};
+
+/**
+ * DOC: GuC VGT Policy KLVs
+ *
+ * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VGT_POLICY.
+ *
+ * _`GUC_KLV_VGT_POLICY_SCHED_IF_IDLE` : 0x8001
+ *      This config sets whether strict scheduling is enabled, whereby any VF
+ *      that doesn't have work to submit is still allocated a fixed execution
+ *      time-slice, to ensure active VFs' execution is always consistent even
+ *      during other VF reprovisioning / rebooting events.
+ *      Changing this KLV impacts all VFs and takes effect on the next
+ *      VF-Switch event.
+ *
+ *      :0: don't schedule idle (default)
+ *      :1: schedule if idle
+ *
+ * _`GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD` : 0x8002
+ *      This config sets the sample period for tracking adverse event counters.
+ *      A sample period is the period in milliseconds during which events are
+ *      counted. This is applicable to all the VFs.
+ *
+ *      :0: adverse events are not counted (default)
+ *      :n: sample period in milliseconds
+ *
+ * _`GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH` : 0x8D00
+ *      This config sets whether the utilized HW engine is reset after a VF
+ *      switch (i.e. to clean up stale HW registers left behind by the
+ *      previous VF).
+ *
+ *      :0: don't reset (default)
+ *      :1: reset
+ */
+
+#define GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY		0x8001
+#define GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_LEN		1u
+
+#define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY	0x8002
+#define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_LEN	1u
+
+#define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY	0x8D00
+#define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_LEN	1u
+
+/**
+ * DOC: GuC VF Configuration KLVs
+ *
+ * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VF_CFG.
+ *
+ * _`GUC_KLV_VF_CFG_GGTT_START` : 0x0001
+ *      A 4K aligned start GTT address/offset assigned to VF.
+ *      Value is 64 bits.
+ *
+ * _`GUC_KLV_VF_CFG_GGTT_SIZE` : 0x0002
+ *      A 4K aligned size of GGTT assigned to VF.
+ *      Value is 64 bits.
+ *
+ * _`GUC_KLV_VF_CFG_LMEM_SIZE` : 0x0003
+ *      A 2M aligned size of local memory assigned to VF.
+ *      Value is 64 bits.
+ *
+ * _`GUC_KLV_VF_CFG_NUM_CONTEXTS` : 0x0004
+ *      Refers to the number of contexts allocated to this VF.
+ *
+ *      :0: no contexts (default)
+ *      :1-65535: number of contexts (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_TILE_MASK` : 0x0005
+ *      For multi-tiled products, this field contains the bitwise-OR of tiles
+ *      assigned to the VF: Bit-0 set means the VF has access to Tile-0,
+ *      Bit-31 set means the VF has access to Tile-31, and so on.
+ *      At least one tile will always be allocated.
+ *      If all bits are zero, the VF KMD should treat this as a fatal error.
+ *      For single-tile products, this KLV config is ignored.
+ *
+ * _`GUC_KLV_VF_CFG_NUM_DOORBELLS` : 0x0006
+ *      Refers to the number of doorbells allocated to this VF.
+ *
+ *      :0: no doorbells (default)
+ *      :1-255: number of doorbells (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_EXEC_QUANTUM` : 0x8A01
+ *      This config sets the VF's execution quantum in milliseconds.
+ *      GUC will attempt to obey the maximum value as much as HW is capable
+ *      of, but this will never be perfectly exact (accumulated nano-second
+ *      granularity) since the GPU's clock runs off a different crystal
+ *      from the CPU's clock. Changing this KLV on a VF that is currently
+ *      running a context won't take effect until a new context is scheduled in.
+ *      That said, when the PF is changing this value from 0xFFFFFFFF to
+ *      something else, it might never take effect if the VF is running an
+ *      infinitely long compute or shader kernel. In such a scenario, the
+ *      PF would need to trigger a VM PAUSE and then change the KLV to force
+ *      it to take effect. Such cases might typically happen on a 1PF+1VF
+ *      virtualization config enabled for heavier workloads like AI/ML.
+ *
+ *      :0: infinite exec quantum (default)
+ *
+ * _`GUC_KLV_VF_CFG_PREEMPT_TIMEOUT` : 0x8A02
+ *      This config sets the VF-preemption-timeout in microseconds.
+ *      GUC will attempt to obey the minimum and maximum values as much as
+ *      HW is capable of, but this will never be perfectly exact (accumulated
+ *      nano-second granularity) since the GPU's clock runs off a
+ *      different crystal from the CPU's clock. Changing this KLV on a VF
+ *      that is currently running a context won't take effect until a new
+ *      context is scheduled in.
+ *      That said, when the PF is changing this value from 0xFFFFFFFF to
+ *      something else, it might never take effect if the VF is running an
+ *      infinitely long compute or shader kernel.
+ *      In this case, the PF would need to trigger a VM PAUSE and then change
+ *      the KLV to force it to take effect. Such cases might typically happen
+ *      on a 1PF+1VF virtualization config enabled for heavier workloads like
+ *      AI/ML.
+ *
+ *      :0: no preemption timeout (default)
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR` : 0x8A03
+ *      This config sets the threshold for CAT errors caused by the VF.
+ *
+ *      :0: adverse events or errors will not be reported (default)
+ *      :n: event occurrence count per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET` : 0x8A04
+ *      This config sets the threshold for engine resets caused by the VF.
+ *
+ *      :0: adverse events or errors will not be reported (default)
+ *      :n: event occurrence count per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT` : 0x8A05
+ *      This config sets the threshold for page fault errors caused by the VF.
+ *
+ *      :0: adverse events or errors will not be reported (default)
+ *      :n: event occurrence count per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM` : 0x8A06
+ *      This config sets the threshold for H2G interrupts triggered by the VF.
+ *
+ *      :0: adverse events or errors will not be reported (default)
+ *      :n: time (us) per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM` : 0x8A07
+ *      This config sets the threshold for GT interrupts triggered by the VF's
+ *      workloads.
+ *
+ *      :0: adverse events or errors will not be reported (default)
+ *      :n: time (us) per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM` : 0x8A08
+ *      This config sets the threshold for doorbell rings triggered by the VF.
+ *
+ *      :0: adverse events or errors will not be reported (default)
+ *      :n: time (us) per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID` : 0x8A0A
+ *      Refers to the start index of the doorbells assigned to this VF.
+ *
+ *      :0: (default)
+ *      :1-255: number of doorbells (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID` : 0x8A0B
+ *      Refers to the start index in the context array allocated for this VF's use.
+ * + * :0: (default) + * :1-65535: number of contexts (Gen12) + */ + +#define GUC_KLV_VF_CFG_GGTT_START_KEY 0x0001 +#define GUC_KLV_VF_CFG_GGTT_START_LEN 2u + +#define GUC_KLV_VF_CFG_GGTT_SIZE_KEY 0x0002 +#define GUC_KLV_VF_CFG_GGTT_SIZE_LEN 2u + +#define GUC_KLV_VF_CFG_LMEM_SIZE_KEY 0x0003 +#define GUC_KLV_VF_CFG_LMEM_SIZE_LEN 2u + +#define GUC_KLV_VF_CFG_NUM_CONTEXTS_KEY 0x0004 +#define GUC_KLV_VF_CFG_NUM_CONTEXTS_LEN 1u + +#define GUC_KLV_VF_CFG_TILE_MASK_KEY 0x0005 +#define GUC_KLV_VF_CFG_TILE_MASK_LEN 1u + +#define GUC_KLV_VF_CFG_NUM_DOORBELLS_KEY 0x0006 +#define GUC_KLV_VF_CFG_NUM_DOORBELLS_LEN 1u + +#define GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY 0x8a01 +#define GUC_KLV_VF_CFG_EXEC_QUANTUM_LEN 1u + +#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY 0x8a02 +#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_LEN 1u + +#define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_KEY 0x8a03 +#define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_LEN 1u + +#define GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET_KEY 0x8a04 +#define GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET_LEN 1u + +#define GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT_KEY 0x8a05 +#define GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT_LEN 1u + +#define GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM_KEY 0x8a06 +#define GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM_LEN 1u + +#define GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM_KEY 0x8a07 +#define GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM_LEN 1u + +#define GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM_KEY 0x8a08 +#define GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM_LEN 1u + +#define GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_KEY 0x8a0a +#define GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_LEN 1u + +#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY 0x8a0b +#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_LEN 1u + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_messages_abi.h b/drivers/gpu/drm/xe/abi/guc_messages_abi.h new file mode 100644 index 000000000000..3d199016cf88 --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_messages_abi.h @@ -0,0 +1,234 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2021 Intel Corporation + */ + +#ifndef _ABI_GUC_MESSAGES_ABI_H +#define _ABI_GUC_MESSAGES_ABI_H + +/** + * DOC: HXG Message + * + * All messages exchanged with GuC are defined using 32 bit dwords. + * First dword is treated as a message header. Remaining dwords are optional. 
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | | | | + * | 0 | 31 | **ORIGIN** - originator of the message | + * | | | - _`GUC_HXG_ORIGIN_HOST` = 0 | + * | | | - _`GUC_HXG_ORIGIN_GUC` = 1 | + * | | | | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | **TYPE** - message type | + * | | | - _`GUC_HXG_TYPE_REQUEST` = 0 | + * | | | - _`GUC_HXG_TYPE_EVENT` = 1 | + * | | | - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3 | + * | | | - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5 | + * | | | - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6 | + * | | | - _`GUC_HXG_TYPE_RESPONSE_SUCCESS` = 7 | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **AUX** - auxiliary data (depends on TYPE) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | **PAYLOAD** - optional payload (depends on TYPE) | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_MSG_MIN_LEN 1u +#define GUC_HXG_MSG_0_ORIGIN (0x1 << 31) +#define GUC_HXG_ORIGIN_HOST 0u +#define GUC_HXG_ORIGIN_GUC 1u +#define GUC_HXG_MSG_0_TYPE (0x7 << 28) +#define GUC_HXG_TYPE_REQUEST 0u +#define GUC_HXG_TYPE_EVENT 1u +#define GUC_HXG_TYPE_NO_RESPONSE_BUSY 3u +#define GUC_HXG_TYPE_NO_RESPONSE_RETRY 5u +#define GUC_HXG_TYPE_RESPONSE_FAILURE 6u +#define GUC_HXG_TYPE_RESPONSE_SUCCESS 7u +#define GUC_HXG_MSG_0_AUX (0xfffffff << 0) +#define GUC_HXG_MSG_n_PAYLOAD (0xffffffff << 0) + +/** + * DOC: HXG Request + * + * The `HXG Request`_ message should be used to initiate synchronous activity + * for which confirmation or return data is expected. + * + * The recipient of this message shall use `HXG Response`_, `HXG Failure`_ + * or `HXG Retry`_ message as a definite reply, and may use `HXG Busy`_ + * message as a intermediate reply. + * + * Format of @DATA0 and all @DATAn fields depends on the @ACTION code. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | **DATA0** - request data (depends on ACTION) | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **ACTION** - requested action code | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | **DATAn** - optional data (depends on ACTION) | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_REQUEST_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_REQUEST_MSG_0_DATA0 (0xfff << 16) +#define GUC_HXG_REQUEST_MSG_0_ACTION (0xffff << 0) +#define GUC_HXG_REQUEST_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD + +/** + * DOC: HXG Event + * + * The `HXG Event`_ message should be used to initiate asynchronous activity + * that does not involves immediate confirmation nor data. + * + * Format of @DATA0 and all @DATAn fields depends on the @ACTION code. 
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | **DATA0** - event data (depends on ACTION) | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **ACTION** - event action code | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | **DATAn** - optional event data (depends on ACTION) | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_EVENT_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_EVENT_MSG_0_DATA0 (0xfff << 16) +#define GUC_HXG_EVENT_MSG_0_ACTION (0xffff << 0) +#define GUC_HXG_EVENT_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD + +/** + * DOC: HXG Busy + * + * The `HXG Busy`_ message may be used to acknowledge reception of the `HXG Request`_ + * message if the recipient expects that it processing will be longer than default + * timeout. + * + * The @COUNTER field may be used as a progress indicator. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_BUSY_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **COUNTER** - progress indicator | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_BUSY_MSG_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_BUSY_MSG_0_COUNTER GUC_HXG_MSG_0_AUX + +/** + * DOC: HXG Retry + * + * The `HXG Retry`_ message should be used by recipient to indicate that the + * `HXG Request`_ message was dropped and it should be resent again. + * + * The @REASON field may be used to provide additional information. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_RETRY_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **REASON** - reason for retry | + * | | | - _`GUC_HXG_RETRY_REASON_UNSPECIFIED` = 0 | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_RETRY_MSG_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_RETRY_MSG_0_REASON GUC_HXG_MSG_0_AUX +#define GUC_HXG_RETRY_REASON_UNSPECIFIED 0u + +/** + * DOC: HXG Failure + * + * The `HXG Failure`_ message shall be used as a reply to the `HXG Request`_ + * message that could not be processed due to an error. 
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_FAILURE_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | **HINT** - additional error hint | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **ERROR** - error/result code | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_FAILURE_MSG_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_FAILURE_MSG_0_HINT (0xfff << 16) +#define GUC_HXG_FAILURE_MSG_0_ERROR (0xffff << 0) + +/** + * DOC: HXG Response + * + * The `HXG Response`_ message shall be used as a reply to the `HXG Request`_ + * message that was successfully processed without an error. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **DATA0** - data (depends on ACTION from `HXG Request`_) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | **DATAn** - data (depends on ACTION from `HXG Request`_) | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_RESPONSE_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_RESPONSE_MSG_0_DATA0 GUC_HXG_MSG_0_AUX +#define GUC_HXG_RESPONSE_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD + +/* deprecated */ +#define INTEL_GUC_MSG_TYPE_SHIFT 28 +#define INTEL_GUC_MSG_TYPE_MASK (0xF << INTEL_GUC_MSG_TYPE_SHIFT) +#define INTEL_GUC_MSG_DATA_SHIFT 16 +#define INTEL_GUC_MSG_DATA_MASK (0xFFF << INTEL_GUC_MSG_DATA_SHIFT) +#define INTEL_GUC_MSG_CODE_SHIFT 0 +#define INTEL_GUC_MSG_CODE_MASK (0xFFFF << INTEL_GUC_MSG_CODE_SHIFT) + +enum intel_guc_msg_type { + INTEL_GUC_MSG_TYPE_REQUEST = 0x0, + INTEL_GUC_MSG_TYPE_RESPONSE = 0xF, +}; + +#endif diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile new file mode 100644 index 000000000000..47056b6459e3 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_bo_test.o xe_dma_buf_test.o \ + xe_migrate_test.o diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c new file mode 100644 index 000000000000..87ac21cc8ca9 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -0,0 +1,303 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include "xe_bo_evict.h" +#include "xe_pci.h" + +static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo, + bool clear, u64 get_val, u64 assign_val, + struct kunit *test) +{ + struct dma_fence *fence; + struct ttm_tt *ttm; + struct page *page; + pgoff_t ccs_page; + long timeout; + u64 *cpu_map; + int ret; + u32 offset; + + /* Move bo to VRAM if not already there. 
*/ + ret = xe_bo_validate(bo, NULL, false); + if (ret) { + KUNIT_FAIL(test, "Failed to validate bo.\n"); + return ret; + } + + /* Optionally clear bo *and* CCS data in VRAM. */ + if (clear) { + fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource, 0); + if (IS_ERR(fence)) { + KUNIT_FAIL(test, "Failed to submit bo clear.\n"); + return PTR_ERR(fence); + } + dma_fence_put(fence); + } + + /* Evict to system. CCS data should be copied. */ + ret = xe_bo_evict(bo, true); + if (ret) { + KUNIT_FAIL(test, "Failed to evict bo.\n"); + return ret; + } + + /* Sync all migration blits */ + timeout = dma_resv_wait_timeout(bo->ttm.base.resv, + DMA_RESV_USAGE_KERNEL, + true, + 5 * HZ); + if (timeout <= 0) { + KUNIT_FAIL(test, "Failed to sync bo eviction.\n"); + return -ETIME; + } + + /* + * Bo with CCS data is now in system memory. Verify backing store + * and data integrity. Then assign for the next testing round while + * we still have a CPU map. + */ + ttm = bo->ttm.ttm; + if (!ttm || !ttm_tt_is_populated(ttm)) { + KUNIT_FAIL(test, "Bo was not in expected placement.\n"); + return -EINVAL; + } + + ccs_page = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT; + if (ccs_page >= ttm->num_pages) { + KUNIT_FAIL(test, "No TTM CCS pages present.\n"); + return -EINVAL; + } + + page = ttm->pages[ccs_page]; + cpu_map = kmap_local_page(page); + + /* Check first CCS value */ + if (cpu_map[0] != get_val) { + KUNIT_FAIL(test, + "Expected CCS readout 0x%016llx, got 0x%016llx.\n", + (unsigned long long)get_val, + (unsigned long long)cpu_map[0]); + ret = -EINVAL; + } + + /* Check last CCS value, or at least last value in page. */ + offset = xe_device_ccs_bytes(gt->xe, bo->size); + offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1; + if (cpu_map[offset] != get_val) { + KUNIT_FAIL(test, + "Expected CCS readout 0x%016llx, got 0x%016llx.\n", + (unsigned long long)get_val, + (unsigned long long)cpu_map[offset]); + ret = -EINVAL; + } + + cpu_map[0] = assign_val; + cpu_map[offset] = assign_val; + kunmap_local(cpu_map); + + return ret; +} + +static void ccs_test_run_gt(struct xe_device *xe, struct xe_gt *gt, + struct kunit *test) +{ + struct xe_bo *bo; + u32 vram_bit; + int ret; + + /* TODO: Sanity check */ + vram_bit = XE_BO_CREATE_VRAM0_BIT << gt->info.vram_id; + kunit_info(test, "Testing gt id %u vram id %u\n", gt->info.id, + gt->info.vram_id); + + bo = xe_bo_create_locked(xe, NULL, NULL, SZ_1M, ttm_bo_type_device, + vram_bit); + if (IS_ERR(bo)) { + KUNIT_FAIL(test, "Failed to create bo.\n"); + return; + } + + kunit_info(test, "Verifying that CCS data is cleared on creation.\n"); + ret = ccs_test_migrate(gt, bo, false, 0ULL, 0xdeadbeefdeadbeefULL, + test); + if (ret) + goto out_unlock; + + kunit_info(test, "Verifying that CCS data survives migration.\n"); + ret = ccs_test_migrate(gt, bo, false, 0xdeadbeefdeadbeefULL, + 0xdeadbeefdeadbeefULL, test); + if (ret) + goto out_unlock; + + kunit_info(test, "Verifying that CCS data can be properly cleared.\n"); + ret = ccs_test_migrate(gt, bo, true, 0ULL, 0ULL, test); + +out_unlock: + xe_bo_unlock_no_vm(bo); + xe_bo_put(bo); +} + +static int ccs_test_run_device(struct xe_device *xe) +{ + struct kunit *test = xe_cur_kunit(); + struct xe_gt *gt; + int id; + + if (!xe_device_has_flat_ccs(xe)) { + kunit_info(test, "Skipping non-flat-ccs device.\n"); + return 0; + } + + for_each_gt(gt, xe, id) + ccs_test_run_gt(xe, gt, test); + + return 0; +} + +void xe_ccs_migrate_kunit(struct kunit *test) +{ + xe_call_for_each_device(ccs_test_run_device); +} +EXPORT_SYMBOL(xe_ccs_migrate_kunit); + 
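The CCS test above shows the shape every live test in this series follows: a per-device callback handed to xe_call_for_each_device(), wrapped by an exported xe_*_kunit() entry point that the companion *_test.c module registers as a KUNIT_CASE. A minimal sketch of that pattern, with purely hypothetical names and not part of this patch, would look roughly like:

static int hypothetical_test_run_device(struct xe_device *xe)
{
	struct kunit *test = xe_cur_kunit();
	struct xe_gt *gt;
	int id;

	/* Per-GT body of the test would go here. */
	for_each_gt(gt, xe, id)
		kunit_info(test, "would exercise gt %u here\n", gt->info.id);

	return 0;	/* a non-zero return stops the device iteration */
}

void xe_hypothetical_kunit(struct kunit *test)
{
	xe_call_for_each_device(hypothetical_test_run_device);
}
EXPORT_SYMBOL(xe_hypothetical_kunit);

Note that failures are reported through KUNIT_FAIL() while the callback still returns 0, so the remaining devices are visited; the dma-buf test further down spells out the same convention.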
+static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kunit *test) +{ + struct xe_bo *bo, *external; + unsigned int bo_flags = XE_BO_CREATE_USER_BIT | + XE_BO_CREATE_VRAM_IF_DGFX(gt); + struct xe_vm *vm = xe_migrate_get_vm(xe->gt[0].migrate); + struct ww_acquire_ctx ww; + int err, i; + + kunit_info(test, "Testing device %s gt id %u vram id %u\n", + dev_name(xe->drm.dev), gt->info.id, gt->info.vram_id); + + for (i = 0; i < 2; ++i) { + xe_vm_lock(vm, &ww, 0, false); + bo = xe_bo_create(xe, NULL, vm, 0x10000, ttm_bo_type_device, + bo_flags); + xe_vm_unlock(vm, &ww); + if (IS_ERR(bo)) { + KUNIT_FAIL(test, "bo create err=%pe\n", bo); + break; + } + + external = xe_bo_create(xe, NULL, NULL, 0x10000, + ttm_bo_type_device, bo_flags); + if (IS_ERR(external)) { + KUNIT_FAIL(test, "external bo create err=%pe\n", external); + goto cleanup_bo; + } + + xe_bo_lock(external, &ww, 0, false); + err = xe_bo_pin_external(external); + xe_bo_unlock(external, &ww); + if (err) { + KUNIT_FAIL(test, "external bo pin err=%pe\n", + ERR_PTR(err)); + goto cleanup_external; + } + + err = xe_bo_evict_all(xe); + if (err) { + KUNIT_FAIL(test, "evict err=%pe\n", ERR_PTR(err)); + goto cleanup_all; + } + + err = xe_bo_restore_kernel(xe); + if (err) { + KUNIT_FAIL(test, "restore kernel err=%pe\n", + ERR_PTR(err)); + goto cleanup_all; + } + + err = xe_bo_restore_user(xe); + if (err) { + KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err)); + goto cleanup_all; + } + + if (!xe_bo_is_vram(external)) { + KUNIT_FAIL(test, "external bo is not vram\n"); + err = -EPROTO; + goto cleanup_all; + } + + if (xe_bo_is_vram(bo)) { + KUNIT_FAIL(test, "bo is vram\n"); + err = -EPROTO; + goto cleanup_all; + } + + if (i) { + down_read(&vm->lock); + xe_vm_lock(vm, &ww, 0, false); + err = xe_bo_validate(bo, bo->vm, false); + xe_vm_unlock(vm, &ww); + up_read(&vm->lock); + if (err) { + KUNIT_FAIL(test, "bo valid err=%pe\n", + ERR_PTR(err)); + goto cleanup_all; + } + xe_bo_lock(external, &ww, 0, false); + err = xe_bo_validate(external, NULL, false); + xe_bo_unlock(external, &ww); + if (err) { + KUNIT_FAIL(test, "external bo valid err=%pe\n", + ERR_PTR(err)); + goto cleanup_all; + } + } + + xe_bo_lock(external, &ww, 0, false); + xe_bo_unpin_external(external); + xe_bo_unlock(external, &ww); + + xe_bo_put(external); + xe_bo_put(bo); + continue; + +cleanup_all: + xe_bo_lock(external, &ww, 0, false); + xe_bo_unpin_external(external); + xe_bo_unlock(external, &ww); +cleanup_external: + xe_bo_put(external); +cleanup_bo: + xe_bo_put(bo); + break; + } + + xe_vm_put(vm); + + return 0; +} + +static int evict_test_run_device(struct xe_device *xe) +{ + struct kunit *test = xe_cur_kunit(); + struct xe_gt *gt; + int id; + + if (!IS_DGFX(xe)) { + kunit_info(test, "Skipping non-discrete device %s.\n", + dev_name(xe->drm.dev)); + return 0; + } + + for_each_gt(gt, xe, id) + evict_test_run_gt(xe, gt, test); + + return 0; +} + +void xe_bo_evict_kunit(struct kunit *test) +{ + xe_call_for_each_device(evict_test_run_device); +} +EXPORT_SYMBOL(xe_bo_evict_kunit); diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c new file mode 100644 index 000000000000..c8fa29b0b3b2 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_bo_test.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +void xe_ccs_migrate_kunit(struct kunit *test); +void xe_bo_evict_kunit(struct kunit *test); + +static struct kunit_case xe_bo_tests[] = { + KUNIT_CASE(xe_ccs_migrate_kunit), + 
KUNIT_CASE(xe_bo_evict_kunit), + {} +}; + +static struct kunit_suite xe_bo_test_suite = { + .name = "xe_bo", + .test_cases = xe_bo_tests, +}; + +kunit_test_suite(xe_bo_test_suite); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c new file mode 100644 index 000000000000..615d22e3f731 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include "xe_pci.h" + +static bool p2p_enabled(struct dma_buf_test_params *params) +{ + return IS_ENABLED(CONFIG_PCI_P2PDMA) && params->attach_ops && + params->attach_ops->allow_peer2peer; +} + +static bool is_dynamic(struct dma_buf_test_params *params) +{ + return IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY) && params->attach_ops && + params->attach_ops->move_notify; +} + +static void check_residency(struct kunit *test, struct xe_bo *exported, + struct xe_bo *imported, struct dma_buf *dmabuf) +{ + struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv); + u32 mem_type; + int ret; + + xe_bo_assert_held(exported); + xe_bo_assert_held(imported); + + mem_type = XE_PL_VRAM0; + if (!(params->mem_mask & XE_BO_CREATE_VRAM0_BIT)) + /* No VRAM allowed */ + mem_type = XE_PL_TT; + else if (params->force_different_devices && !p2p_enabled(params)) + /* No P2P */ + mem_type = XE_PL_TT; + else if (params->force_different_devices && !is_dynamic(params) && + (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) + /* Pin migrated to TT */ + mem_type = XE_PL_TT; + + if (!xe_bo_is_mem_type(exported, mem_type)) { + KUNIT_FAIL(test, "Exported bo was not in expected memory type.\n"); + return; + } + + if (xe_bo_is_pinned(exported)) + return; + + /* + * Evict exporter. Note that the gem object dma_buf member isn't + * set from xe_gem_prime_export(), and it's needed for the move_notify() + * functionality, so hack that up here. Evicting the exported bo will + * evict also the imported bo through the move_notify() functionality if + * importer is on a different device. If they're on the same device, + * the exporter and the importer should be the same bo. + */ + swap(exported->ttm.base.dma_buf, dmabuf); + ret = xe_bo_evict(exported, true); + swap(exported->ttm.base.dma_buf, dmabuf); + if (ret) { + if (ret != -EINTR && ret != -ERESTARTSYS) + KUNIT_FAIL(test, "Evicting exporter failed with err=%d.\n", + ret); + return; + } + + /* Verify that also importer has been evicted to SYSTEM */ + if (!xe_bo_is_mem_type(imported, XE_PL_SYSTEM)) { + KUNIT_FAIL(test, "Importer wasn't properly evicted.\n"); + return; + } + + /* Re-validate the importer. This should move also exporter in. */ + ret = xe_bo_validate(imported, NULL, false); + if (ret) { + if (ret != -EINTR && ret != -ERESTARTSYS) + KUNIT_FAIL(test, "Validating importer failed with err=%d.\n", + ret); + return; + } + + /* + * If on different devices, the exporter is kept in system if + * possible, saving a migration step as the transfer is just + * likely as fast from system memory. 
+ */ + if (params->force_different_devices && + params->mem_mask & XE_BO_CREATE_SYSTEM_BIT) + KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT)); + else + KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type)); + + if (params->force_different_devices) + KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(imported, XE_PL_TT)); + else + KUNIT_EXPECT_TRUE(test, exported == imported); +} + +static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) +{ + struct kunit *test = xe_cur_kunit(); + struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv); + struct drm_gem_object *import; + struct dma_buf *dmabuf; + struct xe_bo *bo; + + /* No VRAM on this device? */ + if (!ttm_manager_type(&xe->ttm, XE_PL_VRAM0) && + (params->mem_mask & XE_BO_CREATE_VRAM0_BIT)) + return; + + kunit_info(test, "running %s\n", __func__); + bo = xe_bo_create(xe, NULL, NULL, PAGE_SIZE, ttm_bo_type_device, + XE_BO_CREATE_USER_BIT | params->mem_mask); + if (IS_ERR(bo)) { + KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", + PTR_ERR(bo)); + return; + } + + dmabuf = xe_gem_prime_export(&bo->ttm.base, 0); + if (IS_ERR(dmabuf)) { + KUNIT_FAIL(test, "xe_gem_prime_export() failed with err=%ld\n", + PTR_ERR(dmabuf)); + goto out; + } + + import = xe_gem_prime_import(&xe->drm, dmabuf); + if (!IS_ERR(import)) { + struct xe_bo *import_bo = gem_to_xe_bo(import); + + /* + * Did import succeed when it shouldn't due to lack of p2p support? + */ + if (params->force_different_devices && + !p2p_enabled(params) && + !(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) { + KUNIT_FAIL(test, + "xe_gem_prime_import() succeeded when it shouldn't have\n"); + } else { + int err; + + /* Is everything where we expect it to be? */ + xe_bo_lock_no_vm(import_bo, NULL); + err = xe_bo_validate(import_bo, NULL, false); + if (err && err != -EINTR && err != -ERESTARTSYS) + KUNIT_FAIL(test, + "xe_bo_validate() failed with err=%d\n", err); + + check_residency(test, bo, import_bo, dmabuf); + xe_bo_unlock_no_vm(import_bo); + } + drm_gem_object_put(import); + } else if (PTR_ERR(import) != -EOPNOTSUPP) { + /* Unexpected error code. */ + KUNIT_FAIL(test, + "xe_gem_prime_import failed with the wrong err=%ld\n", + PTR_ERR(import)); + } else if (!params->force_different_devices || + p2p_enabled(params) || + (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) { + /* Shouldn't fail if we can reuse same bo, use p2p or use system */ + KUNIT_FAIL(test, "dynamic p2p attachment failed with err=%ld\n", + PTR_ERR(import)); + } + dma_buf_put(dmabuf); +out: + drm_gem_object_put(&bo->ttm.base); +} + +static const struct dma_buf_attach_ops nop2p_attach_ops = { + .allow_peer2peer = false, + .move_notify = xe_dma_buf_move_notify +}; + +/* + * We test the implementation with bos of different residency and with + * importers with different capabilities; some lacking p2p support and some + * lacking dynamic capabilities (attach_ops == NULL). We also fake + * different devices avoiding the import shortcut that just reuses the same + * gem object. 
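+ *
+ * Each entry below is run once per device by dma_buf_run_device(), so the
+ * matrix covers VRAM-only, system-only and mixed placements, each exercised
+ * with a p2p-capable importer, a non-p2p importer and a non-dynamic importer
+ * (NULL attach_ops), on both the same-device and forced-different-device
+ * paths.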
+ */ +static const struct dma_buf_test_params test_params[] = { + {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &xe_dma_buf_attach_ops}, + {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &xe_dma_buf_attach_ops, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &nop2p_attach_ops}, + {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &nop2p_attach_ops, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_VRAM0_BIT}, + {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + .attach_ops = &xe_dma_buf_attach_ops}, + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + .attach_ops = &xe_dma_buf_attach_ops, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + .attach_ops = &nop2p_attach_ops}, + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + .attach_ops = &nop2p_attach_ops, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT}, + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &xe_dma_buf_attach_ops}, + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &xe_dma_buf_attach_ops, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &nop2p_attach_ops}, + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &nop2p_attach_ops, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT}, + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + .force_different_devices = true}, + + {} +}; + +static int dma_buf_run_device(struct xe_device *xe) +{ + const struct dma_buf_test_params *params; + struct kunit *test = xe_cur_kunit(); + + for (params = test_params; params->mem_mask; ++params) { + struct dma_buf_test_params p = *params; + + p.base.id = XE_TEST_LIVE_DMA_BUF; + test->priv = &p; + xe_test_dmabuf_import_same_driver(xe); + } + + /* A non-zero return would halt iteration over driver devices */ + return 0; +} + +void xe_dma_buf_kunit(struct kunit *test) +{ + xe_call_for_each_device(dma_buf_run_device); +} +EXPORT_SYMBOL(xe_dma_buf_kunit); diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c new file mode 100644 index 000000000000..7bb292da1193 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +void xe_dma_buf_kunit(struct kunit *test); + +static struct kunit_case xe_dma_buf_tests[] = { + KUNIT_CASE(xe_dma_buf_kunit), + {} +}; + +static struct kunit_suite xe_dma_buf_test_suite = { + .name = "xe_dma_buf", + .test_cases = xe_dma_buf_tests, +}; + +kunit_test_suite(xe_dma_buf_test_suite); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c new file mode 100644 index 000000000000..0f3b819f0a34 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -0,0 +1,378 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020-2022 Intel Corporation + */ + +#include + +#include "xe_pci.h" + +static bool sanity_fence_failed(struct xe_device *xe, struct dma_fence *fence, + const char *str, struct kunit *test) +{ + long ret; + + if (IS_ERR(fence)) { + KUNIT_FAIL(test, "Failed to create fence for %s: 
%li\n", str, + PTR_ERR(fence)); + return true; + } + if (!fence) + return true; + + ret = dma_fence_wait_timeout(fence, false, 5 * HZ); + if (ret <= 0) { + KUNIT_FAIL(test, "Fence timed out for %s: %li\n", str, ret); + return true; + } + + return false; +} + +static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe, + struct xe_bb *bb, u32 second_idx, const char *str, + struct kunit *test) +{ + struct xe_sched_job *job = xe_bb_create_migration_job(m->eng, bb, + m->batch_base_ofs, + second_idx); + struct dma_fence *fence; + + if (IS_ERR(job)) { + KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", + PTR_ERR(job)); + return PTR_ERR(job); + } + + xe_sched_job_arm(job); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + if (sanity_fence_failed(xe, fence, str, test)) + return -ETIMEDOUT; + + dma_fence_put(fence); + kunit_info(test, "%s: Job completed\n", str); + return 0; +} + +static void +sanity_populate_cb(struct xe_migrate_pt_update *pt_update, + struct xe_gt *gt, struct iosys_map *map, void *dst, + u32 qword_ofs, u32 num_qwords, + const struct xe_vm_pgtable_update *update) +{ + int i; + u64 *ptr = dst; + + for (i = 0; i < num_qwords; i++) + ptr[i] = (qword_ofs + i - update->ofs) * 0x1111111111111111ULL; +} + +static const struct xe_migrate_pt_update_ops sanity_ops = { + .populate = sanity_populate_cb, +}; + +#define check(_retval, _expected, str, _test) \ + do { if ((_retval) != (_expected)) { \ + KUNIT_FAIL(_test, "Sanity check failed: " str \ + " expected %llx, got %llx\n", \ + (u64)(_expected), (u64)(_retval)); \ + } } while (0) + +static void test_copy(struct xe_migrate *m, struct xe_bo *bo, + struct kunit *test) +{ + struct xe_device *xe = gt_to_xe(m->gt); + u64 retval, expected = 0xc0c0c0c0c0c0c0c0ULL; + bool big = bo->size >= SZ_2M; + struct dma_fence *fence; + const char *str = big ? "Copying big bo" : "Copying small bo"; + int err; + + struct xe_bo *sysmem = xe_bo_create_locked(xe, m->gt, NULL, + bo->size, + ttm_bo_type_kernel, + XE_BO_CREATE_SYSTEM_BIT); + if (IS_ERR(sysmem)) { + KUNIT_FAIL(test, "Failed to allocate sysmem bo for %s: %li\n", + str, PTR_ERR(sysmem)); + return; + } + + err = xe_bo_validate(sysmem, NULL, false); + if (err) { + KUNIT_FAIL(test, "Failed to validate system bo for %s: %li\n", + str, err); + goto out_unlock; + } + + err = xe_bo_vmap(sysmem); + if (err) { + KUNIT_FAIL(test, "Failed to vmap system bo for %s: %li\n", + str, err); + goto out_unlock; + } + + xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size); + fence = xe_migrate_clear(m, sysmem, sysmem->ttm.resource, 0xc0c0c0c0); + if (!sanity_fence_failed(xe, fence, big ? "Clearing sysmem big bo" : + "Clearing sysmem small bo", test)) { + retval = xe_map_rd(xe, &sysmem->vmap, 0, u64); + check(retval, expected, "sysmem first offset should be cleared", + test); + retval = xe_map_rd(xe, &sysmem->vmap, sysmem->size - 8, u64); + check(retval, expected, "sysmem last offset should be cleared", + test); + } + dma_fence_put(fence); + + /* Try to copy 0xc0 from sysmem to lmem with 2MB or 64KiB/4KiB pages */ + xe_map_memset(xe, &sysmem->vmap, 0, 0xc0, sysmem->size); + xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size); + + fence = xe_migrate_copy(m, sysmem, sysmem->ttm.resource, + bo->ttm.resource); + if (!sanity_fence_failed(xe, fence, big ? 
"Copying big bo sysmem -> vram" : + "Copying small bo sysmem -> vram", test)) { + retval = xe_map_rd(xe, &bo->vmap, 0, u64); + check(retval, expected, + "sysmem -> vram bo first offset should be copied", test); + retval = xe_map_rd(xe, &bo->vmap, bo->size - 8, u64); + check(retval, expected, + "sysmem -> vram bo offset should be copied", test); + } + dma_fence_put(fence); + + /* And other way around.. slightly hacky.. */ + xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size); + xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size); + + fence = xe_migrate_copy(m, sysmem, bo->ttm.resource, + sysmem->ttm.resource); + if (!sanity_fence_failed(xe, fence, big ? "Copying big bo vram -> sysmem" : + "Copying small bo vram -> sysmem", test)) { + retval = xe_map_rd(xe, &sysmem->vmap, 0, u64); + check(retval, expected, + "vram -> sysmem bo first offset should be copied", test); + retval = xe_map_rd(xe, &sysmem->vmap, bo->size - 8, u64); + check(retval, expected, + "vram -> sysmem bo last offset should be copied", test); + } + dma_fence_put(fence); + + xe_bo_vunmap(sysmem); +out_unlock: + xe_bo_unlock_no_vm(sysmem); + xe_bo_put(sysmem); +} + +static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt, + struct kunit *test) +{ + struct xe_device *xe = gt_to_xe(m->gt); + struct dma_fence *fence; + u64 retval, expected; + int i; + + struct xe_vm_pgtable_update update = { + .ofs = 1, + .qwords = 0x10, + .pt_bo = pt, + }; + struct xe_migrate_pt_update pt_update = { + .ops = &sanity_ops, + }; + + /* Test xe_migrate_update_pgtables() updates the pagetable as expected */ + expected = 0xf0f0f0f0f0f0f0f0ULL; + xe_map_memset(xe, &pt->vmap, 0, (u8)expected, pt->size); + + fence = xe_migrate_update_pgtables(m, NULL, NULL, m->eng, &update, 1, + NULL, 0, &pt_update); + if (sanity_fence_failed(xe, fence, "Migration pagetable update", test)) + return; + + dma_fence_put(fence); + retval = xe_map_rd(xe, &pt->vmap, 0, u64); + check(retval, expected, "PTE[0] must stay untouched", test); + + for (i = 0; i < update.qwords; i++) { + retval = xe_map_rd(xe, &pt->vmap, (update.ofs + i) * 8, u64); + check(retval, i * 0x1111111111111111ULL, "PTE update", test); + } + + retval = xe_map_rd(xe, &pt->vmap, 8 * (update.ofs + update.qwords), + u64); + check(retval, expected, "PTE[0x11] must stay untouched", test); +} + +static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) +{ + struct xe_gt *gt = m->gt; + struct xe_device *xe = gt_to_xe(gt); + struct xe_bo *pt, *bo = m->pt_bo, *big, *tiny; + struct xe_res_cursor src_it; + struct dma_fence *fence; + u64 retval, expected; + struct xe_bb *bb; + int err; + u8 id = gt->info.id; + + err = xe_bo_vmap(bo); + if (err) { + KUNIT_FAIL(test, "Failed to vmap our pagetables: %li\n", + PTR_ERR(bo)); + return; + } + + big = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, SZ_4M, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(m->gt) | + XE_BO_CREATE_PINNED_BIT); + if (IS_ERR(big)) { + KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big)); + goto vunmap; + } + + pt = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, GEN8_PAGE_SIZE, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(m->gt) | + XE_BO_CREATE_PINNED_BIT); + if (IS_ERR(pt)) { + KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", + PTR_ERR(pt)); + goto free_big; + } + + tiny = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, + 2 * SZ_4K, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(m->gt) | + XE_BO_CREATE_PINNED_BIT); + if (IS_ERR(tiny)) { + KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", + PTR_ERR(pt)); + 
goto free_pt; + } + + bb = xe_bb_new(m->gt, 32, xe->info.supports_usm); + if (IS_ERR(bb)) { + KUNIT_FAIL(test, "Failed to create batchbuffer: %li\n", + PTR_ERR(bb)); + goto free_tiny; + } + + kunit_info(test, "Starting tests, top level PT addr: %llx, special pagetable base addr: %llx\n", + xe_bo_main_addr(m->eng->vm->pt_root[id]->bo, GEN8_PAGE_SIZE), + xe_bo_main_addr(m->pt_bo, GEN8_PAGE_SIZE)); + + /* First part of the test, are we updating our pagetable bo with a new entry? */ + xe_map_wr(xe, &bo->vmap, GEN8_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64, 0xdeaddeadbeefbeef); + expected = gen8_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0, 0); + if (m->eng->vm->flags & XE_VM_FLAGS_64K) + expected |= GEN12_PTE_PS64; + xe_res_first(pt->ttm.resource, 0, pt->size, &src_it); + emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), + &src_it, GEN8_PAGE_SIZE, pt); + run_sanity_job(m, xe, bb, bb->len, "Writing PTE for our fake PT", test); + + retval = xe_map_rd(xe, &bo->vmap, GEN8_PAGE_SIZE * (NUM_KERNEL_PDE - 1), + u64); + check(retval, expected, "PTE entry write", test); + + /* Now try to write data to our newly mapped 'pagetable', see if it succeeds */ + bb->len = 0; + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + xe_map_wr(xe, &pt->vmap, 0, u32, 0xdeaddead); + expected = 0x12345678U; + + emit_clear(m->gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4, + expected, IS_DGFX(xe)); + run_sanity_job(m, xe, bb, 1, "Writing to our newly mapped pagetable", + test); + + retval = xe_map_rd(xe, &pt->vmap, 0, u32); + check(retval, expected, "Write to PT after adding PTE", test); + + /* Sanity checks passed, try the full ones! */ + + /* Clear a small bo */ + kunit_info(test, "Clearing small buffer object\n"); + xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size); + expected = 0x224488ff; + fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, expected); + if (sanity_fence_failed(xe, fence, "Clearing small bo", test)) + goto out; + + dma_fence_put(fence); + retval = xe_map_rd(xe, &tiny->vmap, 0, u32); + check(retval, expected, "Command clear small first value", test); + retval = xe_map_rd(xe, &tiny->vmap, tiny->size - 4, u32); + check(retval, expected, "Command clear small last value", test); + + if (IS_DGFX(xe)) { + kunit_info(test, "Copying small buffer object to system\n"); + test_copy(m, tiny, test); + } + + /* Clear a big bo with a fixed value */ + kunit_info(test, "Clearing big buffer object\n"); + xe_map_memset(xe, &big->vmap, 0, 0x11, big->size); + expected = 0x11223344U; + fence = xe_migrate_clear(m, big, big->ttm.resource, expected); + if (sanity_fence_failed(xe, fence, "Clearing big bo", test)) + goto out; + + dma_fence_put(fence); + retval = xe_map_rd(xe, &big->vmap, 0, u32); + check(retval, expected, "Command clear big first value", test); + retval = xe_map_rd(xe, &big->vmap, big->size - 4, u32); + check(retval, expected, "Command clear big last value", test); + + if (IS_DGFX(xe)) { + kunit_info(test, "Copying big buffer object to system\n"); + test_copy(m, big, test); + } + + test_pt_update(m, pt, test); + +out: + xe_bb_free(bb, NULL); +free_tiny: + xe_bo_unpin(tiny); + xe_bo_put(tiny); +free_pt: + xe_bo_unpin(pt); + xe_bo_put(pt); +free_big: + xe_bo_unpin(big); + xe_bo_put(big); +vunmap: + xe_bo_vunmap(m->pt_bo); +} + +static int migrate_test_run_device(struct xe_device *xe) +{ + struct kunit *test = xe_cur_kunit(); + struct xe_gt *gt; + int id; + + for_each_gt(gt, xe, id) { + struct xe_migrate *m = gt->migrate; + struct ww_acquire_ctx ww; + + kunit_info(test, "Testing gt id %d.\n", id); + 
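+		/*
+		 * Run the whole sanity test with the migrate engine's VM held
+		 * locked; the test maps and rewrites that VM's page-table BOs
+		 * directly.
+		 */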
xe_vm_lock(m->eng->vm, &ww, 0, true); + xe_migrate_sanity_test(m, test); + xe_vm_unlock(m->eng->vm, &ww); + } + + return 0; +} + +void xe_migrate_sanity_kunit(struct kunit *test) +{ + xe_call_for_each_device(migrate_test_run_device); +} +EXPORT_SYMBOL(xe_migrate_sanity_kunit); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c new file mode 100644 index 000000000000..ad779e2bd071 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +void xe_migrate_sanity_kunit(struct kunit *test); + +static struct kunit_case xe_migrate_tests[] = { + KUNIT_CASE(xe_migrate_sanity_kunit), + {} +}; + +static struct kunit_suite xe_migrate_test_suite = { + .name = "xe_migrate", + .test_cases = xe_migrate_tests, +}; + +kunit_test_suite(xe_migrate_test_suite); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/xe/tests/xe_test.h b/drivers/gpu/drm/xe/tests/xe_test.h new file mode 100644 index 000000000000..1ec502b5acf3 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_test.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 AND MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef __XE_TEST_H__ +#define __XE_TEST_H__ + +#include + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +#include +#include + +/* + * Each test that provides a kunit private test structure, place a test id + * here and point the kunit->priv to an embedded struct xe_test_priv. + */ +enum xe_test_priv_id { + XE_TEST_LIVE_DMA_BUF, +}; + +/** + * struct xe_test_priv - Base class for test private info + * @id: enum xe_test_priv_id to identify the subclass. + */ +struct xe_test_priv { + enum xe_test_priv_id id; +}; + +#define XE_TEST_DECLARE(x) x +#define XE_TEST_ONLY(x) unlikely(x) +#define XE_TEST_EXPORT +#define xe_cur_kunit() current->kunit_test + +/** + * xe_cur_kunit_priv - Obtain the struct xe_test_priv pointed to by + * current->kunit->priv if it exists and is embedded in the expected subclass. + * @id: Id of the expected subclass. + * + * Return: NULL if the process is not a kunit test, and NULL if the + * current kunit->priv pointer is not pointing to an object of the expected + * subclass. A pointer to the embedded struct xe_test_priv otherwise. + */ +static inline struct xe_test_priv * +xe_cur_kunit_priv(enum xe_test_priv_id id) +{ + struct xe_test_priv *priv; + + if (!xe_cur_kunit()) + return NULL; + + priv = xe_cur_kunit()->priv; + return priv->id == id ? priv : NULL; +} + +#else /* if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) */ + +#define XE_TEST_DECLARE(x) +#define XE_TEST_ONLY(x) 0 +#define XE_TEST_EXPORT static +#define xe_cur_kunit() NULL +#define xe_cur_kunit_priv(_id) NULL + +#endif +#endif diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c new file mode 100644 index 000000000000..8b9209571fd0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_bb.h" +#include "xe_sa.h" +#include "xe_device.h" +#include "xe_engine_types.h" +#include "xe_hw_fence.h" +#include "xe_sched_job.h" +#include "xe_vm_types.h" + +#include "gt/intel_gpu_commands.h" + +struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm) +{ + struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL); + int err; + + if (!bb) + return ERR_PTR(-ENOMEM); + + bb->bo = xe_sa_bo_new(!usm ? 
>->kernel_bb_pool : + >->usm.bb_pool, 4 * dwords + 4); + if (IS_ERR(bb->bo)) { + err = PTR_ERR(bb->bo); + goto err; + } + + bb->cs = xe_sa_bo_cpu_addr(bb->bo); + bb->len = 0; + + return bb; +err: + kfree(bb); + return ERR_PTR(err); +} + +static struct xe_sched_job * +__xe_bb_create_job(struct xe_engine *kernel_eng, struct xe_bb *bb, u64 *addr) +{ + u32 size = drm_suballoc_size(bb->bo); + + XE_BUG_ON((bb->len * 4 + 1) > size); + + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + + xe_sa_bo_flush_write(bb->bo); + + return xe_sched_job_create(kernel_eng, addr); +} + +struct xe_sched_job *xe_bb_create_wa_job(struct xe_engine *wa_eng, + struct xe_bb *bb, u64 batch_base_ofs) +{ + u64 addr = batch_base_ofs + drm_suballoc_soffset(bb->bo); + + XE_BUG_ON(!(wa_eng->vm->flags & XE_VM_FLAG_MIGRATION)); + + return __xe_bb_create_job(wa_eng, bb, &addr); +} + +struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng, + struct xe_bb *bb, + u64 batch_base_ofs, + u32 second_idx) +{ + u64 addr[2] = { + batch_base_ofs + drm_suballoc_soffset(bb->bo), + batch_base_ofs + drm_suballoc_soffset(bb->bo) + + 4 * second_idx, + }; + + BUG_ON(second_idx > bb->len); + BUG_ON(!(kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION)); + + return __xe_bb_create_job(kernel_eng, bb, addr); +} + +struct xe_sched_job *xe_bb_create_job(struct xe_engine *kernel_eng, + struct xe_bb *bb) +{ + u64 addr = xe_sa_bo_gpu_addr(bb->bo); + + BUG_ON(kernel_eng->vm && kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION); + return __xe_bb_create_job(kernel_eng, bb, &addr); +} + +void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence) +{ + if (!bb) + return; + + xe_sa_bo_free(bb->bo, fence); + kfree(bb); +} diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h new file mode 100644 index 000000000000..0cc9260c9634 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bb.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_BB_H_ +#define _XE_BB_H_ + +#include "xe_bb_types.h" + +struct dma_fence; + +struct xe_gt; +struct xe_engine; +struct xe_sched_job; + +struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 size, bool usm); +struct xe_sched_job *xe_bb_create_job(struct xe_engine *kernel_eng, + struct xe_bb *bb); +struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng, + struct xe_bb *bb, u64 batch_ofs, + u32 second_idx); +struct xe_sched_job *xe_bb_create_wa_job(struct xe_engine *wa_eng, + struct xe_bb *bb, u64 batch_ofs); +void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence); + +#endif diff --git a/drivers/gpu/drm/xe/xe_bb_types.h b/drivers/gpu/drm/xe/xe_bb_types.h new file mode 100644 index 000000000000..b7d30308cf90 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bb_types.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_BB_TYPES_H_ +#define _XE_BB_TYPES_H_ + +#include + +struct drm_suballoc; + +struct xe_bb { + struct drm_suballoc *bo; + + u32 *cs; + u32 len; /* in dwords */ +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c new file mode 100644 index 000000000000..ef2c9196c113 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -0,0 +1,1698 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + + +#include "xe_bo.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include "xe_device.h" +#include "xe_dma_buf.h" +#include "xe_ggtt.h" +#include "xe_gt.h" +#include "xe_map.h" +#include "xe_migrate.h" 
+#include "xe_preempt_fence.h" +#include "xe_res_cursor.h" +#include "xe_trace.h" +#include "xe_vm.h" + +static const struct ttm_place sys_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .mem_type = XE_PL_SYSTEM, + .flags = 0, +}; + +static struct ttm_placement sys_placement = { + .num_placement = 1, + .placement = &sys_placement_flags, + .num_busy_placement = 1, + .busy_placement = &sys_placement_flags, +}; + +bool mem_type_is_vram(u32 mem_type) +{ + return mem_type >= XE_PL_VRAM0; +} + +static bool resource_is_vram(struct ttm_resource *res) +{ + return mem_type_is_vram(res->mem_type); +} + +bool xe_bo_is_vram(struct xe_bo *bo) +{ + return resource_is_vram(bo->ttm.resource); +} + +static bool xe_bo_is_user(struct xe_bo *bo) +{ + return bo->flags & XE_BO_CREATE_USER_BIT; +} + +static struct xe_gt * +mem_type_to_gt(struct xe_device *xe, u32 mem_type) +{ + XE_BUG_ON(!mem_type_is_vram(mem_type)); + + return xe_device_get_gt(xe, mem_type - XE_PL_VRAM0); +} + +static void try_add_system(struct xe_bo *bo, struct ttm_place *places, + u32 bo_flags, u32 *c) +{ + if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) { + places[*c] = (struct ttm_place) { + .mem_type = XE_PL_TT, + }; + *c += 1; + + if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) + bo->props.preferred_mem_type = XE_PL_TT; + } +} + +static void try_add_vram0(struct xe_device *xe, struct xe_bo *bo, + struct ttm_place *places, u32 bo_flags, u32 *c) +{ + struct xe_gt *gt; + + if (bo_flags & XE_BO_CREATE_VRAM0_BIT) { + gt = mem_type_to_gt(xe, XE_PL_VRAM0); + XE_BUG_ON(!gt->mem.vram.size); + + places[*c] = (struct ttm_place) { + .mem_type = XE_PL_VRAM0, + /* + * For eviction / restore on suspend / resume objects + * pinned in VRAM must be contiguous + */ + .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT | + XE_BO_CREATE_GGTT_BIT) ? + TTM_PL_FLAG_CONTIGUOUS : 0, + }; + *c += 1; + + if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) + bo->props.preferred_mem_type = XE_PL_VRAM0; + } +} + +static void try_add_vram1(struct xe_device *xe, struct xe_bo *bo, + struct ttm_place *places, u32 bo_flags, u32 *c) +{ + struct xe_gt *gt; + + if (bo_flags & XE_BO_CREATE_VRAM1_BIT) { + gt = mem_type_to_gt(xe, XE_PL_VRAM1); + XE_BUG_ON(!gt->mem.vram.size); + + places[*c] = (struct ttm_place) { + .mem_type = XE_PL_VRAM1, + /* + * For eviction / restore on suspend / resume objects + * pinned in VRAM must be contiguous + */ + .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT | + XE_BO_CREATE_GGTT_BIT) ? 
+ TTM_PL_FLAG_CONTIGUOUS : 0, + }; + *c += 1; + + if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) + bo->props.preferred_mem_type = XE_PL_VRAM1; + } +} + +static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, + u32 bo_flags) +{ + struct ttm_place *places = bo->placements; + u32 c = 0; + + bo->props.preferred_mem_type = XE_BO_PROPS_INVALID; + + /* The order of placements should indicate preferred location */ + + if (bo->props.preferred_mem_class == XE_MEM_REGION_CLASS_SYSMEM) { + try_add_system(bo, places, bo_flags, &c); + if (bo->props.preferred_gt == XE_GT1) { + try_add_vram1(xe, bo, places, bo_flags, &c); + try_add_vram0(xe, bo, places, bo_flags, &c); + } else { + try_add_vram0(xe, bo, places, bo_flags, &c); + try_add_vram1(xe, bo, places, bo_flags, &c); + } + } else if (bo->props.preferred_gt == XE_GT1) { + try_add_vram1(xe, bo, places, bo_flags, &c); + try_add_vram0(xe, bo, places, bo_flags, &c); + try_add_system(bo, places, bo_flags, &c); + } else { + try_add_vram0(xe, bo, places, bo_flags, &c); + try_add_vram1(xe, bo, places, bo_flags, &c); + try_add_system(bo, places, bo_flags, &c); + } + + if (!c) + return -EINVAL; + + bo->placement = (struct ttm_placement) { + .num_placement = c, + .placement = places, + .num_busy_placement = c, + .busy_placement = places, + }; + + return 0; +} + +int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, + u32 bo_flags) +{ + xe_bo_assert_held(bo); + return __xe_bo_placement_for_flags(xe, bo, bo_flags); +} + +static void xe_evict_flags(struct ttm_buffer_object *tbo, + struct ttm_placement *placement) +{ + struct xe_bo *bo; + + if (!xe_bo_is_xe_bo(tbo)) { + /* Don't handle scatter gather BOs */ + if (tbo->type == ttm_bo_type_sg) { + placement->num_placement = 0; + placement->num_busy_placement = 0; + return; + } + + *placement = sys_placement; + return; + } + + /* + * For xe, sg bos that are evicted to system just triggers a + * rebind of the sg list upon subsequent validation to XE_PL_TT. 
+ */ + + bo = ttm_to_xe_bo(tbo); + switch (tbo->resource->mem_type) { + case XE_PL_VRAM0: + case XE_PL_VRAM1: + case XE_PL_TT: + default: + /* for now kick out to system */ + *placement = sys_placement; + break; + } +} + +struct xe_ttm_tt { + struct ttm_tt ttm; + struct device *dev; + struct sg_table sgt; + struct sg_table *sg; +}; + +static int xe_tt_map_sg(struct ttm_tt *tt) +{ + struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); + unsigned long num_pages = tt->num_pages; + int ret; + + XE_BUG_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL); + + if (xe_tt->sg) + return 0; + + ret = sg_alloc_table_from_pages(&xe_tt->sgt, tt->pages, num_pages, + 0, (u64)num_pages << PAGE_SHIFT, + GFP_KERNEL); + if (ret) + return ret; + + xe_tt->sg = &xe_tt->sgt; + ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + if (ret) { + sg_free_table(xe_tt->sg); + xe_tt->sg = NULL; + return ret; + } + + return 0; +} + +struct sg_table *xe_bo_get_sg(struct xe_bo *bo) +{ + struct ttm_tt *tt = bo->ttm.ttm; + struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); + + return xe_tt->sg; +} + +static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, + u32 page_flags) +{ + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); + struct xe_device *xe = xe_bo_device(bo); + struct xe_ttm_tt *tt; + int err; + + tt = kzalloc(sizeof(*tt), GFP_KERNEL); + if (!tt) + return NULL; + + tt->dev = xe->drm.dev; + + /* TODO: Select caching mode */ + err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, + bo->flags & XE_BO_SCANOUT_BIT ? ttm_write_combined : ttm_cached, + DIV_ROUND_UP(xe_device_ccs_bytes(xe_bo_device(bo), + bo->ttm.base.size), + PAGE_SIZE)); + if (err) { + kfree(tt); + return NULL; + } + + return &tt->ttm; +} + +static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt, + struct ttm_operation_ctx *ctx) +{ + int err; + + /* + * dma-bufs are not populated with pages, and the dma- + * addresses are set up when moved to XE_PL_TT. 
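+	 * That mapping is set up by xe_bo_move_dmabuf() once the buffer is
+	 * moved to XE_PL_TT.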
+ */ + if (tt->page_flags & TTM_TT_FLAG_EXTERNAL) + return 0; + + err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx); + if (err) + return err; + + /* A follow up may move this xe_bo_move when BO is moved to XE_PL_TT */ + err = xe_tt_map_sg(tt); + if (err) + ttm_pool_free(&ttm_dev->pool, tt); + + return err; +} + +static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt) +{ + struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); + + if (tt->page_flags & TTM_TT_FLAG_EXTERNAL) + return; + + if (xe_tt->sg) { + dma_unmap_sgtable(xe_tt->dev, xe_tt->sg, + DMA_BIDIRECTIONAL, 0); + sg_free_table(xe_tt->sg); + xe_tt->sg = NULL; + } + + return ttm_pool_free(&ttm_dev->pool, tt); +} + +static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt) +{ + ttm_tt_fini(tt); + kfree(tt); +} + +static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, + struct ttm_resource *mem) +{ + struct xe_device *xe = ttm_to_xe_device(bdev); + struct xe_gt *gt; + + switch (mem->mem_type) { + case XE_PL_SYSTEM: + case XE_PL_TT: + return 0; + case XE_PL_VRAM0: + case XE_PL_VRAM1: + gt = mem_type_to_gt(xe, mem->mem_type); + mem->bus.offset = mem->start << PAGE_SHIFT; + + if (gt->mem.vram.mapping && + mem->placement & TTM_PL_FLAG_CONTIGUOUS) + mem->bus.addr = (u8 *)gt->mem.vram.mapping + + mem->bus.offset; + + mem->bus.offset += gt->mem.vram.io_start; + mem->bus.is_iomem = true; + +#if !defined(CONFIG_X86) + mem->bus.caching = ttm_write_combined; +#endif + break; + default: + return -EINVAL; + } + return 0; +} + +static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo, + const struct ttm_operation_ctx *ctx) +{ + struct dma_resv_iter cursor; + struct dma_fence *fence; + struct xe_vma *vma; + int ret = 0; + + dma_resv_assert_held(bo->ttm.base.resv); + + if (!xe_device_in_fault_mode(xe) && !list_empty(&bo->vmas)) { + dma_resv_iter_begin(&cursor, bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP); + dma_resv_for_each_fence_unlocked(&cursor, fence) + dma_fence_enable_sw_signaling(fence); + dma_resv_iter_end(&cursor); + } + + list_for_each_entry(vma, &bo->vmas, bo_link) { + struct xe_vm *vm = vma->vm; + + trace_xe_vma_evict(vma); + + if (xe_vm_in_fault_mode(vm)) { + /* Wait for pending binds / unbinds. */ + long timeout; + + if (ctx->no_wait_gpu && + !dma_resv_test_signaled(bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP)) + return -EBUSY; + + timeout = dma_resv_wait_timeout(bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP, + ctx->interruptible, + MAX_SCHEDULE_TIMEOUT); + if (timeout > 0) { + ret = xe_vm_invalidate_vma(vma); + XE_WARN_ON(ret); + } else if (!timeout) { + ret = -ETIME; + } else { + ret = timeout; + } + + } else { + bool vm_resv_locked = false; + struct xe_vm *vm = vma->vm; + + /* + * We need to put the vma on the vm's rebind_list, + * but need the vm resv to do so. If we can't verify + * that we indeed have it locked, put the vma an the + * vm's notifier.rebind_list instead and scoop later. + */ + if (dma_resv_trylock(&vm->resv)) + vm_resv_locked = true; + else if (ctx->resv != &vm->resv) { + spin_lock(&vm->notifier.list_lock); + list_move_tail(&vma->notifier.rebind_link, + &vm->notifier.rebind_list); + spin_unlock(&vm->notifier.list_lock); + continue; + } + + xe_vm_assert_held(vm); + if (list_empty(&vma->rebind_link) && vma->gt_present) + list_add_tail(&vma->rebind_link, &vm->rebind_list); + + if (vm_resv_locked) + dma_resv_unlock(&vm->resv); + } + } + + return ret; +} + +/* + * The dma-buf map_attachment() / unmap_attachment() is hooked up here. 
+ * Note that unmapping the attachment is deferred to the next + * map_attachment time, or to bo destroy (after idling) whichever comes first. + * This is to avoid syncing before unmap_attachment(), assuming that the + * caller relies on idling the reservation object before moving the + * backing store out. Should that assumption not hold, then we will be able + * to unconditionally call unmap_attachment() when moving out to system. + */ +static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo, + struct ttm_resource *old_res, + struct ttm_resource *new_res) +{ + struct dma_buf_attachment *attach = ttm_bo->base.import_attach; + struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt, + ttm); + struct sg_table *sg; + + XE_BUG_ON(!attach); + XE_BUG_ON(!ttm_bo->ttm); + + if (new_res->mem_type == XE_PL_SYSTEM) + goto out; + + if (ttm_bo->sg) { + dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL); + ttm_bo->sg = NULL; + } + + sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); + if (IS_ERR(sg)) + return PTR_ERR(sg); + + ttm_bo->sg = sg; + xe_tt->sg = sg; + +out: + ttm_bo_move_null(ttm_bo, new_res); + + return 0; +} + +/** + * xe_bo_move_notify - Notify subsystems of a pending move + * @bo: The buffer object + * @ctx: The struct ttm_operation_ctx controlling locking and waits. + * + * This function notifies subsystems of an upcoming buffer move. + * Upon receiving such a notification, subsystems should schedule + * halting access to the underlying pages and optionally add a fence + * to the buffer object's dma_resv object, that signals when access is + * stopped. The caller will wait on all dma_resv fences before + * starting the move. + * + * A subsystem may commence access to the object after obtaining + * bindings to the new backing memory under the object lock. + * + * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode, + * negative error code on error. + */ +static int xe_bo_move_notify(struct xe_bo *bo, + const struct ttm_operation_ctx *ctx) +{ + struct ttm_buffer_object *ttm_bo = &bo->ttm; + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + int ret; + + /* + * If this starts to call into many components, consider + * using a notification chain here. + */ + + if (xe_bo_is_pinned(bo)) + return -EINVAL; + + xe_bo_vunmap(bo); + ret = xe_bo_trigger_rebind(xe, bo, ctx); + if (ret) + return ret; + + /* Don't call move_notify() for imported dma-bufs. 
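+	 * Only the exporter calls dma_buf_move_notify() towards its
+	 * importers; for an imported buffer the exporter drives the move,
+	 * so there is nothing for us to broadcast here.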
*/ + if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach) + dma_buf_move_notify(ttm_bo->base.dma_buf); + + return 0; +} + +static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, + struct ttm_operation_ctx *ctx, + struct ttm_resource *new_mem, + struct ttm_place *hop) +{ + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); + struct ttm_resource *old_mem = ttm_bo->resource; + struct ttm_tt *ttm = ttm_bo->ttm; + struct xe_gt *gt = NULL; + struct dma_fence *fence; + bool move_lacks_source; + bool needs_clear; + int ret = 0; + + if (!old_mem) { + if (new_mem->mem_type != TTM_PL_SYSTEM) { + hop->mem_type = TTM_PL_SYSTEM; + hop->flags = TTM_PL_FLAG_TEMPORARY; + ret = -EMULTIHOP; + goto out; + } + + ttm_bo_move_null(ttm_bo, new_mem); + goto out; + } + + if (ttm_bo->type == ttm_bo_type_sg) { + ret = xe_bo_move_notify(bo, ctx); + if (!ret) + ret = xe_bo_move_dmabuf(ttm_bo, old_mem, new_mem); + goto out; + } + + move_lacks_source = !resource_is_vram(old_mem) && + (!ttm || !ttm_tt_is_populated(ttm)); + + needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) || + (!ttm && ttm_bo->type == ttm_bo_type_device); + + if ((move_lacks_source && !needs_clear) || + (old_mem->mem_type == XE_PL_SYSTEM && + new_mem->mem_type == XE_PL_TT)) { + ttm_bo_move_null(ttm_bo, new_mem); + goto out; + } + + if (!move_lacks_source && !xe_bo_is_pinned(bo)) { + ret = xe_bo_move_notify(bo, ctx); + if (ret) + goto out; + } + + if (old_mem->mem_type == XE_PL_TT && + new_mem->mem_type == XE_PL_SYSTEM) { + long timeout = dma_resv_wait_timeout(ttm_bo->base.resv, + DMA_RESV_USAGE_BOOKKEEP, + true, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) { + ret = timeout; + goto out; + } + ttm_bo_move_null(ttm_bo, new_mem); + goto out; + } + + if (!move_lacks_source && + ((old_mem->mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) || + (resource_is_vram(old_mem) && + new_mem->mem_type == XE_PL_SYSTEM))) { + hop->fpfn = 0; + hop->lpfn = 0; + hop->mem_type = XE_PL_TT; + hop->flags = TTM_PL_FLAG_TEMPORARY; + ret = -EMULTIHOP; + goto out; + } + + if (bo->gt) + gt = bo->gt; + else if (resource_is_vram(new_mem)) + gt = mem_type_to_gt(xe, new_mem->mem_type); + else if (resource_is_vram(old_mem)) + gt = mem_type_to_gt(xe, old_mem->mem_type); + + XE_BUG_ON(!gt); + XE_BUG_ON(!gt->migrate); + + trace_xe_bo_move(bo); + xe_device_mem_access_get(xe); + + if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) { + /* + * Kernel memory that is pinned should only be moved on suspend + * / resume, some of the pinned memory is required for the + * device to resume / use the GPU to move other evicted memory + * (user memory) around. This likely could be optimized a bit + * futher where we find the minimum set of pinned memory + * required for resume but for simplity doing a memcpy for all + * pinned memory. 
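+	 * Hence the CPU copy via ttm_bo_move_memcpy() below instead of a
+	 * blit through the GT migrate engine.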
+ */ + ret = xe_bo_vmap(bo); + if (!ret) { + ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem); + + /* Create a new VMAP once kernel BO back in VRAM */ + if (!ret && resource_is_vram(new_mem)) { + void *new_addr = gt->mem.vram.mapping + + (new_mem->start << PAGE_SHIFT); + + XE_BUG_ON(new_mem->start != + bo->placements->fpfn); + + iosys_map_set_vaddr_iomem(&bo->vmap, new_addr); + } + } + } else { + if (move_lacks_source) + fence = xe_migrate_clear(gt->migrate, bo, new_mem, 0); + else + fence = xe_migrate_copy(gt->migrate, bo, old_mem, new_mem); + if (IS_ERR(fence)) { + ret = PTR_ERR(fence); + xe_device_mem_access_put(xe); + goto out; + } + ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true, + new_mem); + dma_fence_put(fence); + } + + xe_device_mem_access_put(xe); + trace_printk("new_mem->mem_type=%d\n", new_mem->mem_type); + +out: + return ret; + +} + +static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *bo, + unsigned long page_offset) +{ + struct xe_device *xe = ttm_to_xe_device(bo->bdev); + struct xe_gt *gt = mem_type_to_gt(xe, bo->resource->mem_type); + struct xe_res_cursor cursor; + + xe_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor); + return (gt->mem.vram.io_start + cursor.start) >> PAGE_SHIFT; +} + +static void __xe_bo_vunmap(struct xe_bo *bo); + +/* + * TODO: Move this function to TTM so we don't rely on how TTM does its + * locking, thereby abusing TTM internals. + */ +static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo) +{ + bool locked; + + XE_WARN_ON(kref_read(&ttm_bo->kref)); + + /* + * We can typically only race with TTM trylocking under the + * lru_lock, which will immediately be unlocked again since + * the ttm_bo refcount is zero at this point. So trylocking *should* + * always succeed here, as long as we hold the lru lock. + */ + spin_lock(&ttm_bo->bdev->lru_lock); + locked = dma_resv_trylock(ttm_bo->base.resv); + spin_unlock(&ttm_bo->bdev->lru_lock); + XE_WARN_ON(!locked); + + return locked; +} + +static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo) +{ + struct dma_resv_iter cursor; + struct dma_fence *fence; + struct dma_fence *replacement = NULL; + struct xe_bo *bo; + + if (!xe_bo_is_xe_bo(ttm_bo)) + return; + + bo = ttm_to_xe_bo(ttm_bo); + XE_WARN_ON(bo->created && kref_read(&ttm_bo->base.refcount)); + + /* + * Corner case where TTM fails to allocate memory and this BOs resv + * still points the VMs resv + */ + if (ttm_bo->base.resv != &ttm_bo->base._resv) + return; + + if (!xe_ttm_bo_lock_in_destructor(ttm_bo)) + return; + + /* + * Scrub the preempt fences if any. The unbind fence is already + * attached to the resv. + * TODO: Don't do this for external bos once we scrub them after + * unbind. + */ + dma_resv_for_each_fence(&cursor, ttm_bo->base.resv, + DMA_RESV_USAGE_BOOKKEEP, fence) { + if (xe_fence_is_xe_preempt(fence) && + !dma_fence_is_signaled(fence)) { + if (!replacement) + replacement = dma_fence_get_stub(); + + dma_resv_replace_fences(ttm_bo->base.resv, + fence->context, + replacement, + DMA_RESV_USAGE_BOOKKEEP); + } + } + dma_fence_put(replacement); + + dma_resv_unlock(ttm_bo->base.resv); +} + +static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo) +{ + if (!xe_bo_is_xe_bo(ttm_bo)) + return; + + /* + * Object is idle and about to be destroyed. Release the + * dma-buf attachment. 
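+	 * This is the unmap that xe_bo_move_dmabuf() deferred; see the
+	 * comment above that function.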
+ */ + if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) { + struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, + struct xe_ttm_tt, ttm); + + dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg, + DMA_BIDIRECTIONAL); + ttm_bo->sg = NULL; + xe_tt->sg = NULL; + } +} + +struct ttm_device_funcs xe_ttm_funcs = { + .ttm_tt_create = xe_ttm_tt_create, + .ttm_tt_populate = xe_ttm_tt_populate, + .ttm_tt_unpopulate = xe_ttm_tt_unpopulate, + .ttm_tt_destroy = xe_ttm_tt_destroy, + .evict_flags = xe_evict_flags, + .move = xe_bo_move, + .io_mem_reserve = xe_ttm_io_mem_reserve, + .io_mem_pfn = xe_ttm_io_mem_pfn, + .release_notify = xe_ttm_bo_release_notify, + .eviction_valuable = ttm_bo_eviction_valuable, + .delete_mem_notify = xe_ttm_bo_delete_mem_notify, +}; + +static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) +{ + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); + + if (bo->ttm.base.import_attach) + drm_prime_gem_destroy(&bo->ttm.base, NULL); + drm_gem_object_release(&bo->ttm.base); + + WARN_ON(!list_empty(&bo->vmas)); + + if (bo->ggtt_node.size) + xe_ggtt_remove_bo(bo->gt->mem.ggtt, bo); + + if (bo->vm && xe_bo_is_user(bo)) + xe_vm_put(bo->vm); + + kfree(bo); +} + +static void xe_gem_object_free(struct drm_gem_object *obj) +{ + /* Our BO reference counting scheme works as follows: + * + * The gem object kref is typically used throughout the driver, + * and the gem object holds a ttm_buffer_object refcount, so + * that when the last gem object reference is put, which is when + * we end up in this function, we put also that ttm_buffer_object + * refcount. Anything using gem interfaces is then no longer + * allowed to access the object in a way that requires a gem + * refcount, including locking the object. + * + * driver ttm callbacks is allowed to use the ttm_buffer_object + * refcount directly if needed. 
+ */ + __xe_bo_vunmap(gem_to_xe_bo(obj)); + ttm_bo_put(container_of(obj, struct ttm_buffer_object, base)); +} + +static bool should_migrate_to_system(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + + return xe_device_in_fault_mode(xe) && bo->props.cpu_atomic; +} + +static vm_fault_t xe_gem_fault(struct vm_fault *vmf) +{ + struct ttm_buffer_object *tbo = vmf->vma->vm_private_data; + struct drm_device *ddev = tbo->base.dev; + vm_fault_t ret; + int idx, r = 0; + + ret = ttm_bo_vm_reserve(tbo, vmf); + if (ret) + return ret; + + if (drm_dev_enter(ddev, &idx)) { + struct xe_bo *bo = ttm_to_xe_bo(tbo); + + trace_xe_bo_cpu_fault(bo); + + if (should_migrate_to_system(bo)) { + r = xe_bo_migrate(bo, XE_PL_TT); + if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR) + ret = VM_FAULT_NOPAGE; + else if (r) + ret = VM_FAULT_SIGBUS; + } + if (!ret) + ret = ttm_bo_vm_fault_reserved(vmf, + vmf->vma->vm_page_prot, + TTM_BO_VM_NUM_PREFAULT); + + drm_dev_exit(idx); + } else { + ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); + } + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) + return ret; + + dma_resv_unlock(tbo->base.resv); + return ret; +} + +static const struct vm_operations_struct xe_gem_vm_ops = { + .fault = xe_gem_fault, + .open = ttm_bo_vm_open, + .close = ttm_bo_vm_close, + .access = ttm_bo_vm_access +}; + +static const struct drm_gem_object_funcs xe_gem_object_funcs = { + .free = xe_gem_object_free, + .mmap = drm_gem_ttm_mmap, + .export = xe_gem_prime_export, + .vm_ops = &xe_gem_vm_ops, +}; + +/** + * xe_bo_alloc - Allocate storage for a struct xe_bo + * + * This funcition is intended to allocate storage to be used for input + * to __xe_bo_create_locked(), in the case a pointer to the bo to be + * created is needed before the call to __xe_bo_create_locked(). + * If __xe_bo_create_locked ends up never to be called, then the + * storage allocated with this function needs to be freed using + * xe_bo_free(). + * + * Return: A pointer to an uninitialized struct xe_bo on success, + * ERR_PTR(-ENOMEM) on error. + */ +struct xe_bo *xe_bo_alloc(void) +{ + struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL); + + if (!bo) + return ERR_PTR(-ENOMEM); + + return bo; +} + +/** + * xe_bo_free - Free storage allocated using xe_bo_alloc() + * @bo: The buffer object storage. + * + * Refer to xe_bo_alloc() documentation for valid use-cases. 
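+ *
+ * A minimal sketch of the intended alloc / create / free pattern follows;
+ * "setup_failed" stands in for whatever hypothetical condition aborts before
+ * __xe_bo_create_locked() is reached:
+ *
+ *	bo = xe_bo_alloc();
+ *	if (IS_ERR(bo))
+ *		return PTR_ERR(bo);
+ *
+ *	if (setup_failed) {
+ *		xe_bo_free(bo);
+ *		return err;
+ *	}
+ *
+ *	bo = __xe_bo_create_locked(xe, bo, gt, resv, size, type, flags);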
+ */ +void xe_bo_free(struct xe_bo *bo) +{ + kfree(bo); +} + +struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, + struct xe_gt *gt, struct dma_resv *resv, + size_t size, enum ttm_bo_type type, + u32 flags) +{ + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + struct ttm_placement *placement; + uint32_t alignment; + int err; + + /* Only kernel objects should set GT */ + XE_BUG_ON(gt && type != ttm_bo_type_kernel); + + if (!bo) { + bo = xe_bo_alloc(); + if (IS_ERR(bo)) + return bo; + } + + if (flags & (XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_VRAM1_BIT) && + !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) && + xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) { + size = ALIGN(size, SZ_64K); + flags |= XE_BO_INTERNAL_64K; + alignment = SZ_64K >> PAGE_SHIFT; + } else { + alignment = SZ_4K >> PAGE_SHIFT; + } + + bo->gt = gt; + bo->size = size; + bo->flags = flags; + bo->ttm.base.funcs = &xe_gem_object_funcs; + bo->props.preferred_mem_class = XE_BO_PROPS_INVALID; + bo->props.preferred_gt = XE_BO_PROPS_INVALID; + bo->props.preferred_mem_type = XE_BO_PROPS_INVALID; + bo->ttm.priority = DRM_XE_VMA_PRIORITY_NORMAL; + INIT_LIST_HEAD(&bo->vmas); + INIT_LIST_HEAD(&bo->pinned_link); + + drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size); + + if (resv) { + ctx.allow_res_evict = true; + ctx.resv = resv; + } + + err = __xe_bo_placement_for_flags(xe, bo, bo->flags); + if (WARN_ON(err)) + return ERR_PTR(err); + + /* Defer populating type_sg bos */ + placement = (type == ttm_bo_type_sg || + bo->flags & XE_BO_DEFER_BACKING) ? &sys_placement : + &bo->placement; + err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type, + placement, alignment, + &ctx, NULL, resv, xe_ttm_bo_destroy); + if (err) + return ERR_PTR(err); + + bo->created = true; + ttm_bo_move_to_lru_tail_unlocked(&bo->ttm); + + return bo; +} + +struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, size_t size, + enum ttm_bo_type type, u32 flags) +{ + struct xe_bo *bo; + int err; + + if (vm) + xe_vm_assert_held(vm); + bo = __xe_bo_create_locked(xe, NULL, gt, vm ? 
&vm->resv : NULL, size, + type, flags); + if (IS_ERR(bo)) + return bo; + + if (vm && xe_bo_is_user(bo)) + xe_vm_get(vm); + bo->vm = vm; + + if (flags & XE_BO_CREATE_GGTT_BIT) { + XE_BUG_ON(!gt); + + err = xe_ggtt_insert_bo(gt->mem.ggtt, bo); + if (err) + goto err_unlock_put_bo; + } + + return bo; + +err_unlock_put_bo: + xe_bo_unlock_vm_held(bo); + xe_bo_put(bo); + return ERR_PTR(err); +} + +struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, size_t size, + enum ttm_bo_type type, u32 flags) +{ + struct xe_bo *bo = xe_bo_create_locked(xe, gt, vm, size, type, flags); + + if (!IS_ERR(bo)) + xe_bo_unlock_vm_held(bo); + + return bo; +} + +struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, size_t size, + enum ttm_bo_type type, u32 flags) +{ + struct xe_bo *bo = xe_bo_create_locked(xe, gt, vm, size, type, flags); + int err; + + if (IS_ERR(bo)) + return bo; + + err = xe_bo_pin(bo); + if (err) + goto err_put; + + err = xe_bo_vmap(bo); + if (err) + goto err_unpin; + + xe_bo_unlock_vm_held(bo); + + return bo; + +err_unpin: + xe_bo_unpin(bo); +err_put: + xe_bo_unlock_vm_held(bo); + xe_bo_put(bo); + return ERR_PTR(err); +} + +struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt, + const void *data, size_t size, + enum ttm_bo_type type, u32 flags) +{ + struct xe_bo *bo = xe_bo_create_pin_map(xe, gt, NULL, + ALIGN(size, PAGE_SIZE), + type, flags); + if (IS_ERR(bo)) + return bo; + + xe_map_memcpy_to(xe, &bo->vmap, 0, data, size); + + return bo; +} + +/* + * XXX: This is in the VM bind data path, likely should calculate this once and + * store, with a recalculation if the BO is moved. + */ +static uint64_t vram_region_io_offset(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + struct xe_gt *gt = mem_type_to_gt(xe, bo->ttm.resource->mem_type); + + return gt->mem.vram.io_start - xe->mem.vram.io_start; +} + +/** + * xe_bo_pin_external - pin an external BO + * @bo: buffer object to be pinned + * + * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD) + * BO. Unique call compared to xe_bo_pin as this function has it own set of + * asserts and code to ensure evict / restore on suspend / resume. + * + * Returns 0 for success, negative error code otherwise. + */ +int xe_bo_pin_external(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + int err; + + XE_BUG_ON(bo->vm); + XE_BUG_ON(!xe_bo_is_user(bo)); + + if (!xe_bo_is_pinned(bo)) { + err = xe_bo_validate(bo, NULL, false); + if (err) + return err; + + if (xe_bo_is_vram(bo)) { + spin_lock(&xe->pinned.lock); + list_add_tail(&bo->pinned_link, + &xe->pinned.external_vram); + spin_unlock(&xe->pinned.lock); + } + } + + ttm_bo_pin(&bo->ttm); + + /* + * FIXME: If we always use the reserve / unreserve functions for locking + * we do not need this. + */ + ttm_bo_move_to_lru_tail_unlocked(&bo->ttm); + + return 0; +} + +int xe_bo_pin(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + int err; + + /* We currently don't expect user BO to be pinned */ + XE_BUG_ON(xe_bo_is_user(bo)); + + /* Pinned object must be in GGTT or have pinned flag */ + XE_BUG_ON(!(bo->flags & (XE_BO_CREATE_PINNED_BIT | + XE_BO_CREATE_GGTT_BIT))); + + /* + * No reason we can't support pinning imported dma-bufs we just don't + * expect to pin an imported dma-buf. 
+ */ + XE_BUG_ON(bo->ttm.base.import_attach); + + /* We only expect at most 1 pin */ + XE_BUG_ON(xe_bo_is_pinned(bo)); + + err = xe_bo_validate(bo, NULL, false); + if (err) + return err; + + /* + * For pinned objects in on DGFX, we expect these objects to be in + * contiguous VRAM memory. Required eviction / restore during suspend / + * resume (force restore to same physical address). + */ + if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) && + bo->flags & XE_BO_INTERNAL_TEST)) { + struct ttm_place *place = &(bo->placements[0]); + bool lmem; + + XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS)); + XE_BUG_ON(!mem_type_is_vram(place->mem_type)); + + place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &lmem) - + vram_region_io_offset(bo)) >> PAGE_SHIFT; + place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT); + + spin_lock(&xe->pinned.lock); + list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present); + spin_unlock(&xe->pinned.lock); + } + + ttm_bo_pin(&bo->ttm); + + /* + * FIXME: If we always use the reserve / unreserve functions for locking + * we do not need this. + */ + ttm_bo_move_to_lru_tail_unlocked(&bo->ttm); + + return 0; +} + +/** + * xe_bo_unpin_external - unpin an external BO + * @bo: buffer object to be unpinned + * + * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD) + * BO. Unique call compared to xe_bo_unpin as this function has it own set of + * asserts and code to ensure evict / restore on suspend / resume. + * + * Returns 0 for success, negative error code otherwise. + */ +void xe_bo_unpin_external(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + + XE_BUG_ON(bo->vm); + XE_BUG_ON(!xe_bo_is_pinned(bo)); + XE_BUG_ON(!xe_bo_is_user(bo)); + + if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) { + spin_lock(&xe->pinned.lock); + list_del_init(&bo->pinned_link); + spin_unlock(&xe->pinned.lock); + } + + ttm_bo_unpin(&bo->ttm); + + /* + * FIXME: If we always use the reserve / unreserve functions for locking + * we do not need this. + */ + ttm_bo_move_to_lru_tail_unlocked(&bo->ttm); +} + +void xe_bo_unpin(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + + XE_BUG_ON(bo->ttm.base.import_attach); + XE_BUG_ON(!xe_bo_is_pinned(bo)); + + if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) && + bo->flags & XE_BO_INTERNAL_TEST)) { + XE_BUG_ON(list_empty(&bo->pinned_link)); + + spin_lock(&xe->pinned.lock); + list_del_init(&bo->pinned_link); + spin_unlock(&xe->pinned.lock); + } + + ttm_bo_unpin(&bo->ttm); +} + +/** + * xe_bo_validate() - Make sure the bo is in an allowed placement + * @bo: The bo, + * @vm: Pointer to a the vm the bo shares a locked dma_resv object with, or + * NULL. Used together with @allow_res_evict. + * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's + * reservation object. + * + * Make sure the bo is in allowed placement, migrating it if necessary. If + * needed, other bos will be evicted. If bos selected for eviction shares + * the @vm's reservation object, they can be evicted iff @allow_res_evict is + * set to true, otherwise they will be bypassed. + * + * Return: 0 on success, negative error code on failure. May return + * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal. 
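+ *
+ * Hypothetical call pattern for a bo sharing @vm's reservation object; both
+ * the vm lock and the shared dma_resv are assumed to already be held by the
+ * caller:
+ *
+ *	lockdep_assert_held(&vm->lock);
+ *	xe_vm_assert_held(vm);
+ *	err = xe_bo_validate(bo, vm, true);
+ *	if (err)
+ *		return err;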
+ */ +int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) +{ + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + + if (vm) { + lockdep_assert_held(&vm->lock); + xe_vm_assert_held(vm); + + ctx.allow_res_evict = allow_res_evict; + ctx.resv = &vm->resv; + } + + return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); +} + +bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo) +{ + if (bo->destroy == &xe_ttm_bo_destroy) + return true; + + return false; +} + +dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, + size_t page_size, bool *is_lmem) +{ + struct xe_res_cursor cur; + u64 page; + + if (!READ_ONCE(bo->ttm.pin_count)) + xe_bo_assert_held(bo); + + XE_BUG_ON(page_size > PAGE_SIZE); + page = offset >> PAGE_SHIFT; + offset &= (PAGE_SIZE - 1); + + *is_lmem = xe_bo_is_vram(bo); + + if (!*is_lmem) { + XE_BUG_ON(!bo->ttm.ttm); + + xe_res_first_sg(xe_bo_get_sg(bo), page << PAGE_SHIFT, + page_size, &cur); + return xe_res_dma(&cur) + offset; + } else { + struct xe_res_cursor cur; + + xe_res_first(bo->ttm.resource, page << PAGE_SHIFT, + page_size, &cur); + return cur.start + offset + vram_region_io_offset(bo); + } +} + +int xe_bo_vmap(struct xe_bo *bo) +{ + void *virtual; + bool is_iomem; + int ret; + + xe_bo_assert_held(bo); + + if (!iosys_map_is_null(&bo->vmap)) + return 0; + + /* + * We use this more or less deprecated interface for now since + * ttm_bo_vmap() doesn't offer the optimization of kmapping + * single page bos, which is done here. + * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap + * to use struct iosys_map. + */ + ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap); + if (ret) + return ret; + + virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem); + if (is_iomem) + iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual); + else + iosys_map_set_vaddr(&bo->vmap, virtual); + + return 0; +} + +static void __xe_bo_vunmap(struct xe_bo *bo) +{ + if (!iosys_map_is_null(&bo->vmap)) { + iosys_map_clear(&bo->vmap); + ttm_bo_kunmap(&bo->kmap); + } +} + +void xe_bo_vunmap(struct xe_bo *bo) +{ + xe_bo_assert_held(bo); + __xe_bo_vunmap(bo); +} + +int xe_gem_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_gem_create *args = data; + struct ww_acquire_ctx ww; + struct xe_vm *vm = NULL; + struct xe_bo *bo; + unsigned bo_flags = XE_BO_CREATE_USER_BIT; + u32 handle; + int err; + + if (XE_IOCTL_ERR(xe, args->extensions)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->flags & + ~(XE_GEM_CREATE_FLAG_DEFER_BACKING | + XE_GEM_CREATE_FLAG_SCANOUT | + xe->info.mem_region_mask))) + return -EINVAL; + + /* at least one memory type must be specified */ + if (XE_IOCTL_ERR(xe, !(args->flags & xe->info.mem_region_mask))) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->handle)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->size > SIZE_MAX)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->size & ~PAGE_MASK)) + return -EINVAL; + + if (args->vm_id) { + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_ERR(xe, !vm)) + return -ENOENT; + err = xe_vm_lock(vm, &ww, 0, true); + if (err) { + xe_vm_put(vm); + return err; + } + } + + if (args->flags & XE_GEM_CREATE_FLAG_DEFER_BACKING) + bo_flags |= XE_BO_DEFER_BACKING; + + if (args->flags & XE_GEM_CREATE_FLAG_SCANOUT) + bo_flags |= XE_BO_SCANOUT_BIT; + + bo_flags |= args->flags << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1); + bo = xe_bo_create(xe, NULL, vm, 
args->size, ttm_bo_type_device, + bo_flags); + if (vm) { + xe_vm_unlock(vm, &ww); + xe_vm_put(vm); + } + + if (IS_ERR(bo)) + return PTR_ERR(bo); + + err = drm_gem_handle_create(file, &bo->ttm.base, &handle); + xe_bo_put(bo); + if (err) + return err; + + args->handle = handle; + + return 0; +} + +int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct drm_xe_gem_mmap_offset *args = data; + struct drm_gem_object *gem_obj; + + if (XE_IOCTL_ERR(xe, args->extensions)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->flags)) + return -EINVAL; + + gem_obj = drm_gem_object_lookup(file, args->handle); + if (XE_IOCTL_ERR(xe, !gem_obj)) + return -ENOENT; + + /* The mmap offset was set up at BO allocation time. */ + args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node); + + xe_bo_put(gem_to_xe_bo(gem_obj)); + return 0; +} + +int xe_bo_lock(struct xe_bo *bo, struct ww_acquire_ctx *ww, + int num_resv, bool intr) +{ + struct ttm_validate_buffer tv_bo; + LIST_HEAD(objs); + LIST_HEAD(dups); + + XE_BUG_ON(!ww); + + tv_bo.num_shared = num_resv; + tv_bo.bo = &bo->ttm;; + list_add_tail(&tv_bo.head, &objs); + + return ttm_eu_reserve_buffers(ww, &objs, intr, &dups); +} + +void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww) +{ + dma_resv_unlock(bo->ttm.base.resv); + ww_acquire_fini(ww); +} + +/** + * xe_bo_can_migrate - Whether a buffer object likely can be migrated + * @bo: The buffer object to migrate + * @mem_type: The TTM memory type intended to migrate to + * + * Check whether the buffer object supports migration to the + * given memory type. Note that pinning may affect the ability to migrate as + * returned by this function. + * + * This function is primarily intended as a helper for checking the + * possibility to migrate buffer objects and can be called without + * the object lock held. + * + * Return: true if migration is possible, false otherwise. + */ +bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type) +{ + unsigned int cur_place; + + if (bo->ttm.type == ttm_bo_type_kernel) + return true; + + if (bo->ttm.type == ttm_bo_type_sg) + return false; + + for (cur_place = 0; cur_place < bo->placement.num_placement; + cur_place++) { + if (bo->placements[cur_place].mem_type == mem_type) + return true; + } + + return false; +} + +static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place) +{ + memset(place, 0, sizeof(*place)); + place->mem_type = mem_type; +} + +/** + * xe_bo_migrate - Migrate an object to the desired region id + * @bo: The buffer object to migrate. + * @mem_type: The TTM region type to migrate to. + * + * Attempt to migrate the buffer object to the desired memory region. The + * buffer object may not be pinned, and must be locked. + * On successful completion, the object memory type will be updated, + * but an async migration task may not have completed yet, and to + * accomplish that, the object's kernel fences must be signaled with + * the object lock held. + * + * Return: 0 on success. Negative error code on failure. In particular may + * return -EINTR or -ERESTARTSYS if signal pending. 
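+ *
+ * A sketch of the expected usage, migrating a bo to XE_PL_TT under its own
+ * lock (retry on -EINTR / -ERESTARTSYS omitted):
+ *
+ *	err = xe_bo_lock(bo, &ww, 0, true);
+ *	if (!err) {
+ *		err = xe_bo_migrate(bo, XE_PL_TT);
+ *		xe_bo_unlock(bo, &ww);
+ *	}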
+ */ +int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) +{ + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + struct ttm_placement placement; + struct ttm_place requested; + + xe_bo_assert_held(bo); + + if (bo->ttm.resource->mem_type == mem_type) + return 0; + + if (xe_bo_is_pinned(bo)) + return -EBUSY; + + if (!xe_bo_can_migrate(bo, mem_type)) + return -EINVAL; + + xe_place_from_ttm_type(mem_type, &requested); + placement.num_placement = 1; + placement.num_busy_placement = 1; + placement.placement = &requested; + placement.busy_placement = &requested; + + return ttm_bo_validate(&bo->ttm, &placement, &ctx); +} + +/** + * xe_bo_evict - Evict an object to evict placement + * @bo: The buffer object to migrate. + * @force_alloc: Set force_alloc in ttm_operation_ctx + * + * On successful completion, the object memory will be moved to evict + * placement. Ths function blocks until the object has been fully moved. + * + * Return: 0 on success. Negative error code on failure. + */ +int xe_bo_evict(struct xe_bo *bo, bool force_alloc) +{ + struct ttm_operation_ctx ctx = { + .interruptible = false, + .no_wait_gpu = false, + .force_alloc = force_alloc, + }; + struct ttm_placement placement; + int ret; + + xe_evict_flags(&bo->ttm, &placement); + ret = ttm_bo_validate(&bo->ttm, &placement, &ctx); + if (ret) + return ret; + + dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL, + false, MAX_SCHEDULE_TIMEOUT); + + return 0; +} + +/** + * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when + * placed in system memory. + * @bo: The xe_bo + * + * If a bo has an allowable placement in XE_PL_TT memory, it can't use + * flat CCS compression, because the GPU then has no way to access the + * CCS metadata using relevant commands. For the opposite case, we need to + * allocate storage for the CCS metadata when the BO is not resident in + * VRAM memory. + * + * Return: true if extra pages need to be allocated, false otherwise. + */ +bool xe_bo_needs_ccs_pages(struct xe_bo *bo) +{ + return bo->ttm.type == ttm_bo_type_device && + !(bo->flags & XE_BO_CREATE_SYSTEM_BIT) && + (bo->flags & (XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_VRAM1_BIT)); +} + +/** + * __xe_bo_release_dummy() - Dummy kref release function + * @kref: The embedded struct kref. + * + * Dummy release function for xe_bo_put_deferred(). Keep off. + */ +void __xe_bo_release_dummy(struct kref *kref) +{ +} + +/** + * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred(). + * @deferred: The lockless list used for the call to xe_bo_put_deferred(). + * + * Puts all bos whose put was deferred by xe_bo_put_deferred(). + * The @deferred list can be either an onstack local list or a global + * shared list used by a workqueue. + */ +void xe_bo_put_commit(struct llist_head *deferred) +{ + struct llist_node *freed; + struct xe_bo *bo, *next; + + if (!deferred) + return; + + freed = llist_del_all(deferred); + if (!freed) + return; + + llist_for_each_entry_safe(bo, next, freed, freed) + drm_gem_object_free(&bo->ttm.base.refcount); +} + +/** + * xe_bo_dumb_create - Create a dumb bo as backing for a fb + * @file_priv: ... + * @dev: ... + * @args: ... + * + * See dumb_create() hook in include/drm/drm_drv.h + * + * Return: ... 
+ */ +int xe_bo_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_bo *bo; + uint32_t handle; + int cpp = DIV_ROUND_UP(args->bpp, 8); + int err; + u32 page_size = max_t(u32, PAGE_SIZE, + xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K); + + args->pitch = ALIGN(args->width * cpp, 64); + args->size = ALIGN(mul_u32_u32(args->pitch, args->height), + page_size); + + bo = xe_bo_create(xe, NULL, NULL, args->size, ttm_bo_type_device, + XE_BO_CREATE_VRAM_IF_DGFX(to_gt(xe)) | + XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle); + /* drop reference from allocate - handle holds it now */ + drm_gem_object_put(&bo->ttm.base); + if (!err) + args->handle = handle; + return err; +} + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_bo.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h new file mode 100644 index 000000000000..1a49c0a3c4c6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -0,0 +1,290 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_BO_H_ +#define _XE_BO_H_ + +#include "xe_bo_types.h" +#include "xe_macros.h" +#include "xe_vm_types.h" + +#define XE_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ + +#define XE_BO_CREATE_USER_BIT BIT(1) +#define XE_BO_CREATE_SYSTEM_BIT BIT(2) +#define XE_BO_CREATE_VRAM0_BIT BIT(3) +#define XE_BO_CREATE_VRAM1_BIT BIT(4) +#define XE_BO_CREATE_VRAM_IF_DGFX(gt) \ + (IS_DGFX(gt_to_xe(gt)) ? XE_BO_CREATE_VRAM0_BIT << gt->info.vram_id : \ + XE_BO_CREATE_SYSTEM_BIT) +#define XE_BO_CREATE_GGTT_BIT BIT(5) +#define XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT BIT(6) +#define XE_BO_CREATE_PINNED_BIT BIT(7) +#define XE_BO_DEFER_BACKING BIT(8) +#define XE_BO_SCANOUT_BIT BIT(9) +/* this one is trigger internally only */ +#define XE_BO_INTERNAL_TEST BIT(30) +#define XE_BO_INTERNAL_64K BIT(31) + +#define PPAT_UNCACHED GENMASK_ULL(4, 3) +#define PPAT_CACHED_PDE 0 +#define PPAT_CACHED BIT_ULL(7) +#define PPAT_DISPLAY_ELLC BIT_ULL(4) + +#define GEN8_PTE_SHIFT 12 +#define GEN8_PAGE_SIZE (1 << GEN8_PTE_SHIFT) +#define GEN8_PTE_MASK (GEN8_PAGE_SIZE - 1) +#define GEN8_PDE_SHIFT (GEN8_PTE_SHIFT - 3) +#define GEN8_PDES (1 << GEN8_PDE_SHIFT) +#define GEN8_PDE_MASK (GEN8_PDES - 1) + +#define GEN8_64K_PTE_SHIFT 16 +#define GEN8_64K_PAGE_SIZE (1 << GEN8_64K_PTE_SHIFT) +#define GEN8_64K_PTE_MASK (GEN8_64K_PAGE_SIZE - 1) +#define GEN8_64K_PDE_MASK (GEN8_PDE_MASK >> 4) + +#define GEN8_PDE_PS_2M BIT_ULL(7) +#define GEN8_PDPE_PS_1G BIT_ULL(7) +#define GEN8_PDE_IPS_64K BIT_ULL(11) + +#define GEN12_GGTT_PTE_LM BIT_ULL(1) +#define GEN12_USM_PPGTT_PTE_AE BIT_ULL(10) +#define GEN12_PPGTT_PTE_LM BIT_ULL(11) +#define GEN12_PDE_64K BIT_ULL(6) +#define GEN12_PTE_PS64 BIT_ULL(8) + +#define GEN8_PAGE_PRESENT BIT_ULL(0) +#define GEN8_PAGE_RW BIT_ULL(1) + +#define PTE_READ_ONLY BIT(0) + +#define XE_PL_SYSTEM TTM_PL_SYSTEM +#define XE_PL_TT TTM_PL_TT +#define XE_PL_VRAM0 TTM_PL_VRAM +#define XE_PL_VRAM1 (XE_PL_VRAM0 + 1) + +#define XE_BO_PROPS_INVALID (-1) + +struct sg_table; + +struct xe_bo *xe_bo_alloc(void); +void xe_bo_free(struct xe_bo *bo); + +struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, + struct xe_gt *gt, struct dma_resv *resv, + size_t size, enum ttm_bo_type type, + u32 flags); +struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt, + struct 
xe_vm *vm, size_t size, + enum ttm_bo_type type, u32 flags); +struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, size_t size, + enum ttm_bo_type type, u32 flags); +struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, size_t size, + enum ttm_bo_type type, u32 flags); +struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt, + const void *data, size_t size, + enum ttm_bo_type type, u32 flags); + +int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, + u32 bo_flags); + +static inline struct xe_bo *ttm_to_xe_bo(const struct ttm_buffer_object *bo) +{ + return container_of(bo, struct xe_bo, ttm); +} + +static inline struct xe_bo *gem_to_xe_bo(const struct drm_gem_object *obj) +{ + return container_of(obj, struct xe_bo, ttm.base); +} + +#define xe_bo_device(bo) ttm_to_xe_device((bo)->ttm.bdev) + +static inline struct xe_bo *xe_bo_get(struct xe_bo *bo) +{ + if (bo) + drm_gem_object_get(&bo->ttm.base); + + return bo; +} + +static inline void xe_bo_put(struct xe_bo *bo) +{ + if (bo) + drm_gem_object_put(&bo->ttm.base); +} + +static inline void xe_bo_assert_held(struct xe_bo *bo) +{ + if (bo) + dma_resv_assert_held((bo)->ttm.base.resv); +} + +int xe_bo_lock(struct xe_bo *bo, struct ww_acquire_ctx *ww, + int num_resv, bool intr); + +void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww); + +static inline void xe_bo_unlock_vm_held(struct xe_bo *bo) +{ + if (bo) { + XE_BUG_ON(bo->vm && bo->ttm.base.resv != &bo->vm->resv); + if (bo->vm) + xe_vm_assert_held(bo->vm); + else + dma_resv_unlock(bo->ttm.base.resv); + } +} + +static inline void xe_bo_lock_no_vm(struct xe_bo *bo, + struct ww_acquire_ctx *ctx) +{ + if (bo) { + XE_BUG_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg && + bo->ttm.base.resv != &bo->ttm.base._resv)); + dma_resv_lock(bo->ttm.base.resv, ctx); + } +} + +static inline void xe_bo_unlock_no_vm(struct xe_bo *bo) +{ + if (bo) { + XE_BUG_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg && + bo->ttm.base.resv != &bo->ttm.base._resv)); + dma_resv_unlock(bo->ttm.base.resv); + } +} + +int xe_bo_pin_external(struct xe_bo *bo); +int xe_bo_pin(struct xe_bo *bo); +void xe_bo_unpin_external(struct xe_bo *bo); +void xe_bo_unpin(struct xe_bo *bo); +int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict); + +static inline bool xe_bo_is_pinned(struct xe_bo *bo) +{ + return bo->ttm.pin_count; +} + +static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo) +{ + if (likely(bo)) { + xe_bo_lock_no_vm(bo, NULL); + xe_bo_unpin(bo); + xe_bo_unlock_no_vm(bo); + + xe_bo_put(bo); + } +} + +bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo); +dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, + size_t page_size, bool *is_lmem); + +static inline dma_addr_t +xe_bo_main_addr(struct xe_bo *bo, size_t page_size) +{ + bool is_lmem; + + return xe_bo_addr(bo, 0, page_size, &is_lmem); +} + +static inline u32 +xe_bo_ggtt_addr(struct xe_bo *bo) +{ + XE_BUG_ON(bo->ggtt_node.size > bo->size); + XE_BUG_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32)); + return bo->ggtt_node.start; +} + +int xe_bo_vmap(struct xe_bo *bo); +void xe_bo_vunmap(struct xe_bo *bo); + +bool mem_type_is_vram(u32 mem_type); +bool xe_bo_is_vram(struct xe_bo *bo); + +bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type); + +int xe_bo_migrate(struct xe_bo *bo, u32 mem_type); +int xe_bo_evict(struct xe_bo *bo, bool force_alloc); + +extern struct ttm_device_funcs xe_ttm_funcs; + +int xe_gem_create_ioctl(struct 
drm_device *dev, void *data, + struct drm_file *file); +int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_bo_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args); + +bool xe_bo_needs_ccs_pages(struct xe_bo *bo); + +static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo) +{ + return PAGE_ALIGN(bo->ttm.base.size); +} + +void __xe_bo_release_dummy(struct kref *kref); + +/** + * xe_bo_put_deferred() - Put a buffer object with delayed final freeing + * @bo: The bo to put. + * @deferred: List to which to add the buffer object if we cannot put, or + * NULL if the function is to put unconditionally. + * + * Since the final freeing of an object includes both sleeping and (!) + * memory allocation in the dma_resv individualization, it's not ok + * to put an object from atomic context nor from within a held lock + * tainted by reclaim. In such situations we want to defer the final + * freeing until we've exited the restricting context, or in the worst + * case to a workqueue. + * This function either puts the object if possible without the refcount + * reaching zero, or adds it to the @deferred list if that was not possible. + * The caller needs to follow up with a call to xe_bo_put_commit() to actually + * put the bo iff this function returns true. It's safe to always + * follow up with a call to xe_bo_put_commit(). + * TODO: It's TTM that is the villain here. Perhaps TTM should add an + * interface like this. + * + * Return: true if @bo was the first object put on the @freed list, + * false otherwise. + */ +static inline bool +xe_bo_put_deferred(struct xe_bo *bo, struct llist_head *deferred) +{ + if (!deferred) { + xe_bo_put(bo); + return false; + } + + if (!kref_put(&bo->ttm.base.refcount, __xe_bo_release_dummy)) + return false; + + return llist_add(&bo->freed, deferred); +} + +void xe_bo_put_commit(struct llist_head *deferred); + +struct sg_table *xe_bo_get_sg(struct xe_bo *bo); + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +/** + * xe_bo_is_mem_type - Whether the bo currently resides in the given + * TTM memory type + * @bo: The bo to check. + * @mem_type: The TTM memory type. + * + * Return: true iff the bo resides in @mem_type, false otherwise. + */ +static inline bool xe_bo_is_mem_type(struct xe_bo *bo, u32 mem_type) +{ + xe_bo_assert_held(bo); + return bo->ttm.resource->mem_type == mem_type; +} +#endif +#endif diff --git a/drivers/gpu/drm/xe/xe_bo_doc.h b/drivers/gpu/drm/xe/xe_bo_doc.h new file mode 100644 index 000000000000..f57d440cc95a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bo_doc.h @@ -0,0 +1,179 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_BO_DOC_H_ +#define _XE_BO_DOC_H_ + +/** + * DOC: Buffer Objects (BO) + * + * BO management + * ============= + * + * TTM manages (placement, eviction, etc...) all BOs in XE. + * + * BO creation + * =========== + * + * Create a chunk of memory which can be used by the GPU. Placement rules + * (sysmem or vram region) passed in upon creation. TTM handles placement of BO + * and can trigger eviction of other BOs to make space for the new BO. + * + * Kernel BOs + * ---------- + * + * A kernel BO is created as part of driver load (e.g. uC firmware images, GuC + * ADS, etc...) or a BO created as part of a user operation which requires + * a kernel BO (e.g. engine state, memory for page tables, etc...). 
These BOs
+ * are typically mapped in the GGTT (any kernel BOs aside from memory for page
+ * tables are in the GGTT), are pinned (can't move or be evicted at runtime),
+ * have a vmap (XE can access the memory via the xe_map layer) and have
+ * contiguous physical memory.
+ *
+ * More details of why kernel BOs are pinned and contiguous below.
+ *
+ * User BOs
+ * --------
+ *
+ * A user BO is created via the DRM_IOCTL_XE_GEM_CREATE IOCTL. Once it is
+ * created the BO can be mmap'd (via DRM_IOCTL_XE_GEM_MMAP_OFFSET) for user
+ * access and it can be bound for GPU access (via DRM_IOCTL_XE_VM_BIND). All
+ * user BOs are evictable and user BOs are never pinned by XE. The allocation of
+ * the backing store can be deferred from creation time until first use, which
+ * is either mmap, bind, or pagefault.
+ *
+ * Private BOs
+ * ~~~~~~~~~~~
+ *
+ * A private BO is a user BO created with a valid VM argument passed into the
+ * create IOCTL. If a BO is private it cannot be exported via prime FD and
+ * mappings can only be created for the BO within the VM it is tied to. Lastly,
+ * the BO dma-resv slots / lock point to the VM's dma-resv slots / lock (all
+ * private BOs to a VM share common dma-resv slots / lock).
+ *
+ * External BOs
+ * ~~~~~~~~~~~~
+ *
+ * An external BO is a user BO created with a NULL VM argument passed into the
+ * create IOCTL. An external BO can be shared with different UMDs / devices via
+ * prime FD and the BO can be mapped into multiple VMs. An external BO has its
+ * own unique dma-resv slots / lock. An external BO will be in an array of all
+ * VMs which have a mapping of the BO. This allows VMs to lookup and lock all
+ * external BOs mapped in the VM as needed.
+ *
+ * BO placement
+ * ~~~~~~~~~~~~
+ *
+ * When a user BO is created, a mask of valid placements is passed indicating
+ * which memory regions are considered valid.
+ *
+ * The memory region information is available via query uAPI (TODO: add link).
+ *
+ * BO validation
+ * =============
+ *
+ * BO validation (ttm_bo_validate) refers to ensuring a BO has a valid
+ * placement. If a BO was swapped out to temporary storage, a validation call
+ * will trigger a move back to a valid placement (a location where the GPU can
+ * access the BO). Validation of a BO may evict other BOs to make room for the
+ * BO being validated.
+ *
+ * BO eviction / moving
+ * ====================
+ *
+ * All eviction (or in other words, moving a BO from one memory location to
+ * another) is routed through TTM with a callback into XE.
+ *
+ * Runtime eviction
+ * ----------------
+ *
+ * Runtime eviction refers to TTM deciding, during normal operation, that it
+ * needs to move a BO. Typically this is because TTM needs to make room for
+ * another BO and the evicted BO is the first BO on the LRU list that is not
+ * locked.
+ *
+ * An example of this is a new BO which can only be placed in VRAM but there is
+ * no space left in VRAM. There could be multiple BOs which have sysmem and VRAM
+ * placement rules and currently reside in VRAM; TTM will trigger a move of one
+ * (or more) of these BOs until there is room in VRAM to place the new BO. The
+ * evicted BO(s) are valid but still need new bindings before the BO is used
+ * again (exec or compute mode rebind worker).
+ *
+ * Another example would be that TTM can't find a BO to evict which has another
+ * valid placement. In this case TTM will evict one (or multiple) unlocked BO(s)
+ * to a temporary, unreachable (invalid) placement. The evicted BO(s) are
+ * invalid and, before their next use, need to be moved to a valid placement and
+ * rebound.
+ *
+ * In both cases, moves of these BOs are scheduled behind the fences in the BO's
+ * dma-resv slots.
+ *
+ * WW locking tries to ensure that, even if two VMs each want 51% of the memory,
+ * forward progress is made on both VMs.
+ *
+ * Runtime eviction uses a per-GT migration engine (TODO: link to migration
+ * engine doc) to do a GPU memcpy from one location to another.
+ *
+ * Rebinds after runtime eviction
+ * ------------------------------
+ *
+ * When BOs are moved, every mapping (VMA) of the BO needs to be rebound before
+ * the BO is used again. Every VMA is added to an evicted list of its VM when
+ * the BO is moved. This is safe because of the VM locking structure (TODO: link
+ * to VM locking doc). On the next use of a VM (exec or compute mode rebind
+ * worker) the evicted VMA list is checked and rebinds are triggered. In the
+ * case of a faulting VM, the rebind is done in the page fault handler.
+ *
+ * Suspend / resume eviction of VRAM
+ * ---------------------------------
+ *
+ * During device suspend / resume VRAM may lose power, which means the contents
+ * of VRAM are lost. Thus BOs present in VRAM at the time of suspend must be
+ * moved to sysmem in order for their contents to be saved.
+ *
+ * A simple TTM call (ttm_resource_manager_evict_all) can move all non-pinned
+ * (user) BOs to sysmem. External BOs that are pinned need to be manually
+ * evicted with a simple loop + xe_bo_evict call. It gets a little trickier
+ * with kernel BOs.
+ *
+ * Some kernel BOs are used by the GT migration engine to do moves, thus we
+ * can't move all of the BOs via the GT migration engine. For simplicity, use a
+ * TTM memcpy (CPU) to move any kernel (pinned) BO on either suspend or resume.
+ *
+ * Some kernel BOs need to be restored to the exact same physical location. TTM
+ * makes this rather easy but the caveat is the memory must be contiguous. Again
+ * for simplicity, we enforce that all kernel (pinned) BOs are contiguous and
+ * restored to the same physical location.
+ *
+ * Pinned external BOs in VRAM are restored on resume via the GPU.
+ *
+ * Rebinds after suspend / resume
+ * ------------------------------
+ *
+ * Most kernel BOs have GGTT mappings which must be restored during the resume
+ * process. All user BOs are rebound after validation on their next use.
+ *
+ * Future work
+ * ===========
+ *
+ * Trim the list of BOs which is saved / restored via TTM memcpy on suspend /
+ * resume. All we really need to save / restore via TTM memcpy is the memory
+ * required for the GuC to load and the memory for the GT migrate engine to
+ * operate.
+ *
+ * Do not require kernel BOs to be contiguous in physical memory / restored to
+ * the same physical address on resume. In all likelihood the only memory that
+ * needs to be restored to the same physical address is memory used for page
+ * tables. All of that memory is allocated one page at a time, so the contiguous
+ * requirement isn't needed. Some work on the vmap code would need to be done if
+ * kernel BOs are not contiguous too.
+ *
+ * Make some kernel BOs evictable rather than pinned. An example of this would
+ * be engine state; in all likelihood, if the dma-resv slots of these BOs were
+ * properly used rather than pinning, we could safely evict + rebind these BOs
+ * as needed.
+ *
+ * Some kernel BOs do not need to be restored on resume (e.g.
GuC ADS as that is + * repopulated on resume), add flag to mark such objects as no save / restore. + */ + +#endif diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c new file mode 100644 index 000000000000..7046dc203138 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_bo.h" +#include "xe_bo_evict.h" +#include "xe_device.h" +#include "xe_ggtt.h" +#include "xe_gt.h" + +/** + * xe_bo_evict_all - evict all BOs from VRAM + * + * @xe: xe device + * + * Evict non-pinned user BOs first (via GPU), evict pinned external BOs next + * (via GPU), wait for evictions, and finally evict pinned kernel BOs via CPU. + * All eviction magic done via TTM calls. + * + * Evict == move VRAM BOs to temporary (typically system) memory. + * + * This function should be called before the device goes into a suspend state + * where the VRAM loses power. + */ +int xe_bo_evict_all(struct xe_device *xe) +{ + struct ttm_device *bdev = &xe->ttm; + struct ww_acquire_ctx ww; + struct xe_bo *bo; + struct xe_gt *gt; + struct list_head still_in_list; + u32 mem_type; + u8 id; + int ret; + + if (!IS_DGFX(xe)) + return 0; + + /* User memory */ + for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { + struct ttm_resource_manager *man = + ttm_manager_type(bdev, mem_type); + + if (man) { + ret = ttm_resource_manager_evict_all(bdev, man); + if (ret) + return ret; + } + } + + /* Pinned user memory in VRAM */ + INIT_LIST_HEAD(&still_in_list); + spin_lock(&xe->pinned.lock); + for (;;) { + bo = list_first_entry_or_null(&xe->pinned.external_vram, + typeof(*bo), pinned_link); + if (!bo) + break; + xe_bo_get(bo); + list_move_tail(&bo->pinned_link, &still_in_list); + spin_unlock(&xe->pinned.lock); + + xe_bo_lock(bo, &ww, 0, false); + ret = xe_bo_evict(bo, true); + xe_bo_unlock(bo, &ww); + xe_bo_put(bo); + if (ret) { + spin_lock(&xe->pinned.lock); + list_splice_tail(&still_in_list, + &xe->pinned.external_vram); + spin_unlock(&xe->pinned.lock); + return ret; + } + + spin_lock(&xe->pinned.lock); + } + list_splice_tail(&still_in_list, &xe->pinned.external_vram); + spin_unlock(&xe->pinned.lock); + + /* + * Wait for all user BO to be evicted as those evictions depend on the + * memory moved below. + */ + for_each_gt(gt, xe, id) + xe_gt_migrate_wait(gt); + + spin_lock(&xe->pinned.lock); + for (;;) { + bo = list_first_entry_or_null(&xe->pinned.kernel_bo_present, + typeof(*bo), pinned_link); + if (!bo) + break; + xe_bo_get(bo); + list_move_tail(&bo->pinned_link, &xe->pinned.evicted); + spin_unlock(&xe->pinned.lock); + + xe_bo_lock(bo, &ww, 0, false); + ret = xe_bo_evict(bo, true); + xe_bo_unlock(bo, &ww); + xe_bo_put(bo); + if (ret) + return ret; + + spin_lock(&xe->pinned.lock); + } + spin_unlock(&xe->pinned.lock); + + return 0; +} + +/** + * xe_bo_restore_kernel - restore kernel BOs to VRAM + * + * @xe: xe device + * + * Move kernel BOs from temporary (typically system) memory to VRAM via CPU. All + * moves done via TTM calls. + * + * This function should be called early, before trying to init the GT, on device + * resume. 
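+ *
+ * Together with xe_bo_evict_all() and xe_bo_restore_user() this gives,
+ * roughly, the following suspend / resume ordering (sketch only, error
+ * handling omitted):
+ *
+ *	suspend:  xe_bo_evict_all(xe);
+ *	resume:   xe_bo_restore_kernel(xe);	before GT / migrate init
+ *	          ... bring up GTs ...
+ *	          xe_bo_restore_user(xe);	after GT init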
+ */ +int xe_bo_restore_kernel(struct xe_device *xe) +{ + struct ww_acquire_ctx ww; + struct xe_bo *bo; + int ret; + + if (!IS_DGFX(xe)) + return 0; + + spin_lock(&xe->pinned.lock); + for (;;) { + bo = list_first_entry_or_null(&xe->pinned.evicted, + typeof(*bo), pinned_link); + if (!bo) + break; + xe_bo_get(bo); + list_move_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present); + spin_unlock(&xe->pinned.lock); + + xe_bo_lock(bo, &ww, 0, false); + ret = xe_bo_validate(bo, NULL, false); + xe_bo_unlock(bo, &ww); + if (ret) { + xe_bo_put(bo); + return ret; + } + + if (bo->flags & XE_BO_CREATE_GGTT_BIT) + xe_ggtt_map_bo(bo->gt->mem.ggtt, bo); + + /* + * We expect validate to trigger a move VRAM and our move code + * should setup the iosys map. + */ + XE_BUG_ON(iosys_map_is_null(&bo->vmap)); + XE_BUG_ON(!xe_bo_is_vram(bo)); + + xe_bo_put(bo); + + spin_lock(&xe->pinned.lock); + } + spin_unlock(&xe->pinned.lock); + + return 0; +} + +/** + * xe_bo_restore_user - restore pinned user BOs to VRAM + * + * @xe: xe device + * + * Move pinned user BOs from temporary (typically system) memory to VRAM via + * CPU. All moves done via TTM calls. + * + * This function should be called late, after GT init, on device resume. + */ +int xe_bo_restore_user(struct xe_device *xe) +{ + struct ww_acquire_ctx ww; + struct xe_bo *bo; + struct xe_gt *gt; + struct list_head still_in_list; + u8 id; + int ret; + + if (!IS_DGFX(xe)) + return 0; + + /* Pinned user memory in VRAM should be validated on resume */ + INIT_LIST_HEAD(&still_in_list); + spin_lock(&xe->pinned.lock); + for (;;) { + bo = list_first_entry_or_null(&xe->pinned.external_vram, + typeof(*bo), pinned_link); + if (!bo) + break; + list_move_tail(&bo->pinned_link, &still_in_list); + xe_bo_get(bo); + spin_unlock(&xe->pinned.lock); + + xe_bo_lock(bo, &ww, 0, false); + ret = xe_bo_validate(bo, NULL, false); + xe_bo_unlock(bo, &ww); + xe_bo_put(bo); + if (ret) { + spin_lock(&xe->pinned.lock); + list_splice_tail(&still_in_list, + &xe->pinned.external_vram); + spin_unlock(&xe->pinned.lock); + return ret; + } + + spin_lock(&xe->pinned.lock); + } + list_splice_tail(&still_in_list, &xe->pinned.external_vram); + spin_unlock(&xe->pinned.lock); + + /* Wait for validate to complete */ + for_each_gt(gt, xe, id) + xe_gt_migrate_wait(gt); + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_bo_evict.h b/drivers/gpu/drm/xe/xe_bo_evict.h new file mode 100644 index 000000000000..746894798852 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bo_evict.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_BO_EVICT_H_ +#define _XE_BO_EVICT_H_ + +struct xe_device; + +int xe_bo_evict_all(struct xe_device *xe); +int xe_bo_restore_kernel(struct xe_device *xe); +int xe_bo_restore_user(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h new file mode 100644 index 000000000000..06de3330211d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_BO_TYPES_H_ +#define _XE_BO_TYPES_H_ + +#include + +#include +#include +#include +#include +#include + +struct xe_device; +struct xe_vm; + +#define XE_BO_MAX_PLACEMENTS 3 + +/** @xe_bo: XE buffer object */ +struct xe_bo { + /** @ttm: TTM base buffer object */ + struct ttm_buffer_object ttm; + /** @size: Size of this buffer object */ + size_t size; + /** @flags: flags for this buffer object */ + u32 flags; + /** @vm: VM this 
BO is attached to, for extobj this will be NULL */ + struct xe_vm *vm; + /** @gt: GT this BO is attached to (kernel BO only) */ + struct xe_gt *gt; + /** @vmas: List of VMAs for this BO */ + struct list_head vmas; + /** @placements: valid placements for this BO */ + struct ttm_place placements[XE_BO_MAX_PLACEMENTS]; + /** @placement: current placement for this BO */ + struct ttm_placement placement; + /** @ggtt_node: GGTT node if this BO is mapped in the GGTT */ + struct drm_mm_node ggtt_node; + /** @vmap: iosys map of this buffer */ + struct iosys_map vmap; + /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */ + struct ttm_bo_kmap_obj kmap; + /** @pinned_link: link to present / evicted list of pinned BO */ + struct list_head pinned_link; + /** @props: BO user controlled properties */ + struct { + /** @preferred_mem: preferred memory class for this BO */ + s16 preferred_mem_class; + /** @prefered_gt: preferred GT for this BO */ + s16 preferred_gt; + /** @preferred_mem_type: preferred memory type */ + s32 preferred_mem_type; + /** + * @cpu_atomic: the CPU expects to do atomics operations to + * this BO + */ + bool cpu_atomic; + /** + * @device_atomic: the device expects to do atomics operations + * to this BO + */ + bool device_atomic; + } props; + /** @freed: List node for delayed put. */ + struct llist_node freed; + /** @created: Whether the bo has passed initial creation */ + bool created; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c new file mode 100644 index 000000000000..84db7b3f501e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_debugfs.h" +#include "xe_gt_debugfs.h" +#include "xe_step.h" + +#ifdef CONFIG_DRM_XE_DEBUG +#include "xe_bo_evict.h" +#include "xe_migrate.h" +#include "xe_vm.h" +#endif + +static struct xe_device *node_to_xe(struct drm_info_node *node) +{ + return to_xe_device(node->minor->dev); +} + +static int info(struct seq_file *m, void *data) +{ + struct xe_device *xe = node_to_xe(m->private); + struct drm_printer p = drm_seq_file_printer(m); + struct xe_gt *gt; + u8 id; + + drm_printf(&p, "graphics_verx100 %d\n", xe->info.graphics_verx100); + drm_printf(&p, "media_verx100 %d\n", xe->info.media_verx100); + drm_printf(&p, "stepping G:%s M:%s D:%s B:%s\n", + xe_step_name(xe->info.step.graphics), + xe_step_name(xe->info.step.media), + xe_step_name(xe->info.step.display), + xe_step_name(xe->info.step.basedie)); + drm_printf(&p, "is_dgfx %s\n", str_yes_no(xe->info.is_dgfx)); + drm_printf(&p, "platform %d\n", xe->info.platform); + drm_printf(&p, "subplatform %d\n", + xe->info.subplatform > XE_SUBPLATFORM_NONE ? 
xe->info.subplatform : 0); + drm_printf(&p, "devid 0x%x\n", xe->info.devid); + drm_printf(&p, "revid %d\n", xe->info.revid); + drm_printf(&p, "tile_count %d\n", xe->info.tile_count); + drm_printf(&p, "vm_max_level %d\n", xe->info.vm_max_level); + drm_printf(&p, "enable_guc %s\n", str_yes_no(xe->info.enable_guc)); + drm_printf(&p, "supports_usm %s\n", str_yes_no(xe->info.supports_usm)); + drm_printf(&p, "has_flat_ccs %s\n", str_yes_no(xe->info.has_flat_ccs)); + for_each_gt(gt, xe, id) { + drm_printf(&p, "gt%d force wake %d\n", id, + xe_force_wake_ref(gt_to_fw(gt), XE_FW_GT)); + drm_printf(&p, "gt%d engine_mask 0x%llx\n", id, + gt->info.engine_mask); + } + + return 0; +} + +static const struct drm_info_list debugfs_list[] = { + {"info", info, 0}, +}; + +static int forcewake_open(struct inode *inode, struct file *file) +{ + struct xe_device *xe = inode->i_private; + struct xe_gt *gt; + u8 id; + + for_each_gt(gt, xe, id) + XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + + return 0; +} + +static int forcewake_release(struct inode *inode, struct file *file) +{ + struct xe_device *xe = inode->i_private; + struct xe_gt *gt; + u8 id; + + for_each_gt(gt, xe, id) + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + + return 0; +} + +static const struct file_operations forcewake_all_fops = { + .owner = THIS_MODULE, + .open = forcewake_open, + .release = forcewake_release, +}; + +void xe_debugfs_register(struct xe_device *xe) +{ + struct ttm_device *bdev = &xe->ttm; + struct drm_minor *minor = xe->drm.primary; + struct dentry *root = minor->debugfs_root; + struct ttm_resource_manager *man; + struct xe_gt *gt; + u32 mem_type; + u8 id; + + drm_debugfs_create_files(debugfs_list, + ARRAY_SIZE(debugfs_list), + root, minor); + + debugfs_create_file("forcewake_all", 0400, root, xe, + &forcewake_all_fops); + + for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { + man = ttm_manager_type(bdev, mem_type); + + if (man) { + char name[16]; + + sprintf(name, "vram%d_mm", mem_type - XE_PL_VRAM0); + ttm_resource_manager_create_debugfs(man, root, name); + } + } + + man = ttm_manager_type(bdev, XE_PL_TT); + ttm_resource_manager_create_debugfs(man, root, "gtt_mm"); + + for_each_gt(gt, xe, id) + xe_gt_debugfs_register(gt); +} diff --git a/drivers/gpu/drm/xe/xe_debugfs.h b/drivers/gpu/drm/xe/xe_debugfs.h new file mode 100644 index 000000000000..715b8e2e0bd9 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_debugfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_DEBUGFS_H_ +#define _XE_DEBUGFS_H_ + +struct xe_device; + +void xe_debugfs_register(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c new file mode 100644 index 000000000000..93dea2b9c464 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_device.c @@ -0,0 +1,359 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_device.h" + +#include +#include +#include +#include +#include +#include + +#include "xe_bo.h" +#include "xe_debugfs.h" +#include "xe_dma_buf.h" +#include "xe_drv.h" +#include "xe_engine.h" +#include "xe_exec.h" +#include "xe_gt.h" +#include "xe_irq.h" +#include "xe_module.h" +#include "xe_mmio.h" +#include "xe_pcode.h" +#include "xe_pm.h" +#include "xe_query.h" +#include "xe_vm.h" +#include "xe_vm_madvise.h" +#include "xe_wait_user_fence.h" + +static int xe_file_open(struct drm_device *dev, struct drm_file *file) +{ + struct xe_file *xef; + + xef = 
kzalloc(sizeof(*xef), GFP_KERNEL); + if (!xef) + return -ENOMEM; + + xef->drm = file; + + mutex_init(&xef->vm.lock); + xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1); + + mutex_init(&xef->engine.lock); + xa_init_flags(&xef->engine.xa, XA_FLAGS_ALLOC1); + + file->driver_priv = xef; + return 0; +} + +static void device_kill_persitent_engines(struct xe_device *xe, + struct xe_file *xef); + +static void xe_file_close(struct drm_device *dev, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = file->driver_priv; + struct xe_vm *vm; + struct xe_engine *e; + unsigned long idx; + + mutex_lock(&xef->engine.lock); + xa_for_each(&xef->engine.xa, idx, e) { + xe_engine_kill(e); + xe_engine_put(e); + } + mutex_unlock(&xef->engine.lock); + mutex_destroy(&xef->engine.lock); + device_kill_persitent_engines(xe, xef); + + mutex_lock(&xef->vm.lock); + xa_for_each(&xef->vm.xa, idx, vm) + xe_vm_close_and_put(vm); + mutex_unlock(&xef->vm.lock); + mutex_destroy(&xef->vm.lock); + + kfree(xef); +} + +static const struct drm_ioctl_desc xe_ioctls[] = { + DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_ENGINE_CREATE, xe_engine_create_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_ENGINE_DESTROY, xe_engine_destroy_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_MMIO, xe_mmio_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_ENGINE_SET_PROPERTY, xe_engine_set_property_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_VM_MADVISE, xe_vm_madvise_ioctl, DRM_RENDER_ALLOW), +}; + +static const struct file_operations xe_driver_fops = { + .owner = THIS_MODULE, + .open = drm_open, + .release = drm_release_noglobal, + .unlocked_ioctl = drm_ioctl, + .mmap = drm_gem_mmap, + .poll = drm_poll, + .read = drm_read, +// .compat_ioctl = i915_ioc32_compat_ioctl, + .llseek = noop_llseek, +}; + +static void xe_driver_release(struct drm_device *dev) +{ + struct xe_device *xe = to_xe_device(dev); + + pci_set_drvdata(to_pci_dev(xe->drm.dev), NULL); +} + +static struct drm_driver driver = { + /* Don't use MTRRs here; the Xserver or userspace app should + * deal with them for Intel hardware. 
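Objects created through the ioctl table above are tracked per file in the xarrays set up by xe_file_open(). The sketch below shows the lookup-by-handle pattern that goes with it; xe_vm_lookup() is used but not defined in this hunk, so this is an assumed shape (modelled on xe_engine_lookup() further down in the patch), not the real implementation.

/* Hypothetical per-file VM lookup, mirroring xe_engine_lookup() below. */
static struct xe_vm *example_vm_lookup(struct xe_file *xef, u32 id)
{
        struct xe_vm *vm;

        mutex_lock(&xef->vm.lock);
        vm = xa_load(&xef->vm.xa, id);  /* id handed out by xa_alloc() at create time */
        if (vm)
                xe_vm_get(vm);          /* take a reference while the lock is held */
        mutex_unlock(&xef->vm.lock);

        return vm;
}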
+ */ + .driver_features = + DRIVER_GEM | + DRIVER_RENDER | DRIVER_SYNCOBJ | + DRIVER_SYNCOBJ_TIMELINE, + .open = xe_file_open, + .postclose = xe_file_close, + + .gem_prime_import = xe_gem_prime_import, + + .dumb_create = xe_bo_dumb_create, + .dumb_map_offset = drm_gem_ttm_dumb_map_offset, + .release = &xe_driver_release, + + .ioctls = xe_ioctls, + .num_ioctls = ARRAY_SIZE(xe_ioctls), + .fops = &xe_driver_fops, + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCHLEVEL, +}; + +static void xe_device_destroy(struct drm_device *dev, void *dummy) +{ + struct xe_device *xe = to_xe_device(dev); + + destroy_workqueue(xe->ordered_wq); + mutex_destroy(&xe->persitent_engines.lock); + ttm_device_fini(&xe->ttm); +} + +struct xe_device *xe_device_create(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct xe_device *xe; + int err; + + err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver); + if (err) + return ERR_PTR(err); + + xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm); + if (IS_ERR(xe)) + return xe; + + err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev, + xe->drm.anon_inode->i_mapping, + xe->drm.vma_offset_manager, false, false); + if (WARN_ON(err)) + goto err_put; + + xe->info.devid = pdev->device; + xe->info.revid = pdev->revision; + xe->info.enable_guc = enable_guc; + + spin_lock_init(&xe->irq.lock); + + init_waitqueue_head(&xe->ufence_wq); + + mutex_init(&xe->usm.lock); + xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC1); + + mutex_init(&xe->persitent_engines.lock); + INIT_LIST_HEAD(&xe->persitent_engines.list); + + spin_lock_init(&xe->pinned.lock); + INIT_LIST_HEAD(&xe->pinned.kernel_bo_present); + INIT_LIST_HEAD(&xe->pinned.external_vram); + INIT_LIST_HEAD(&xe->pinned.evicted); + + xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); + + mutex_init(&xe->sb_lock); + xe->enabled_irq_mask = ~0; + + err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL); + if (err) + goto err_put; + + mutex_init(&xe->mem_access.lock); + return xe; + +err_put: + drm_dev_put(&xe->drm); + + return ERR_PTR(err); +} + +int xe_device_probe(struct xe_device *xe) +{ + struct xe_gt *gt; + int err; + u8 id; + + xe->info.mem_region_mask = 1; + + for_each_gt(gt, xe, id) { + err = xe_gt_alloc(xe, gt); + if (err) + return err; + } + + err = xe_mmio_init(xe); + if (err) + return err; + + for_each_gt(gt, xe, id) { + err = xe_pcode_probe(gt); + if (err) + return err; + } + + err = xe_irq_install(xe); + if (err) + return err; + + for_each_gt(gt, xe, id) { + err = xe_gt_init_early(gt); + if (err) + goto err_irq_shutdown; + } + + err = xe_mmio_probe_vram(xe); + if (err) + goto err_irq_shutdown; + + for_each_gt(gt, xe, id) { + err = xe_gt_init_noalloc(gt); + if (err) + goto err_irq_shutdown; + } + + for_each_gt(gt, xe, id) { + err = xe_gt_init(gt); + if (err) + goto err_irq_shutdown; + } + + err = drm_dev_register(&xe->drm, 0); + if (err) + goto err_irq_shutdown; + + xe_debugfs_register(xe); + + return 0; + +err_irq_shutdown: + xe_irq_shutdown(xe); + return err; +} + +void xe_device_remove(struct xe_device *xe) +{ + xe_irq_shutdown(xe); +} + +void xe_device_shutdown(struct xe_device *xe) +{ +} + +void xe_device_add_persitent_engines(struct xe_device *xe, struct xe_engine *e) +{ + mutex_lock(&xe->persitent_engines.lock); + list_add_tail(&e->persitent.link, &xe->persitent_engines.list); + mutex_unlock(&xe->persitent_engines.lock); +} + +void 
xe_device_remove_persitent_engines(struct xe_device *xe, + struct xe_engine *e) +{ + mutex_lock(&xe->persitent_engines.lock); + if (!list_empty(&e->persitent.link)) + list_del(&e->persitent.link); + mutex_unlock(&xe->persitent_engines.lock); +} + +static void device_kill_persitent_engines(struct xe_device *xe, + struct xe_file *xef) +{ + struct xe_engine *e, *next; + + mutex_lock(&xe->persitent_engines.lock); + list_for_each_entry_safe(e, next, &xe->persitent_engines.list, + persitent.link) + if (e->persitent.xef == xef) { + xe_engine_kill(e); + list_del_init(&e->persitent.link); + } + mutex_unlock(&xe->persitent_engines.lock); +} + +#define SOFTWARE_FLAGS_SPR33 _MMIO(0x4F084) + +void xe_device_wmb(struct xe_device *xe) +{ + struct xe_gt *gt = xe_device_get_gt(xe, 0); + + wmb(); + if (IS_DGFX(xe)) + xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33.reg, 0); +} + +u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) +{ + return xe_device_has_flat_ccs(xe) ? + DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE) : 0; +} + +void xe_device_mem_access_get(struct xe_device *xe) +{ + bool resumed = xe_pm_runtime_resume_if_suspended(xe); + + mutex_lock(&xe->mem_access.lock); + if (xe->mem_access.ref++ == 0) + xe->mem_access.hold_rpm = xe_pm_runtime_get_if_active(xe); + mutex_unlock(&xe->mem_access.lock); + + /* The usage counter increased if device was immediately resumed */ + if (resumed) + xe_pm_runtime_put(xe); + + XE_WARN_ON(xe->mem_access.ref == U32_MAX); +} + +void xe_device_mem_access_put(struct xe_device *xe) +{ + mutex_lock(&xe->mem_access.lock); + if (--xe->mem_access.ref == 0 && xe->mem_access.hold_rpm) + xe_pm_runtime_put(xe); + mutex_unlock(&xe->mem_access.lock); + + XE_WARN_ON(xe->mem_access.ref < 0); +} diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h new file mode 100644 index 000000000000..88d55671b068 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_device.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_DEVICE_H_ +#define _XE_DEVICE_H_ + +struct xe_engine; +struct xe_file; + +#include + +#include "xe_device_types.h" +#include "xe_macros.h" +#include "xe_force_wake.h" + +#include "gt/intel_gpu_commands.h" + +static inline struct xe_device *to_xe_device(const struct drm_device *dev) +{ + return container_of(dev, struct xe_device, drm); +} + +static inline struct xe_device *pdev_to_xe_device(struct pci_dev *pdev) +{ + return pci_get_drvdata(pdev); +} + +static inline struct xe_device *ttm_to_xe_device(struct ttm_device *ttm) +{ + return container_of(ttm, struct xe_device, ttm); +} + +struct xe_device *xe_device_create(struct pci_dev *pdev, + const struct pci_device_id *ent); +int xe_device_probe(struct xe_device *xe); +void xe_device_remove(struct xe_device *xe); +void xe_device_shutdown(struct xe_device *xe); + +void xe_device_add_persitent_engines(struct xe_device *xe, struct xe_engine *e); +void xe_device_remove_persitent_engines(struct xe_device *xe, + struct xe_engine *e); + +void xe_device_wmb(struct xe_device *xe); + +static inline struct xe_file *to_xe_file(const struct drm_file *file) +{ + return file->driver_priv; +} + +static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) +{ + struct xe_gt *gt; + + XE_BUG_ON(gt_id > XE_MAX_GT); + gt = xe->gt + gt_id; + XE_BUG_ON(gt->info.id != gt_id); + XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED); + + return gt; +} + +/* + * FIXME: Placeholder until multi-gt lands. Once that lands, kill this function. 
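xe_device_mem_access_get()/xe_device_mem_access_put() above form a ref-counted bracket around anything that touches device memory or registers, resuming the device through runtime PM when the count goes from 0 to 1. A sketch of a caller is below; xe_mmio_read32() is assumed to exist with this shape (only xe_mmio_write32() appears in this hunk), so treat it as illustrative.

/* Illustrative register read bracketed by the mem_access reference count. */
static u32 example_read_reg(struct xe_device *xe, struct xe_gt *gt, u32 reg)
{
        u32 val;

        xe_device_mem_access_get(xe);   /* resumes the device if it was runtime suspended */
        val = xe_mmio_read32(gt, reg);  /* xe_device_assert_mem_access(xe) would pass here */
        xe_device_mem_access_put(xe);   /* may drop the runtime PM reference again */

        return val;
}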
+ */ +static inline struct xe_gt *to_gt(struct xe_device *xe) +{ + return xe->gt; +} + +static inline bool xe_device_guc_submission_enabled(struct xe_device *xe) +{ + return xe->info.enable_guc; +} + +static inline void xe_device_guc_submission_disable(struct xe_device *xe) +{ + xe->info.enable_guc = false; +} + +#define for_each_gt(gt__, xe__, id__) \ + for ((id__) = 0; (id__) < (xe__)->info.tile_count; (id__++)) \ + for_each_if ((gt__) = xe_device_get_gt((xe__), (id__))) + +static inline struct xe_force_wake * gt_to_fw(struct xe_gt *gt) +{ + return >->mmio.fw; +} + +void xe_device_mem_access_get(struct xe_device *xe); +void xe_device_mem_access_put(struct xe_device *xe); + +static inline void xe_device_assert_mem_access(struct xe_device *xe) +{ + XE_WARN_ON(!xe->mem_access.ref); +} + +static inline bool xe_device_mem_access_ongoing(struct xe_device *xe) +{ + bool ret; + + mutex_lock(&xe->mem_access.lock); + ret = xe->mem_access.ref; + mutex_unlock(&xe->mem_access.lock); + + return ret; +} + +static inline bool xe_device_in_fault_mode(struct xe_device *xe) +{ + return xe->usm.num_vm_in_fault_mode != 0; +} + +static inline bool xe_device_in_non_fault_mode(struct xe_device *xe) +{ + return xe->usm.num_vm_in_non_fault_mode != 0; +} + +static inline bool xe_device_has_flat_ccs(struct xe_device *xe) +{ + return xe->info.has_flat_ccs; +} + +u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size); +#endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h new file mode 100644 index 000000000000..d62ee85bfcbe --- /dev/null +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -0,0 +1,214 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_DEVICE_TYPES_H_ +#define _XE_DEVICE_TYPES_H_ + +#include + +#include +#include +#include + +#include "xe_gt_types.h" +#include "xe_platform_types.h" +#include "xe_step_types.h" + +#define XE_BO_INVALID_OFFSET LONG_MAX + +#define GRAPHICS_VER(xe) ((xe)->info.graphics_verx100 / 100) +#define MEDIA_VER(xe) ((xe)->info.media_verx100 / 100) +#define GRAPHICS_VERx100(xe) ((xe)->info.graphics_verx100) +#define MEDIA_VERx100(xe) ((xe)->info.media_verx100) +#define IS_DGFX(xe) ((xe)->info.is_dgfx) + +#define XE_VRAM_FLAGS_NEED64K BIT(0) + +#define XE_GT0 0 +#define XE_GT1 1 +#define XE_MAX_GT (XE_GT1 + 1) + +#define XE_MAX_ASID (BIT(20)) + +#define IS_PLATFORM_STEP(_xe, _platform, min_step, max_step) \ + ((_xe)->info.platform == (_platform) && \ + (_xe)->info.step.graphics >= (min_step) && \ + (_xe)->info.step.graphics < (max_step)) +#define IS_SUBPLATFORM_STEP(_xe, _platform, sub, min_step, max_step) \ + ((_xe)->info.platform == (_platform) && \ + (_xe)->info.subplatform == (sub) && \ + (_xe)->info.step.graphics >= (min_step) && \ + (_xe)->info.step.graphics < (max_step)) + +/** + * struct xe_device - Top level struct of XE device + */ +struct xe_device { + /** @drm: drm device */ + struct drm_device drm; + + /** @info: device info */ + struct intel_device_info { + /** @graphics_verx100: graphics IP version */ + u32 graphics_verx100; + /** @media_verx100: media IP version */ + u32 media_verx100; + /** @mem_region_mask: mask of valid memory regions */ + u32 mem_region_mask; + /** @is_dgfx: is discrete device */ + bool is_dgfx; + /** @platform: XE platform enum */ + enum xe_platform platform; + /** @subplatform: XE subplatform enum */ + enum xe_subplatform subplatform; + /** @devid: device ID */ + u16 devid; + /** @revid: device revision */ + u8 revid; + /** @step: stepping information for 
each IP */ + struct xe_step_info step; + /** @dma_mask_size: DMA address bits */ + u8 dma_mask_size; + /** @vram_flags: Vram flags */ + u8 vram_flags; + /** @tile_count: Number of tiles */ + u8 tile_count; + /** @vm_max_level: Max VM level */ + u8 vm_max_level; + /** @media_ver: Media version */ + u8 media_ver; + /** @supports_usm: Supports unified shared memory */ + bool supports_usm; + /** @enable_guc: GuC submission enabled */ + bool enable_guc; + /** @has_flat_ccs: Whether flat CCS metadata is used */ + bool has_flat_ccs; + /** @has_4tile: Whether tile-4 tiling is supported */ + bool has_4tile; + } info; + + /** @irq: device interrupt state */ + struct { + /** @lock: lock for processing irq's on this device */ + spinlock_t lock; + + /** @enabled: interrupts enabled on this device */ + bool enabled; + } irq; + + /** @ttm: ttm device */ + struct ttm_device ttm; + + /** @mmio: mmio info for device */ + struct { + /** @size: size of MMIO space for device */ + size_t size; + /** @regs: pointer to MMIO space for device */ + void *regs; + } mmio; + + /** @mem: memory info for device */ + struct { + /** @vram: VRAM info for device */ + struct { + /** @io_start: start address of VRAM */ + resource_size_t io_start; + /** @size: size of VRAM */ + resource_size_t size; + /** @mapping: pointer to VRAM mappable space */ + void *__iomem mapping; + } vram; + } mem; + + /** @usm: unified memory state */ + struct { + /** @asid: convert a ASID to VM */ + struct xarray asid_to_vm; + /** @next_asid: next ASID, used to cyclical alloc asids */ + u32 next_asid; + /** @num_vm_in_fault_mode: number of VM in fault mode */ + u32 num_vm_in_fault_mode; + /** @num_vm_in_non_fault_mode: number of VM in non-fault mode */ + u32 num_vm_in_non_fault_mode; + /** @lock: protects UM state */ + struct mutex lock; + } usm; + + /** @persitent_engines: engines that are closed but still running */ + struct { + /** @lock: protects persitent engines */ + struct mutex lock; + /** @list: list of persitent engines */ + struct list_head list; + } persitent_engines; + + /** @pinned: pinned BO state */ + struct { + /** @lock: protected pinned BO list state */ + spinlock_t lock; + /** @evicted: pinned kernel BO that are present */ + struct list_head kernel_bo_present; + /** @evicted: pinned BO that have been evicted */ + struct list_head evicted; + /** @external_vram: pinned external BO in vram*/ + struct list_head external_vram; + } pinned; + + /** @ufence_wq: user fence wait queue */ + wait_queue_head_t ufence_wq; + + /** @ordered_wq: used to serialize compute mode resume */ + struct workqueue_struct *ordered_wq; + + /** @gt: graphics tile */ + struct xe_gt gt[XE_MAX_GT]; + + /** + * @mem_access: keep track of memory access in the device, possibly + * triggering additional actions when they occur. 
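struct xe_device embeds up to XE_MAX_GT tiles directly in @gt and records how many are valid in info.tile_count; the for_each_gt() helper from xe_device.h walks exactly that range. A small illustration follows, with drm_info() used purely as an example consumer.

/* Walk every initialised GT of the device and print its engine mask. */
static void example_dump_engine_masks(struct xe_device *xe)
{
        struct xe_gt *gt;
        u8 id;

        for_each_gt(gt, xe, id)         /* iterates ids [0, info.tile_count) */
                drm_info(&xe->drm, "gt%d engine_mask=%#llx\n",
                         id, gt->info.engine_mask);
}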
+ */ + struct { + /** @lock: protect the ref count */ + struct mutex lock; + /** @ref: ref count of memory accesses */ + u32 ref; + /** @hold_rpm: need to put rpm ref back at the end */ + bool hold_rpm; + } mem_access; + + /** @d3cold_allowed: Indicates if d3cold is a valid device state */ + bool d3cold_allowed; + + /* For pcode */ + struct mutex sb_lock; + + u32 enabled_irq_mask; +}; + +/** + * struct xe_file - file handle for XE driver + */ +struct xe_file { + /** @drm: base DRM file */ + struct drm_file *drm; + + /** @vm: VM state for file */ + struct { + /** @xe: xarray to store VMs */ + struct xarray xa; + /** @lock: protects file VM state */ + struct mutex lock; + } vm; + + /** @engine: Submission engine state for file */ + struct { + /** @xe: xarray to store engines */ + struct xarray xa; + /** @lock: protects file engine state */ + struct mutex lock; + } engine; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c new file mode 100644 index 000000000000..d09ff25bd940 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include +#include + +#include + +#include +#include + +#include "tests/xe_test.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_dma_buf.h" +#include "xe_ttm_vram_mgr.h" +#include "xe_vm.h" + +MODULE_IMPORT_NS(DMA_BUF); + +static int xe_dma_buf_attach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = attach->dmabuf->priv; + + if (attach->peer2peer && + pci_p2pdma_distance(to_pci_dev(obj->dev->dev), attach->dev, false) < 0) + attach->peer2peer = false; + + if (!attach->peer2peer && !xe_bo_can_migrate(gem_to_xe_bo(obj), XE_PL_TT)) + return -EOPNOTSUPP; + + xe_device_mem_access_get(to_xe_device(obj->dev)); + return 0; +} + +static void xe_dma_buf_detach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = attach->dmabuf->priv; + + xe_device_mem_access_put(to_xe_device(obj->dev)); +} + +static int xe_dma_buf_pin(struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = attach->dmabuf->priv; + struct xe_bo *bo = gem_to_xe_bo(obj); + + /* + * Migrate to TT first to increase the chance of non-p2p clients + * can attach. 
+ */ + (void)xe_bo_migrate(bo, XE_PL_TT); + xe_bo_pin_external(bo); + + return 0; +} + +static void xe_dma_buf_unpin(struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = attach->dmabuf->priv; + struct xe_bo *bo = gem_to_xe_bo(obj); + + xe_bo_unpin_external(bo); +} + +static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach, + enum dma_data_direction dir) +{ + struct dma_buf *dma_buf = attach->dmabuf; + struct drm_gem_object *obj = dma_buf->priv; + struct xe_bo *bo = gem_to_xe_bo(obj); + struct sg_table *sgt; + int r = 0; + + if (!attach->peer2peer && !xe_bo_can_migrate(bo, XE_PL_TT)) + return ERR_PTR(-EOPNOTSUPP); + + if (!xe_bo_is_pinned(bo)) { + if (!attach->peer2peer || + bo->ttm.resource->mem_type == XE_PL_SYSTEM) { + if (xe_bo_can_migrate(bo, XE_PL_TT)) + r = xe_bo_migrate(bo, XE_PL_TT); + else + r = xe_bo_validate(bo, NULL, false); + } + if (r) + return ERR_PTR(r); + } + + switch (bo->ttm.resource->mem_type) { + case XE_PL_TT: + sgt = drm_prime_pages_to_sg(obj->dev, + bo->ttm.ttm->pages, + bo->ttm.ttm->num_pages); + if (IS_ERR(sgt)) + return sgt; + + if (dma_map_sgtable(attach->dev, sgt, dir, + DMA_ATTR_SKIP_CPU_SYNC)) + goto error_free; + break; + + case XE_PL_VRAM0: + case XE_PL_VRAM1: + r = xe_ttm_vram_mgr_alloc_sgt(xe_bo_device(bo), + bo->ttm.resource, 0, + bo->ttm.base.size, attach->dev, + dir, &sgt); + if (r) + return ERR_PTR(r); + break; + default: + return ERR_PTR(-EINVAL); + } + + return sgt; + +error_free: + sg_free_table(sgt); + kfree(sgt); + return ERR_PTR(-EBUSY); +} + +static void xe_dma_buf_unmap(struct dma_buf_attachment *attach, + struct sg_table *sgt, + enum dma_data_direction dir) +{ + struct dma_buf *dma_buf = attach->dmabuf; + struct xe_bo *bo = gem_to_xe_bo(dma_buf->priv); + + if (!xe_bo_is_vram(bo)) { + dma_unmap_sgtable(attach->dev, sgt, dir, 0); + sg_free_table(sgt); + kfree(sgt); + } else { + xe_ttm_vram_mgr_free_sgt(attach->dev, dir, sgt); + } +} + +static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, + enum dma_data_direction direction) +{ + struct drm_gem_object *obj = dma_buf->priv; + struct xe_bo *bo = gem_to_xe_bo(obj); + bool reads = (direction == DMA_BIDIRECTIONAL || + direction == DMA_FROM_DEVICE); + + if (!reads) + return 0; + + xe_bo_lock_no_vm(bo, NULL); + (void)xe_bo_migrate(bo, XE_PL_TT); + xe_bo_unlock_no_vm(bo); + + return 0; +} + +const struct dma_buf_ops xe_dmabuf_ops = { + .attach = xe_dma_buf_attach, + .detach = xe_dma_buf_detach, + .pin = xe_dma_buf_pin, + .unpin = xe_dma_buf_unpin, + .map_dma_buf = xe_dma_buf_map, + .unmap_dma_buf = xe_dma_buf_unmap, + .release = drm_gem_dmabuf_release, + .begin_cpu_access = xe_dma_buf_begin_cpu_access, + .mmap = drm_gem_dmabuf_mmap, + .vmap = drm_gem_dmabuf_vmap, + .vunmap = drm_gem_dmabuf_vunmap, +}; + +struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags) +{ + struct xe_bo *bo = gem_to_xe_bo(obj); + struct dma_buf *buf; + + if (bo->vm) + return ERR_PTR(-EPERM); + + buf = drm_gem_prime_export(obj, flags); + if (!IS_ERR(buf)) + buf->ops = &xe_dmabuf_ops; + + return buf; +} + +static struct drm_gem_object * +xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, + struct dma_buf *dma_buf) +{ + struct dma_resv *resv = dma_buf->resv; + struct xe_device *xe = to_xe_device(dev); + struct xe_bo *bo; + int ret; + + dma_resv_lock(resv, NULL); + bo = __xe_bo_create_locked(xe, storage, NULL, resv, dma_buf->size, + ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT); + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + goto error; + } + dma_resv_unlock(resv); 
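The exporter callbacks above are driven by the dma-buf core on behalf of a peer device. The sketch below shows the importer side of that handshake using only standard dma-buf API calls (dma_buf_dynamic_attach(), dma_buf_map_attachment()), nothing from this patch; the importer's move_notify implementation and most error handling are omitted.

/* Peer-driver sketch: attach dynamically, then map with the reservation lock held. */
static struct sg_table *example_import_map(struct dma_buf *dmabuf,
                                           struct device *dev,
                                           const struct dma_buf_attach_ops *ops,
                                           void *importer_priv)
{
        struct dma_buf_attachment *attach;
        struct sg_table *sgt;

        attach = dma_buf_dynamic_attach(dmabuf, dev, ops, importer_priv);
        if (IS_ERR(attach))                     /* ends up in xe_dma_buf_attach() */
                return ERR_CAST(attach);

        dma_resv_lock(dmabuf->resv, NULL);
        sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
        dma_resv_unlock(dmabuf->resv);          /* mapping goes through xe_dma_buf_map() */

        if (IS_ERR(sgt))
                dma_buf_detach(dmabuf, attach); /* ends up in xe_dma_buf_detach() */

        return sgt;
}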
+ + return &bo->ttm.base; + +error: + dma_resv_unlock(resv); + return ERR_PTR(ret); +} + +static void xe_dma_buf_move_notify(struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = attach->importer_priv; + struct xe_bo *bo = gem_to_xe_bo(obj); + + XE_WARN_ON(xe_bo_evict(bo, false)); +} + +static const struct dma_buf_attach_ops xe_dma_buf_attach_ops = { + .allow_peer2peer = true, + .move_notify = xe_dma_buf_move_notify +}; + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) + +struct dma_buf_test_params { + struct xe_test_priv base; + const struct dma_buf_attach_ops *attach_ops; + bool force_different_devices; + u32 mem_mask; +}; + +#define to_dma_buf_test_params(_priv) \ + container_of(_priv, struct dma_buf_test_params, base) +#endif + +struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + XE_TEST_DECLARE(struct dma_buf_test_params *test = + to_dma_buf_test_params + (xe_cur_kunit_priv(XE_TEST_LIVE_DMA_BUF));) + const struct dma_buf_attach_ops *attach_ops; + struct dma_buf_attachment *attach; + struct drm_gem_object *obj; + struct xe_bo *bo; + + if (dma_buf->ops == &xe_dmabuf_ops) { + obj = dma_buf->priv; + if (obj->dev == dev && + !XE_TEST_ONLY(test && test->force_different_devices)) { + /* + * Importing dmabuf exported from out own gem increases + * refcount on gem itself instead of f_count of dmabuf. + */ + drm_gem_object_get(obj); + return obj; + } + } + + /* + * Don't publish the bo until we have a valid attachment, and a + * valid attachment needs the bo address. So pre-create a bo before + * creating the attachment and publish. + */ + bo = xe_bo_alloc(); + if (IS_ERR(bo)) + return ERR_CAST(bo); + + attach_ops = &xe_dma_buf_attach_ops; +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) + if (test) + attach_ops = test->attach_ops; +#endif + + attach = dma_buf_dynamic_attach(dma_buf, dev->dev, attach_ops, &bo->ttm.base); + if (IS_ERR(attach)) { + obj = ERR_CAST(attach); + goto out_err; + } + + /* Errors here will take care of freeing the bo. */ + obj = xe_dma_buf_init_obj(dev, bo, dma_buf); + if (IS_ERR(obj)) + return obj; + + + get_dma_buf(dma_buf); + obj->import_attach = attach; + return obj; + +out_err: + xe_bo_free(bo); + + return obj; +} + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_dma_buf.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_dma_buf.h b/drivers/gpu/drm/xe/xe_dma_buf.h new file mode 100644 index 000000000000..861dd28a862c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_dma_buf.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_DMA_BUF_H_ +#define _XE_DMA_BUF_H_ + +#include + +struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags); +struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf); + +#endif diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h new file mode 100644 index 000000000000..0377e5e4e35f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_drv.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_DRV_H_ +#define _XE_DRV_H_ + +#include + +#define DRIVER_NAME "xe" +#define DRIVER_DESC "Intel Xe Graphics" +#define DRIVER_DATE "20201103" +#define DRIVER_TIMESTAMP 1604406085 + +/* Interface history: + * + * 1.1: Original. 
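One observable consequence of the fast path in xe_gem_prime_import() above: re-importing a dma-buf on the device that exported it hands back the existing GEM object instead of wrapping the dma-buf in a new one. The userspace sketch below uses libdrm's PRIME helpers, which are not part of this patch; exactly which layer short-circuits (the DRM core's per-file handle cache or xe_gem_prime_import()) depends on whether the same DRM fd is reused, and error handling is omitted.

#include <stdint.h>
#include <unistd.h>
#include <xf86drm.h>

/* Export a GEM handle as a dma-buf fd, then re-import it on the same fd. */
static void example_prime_round_trip(int fd, uint32_t handle)
{
        uint32_t handle2 = 0;
        int prime_fd = -1;

        drmPrimeHandleToFD(fd, handle, DRM_CLOEXEC, &prime_fd);
        drmPrimeFDToHandle(fd, prime_fd, &handle2);
        close(prime_fd);

        /* handle2 == handle: the buffer is recognised as one of our own GEM
         * objects and reused rather than imported as a new object. */
}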
+ */ +#define DRIVER_MAJOR 1 +#define DRIVER_MINOR 1 +#define DRIVER_PATCHLEVEL 0 + +#endif diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c new file mode 100644 index 000000000000..63219bd98be7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -0,0 +1,734 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_engine.h" + +#include +#include +#include +#include + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_lrc.h" +#include "xe_macros.h" +#include "xe_migrate.h" +#include "xe_pm.h" +#include "xe_trace.h" +#include "xe_vm.h" + +static struct xe_engine *__xe_engine_create(struct xe_device *xe, + struct xe_vm *vm, + u32 logical_mask, + u16 width, struct xe_hw_engine *hwe, + u32 flags) +{ + struct xe_engine *e; + struct xe_gt *gt = hwe->gt; + int err; + int i; + + e = kzalloc(sizeof(*e) + sizeof(struct xe_lrc) * width, GFP_KERNEL); + if (!e) + return ERR_PTR(-ENOMEM); + + kref_init(&e->refcount); + e->flags = flags; + e->hwe = hwe; + e->gt = gt; + if (vm) + e->vm = xe_vm_get(vm); + e->class = hwe->class; + e->width = width; + e->logical_mask = logical_mask; + e->fence_irq = >->fence_irq[hwe->class]; + e->ring_ops = gt->ring_ops[hwe->class]; + e->ops = gt->engine_ops; + INIT_LIST_HEAD(&e->persitent.link); + INIT_LIST_HEAD(&e->compute.link); + INIT_LIST_HEAD(&e->multi_gt_link); + + /* FIXME: Wire up to configurable default value */ + e->sched_props.timeslice_us = 1 * 1000; + e->sched_props.preempt_timeout_us = 640 * 1000; + + if (xe_engine_is_parallel(e)) { + e->parallel.composite_fence_ctx = dma_fence_context_alloc(1); + e->parallel.composite_fence_seqno = 1; + } + if (e->flags & ENGINE_FLAG_VM) { + e->bind.fence_ctx = dma_fence_context_alloc(1); + e->bind.fence_seqno = 1; + } + + for (i = 0; i < width; ++i) { + err = xe_lrc_init(e->lrc + i, hwe, e, vm, SZ_16K); + if (err) + goto err_lrc; + } + + err = e->ops->init(e); + if (err) + goto err_lrc; + + return e; + +err_lrc: + for (i = i - 1; i >= 0; --i) + xe_lrc_finish(e->lrc + i); + kfree(e); + return ERR_PTR(err); +} + +struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm, + u32 logical_mask, u16 width, + struct xe_hw_engine *hwe, u32 flags) +{ + struct ww_acquire_ctx ww; + struct xe_engine *e; + int err; + + if (vm) { + err = xe_vm_lock(vm, &ww, 0, true); + if (err) + return ERR_PTR(err); + } + e = __xe_engine_create(xe, vm, logical_mask, width, hwe, flags); + if (vm) + xe_vm_unlock(vm, &ww); + + return e; +} + +struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, + enum xe_engine_class class, u32 flags) +{ + struct xe_hw_engine *hwe, *hwe0 = NULL; + enum xe_hw_engine_id id; + u32 logical_mask = 0; + + for_each_hw_engine(hwe, gt, id) { + if (xe_hw_engine_is_reserved(hwe)) + continue; + + if (hwe->class == class) { + logical_mask |= BIT(hwe->logical_instance); + if (!hwe0) + hwe0 = hwe; + } + } + + if (!logical_mask) + return ERR_PTR(-ENODEV); + + return xe_engine_create(xe, vm, logical_mask, 1, hwe0, flags); +} + +void xe_engine_destroy(struct kref *ref) +{ + struct xe_engine *e = container_of(ref, struct xe_engine, refcount); + struct xe_engine *engine, *next; + + if (!(e->flags & ENGINE_FLAG_BIND_ENGINE_CHILD)) { + list_for_each_entry_safe(engine, next, &e->multi_gt_list, + multi_gt_link) + xe_engine_put(engine); + } + + e->ops->fini(e); +} + +void xe_engine_fini(struct xe_engine *e) +{ + int i; + + for (i = 0; i < e->width; ++i) + xe_lrc_finish(e->lrc + i); + if (e->vm) + 
xe_vm_put(e->vm); + + kfree(e); +} + +struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id) +{ + struct xe_engine *e; + + mutex_lock(&xef->engine.lock); + e = xa_load(&xef->engine.xa, id); + mutex_unlock(&xef->engine.lock); + + if (e) + xe_engine_get(e); + + return e; +} + +static int engine_set_priority(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (XE_IOCTL_ERR(xe, value > XE_ENGINE_PRIORITY_HIGH)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, value == XE_ENGINE_PRIORITY_HIGH && + !capable(CAP_SYS_NICE))) + return -EPERM; + + return e->ops->set_priority(e, value); +} + +static int engine_set_timeslice(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (!capable(CAP_SYS_NICE)) + return -EPERM; + + return e->ops->set_timeslice(e, value); +} + +static int engine_set_preemption_timeout(struct xe_device *xe, + struct xe_engine *e, u64 value, + bool create) +{ + if (!capable(CAP_SYS_NICE)) + return -EPERM; + + return e->ops->set_preempt_timeout(e, value); +} + +static int engine_set_compute_mode(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (XE_IOCTL_ERR(xe, !create)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_VM)) + return -EINVAL; + + if (value) { + struct xe_vm *vm = e->vm; + int err; + + if (XE_IOCTL_ERR(xe, xe_vm_in_fault_mode(vm))) + return -EOPNOTSUPP; + + if (XE_IOCTL_ERR(xe, !xe_vm_in_compute_mode(vm))) + return -EOPNOTSUPP; + + if (XE_IOCTL_ERR(xe, e->width != 1)) + return -EINVAL; + + e->compute.context = dma_fence_context_alloc(1); + spin_lock_init(&e->compute.lock); + + err = xe_vm_add_compute_engine(vm, e); + if (XE_IOCTL_ERR(xe, err)) + return err; + + e->flags |= ENGINE_FLAG_COMPUTE_MODE; + e->flags &= ~ENGINE_FLAG_PERSISTENT; + } + + return 0; +} + +static int engine_set_persistence(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (XE_IOCTL_ERR(xe, !create)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE)) + return -EINVAL; + + if (value) + e->flags |= ENGINE_FLAG_PERSISTENT; + else + e->flags &= ~ENGINE_FLAG_PERSISTENT; + + return 0; +} + +static int engine_set_job_timeout(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (XE_IOCTL_ERR(xe, !create)) + return -EINVAL; + + if (!capable(CAP_SYS_NICE)) + return -EPERM; + + return e->ops->set_job_timeout(e, value); +} + +static int engine_set_acc_trigger(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (XE_IOCTL_ERR(xe, !create)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, !xe->info.supports_usm)) + return -EINVAL; + + e->usm.acc_trigger = value; + + return 0; +} + +static int engine_set_acc_notify(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (XE_IOCTL_ERR(xe, !create)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, !xe->info.supports_usm)) + return -EINVAL; + + e->usm.acc_notify = value; + + return 0; +} + +static int engine_set_acc_granularity(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (XE_IOCTL_ERR(xe, !create)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, !xe->info.supports_usm)) + return -EINVAL; + + e->usm.acc_granularity = value; + + return 0; +} + +typedef int (*xe_engine_set_property_fn)(struct xe_device *xe, + struct xe_engine *e, + u64 value, bool create); + +static const xe_engine_set_property_fn engine_set_property_funcs[] = { + 
[XE_ENGINE_PROPERTY_PRIORITY] = engine_set_priority, + [XE_ENGINE_PROPERTY_TIMESLICE] = engine_set_timeslice, + [XE_ENGINE_PROPERTY_PREEMPTION_TIMEOUT] = engine_set_preemption_timeout, + [XE_ENGINE_PROPERTY_COMPUTE_MODE] = engine_set_compute_mode, + [XE_ENGINE_PROPERTY_PERSISTENCE] = engine_set_persistence, + [XE_ENGINE_PROPERTY_JOB_TIMEOUT] = engine_set_job_timeout, + [XE_ENGINE_PROPERTY_ACC_TRIGGER] = engine_set_acc_trigger, + [XE_ENGINE_PROPERTY_ACC_NOTIFY] = engine_set_acc_notify, + [XE_ENGINE_PROPERTY_ACC_GRANULARITY] = engine_set_acc_granularity, +}; + +static int engine_user_ext_set_property(struct xe_device *xe, + struct xe_engine *e, + u64 extension, + bool create) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_ext_engine_set_property ext; + int err; + u32 idx; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_ERR(xe, err)) + return -EFAULT; + + if (XE_IOCTL_ERR(xe, ext.property >= + ARRAY_SIZE(engine_set_property_funcs))) + return -EINVAL; + + idx = array_index_nospec(ext.property, ARRAY_SIZE(engine_set_property_funcs)); + return engine_set_property_funcs[idx](xe, e, ext.value, create); +} + +typedef int (*xe_engine_user_extension_fn)(struct xe_device *xe, + struct xe_engine *e, + u64 extension, + bool create); + +static const xe_engine_set_property_fn engine_user_extension_funcs[] = { + [XE_ENGINE_EXTENSION_SET_PROPERTY] = engine_user_ext_set_property, +}; + +#define MAX_USER_EXTENSIONS 16 +static int engine_user_extensions(struct xe_device *xe, struct xe_engine *e, + u64 extensions, int ext_number, bool create) +{ + u64 __user *address = u64_to_user_ptr(extensions); + struct xe_user_extension ext; + int err; + u32 idx; + + if (XE_IOCTL_ERR(xe, ext_number >= MAX_USER_EXTENSIONS)) + return -E2BIG; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_ERR(xe, err)) + return -EFAULT; + + if (XE_IOCTL_ERR(xe, ext.name >= + ARRAY_SIZE(engine_user_extension_funcs))) + return -EINVAL; + + idx = array_index_nospec(ext.name, + ARRAY_SIZE(engine_user_extension_funcs)); + err = engine_user_extension_funcs[idx](xe, e, extensions, create); + if (XE_IOCTL_ERR(xe, err)) + return err; + + if (ext.next_extension) + return engine_user_extensions(xe, e, ext.next_extension, + ++ext_number, create); + + return 0; +} + +static const enum xe_engine_class user_to_xe_engine_class[] = { + [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, + [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, + [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, + [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, + [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, +}; + +static struct xe_hw_engine * +find_hw_engine(struct xe_device *xe, + struct drm_xe_engine_class_instance eci) +{ + u32 idx; + + if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class)) + return NULL; + + if (eci.gt_id >= xe->info.tile_count) + return NULL; + + idx = array_index_nospec(eci.engine_class, + ARRAY_SIZE(user_to_xe_engine_class)); + + return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), + user_to_xe_engine_class[idx], + eci.engine_instance, true); +} + +static u32 bind_engine_logical_mask(struct xe_device *xe, struct xe_gt *gt, + struct drm_xe_engine_class_instance *eci, + u16 width, u16 num_placements) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + u32 logical_mask = 0; + + if (XE_IOCTL_ERR(xe, width != 1)) + return 0; + if (XE_IOCTL_ERR(xe, num_placements != 1)) + return 0; + if (XE_IOCTL_ERR(xe, eci[0].engine_instance 
!= 0)) + return 0; + + eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY; + + for_each_hw_engine(hwe, gt, id) { + if (xe_hw_engine_is_reserved(hwe)) + continue; + + if (hwe->class == + user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY]) + logical_mask |= BIT(hwe->logical_instance); + } + + return logical_mask; +} + +static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, + struct drm_xe_engine_class_instance *eci, + u16 width, u16 num_placements) +{ + int len = width * num_placements; + int i, j, n; + u16 class; + u16 gt_id; + u32 return_mask = 0, prev_mask; + + if (XE_IOCTL_ERR(xe, !xe_device_guc_submission_enabled(xe) && + len > 1)) + return 0; + + for (i = 0; i < width; ++i) { + u32 current_mask = 0; + + for (j = 0; j < num_placements; ++j) { + struct xe_hw_engine *hwe; + + n = j * width + i; + + hwe = find_hw_engine(xe, eci[n]); + if (XE_IOCTL_ERR(xe, !hwe)) + return 0; + + if (XE_IOCTL_ERR(xe, xe_hw_engine_is_reserved(hwe))) + return 0; + + if (XE_IOCTL_ERR(xe, n && eci[n].gt_id != gt_id) || + XE_IOCTL_ERR(xe, n && eci[n].engine_class != class)) + return 0; + + class = eci[n].engine_class; + gt_id = eci[n].gt_id; + + if (width == 1 || !i) + return_mask |= BIT(eci[n].engine_instance); + current_mask |= BIT(eci[n].engine_instance); + } + + /* Parallel submissions must be logically contiguous */ + if (i && XE_IOCTL_ERR(xe, current_mask != prev_mask << 1)) + return 0; + + prev_mask = current_mask; + } + + return return_mask; +} + +int xe_engine_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_engine_create *args = data; + struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE]; + struct drm_xe_engine_class_instance __user *user_eci = + u64_to_user_ptr(args->instances); + struct xe_hw_engine *hwe; + struct xe_vm *vm, *migrate_vm; + struct xe_gt *gt; + struct xe_engine *e = NULL; + u32 logical_mask; + u32 id; + int len; + int err; + + if (XE_IOCTL_ERR(xe, args->flags)) + return -EINVAL; + + len = args->width * args->num_placements; + if (XE_IOCTL_ERR(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE)) + return -EINVAL; + + err = __copy_from_user(eci, user_eci, + sizeof(struct drm_xe_engine_class_instance) * + len); + if (XE_IOCTL_ERR(xe, err)) + return -EFAULT; + + if (XE_IOCTL_ERR(xe, eci[0].gt_id >= xe->info.tile_count)) + return -EINVAL; + + xe_pm_runtime_get(xe); + + if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { + for_each_gt(gt, xe, id) { + struct xe_engine *new; + + if (xe_gt_is_media_type(gt)) + continue; + + eci[0].gt_id = gt->info.id; + logical_mask = bind_engine_logical_mask(xe, gt, eci, + args->width, + args->num_placements); + if (XE_IOCTL_ERR(xe, !logical_mask)) { + err = -EINVAL; + goto put_rpm; + } + + hwe = find_hw_engine(xe, eci[0]); + if (XE_IOCTL_ERR(xe, !hwe)) { + err = -EINVAL; + goto put_rpm; + } + + migrate_vm = xe_migrate_get_vm(gt->migrate); + new = xe_engine_create(xe, migrate_vm, logical_mask, + args->width, hwe, + ENGINE_FLAG_PERSISTENT | + ENGINE_FLAG_VM | + (id ? 
+ ENGINE_FLAG_BIND_ENGINE_CHILD : + 0)); + xe_vm_put(migrate_vm); + if (IS_ERR(new)) { + err = PTR_ERR(new); + if (e) + goto put_engine; + goto put_rpm; + } + if (id == 0) + e = new; + else + list_add_tail(&new->multi_gt_list, + &e->multi_gt_link); + } + } else { + gt = xe_device_get_gt(xe, eci[0].gt_id); + logical_mask = calc_validate_logical_mask(xe, gt, eci, + args->width, + args->num_placements); + if (XE_IOCTL_ERR(xe, !logical_mask)) { + err = -EINVAL; + goto put_rpm; + } + + hwe = find_hw_engine(xe, eci[0]); + if (XE_IOCTL_ERR(xe, !hwe)) { + err = -EINVAL; + goto put_rpm; + } + + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_ERR(xe, !vm)) { + err = -ENOENT; + goto put_rpm; + } + + e = xe_engine_create(xe, vm, logical_mask, + args->width, hwe, ENGINE_FLAG_PERSISTENT); + xe_vm_put(vm); + if (IS_ERR(e)) { + err = PTR_ERR(e); + goto put_rpm; + } + } + + if (args->extensions) { + err = engine_user_extensions(xe, e, args->extensions, 0, true); + if (XE_IOCTL_ERR(xe, err)) + goto put_engine; + } + + if (XE_IOCTL_ERR(xe, e->vm && xe_vm_in_compute_mode(e->vm) != + !!(e->flags & ENGINE_FLAG_COMPUTE_MODE))) { + err = -ENOTSUPP; + goto put_engine; + } + + e->persitent.xef = xef; + + mutex_lock(&xef->engine.lock); + err = xa_alloc(&xef->engine.xa, &id, e, xa_limit_32b, GFP_KERNEL); + mutex_unlock(&xef->engine.lock); + if (err) + goto put_engine; + + args->engine_id = id; + + return 0; + +put_engine: + xe_engine_kill(e); + xe_engine_put(e); +put_rpm: + xe_pm_runtime_put(xe); + return err; +} + +static void engine_kill_compute(struct xe_engine *e) +{ + if (!xe_vm_in_compute_mode(e->vm)) + return; + + down_write(&e->vm->lock); + list_del(&e->compute.link); + --e->vm->preempt.num_engines; + if (e->compute.pfence) { + dma_fence_enable_sw_signaling(e->compute.pfence); + dma_fence_put(e->compute.pfence); + e->compute.pfence = NULL; + } + up_write(&e->vm->lock); +} + +void xe_engine_kill(struct xe_engine *e) +{ + struct xe_engine *engine = e, *next; + + list_for_each_entry_safe(engine, next, &engine->multi_gt_list, + multi_gt_link) { + e->ops->kill(engine); + engine_kill_compute(engine); + } + + e->ops->kill(e); + engine_kill_compute(e); +} + +int xe_engine_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_engine_destroy *args = data; + struct xe_engine *e; + + if (XE_IOCTL_ERR(xe, args->pad)) + return -EINVAL; + + mutex_lock(&xef->engine.lock); + e = xa_erase(&xef->engine.xa, args->engine_id); + mutex_unlock(&xef->engine.lock); + if (XE_IOCTL_ERR(xe, !e)) + return -ENOENT; + + if (!(e->flags & ENGINE_FLAG_PERSISTENT)) + xe_engine_kill(e); + else + xe_device_add_persitent_engines(xe, e); + + trace_xe_engine_close(e); + xe_engine_put(e); + xe_pm_runtime_put(xe); + + return 0; +} + +int xe_engine_set_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_engine_set_property *args = data; + struct xe_engine *e; + int ret; + u32 idx; + + e = xe_engine_lookup(xef, args->engine_id); + if (XE_IOCTL_ERR(xe, !e)) + return -ENOENT; + + if (XE_IOCTL_ERR(xe, args->property >= + ARRAY_SIZE(engine_set_property_funcs))) { + ret = -EINVAL; + goto out; + } + + idx = array_index_nospec(args->property, + ARRAY_SIZE(engine_set_property_funcs)); + ret = engine_set_property_funcs[idx](xe, e, args->value, false); + if (XE_IOCTL_ERR(xe, ret)) + goto out; + + if 
(args->extensions) + ret = engine_user_extensions(xe, e, args->extensions, 0, + false); +out: + xe_engine_put(e); + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_engine.h b/drivers/gpu/drm/xe/xe_engine.h new file mode 100644 index 000000000000..4d1b609fea7e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_engine.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_ENGINE_H_ +#define _XE_ENGINE_H_ + +#include "xe_engine_types.h" +#include "xe_vm_types.h" + +struct drm_device; +struct drm_file; +struct xe_device; +struct xe_file; + +struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm, + u32 logical_mask, u16 width, + struct xe_hw_engine *hw_engine, u32 flags); +struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, + enum xe_engine_class class, u32 flags); + +void xe_engine_fini(struct xe_engine *e); +void xe_engine_destroy(struct kref *ref); + +struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id); + +static inline struct xe_engine *xe_engine_get(struct xe_engine *engine) +{ + kref_get(&engine->refcount); + return engine; +} + +static inline void xe_engine_put(struct xe_engine *engine) +{ + kref_put(&engine->refcount, xe_engine_destroy); +} + +static inline bool xe_engine_is_parallel(struct xe_engine *engine) +{ + return engine->width > 1; +} + +void xe_engine_kill(struct xe_engine *e); + +int xe_engine_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_engine_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_engine_set_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); + +#endif diff --git a/drivers/gpu/drm/xe/xe_engine_types.h b/drivers/gpu/drm/xe/xe_engine_types.h new file mode 100644 index 000000000000..3dfa1c14e181 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_engine_types.h @@ -0,0 +1,208 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_ENGINE_TYPES_H_ +#define _XE_ENGINE_TYPES_H_ + +#include + +#include + +#include "xe_gpu_scheduler_types.h" +#include "xe_hw_engine_types.h" +#include "xe_hw_fence_types.h" +#include "xe_lrc_types.h" + +struct xe_execlist_engine; +struct xe_gt; +struct xe_guc_engine; +struct xe_hw_engine; +struct xe_vm; + +enum xe_engine_priority { + XE_ENGINE_PRIORITY_UNSET = -2, /* For execlist usage only */ + XE_ENGINE_PRIORITY_LOW = 0, + XE_ENGINE_PRIORITY_NORMAL, + XE_ENGINE_PRIORITY_HIGH, + XE_ENGINE_PRIORITY_KERNEL, + + XE_ENGINE_PRIORITY_COUNT +}; + +/** + * struct xe_engine - Submission engine + * + * Contains all state necessary for submissions. Can either be a user object or + * a kernel object. + */ +struct xe_engine { + /** @gt: graphics tile this engine can submit to */ + struct xe_gt *gt; + /** + * @hwe: A hardware of the same class. May (physical engine) or may not + * (virtual engine) be where jobs actual engine up running. Should never + * really be used for submissions. 
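From userspace, xe_engine_create_ioctl() above is reached with a struct drm_xe_engine_create whose fields mirror what the kernel code reads back out of it. A hedged sketch follows: the uAPI header name, the DRM_IOCTL_XE_ENGINE_CREATE macro, the 'base' member of the set-property extension, and the numeric priority value are assumptions; the struct and enum names themselves appear in the ioctl code above.

#include <stdint.h>
#include <sys/ioctl.h>
#include "drm/xe_drm.h"         /* uAPI header name assumed */

/* Create a 1-wide render engine on gt0 and ask for normal priority. */
static int example_engine_create(int fd, uint32_t vm_id, uint32_t *engine_id)
{
        struct drm_xe_engine_class_instance inst = {
                .engine_class = DRM_XE_ENGINE_CLASS_RENDER,
                .engine_instance = 0,
                .gt_id = 0,
        };
        struct drm_xe_ext_engine_set_property prio = {
                .base.name = XE_ENGINE_EXTENSION_SET_PROPERTY, /* 'base' member name assumed */
                .property = XE_ENGINE_PROPERTY_PRIORITY,
                .value = 1,     /* assumed to match XE_ENGINE_PRIORITY_NORMAL */
        };
        struct drm_xe_engine_create create = {
                .extensions = (uintptr_t)&prio,
                .instances = (uintptr_t)&inst,
                .width = 1,
                .num_placements = 1,
                .vm_id = vm_id,
        };

        if (ioctl(fd, DRM_IOCTL_XE_ENGINE_CREATE, &create))    /* ioctl macro name assumed */
                return -1;

        *engine_id = create.engine_id;
        return 0;
}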
+ */ + struct xe_hw_engine *hwe; + /** @refcount: ref count of this engine */ + struct kref refcount; + /** @vm: VM (address space) for this engine */ + struct xe_vm *vm; + /** @class: class of this engine */ + enum xe_engine_class class; + /** @priority: priority of this exec queue */ + enum xe_engine_priority priority; + /** + * @logical_mask: logical mask of where job submitted to engine can run + */ + u32 logical_mask; + /** @name: name of this engine */ + char name[MAX_FENCE_NAME_LEN]; + /** @width: width (number BB submitted per exec) of this engine */ + u16 width; + /** @fence_irq: fence IRQ used to signal job completion */ + struct xe_hw_fence_irq *fence_irq; + +#define ENGINE_FLAG_BANNED BIT(0) +#define ENGINE_FLAG_KERNEL BIT(1) +#define ENGINE_FLAG_PERSISTENT BIT(2) +#define ENGINE_FLAG_COMPUTE_MODE BIT(3) +#define ENGINE_FLAG_VM BIT(4) +#define ENGINE_FLAG_BIND_ENGINE_CHILD BIT(5) +#define ENGINE_FLAG_WA BIT(6) + + /** + * @flags: flags for this engine, should statically setup aside from ban + * bit + */ + unsigned long flags; + + union { + /** @multi_gt_list: list head for VM bind engines if multi-GT */ + struct list_head multi_gt_list; + /** @multi_gt_link: link for VM bind engines if multi-GT */ + struct list_head multi_gt_link; + }; + + union { + /** @execlist: execlist backend specific state for engine */ + struct xe_execlist_engine *execlist; + /** @guc: GuC backend specific state for engine */ + struct xe_guc_engine *guc; + }; + + /** + * @persitent: persitent engine state + */ + struct { + /** @xef: file which this engine belongs to */ + struct xe_file *xef; + /** @link: link in list of persitent engines */ + struct list_head link; + } persitent; + + union { + /** + * @parallel: parallel submission state + */ + struct { + /** @composite_fence_ctx: context composite fence */ + u64 composite_fence_ctx; + /** @composite_fence_seqno: seqno for composite fence */ + u32 composite_fence_seqno; + } parallel; + /** + * @bind: bind submission state + */ + struct { + /** @fence_ctx: context bind fence */ + u64 fence_ctx; + /** @fence_seqno: seqno for bind fence */ + u32 fence_seqno; + } bind; + }; + + /** @sched_props: scheduling properties */ + struct { + /** @timeslice_us: timeslice period in micro-seconds */ + u32 timeslice_us; + /** @preempt_timeout_us: preemption timeout in micro-seconds */ + u32 preempt_timeout_us; + } sched_props; + + /** @compute: compute engine state */ + struct { + /** @pfence: preemption fence */ + struct dma_fence *pfence; + /** @context: preemption fence context */ + u64 context; + /** @seqno: preemption fence seqno */ + u32 seqno; + /** @link: link into VM's list of engines */ + struct list_head link; + /** @lock: preemption fences lock */ + spinlock_t lock; + } compute; + + /** @usm: unified shared memory state */ + struct { + /** @acc_trigger: access counter trigger */ + u32 acc_trigger; + /** @acc_notify: access counter notify */ + u32 acc_notify; + /** @acc_granularity: access counter granularity */ + u32 acc_granularity; + } usm; + + /** @ops: submission backend engine operations */ + const struct xe_engine_ops *ops; + + /** @ring_ops: ring operations for this engine */ + const struct xe_ring_ops *ring_ops; + /** @entity: DRM sched entity for this engine (1 to 1 relationship) */ + struct drm_sched_entity *entity; + /** @lrc: logical ring context for this engine */ + struct xe_lrc lrc[0]; +}; + +/** + * struct xe_engine_ops - Submission backend engine operations + */ +struct xe_engine_ops { + /** @init: Initialize engine for submission backend */ 
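struct xe_engine_ops, whose members follow, is the seam between this common engine code and a submission backend (GuC or execlist elsewhere in the series). A minimal, hypothetical wiring is sketched here; every example_* name is invented and only a few callbacks are filled in.

static int example_backend_init(struct xe_engine *e)
{
        /* allocate backend state and register e with its scheduler */
        return 0;
}

static void example_backend_kill(struct xe_engine *e)
{
        /* cancel anything in flight so the engine can be torn down */
}

static void example_backend_fini(struct xe_engine *e)
{
        /* free backend state; reached from xe_engine_destroy() via ->fini,
         * presumably ending in a call to xe_engine_fini() */
}

static const struct xe_engine_ops example_engine_ops = {
        .init = example_backend_init,
        .kill = example_backend_kill,
        .fini = example_backend_fini,
        /* .set_priority, .suspend, .suspend_wait, .resume, ... as needed */
};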
+	int (*init)(struct xe_engine *e);
+	/** @kill: Kill inflight submissions for backend */
+	void (*kill)(struct xe_engine *e);
+	/** @fini: Fini engine for submission backend */
+	void (*fini)(struct xe_engine *e);
+	/** @set_priority: Set priority for engine */
+	int (*set_priority)(struct xe_engine *e,
+			    enum xe_engine_priority priority);
+	/** @set_timeslice: Set timeslice for engine */
+	int (*set_timeslice)(struct xe_engine *e, u32 timeslice_us);
+	/** @set_preempt_timeout: Set preemption timeout for engine */
+	int (*set_preempt_timeout)(struct xe_engine *e, u32 preempt_timeout_us);
+	/** @set_job_timeout: Set job timeout for engine */
+	int (*set_job_timeout)(struct xe_engine *e, u32 job_timeout_ms);
+	/**
+	 * @suspend: Suspend engine from executing, allowed to be called
+	 * multiple times in a row before resume, with the caveat that
+	 * suspend_wait returns before calling suspend again.
+	 */
+	int (*suspend)(struct xe_engine *e);
+	/**
+	 * @suspend_wait: Wait for an engine to suspend executing, should be
+	 * called after suspend.
+	 */
+	void (*suspend_wait)(struct xe_engine *e);
+	/**
+	 * @resume: Resume engine execution; the engine must be in a suspended
+	 * state and the dma-fence returned from the most recent suspend call
+	 * must be signalled when this function is called.
+	 */
+	void (*resume)(struct xe_engine *e);
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
new file mode 100644
index 000000000000..00f298acc436
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -0,0 +1,390 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_engine.h"
+#include "xe_exec.h"
+#include "xe_macros.h"
+#include "xe_sched_job.h"
+#include "xe_sync.h"
+#include "xe_vm.h"
+
+/**
+ * DOC: Execbuf (User GPU command submission)
+ *
+ * Execs have historically been rather complicated in DRM drivers (at least in
+ * the i915) because of a few things:
+ *
+ * - Passing in a list of BOs which are read / written to, creating implicit syncs
+ * - Binding at exec time
+ * - Flow controlling the ring at exec time
+ *
+ * In XE we avoid all of this complication by not allowing a BO list to be
+ * passed into an exec, using the dma-buf implicit sync uAPI, having binds as
+ * separate operations, and using the DRM scheduler to flow control the ring.
+ * Let's deep dive into each of these.
+ *
+ * We can get away from a BO list by forcing the user to use in / out fences on
+ * every exec rather than the kernel tracking dependencies of BOs (e.g. if the
+ * user knows an exec writes to a BO and reads from the BO in the next exec, it
+ * is the user's responsibility to pass an in / out fence between the two execs).
+ *
+ * Implicit dependencies for external BOs are handled by using the dma-buf
+ * implicit dependency uAPI (TODO: add link). To make this work, each exec must
+ * install the job's fence into the DMA_RESV_USAGE_WRITE slot of every external
+ * BO mapped in the VM.
+ *
+ * We do not allow a user to trigger a bind at exec time; rather, we have a VM
+ * bind IOCTL which uses the same in / out fence interface as exec. In that
+ * sense, a VM bind is basically the same operation as an exec from the user
+ * perspective. e.g. if an exec depends on a VM bind, use the in / out fence
+ * interface (struct drm_xe_sync) to synchronize, just like syncing between two
+ * dependent execs.
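+ *
+ * As a rough illustration only (the wrappers and sync helpers below are
+ * hypothetical placeholders, not the exact uAPI), a bind-then-exec dependency
+ * is expressed the same way as an exec-to-exec dependency:
+ *
+ * .. code-block:: c
+ *
+ *	// illustrative userspace sketch, helper names are made up
+ *	uint32_t syncobj = syncobj_create(fd);
+ *
+ *	// the VM bind signals the syncobj as its out-fence ...
+ *	vm_bind(fd, vm_id, bind_args, OUT_SYNC(syncobj));
+ *	// ... and the exec waits on it as an in-fence before running
+ *	exec(fd, engine_id, batch_addr, IN_SYNC(syncobj));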
+ *
+ * Although a user cannot trigger a bind, we still have to rebind userptrs in
+ * the VM that have been invalidated since the last exec; likewise, we also
+ * have to rebind BOs that have been evicted by the kernel. We schedule these
+ * rebinds behind any pending kernel operations on any external BOs in the VM
+ * or any BOs private to the VM. This is accomplished by the rebinds waiting on
+ * the BOs' DMA_RESV_USAGE_KERNEL slot (kernel ops) and kernel ops waiting on
+ * all BO slots (inflight execs are in the DMA_RESV_USAGE_BOOKKEEP slot for
+ * private BOs and in DMA_RESV_USAGE_WRITE for external BOs).
+ *
+ * Rebinds / dma-resv usage apply to non-compute-mode VMs only; for
+ * compute-mode VMs we use preempt fences and a rebind worker (TODO: add link).
+ *
+ * There is no need to flow control the ring in the exec as we write the ring
+ * at submission time and set the DRM scheduler max job limit to SIZE_OF_RING /
+ * MAX_JOB_SIZE. The DRM scheduler will then hold all jobs until space in the
+ * ring is available.
+ *
+ * All of this results in a rather simple exec implementation.
+ *
+ * Flow
+ * ~~~~
+ *
+ * .. code-block::
+ *
+ *	Parse input arguments
+ *	Wait for any async VM bind passed as in-fences to start
+ *	<----------------------------------------------------------------------|
+ *	Lock global VM lock in read mode                                       |
+ *	Pin userptrs (also finds userptr invalidated since last exec)          |
+ *	Lock exec (VM dma-resv lock, external BOs dma-resv locks)              |
+ *	Validate BOs that have been evicted                                    |
+ *	Create job                                                             |
+ *	Rebind invalidated userptrs + evicted BOs (non-compute-mode)           |
+ *	Add rebind fence dependency to job                                     |
+ *	Add job VM dma-resv bookkeeping slot (non-compute mode)                |
+ *	Add job to external BOs dma-resv write slots (non-compute mode)        |
+ *	Check if any userptrs invalidated since pin ------ Drop locks ---------|
+ *	Install in / out fences for job
+ *	Submit job
+ *	Unlock all
+ */
+
+static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
+			 struct ttm_validate_buffer tv_onstack[],
+			 struct ttm_validate_buffer **tv,
+			 struct list_head *objs)
+{
+	struct xe_vm *vm = e->vm;
+	struct xe_vma *vma;
+	LIST_HEAD(dups);
+	int err;
+
+	*tv = NULL;
+	if (xe_vm_no_dma_fences(e->vm))
+		return 0;
+
+	err = xe_vm_lock_dma_resv(vm, ww, tv_onstack, tv, objs, true, 1);
+	if (err)
+		return err;
+
+	/*
+	 * Validate BOs that have been evicted (i.e. make sure the
+	 * BOs have valid placements, possibly moving an evicted BO back
+	 * to a location where the GPU can access it).
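+	 *
+	 * Userptr VMAs are skipped below: they have no BO to validate and are
+	 * instead repinned via xe_vm_userptr_pin() earlier in the retry loop
+	 * of xe_exec_ioctl().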
+ */ + list_for_each_entry(vma, &vm->rebind_list, rebind_link) { + if (xe_vma_is_userptr(vma)) + continue; + + err = xe_bo_validate(vma->bo, vm, false); + if (err) { + xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs); + *tv = NULL; + return err; + } + } + + return 0; +} + +static void xe_exec_end(struct xe_engine *e, + struct ttm_validate_buffer *tv_onstack, + struct ttm_validate_buffer *tv, + struct ww_acquire_ctx *ww, + struct list_head *objs) +{ + if (!xe_vm_no_dma_fences(e->vm)) + xe_vm_unlock_dma_resv(e->vm, tv_onstack, tv, ww, objs); +} + +int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_exec *args = data; + struct drm_xe_sync __user *syncs_user = u64_to_user_ptr(args->syncs); + u64 __user *addresses_user = u64_to_user_ptr(args->address); + struct xe_engine *engine; + struct xe_sync_entry *syncs = NULL; + u64 addresses[XE_HW_ENGINE_MAX_INSTANCE]; + struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV]; + struct ttm_validate_buffer *tv = NULL; + u32 i, num_syncs = 0; + struct xe_sched_job *job; + struct dma_fence *rebind_fence; + struct xe_vm *vm; + struct ww_acquire_ctx ww; + struct list_head objs; + bool write_locked; + int err = 0; + + if (XE_IOCTL_ERR(xe, args->extensions)) + return -EINVAL; + + engine = xe_engine_lookup(xef, args->engine_id); + if (XE_IOCTL_ERR(xe, !engine)) + return -ENOENT; + + if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_VM)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, engine->width != args->num_batch_buffer)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_BANNED)) { + err = -ECANCELED; + goto err_engine; + } + + if (args->num_syncs) { + syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); + if (!syncs) { + err = -ENOMEM; + goto err_engine; + } + } + + vm = engine->vm; + + for (i = 0; i < args->num_syncs; i++) { + err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++], + &syncs_user[i], true, + xe_vm_no_dma_fences(vm)); + if (err) + goto err_syncs; + } + + if (xe_engine_is_parallel(engine)) { + err = __copy_from_user(addresses, addresses_user, sizeof(u64) * + engine->width); + if (err) { + err = -EFAULT; + goto err_syncs; + } + } + + /* + * We can't install a job into the VM dma-resv shared slot before an + * async VM bind passed in as a fence without the risk of deadlocking as + * the bind can trigger an eviction which in turn depends on anything in + * the VM dma-resv shared slots. Not an ideal solution, but we wait for + * all dependent async VM binds to start (install correct fences into + * dma-resv slots) before moving forward. + */ + if (!xe_vm_no_dma_fences(vm) && + vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS) { + for (i = 0; i < args->num_syncs; i++) { + struct dma_fence *fence = syncs[i].fence; + if (fence) { + err = xe_vm_async_fence_wait_start(fence); + if (err) + goto err_syncs; + } + } + } + +retry: + if (!xe_vm_no_dma_fences(vm) && xe_vm_userptr_check_repin(vm)) { + err = down_write_killable(&vm->lock); + write_locked = true; + } else { + /* We don't allow execs while the VM is in error state */ + err = down_read_interruptible(&vm->lock); + write_locked = false; + } + if (err) + goto err_syncs; + + /* We don't allow execs while the VM is in error state */ + if (vm->async_ops.error) { + err = vm->async_ops.error; + goto err_unlock_list; + } + + /* + * Extreme corner where we exit a VM error state with a munmap style VM + * unbind inflight which requires a rebind. 
In this case the rebind
+	 * needs to install some fences into the dma-resv slots. The worker to
+	 * do this is queued; let that worker make progress by dropping vm->lock,
+	 * flushing the worker and retrying the exec.
+	 */
+	if (vm->async_ops.munmap_rebind_inflight) {
+		if (write_locked)
+			up_write(&vm->lock);
+		else
+			up_read(&vm->lock);
+		flush_work(&vm->async_ops.work);
+		goto retry;
+	}
+
+	if (write_locked) {
+		err = xe_vm_userptr_pin(vm);
+		downgrade_write(&vm->lock);
+		write_locked = false;
+		if (err)
+			goto err_unlock_list;
+	}
+
+	err = xe_exec_begin(engine, &ww, tv_onstack, &tv, &objs);
+	if (err)
+		goto err_unlock_list;
+
+	if (xe_vm_is_closed(engine->vm)) {
+		drm_warn(&xe->drm, "Trying to schedule after vm is closed\n");
+		err = -EIO;
+		goto err_engine_end;
+	}
+
+	job = xe_sched_job_create(engine, xe_engine_is_parallel(engine) ?
+				  addresses : &args->address);
+	if (IS_ERR(job)) {
+		err = PTR_ERR(job);
+		goto err_engine_end;
+	}
+
+	/*
+	 * Rebind any invalidated userptrs or evicted BOs in the VM, non-compute
+	 * mode VMs only.
+	 */
+	rebind_fence = xe_vm_rebind(vm, false);
+	if (IS_ERR(rebind_fence)) {
+		err = PTR_ERR(rebind_fence);
+		goto err_put_job;
+	}
+
+	/*
+	 * We store the rebind_fence in the VM so subsequent execs don't get
+	 * scheduled before the rebinds of userptrs / evicted BOs are complete.
+	 */
+	if (rebind_fence) {
+		dma_fence_put(vm->rebind_fence);
+		vm->rebind_fence = rebind_fence;
+	}
+	if (vm->rebind_fence) {
+		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+			     &vm->rebind_fence->flags)) {
+			dma_fence_put(vm->rebind_fence);
+			vm->rebind_fence = NULL;
+		} else {
+			dma_fence_get(vm->rebind_fence);
+			err = drm_sched_job_add_dependency(&job->drm,
+							   vm->rebind_fence);
+			if (err)
+				goto err_put_job;
+		}
+	}
+
+	/* Wait behind munmap style rebinds */
+	if (!xe_vm_no_dma_fences(vm)) {
+		err = drm_sched_job_add_resv_dependencies(&job->drm,
+							  &vm->resv,
+							  DMA_RESV_USAGE_KERNEL);
+		if (err)
+			goto err_put_job;
+	}
+
+	for (i = 0; i < num_syncs && !err; i++)
+		err = xe_sync_entry_add_deps(&syncs[i], job);
+	if (err)
+		goto err_put_job;
+
+	if (!xe_vm_no_dma_fences(vm)) {
+		err = down_read_interruptible(&vm->userptr.notifier_lock);
+		if (err)
+			goto err_put_job;
+
+		err = __xe_vm_userptr_needs_repin(vm);
+		if (err)
+			goto err_repin;
+	}
+
+	/*
+	 * Point of no return; if we error after this point, just set an error
+	 * on the job and let the DRM scheduler / backend clean up the job.
+	 */
+	xe_sched_job_arm(job);
+	if (!xe_vm_no_dma_fences(vm)) {
+		/* Block userptr invalidations / BO eviction */
+		dma_resv_add_fence(&vm->resv,
+				   &job->drm.s_fence->finished,
+				   DMA_RESV_USAGE_BOOKKEEP);
+
+		/*
+		 * Make implicit sync work across drivers, assuming all external
+		 * BOs are written as we don't pass in a read / write list.
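+		 * Adding the finished fence with DMA_RESV_USAGE_WRITE means any
+		 * other driver doing implicit sync against these BOs will wait
+		 * on this job; the cost is that reads are treated as writes.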
+ */ + xe_vm_fence_all_extobjs(vm, &job->drm.s_fence->finished, + DMA_RESV_USAGE_WRITE); + } + + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], job, + &job->drm.s_fence->finished); + + xe_sched_job_push(job); + +err_repin: + if (!xe_vm_no_dma_fences(vm)) + up_read(&vm->userptr.notifier_lock); +err_put_job: + if (err) + xe_sched_job_put(job); +err_engine_end: + xe_exec_end(engine, tv_onstack, tv, &ww, &objs); +err_unlock_list: + if (write_locked) + up_write(&vm->lock); + else + up_read(&vm->lock); + if (err == -EAGAIN) + goto retry; +err_syncs: + for (i = 0; i < num_syncs; i++) + xe_sync_entry_cleanup(&syncs[i]); + kfree(syncs); +err_engine: + xe_engine_put(engine); + + return err; +} diff --git a/drivers/gpu/drm/xe/xe_exec.h b/drivers/gpu/drm/xe/xe_exec.h new file mode 100644 index 000000000000..e4932494cea3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_exec.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_EXEC_H_ +#define _XE_EXEC_H_ + +struct drm_device; +struct drm_file; + +int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file); + +#endif diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c new file mode 100644 index 000000000000..47587571123a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -0,0 +1,489 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include + +#include "xe_execlist.h" + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_engine.h" +#include "xe_hw_fence.h" +#include "xe_gt.h" +#include "xe_lrc.h" +#include "xe_macros.h" +#include "xe_mmio.h" +#include "xe_mocs.h" +#include "xe_ring_ops_types.h" +#include "xe_sched_job.h" + +#include "i915_reg.h" +#include "gt/intel_gpu_commands.h" +#include "gt/intel_gt_regs.h" +#include "gt/intel_lrc_reg.h" +#include "gt/intel_engine_regs.h" + +#define XE_EXECLIST_HANG_LIMIT 1 + +#define GEN11_SW_CTX_ID_SHIFT 37 +#define GEN11_SW_CTX_ID_WIDTH 11 +#define XEHP_SW_CTX_ID_SHIFT 39 +#define XEHP_SW_CTX_ID_WIDTH 16 + +#define GEN11_SW_CTX_ID \ + GENMASK_ULL(GEN11_SW_CTX_ID_WIDTH + GEN11_SW_CTX_ID_SHIFT - 1, \ + GEN11_SW_CTX_ID_SHIFT) + +#define XEHP_SW_CTX_ID \ + GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \ + XEHP_SW_CTX_ID_SHIFT) + + +static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, + u32 ctx_id) +{ + struct xe_gt *gt = hwe->gt; + struct xe_device *xe = gt_to_xe(gt); + u64 lrc_desc; + + printk(KERN_INFO "__start_lrc(%s, 0x%p, %u)\n", hwe->name, lrc, ctx_id); + + lrc_desc = xe_lrc_descriptor(lrc); + + if (GRAPHICS_VERx100(xe) >= 1250) { + XE_BUG_ON(!FIELD_FIT(XEHP_SW_CTX_ID, ctx_id)); + lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id); + } else { + XE_BUG_ON(!FIELD_FIT(GEN11_SW_CTX_ID, ctx_id)); + lrc_desc |= FIELD_PREP(GEN11_SW_CTX_ID, ctx_id); + } + + if (hwe->class == XE_ENGINE_CLASS_COMPUTE) + xe_mmio_write32(hwe->gt, GEN12_RCU_MODE.reg, + _MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE)); + + xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); + lrc->ring.old_tail = lrc->ring.tail; + + /* + * Make sure the context image is complete before we submit it to HW. + * + * Ostensibly, writes (including the WCB) should be flushed prior to + * an uncached write such as our mmio register access, the empirical + * evidence (esp. 
on Braswell) suggests that the WC write into memory + * may not be visible to the HW prior to the completion of the UC + * register write and that we may begin execution from the context + * before its image is complete leading to invalid PD chasing. + */ + wmb(); + + xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base).reg, + xe_bo_ggtt_addr(hwe->hwsp)); + xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base).reg); + xe_mmio_write32(gt, RING_MODE_GEN7(hwe->mmio_base).reg, + _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); + + xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS(hwe->mmio_base).reg + 0, + lower_32_bits(lrc_desc)); + xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS(hwe->mmio_base).reg + 4, + upper_32_bits(lrc_desc)); + xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base).reg, + EL_CTRL_LOAD); +} + +static void __xe_execlist_port_start(struct xe_execlist_port *port, + struct xe_execlist_engine *exl) +{ + struct xe_device *xe = gt_to_xe(port->hwe->gt); + int max_ctx = FIELD_MAX(GEN11_SW_CTX_ID); + + if (GRAPHICS_VERx100(xe) >= 1250) + max_ctx = FIELD_MAX(XEHP_SW_CTX_ID); + + xe_execlist_port_assert_held(port); + + if (port->running_exl != exl || !exl->has_run) { + port->last_ctx_id++; + + /* 0 is reserved for the kernel context */ + if (port->last_ctx_id > max_ctx) + port->last_ctx_id = 1; + } + + __start_lrc(port->hwe, exl->engine->lrc, port->last_ctx_id); + port->running_exl = exl; + exl->has_run = true; +} + +static void __xe_execlist_port_idle(struct xe_execlist_port *port) +{ + u32 noop[2] = { MI_NOOP, MI_NOOP }; + + xe_execlist_port_assert_held(port); + + if (!port->running_exl) + return; + + printk(KERN_INFO "__xe_execlist_port_idle(%d:%d)\n", port->hwe->class, + port->hwe->instance); + + xe_lrc_write_ring(&port->hwe->kernel_lrc, noop, sizeof(noop)); + __start_lrc(port->hwe, &port->hwe->kernel_lrc, 0); + port->running_exl = NULL; +} + +static bool xe_execlist_is_idle(struct xe_execlist_engine *exl) +{ + struct xe_lrc *lrc = exl->engine->lrc; + + return lrc->ring.tail == lrc->ring.old_tail; +} + +static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port) +{ + struct xe_execlist_engine *exl = NULL; + int i; + + xe_execlist_port_assert_held(port); + + for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) { + while (!list_empty(&port->active[i])) { + exl = list_first_entry(&port->active[i], + struct xe_execlist_engine, + active_link); + list_del(&exl->active_link); + + if (xe_execlist_is_idle(exl)) { + exl->active_priority = XE_ENGINE_PRIORITY_UNSET; + continue; + } + + list_add_tail(&exl->active_link, &port->active[i]); + __xe_execlist_port_start(port, exl); + return; + } + } + + __xe_execlist_port_idle(port); +} + +static u64 read_execlist_status(struct xe_hw_engine *hwe) +{ + struct xe_gt *gt = hwe->gt; + u32 hi, lo; + + lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base).reg); + hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base).reg); + + printk(KERN_INFO "EXECLIST_STATUS %d:%d = 0x%08x %08x\n", hwe->class, + hwe->instance, hi, lo); + + return lo | (u64)hi << 32; +} + +static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port) +{ + u64 status; + + xe_execlist_port_assert_held(port); + + status = read_execlist_status(port->hwe); + if (status & BIT(7)) + return; + + __xe_execlist_port_start_next_active(port); +} + +static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe, + u16 intr_vec) +{ + struct xe_execlist_port *port = hwe->exl_port; + + spin_lock(&port->lock); + xe_execlist_port_irq_handler_locked(port); 
+ spin_unlock(&port->lock); +} + +static void xe_execlist_port_wake_locked(struct xe_execlist_port *port, + enum xe_engine_priority priority) +{ + xe_execlist_port_assert_held(port); + + if (port->running_exl && port->running_exl->active_priority >= priority) + return; + + __xe_execlist_port_start_next_active(port); +} + +static void xe_execlist_make_active(struct xe_execlist_engine *exl) +{ + struct xe_execlist_port *port = exl->port; + enum xe_engine_priority priority = exl->active_priority; + + XE_BUG_ON(priority == XE_ENGINE_PRIORITY_UNSET); + XE_BUG_ON(priority < 0); + XE_BUG_ON(priority >= ARRAY_SIZE(exl->port->active)); + + spin_lock_irq(&port->lock); + + if (exl->active_priority != priority && + exl->active_priority != XE_ENGINE_PRIORITY_UNSET) { + /* Priority changed, move it to the right list */ + list_del(&exl->active_link); + exl->active_priority = XE_ENGINE_PRIORITY_UNSET; + } + + if (exl->active_priority == XE_ENGINE_PRIORITY_UNSET) { + exl->active_priority = priority; + list_add_tail(&exl->active_link, &port->active[priority]); + } + + xe_execlist_port_wake_locked(exl->port, priority); + + spin_unlock_irq(&port->lock); +} + +static void xe_execlist_port_irq_fail_timer(struct timer_list *timer) +{ + struct xe_execlist_port *port = + container_of(timer, struct xe_execlist_port, irq_fail); + + spin_lock_irq(&port->lock); + xe_execlist_port_irq_handler_locked(port); + spin_unlock_irq(&port->lock); + + port->irq_fail.expires = jiffies + msecs_to_jiffies(1000); + add_timer(&port->irq_fail); +} + +struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe, + struct xe_hw_engine *hwe) +{ + struct drm_device *drm = &xe->drm; + struct xe_execlist_port *port; + int i; + + port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL); + if (!port) + return ERR_PTR(-ENOMEM); + + port->hwe = hwe; + + spin_lock_init(&port->lock); + for (i = 0; i < ARRAY_SIZE(port->active); i++) + INIT_LIST_HEAD(&port->active[i]); + + port->last_ctx_id = 1; + port->running_exl = NULL; + + hwe->irq_handler = xe_execlist_port_irq_handler; + + /* TODO: Fix the interrupt code so it doesn't race like mad */ + timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0); + port->irq_fail.expires = jiffies + msecs_to_jiffies(1000); + add_timer(&port->irq_fail); + + return port; +} + +void xe_execlist_port_destroy(struct xe_execlist_port *port) +{ + del_timer(&port->irq_fail); + + /* Prevent an interrupt while we're destroying */ + spin_lock_irq(>_to_xe(port->hwe->gt)->irq.lock); + port->hwe->irq_handler = NULL; + spin_unlock_irq(>_to_xe(port->hwe->gt)->irq.lock); +} + +static struct dma_fence * +execlist_run_job(struct drm_sched_job *drm_job) +{ + struct xe_sched_job *job = to_xe_sched_job(drm_job); + struct xe_engine *e = job->engine; + struct xe_execlist_engine *exl = job->engine->execlist; + + e->ring_ops->emit_job(job); + xe_execlist_make_active(exl); + + return dma_fence_get(job->fence); +} + +static void execlist_job_free(struct drm_sched_job *drm_job) +{ + struct xe_sched_job *job = to_xe_sched_job(drm_job); + + xe_sched_job_put(job); +} + +static const struct drm_sched_backend_ops drm_sched_ops = { + .run_job = execlist_run_job, + .free_job = execlist_job_free, +}; + +static int execlist_engine_init(struct xe_engine *e) +{ + struct drm_gpu_scheduler *sched; + struct xe_execlist_engine *exl; + int err; + + XE_BUG_ON(xe_device_guc_submission_enabled(gt_to_xe(e->gt))); + + exl = kzalloc(sizeof(*exl), GFP_KERNEL); + if (!exl) + return -ENOMEM; + + exl->engine = e; + + err = drm_sched_init(&exl->sched, 
&drm_sched_ops, NULL, 1, + e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, + XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT, + NULL, NULL, e->hwe->name, + gt_to_xe(e->gt)->drm.dev); + if (err) + goto err_free; + + sched = &exl->sched; + err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL); + if (err) + goto err_sched; + + exl->port = e->hwe->exl_port; + exl->has_run = false; + exl->active_priority = XE_ENGINE_PRIORITY_UNSET; + e->execlist = exl; + e->entity = &exl->entity; + + switch (e->class) { + case XE_ENGINE_CLASS_RENDER: + sprintf(e->name, "rcs%d", ffs(e->logical_mask) - 1); + break; + case XE_ENGINE_CLASS_VIDEO_DECODE: + sprintf(e->name, "vcs%d", ffs(e->logical_mask) - 1); + break; + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + sprintf(e->name, "vecs%d", ffs(e->logical_mask) - 1); + break; + case XE_ENGINE_CLASS_COPY: + sprintf(e->name, "bcs%d", ffs(e->logical_mask) - 1); + break; + case XE_ENGINE_CLASS_COMPUTE: + sprintf(e->name, "ccs%d", ffs(e->logical_mask) - 1); + break; + default: + XE_WARN_ON(e->class); + } + + return 0; + +err_sched: + drm_sched_fini(&exl->sched); +err_free: + kfree(exl); + return err; +} + +static void execlist_engine_fini_async(struct work_struct *w) +{ + struct xe_execlist_engine *ee = + container_of(w, struct xe_execlist_engine, fini_async); + struct xe_engine *e = ee->engine; + struct xe_execlist_engine *exl = e->execlist; + unsigned long flags; + + XE_BUG_ON(xe_device_guc_submission_enabled(gt_to_xe(e->gt))); + + spin_lock_irqsave(&exl->port->lock, flags); + if (WARN_ON(exl->active_priority != XE_ENGINE_PRIORITY_UNSET)) + list_del(&exl->active_link); + spin_unlock_irqrestore(&exl->port->lock, flags); + + if (e->flags & ENGINE_FLAG_PERSISTENT) + xe_device_remove_persitent_engines(gt_to_xe(e->gt), e); + drm_sched_entity_fini(&exl->entity); + drm_sched_fini(&exl->sched); + kfree(exl); + + xe_engine_fini(e); +} + +static void execlist_engine_kill(struct xe_engine *e) +{ + /* NIY */ +} + +static void execlist_engine_fini(struct xe_engine *e) +{ + INIT_WORK(&e->execlist->fini_async, execlist_engine_fini_async); + queue_work(system_unbound_wq, &e->execlist->fini_async); +} + +static int execlist_engine_set_priority(struct xe_engine *e, + enum xe_engine_priority priority) +{ + /* NIY */ + return 0; +} + +static int execlist_engine_set_timeslice(struct xe_engine *e, u32 timeslice_us) +{ + /* NIY */ + return 0; +} + +static int execlist_engine_set_preempt_timeout(struct xe_engine *e, + u32 preempt_timeout_us) +{ + /* NIY */ + return 0; +} + +static int execlist_engine_set_job_timeout(struct xe_engine *e, + u32 job_timeout_ms) +{ + /* NIY */ + return 0; +} + +static int execlist_engine_suspend(struct xe_engine *e) +{ + /* NIY */ + return 0; +} + +static void execlist_engine_suspend_wait(struct xe_engine *e) + +{ + /* NIY */ +} + +static void execlist_engine_resume(struct xe_engine *e) +{ + xe_mocs_init_engine(e); +} + +static const struct xe_engine_ops execlist_engine_ops = { + .init = execlist_engine_init, + .kill = execlist_engine_kill, + .fini = execlist_engine_fini, + .set_priority = execlist_engine_set_priority, + .set_timeslice = execlist_engine_set_timeslice, + .set_preempt_timeout = execlist_engine_set_preempt_timeout, + .set_job_timeout = execlist_engine_set_job_timeout, + .suspend = execlist_engine_suspend, + .suspend_wait = execlist_engine_suspend_wait, + .resume = execlist_engine_resume, +}; + +int xe_execlist_init(struct xe_gt *gt) +{ + /* GuC submission enabled, nothing to do */ + if (xe_device_guc_submission_enabled(gt_to_xe(gt))) + return 0; + + 
gt->engine_ops = &execlist_engine_ops; + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_execlist.h b/drivers/gpu/drm/xe/xe_execlist.h new file mode 100644 index 000000000000..6a0442a6eff6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_execlist.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_EXECLIST_H_ +#define _XE_EXECLIST_H_ + +#include "xe_execlist_types.h" + +struct xe_device; +struct xe_gt; + +#define xe_execlist_port_assert_held(port) lockdep_assert_held(&(port)->lock); + +int xe_execlist_init(struct xe_gt *gt); +struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe, + struct xe_hw_engine *hwe); +void xe_execlist_port_destroy(struct xe_execlist_port *port); + +#endif diff --git a/drivers/gpu/drm/xe/xe_execlist_types.h b/drivers/gpu/drm/xe/xe_execlist_types.h new file mode 100644 index 000000000000..9b1239b47292 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_execlist_types.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_EXECLIST_TYPES_H_ +#define _XE_EXECLIST_TYPES_H_ + +#include +#include +#include + +#include "xe_engine_types.h" + +struct xe_hw_engine; +struct xe_execlist_engine; + +struct xe_execlist_port { + struct xe_hw_engine *hwe; + + spinlock_t lock; + + struct list_head active[XE_ENGINE_PRIORITY_COUNT]; + + u32 last_ctx_id; + + struct xe_execlist_engine *running_exl; + + struct timer_list irq_fail; +}; + +struct xe_execlist_engine { + struct xe_engine *engine; + + struct drm_gpu_scheduler sched; + + struct drm_sched_entity entity; + + struct xe_execlist_port *port; + + bool has_run; + + struct work_struct fini_async; + + enum xe_engine_priority active_priority; + struct list_head active_link; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c new file mode 100644 index 000000000000..0320ce7ba3d1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_mmio.h" +#include "gt/intel_gt_regs.h" + +#define XE_FORCE_WAKE_ACK_TIMEOUT_MS 50 + +static struct xe_gt * +fw_to_gt(struct xe_force_wake *fw) +{ + return fw->gt; +} + +static struct xe_device * +fw_to_xe(struct xe_force_wake *fw) +{ + return gt_to_xe(fw_to_gt(fw)); +} + +static void domain_init(struct xe_force_wake_domain *domain, + enum xe_force_wake_domain_id id, + u32 reg, u32 ack, u32 val, u32 mask) +{ + domain->id = id; + domain->reg_ctl = reg; + domain->reg_ack = ack; + domain->val = val; + domain->mask = mask; +} + +#define FORCEWAKE_ACK_GT_MTL _MMIO(0xdfc) + +void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw) +{ + struct xe_device *xe = gt_to_xe(gt); + + fw->gt = gt; + mutex_init(&fw->lock); + + /* Assuming gen11+ so assert this assumption is correct */ + XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11); + + if (xe->info.platform == XE_METEORLAKE) { + domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], + XE_FW_DOMAIN_ID_GT, + FORCEWAKE_GT_GEN9.reg, + FORCEWAKE_ACK_GT_MTL.reg, + BIT(0), BIT(16)); + } else { + domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], + XE_FW_DOMAIN_ID_GT, + FORCEWAKE_GT_GEN9.reg, + FORCEWAKE_ACK_GT_GEN9.reg, + BIT(0), BIT(16)); + } +} + +void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) +{ + int i, j; + + /* Assuming gen11+ so assert this assumption is correct */ + XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 
11); + + if (!xe_gt_is_media_type(gt)) + domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER], + XE_FW_DOMAIN_ID_RENDER, + FORCEWAKE_RENDER_GEN9.reg, + FORCEWAKE_ACK_RENDER_GEN9.reg, + BIT(0), BIT(16)); + + for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { + if (!(gt->info.engine_mask & BIT(i))) + continue; + + domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j], + XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j, + FORCEWAKE_MEDIA_VDBOX_GEN11(j).reg, + FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(j).reg, + BIT(0), BIT(16)); + } + + for (i = XE_HW_ENGINE_VECS0, j =0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) { + if (!(gt->info.engine_mask & BIT(i))) + continue; + + domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j], + XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j, + FORCEWAKE_MEDIA_VEBOX_GEN11(j).reg, + FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(j).reg, + BIT(0), BIT(16)); + } +} + +void xe_force_wake_prune(struct xe_gt *gt, struct xe_force_wake *fw) +{ + int i, j; + + /* Call after fuses have been read, prune domains that are fused off */ + + for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) + if (!(gt->info.engine_mask & BIT(i))) + fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j].reg_ctl = 0; + + for (i = XE_HW_ENGINE_VECS0, j =0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) + if (!(gt->info.engine_mask & BIT(i))) + fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j].reg_ctl = 0; +} + +static void domain_wake(struct xe_gt *gt, struct xe_force_wake_domain *domain) +{ + xe_mmio_write32(gt, domain->reg_ctl, domain->mask | domain->val); +} + +static int domain_wake_wait(struct xe_gt *gt, + struct xe_force_wake_domain *domain) +{ + return xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val, + XE_FORCE_WAKE_ACK_TIMEOUT_MS); +} + +static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain) +{ + xe_mmio_write32(gt, domain->reg_ctl, domain->mask); +} + +static int domain_sleep_wait(struct xe_gt *gt, + struct xe_force_wake_domain *domain) +{ + return xe_mmio_wait32(gt, domain->reg_ack, 0, domain->val, + XE_FORCE_WAKE_ACK_TIMEOUT_MS); +} + +#define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \ + for (tmp__ = (mask__); tmp__ ;) \ + for_each_if((domain__ = ((fw__)->domains + \ + __mask_next_bit(tmp__))) && \ + domain__->reg_ctl) + +int xe_force_wake_get(struct xe_force_wake *fw, + enum xe_force_wake_domains domains) +{ + struct xe_device *xe = fw_to_xe(fw); + struct xe_gt *gt = fw_to_gt(fw); + struct xe_force_wake_domain *domain; + enum xe_force_wake_domains tmp, woken = 0; + int ret, ret2 = 0; + + mutex_lock(&fw->lock); + for_each_fw_domain_masked(domain, domains, fw, tmp) { + if (!domain->ref++) { + woken |= BIT(domain->id); + domain_wake(gt, domain); + } + } + for_each_fw_domain_masked(domain, woken, fw, tmp) { + ret = domain_wake_wait(gt, domain); + ret2 |= ret; + if (ret) + drm_notice(&xe->drm, "Force wake domain (%d) failed to ack wake, ret=%d\n", + domain->id, ret); + } + fw->awake_domains |= woken; + mutex_unlock(&fw->lock); + + return ret2; +} + +int xe_force_wake_put(struct xe_force_wake *fw, + enum xe_force_wake_domains domains) +{ + struct xe_device *xe = fw_to_xe(fw); + struct xe_gt *gt = fw_to_gt(fw); + struct xe_force_wake_domain *domain; + enum xe_force_wake_domains tmp, sleep = 0; + int ret, ret2 = 0; + + mutex_lock(&fw->lock); + for_each_fw_domain_masked(domain, domains, fw, tmp) { + if (!--domain->ref) { + sleep |= BIT(domain->id); + domain_sleep(gt, domain); + } + } + for_each_fw_domain_masked(domain, sleep, fw, tmp) { + ret = domain_sleep_wait(gt, domain); + ret2 |= 
ret; + if (ret) + drm_notice(&xe->drm, "Force wake domain (%d) failed to ack sleep, ret=%d\n", + domain->id, ret); + } + fw->awake_domains &= ~sleep; + mutex_unlock(&fw->lock); + + return ret2; +} diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h new file mode 100644 index 000000000000..5adb8daa3b71 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_force_wake.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_FORCE_WAKE_H_ +#define _XE_FORCE_WAKE_H_ + +#include "xe_force_wake_types.h" +#include "xe_macros.h" + +struct xe_gt; + +void xe_force_wake_init_gt(struct xe_gt *gt, + struct xe_force_wake *fw); +void xe_force_wake_init_engines(struct xe_gt *gt, + struct xe_force_wake *fw); +void xe_force_wake_prune(struct xe_gt *gt, + struct xe_force_wake *fw); +int xe_force_wake_get(struct xe_force_wake *fw, + enum xe_force_wake_domains domains); +int xe_force_wake_put(struct xe_force_wake *fw, + enum xe_force_wake_domains domains); + +static inline int +xe_force_wake_ref(struct xe_force_wake *fw, + enum xe_force_wake_domains domain) +{ + XE_BUG_ON(!domain); + return fw->domains[ffs(domain) - 1].ref; +} + +static inline void +xe_force_wake_assert_held(struct xe_force_wake *fw, + enum xe_force_wake_domains domain) +{ + XE_BUG_ON(!(fw->awake_domains & domain)); +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h new file mode 100644 index 000000000000..208dd629d7b1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_force_wake_types.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_FORCE_WAKE_TYPES_H_ +#define _XE_FORCE_WAKE_TYPES_H_ + +#include +#include + +enum xe_force_wake_domain_id { + XE_FW_DOMAIN_ID_GT = 0, + XE_FW_DOMAIN_ID_RENDER, + XE_FW_DOMAIN_ID_MEDIA, + XE_FW_DOMAIN_ID_MEDIA_VDBOX0, + XE_FW_DOMAIN_ID_MEDIA_VDBOX1, + XE_FW_DOMAIN_ID_MEDIA_VDBOX2, + XE_FW_DOMAIN_ID_MEDIA_VDBOX3, + XE_FW_DOMAIN_ID_MEDIA_VDBOX4, + XE_FW_DOMAIN_ID_MEDIA_VDBOX5, + XE_FW_DOMAIN_ID_MEDIA_VDBOX6, + XE_FW_DOMAIN_ID_MEDIA_VDBOX7, + XE_FW_DOMAIN_ID_MEDIA_VEBOX0, + XE_FW_DOMAIN_ID_MEDIA_VEBOX1, + XE_FW_DOMAIN_ID_MEDIA_VEBOX2, + XE_FW_DOMAIN_ID_MEDIA_VEBOX3, + XE_FW_DOMAIN_ID_GSC, + XE_FW_DOMAIN_ID_COUNT +}; + +enum xe_force_wake_domains { + XE_FW_GT = BIT(XE_FW_DOMAIN_ID_GT), + XE_FW_RENDER = BIT(XE_FW_DOMAIN_ID_RENDER), + XE_FW_MEDIA = BIT(XE_FW_DOMAIN_ID_MEDIA), + XE_FW_MEDIA_VDBOX0 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX0), + XE_FW_MEDIA_VDBOX1 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX1), + XE_FW_MEDIA_VDBOX2 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX2), + XE_FW_MEDIA_VDBOX3 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX3), + XE_FW_MEDIA_VDBOX4 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX4), + XE_FW_MEDIA_VDBOX5 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX5), + XE_FW_MEDIA_VDBOX6 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX6), + XE_FW_MEDIA_VDBOX7 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX7), + XE_FW_MEDIA_VEBOX0 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX0), + XE_FW_MEDIA_VEBOX1 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX1), + XE_FW_MEDIA_VEBOX2 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX2), + XE_FW_MEDIA_VEBOX3 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX3), + XE_FW_GSC = BIT(XE_FW_DOMAIN_ID_GSC), + XE_FORCEWAKE_ALL = BIT(XE_FW_DOMAIN_ID_COUNT) - 1 +}; + +/** + * struct xe_force_wake_domain - XE force wake domains + */ +struct xe_force_wake_domain { + /** @id: domain force wake id */ + enum xe_force_wake_domain_id id; + /** @reg_ctl: domain wake control register address */ + u32 reg_ctl; + /** @reg_ack: domain ack register 
address */ + u32 reg_ack; + /** @val: domain wake write value */ + u32 val; + /** @mask: domain mask */ + u32 mask; + /** @ref: domain reference */ + u32 ref; +}; + +/** + * struct xe_force_wake - XE force wake + */ +struct xe_force_wake { + /** @gt: back pointers to GT */ + struct xe_gt *gt; + /** @lock: protects everything force wake struct */ + struct mutex lock; + /** @awake_domains: mask of all domains awake */ + enum xe_force_wake_domains awake_domains; + /** @domains: force wake domains */ + struct xe_force_wake_domain domains[XE_FW_DOMAIN_ID_COUNT]; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c new file mode 100644 index 000000000000..eab74a509f68 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -0,0 +1,304 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_ggtt.h" + +#include +#include + +#include + +#include "xe_device.h" +#include "xe_bo.h" +#include "xe_gt.h" +#include "xe_mmio.h" +#include "xe_wopcm.h" + +#include "i915_reg.h" +#include "gt/intel_gt_regs.h" + +/* FIXME: Common file, preferably auto-gen */ +#define MTL_GGTT_PTE_PAT0 BIT(52) +#define MTL_GGTT_PTE_PAT1 BIT(53) + +u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset) +{ + struct xe_device *xe = xe_bo_device(bo); + u64 pte; + bool is_lmem; + + pte = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_lmem); + pte |= GEN8_PAGE_PRESENT; + + if (is_lmem) + pte |= GEN12_GGTT_PTE_LM; + + /* FIXME: vfunc + pass in caching rules */ + if (xe->info.platform == XE_METEORLAKE) { + pte |= MTL_GGTT_PTE_PAT0; + pte |= MTL_GGTT_PTE_PAT1; + } + + return pte; +} + +static unsigned int probe_gsm_size(struct pci_dev *pdev) +{ + u16 gmch_ctl, ggms; + + pci_read_config_word(pdev, SNB_GMCH_CTRL, &gmch_ctl); + ggms = (gmch_ctl >> BDW_GMCH_GGMS_SHIFT) & BDW_GMCH_GGMS_MASK; + return ggms ? SZ_1M << ggms : 0; +} + +void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte) +{ + XE_BUG_ON(addr & GEN8_PTE_MASK); + XE_BUG_ON(addr >= ggtt->size); + + writeq(pte, &ggtt->gsm[addr >> GEN8_PTE_SHIFT]); +} + +static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) +{ + u64 end = start + size - 1; + u64 scratch_pte; + + XE_BUG_ON(start >= end); + + if (ggtt->scratch) + scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0); + else + scratch_pte = 0; + + while (start < end) { + xe_ggtt_set_pte(ggtt, start, scratch_pte); + start += GEN8_PAGE_SIZE; + } +} + +static void ggtt_fini_noalloc(struct drm_device *drm, void *arg) +{ + struct xe_ggtt *ggtt = arg; + + mutex_destroy(&ggtt->lock); + drm_mm_takedown(&ggtt->mm); + + xe_bo_unpin_map_no_vm(ggtt->scratch); +} + +int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt) +{ + struct xe_device *xe = gt_to_xe(gt); + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + unsigned int gsm_size; + + XE_BUG_ON(xe_gt_is_media_type(gt)); + + ggtt->gt = gt; + + gsm_size = probe_gsm_size(pdev); + if (gsm_size == 0) { + drm_err(&xe->drm, "Hardware reported no preallocated GSM\n"); + return -ENOMEM; + } + + ggtt->gsm = gt->mmio.regs + SZ_8M; + ggtt->size = (gsm_size / 8) * (u64)GEN8_PAGE_SIZE; + + /* + * 8B per entry, each points to a 4KB page. + * + * The GuC owns the WOPCM space, thus we can't allocate GGTT address in + * this area. Even though we likely configure the WOPCM to less than the + * maximum value, to simplify the driver load (no need to fetch HuC + + * GuC firmwares and determine there sizes before initializing the GGTT) + * just start the GGTT allocation above the max WOPCM size. 
This might
+	 * waste space in the GGTT (WOPCM is 2MB on modern platforms) but we can
+	 * live with this.
+	 *
+	 * Another benefit of this is that the GuC bootrom can't access anything
+	 * below the WOPCM max size, so anything the bootrom needs to access
+	 * (e.g. a RSA key) needs to be placed in the GGTT above the WOPCM max
+	 * size. Starting the GGTT allocations above the WOPCM max gives us the
+	 * correct placement for free.
+	 */
+	drm_mm_init(&ggtt->mm, xe_wopcm_size(xe),
+		    ggtt->size - xe_wopcm_size(xe));
+	mutex_init(&ggtt->lock);
+
+	return drmm_add_action_or_reset(&xe->drm, ggtt_fini_noalloc, ggtt);
+}
+
+static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
+{
+	struct drm_mm_node *hole;
+	u64 start, end;
+
+	/* Display may have allocated inside ggtt, so be careful with clearing here */
+	mutex_lock(&ggtt->lock);
+	drm_mm_for_each_hole(hole, &ggtt->mm, start, end)
+		xe_ggtt_clear(ggtt, start, end - start);
+
+	xe_ggtt_invalidate(ggtt->gt);
+	mutex_unlock(&ggtt->lock);
+}
+
+int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int err;
+
+	ggtt->scratch = xe_bo_create_locked(xe, gt, NULL, GEN8_PAGE_SIZE,
+					    ttm_bo_type_kernel,
+					    XE_BO_CREATE_VRAM_IF_DGFX(gt) |
+					    XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(ggtt->scratch)) {
+		err = PTR_ERR(ggtt->scratch);
+		goto err;
+	}
+
+	err = xe_bo_pin(ggtt->scratch);
+	xe_bo_unlock_no_vm(ggtt->scratch);
+	if (err) {
+		xe_bo_put(ggtt->scratch);
+		goto err;
+	}
+
+	xe_ggtt_initial_clear(ggtt);
+	return 0;
+err:
+	ggtt->scratch = NULL;
+	return err;
+}
+
+#define GEN12_GUC_TLB_INV_CR			_MMIO(0xcee8)
+#define   GEN12_GUC_TLB_INV_CR_INVALIDATE	(1 << 0)
+#define PVC_GUC_TLB_INV_DESC0			_MMIO(0xcf7c)
+#define   PVC_GUC_TLB_INV_DESC0_VALID		(1 << 0)
+#define PVC_GUC_TLB_INV_DESC1			_MMIO(0xcf80)
+#define   PVC_GUC_TLB_INV_DESC1_INVALIDATE	(1 << 6)
+
+void xe_ggtt_invalidate(struct xe_gt *gt)
+{
+	/* TODO: vfunc for GuC vs. non-GuC */
+
+	/* TODO: i915 makes comments about this being uncached and
+	 * therefore flushing WC buffers. Is that really true here?
+ */ + xe_mmio_write32(gt, GFX_FLSH_CNTL_GEN6.reg, GFX_FLSH_CNTL_EN); + if (xe_device_guc_submission_enabled(gt_to_xe(gt))) { + struct xe_device *xe = gt_to_xe(gt); + + /* TODO: also use vfunc here */ + if (xe->info.platform == XE_PVC) { + xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1.reg, + PVC_GUC_TLB_INV_DESC1_INVALIDATE); + xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0.reg, + PVC_GUC_TLB_INV_DESC0_VALID); + } else + xe_mmio_write32(gt, GEN12_GUC_TLB_INV_CR.reg, + GEN12_GUC_TLB_INV_CR_INVALIDATE); + } +} + +void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix) +{ + u64 addr, scratch_pte; + + scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0); + + printk("%sGlobal GTT:", prefix); + for (addr = 0; addr < ggtt->size; addr += GEN8_PAGE_SIZE) { + unsigned int i = addr / GEN8_PAGE_SIZE; + + XE_BUG_ON(addr > U32_MAX); + if (ggtt->gsm[i] == scratch_pte) + continue; + + printk("%s ggtt[0x%08x] = 0x%016llx", + prefix, (u32)addr, ggtt->gsm[i]); + } +} + +int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node, + u32 size, u32 align, u32 mm_flags) +{ + return drm_mm_insert_node_generic(&ggtt->mm, node, size, align, 0, + mm_flags); +} + +int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, + u32 size, u32 align) +{ + int ret; + + mutex_lock(&ggtt->lock); + ret = xe_ggtt_insert_special_node_locked(ggtt, node, size, + align, DRM_MM_INSERT_HIGH); + mutex_unlock(&ggtt->lock); + + return ret; +} + +void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) +{ + u64 start = bo->ggtt_node.start; + u64 offset, pte; + + for (offset = 0; offset < bo->size; offset += GEN8_PAGE_SIZE) { + pte = xe_ggtt_pte_encode(bo, offset); + xe_ggtt_set_pte(ggtt, start + offset, pte); + } + + xe_ggtt_invalidate(ggtt->gt); +} + +int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) +{ + int err; + + if (XE_WARN_ON(bo->ggtt_node.size)) { + /* Someone's already inserted this BO in the GGTT */ + XE_BUG_ON(bo->ggtt_node.size != bo->size); + return 0; + } + + err = xe_bo_validate(bo, NULL, false); + if (err) + return err; + + mutex_lock(&ggtt->lock); + err = drm_mm_insert_node(&ggtt->mm, &bo->ggtt_node, bo->size); + if (!err) + xe_ggtt_map_bo(ggtt, bo); + mutex_unlock(&ggtt->lock); + + return 0; +} + +void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node) +{ + mutex_lock(&ggtt->lock); + + xe_ggtt_clear(ggtt, node->start, node->size); + drm_mm_remove_node(node); + node->size = 0; + + xe_ggtt_invalidate(ggtt->gt); + + mutex_unlock(&ggtt->lock); +} + +void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) +{ + if (XE_WARN_ON(!bo->ggtt_node.size)) + return; + + /* This BO is not currently in the GGTT */ + XE_BUG_ON(bo->ggtt_node.size != bo->size); + + xe_ggtt_remove_node(ggtt, &bo->ggtt_node); +} diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h new file mode 100644 index 000000000000..289c6852ad1a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_GGTT_H_ +#define _XE_GGTT_H_ + +#include "xe_ggtt_types.h" + +u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset); +void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte); +void xe_ggtt_invalidate(struct xe_gt *gt); +int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt); +int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt); +void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix); + +int xe_ggtt_insert_special_node(struct xe_ggtt 
*ggtt, struct drm_mm_node *node, + u32 size, u32 align); +int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, + struct drm_mm_node *node, + u32 size, u32 align, u32 mm_flags); +void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node); +void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); +int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); +void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); + +#endif diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h new file mode 100644 index 000000000000..e04193001763 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GGTT_TYPES_H_ +#define _XE_GGTT_TYPES_H_ + +#include + +struct xe_bo; +struct xe_gt; + +struct xe_ggtt { + struct xe_gt *gt; + + u64 size; + + struct xe_bo *scratch; + + struct mutex lock; + + u64 __iomem *gsm; + + struct drm_mm mm; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c new file mode 100644 index 000000000000..e4ad1d6ce1d5 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "xe_gpu_scheduler.h" + +static void xe_sched_process_msg_queue(struct xe_gpu_scheduler *sched) +{ + if (!READ_ONCE(sched->base.pause_submit)) + queue_work(sched->base.submit_wq, &sched->work_process_msg); +} + +static void xe_sched_process_msg_queue_if_ready(struct xe_gpu_scheduler *sched) +{ + struct xe_sched_msg *msg; + + spin_lock(&sched->base.job_list_lock); + msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link); + if (msg) + xe_sched_process_msg_queue(sched); + spin_unlock(&sched->base.job_list_lock); +} + +static struct xe_sched_msg * +xe_sched_get_msg(struct xe_gpu_scheduler *sched) +{ + struct xe_sched_msg *msg; + + spin_lock(&sched->base.job_list_lock); + msg = list_first_entry_or_null(&sched->msgs, + struct xe_sched_msg, link); + if (msg) + list_del(&msg->link); + spin_unlock(&sched->base.job_list_lock); + + return msg; +} + +static void xe_sched_process_msg_work(struct work_struct *w) +{ + struct xe_gpu_scheduler *sched = + container_of(w, struct xe_gpu_scheduler, work_process_msg); + struct xe_sched_msg *msg; + + if (READ_ONCE(sched->base.pause_submit)) + return; + + msg = xe_sched_get_msg(sched); + if (msg) { + sched->ops->process_msg(msg); + + xe_sched_process_msg_queue_if_ready(sched); + } +} + +int xe_sched_init(struct xe_gpu_scheduler *sched, + const struct drm_sched_backend_ops *ops, + const struct xe_sched_backend_ops *xe_ops, + struct workqueue_struct *submit_wq, + uint32_t hw_submission, unsigned hang_limit, + long timeout, struct workqueue_struct *timeout_wq, + atomic_t *score, const char *name, + struct device *dev) +{ + sched->ops = xe_ops; + INIT_LIST_HEAD(&sched->msgs); + INIT_WORK(&sched->work_process_msg, xe_sched_process_msg_work); + + return drm_sched_init(&sched->base, ops, submit_wq, 1, hw_submission, + hang_limit, timeout, timeout_wq, score, name, + dev); +} + +void xe_sched_fini(struct xe_gpu_scheduler *sched) +{ + xe_sched_submission_stop(sched); + drm_sched_fini(&sched->base); +} + +void xe_sched_submission_start(struct xe_gpu_scheduler *sched) +{ + drm_sched_wqueue_start(&sched->base); + queue_work(sched->base.submit_wq, &sched->work_process_msg); +} + +void xe_sched_submission_stop(struct xe_gpu_scheduler *sched) +{ + 
drm_sched_wqueue_stop(&sched->base); + cancel_work_sync(&sched->work_process_msg); +} + +void xe_sched_add_msg(struct xe_gpu_scheduler *sched, + struct xe_sched_msg *msg) +{ + spin_lock(&sched->base.job_list_lock); + list_add_tail(&msg->link, &sched->msgs); + spin_unlock(&sched->base.job_list_lock); + + xe_sched_process_msg_queue(sched); +} diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h new file mode 100644 index 000000000000..10c6bb9c9386 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GPU_SCHEDULER_H_ +#define _XE_GPU_SCHEDULER_H_ + +#include "xe_gpu_scheduler_types.h" +#include "xe_sched_job_types.h" + +int xe_sched_init(struct xe_gpu_scheduler *sched, + const struct drm_sched_backend_ops *ops, + const struct xe_sched_backend_ops *xe_ops, + struct workqueue_struct *submit_wq, + uint32_t hw_submission, unsigned hang_limit, + long timeout, struct workqueue_struct *timeout_wq, + atomic_t *score, const char *name, + struct device *dev); +void xe_sched_fini(struct xe_gpu_scheduler *sched); + +void xe_sched_submission_start(struct xe_gpu_scheduler *sched); +void xe_sched_submission_stop(struct xe_gpu_scheduler *sched); + +void xe_sched_add_msg(struct xe_gpu_scheduler *sched, + struct xe_sched_msg *msg); + +static inline void xe_sched_stop(struct xe_gpu_scheduler *sched) +{ + drm_sched_stop(&sched->base, NULL); +} + +static inline void xe_sched_tdr_queue_imm(struct xe_gpu_scheduler *sched) +{ + drm_sched_tdr_queue_imm(&sched->base); +} + +static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched) +{ + drm_sched_resubmit_jobs(&sched->base); +} + +static inline bool +xe_sched_invalidate_job(struct xe_sched_job *job, int threshold) +{ + return drm_sched_invalidate_job(&job->drm, threshold); +} + +static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched, + struct xe_sched_job *job) +{ + list_add(&job->drm.list, &sched->base.pending_list); +} + +static inline +struct xe_sched_job *xe_sched_first_pending_job(struct xe_gpu_scheduler *sched) +{ + return list_first_entry_or_null(&sched->base.pending_list, + struct xe_sched_job, drm.list); +} + +static inline int +xe_sched_entity_init(struct xe_sched_entity *entity, + struct xe_gpu_scheduler *sched) +{ + return drm_sched_entity_init(entity, 0, + (struct drm_gpu_scheduler **)&sched, + 1, NULL); +} + +#define xe_sched_entity_fini drm_sched_entity_fini + +#endif diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h b/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h new file mode 100644 index 000000000000..6731b13da8bb --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GPU_SCHEDULER_TYPES_H_ +#define _XE_GPU_SCHEDULER_TYPES_H_ + +#include + +/** + * struct xe_sched_msg - an in-band (relative to GPU scheduler run queue) + * message + * + * Generic enough for backend defined messages, backend can expand if needed. 
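+ *
+ * A minimal usage sketch (the opcode, data and handler below are hypothetical,
+ * not part of this patch): the backend queues a message with
+ * xe_sched_add_msg() and consumes it from its process_msg() callback (see
+ * struct xe_sched_backend_ops), which runs on the scheduler's submit_wq:
+ *
+ * .. code-block:: c
+ *
+ *	#define MY_BACKEND_OP_CLEANUP	0	// hypothetical opcode
+ *
+ *	static void my_process_msg(struct xe_sched_msg *msg)
+ *	{
+ *		if (msg->opcode == MY_BACKEND_OP_CLEANUP)
+ *			my_cleanup(msg->private_data);	// hypothetical helper
+ *		kfree(msg);	// process_msg must free a dynamically allocated msg
+ *	}
+ *
+ *	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+ *	msg->opcode = MY_BACKEND_OP_CLEANUP;
+ *	msg->private_data = my_data;
+ *	xe_sched_add_msg(sched, msg);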
+ */ +struct xe_sched_msg { + /** @link: list link into the gpu scheduler list of messages */ + struct list_head link; + /** + * @private_data: opaque pointer to message private data (backend defined) + */ + void *private_data; + /** @opcode: opcode of message (backend defined) */ + unsigned int opcode; +}; + +/** + * struct xe_sched_backend_ops - Define the backend operations called by the + * scheduler + */ +struct xe_sched_backend_ops { + /** + * @process_msg: Process a message. Allowed to block, it is this + * function's responsibility to free message if dynamically allocated. + */ + void (*process_msg)(struct xe_sched_msg *msg); +}; + +/** + * struct xe_gpu_scheduler - Xe GPU scheduler + */ +struct xe_gpu_scheduler { + /** @base: DRM GPU scheduler */ + struct drm_gpu_scheduler base; + /** @ops: Xe scheduler ops */ + const struct xe_sched_backend_ops *ops; + /** @msgs: list of messages to be processed in @work_process_msg */ + struct list_head msgs; + /** @work_process_msg: processes messages */ + struct work_struct work_process_msg; +}; + +#define xe_sched_entity drm_sched_entity +#define xe_sched_policy drm_sched_policy + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c new file mode 100644 index 000000000000..5f8fa9d98d5a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -0,0 +1,830 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include + +#include "xe_bb.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_engine.h" +#include "xe_execlist.h" +#include "xe_force_wake.h" +#include "xe_ggtt.h" +#include "xe_gt.h" +#include "xe_gt_clock.h" +#include "xe_gt_mcr.h" +#include "xe_gt_pagefault.h" +#include "xe_gt_sysfs.h" +#include "xe_gt_topology.h" +#include "xe_hw_fence.h" +#include "xe_irq.h" +#include "xe_lrc.h" +#include "xe_map.h" +#include "xe_migrate.h" +#include "xe_mmio.h" +#include "xe_mocs.h" +#include "xe_reg_sr.h" +#include "xe_ring_ops.h" +#include "xe_sa.h" +#include "xe_sched_job.h" +#include "xe_ttm_gtt_mgr.h" +#include "xe_ttm_vram_mgr.h" +#include "xe_tuning.h" +#include "xe_uc.h" +#include "xe_vm.h" +#include "xe_wa.h" +#include "xe_wopcm.h" + +#include "gt/intel_gt_regs.h" + +struct xe_gt *xe_find_full_gt(struct xe_gt *gt) +{ + struct xe_gt *search; + u8 id; + + XE_BUG_ON(!xe_gt_is_media_type(gt)); + + for_each_gt(search, gt_to_xe(gt), id) { + if (search->info.vram_id == gt->info.vram_id) + return search; + } + + XE_BUG_ON("NOT POSSIBLE"); + return NULL; +} + +int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt) +{ + struct drm_device *drm = &xe->drm; + + XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED); + + if (!xe_gt_is_media_type(gt)) { + gt->mem.ggtt = drmm_kzalloc(drm, sizeof(*gt->mem.ggtt), + GFP_KERNEL); + if (!gt->mem.ggtt) + return -ENOMEM; + + gt->mem.vram_mgr = drmm_kzalloc(drm, sizeof(*gt->mem.vram_mgr), + GFP_KERNEL); + if (!gt->mem.vram_mgr) + return -ENOMEM; + + gt->mem.gtt_mgr = drmm_kzalloc(drm, sizeof(*gt->mem.gtt_mgr), + GFP_KERNEL); + if (!gt->mem.gtt_mgr) + return -ENOMEM; + } else { + struct xe_gt *full_gt = xe_find_full_gt(gt); + + gt->mem.ggtt = full_gt->mem.ggtt; + gt->mem.vram_mgr = full_gt->mem.vram_mgr; + gt->mem.gtt_mgr = full_gt->mem.gtt_mgr; + } + + gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0); + + return 0; +} + +/* FIXME: These should be in a common file */ +#define CHV_PPAT_SNOOP REG_BIT(6) +#define GEN8_PPAT_AGE(x) ((x)<<4) +#define GEN8_PPAT_LLCeLLC (3<<2) +#define GEN8_PPAT_LLCELLC (2<<2) +#define GEN8_PPAT_LLC (1<<2) 
+#define GEN8_PPAT_WB (3<<0) +#define GEN8_PPAT_WT (2<<0) +#define GEN8_PPAT_WC (1<<0) +#define GEN8_PPAT_UC (0<<0) +#define GEN8_PPAT_ELLC_OVERRIDE (0<<2) +#define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8)) +#define GEN12_PPAT_CLOS(x) ((x)<<2) + +static void tgl_setup_private_ppat(struct xe_gt *gt) +{ + /* TGL doesn't support LLC or AGE settings */ + xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_WB); + xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC); + xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT); + xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_UC); + xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, GEN8_PPAT_WB); + xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg, GEN8_PPAT_WB); + xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg, GEN8_PPAT_WB); + xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg, GEN8_PPAT_WB); +} + +static void pvc_setup_private_ppat(struct xe_gt *gt) +{ + xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_UC); + xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC); + xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT); + xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_WB); + xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, + GEN12_PPAT_CLOS(1) | GEN8_PPAT_WT); + xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg, + GEN12_PPAT_CLOS(1) | GEN8_PPAT_WB); + xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg, + GEN12_PPAT_CLOS(2) | GEN8_PPAT_WT); + xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg, + GEN12_PPAT_CLOS(2) | GEN8_PPAT_WB); +} + +#define MTL_PPAT_L4_CACHE_POLICY_MASK REG_GENMASK(3, 2) +#define MTL_PAT_INDEX_COH_MODE_MASK REG_GENMASK(1, 0) +#define MTL_PPAT_3_UC REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3) +#define MTL_PPAT_1_WT REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1) +#define MTL_PPAT_0_WB REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0) +#define MTL_3_COH_2W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3) +#define MTL_2_COH_1W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2) +#define MTL_0_COH_NON REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0) + +static void mtl_setup_private_ppat(struct xe_gt *gt) +{ + xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, MTL_PPAT_0_WB); + xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, + MTL_PPAT_1_WT | MTL_2_COH_1W); + xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, + MTL_PPAT_3_UC | MTL_2_COH_1W); + xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, + MTL_PPAT_0_WB | MTL_2_COH_1W); + xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, + MTL_PPAT_0_WB | MTL_3_COH_2W); +} + +static void setup_private_ppat(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (xe->info.platform == XE_METEORLAKE) + mtl_setup_private_ppat(gt); + else if (xe->info.platform == XE_PVC) + pvc_setup_private_ppat(gt); + else + tgl_setup_private_ppat(gt); +} + +static int gt_ttm_mgr_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + struct sysinfo si; + u64 gtt_size; + + si_meminfo(&si); + gtt_size = (u64)si.totalram * si.mem_unit * 3/4; + + if (gt->mem.vram.size) { + err = xe_ttm_vram_mgr_init(gt, gt->mem.vram_mgr); + if (err) + return err; + gtt_size = min(max((XE_DEFAULT_GTT_SIZE_MB << 20), + gt->mem.vram.size), + gtt_size); + xe->info.mem_region_mask |= BIT(gt->info.vram_id) << 1; + } + + err = xe_ttm_gtt_mgr_init(gt, gt->mem.gtt_mgr, gtt_size); + if (err) + return err; + + return 0; +} + +static void gt_fini(struct drm_device *drm, void *arg) +{ + struct xe_gt *gt = arg; + int i; + + destroy_workqueue(gt->ordered_wq); + + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) + xe_hw_fence_irq_finish(>->fence_irq[i]); +} + +static void 
gt_reset_worker(struct work_struct *w); + +int emit_nop_job(struct xe_gt *gt, struct xe_engine *e) +{ + struct xe_sched_job *job; + struct xe_bb *bb; + struct dma_fence *fence; + u64 batch_ofs; + long timeout; + + bb = xe_bb_new(gt, 4, false); + if (IS_ERR(bb)) + return PTR_ERR(bb); + + batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool.bo); + job = xe_bb_create_wa_job(e, bb, batch_ofs); + if (IS_ERR(job)) { + xe_bb_free(bb, NULL); + return PTR_ERR(bb); + } + + xe_sched_job_arm(job); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + timeout = dma_fence_wait_timeout(fence, false, HZ); + dma_fence_put(fence); + xe_bb_free(bb, NULL); + if (timeout < 0) + return timeout; + else if (!timeout) + return -ETIME; + + return 0; +} + +int emit_wa_job(struct xe_gt *gt, struct xe_engine *e) +{ + struct xe_reg_sr *sr = &e->hwe->reg_lrc; + struct xe_reg_sr_entry *entry; + unsigned long reg; + struct xe_sched_job *job; + struct xe_bb *bb; + struct dma_fence *fence; + u64 batch_ofs; + long timeout; + int count = 0; + + bb = xe_bb_new(gt, SZ_4K, false); /* Just pick a large BB size */ + if (IS_ERR(bb)) + return PTR_ERR(bb); + + xa_for_each(&sr->xa, reg, entry) + ++count; + + if (count) { + bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM(count); + xa_for_each(&sr->xa, reg, entry) { + bb->cs[bb->len++] = reg; + bb->cs[bb->len++] = entry->set_bits; + } + } + bb->cs[bb->len++] = MI_NOOP; + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + + batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool.bo); + job = xe_bb_create_wa_job(e, bb, batch_ofs); + if (IS_ERR(job)) { + xe_bb_free(bb, NULL); + return PTR_ERR(bb); + } + + xe_sched_job_arm(job); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + timeout = dma_fence_wait_timeout(fence, false, HZ); + dma_fence_put(fence); + xe_bb_free(bb, NULL); + if (timeout < 0) + return timeout; + else if (!timeout) + return -ETIME; + + return 0; +} + +int xe_gt_record_default_lrcs(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + int err = 0; + + for_each_hw_engine(hwe, gt, id) { + struct xe_engine *e, *nop_e; + struct xe_vm *vm; + void *default_lrc; + + if (gt->default_lrc[hwe->class]) + continue; + + xe_reg_sr_init(&hwe->reg_lrc, "LRC", xe); + xe_wa_process_lrc(hwe); + + default_lrc = drmm_kzalloc(&xe->drm, + xe_lrc_size(xe, hwe->class), + GFP_KERNEL); + if (!default_lrc) + return -ENOMEM; + + vm = xe_migrate_get_vm(gt->migrate); + e = xe_engine_create(xe, vm, BIT(hwe->logical_instance), 1, + hwe, ENGINE_FLAG_WA); + if (IS_ERR(e)) { + err = PTR_ERR(e); + goto put_vm; + } + + /* Prime golden LRC with known good state */ + err = emit_wa_job(gt, e); + if (err) + goto put_engine; + + nop_e = xe_engine_create(xe, vm, BIT(hwe->logical_instance), + 1, hwe, ENGINE_FLAG_WA); + if (IS_ERR(nop_e)) { + err = PTR_ERR(nop_e); + goto put_engine; + } + + /* Switch to different LRC */ + err = emit_nop_job(gt, nop_e); + if (err) + goto put_nop_e; + + /* Reload golden LRC to record the effect of any indirect W/A */ + err = emit_nop_job(gt, e); + if (err) + goto put_nop_e; + + xe_map_memcpy_from(xe, default_lrc, + &e->lrc[0].bo->vmap, + xe_lrc_pphwsp_offset(&e->lrc[0]), + xe_lrc_size(xe, hwe->class)); + + gt->default_lrc[hwe->class] = default_lrc; +put_nop_e: + xe_engine_put(nop_e); +put_engine: + xe_engine_put(e); +put_vm: + xe_vm_put(vm); + if (err) + break; + } + + return err; +} + +int xe_gt_init_early(struct xe_gt *gt) +{ + int err; + + xe_force_wake_init_gt(gt, gt_to_fw(gt)); + + err = 
xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return err; + + xe_gt_topology_init(gt); + xe_gt_mcr_init(gt); + + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + if (err) + return err; + + xe_reg_sr_init(>->reg_sr, "GT", gt_to_xe(gt)); + xe_wa_process_gt(gt); + xe_tuning_process_gt(gt); + + return 0; +} + +/** + * xe_gt_init_noalloc - Init GT up to the point where allocations can happen. + * @gt: The GT to initialize. + * + * This function prepares the GT to allow memory allocations to VRAM, but is not + * allowed to allocate memory itself. This state is useful for display readout, + * because the inherited display framebuffer will otherwise be overwritten as it + * is usually put at the start of VRAM. + * + * Returns: 0 on success, negative error code on error. + */ +int xe_gt_init_noalloc(struct xe_gt *gt) +{ + int err, err2; + + if (xe_gt_is_media_type(gt)) + return 0; + + xe_device_mem_access_get(gt_to_xe(gt)); + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + goto err; + + err = gt_ttm_mgr_init(gt); + if (err) + goto err_force_wake; + + err = xe_ggtt_init_noalloc(gt, gt->mem.ggtt); + +err_force_wake: + err2 = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + XE_WARN_ON(err2); + xe_device_mem_access_put(gt_to_xe(gt)); +err: + return err; +} + +static int gt_fw_domain_init(struct xe_gt *gt) +{ + int err, i; + + xe_device_mem_access_get(gt_to_xe(gt)); + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + goto err_hw_fence_irq; + + if (!xe_gt_is_media_type(gt)) { + err = xe_ggtt_init(gt, gt->mem.ggtt); + if (err) + goto err_force_wake; + } + + /* Allow driver to load if uC init fails (likely missing firmware) */ + err = xe_uc_init(>->uc); + XE_WARN_ON(err); + + err = xe_uc_init_hwconfig(>->uc); + if (err) + goto err_force_wake; + + /* Enables per hw engine IRQs */ + xe_gt_irq_postinstall(gt); + + /* Rerun MCR init as we now have hw engine list */ + xe_gt_mcr_init(gt); + + err = xe_hw_engines_init_early(gt); + if (err) + goto err_force_wake; + + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + XE_WARN_ON(err); + xe_device_mem_access_put(gt_to_xe(gt)); + + return 0; + +err_force_wake: + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +err_hw_fence_irq: + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) + xe_hw_fence_irq_finish(>->fence_irq[i]); + xe_device_mem_access_put(gt_to_xe(gt)); + + return err; +} + +static int all_fw_domain_init(struct xe_gt *gt) +{ + int err, i; + + xe_device_mem_access_get(gt_to_xe(gt)); + err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (err) + goto err_hw_fence_irq; + + setup_private_ppat(gt); + + xe_reg_sr_apply_mmio(>->reg_sr, gt); + + err = xe_gt_clock_init(gt); + if (err) + goto err_force_wake; + + xe_mocs_init(gt); + err = xe_execlist_init(gt); + if (err) + goto err_force_wake; + + err = xe_hw_engines_init(gt); + if (err) + goto err_force_wake; + + err = xe_uc_init_post_hwconfig(>->uc); + if (err) + goto err_force_wake; + + /* + * FIXME: This should be ok as SA should only be used by gt->migrate and + * vm->gt->migrate and both should be pointing to a non-media GT. But to + * realy safe, convert gt->kernel_bb_pool to a pointer and point a media + * GT to the kernel_bb_pool on a real tile. 
+ */ + if (!xe_gt_is_media_type(gt)) { + err = xe_sa_bo_manager_init(gt, >->kernel_bb_pool, SZ_1M, 16); + if (err) + goto err_force_wake; + + /* + * USM has its only SA pool to non-block behind user operations + */ + if (gt_to_xe(gt)->info.supports_usm) { + err = xe_sa_bo_manager_init(gt, >->usm.bb_pool, + SZ_1M, 16); + if (err) + goto err_force_wake; + } + } + + if (!xe_gt_is_media_type(gt)) { + gt->migrate = xe_migrate_init(gt); + if (IS_ERR(gt->migrate)) + goto err_force_wake; + } else { + gt->migrate = xe_find_full_gt(gt)->migrate; + } + + err = xe_uc_init_hw(>->uc); + if (err) + goto err_force_wake; + + err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); + XE_WARN_ON(err); + xe_device_mem_access_put(gt_to_xe(gt)); + + return 0; + +err_force_wake: + xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); +err_hw_fence_irq: + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) + xe_hw_fence_irq_finish(>->fence_irq[i]); + xe_device_mem_access_put(gt_to_xe(gt)); + + return err; +} + +int xe_gt_init(struct xe_gt *gt) +{ + int err; + int i; + + INIT_WORK(>->reset.worker, gt_reset_worker); + + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) { + gt->ring_ops[i] = xe_ring_ops_get(gt, i); + xe_hw_fence_irq_init(>->fence_irq[i]); + } + + err = xe_gt_pagefault_init(gt); + if (err) + return err; + + xe_gt_sysfs_init(gt); + + err = gt_fw_domain_init(gt); + if (err) + return err; + + xe_force_wake_init_engines(gt, gt_to_fw(gt)); + + err = all_fw_domain_init(gt); + if (err) + return err; + + xe_force_wake_prune(gt, gt_to_fw(gt)); + + err = drmm_add_action_or_reset(>_to_xe(gt)->drm, gt_fini, gt); + if (err) + return err; + + return 0; +} + +int do_gt_reset(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + + xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_FULL); + err = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_FULL, 5); + if (err) + drm_err(&xe->drm, + "GT reset failed to clear GEN11_GRDOM_FULL\n"); + + return err; +} + +static int do_gt_restart(struct xe_gt *gt) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + int err; + + setup_private_ppat(gt); + + xe_reg_sr_apply_mmio(>->reg_sr, gt); + + err = xe_wopcm_init(>->uc.wopcm); + if (err) + return err; + + for_each_hw_engine(hwe, gt, id) + xe_hw_engine_enable_ring(hwe); + + err = xe_uc_init_hw(>->uc); + if (err) + return err; + + xe_mocs_init(gt); + err = xe_uc_start(>->uc); + if (err) + return err; + + for_each_hw_engine(hwe, gt, id) { + xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); + xe_reg_sr_apply_whitelist(&hwe->reg_whitelist, + hwe->mmio_base, gt); + } + + return 0; +} + +static int gt_reset(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + + /* We only support GT resets with GuC submission */ + if (!xe_device_guc_submission_enabled(gt_to_xe(gt))) + return -ENODEV; + + drm_info(&xe->drm, "GT reset started\n"); + + xe_device_mem_access_get(gt_to_xe(gt)); + err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (err) + goto err_msg; + + xe_uc_stop_prepare(>->uc); + xe_gt_pagefault_reset(gt); + + err = xe_uc_stop(>->uc); + if (err) + goto err_out; + + err = do_gt_reset(gt); + if (err) + goto err_out; + + err = do_gt_restart(gt); + if (err) + goto err_out; + + xe_device_mem_access_put(gt_to_xe(gt)); + err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); + XE_WARN_ON(err); + + drm_info(&xe->drm, "GT reset done\n"); + + return 0; + +err_out: + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); +err_msg: + XE_WARN_ON(xe_uc_start(>->uc)); + xe_device_mem_access_put(gt_to_xe(gt)); + 
drm_err(&xe->drm, "GT reset failed, err=%d\n", err); + + return err; +} + +static void gt_reset_worker(struct work_struct *w) +{ + struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker); + + gt_reset(gt); +} + +void xe_gt_reset_async(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + drm_info(&xe->drm, "Try GT reset\n"); + + /* Don't do a reset while one is already in flight */ + if (xe_uc_reset_prepare(>->uc)) + return; + + drm_info(&xe->drm, "Doing GT reset\n"); + queue_work(gt->ordered_wq, >->reset.worker); +} + +void xe_gt_suspend_prepare(struct xe_gt *gt) +{ + xe_device_mem_access_get(gt_to_xe(gt)); + XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + + xe_uc_stop_prepare(>->uc); + + xe_device_mem_access_put(gt_to_xe(gt)); + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); +} + +int xe_gt_suspend(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + + /* For now suspend/resume is only allowed with GuC */ + if (!xe_device_guc_submission_enabled(gt_to_xe(gt))) + return -ENODEV; + + xe_device_mem_access_get(gt_to_xe(gt)); + err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (err) + goto err_msg; + + err = xe_uc_suspend(>->uc); + if (err) + goto err_force_wake; + + xe_device_mem_access_put(gt_to_xe(gt)); + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + drm_info(&xe->drm, "GT suspended\n"); + + return 0; + +err_force_wake: + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); +err_msg: + xe_device_mem_access_put(gt_to_xe(gt)); + drm_err(&xe->drm, "GT suspend failed: %d\n", err); + + return err; +} + +int xe_gt_resume(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + + xe_device_mem_access_get(gt_to_xe(gt)); + err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (err) + goto err_msg; + + err = do_gt_restart(gt); + if (err) + goto err_force_wake; + + xe_device_mem_access_put(gt_to_xe(gt)); + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + drm_info(&xe->drm, "GT resumed\n"); + + return 0; + +err_force_wake: + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); +err_msg: + xe_device_mem_access_put(gt_to_xe(gt)); + drm_err(&xe->drm, "GT resume failed: %d\n", err); + + return err; +} + +void xe_gt_migrate_wait(struct xe_gt *gt) +{ + xe_migrate_wait(gt->migrate); +} + +struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt, + enum xe_engine_class class, + u16 instance, bool logical) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + for_each_hw_engine(hwe, gt, id) + if (hwe->class == class && + ((!logical && hwe->instance == instance) || + (logical && hwe->logical_instance == instance))) + return hwe; + + return NULL; +} + +struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt, + enum xe_engine_class class) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + for_each_hw_engine(hwe, gt, id) { + switch (class) { + case XE_ENGINE_CLASS_RENDER: + case XE_ENGINE_CLASS_COMPUTE: + if (hwe->class == XE_ENGINE_CLASS_RENDER || + hwe->class == XE_ENGINE_CLASS_COMPUTE) + return hwe; + break; + default: + if (hwe->class == class) + return hwe; + } + } + + return NULL; +} diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h new file mode 100644 index 000000000000..5dc08a993cfe --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GT_H_ +#define _XE_GT_H_ + +#include + +#include 
"xe_device_types.h" +#include "xe_hw_engine.h" + +#define for_each_hw_engine(hwe__, gt__, id__) \ + for ((id__) = 0; (id__) < ARRAY_SIZE((gt__)->hw_engines); (id__)++) \ + for_each_if (((hwe__) = (gt__)->hw_engines + (id__)) && \ + xe_hw_engine_is_valid((hwe__))) + +int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt); +int xe_gt_init_early(struct xe_gt *gt); +int xe_gt_init_noalloc(struct xe_gt *gt); +int xe_gt_init(struct xe_gt *gt); +int xe_gt_record_default_lrcs(struct xe_gt *gt); +void xe_gt_suspend_prepare(struct xe_gt *gt); +int xe_gt_suspend(struct xe_gt *gt); +int xe_gt_resume(struct xe_gt *gt); +void xe_gt_reset_async(struct xe_gt *gt); +void xe_gt_migrate_wait(struct xe_gt *gt); + +struct xe_gt *xe_find_full_gt(struct xe_gt *gt); + +/** + * xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the + * first that matches the same reset domain as @class + * @gt: GT structure + * @class: hw engine class to lookup + */ +struct xe_hw_engine * +xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt, enum xe_engine_class class); + +struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt, + enum xe_engine_class class, + u16 instance, + bool logical); + +static inline bool xe_gt_is_media_type(struct xe_gt *gt) +{ + return gt->info.type == XE_GT_TYPE_MEDIA; +} + +static inline struct xe_device * gt_to_xe(struct xe_gt *gt) +{ + return gt->xe; +} + +static inline bool xe_gt_is_usm_hwe(struct xe_gt *gt, struct xe_hw_engine *hwe) +{ + struct xe_device *xe = gt_to_xe(gt); + + return xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY && + hwe->instance == gt->usm.reserved_bcs_instance; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c new file mode 100644 index 000000000000..575433e9718a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "i915_reg.h" +#include "gt/intel_gt_regs.h" + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_clock.h" +#include "xe_macros.h" +#include "xe_mmio.h" + +static u32 read_reference_ts_freq(struct xe_gt *gt) +{ + u32 ts_override = xe_mmio_read32(gt, GEN9_TIMESTAMP_OVERRIDE.reg); + u32 base_freq, frac_freq; + + base_freq = ((ts_override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >> + GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1; + base_freq *= 1000000; + + frac_freq = ((ts_override & + GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >> + GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT); + frac_freq = 1000000 / (frac_freq + 1); + + return base_freq + frac_freq; +} + +static u32 get_crystal_clock_freq(u32 rpm_config_reg) +{ + const u32 f19_2_mhz = 19200000; + const u32 f24_mhz = 24000000; + const u32 f25_mhz = 25000000; + const u32 f38_4_mhz = 38400000; + u32 crystal_clock = + (rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> + GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; + + switch (crystal_clock) { + case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: + return f24_mhz; + case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ: + return f19_2_mhz; + case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ: + return f38_4_mhz; + case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ: + return f25_mhz; + default: + XE_BUG_ON("NOT_POSSIBLE"); + return 0; + } +} + +int xe_gt_clock_init(struct xe_gt *gt) +{ + u32 ctc_reg = xe_mmio_read32(gt, CTC_MODE.reg); + u32 freq = 0; + + /* Assuming gen11+ so assert this assumption is correct */ + 
XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11); + + if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { + freq = read_reference_ts_freq(gt); + } else { + u32 c0 = xe_mmio_read32(gt, RPM_CONFIG0.reg); + + freq = get_crystal_clock_freq(c0); + + /* + * Now figure out how the command stream's timestamp + * register increments from this frequency (it might + * increment only every few clock cycle). + */ + freq >>= 3 - ((c0 & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT); + } + + gt->info.clock_freq = freq; + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_clock.h b/drivers/gpu/drm/xe/xe_gt_clock.h new file mode 100644 index 000000000000..511923afd224 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_clock.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GT_CLOCK_H_ +#define _XE_GT_CLOCK_H_ + +struct xe_gt; + +int xe_gt_clock_init(struct xe_gt *gt); + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c new file mode 100644 index 000000000000..cd1888784141 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include + +#include "xe_device.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_gt_debugfs.h" +#include "xe_gt_mcr.h" +#include "xe_gt_pagefault.h" +#include "xe_gt_topology.h" +#include "xe_hw_engine.h" +#include "xe_macros.h" +#include "xe_uc_debugfs.h" + +static struct xe_gt *node_to_gt(struct drm_info_node *node) +{ + return node->info_ent->data; +} + +static int hw_engines(struct seq_file *m, void *data) +{ + struct xe_gt *gt = node_to_gt(m->private); + struct xe_device *xe = gt_to_xe(gt); + struct drm_printer p = drm_seq_file_printer(m); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + int err; + + xe_device_mem_access_get(xe); + err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (err) { + xe_device_mem_access_put(xe); + return err; + } + + for_each_hw_engine(hwe, gt, id) + xe_hw_engine_print_state(hwe, &p); + + xe_device_mem_access_put(xe); + err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (err) + return err; + + return 0; +} + +static int force_reset(struct seq_file *m, void *data) +{ + struct xe_gt *gt = node_to_gt(m->private); + + xe_gt_reset_async(gt); + + return 0; +} + +static int sa_info(struct seq_file *m, void *data) +{ + struct xe_gt *gt = node_to_gt(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + drm_suballoc_dump_debug_info(>->kernel_bb_pool.base, &p, + gt->kernel_bb_pool.gpu_addr); + + return 0; +} + +static int topology(struct seq_file *m, void *data) +{ + struct xe_gt *gt = node_to_gt(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + xe_gt_topology_dump(gt, &p); + + return 0; +} + +static int steering(struct seq_file *m, void *data) +{ + struct xe_gt *gt = node_to_gt(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + xe_gt_mcr_steering_dump(gt, &p); + + return 0; +} + +#ifdef CONFIG_DRM_XE_DEBUG +static int invalidate_tlb(struct seq_file *m, void *data) +{ + struct xe_gt *gt = node_to_gt(m->private); + int seqno; + int ret = 0; + + seqno = xe_gt_tlb_invalidation(gt); + XE_WARN_ON(seqno < 0); + if (seqno > 0) + ret = xe_gt_tlb_invalidation_wait(gt, seqno); + XE_WARN_ON(ret < 0); + + return 0; +} +#endif + +static const struct drm_info_list debugfs_list[] = { + {"hw_engines", hw_engines, 0}, 
+ {"force_reset", force_reset, 0}, + {"sa_info", sa_info, 0}, + {"topology", topology, 0}, + {"steering", steering, 0}, +#ifdef CONFIG_DRM_XE_DEBUG + {"invalidate_tlb", invalidate_tlb, 0}, +#endif +}; + +void xe_gt_debugfs_register(struct xe_gt *gt) +{ + struct drm_minor *minor = gt_to_xe(gt)->drm.primary; + struct dentry *root; + struct drm_info_list *local; + char name[8]; + int i; + + XE_BUG_ON(!minor->debugfs_root); + + sprintf(name, "gt%d", gt->info.id); + root = debugfs_create_dir(name, minor->debugfs_root); + if (IS_ERR(root)) { + XE_WARN_ON("Create GT directory failed"); + return; + } + + /* + * Allocate local copy as we need to pass in the GT to the debugfs + * entry and drm_debugfs_create_files just references the drm_info_list + * passed in (e.g. can't define this on the stack). + */ +#define DEBUGFS_SIZE ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list) + local = drmm_kmalloc(>_to_xe(gt)->drm, DEBUGFS_SIZE, GFP_KERNEL); + if (!local) { + XE_WARN_ON("Couldn't allocate memory"); + return; + } + + memcpy(local, debugfs_list, DEBUGFS_SIZE); +#undef DEBUGFS_SIZE + + for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i) + local[i].data = gt; + + drm_debugfs_create_files(local, + ARRAY_SIZE(debugfs_list), + root, minor); + + xe_uc_debugfs_register(>->uc, root); +} diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.h b/drivers/gpu/drm/xe/xe_gt_debugfs.h new file mode 100644 index 000000000000..5a329f118a57 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GT_DEBUGFS_H_ +#define _XE_GT_DEBUGFS_H_ + +struct xe_gt; + +void xe_gt_debugfs_register(struct xe_gt *gt); + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c new file mode 100644 index 000000000000..b69c0d6c6b2f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -0,0 +1,552 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_gt.h" +#include "xe_gt_mcr.h" +#include "xe_gt_topology.h" +#include "xe_gt_types.h" +#include "xe_mmio.h" + +#include "gt/intel_gt_regs.h" + +/** + * DOC: GT Multicast/Replicated (MCR) Register Support + * + * Some GT registers are designed as "multicast" or "replicated" registers: + * multiple instances of the same register share a single MMIO offset. MCR + * registers are generally used when the hardware needs to potentially track + * independent values of a register per hardware unit (e.g., per-subslice, + * per-L3bank, etc.). The specific types of replication that exist vary + * per-platform. + * + * MMIO accesses to MCR registers are controlled according to the settings + * programmed in the platform's MCR_SELECTOR register(s). MMIO writes to MCR + * registers can be done in either a (i.e., a single write updates all + * instances of the register to the same value) or unicast (a write updates only + * one specific instance). Reads of MCR registers always operate in a unicast + * manner regardless of how the multicast/unicast bit is set in MCR_SELECTOR. + * Selection of a specific MCR instance for unicast operations is referred to + * as "steering." + * + * If MCR register operations are steered toward a hardware unit that is + * fused off or currently powered down due to power gating, the MMIO operation + * is "terminated" by the hardware. Terminated read operations will return a + * value of zero and terminated unicast write operations will be silently + * ignored. 
+ */ + +enum { + MCR_OP_READ, + MCR_OP_WRITE +}; + +static const struct xe_mmio_range xelp_l3bank_steering_table[] = { + { 0x00B100, 0x00B3FF }, + {}, +}; + +/* + * Although the bspec lists more "MSLICE" ranges than shown here, some of those + * are of a "GAM" subclass that has special rules and doesn't need to be + * included here. + */ +static const struct xe_mmio_range xehp_mslice_steering_table[] = { + { 0x00DD00, 0x00DDFF }, + { 0x00E900, 0x00FFFF }, /* 0xEA00 - OxEFFF is unused */ + {}, +}; + +static const struct xe_mmio_range xehp_lncf_steering_table[] = { + { 0x00B000, 0x00B0FF }, + { 0x00D880, 0x00D8FF }, + {}, +}; + +/* + * We have several types of MCR registers where steering to (0,0) will always + * provide us with a non-terminated value. We'll stick them all in the same + * table for simplicity. + */ +static const struct xe_mmio_range xehpc_instance0_steering_table[] = { + { 0x004000, 0x004AFF }, /* HALF-BSLICE */ + { 0x008800, 0x00887F }, /* CC */ + { 0x008A80, 0x008AFF }, /* TILEPSMI */ + { 0x00B000, 0x00B0FF }, /* HALF-BSLICE */ + { 0x00B100, 0x00B3FF }, /* L3BANK */ + { 0x00C800, 0x00CFFF }, /* HALF-BSLICE */ + { 0x00D800, 0x00D8FF }, /* HALF-BSLICE */ + { 0x00DD00, 0x00DDFF }, /* BSLICE */ + { 0x00E900, 0x00E9FF }, /* HALF-BSLICE */ + { 0x00EC00, 0x00EEFF }, /* HALF-BSLICE */ + { 0x00F000, 0x00FFFF }, /* HALF-BSLICE */ + { 0x024180, 0x0241FF }, /* HALF-BSLICE */ + {}, +}; + +static const struct xe_mmio_range xelpg_instance0_steering_table[] = { + { 0x000B00, 0x000BFF }, /* SQIDI */ + { 0x001000, 0x001FFF }, /* SQIDI */ + { 0x004000, 0x0048FF }, /* GAM */ + { 0x008700, 0x0087FF }, /* SQIDI */ + { 0x00B000, 0x00B0FF }, /* NODE */ + { 0x00C800, 0x00CFFF }, /* GAM */ + { 0x00D880, 0x00D8FF }, /* NODE */ + { 0x00DD00, 0x00DDFF }, /* OAAL2 */ + {}, +}; + +static const struct xe_mmio_range xelpg_l3bank_steering_table[] = { + { 0x00B100, 0x00B3FF }, + {}, +}; + +static const struct xe_mmio_range xelp_dss_steering_table[] = { + { 0x008150, 0x00815F }, + { 0x009520, 0x00955F }, + { 0x00DE80, 0x00E8FF }, + { 0x024A00, 0x024A7F }, + {}, +}; + +/* DSS steering is used for GSLICE ranges as well */ +static const struct xe_mmio_range xehp_dss_steering_table[] = { + { 0x005200, 0x0052FF }, /* GSLICE */ + { 0x005400, 0x007FFF }, /* GSLICE */ + { 0x008140, 0x00815F }, /* GSLICE (0x8140-0x814F), DSS (0x8150-0x815F) */ + { 0x008D00, 0x008DFF }, /* DSS */ + { 0x0094D0, 0x00955F }, /* GSLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */ + { 0x009680, 0x0096FF }, /* DSS */ + { 0x00D800, 0x00D87F }, /* GSLICE */ + { 0x00DC00, 0x00DCFF }, /* GSLICE */ + { 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved ) */ + { 0x017000, 0x017FFF }, /* GSLICE */ + { 0x024A00, 0x024A7F }, /* DSS */ + {}, +}; + +/* DSS steering is used for COMPUTE ranges as well */ +static const struct xe_mmio_range xehpc_dss_steering_table[] = { + { 0x008140, 0x00817F }, /* COMPUTE (0x8140-0x814F & 0x8160-0x817F), DSS (0x8150-0x815F) */ + { 0x0094D0, 0x00955F }, /* COMPUTE (0x94D0-0x951F), DSS (0x9520-0x955F) */ + { 0x009680, 0x0096FF }, /* DSS */ + { 0x00DC00, 0x00DCFF }, /* COMPUTE */ + { 0x00DE80, 0x00E7FF }, /* DSS (0xDF00-0xE1FF reserved ) */ + {}, +}; + +/* DSS steering is used for SLICE ranges as well */ +static const struct xe_mmio_range xelpg_dss_steering_table[] = { + { 0x005200, 0x0052FF }, /* SLICE */ + { 0x005500, 0x007FFF }, /* SLICE */ + { 0x008140, 0x00815F }, /* SLICE (0x8140-0x814F), DSS (0x8150-0x815F) */ + { 0x0094D0, 0x00955F }, /* SLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */ + { 0x009680, 0x0096FF }, /* DSS */ 
+ { 0x00D800, 0x00D87F }, /* SLICE */ + { 0x00DC00, 0x00DCFF }, /* SLICE */ + { 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */ + {}, +}; + +static const struct xe_mmio_range xelpmp_oaddrm_steering_table[] = { + { 0x393200, 0x39323F }, + { 0x393400, 0x3934FF }, + {}, +}; + +/* + * DG2 GAM registers are a special case; this table is checked directly in + * xe_gt_mcr_get_nonterminated_steering and is not hooked up via + * gt->steering[]. + */ +static const struct xe_mmio_range dg2_gam_ranges[] = { + { 0x004000, 0x004AFF }, + { 0x00C800, 0x00CFFF }, + { 0x00F000, 0x00FFFF }, + {}, +}; + +static void init_steering_l3bank(struct xe_gt *gt) +{ + if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { + u32 mslice_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, + xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg)); + u32 bank_mask = REG_FIELD_GET(GT_L3_EXC_MASK, + xe_mmio_read32(gt, XEHP_FUSE4.reg)); + + /* + * Group selects mslice, instance selects bank within mslice. + * Bank 0 is always valid _except_ when the bank mask is 010b. + */ + gt->steering[L3BANK].group_target = __ffs(mslice_mask); + gt->steering[L3BANK].instance_target = + bank_mask & BIT(0) ? 0 : 2; + } else { + u32 fuse = REG_FIELD_GET(GEN10_L3BANK_MASK, + ~xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg)); + + gt->steering[L3BANK].group_target = 0; /* unused */ + gt->steering[L3BANK].instance_target = __ffs(fuse); + } +} + +static void init_steering_mslice(struct xe_gt *gt) +{ + u32 mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, + xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg)); + + /* + * mslice registers are valid (not terminated) if either the meml3 + * associated with the mslice is present, or at least one DSS associated + * with the mslice is present. There will always be at least one meml3 + * so we can just use that to find a non-terminated mslice and ignore + * the DSS fusing. + */ + gt->steering[MSLICE].group_target = __ffs(mask); + gt->steering[MSLICE].instance_target = 0; /* unused */ + + /* + * LNCF termination is also based on mslice presence, so we'll set + * it up here. Either LNCF within a non-terminated mslice will work, + * so we just always pick LNCF 0 here. + */ + gt->steering[LNCF].group_target = __ffs(mask) << 1; + gt->steering[LNCF].instance_target = 0; /* unused */ +} + +static void init_steering_dss(struct xe_gt *gt) +{ + unsigned int dss = min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0), + xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0)); + unsigned int dss_per_grp = gt_to_xe(gt)->info.platform == XE_PVC ? 8 : 4; + + gt->steering[DSS].group_target = dss / dss_per_grp; + gt->steering[DSS].instance_target = dss % dss_per_grp; +} + +static void init_steering_oaddrm(struct xe_gt *gt) +{ + /* + * First instance is only terminated if the entire first media slice + * is absent (i.e., no VCS0 or VECS0). 
+ */ + if (gt->info.engine_mask & (XE_HW_ENGINE_VCS0 | XE_HW_ENGINE_VECS0)) + gt->steering[OADDRM].group_target = 0; + else + gt->steering[OADDRM].group_target = 1; + + gt->steering[DSS].instance_target = 0; /* unused */ +} + +static void init_steering_inst0(struct xe_gt *gt) +{ + gt->steering[DSS].group_target = 0; /* unused */ + gt->steering[DSS].instance_target = 0; /* unused */ +} + +static const struct { + const char *name; + void (*init)(struct xe_gt *); +} xe_steering_types[] = { + { "L3BANK", init_steering_l3bank }, + { "MSLICE", init_steering_mslice }, + { "LNCF", NULL }, /* initialized by mslice init */ + { "DSS", init_steering_dss }, + { "OADDRM", init_steering_oaddrm }, + { "INSTANCE 0", init_steering_inst0 }, +}; + +void xe_gt_mcr_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES); + + spin_lock_init(>->mcr_lock); + + if (gt->info.type == XE_GT_TYPE_MEDIA) { + drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13); + + gt->steering[OADDRM].ranges = xelpmp_oaddrm_steering_table; + } else if (GRAPHICS_VERx100(xe) >= 1270) { + gt->steering[INSTANCE0].ranges = xelpg_instance0_steering_table; + gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table; + gt->steering[DSS].ranges = xelpg_dss_steering_table; + } else if (xe->info.platform == XE_PVC) { + gt->steering[INSTANCE0].ranges = xehpc_instance0_steering_table; + gt->steering[DSS].ranges = xehpc_dss_steering_table; + } else if (xe->info.platform == XE_DG2) { + gt->steering[MSLICE].ranges = xehp_mslice_steering_table; + gt->steering[LNCF].ranges = xehp_lncf_steering_table; + gt->steering[DSS].ranges = xehp_dss_steering_table; + } else { + gt->steering[L3BANK].ranges = xelp_l3bank_steering_table; + gt->steering[DSS].ranges = xelp_dss_steering_table; + } + + /* Select non-terminated steering target for each type */ + for (int i = 0; i < NUM_STEERING_TYPES; i++) + if (gt->steering[i].ranges && xe_steering_types[i].init) + xe_steering_types[i].init(gt); +} + +/* + * xe_gt_mcr_get_nonterminated_steering - find group/instance values that + * will steer a register to a non-terminated instance + * @gt: GT structure + * @reg: register for which the steering is required + * @group: return variable for group steering + * @instance: return variable for instance steering + * + * This function returns a group/instance pair that is guaranteed to work for + * read steering of the given register. Note that a value will be returned even + * if the register is not replicated and therefore does not actually require + * steering. + * + * Returns true if the caller should steer to the @group/@instance values + * returned. Returns false if the caller need not perform any steering (i.e., + * the DG2 GAM range special case). + */ +static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, + i915_mcr_reg_t reg, + u8 *group, u8 *instance) +{ + for (int type = 0; type < NUM_STEERING_TYPES; type++) { + if (!gt->steering[type].ranges) + continue; + + for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) { + if (xe_mmio_in_range(>->steering[type].ranges[i], reg.reg)) { + *group = gt->steering[type].group_target; + *instance = gt->steering[type].instance_target; + return true; + } + } + } + + /* + * All MCR registers should usually be part of one of the steering + * ranges we're tracking. 
However there's one special case: DG2 + * GAM registers are technically multicast registers, but are special + * in a number of ways: + * - they have their own dedicated steering control register (they + * don't share 0xFDC with other MCR classes) + * - all reads should be directed to instance 1 (unicast reads against + * other instances are not allowed), and instance 1 is already the + * the hardware's default steering target, which we never change + * + * Ultimately this means that we can just treat them as if they were + * unicast registers and all operations will work properly. + */ + for (int i = 0; dg2_gam_ranges[i].end > 0; i++) + if (xe_mmio_in_range(&dg2_gam_ranges[i], reg.reg)) + return false; + + /* + * Not found in a steering table and not a DG2 GAM register? We'll + * just steer to 0/0 as a guess and raise a warning. + */ + drm_WARN(>_to_xe(gt)->drm, true, + "Did not find MCR register %#x in any MCR steering table\n", + reg.reg); + *group = 0; + *instance = 0; + + return true; +} + +#define STEER_SEMAPHORE 0xFD0 + +/* + * Obtain exclusive access to MCR steering. On MTL and beyond we also need + * to synchronize with external clients (e.g., firmware), so a semaphore + * register will also need to be taken. + */ +static void mcr_lock(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int ret; + + spin_lock(>->mcr_lock); + + /* + * Starting with MTL we also need to grab a semaphore register + * to synchronize with external agents (e.g., firmware) that now + * shares the same steering control register. + */ + if (GRAPHICS_VERx100(xe) >= 1270) + ret = wait_for_us(xe_mmio_read32(gt, STEER_SEMAPHORE) == 0x1, 10); + + drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT); +} + +static void mcr_unlock(struct xe_gt *gt) { + /* Release hardware semaphore */ + if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) + xe_mmio_write32(gt, STEER_SEMAPHORE, 0x1); + + spin_unlock(>->mcr_lock); +} + +/* + * Access a register with specific MCR steering + * + * Caller needs to make sure the relevant forcewake wells are up. + */ +static u32 rw_with_mcr_steering(struct xe_gt *gt, i915_mcr_reg_t reg, u8 rw_flag, + int group, int instance, u32 value) +{ + u32 steer_reg, steer_val, val = 0; + + lockdep_assert_held(>->mcr_lock); + + if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { + steer_reg = MTL_MCR_SELECTOR.reg; + steer_val = REG_FIELD_PREP(MTL_MCR_GROUPID, group) | + REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance); + } else { + steer_reg = GEN8_MCR_SELECTOR.reg; + steer_val = REG_FIELD_PREP(GEN11_MCR_SLICE_MASK, group) | + REG_FIELD_PREP(GEN11_MCR_SUBSLICE_MASK, instance); + } + + /* + * Always leave the hardware in multicast mode when doing reads + * (see comment about Wa_22013088509 below) and only change it + * to unicast mode when doing writes of a specific instance. + * + * No need to save old steering reg value. + */ + if (rw_flag == MCR_OP_READ) + steer_val |= GEN11_MCR_MULTICAST; + + xe_mmio_write32(gt, steer_reg, steer_val); + + if (rw_flag == MCR_OP_READ) + val = xe_mmio_read32(gt, reg.reg); + else + xe_mmio_write32(gt, reg.reg, value); + + /* + * If we turned off the multicast bit (during a write) we're required + * to turn it back on before finishing. The group and instance values + * don't matter since they'll be re-programmed on the next MCR + * operation. 
+ */ + if (rw_flag == MCR_OP_WRITE) + xe_mmio_write32(gt, steer_reg, GEN11_MCR_MULTICAST); + + return val; +} + +/** + * xe_gt_mcr_unicast_read_any - reads a non-terminated instance of an MCR register + * @gt: GT structure + * @reg: register to read + * + * Reads a GT MCR register. The read will be steered to a non-terminated + * instance (i.e., one that isn't fused off or powered down by power gating). + * This function assumes the caller is already holding any necessary forcewake + * domains. + * + * Returns the value from a non-terminated instance of @reg. + */ +u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg) +{ + u8 group, instance; + u32 val; + bool steer; + + steer = xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance); + + if (steer) { + mcr_lock(gt); + val = rw_with_mcr_steering(gt, reg, MCR_OP_READ, + group, instance, 0); + mcr_unlock(gt); + } else { + /* DG2 GAM special case rules; treat as if unicast */ + val = xe_mmio_read32(gt, reg.reg); + } + + return val; +} + +/** + * xe_gt_mcr_unicast_read - read a specific instance of an MCR register + * @gt: GT structure + * @reg: the MCR register to read + * @group: the MCR group + * @instance: the MCR instance + * + * Returns the value read from an MCR register after steering toward a specific + * group/instance. + */ +u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, + i915_mcr_reg_t reg, + int group, int instance) +{ + u32 val; + + mcr_lock(gt); + val = rw_with_mcr_steering(gt, reg, MCR_OP_READ, group, instance, 0); + mcr_unlock(gt); + + return val; +} + +/** + * xe_gt_mcr_unicast_write - write a specific instance of an MCR register + * @gt: GT structure + * @reg: the MCR register to write + * @value: value to write + * @group: the MCR group + * @instance: the MCR instance + * + * Write an MCR register in unicast mode after steering toward a specific + * group/instance. + */ +void xe_gt_mcr_unicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value, + int group, int instance) +{ + mcr_lock(gt); + rw_with_mcr_steering(gt, reg, MCR_OP_WRITE, group, instance, value); + mcr_unlock(gt); +} + +/** + * xe_gt_mcr_multicast_write - write a value to all instances of an MCR register + * @gt: GT structure + * @reg: the MCR register to write + * @value: value to write + * + * Write an MCR register in multicast mode to update all instances. + */ +void xe_gt_mcr_multicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value) +{ + /* + * Synchronize with any unicast operations. Once we have exclusive + * access, the MULTICAST bit should already be set, so there's no need + * to touch the steering register. 
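+	 * (rw_with_mcr_steering() always leaves the MULTICAST bit set in the
+	 * steering register on return, which is why a plain MMIO write here
+	 * is broadcast to all instances.)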
+ */ + mcr_lock(gt); + xe_mmio_write32(gt, reg.reg, value); + mcr_unlock(gt); +} + +void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p) +{ + for (int i = 0; i < NUM_STEERING_TYPES; i++) { + if (gt->steering[i].ranges) { + drm_printf(p, "%s steering: group=%#x, instance=%#x\n", + xe_steering_types[i].name, + gt->steering[i].group_target, + gt->steering[i].instance_target); + for (int j = 0; gt->steering[i].ranges[j].end; j++) + drm_printf(p, "\t0x%06x - 0x%06x\n", + gt->steering[i].ranges[j].start, + gt->steering[i].ranges[j].end); + } + } +} diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h new file mode 100644 index 000000000000..62ec6eb654a0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GT_MCR_H_ +#define _XE_GT_MCR_H_ + +#include "i915_reg_defs.h" + +struct drm_printer; +struct xe_gt; + +void xe_gt_mcr_init(struct xe_gt *gt); + +u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, i915_mcr_reg_t reg, + int group, int instance); +u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg); + +void xe_gt_mcr_unicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value, + int group, int instance); +void xe_gt_mcr_multicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value); + +void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p); + +#endif /* _XE_GT_MCR_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c new file mode 100644 index 000000000000..7125113b7390 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -0,0 +1,750 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include +#include + +#include "xe_bo.h" +#include "xe_gt.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_gt_pagefault.h" +#include "xe_migrate.h" +#include "xe_pt.h" +#include "xe_trace.h" +#include "xe_vm.h" + +struct pagefault { + u64 page_addr; + u32 asid; + u16 pdata; + u8 vfid; + u8 access_type; + u8 fault_type; + u8 fault_level; + u8 engine_class; + u8 engine_instance; + u8 fault_unsuccessful; +}; + +enum access_type { + ACCESS_TYPE_READ = 0, + ACCESS_TYPE_WRITE = 1, + ACCESS_TYPE_ATOMIC = 2, + ACCESS_TYPE_RESERVED = 3, +}; + +enum fault_type { + NOT_PRESENT = 0, + WRITE_ACCESS_VIOLATION = 1, + ATOMIC_ACCESS_VIOLATION = 2, +}; + +struct acc { + u64 va_range_base; + u32 asid; + u32 sub_granularity; + u8 granularity; + u8 vfid; + u8 access_type; + u8 engine_class; + u8 engine_instance; +}; + +static struct xe_gt * +guc_to_gt(struct xe_guc *guc) +{ + return container_of(guc, struct xe_gt, uc.guc); +} + +static int send_tlb_invalidation(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + u32 action[] = { + XE_GUC_ACTION_TLB_INVALIDATION, + 0, + XE_GUC_TLB_INVAL_FULL << XE_GUC_TLB_INVAL_TYPE_SHIFT | + XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | + XE_GUC_TLB_INVAL_FLUSH_CACHE, + }; + int seqno; + int ret; + + /* + * XXX: The seqno algorithm relies on TLB invalidation being processed + * in order which they currently are, if that changes the algorithm will + * need to be updated. 
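+	 *
+	 * The sequence number space wraps at TLB_INVALIDATION_SEQNO_MAX and
+	 * skips 0 on wrap (see below); tlb_invalidation_seqno_past() treats
+	 * a gap of more than half the range as a wrap-around.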
+ */ + mutex_lock(&guc->ct.lock); + seqno = gt->usm.tlb_invalidation_seqno; + action[1] = seqno; + gt->usm.tlb_invalidation_seqno = (gt->usm.tlb_invalidation_seqno + 1) % + TLB_INVALIDATION_SEQNO_MAX; + if (!gt->usm.tlb_invalidation_seqno) + gt->usm.tlb_invalidation_seqno = 1; + ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_TLB_INVALIDATE, 1); + if (!ret) + ret = seqno; + mutex_unlock(&guc->ct.lock); + + return ret; +} + +static bool access_is_atomic(enum access_type access_type) +{ + return access_type == ACCESS_TYPE_ATOMIC; +} + +static bool vma_is_valid(struct xe_gt *gt, struct xe_vma *vma) +{ + return BIT(gt->info.id) & vma->gt_present && + !(BIT(gt->info.id) & vma->usm.gt_invalidated); +} + +static bool vma_matches(struct xe_vma *vma, struct xe_vma *lookup) +{ + if (lookup->start > vma->end || lookup->end < vma->start) + return false; + + return true; +} + +static bool only_needs_bo_lock(struct xe_bo *bo) +{ + return bo && bo->vm; +} + +static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr) +{ + struct xe_vma *vma = NULL, lookup; + + lookup.start = page_addr; + lookup.end = lookup.start + SZ_4K - 1; + if (vm->usm.last_fault_vma) { /* Fast lookup */ + if (vma_matches(vm->usm.last_fault_vma, &lookup)) + vma = vm->usm.last_fault_vma; + } + if (!vma) + vma = xe_vm_find_overlapping_vma(vm, &lookup); + + return vma; +} + +static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_vm *vm; + struct xe_vma *vma = NULL; + struct xe_bo *bo; + LIST_HEAD(objs); + LIST_HEAD(dups); + struct ttm_validate_buffer tv_bo, tv_vm; + struct ww_acquire_ctx ww; + struct dma_fence *fence; + bool write_locked; + int ret = 0; + bool atomic; + + /* ASID to VM */ + mutex_lock(&xe->usm.lock); + vm = xa_load(&xe->usm.asid_to_vm, pf->asid); + if (vm) + xe_vm_get(vm); + mutex_unlock(&xe->usm.lock); + if (!vm || !xe_vm_in_fault_mode(vm)) + return -EINVAL; + +retry_userptr: + /* + * TODO: Avoid exclusive lock if VM doesn't have userptrs, or + * start out read-locked? 
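+	 *
+	 * For now the lock is taken in write mode and downgraded to read mode
+	 * (see downgrade_write() below) as soon as it is known that no
+	 * userptr repin is required.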
+ */ + down_write(&vm->lock); + write_locked = true; + vma = lookup_vma(vm, pf->page_addr); + if (!vma) { + ret = -EINVAL; + goto unlock_vm; + } + + if (!xe_vma_is_userptr(vma) || !xe_vma_userptr_check_repin(vma)) { + downgrade_write(&vm->lock); + write_locked = false; + } + + trace_xe_vma_pagefault(vma); + + atomic = access_is_atomic(pf->access_type); + + /* Check if VMA is valid */ + if (vma_is_valid(gt, vma) && !atomic) + goto unlock_vm; + + /* TODO: Validate fault */ + + if (xe_vma_is_userptr(vma) && write_locked) { + spin_lock(&vm->userptr.invalidated_lock); + list_del_init(&vma->userptr.invalidate_link); + spin_unlock(&vm->userptr.invalidated_lock); + + ret = xe_vma_userptr_pin_pages(vma); + if (ret) + goto unlock_vm; + + downgrade_write(&vm->lock); + write_locked = false; + } + + /* Lock VM and BOs dma-resv */ + bo = vma->bo; + if (only_needs_bo_lock(bo)) { + /* This path ensures the BO's LRU is updated */ + ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false); + } else { + tv_vm.num_shared = xe->info.tile_count; + tv_vm.bo = xe_vm_ttm_bo(vm); + list_add(&tv_vm.head, &objs); + if (bo) { + tv_bo.bo = &bo->ttm; + tv_bo.num_shared = xe->info.tile_count; + list_add(&tv_bo.head, &objs); + } + ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups); + } + if (ret) + goto unlock_vm; + + if (atomic) { + if (xe_vma_is_userptr(vma)) { + ret = -EACCES; + goto unlock_dma_resv; + } + + /* Migrate to VRAM, move should invalidate the VMA first */ + ret = xe_bo_migrate(bo, XE_PL_VRAM0 + gt->info.vram_id); + if (ret) + goto unlock_dma_resv; + } else if (bo) { + /* Create backing store if needed */ + ret = xe_bo_validate(bo, vm, true); + if (ret) + goto unlock_dma_resv; + } + + /* Bind VMA only to the GT that has faulted */ + trace_xe_vma_pf_bind(vma); + fence = __xe_pt_bind_vma(gt, vma, xe_gt_migrate_engine(gt), NULL, 0, + vma->gt_present & BIT(gt->info.id)); + if (IS_ERR(fence)) { + ret = PTR_ERR(fence); + goto unlock_dma_resv; + } + + /* + * XXX: Should we drop the lock before waiting? This only helps if doing + * GPU binds which is currently only done if we have to wait for more + * than 10ms on a move. 
+ */ + dma_fence_wait(fence, false); + dma_fence_put(fence); + + if (xe_vma_is_userptr(vma)) + ret = xe_vma_userptr_check_repin(vma); + vma->usm.gt_invalidated &= ~BIT(gt->info.id); + +unlock_dma_resv: + if (only_needs_bo_lock(bo)) + xe_bo_unlock(bo, &ww); + else + ttm_eu_backoff_reservation(&ww, &objs); +unlock_vm: + if (!ret) + vm->usm.last_fault_vma = vma; + if (write_locked) + up_write(&vm->lock); + else + up_read(&vm->lock); + if (ret == -EAGAIN) + goto retry_userptr; + + if (!ret) { + /* + * FIXME: Doing a full TLB invalidation for now, likely could + * defer TLB invalidate + fault response to a callback of fence + * too + */ + ret = send_tlb_invalidation(>->uc.guc); + if (ret >= 0) + ret = 0; + } + xe_vm_put(vm); + + return ret; +} + +static int send_pagefault_reply(struct xe_guc *guc, + struct xe_guc_pagefault_reply *reply) +{ + u32 action[] = { + XE_GUC_ACTION_PAGE_FAULT_RES_DESC, + reply->dw0, + reply->dw1, + }; + + return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); +} + +static void print_pagefault(struct xe_device *xe, struct pagefault *pf) +{ + drm_warn(&xe->drm, "\n\tASID: %d\n" + "\tVFID: %d\n" + "\tPDATA: 0x%04x\n" + "\tFaulted Address: 0x%08x%08x\n" + "\tFaultType: %d\n" + "\tAccessType: %d\n" + "\tFaultLevel: %d\n" + "\tEngineClass: %d\n" + "\tEngineInstance: %d\n", + pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr), + lower_32_bits(pf->page_addr), + pf->fault_type, pf->access_type, pf->fault_level, + pf->engine_class, pf->engine_instance); +} + +#define PF_MSG_LEN_DW 4 + +static int get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) +{ + const struct xe_guc_pagefault_desc *desc; + int ret = 0; + + spin_lock_irq(&pf_queue->lock); + if (pf_queue->head != pf_queue->tail) { + desc = (const struct xe_guc_pagefault_desc *) + (pf_queue->data + pf_queue->head); + + pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0); + pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0); + pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0); + pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) << + PFD_PDATA_HI_SHIFT; + pf->pdata |= FIELD_GET(PFD_PDATA_LO, desc->dw0); + pf->asid = FIELD_GET(PFD_ASID, desc->dw1); + pf->vfid = FIELD_GET(PFD_VFID, desc->dw2); + pf->access_type = FIELD_GET(PFD_ACCESS_TYPE, desc->dw2); + pf->fault_type = FIELD_GET(PFD_FAULT_TYPE, desc->dw2); + pf->page_addr = (u64)(FIELD_GET(PFD_VIRTUAL_ADDR_HI, desc->dw3)) << + PFD_VIRTUAL_ADDR_HI_SHIFT; + pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) << + PFD_VIRTUAL_ADDR_LO_SHIFT; + + pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) % + PF_QUEUE_NUM_DW; + } else { + ret = -1; + } + spin_unlock_irq(&pf_queue->lock); + + return ret; +} + +static bool pf_queue_full(struct pf_queue *pf_queue) +{ + lockdep_assert_held(&pf_queue->lock); + + return CIRC_SPACE(pf_queue->tail, pf_queue->head, PF_QUEUE_NUM_DW) <= + PF_MSG_LEN_DW; +} + +int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_gt *gt = guc_to_gt(guc); + struct pf_queue *pf_queue; + unsigned long flags; + u32 asid; + bool full; + + if (unlikely(len != PF_MSG_LEN_DW)) + return -EPROTO; + + asid = FIELD_GET(PFD_ASID, msg[1]); + pf_queue = >->usm.pf_queue[asid % NUM_PF_QUEUE]; + + spin_lock_irqsave(&pf_queue->lock, flags); + full = pf_queue_full(pf_queue); + if (!full) { + memcpy(pf_queue->data + pf_queue->tail, msg, len * sizeof(u32)); + pf_queue->tail = (pf_queue->tail + len) % PF_QUEUE_NUM_DW; + queue_work(gt->usm.pf_wq, &pf_queue->worker); + } else { + XE_WARN_ON("PF Queue full, 
shouldn't be possible"); + } + spin_unlock_irqrestore(&pf_queue->lock, flags); + + return full ? -ENOSPC : 0; +} + +static void pf_queue_work_func(struct work_struct *w) +{ + struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker); + struct xe_gt *gt = pf_queue->gt; + struct xe_device *xe = gt_to_xe(gt); + struct xe_guc_pagefault_reply reply = {}; + struct pagefault pf = {}; + int ret; + + ret = get_pagefault(pf_queue, &pf); + if (ret) + return; + + ret = handle_pagefault(gt, &pf); + if (unlikely(ret)) { + print_pagefault(xe, &pf); + pf.fault_unsuccessful = 1; + drm_warn(&xe->drm, "Fault response: Unsuccessful %d\n", ret); + } + + reply.dw0 = FIELD_PREP(PFR_VALID, 1) | + FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) | + FIELD_PREP(PFR_REPLY, PFR_ACCESS) | + FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) | + FIELD_PREP(PFR_ASID, pf.asid); + + reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) | + FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) | + FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) | + FIELD_PREP(PFR_PDATA, pf.pdata); + + send_pagefault_reply(>->uc.guc, &reply); +} + +static void acc_queue_work_func(struct work_struct *w); + +int xe_gt_pagefault_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int i; + + if (!xe->info.supports_usm) + return 0; + + gt->usm.tlb_invalidation_seqno = 1; + for (i = 0; i < NUM_PF_QUEUE; ++i) { + gt->usm.pf_queue[i].gt = gt; + spin_lock_init(>->usm.pf_queue[i].lock); + INIT_WORK(>->usm.pf_queue[i].worker, pf_queue_work_func); + } + for (i = 0; i < NUM_ACC_QUEUE; ++i) { + gt->usm.acc_queue[i].gt = gt; + spin_lock_init(>->usm.acc_queue[i].lock); + INIT_WORK(>->usm.acc_queue[i].worker, acc_queue_work_func); + } + + gt->usm.pf_wq = alloc_workqueue("xe_gt_page_fault_work_queue", + WQ_UNBOUND | WQ_HIGHPRI, NUM_PF_QUEUE); + if (!gt->usm.pf_wq) + return -ENOMEM; + + gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue", + WQ_UNBOUND | WQ_HIGHPRI, + NUM_ACC_QUEUE); + if (!gt->usm.acc_wq) + return -ENOMEM; + + return 0; +} + +void xe_gt_pagefault_reset(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int i; + + if (!xe->info.supports_usm) + return; + + for (i = 0; i < NUM_PF_QUEUE; ++i) { + spin_lock_irq(>->usm.pf_queue[i].lock); + gt->usm.pf_queue[i].head = 0; + gt->usm.pf_queue[i].tail = 0; + spin_unlock_irq(>->usm.pf_queue[i].lock); + } + + for (i = 0; i < NUM_ACC_QUEUE; ++i) { + spin_lock(>->usm.acc_queue[i].lock); + gt->usm.acc_queue[i].head = 0; + gt->usm.acc_queue[i].tail = 0; + spin_unlock(>->usm.acc_queue[i].lock); + } +} + +int xe_gt_tlb_invalidation(struct xe_gt *gt) +{ + return send_tlb_invalidation(>->uc.guc); +} + +static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno) +{ + if (gt->usm.tlb_invalidation_seqno_recv >= seqno) + return true; + + if (seqno - gt->usm.tlb_invalidation_seqno_recv > + (TLB_INVALIDATION_SEQNO_MAX / 2)) + return true; + + return false; +} + +int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_guc *guc = >->uc.guc; + int ret; + + /* + * XXX: See above, this algorithm only works if seqno are always in + * order + */ + ret = wait_event_timeout(guc->ct.wq, + tlb_invalidation_seqno_past(gt, seqno), + HZ / 5); + if (!ret) { + drm_err(&xe->drm, "TLB invalidation time'd out, seqno=%d, recv=%d\n", + seqno, gt->usm.tlb_invalidation_seqno_recv); + return -ETIME; + } + + return 0; +} + +int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_gt *gt = guc_to_gt(guc); + int 
expected_seqno; + + if (unlikely(len != 1)) + return -EPROTO; + + /* Sanity check on seqno */ + expected_seqno = (gt->usm.tlb_invalidation_seqno_recv + 1) % + TLB_INVALIDATION_SEQNO_MAX; + XE_WARN_ON(expected_seqno != msg[0]); + + gt->usm.tlb_invalidation_seqno_recv = msg[0]; + smp_wmb(); + wake_up_all(&guc->ct.wq); + + return 0; +} + +static int granularity_in_byte(int val) +{ + switch (val) { + case 0: + return SZ_128K; + case 1: + return SZ_2M; + case 2: + return SZ_16M; + case 3: + return SZ_64M; + default: + return 0; + } +} + +static int sub_granularity_in_byte(int val) +{ + return (granularity_in_byte(val) / 32); +} + +static void print_acc(struct xe_device *xe, struct acc *acc) +{ + drm_warn(&xe->drm, "Access counter request:\n" + "\tType: %s\n" + "\tASID: %d\n" + "\tVFID: %d\n" + "\tEngine: %d:%d\n" + "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n" + "\tSub_Granularity Vector: 0x%08x\n" + "\tVA Range base: 0x%016llx\n", + acc->access_type ? "AC_NTFY_VAL" : "AC_TRIG_VAL", + acc->asid, acc->vfid, acc->engine_class, acc->engine_instance, + granularity_in_byte(acc->granularity) / SZ_1K, + sub_granularity_in_byte(acc->granularity) / SZ_1K, + acc->sub_granularity, acc->va_range_base); +} + +static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc) +{ + u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) * + sub_granularity_in_byte(acc->granularity); + struct xe_vma lookup; + + lookup.start = page_va; + lookup.end = lookup.start + SZ_4K - 1; + + return xe_vm_find_overlapping_vma(vm, &lookup); +} + +static int handle_acc(struct xe_gt *gt, struct acc *acc) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_vm *vm; + struct xe_vma *vma; + struct xe_bo *bo; + LIST_HEAD(objs); + LIST_HEAD(dups); + struct ttm_validate_buffer tv_bo, tv_vm; + struct ww_acquire_ctx ww; + int ret = 0; + + /* We only support ACC_TRIGGER at the moment */ + if (acc->access_type != ACC_TRIGGER) + return -EINVAL; + + /* ASID to VM */ + mutex_lock(&xe->usm.lock); + vm = xa_load(&xe->usm.asid_to_vm, acc->asid); + if (vm) + xe_vm_get(vm); + mutex_unlock(&xe->usm.lock); + if (!vm || !xe_vm_in_fault_mode(vm)) + return -EINVAL; + + down_read(&vm->lock); + + /* Lookup VMA */ + vma = get_acc_vma(vm, acc); + if (!vma) { + ret = -EINVAL; + goto unlock_vm; + } + + trace_xe_vma_acc(vma); + + /* Userptr can't be migrated, nothing to do */ + if (xe_vma_is_userptr(vma)) + goto unlock_vm; + + /* Lock VM and BOs dma-resv */ + bo = vma->bo; + if (only_needs_bo_lock(bo)) { + /* This path ensures the BO's LRU is updated */ + ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false); + } else { + tv_vm.num_shared = xe->info.tile_count; + tv_vm.bo = xe_vm_ttm_bo(vm); + list_add(&tv_vm.head, &objs); + tv_bo.bo = &bo->ttm; + tv_bo.num_shared = xe->info.tile_count; + list_add(&tv_bo.head, &objs); + ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups); + } + if (ret) + goto unlock_vm; + + /* Migrate to VRAM, move should invalidate the VMA first */ + ret = xe_bo_migrate(bo, XE_PL_VRAM0 + gt->info.vram_id); + + if (only_needs_bo_lock(bo)) + xe_bo_unlock(bo, &ww); + else + ttm_eu_backoff_reservation(&ww, &objs); +unlock_vm: + up_read(&vm->lock); + xe_vm_put(vm); + + return ret; +} + +#define make_u64(hi__, low__) ((u64)(hi__) << 32 | (u64)(low__)) + +static int get_acc(struct acc_queue *acc_queue, struct acc *acc) +{ + const struct xe_guc_acc_desc *desc; + int ret = 0; + + spin_lock(&acc_queue->lock); + if (acc_queue->head != acc_queue->tail) { + desc = (const struct xe_guc_acc_desc *) + (acc_queue->data + 
acc_queue->head); + + acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2); + acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 | + FIELD_GET(ACC_SUBG_LO, desc->dw0); + acc->engine_class = FIELD_GET(ACC_ENG_CLASS, desc->dw1); + acc->engine_instance = FIELD_GET(ACC_ENG_INSTANCE, desc->dw1); + acc->asid = FIELD_GET(ACC_ASID, desc->dw1); + acc->vfid = FIELD_GET(ACC_VFID, desc->dw2); + acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0); + acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI, + desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO); + } else { + ret = -1; + } + spin_unlock(&acc_queue->lock); + + return ret; +} + +static void acc_queue_work_func(struct work_struct *w) +{ + struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker); + struct xe_gt *gt = acc_queue->gt; + struct xe_device *xe = gt_to_xe(gt); + struct acc acc = {}; + int ret; + + ret = get_acc(acc_queue, &acc); + if (ret) + return; + + ret = handle_acc(gt, &acc); + if (unlikely(ret)) { + print_acc(xe, &acc); + drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret); + } +} + +#define ACC_MSG_LEN_DW 4 + +static bool acc_queue_full(struct acc_queue *acc_queue) +{ + lockdep_assert_held(&acc_queue->lock); + + return CIRC_SPACE(acc_queue->tail, acc_queue->head, ACC_QUEUE_NUM_DW) <= + ACC_MSG_LEN_DW; +} + +int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_gt *gt = guc_to_gt(guc); + struct acc_queue *acc_queue; + u32 asid; + bool full; + + if (unlikely(len != ACC_MSG_LEN_DW)) + return -EPROTO; + + asid = FIELD_GET(ACC_ASID, msg[1]); + acc_queue = >->usm.acc_queue[asid % NUM_ACC_QUEUE]; + + spin_lock(&acc_queue->lock); + full = acc_queue_full(acc_queue); + if (!full) { + memcpy(acc_queue->data + acc_queue->tail, msg, + len * sizeof(u32)); + acc_queue->tail = (acc_queue->tail + len) % ACC_QUEUE_NUM_DW; + queue_work(gt->usm.acc_wq, &acc_queue->worker); + } else { + drm_warn(>_to_xe(gt)->drm, "ACC Queue full, dropping ACC"); + } + spin_unlock(&acc_queue->lock); + + return full ? 
-ENOSPC : 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.h b/drivers/gpu/drm/xe/xe_gt_pagefault.h new file mode 100644 index 000000000000..35f68027cc9c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GT_PAGEFAULT_H_ +#define _XE_GT_PAGEFAULT_H_ + +#include + +struct xe_gt; +struct xe_guc; + +int xe_gt_pagefault_init(struct xe_gt *gt); +void xe_gt_pagefault_reset(struct xe_gt *gt); +int xe_gt_tlb_invalidation(struct xe_gt *gt); +int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno); +int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len); +int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); +int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len); + +#endif /* _XE_GT_PAGEFAULT_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c new file mode 100644 index 000000000000..2d966d935b8e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include +#include +#include "xe_gt.h" +#include "xe_gt_sysfs.h" + +static void xe_gt_sysfs_kobj_release(struct kobject *kobj) +{ + kfree(kobj); +} + +static struct kobj_type xe_gt_sysfs_kobj_type = { + .release = xe_gt_sysfs_kobj_release, + .sysfs_ops = &kobj_sysfs_ops, +}; + +static void gt_sysfs_fini(struct drm_device *drm, void *arg) +{ + struct xe_gt *gt = arg; + + kobject_put(gt->sysfs); +} + +int xe_gt_sysfs_init(struct xe_gt *gt) +{ + struct device *dev = gt_to_xe(gt)->drm.dev; + struct kobj_gt *kg; + int err; + + kg = kzalloc(sizeof(*kg), GFP_KERNEL); + if (!kg) + return -ENOMEM; + + kobject_init(&kg->base, &xe_gt_sysfs_kobj_type); + kg->gt = gt; + + err = kobject_add(&kg->base, &dev->kobj, "gt%d", gt->info.id); + if (err) { + kobject_put(&kg->base); + return err; + } + + gt->sysfs = &kg->base; + + err = drmm_add_action_or_reset(>_to_xe(gt)->drm, gt_sysfs_fini, gt); + if (err) + return err; + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.h b/drivers/gpu/drm/xe/xe_gt_sysfs.h new file mode 100644 index 000000000000..ecbfcc5c7d42 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sysfs.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GT_SYSFS_H_ +#define _XE_GT_SYSFS_H_ + +#include "xe_gt_sysfs_types.h" + +int xe_gt_sysfs_init(struct xe_gt *gt); + +static inline struct xe_gt * +kobj_to_gt(struct kobject *kobj) +{ + return container_of(kobj, struct kobj_gt, base)->gt; +} + +#endif /* _XE_GT_SYSFS_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs_types.h b/drivers/gpu/drm/xe/xe_gt_sysfs_types.h new file mode 100644 index 000000000000..d3bc6b83360f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sysfs_types.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GT_SYSFS_TYPES_H_ +#define _XE_GT_SYSFS_TYPES_H_ + +#include + +struct xe_gt; + +/** + * struct kobj_gt - A GT's kobject struct that connects the kobject and the GT + * + * When dealing with multiple GTs, this struct helps to understand which GT + * needs to be addressed on a given sysfs call. 
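+ *
+ * The reverse lookup is a plain container_of() on the embedded kobject;
+ * see kobj_to_gt() in xe_gt_sysfs.h, which recovers the GT from the
+ * &struct kobject handed to a sysfs callback.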
+ */ +struct kobj_gt { + /** @base: The actual kobject */ + struct kobject base; + /** @gt: A pointer to the GT itself */ + struct xe_gt *gt; +}; + +#endif /* _XE_GT_SYSFS_TYPES_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c new file mode 100644 index 000000000000..8e02e362ba27 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include "xe_gt.h" +#include "xe_gt_topology.h" +#include "xe_mmio.h" + +#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS) +#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS) + +#define XELP_EU_ENABLE 0x9134 /* "_DISABLE" on Xe_LP */ +#define XELP_EU_MASK REG_GENMASK(7, 0) +#define XELP_GT_GEOMETRY_DSS_ENABLE 0x913c +#define XEHP_GT_COMPUTE_DSS_ENABLE 0x9144 +#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT 0x9148 + +static void +load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...) +{ + va_list argp; + u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {}; + int i; + + if (drm_WARN_ON(>_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS)) + numregs = XE_MAX_DSS_FUSE_REGS; + + va_start(argp, numregs); + for (i = 0; i < numregs; i++) + fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, u32)); + va_end(argp); + + bitmap_from_arr32(mask, fuse_val, numregs * 32); +} + +static void +load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask) +{ + struct xe_device *xe = gt_to_xe(gt); + u32 reg = xe_mmio_read32(gt, XELP_EU_ENABLE); + u32 val = 0; + int i; + + BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1); + + /* + * Pre-Xe_HP platforms inverted the bit meaning (disable instead + * of enable). + */ + if (GRAPHICS_VERx100(xe) < 1250) + reg = ~reg & XELP_EU_MASK; + + /* On PVC, one bit = one EU */ + if (GRAPHICS_VERx100(xe) == 1260) { + val = reg; + } else { + /* All other platforms, one bit = 2 EU */ + for (i = 0; i < fls(reg); i++) + if (reg & BIT(i)) + val |= 0x3 << 2 * i; + } + + bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS); +} + +void +xe_gt_topology_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + struct drm_printer p = drm_debug_printer("GT topology"); + int num_geometry_regs, num_compute_regs; + + if (GRAPHICS_VERx100(xe) == 1260) { + num_geometry_regs = 0; + num_compute_regs = 2; + } else if (GRAPHICS_VERx100(xe) >= 1250) { + num_geometry_regs = 1; + num_compute_regs = 1; + } else { + num_geometry_regs = 1; + num_compute_regs = 0; + } + + load_dss_mask(gt, gt->fuse_topo.g_dss_mask, num_geometry_regs, + XELP_GT_GEOMETRY_DSS_ENABLE); + load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs, + XEHP_GT_COMPUTE_DSS_ENABLE, + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT); + load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss); + + xe_gt_topology_dump(gt, &p); +} + +unsigned int +xe_gt_topology_count_dss(xe_dss_mask_t mask) +{ + return bitmap_weight(mask, XE_MAX_DSS_FUSE_BITS); +} + +u64 +xe_gt_topology_dss_group_mask(xe_dss_mask_t mask, int grpsize) +{ + xe_dss_mask_t per_dss_mask = {}; + u64 grpmask = 0; + + WARN_ON(DIV_ROUND_UP(XE_MAX_DSS_FUSE_BITS, grpsize) > BITS_PER_TYPE(grpmask)); + + bitmap_fill(per_dss_mask, grpsize); + for (int i = 0; !bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS); i++) { + if (bitmap_intersects(mask, per_dss_mask, grpsize)) + grpmask |= BIT(i); + + bitmap_shift_right(mask, mask, grpsize, XE_MAX_DSS_FUSE_BITS); + } + + return grpmask; +} + +void +xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) +{ + drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS, + 
gt->fuse_topo.g_dss_mask); + drm_printf(p, "dss mask (compute): %*pb\n", XE_MAX_DSS_FUSE_BITS, + gt->fuse_topo.c_dss_mask); + + drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS, + gt->fuse_topo.eu_mask_per_dss); + +} + +/* + * Used to obtain the index of the first DSS. Can start searching from the + * beginning of a specific dss group (e.g., gslice, cslice, etc.) if + * groupsize and groupnum are non-zero. + */ +unsigned int +xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum) +{ + return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize); +} diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h new file mode 100644 index 000000000000..7a0abc64084f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_topology.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef __XE_GT_TOPOLOGY_H__ +#define __XE_GT_TOPOLOGY_H__ + +#include "xe_gt_types.h" + +struct drm_printer; + +void xe_gt_topology_init(struct xe_gt *gt); + +void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p); + +unsigned int +xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum); + +#endif /* __XE_GT_TOPOLOGY_H__ */ diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h new file mode 100644 index 000000000000..c80a9215098d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -0,0 +1,320 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GT_TYPES_H_ +#define _XE_GT_TYPES_H_ + +#include "xe_force_wake_types.h" +#include "xe_hw_engine_types.h" +#include "xe_hw_fence_types.h" +#include "xe_reg_sr_types.h" +#include "xe_sa_types.h" +#include "xe_uc_types.h" + +struct xe_engine_ops; +struct xe_ggtt; +struct xe_migrate; +struct xe_ring_ops; +struct xe_ttm_gtt_mgr; +struct xe_ttm_vram_mgr; + +enum xe_gt_type { + XE_GT_TYPE_UNINITIALIZED, + XE_GT_TYPE_MAIN, + XE_GT_TYPE_REMOTE, + XE_GT_TYPE_MEDIA, +}; + +#define XE_MAX_DSS_FUSE_REGS 2 +#define XE_MAX_EU_FUSE_REGS 1 + +typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)]; +typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)]; + +struct xe_mmio_range { + u32 start; + u32 end; +}; + +/* + * The hardware has multiple kinds of multicast register ranges that need + * special register steering (and future platforms are expected to add + * additional types). + * + * During driver startup, we initialize the steering control register to + * direct reads to a slice/subslice that are valid for the 'subslice' class + * of multicast registers. If another type of steering does not have any + * overlap in valid steering targets with 'subslice' style registers, we will + * need to explicitly re-steer reads of registers of the other type. + * + * Only the replication types that may need additional non-default steering + * are listed here. + */ +enum xe_steering_type { + L3BANK, + MSLICE, + LNCF, + DSS, + OADDRM, + + /* + * On some platforms there are multiple types of MCR registers that + * will always return a non-terminated value at instance (0, 0). We'll + * lump those all into a single category to keep things simple. + */ + INSTANCE0, + + NUM_STEERING_TYPES +}; + +/** + * struct xe_gt - Top level struct of a graphics tile + * + * A graphics tile may be a physical split (duplicate pieces of silicon, + * different GGTT + VRAM) or a virtual split (shared GGTT + VRAM). 
Either way + * this structure encapsulates everything a GT is (MMIO, VRAM, memory + * management, microcontrollers, and a hardware set of engines). + */ +struct xe_gt { + /** @xe: backpointer to XE device */ + struct xe_device *xe; + + /** @info: GT info */ + struct { + /** @type: type of GT */ + enum xe_gt_type type; + /** @id: id of GT */ + u8 id; + /** @vram_id: id of the VRAM for this GT */ + u8 vram_id; + /** @clock_freq: clock frequency */ + u32 clock_freq; + /** @engine_mask: mask of engines present on GT */ + u64 engine_mask; + } info; + + /** + * @mmio: mmio info for GT, can be subset of the global device mmio + * space + */ + struct { + /** @size: size of MMIO space on GT */ + size_t size; + /** @regs: pointer to MMIO space on GT */ + void *regs; + /** @fw: force wake for GT */ + struct xe_force_wake fw; + /** + * @adj_limit: adjust MMIO address if address is below this + * value + */ + u32 adj_limit; + /** @adj_offset: offset to add to MMIO address when adjusting */ + u32 adj_offset; + } mmio; + + /** + * @reg_sr: table with registers to be restored on GT init/resume/reset + */ + struct xe_reg_sr reg_sr; + + /** + * @mem: memory management info for GT, multiple GTs can point to same + * objects (virtual split) + */ + struct { + /** + * @vram: VRAM info for GT, multiple GTs can point to same info + * (virtual split), can be subset of global device VRAM + */ + struct { + /** @io_start: start address of VRAM */ + resource_size_t io_start; + /** @size: size of VRAM */ + resource_size_t size; + /** @mapping: pointer to VRAM mappable space */ + void *__iomem mapping; + } vram; + /** @vram_mgr: VRAM TTM manager */ + struct xe_ttm_vram_mgr *vram_mgr; + /** @gtt_mgr: GTT TTM manager */ + struct xe_ttm_gtt_mgr *gtt_mgr; + /** @ggtt: Global graphics translation table */ + struct xe_ggtt *ggtt; + } mem; + + /** @reset: state for GT resets */ + struct { + /** + * @worker: work so GT resets can be done asynchronously, allowing + * the reset code to safely flush all code paths + */ + struct work_struct worker; + } reset; + + /** @usm: unified shared memory state */ + struct { + /** + * @bb_pool: Pool from which batchbuffers, for USM operations + * (e.g. migrations, fixing page tables), are allocated. + * A dedicated pool is needed so USM operations do not get blocked + * behind any user operations which may have resulted in a + * fault. + */ + struct xe_sa_manager bb_pool; + /** + * @reserved_bcs_instance: reserved BCS instance used for USM + * operations (e.g. migrations, fixing page tables) + */ + u16 reserved_bcs_instance; + /** + * @tlb_invalidation_seqno: TLB invalidation seqno, protected by + * CT lock + */ +#define TLB_INVALIDATION_SEQNO_MAX 0x100000 + int tlb_invalidation_seqno; + /** + * @tlb_invalidation_seqno_recv: last received TLB invalidation + * seqno, protected by CT lock + */ + int tlb_invalidation_seqno_recv; + /** @pf_wq: page fault work queue, unbound, high priority */ + struct workqueue_struct *pf_wq; + /** @acc_wq: access counter work queue, unbound, high priority */ + struct workqueue_struct *acc_wq; + /** + * @pf_queue: Page fault queue used to sync faults so faults can + * be processed not under the GuC CT lock. The queue is sized so + * it can sync all possible faults (1 per physical engine). + * Multiple queues exist so page faults from different VMs can + * be processed in parallel.
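+ *
+ * Each queue is a simple circular buffer of DWs: the G2H handler copies
+ * the raw fault descriptor in at @tail and queues the worker, while the
+ * worker pops descriptors off @head and handles them outside the CT lock,
+ * replying to the GuC once done.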
+ */ + struct pf_queue { + /** @gt: back pointer to GT */ + struct xe_gt *gt; +#define PF_QUEUE_NUM_DW 128 + /** @data: data in the page fault queue */ + u32 data[PF_QUEUE_NUM_DW]; + /** + * @head: head pointer in DWs for page fault queue, + * moved by worker which processes faults. + */ + u16 head; + /** + * @tail: tail pointer in DWs for page fault queue, + * moved by G2H handler. + */ + u16 tail; + /** @lock: protects page fault queue */ + spinlock_t lock; + /** @worker: to process page faults */ + struct work_struct worker; +#define NUM_PF_QUEUE 4 + } pf_queue[NUM_PF_QUEUE]; + /** + * @acc_queue: Same as page fault queue, cannot process access + * counters under CT lock. + */ + struct acc_queue { + /** @gt: back pointer to GT */ + struct xe_gt *gt; +#define ACC_QUEUE_NUM_DW 128 + /** @data: data in the page fault queue */ + u32 data[ACC_QUEUE_NUM_DW]; + /** + * @head: head pointer in DWs for page fault queue, + * moved by worker which processes faults. + */ + u16 head; + /** + * @tail: tail pointer in DWs for page fault queue, + * moved by G2H handler. + */ + u16 tail; + /** @lock: protects page fault queue */ + spinlock_t lock; + /** @worker: to process access counters */ + struct work_struct worker; +#define NUM_ACC_QUEUE 4 + } acc_queue[NUM_ACC_QUEUE]; + } usm; + + /** @ordered_wq: used to serialize GT resets and TDRs */ + struct workqueue_struct *ordered_wq; + + /** @uc: micro controllers on the GT */ + struct xe_uc uc; + + /** @engine_ops: submission backend engine operations */ + const struct xe_engine_ops *engine_ops; + + /** + * @ring_ops: ring operations for this hw engine (1 per engine class) + */ + const struct xe_ring_ops *ring_ops[XE_ENGINE_CLASS_MAX]; + + /** @fence_irq: fence IRQs (1 per engine class) */ + struct xe_hw_fence_irq fence_irq[XE_ENGINE_CLASS_MAX]; + + /** @default_lrc: default LRC state */ + void *default_lrc[XE_ENGINE_CLASS_MAX]; + + /** @hw_engines: hardware engines on the GT */ + struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES]; + + /** @kernel_bb_pool: Pool from which batchbuffers are allocated */ + struct xe_sa_manager kernel_bb_pool; + + /** @migrate: Migration helper for vram blits and clearing */ + struct xe_migrate *migrate; + + /** @pcode: GT's PCODE */ + struct { + /** @lock: protecting GT's PCODE mailbox data */ + struct mutex lock; + } pcode; + + /** @sysfs: sysfs' kobj used by xe_gt_sysfs */ + struct kobject *sysfs; + + /** @mocs: info */ + struct { + /** @uc_index: UC index */ + u8 uc_index; + /** @wb_index: WB index, only used on L3_CCS platforms */ + u8 wb_index; + } mocs; + + /** @fuse_topo: GT topology reported by fuse registers */ + struct { + /** @g_dss_mask: dual-subslices usable by geometry */ + xe_dss_mask_t g_dss_mask; + + /** @c_dss_mask: dual-subslices usable by compute */ + xe_dss_mask_t c_dss_mask; + + /** @eu_mask_per_dss: EU mask per DSS*/ + xe_eu_mask_t eu_mask_per_dss; + } fuse_topo; + + /** @steering: register steering for individual HW units */ + struct { + /* @ranges: register ranges used for this steering type */ + const struct xe_mmio_range *ranges; + + /** @group_target: target to steer accesses to */ + u16 group_target; + /** @instance_target: instance to steer accesses to */ + u16 instance_target; + } steering[NUM_STEERING_TYPES]; + + /** + * @mcr_lock: protects the MCR_SELECTOR register for the duration + * of a steered operation + */ + spinlock_t mcr_lock; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c new file mode 100644 index 000000000000..3c285d849ef6 --- /dev/null +++ 
b/drivers/gpu/drm/xe/xe_guc.c @@ -0,0 +1,875 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_guc.h" +#include "xe_guc_ads.h" +#include "xe_guc_ct.h" +#include "xe_guc_hwconfig.h" +#include "xe_guc_log.h" +#include "xe_guc_reg.h" +#include "xe_guc_pc.h" +#include "xe_guc_submit.h" +#include "xe_gt.h" +#include "xe_platform_types.h" +#include "xe_uc_fw.h" +#include "xe_wopcm.h" +#include "xe_mmio.h" +#include "xe_force_wake.h" +#include "i915_reg_defs.h" +#include "gt/intel_gt_regs.h" + +/* TODO: move to common file */ +#define GUC_PVC_MOCS_INDEX_MASK REG_GENMASK(25, 24) +#define PVC_MOCS_UC_INDEX 1 +#define PVC_GUC_MOCS_INDEX(index) REG_FIELD_PREP(GUC_PVC_MOCS_INDEX_MASK,\ + index) + +static struct xe_gt * +guc_to_gt(struct xe_guc *guc) +{ + return container_of(guc, struct xe_gt, uc.guc); +} + +static struct xe_device * +guc_to_xe(struct xe_guc *guc) +{ + return gt_to_xe(guc_to_gt(guc)); +} + +/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */ +#define GUC_GGTT_TOP 0xFEE00000 +static u32 guc_bo_ggtt_addr(struct xe_guc *guc, + struct xe_bo *bo) +{ + u32 addr = xe_bo_ggtt_addr(bo); + + XE_BUG_ON(addr < xe_wopcm_size(guc_to_xe(guc))); + XE_BUG_ON(range_overflows_t(u32, addr, bo->size, GUC_GGTT_TOP)); + + return addr; +} + +static u32 guc_ctl_debug_flags(struct xe_guc *guc) +{ + u32 level = xe_guc_log_get_level(&guc->log); + u32 flags = 0; + + if (!GUC_LOG_LEVEL_IS_VERBOSE(level)) + flags |= GUC_LOG_DISABLED; + else + flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) << + GUC_LOG_VERBOSITY_SHIFT; + + return flags; +} + +static u32 guc_ctl_feature_flags(struct xe_guc *guc) +{ + return GUC_CTL_ENABLE_SLPC; +} + +static u32 guc_ctl_log_params_flags(struct xe_guc *guc) +{ + u32 offset = guc_bo_ggtt_addr(guc, guc->log.bo) >> PAGE_SHIFT; + u32 flags; + + #if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0) + #define LOG_UNIT SZ_1M + #define LOG_FLAG GUC_LOG_LOG_ALLOC_UNITS + #else + #define LOG_UNIT SZ_4K + #define LOG_FLAG 0 + #endif + + #if (((CAPTURE_BUFFER_SIZE) % SZ_1M) == 0) + #define CAPTURE_UNIT SZ_1M + #define CAPTURE_FLAG GUC_LOG_CAPTURE_ALLOC_UNITS + #else + #define CAPTURE_UNIT SZ_4K + #define CAPTURE_FLAG 0 + #endif + + BUILD_BUG_ON(!CRASH_BUFFER_SIZE); + BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, LOG_UNIT)); + BUILD_BUG_ON(!DEBUG_BUFFER_SIZE); + BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, LOG_UNIT)); + BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE); + BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT)); + + BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) > + (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT)); + BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) > + (GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT)); + BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) > + (GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT)); + + flags = GUC_LOG_VALID | + GUC_LOG_NOTIFY_ON_HALF_FULL | + CAPTURE_FLAG | + LOG_FLAG | + ((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) | + ((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) | + ((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) << + GUC_LOG_CAPTURE_SHIFT) | + (offset << GUC_LOG_BUF_ADDR_SHIFT); + + #undef LOG_UNIT + #undef LOG_FLAG + #undef CAPTURE_UNIT + #undef CAPTURE_FLAG + + return flags; +} + +static u32 guc_ctl_ads_flags(struct xe_guc *guc) +{ + u32 ads = guc_bo_ggtt_addr(guc, guc->ads.bo) >> PAGE_SHIFT; + u32 flags = ads << GUC_ADS_ADDR_SHIFT; + + return flags; +} + +static u32 guc_ctl_wa_flags(struct xe_guc *guc) +{ + struct xe_device *xe = 
guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + u32 flags = 0; + + /* Wa_22012773006:gen11,gen12 < XeHP */ + if (GRAPHICS_VER(xe) >= 11 && + GRAPHICS_VERx100(xe) < 1250) + flags |= GUC_WA_POLLCS; + + /* Wa_16011759253 */ + /* Wa_22011383443 */ + if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_B0) || + IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_B0)) + flags |= GUC_WA_GAM_CREDITS; + + /* Wa_14014475959 */ + if (IS_PLATFORM_STEP(xe, XE_METEORLAKE, STEP_A0, STEP_B0) || + xe->info.platform == XE_DG2) + flags |= GUC_WA_HOLD_CCS_SWITCHOUT; + + /* + * Wa_14012197797 + * Wa_22011391025 + * + * The same WA bit is used for both and 22011391025 is applicable to + * all DG2. + */ + if (xe->info.platform == XE_DG2) + flags |= GUC_WA_DUAL_QUEUE; + + /* + * Wa_2201180203 + * GUC_WA_PRE_PARSER causes media workload hang for PVC A0 and PCIe + * errors. Disable this for PVC A0 steppings. + */ + if (GRAPHICS_VER(xe) <= 12 && + !IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_B0)) + flags |= GUC_WA_PRE_PARSER; + + /* Wa_16011777198 */ + if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_C0) || + IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G11, STEP_A0, + STEP_B0)) + flags |= GUC_WA_RCS_RESET_BEFORE_RC6; + + /* + * Wa_22012727170 + * Wa_22012727685 + * + * This WA is applicable to PVC CT A0, but causes media regressions. + * Drop the WA for PVC. + */ + if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_C0) || + IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G11, STEP_A0, + STEP_FOREVER)) + flags |= GUC_WA_CONTEXT_ISOLATION; + + /* Wa_16015675438, Wa_18020744125 */ + if (!xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER)) + flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST; + + /* Wa_1509372804 */ + if (IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_C0)) + flags |= GUC_WA_RENDER_RST_RC6_EXIT; + + + return flags; +} + +static u32 guc_ctl_devid(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + + return (((u32)xe->info.devid) << 16) | xe->info.revid; +} + +static void guc_init_params(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + u32 *params = guc->params; + int i; + + BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32)); + BUILD_BUG_ON(SOFT_SCRATCH_COUNT != GUC_CTL_MAX_DWORDS + 2); + + params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc); + params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc); + params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc); + params[GUC_CTL_ADS] = guc_ctl_ads_flags(guc); + params[GUC_CTL_WA] = guc_ctl_wa_flags(guc); + params[GUC_CTL_DEVID] = guc_ctl_devid(guc); + + for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) + drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]); +} + +/* + * Initialise the GuC parameter block before starting the firmware + * transfer. These parameters are read by the firmware on startup + * and cannot be changed thereafter. 
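+ *
+ * guc_write_params() below is what actually hands the block to the
+ * hardware: SOFT_SCRATCH(0) is cleared and the GUC_CTL_MAX_DWORDS
+ * parameter DWs are written to SOFT_SCRATCH(1) onwards, where the
+ * firmware picks them up during boot.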
+ */ +void guc_write_params(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + int i; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + + xe_mmio_write32(gt, SOFT_SCRATCH(0).reg, 0); + + for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) + xe_mmio_write32(gt, SOFT_SCRATCH(1 + i).reg, guc->params[i]); +} + +#define MEDIA_GUC_HOST_INTERRUPT _MMIO(0x190304) + +int xe_guc_init(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + guc->fw.type = XE_UC_FW_TYPE_GUC; + ret = xe_uc_fw_init(&guc->fw); + if (ret) + goto out; + + ret = xe_guc_log_init(&guc->log); + if (ret) + goto out; + + ret = xe_guc_ads_init(&guc->ads); + if (ret) + goto out; + + ret = xe_guc_ct_init(&guc->ct); + if (ret) + goto out; + + ret = xe_guc_pc_init(&guc->pc); + if (ret) + goto out; + + guc_init_params(guc); + + if (xe_gt_is_media_type(gt)) + guc->notify_reg = MEDIA_GUC_HOST_INTERRUPT.reg; + else + guc->notify_reg = GEN11_GUC_HOST_INTERRUPT.reg; + + xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); + + return 0; + +out: + drm_err(&xe->drm, "GuC init failed with %d", ret); + return ret; +} + +/** + * xe_guc_init_post_hwconfig - initialize GuC post hwconfig load + * @guc: The GuC object + * + * Return: 0 on success, negative error code on error. + */ +int xe_guc_init_post_hwconfig(struct xe_guc *guc) +{ + return xe_guc_ads_init_post_hwconfig(&guc->ads); +} + +int xe_guc_post_load_init(struct xe_guc *guc) +{ + xe_guc_ads_populate_post_load(&guc->ads); + + return 0; +} + +int xe_guc_reset(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + u32 guc_status; + int ret; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + + xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_GUC); + + ret = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_GUC, 5); + if (ret) { + drm_err(&xe->drm, "GuC reset timed out, GEN6_GDRST=0x%8x\n", + xe_mmio_read32(gt, GEN6_GDRST.reg)); + goto err_out; + } + + guc_status = xe_mmio_read32(gt, GUC_STATUS.reg); + if (!(guc_status & GS_MIA_IN_RESET)) { + drm_err(&xe->drm, + "GuC status: 0x%x, MIA core expected to be in reset\n", + guc_status); + ret = -EIO; + goto err_out; + } + + return 0; + +err_out: + + return ret; +} + +static void guc_prepare_xfer(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = guc_to_xe(guc); + u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC | + GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA | + GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA | + GUC_ENABLE_MIA_CLOCK_GATING; + + if (GRAPHICS_VERx100(xe) < 1250) + shim_flags |= GUC_DISABLE_SRAM_INIT_TO_ZEROES | + GUC_ENABLE_MIA_CACHING; + + if (xe->info.platform == XE_PVC) + shim_flags |= PVC_GUC_MOCS_INDEX(PVC_MOCS_UC_INDEX); + + /* Must program this register before loading the ucode with DMA */ + xe_mmio_write32(gt, GUC_SHIM_CONTROL.reg, shim_flags); + + xe_mmio_write32(gt, GEN9_GT_PM_CONFIG.reg, GT_DOORBELL_ENABLE); +} + +/* + * Supporting MMIO & in memory RSA + */ +static int guc_xfer_rsa(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + u32 rsa[UOS_RSA_SCRATCH_COUNT]; + size_t copied; + int i; + + if (guc->fw.rsa_size > 256) { + u32 rsa_ggtt_addr = xe_bo_ggtt_addr(guc->fw.bo) + + xe_uc_fw_rsa_offset(&guc->fw); + xe_mmio_write32(gt, UOS_RSA_SCRATCH(0).reg, rsa_ggtt_addr); + return 0; + } + + copied = xe_uc_fw_copy_rsa(&guc->fw, rsa, sizeof(rsa)); + if (copied < sizeof(rsa)) + return -ENOMEM; + + for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++) + xe_mmio_write32(gt, UOS_RSA_SCRATCH(i).reg, 
rsa[i]); + + return 0; +} + +/* + * Read the GuC status register (GUC_STATUS) and store it in the + * specified location; then return a boolean indicating whether + * the value matches either of two values representing completion + * of the GuC boot process. + * + * This is used for polling the GuC status in a wait_for() + * loop below. + */ +static bool guc_ready(struct xe_guc *guc, u32 *status) +{ + u32 val = xe_mmio_read32(guc_to_gt(guc), GUC_STATUS.reg); + u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, val); + + *status = val; + return uk_val == XE_GUC_LOAD_STATUS_READY; +} + +static int guc_wait_ucode(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + u32 status; + int ret; + + /* + * Wait for the GuC to start up. + * NB: Docs recommend not using the interrupt for completion. + * Measurements indicate this should take no more than 20ms + * (assuming the GT clock is at maximum frequency). So, a + * timeout here indicates that the GuC has failed and is unusable. + * (Higher levels of the driver may decide to reset the GuC and + * attempt the ucode load again if this happens.) + * + * FIXME: There is a known (but exceedingly unlikely) race condition + * where the asynchronous frequency management code could reduce + * the GT clock while a GuC reload is in progress (during a full + * GT reset). A fix is in progress but there are complex locking + * issues to be resolved. In the meantime bump the timeout to + * 200ms. Even at slowest clock, this should be sufficient. And + * in the working case, a larger timeout makes no difference. + */ + ret = wait_for(guc_ready(guc, &status), 200); + if (ret) { + struct drm_device *drm = &xe->drm; + struct drm_printer p = drm_info_printer(drm->dev); + + drm_info(drm, "GuC load failed: status = 0x%08X\n", status); + drm_info(drm, "GuC load failed: status: Reset = %d, " + "BootROM = 0x%02X, UKernel = 0x%02X, " + "MIA = 0x%02X, Auth = 0x%02X\n", + REG_FIELD_GET(GS_MIA_IN_RESET, status), + REG_FIELD_GET(GS_BOOTROM_MASK, status), + REG_FIELD_GET(GS_UKERNEL_MASK, status), + REG_FIELD_GET(GS_MIA_MASK, status), + REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); + + if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { + drm_info(drm, "GuC firmware signature verification failed\n"); + ret = -ENOEXEC; + } + + if (REG_FIELD_GET(GS_UKERNEL_MASK, status) == + XE_GUC_LOAD_STATUS_EXCEPTION) { + drm_info(drm, "GuC firmware exception. EIP: %#x\n", + xe_mmio_read32(guc_to_gt(guc), + SOFT_SCRATCH(13).reg)); + ret = -ENXIO; + } + + xe_guc_log_print(&guc->log, &p); + } else { + drm_dbg(&xe->drm, "GuC successfully loaded"); + } + + return ret; +} + +static int __xe_guc_upload(struct xe_guc *guc) +{ + int ret; + + guc_write_params(guc); + guc_prepare_xfer(guc); + + /* + * Note that GuC needs the CSS header plus uKernel code to be copied + * by the DMA engine in one operation, whereas the RSA signature is + * loaded separately, either by copying it to the UOS_RSA_SCRATCH + * register (if key size <= 256) or through a ggtt-pinned vma (if key + * size > 256). The RSA size and therefore the way we provide it to the + * HW is fixed for each platform and hard-coded in the bootrom. + */ + ret = guc_xfer_rsa(guc); + if (ret) + goto out; + /* + * Current uCode expects the code to be loaded at 8k; locations below + * this are used for the stack. 
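+ * The 0x2000 (8 KiB) offset passed to xe_uc_fw_upload() below is exactly
+ * that load address.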
+ */ + ret = xe_uc_fw_upload(&guc->fw, 0x2000, UOS_MOVE); + if (ret) + goto out; + + /* Wait for authentication */ + ret = guc_wait_ucode(guc); + if (ret) + goto out; + + xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_RUNNING); + return 0; + +out: + xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOAD_FAIL); + return 0 /* FIXME: ret, don't want to stop load currently */; +} + +/** + * xe_guc_min_load_for_hwconfig - load minimal GuC and read hwconfig table + * @guc: The GuC object + * + * This function uploads a minimal GuC that does not support submissions but + * in a state where the hwconfig table can be read. Next, it reads and parses + * the hwconfig table so it can be used for subsequent steps in the driver load. + * Lastly, it enables CT communication (XXX: this is needed for PFs/VFs only). + * + * Return: 0 on success, negative error code on error. + */ +int xe_guc_min_load_for_hwconfig(struct xe_guc *guc) +{ + int ret; + + xe_guc_ads_populate_minimal(&guc->ads); + + ret = __xe_guc_upload(guc); + if (ret) + return ret; + + ret = xe_guc_hwconfig_init(guc); + if (ret) + return ret; + + ret = xe_guc_enable_communication(guc); + if (ret) + return ret; + + return 0; +} + +int xe_guc_upload(struct xe_guc *guc) +{ + xe_guc_ads_populate(&guc->ads); + + return __xe_guc_upload(guc); +} + +static void guc_handle_mmio_msg(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + u32 msg; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + + msg = xe_mmio_read32(gt, SOFT_SCRATCH(15).reg); + msg &= XE_GUC_RECV_MSG_EXCEPTION | + XE_GUC_RECV_MSG_CRASH_DUMP_POSTED; + xe_mmio_write32(gt, SOFT_SCRATCH(15).reg, 0); + + if (msg & XE_GUC_RECV_MSG_CRASH_DUMP_POSTED) + drm_err(&guc_to_xe(guc)->drm, + "Received early GuC crash dump notification!\n"); + + if (msg & XE_GUC_RECV_MSG_EXCEPTION) + drm_err(&guc_to_xe(guc)->drm, + "Received early GuC exception notification!\n"); +} + +void guc_enable_irq(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + u32 events = xe_gt_is_media_type(gt) ? 
+ REG_FIELD_PREP(ENGINE0_MASK, GUC_INTR_GUC2HOST) : + REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST); + + xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg, + REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST)); + if (xe_gt_is_media_type(gt)) + xe_mmio_rmw32(gt, GEN11_GUC_SG_INTR_MASK.reg, events, 0); + else + xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg, ~events); +} + +int xe_guc_enable_communication(struct xe_guc *guc) +{ + int err; + + guc_enable_irq(guc); + + xe_mmio_rmw32(guc_to_gt(guc), GEN6_PMINTRMSK.reg, + ARAT_EXPIRED_INTRMSK, 0); + + err = xe_guc_ct_enable(&guc->ct); + if (err) + return err; + + guc_handle_mmio_msg(guc); + + return 0; +} + +int xe_guc_suspend(struct xe_guc *guc) +{ + int ret; + u32 action[] = { + XE_GUC_ACTION_CLIENT_SOFT_RESET, + }; + + ret = xe_guc_send_mmio(guc, action, ARRAY_SIZE(action)); + if (ret) { + drm_err(&guc_to_xe(guc)->drm, + "GuC suspend: CLIENT_SOFT_RESET fail: %d!\n", ret); + return ret; + } + + xe_guc_sanitize(guc); + return 0; +} + +void xe_guc_notify(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + + xe_mmio_write32(gt, guc->notify_reg, GUC_SEND_TRIGGER); +} + +int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr) +{ + u32 action[] = { + XE_GUC_ACTION_AUTHENTICATE_HUC, + rsa_addr + }; + + return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); +} + +#define MEDIA_SOFT_SCRATCH(n) _MMIO(0x190310 + (n) * 4) +#define MEDIA_SOFT_SCRATCH_COUNT 4 + +int xe_guc_send_mmio(struct xe_guc *guc, const u32 *request, u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + u32 header; + u32 reply_reg = xe_gt_is_media_type(gt) ? + MEDIA_SOFT_SCRATCH(0).reg : GEN11_SOFT_SCRATCH(0).reg; + int ret; + int i; + + XE_BUG_ON(guc->ct.enabled); + XE_BUG_ON(!len); + XE_BUG_ON(len > GEN11_SOFT_SCRATCH_COUNT); + XE_BUG_ON(len > MEDIA_SOFT_SCRATCH_COUNT); + XE_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) != + GUC_HXG_ORIGIN_HOST); + XE_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) != + GUC_HXG_TYPE_REQUEST); + +retry: + /* Not in critical data-path, just do if else for GT type */ + if (xe_gt_is_media_type(gt)) { + for (i = 0; i < len; ++i) + xe_mmio_write32(gt, MEDIA_SOFT_SCRATCH(i).reg, + request[i]); +#define LAST_INDEX MEDIA_SOFT_SCRATCH_COUNT - 1 + xe_mmio_read32(gt, MEDIA_SOFT_SCRATCH(LAST_INDEX).reg); + } else { + for (i = 0; i < len; ++i) + xe_mmio_write32(gt, GEN11_SOFT_SCRATCH(i).reg, + request[i]); +#undef LAST_INDEX +#define LAST_INDEX GEN11_SOFT_SCRATCH_COUNT - 1 + xe_mmio_read32(gt, GEN11_SOFT_SCRATCH(LAST_INDEX).reg); + } + + xe_guc_notify(guc); + + ret = xe_mmio_wait32(gt, reply_reg, + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, + GUC_HXG_ORIGIN_GUC), + GUC_HXG_MSG_0_ORIGIN, + 50); + if (ret) { +timeout: + drm_err(&xe->drm, "mmio request 0x%08x: no reply 0x%08x\n", + request[0], xe_mmio_read32(gt, reply_reg)); + return ret; + } + + header = xe_mmio_read32(gt, reply_reg); + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == + GUC_HXG_TYPE_NO_RESPONSE_BUSY) { +#define done ({ header = xe_mmio_read32(gt, reply_reg); \ + FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != \ + GUC_HXG_ORIGIN_GUC || \ + FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != \ + GUC_HXG_TYPE_NO_RESPONSE_BUSY; }) + + ret = wait_for(done, 1000); + if (unlikely(ret)) + goto timeout; + if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != + GUC_HXG_ORIGIN_GUC)) + goto proto; +#undef done + } + + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == + GUC_HXG_TYPE_NO_RESPONSE_RETRY) { + u32 reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, header); + + 
drm_dbg(&xe->drm, "mmio request %#x: retrying, reason %u\n", + request[0], reason); + goto retry; + } + + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == + GUC_HXG_TYPE_RESPONSE_FAILURE) { + u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header); + u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header); + + drm_err(&xe->drm, "mmio request %#x: failure %x/%u\n", + request[0], error, hint); + return -ENXIO; + } + + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != + GUC_HXG_TYPE_RESPONSE_SUCCESS) { +proto: + drm_err(&xe->drm, "mmio request %#x: unexpected reply %#x\n", + request[0], header); + return -EPROTO; + } + + /* Use data from the GuC response as our return value */ + return FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, header); +} + +static int guc_self_cfg(struct xe_guc *guc, u16 key, u16 len, u64 val) +{ + u32 request[HOST2GUC_SELF_CFG_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, + GUC_ACTION_HOST2GUC_SELF_CFG), + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY, key) | + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN, len), + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32, + lower_32_bits(val)), + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64, + upper_32_bits(val)), + }; + int ret; + + XE_BUG_ON(len > 2); + XE_BUG_ON(len == 1 && upper_32_bits(val)); + + /* Self config must go over MMIO */ + ret = xe_guc_send_mmio(guc, request, ARRAY_SIZE(request)); + + if (unlikely(ret < 0)) + return ret; + if (unlikely(ret > 1)) + return -EPROTO; + if (unlikely(!ret)) + return -ENOKEY; + + return 0; +} + +int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val) +{ + return guc_self_cfg(guc, key, 1, val); +} + +int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val) +{ + return guc_self_cfg(guc, key, 2, val); +} + +void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir) +{ + if (iir & GUC_INTR_GUC2HOST) + xe_guc_ct_irq_handler(&guc->ct); +} + +void xe_guc_sanitize(struct xe_guc *guc) +{ + xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); + xe_guc_ct_disable(&guc->ct); +} + +int xe_guc_reset_prepare(struct xe_guc *guc) +{ + return xe_guc_submit_reset_prepare(guc); +} + +void xe_guc_reset_wait(struct xe_guc *guc) +{ + xe_guc_submit_reset_wait(guc); +} + +void xe_guc_stop_prepare(struct xe_guc *guc) +{ + XE_WARN_ON(xe_guc_pc_stop(&guc->pc)); +} + +int xe_guc_stop(struct xe_guc *guc) +{ + int ret; + + xe_guc_ct_disable(&guc->ct); + + ret = xe_guc_submit_stop(guc); + if (ret) + return ret; + + return 0; +} + +int xe_guc_start(struct xe_guc *guc) +{ + int ret; + + ret = xe_guc_submit_start(guc); + if (ret) + return ret; + + ret = xe_guc_pc_start(&guc->pc); + XE_WARN_ON(ret); + + return 0; +} + +void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) +{ + struct xe_gt *gt = guc_to_gt(guc); + u32 status; + int err; + int i; + + xe_uc_fw_print(&guc->fw, p); + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return; + + status = xe_mmio_read32(gt, GUC_STATUS.reg); + + drm_printf(p, "\nGuC status 0x%08x:\n", status); + drm_printf(p, "\tBootrom status = 0x%x\n", + (status & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT); + drm_printf(p, "\tuKernel status = 0x%x\n", + (status & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT); + drm_printf(p, "\tMIA Core status = 0x%x\n", + (status & GS_MIA_MASK) >> GS_MIA_SHIFT); + drm_printf(p, "\tLog level = %d\n", + xe_guc_log_get_level(&guc->log)); + + drm_puts(p, "\nScratch registers:\n"); + for (i = 0; i < 
SOFT_SCRATCH_COUNT; i++) { + drm_printf(p, "\t%2d: \t0x%x\n", + i, xe_mmio_read32(gt, SOFT_SCRATCH(i).reg)); + } + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + + xe_guc_ct_print(&guc->ct, p); + xe_guc_submit_print(guc, p); +} diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h new file mode 100644 index 000000000000..72b71d75566c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_H_ +#define _XE_GUC_H_ + +#include "xe_hw_engine_types.h" +#include "xe_guc_types.h" +#include "xe_macros.h" + +struct drm_printer; + +int xe_guc_init(struct xe_guc *guc); +int xe_guc_init_post_hwconfig(struct xe_guc *guc); +int xe_guc_post_load_init(struct xe_guc *guc); +int xe_guc_reset(struct xe_guc *guc); +int xe_guc_upload(struct xe_guc *guc); +int xe_guc_min_load_for_hwconfig(struct xe_guc *guc); +int xe_guc_enable_communication(struct xe_guc *guc); +int xe_guc_suspend(struct xe_guc *guc); +void xe_guc_notify(struct xe_guc *guc); +int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr); +int xe_guc_send_mmio(struct xe_guc *guc, const u32 *request, u32 len); +int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val); +int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val); +void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir); +void xe_guc_sanitize(struct xe_guc *guc); +void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p); +int xe_guc_reset_prepare(struct xe_guc *guc); +void xe_guc_reset_wait(struct xe_guc *guc); +void xe_guc_stop_prepare(struct xe_guc *guc); +int xe_guc_stop(struct xe_guc *guc); +int xe_guc_start(struct xe_guc *guc); + +static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class) +{ + switch (class) { + case XE_ENGINE_CLASS_RENDER: + return GUC_RENDER_CLASS; + case XE_ENGINE_CLASS_VIDEO_DECODE: + return GUC_VIDEO_CLASS; + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + return GUC_VIDEOENHANCE_CLASS; + case XE_ENGINE_CLASS_COPY: + return GUC_BLITTER_CLASS; + case XE_ENGINE_CLASS_COMPUTE: + return GUC_COMPUTE_CLASS; + case XE_ENGINE_CLASS_OTHER: + default: + XE_WARN_ON(class); + return -1; + } +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c new file mode 100644 index 000000000000..0c08cecaca40 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -0,0 +1,676 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include "xe_bo.h" +#include "xe_gt.h" +#include "xe_guc.h" +#include "xe_guc_ads.h" +#include "xe_guc_reg.h" +#include "xe_hw_engine.h" +#include "xe_lrc.h" +#include "xe_map.h" +#include "xe_mmio.h" +#include "xe_platform_types.h" +#include "gt/intel_gt_regs.h" +#include "gt/intel_engine_regs.h" + +/* Slack of a few additional entries per engine */ +#define ADS_REGSET_EXTRA_MAX 8 + +static struct xe_guc * +ads_to_guc(struct xe_guc_ads *ads) +{ + return container_of(ads, struct xe_guc, ads); +} + +static struct xe_gt * +ads_to_gt(struct xe_guc_ads *ads) +{ + return container_of(ads, struct xe_gt, uc.guc.ads); +} + +static struct xe_device * +ads_to_xe(struct xe_guc_ads *ads) +{ + return gt_to_xe(ads_to_gt(ads)); +} + +static struct iosys_map * +ads_to_map(struct xe_guc_ads *ads) +{ + return &ads->bo->vmap; +} + +/* UM Queue parameters: */ +#define GUC_UM_QUEUE_SIZE (SZ_64K) +#define GUC_PAGE_RES_TIMEOUT_US (-1) + +/* + * The Additional Data Struct (ADS) has pointers for different buffers used by + * the GuC. 
One single gem object contains the ADS struct itself (guc_ads) and + * all the extra buffers indirectly linked via the ADS struct's entries. + * + * Layout of the ADS blob allocated for the GuC: + * + * +---------------------------------------+ <== base + * | guc_ads | + * +---------------------------------------+ + * | guc_policies | + * +---------------------------------------+ + * | guc_gt_system_info | + * +---------------------------------------+ + * | guc_engine_usage | + * +---------------------------------------+ + * | guc_um_init_params | + * +---------------------------------------+ <== static + * | guc_mmio_reg[countA] (engine 0.0) | + * | guc_mmio_reg[countB] (engine 0.1) | + * | guc_mmio_reg[countC] (engine 1.0) | + * | ... | + * +---------------------------------------+ <== dynamic + * | padding | + * +---------------------------------------+ <== 4K aligned + * | golden contexts | + * +---------------------------------------+ + * | padding | + * +---------------------------------------+ <== 4K aligned + * | capture lists | + * +---------------------------------------+ + * | padding | + * +---------------------------------------+ <== 4K aligned + * | UM queues | + * +---------------------------------------+ + * | padding | + * +---------------------------------------+ <== 4K aligned + * | private data | + * +---------------------------------------+ + * | padding | + * +---------------------------------------+ <== 4K aligned + */ +struct __guc_ads_blob { + struct guc_ads ads; + struct guc_policies policies; + struct guc_gt_system_info system_info; + struct guc_engine_usage engine_usage; + struct guc_um_init_params um_init_params; + /* From here on, location is dynamic! Refer to above diagram. */ + struct guc_mmio_reg regset[0]; +} __packed; + +#define ads_blob_read(ads_, field_) \ + xe_map_rd_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \ + struct __guc_ads_blob, field_) + +#define ads_blob_write(ads_, field_, val_) \ + xe_map_wr_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \ + struct __guc_ads_blob, field_, val_) + +#define info_map_write(xe_, map_, field_, val_) \ + xe_map_wr_field(xe_, map_, 0, struct guc_gt_system_info, field_, val_) + +#define info_map_read(xe_, map_, field_) \ + xe_map_rd_field(xe_, map_, 0, struct guc_gt_system_info, field_) + +static size_t guc_ads_regset_size(struct xe_guc_ads *ads) +{ + XE_BUG_ON(!ads->regset_size); + + return ads->regset_size; +} + +static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads) +{ + return PAGE_ALIGN(ads->golden_lrc_size); +} + +static size_t guc_ads_capture_size(struct xe_guc_ads *ads) +{ + /* FIXME: Allocate a proper capture list */ + return PAGE_ALIGN(PAGE_SIZE); +} + +static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + + if (!xe->info.supports_usm) + return 0; + + return GUC_UM_QUEUE_SIZE * GUC_UM_HW_QUEUE_MAX; +} + +static size_t guc_ads_private_data_size(struct xe_guc_ads *ads) +{ + return PAGE_ALIGN(ads_to_guc(ads)->fw.private_data_size); +} + +static size_t guc_ads_regset_offset(struct xe_guc_ads *ads) +{ + return offsetof(struct __guc_ads_blob, regset); +} + +static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads) +{ + size_t offset; + + offset = guc_ads_regset_offset(ads) + + guc_ads_regset_size(ads); + + return PAGE_ALIGN(offset); +} + +static size_t guc_ads_capture_offset(struct xe_guc_ads *ads) +{ + size_t offset; + + offset = guc_ads_golden_lrc_offset(ads) + + guc_ads_golden_lrc_size(ads); + + return PAGE_ALIGN(offset); +} + +static size_t 
guc_ads_um_queues_offset(struct xe_guc_ads *ads) +{ + u32 offset; + + offset = guc_ads_capture_offset(ads) + + guc_ads_capture_size(ads); + + return PAGE_ALIGN(offset); +} + +static size_t guc_ads_private_data_offset(struct xe_guc_ads *ads) +{ + size_t offset; + + offset = guc_ads_um_queues_offset(ads) + + guc_ads_um_queues_size(ads); + + return PAGE_ALIGN(offset); +} + +static size_t guc_ads_size(struct xe_guc_ads *ads) +{ + return guc_ads_private_data_offset(ads) + + guc_ads_private_data_size(ads); +} + +static void guc_ads_fini(struct drm_device *drm, void *arg) +{ + struct xe_guc_ads *ads = arg; + + xe_bo_unpin_map_no_vm(ads->bo); +} + +static size_t calculate_regset_size(struct xe_gt *gt) +{ + struct xe_reg_sr_entry *sr_entry; + unsigned long sr_idx; + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + unsigned int count = 0; + + for_each_hw_engine(hwe, gt, id) + xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry) + count++; + + count += (ADS_REGSET_EXTRA_MAX + LNCFCMOCS_REG_COUNT) * XE_NUM_HW_ENGINES; + + return count * sizeof(struct guc_mmio_reg); +} + +static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + u32 mask = 0; + + for_each_hw_engine(hwe, gt, id) + if (hwe->class == class) + mask |= BIT(hwe->instance); + + return mask; +} + +static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + struct xe_gt *gt = ads_to_gt(ads); + size_t total_size = 0, alloc_size, real_size; + int class; + + for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { + if (class == XE_ENGINE_CLASS_OTHER) + continue; + + if (!engine_enable_mask(gt, class)) + continue; + + real_size = xe_lrc_size(xe, class); + alloc_size = PAGE_ALIGN(real_size); + total_size += alloc_size; + } + + return total_size; +} + +#define MAX_GOLDEN_LRC_SIZE (SZ_4K * 64) + +int xe_guc_ads_init(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + struct xe_gt *gt = ads_to_gt(ads); + struct xe_bo *bo; + int err; + + ads->golden_lrc_size = calculate_golden_lrc_size(ads); + ads->regset_size = calculate_regset_size(gt); + + bo = xe_bo_create_pin_map(xe, gt, NULL, guc_ads_size(ads) + + MAX_GOLDEN_LRC_SIZE, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + ads->bo = bo; + + err = drmm_add_action_or_reset(&xe->drm, guc_ads_fini, ads); + if (err) + return err; + + return 0; +} + +/** + * xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load + * @ads: Additional data structures object + * + * Recalcuate golden_lrc_size & regset_size as the number hardware engines may + * have changed after the hwconfig was loaded. Also verify the new sizes fit in + * the already allocated ADS buffer object. + * + * Return: 0 on success, negative error code on error. 
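+ *
+ * Note that the backing BO is not reallocated here: xe_guc_ads_init()
+ * over-allocates by MAX_GOLDEN_LRC_SIZE, and the check below only warns if
+ * the new golden LRC size plus any regset growth no longer fits in that
+ * headroom.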
+ */ +int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads) +{ + struct xe_gt *gt = ads_to_gt(ads); + u32 prev_regset_size = ads->regset_size; + + XE_BUG_ON(!ads->bo); + + ads->golden_lrc_size = calculate_golden_lrc_size(ads); + ads->regset_size = calculate_regset_size(gt); + + XE_WARN_ON(ads->golden_lrc_size + + (ads->regset_size - prev_regset_size) > + MAX_GOLDEN_LRC_SIZE); + + return 0; +} + +static void guc_policies_init(struct xe_guc_ads *ads) +{ + ads_blob_write(ads, policies.dpc_promote_time, + GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US); + ads_blob_write(ads, policies.max_num_work_items, + GLOBAL_POLICY_MAX_NUM_WI); + ads_blob_write(ads, policies.global_flags, 0); + ads_blob_write(ads, policies.is_valid, 1); +} + +static void fill_engine_enable_masks(struct xe_gt *gt, + struct iosys_map *info_map) +{ + struct xe_device *xe = gt_to_xe(gt); + + info_map_write(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS], + engine_enable_mask(gt, XE_ENGINE_CLASS_RENDER)); + info_map_write(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS], + engine_enable_mask(gt, XE_ENGINE_CLASS_COPY)); + info_map_write(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS], + engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_DECODE)); + info_map_write(xe, info_map, + engine_enabled_masks[GUC_VIDEOENHANCE_CLASS], + engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE)); + info_map_write(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], + engine_enable_mask(gt, XE_ENGINE_CLASS_COMPUTE)); +} + +static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), + offsetof(struct __guc_ads_blob, system_info)); + u8 guc_class; + + for (guc_class = 0; guc_class <= GUC_MAX_ENGINE_CLASSES; ++guc_class) { + if (!info_map_read(xe, &info_map, + engine_enabled_masks[guc_class])) + continue; + + ads_blob_write(ads, ads.eng_state_size[guc_class], + guc_ads_golden_lrc_size(ads) - + xe_lrc_skip_size(xe)); + ads_blob_write(ads, ads.golden_context_lrca[guc_class], + xe_bo_ggtt_addr(ads->bo) + + guc_ads_golden_lrc_offset(ads)); + } +} + +static void guc_mapping_table_init_invalid(struct xe_gt *gt, + struct iosys_map *info_map) +{ + struct xe_device *xe = gt_to_xe(gt); + unsigned int i, j; + + /* Table must be set to invalid values for entries not used */ + for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i) + for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j) + info_map_write(xe, info_map, mapping_table[i][j], + GUC_MAX_INSTANCES_PER_CLASS); +} + +static void guc_mapping_table_init(struct xe_gt *gt, + struct iosys_map *info_map) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + guc_mapping_table_init_invalid(gt, info_map); + + for_each_hw_engine(hwe, gt, id) { + u8 guc_class; + + guc_class = xe_engine_class_to_guc_class(hwe->class); + info_map_write(xe, info_map, + mapping_table[guc_class][hwe->logical_instance], + hwe->instance); + } +} + +static void guc_capture_list_init(struct xe_guc_ads *ads) +{ + int i, j; + u32 addr = xe_bo_ggtt_addr(ads->bo) + guc_ads_capture_offset(ads); + + /* FIXME: Populate a proper capture list */ + for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) { + for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) { + ads_blob_write(ads, ads.capture_instance[i][j], addr); + ads_blob_write(ads, ads.capture_class[i][j], addr); + } + + ads_blob_write(ads, ads.capture_global[i], addr); + } +} + +static void guc_mmio_regset_write_one(struct xe_guc_ads *ads, + struct 
iosys_map *regset_map, + u32 reg, u32 flags, + unsigned int n_entry) +{ + struct guc_mmio_reg entry = { + .offset = reg, + .flags = flags, + /* TODO: steering */ + }; + + xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry), + &entry, sizeof(entry)); +} + +static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, + struct iosys_map *regset_map, + struct xe_hw_engine *hwe) +{ + struct xe_hw_engine *hwe_rcs_reset_domain = + xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER); + struct xe_reg_sr_entry *entry; + unsigned long idx; + unsigned count = 0; + const struct { + u32 reg; + u32 flags; + bool skip; + } *e, extra_regs[] = { + { .reg = RING_MODE_GEN7(hwe->mmio_base).reg, }, + { .reg = RING_HWS_PGA(hwe->mmio_base).reg, }, + { .reg = RING_IMR(hwe->mmio_base).reg, }, + { .reg = GEN12_RCU_MODE.reg, .flags = 0x3, + .skip = hwe != hwe_rcs_reset_domain }, + }; + u32 i; + + BUILD_BUG_ON(ARRAY_SIZE(extra_regs) > ADS_REGSET_EXTRA_MAX); + + xa_for_each(&hwe->reg_sr.xa, idx, entry) { + u32 flags = entry->masked_reg ? GUC_REGSET_MASKED : 0; + + guc_mmio_regset_write_one(ads, regset_map, idx, flags, count++); + } + + for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) { + if (e->skip) + continue; + + guc_mmio_regset_write_one(ads, regset_map, + e->reg, e->flags, count++); + } + + for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) { + guc_mmio_regset_write_one(ads, regset_map, + GEN9_LNCFCMOCS(i).reg, 0, count++); + } + + XE_BUG_ON(ads->regset_size < (count * sizeof(struct guc_mmio_reg))); + + return count; +} + +static void guc_mmio_reg_state_init(struct xe_guc_ads *ads) +{ + size_t regset_offset = guc_ads_regset_offset(ads); + struct xe_gt *gt = ads_to_gt(ads); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + u32 addr = xe_bo_ggtt_addr(ads->bo) + regset_offset; + struct iosys_map regset_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), + regset_offset); + + for_each_hw_engine(hwe, gt, id) { + unsigned int count; + u8 gc; + + /* + * 1. Write all MMIO entries for this engine to the table. No + * need to worry about fused-off engines and when there are + * entries in the regset: the reg_state_list has been zero'ed + * by xe_guc_ads_populate() + */ + count = guc_mmio_regset_write(ads, ®set_map, hwe); + if (!count) + continue; + + /* + * 2. 
Record in the header (ads.reg_state_list) the address + * location and number of entries + */ + gc = xe_engine_class_to_guc_class(hwe->class); + ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].address, addr); + ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].count, count); + + addr += count * sizeof(struct guc_mmio_reg); + iosys_map_incr(®set_map, count * sizeof(struct guc_mmio_reg)); + } +} + +static void guc_um_init_params(struct xe_guc_ads *ads) +{ + u32 um_queue_offset = guc_ads_um_queues_offset(ads); + u64 base_dpa; + u32 base_ggtt; + int i; + + base_ggtt = xe_bo_ggtt_addr(ads->bo) + um_queue_offset; + base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset; + + for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) { + ads_blob_write(ads, um_init_params.queue_params[i].base_dpa, + base_dpa + (i * GUC_UM_QUEUE_SIZE)); + ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address, + base_ggtt + (i * GUC_UM_QUEUE_SIZE)); + ads_blob_write(ads, um_init_params.queue_params[i].size_in_bytes, + GUC_UM_QUEUE_SIZE); + } + + ads_blob_write(ads, um_init_params.page_response_timeout_in_us, + GUC_PAGE_RES_TIMEOUT_US); +} + +static void guc_doorbell_init(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + struct xe_gt *gt = ads_to_gt(ads); + + if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) { + u32 distdbreg = + xe_mmio_read32(gt, GEN12_DIST_DBS_POPULATED.reg); + + ads_blob_write(ads, + system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI], + ((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT) + & GEN12_DOORBELLS_PER_SQIDI) + 1); + } +} + +/** + * xe_guc_ads_populate_minimal - populate minimal ADS + * @ads: Additional data structures object + * + * This function populates a minimal ADS that does not support submissions but + * enough so the GuC can load and the hwconfig table can be read. 
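+ *
+ * Concretely this writes the global policies, a null golden LRC, an
+ * all-invalid engine mapping table and the doorbell info, plus the blob
+ * pointers (scheduler_policies, gt_system_info, private_data) the GuC
+ * expects to find in the ADS.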
+ */ +void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads) +{ + struct xe_gt *gt = ads_to_gt(ads); + struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), + offsetof(struct __guc_ads_blob, system_info)); + u32 base = xe_bo_ggtt_addr(ads->bo); + + XE_BUG_ON(!ads->bo); + + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); + guc_policies_init(ads); + guc_prep_golden_lrc_null(ads); + guc_mapping_table_init_invalid(gt, &info_map); + guc_doorbell_init(ads); + + ads_blob_write(ads, ads.scheduler_policies, base + + offsetof(struct __guc_ads_blob, policies)); + ads_blob_write(ads, ads.gt_system_info, base + + offsetof(struct __guc_ads_blob, system_info)); + ads_blob_write(ads, ads.private_data, base + + guc_ads_private_data_offset(ads)); +} + +void xe_guc_ads_populate(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + struct xe_gt *gt = ads_to_gt(ads); + struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), + offsetof(struct __guc_ads_blob, system_info)); + u32 base = xe_bo_ggtt_addr(ads->bo); + + XE_BUG_ON(!ads->bo); + + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); + guc_policies_init(ads); + fill_engine_enable_masks(gt, &info_map); + guc_mmio_reg_state_init(ads); + guc_prep_golden_lrc_null(ads); + guc_mapping_table_init(gt, &info_map); + guc_capture_list_init(ads); + guc_doorbell_init(ads); + + if (xe->info.supports_usm) { + guc_um_init_params(ads); + ads_blob_write(ads, ads.um_init_data, base + + offsetof(struct __guc_ads_blob, um_init_params)); + } + + ads_blob_write(ads, ads.scheduler_policies, base + + offsetof(struct __guc_ads_blob, policies)); + ads_blob_write(ads, ads.gt_system_info, base + + offsetof(struct __guc_ads_blob, system_info)); + ads_blob_write(ads, ads.private_data, base + + guc_ads_private_data_offset(ads)); +} + +static void guc_populate_golden_lrc(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + struct xe_gt *gt = ads_to_gt(ads); + struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), + offsetof(struct __guc_ads_blob, system_info)); + size_t total_size = 0, alloc_size, real_size; + u32 addr_ggtt, offset; + int class; + + offset = guc_ads_golden_lrc_offset(ads); + addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; + + for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { + u8 guc_class; + + if (class == XE_ENGINE_CLASS_OTHER) + continue; + + guc_class = xe_engine_class_to_guc_class(class); + + if (!info_map_read(xe, &info_map, + engine_enabled_masks[guc_class])) + continue; + + XE_BUG_ON(!gt->default_lrc[class]); + + real_size = xe_lrc_size(xe, class); + alloc_size = PAGE_ALIGN(real_size); + total_size += alloc_size; + + /* + * This interface is slightly confusing. We need to pass the + * base address of the full golden context and the size of just + * the engine state, which is the section of the context image + * that starts after the execlists LRC registers. This is + * required to allow the GuC to restore just the engine state + * when a watchdog reset occurs. + * We calculate the engine state size by removing the size of + * what comes before it in the context image (which is identical + * on all engines). 
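+ * Put differently, for each enabled class:
+ *   eng_state_size[guc_class] = xe_lrc_size(xe, class) - xe_lrc_skip_size(xe)
+ * and golden_context_lrca[guc_class] points at the page-aligned copy of
+ * the default LRC written at addr_ggtt below.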
+ */ + ads_blob_write(ads, ads.eng_state_size[guc_class], + real_size - xe_lrc_skip_size(xe)); + ads_blob_write(ads, ads.golden_context_lrca[guc_class], + addr_ggtt); + + xe_map_memcpy_to(xe, ads_to_map(ads), offset, + gt->default_lrc[class], real_size); + + addr_ggtt += alloc_size; + offset += alloc_size; + } + + XE_BUG_ON(total_size != ads->golden_lrc_size); +} + +void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads) +{ + guc_populate_golden_lrc(ads); +} diff --git a/drivers/gpu/drm/xe/xe_guc_ads.h b/drivers/gpu/drm/xe/xe_guc_ads.h new file mode 100644 index 000000000000..138ef6267671 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_ads.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_ADS_H_ +#define _XE_GUC_ADS_H_ + +#include "xe_guc_ads_types.h" + +int xe_guc_ads_init(struct xe_guc_ads *ads); +int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads); +void xe_guc_ads_populate(struct xe_guc_ads *ads); +void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads); +void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_ads_types.h b/drivers/gpu/drm/xe/xe_guc_ads_types.h new file mode 100644 index 000000000000..4afe44bece4b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_ads_types.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_ADS_TYPES_H_ +#define _XE_GUC_ADS_TYPES_H_ + +#include + +struct xe_bo; + +/** + * struct xe_guc_ads - GuC additional data structures (ADS) + */ +struct xe_guc_ads { + /** @bo: XE BO for GuC ads blob */ + struct xe_bo *bo; + /** @golden_lrc_size: golden LRC size */ + size_t golden_lrc_size; + /** @regset_size: size of register set passed to GuC for save/restore */ + u32 regset_size; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c new file mode 100644 index 000000000000..61a424c41779 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -0,0 +1,1196 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include +#include + +#include + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_gt_pagefault.h" +#include "xe_guc_submit.h" +#include "xe_map.h" +#include "xe_trace.h" + +/* Used when a CT send wants to block and / or receive data */ +struct g2h_fence { + wait_queue_head_t wq; + u32 *response_buffer; + u32 seqno; + u16 response_len; + u16 error; + u16 hint; + u16 reason; + bool retry; + bool fail; + bool done; +}; + +static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer) +{ + g2h_fence->response_buffer = response_buffer; + g2h_fence->response_len = 0; + g2h_fence->fail = false; + g2h_fence->retry = false; + g2h_fence->done = false; + g2h_fence->seqno = ~0x0; +} + +static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence) +{ + return g2h_fence->seqno == ~0x0; +} + +static struct xe_guc * +ct_to_guc(struct xe_guc_ct *ct) +{ + return container_of(ct, struct xe_guc, ct); +} + +static struct xe_gt * +ct_to_gt(struct xe_guc_ct *ct) +{ + return container_of(ct, struct xe_gt, uc.guc.ct); +} + +static struct xe_device * +ct_to_xe(struct xe_guc_ct *ct) +{ + return gt_to_xe(ct_to_gt(ct)); +} + +/** + * DOC: GuC CTB Blob + * + * We allocate single blob to hold both CTB descriptors and buffers: + * + * +--------+-----------------------------------------------+------+ + * | offset | contents | size | 
+ * +========+===============================================+======+ + * | 0x0000 | H2G CTB Descriptor (send) | | + * +--------+-----------------------------------------------+ 4K | + * | 0x0800 | G2H CTB Descriptor (g2h) | | + * +--------+-----------------------------------------------+------+ + * | 0x1000 | H2G CT Buffer (send) | n*4K | + * | | | | + * +--------+-----------------------------------------------+------+ + * | 0x1000 | G2H CT Buffer (g2h) | m*4K | + * | + n*4K | | | + * +--------+-----------------------------------------------+------+ + * + * Size of each ``CT Buffer`` must be multiple of 4K. + * We don't expect too many messages in flight at any time, unless we are + * using the GuC submission. In that case each request requires a minimum + * 2 dwords which gives us a maximum 256 queue'd requests. Hopefully this + * enough space to avoid backpressure on the driver. We increase the size + * of the receive buffer (relative to the send) to ensure a G2H response + * CTB has a landing spot. + */ + +#define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) +#define CTB_H2G_BUFFER_SIZE (SZ_4K) +#define CTB_G2H_BUFFER_SIZE (4 * CTB_H2G_BUFFER_SIZE) +#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 4) + +static size_t guc_ct_size(void) +{ + return 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE + + CTB_G2H_BUFFER_SIZE; +} + +static void guc_ct_fini(struct drm_device *drm, void *arg) +{ + struct xe_guc_ct *ct = arg; + + xa_destroy(&ct->fence_lookup); + xe_bo_unpin_map_no_vm(ct->bo); +} + +static void g2h_worker_func(struct work_struct *w); + +static void primelockdep(struct xe_guc_ct *ct) +{ + if (!IS_ENABLED(CONFIG_LOCKDEP)) + return; + + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&ct->lock); + fs_reclaim_release(GFP_KERNEL); +} + +int xe_guc_ct_init(struct xe_guc_ct *ct) +{ + struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); + struct xe_bo *bo; + int err; + + XE_BUG_ON(guc_ct_size() % PAGE_SIZE); + + mutex_init(&ct->lock); + spin_lock_init(&ct->fast_lock); + xa_init(&ct->fence_lookup); + ct->fence_context = dma_fence_context_alloc(1); + INIT_WORK(&ct->g2h_worker, g2h_worker_func); + init_waitqueue_head(&ct->wq); + + primelockdep(ct); + + bo = xe_bo_create_pin_map(xe, gt, NULL, guc_ct_size(), + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + ct->bo = bo; + + err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct); + if (err) + return err; + + return 0; +} + +#define desc_read(xe_, guc_ctb__, field_) \ + xe_map_rd_field(xe_, &guc_ctb__->desc, 0, \ + struct guc_ct_buffer_desc, field_) + +#define desc_write(xe_, guc_ctb__, field_, val_) \ + xe_map_wr_field(xe_, &guc_ctb__->desc, 0, \ + struct guc_ct_buffer_desc, field_, val_) + +static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g, + struct iosys_map *map) +{ + h2g->size = CTB_H2G_BUFFER_SIZE / sizeof(u32); + h2g->resv_space = 0; + h2g->tail = 0; + h2g->head = 0; + h2g->space = CIRC_SPACE(h2g->tail, h2g->head, h2g->size) - + h2g->resv_space; + h2g->broken = false; + + h2g->desc = *map; + xe_map_memset(xe, &h2g->desc, 0, 0, sizeof(struct guc_ct_buffer_desc)); + + h2g->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2); +} + +static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h, + struct iosys_map *map) +{ + g2h->size = CTB_G2H_BUFFER_SIZE / sizeof(u32); + g2h->resv_space = G2H_ROOM_BUFFER_SIZE / sizeof(u32); + g2h->head = 0; + g2h->tail = 0; + g2h->space = CIRC_SPACE(g2h->tail, 
g2h->head, g2h->size) - + g2h->resv_space; + g2h->broken = false; + + g2h->desc = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE); + xe_map_memset(xe, &g2h->desc, 0, 0, sizeof(struct guc_ct_buffer_desc)); + + g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2 + + CTB_H2G_BUFFER_SIZE); +} + +static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct) +{ + struct xe_guc *guc = ct_to_guc(ct); + u32 desc_addr, ctb_addr, size; + int err; + + desc_addr = xe_bo_ggtt_addr(ct->bo); + ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2; + size = ct->ctbs.h2g.size * sizeof(u32); + + err = xe_guc_self_cfg64(guc, + GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY, + desc_addr); + if (err) + return err; + + err = xe_guc_self_cfg64(guc, + GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY, + ctb_addr); + if (err) + return err; + + return xe_guc_self_cfg32(guc, + GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY, + size); +} + +static int guc_ct_ctb_g2h_register(struct xe_guc_ct *ct) +{ + struct xe_guc *guc = ct_to_guc(ct); + u32 desc_addr, ctb_addr, size; + int err; + + desc_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE; + ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2 + + CTB_H2G_BUFFER_SIZE; + size = ct->ctbs.g2h.size * sizeof(u32); + + err = xe_guc_self_cfg64(guc, + GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY, + desc_addr); + if (err) + return err; + + err = xe_guc_self_cfg64(guc, + GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY, + ctb_addr); + if (err) + return err; + + return xe_guc_self_cfg32(guc, + GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY, + size); +} + +static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable) +{ + u32 request[HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, + GUC_ACTION_HOST2GUC_CONTROL_CTB), + FIELD_PREP(HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL, + enable ? GUC_CTB_CONTROL_ENABLE : + GUC_CTB_CONTROL_DISABLE), + }; + int ret = xe_guc_send_mmio(ct_to_guc(ct), request, ARRAY_SIZE(request)); + + return ret > 0 ? 
-EPROTO : ret; +} + +int xe_guc_ct_enable(struct xe_guc_ct *ct) +{ + struct xe_device *xe = ct_to_xe(ct); + int err; + + XE_BUG_ON(ct->enabled); + + guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap); + guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap); + + err = guc_ct_ctb_h2g_register(ct); + if (err) + goto err_out; + + err = guc_ct_ctb_g2h_register(ct); + if (err) + goto err_out; + + err = guc_ct_control_toggle(ct, true); + if (err) + goto err_out; + + mutex_lock(&ct->lock); + ct->g2h_outstanding = 0; + ct->enabled = true; + mutex_unlock(&ct->lock); + + smp_mb(); + wake_up_all(&ct->wq); + drm_dbg(&xe->drm, "GuC CT communication channel enabled\n"); + + return 0; + +err_out: + drm_err(&xe->drm, "Failed to enabled CT (%d)\n", err); + + return err; +} + +void xe_guc_ct_disable(struct xe_guc_ct *ct) +{ + mutex_lock(&ct->lock); + ct->enabled = false; + mutex_unlock(&ct->lock); + + xa_destroy(&ct->fence_lookup); +} + +static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len) +{ + struct guc_ctb *h2g = &ct->ctbs.h2g; + + lockdep_assert_held(&ct->lock); + + if (cmd_len > h2g->space) { + h2g->head = desc_read(ct_to_xe(ct), h2g, head); + h2g->space = CIRC_SPACE(h2g->tail, h2g->head, h2g->size) - + h2g->resv_space; + if (cmd_len > h2g->space) + return false; + } + + return true; +} + +static bool g2h_has_room(struct xe_guc_ct *ct, u32 g2h_len) +{ + lockdep_assert_held(&ct->lock); + + return ct->ctbs.g2h.space > g2h_len; +} + +static int has_room(struct xe_guc_ct *ct, u32 cmd_len, u32 g2h_len) +{ + lockdep_assert_held(&ct->lock); + + if (!g2h_has_room(ct, g2h_len) || !h2g_has_room(ct, cmd_len)) + return -EBUSY; + + return 0; +} + +static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len) +{ + lockdep_assert_held(&ct->lock); + ct->ctbs.h2g.space -= cmd_len; +} + +static void g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) +{ + XE_BUG_ON(g2h_len > ct->ctbs.g2h.space); + + if (g2h_len) { + spin_lock_irq(&ct->fast_lock); + ct->ctbs.g2h.space -= g2h_len; + ct->g2h_outstanding += num_g2h; + spin_unlock_irq(&ct->fast_lock); + } +} + +static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) +{ + lockdep_assert_held(&ct->fast_lock); + XE_WARN_ON(ct->ctbs.g2h.space + g2h_len > + ct->ctbs.g2h.size - ct->ctbs.g2h.resv_space); + + ct->ctbs.g2h.space += g2h_len; + --ct->g2h_outstanding; +} + +static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) +{ + spin_lock_irq(&ct->fast_lock); + __g2h_release_space(ct, g2h_len); + spin_unlock_irq(&ct->fast_lock); +} + +static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 ct_fence_value, bool want_response) +{ + struct xe_device *xe = ct_to_xe(ct); + struct guc_ctb *h2g = &ct->ctbs.h2g; + u32 cmd[GUC_CTB_MSG_MAX_LEN / sizeof(u32)]; + u32 cmd_len = len + GUC_CTB_HDR_LEN; + u32 cmd_idx = 0, i; + u32 tail = h2g->tail; + struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds, + tail * sizeof(u32)); + + lockdep_assert_held(&ct->lock); + XE_BUG_ON(len * sizeof(u32) > GUC_CTB_MSG_MAX_LEN); + XE_BUG_ON(tail > h2g->size); + + /* Command will wrap, zero fill (NOPs), return and check credits again */ + if (tail + cmd_len > h2g->size) { + xe_map_memset(xe, &map, 0, 0, (h2g->size - tail) * sizeof(u32)); + h2g_reserve_space(ct, (h2g->size - tail)); + h2g->tail = 0; + desc_write(xe, h2g, tail, h2g->tail); + + return -EAGAIN; + } + + /* + * dw0: CT header (including fence) + * dw1: HXG header (including action code) + * dw2+: action data + */ + cmd[cmd_idx++] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) 
| + FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | + FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value); + if (want_response) { + cmd[cmd_idx++] = + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | + GUC_HXG_EVENT_MSG_0_DATA0, action[0]); + } else { + cmd[cmd_idx++] = + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | + FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | + GUC_HXG_EVENT_MSG_0_DATA0, action[0]); + } + for (i = 1; i < len; ++i) + cmd[cmd_idx++] = action[i]; + + /* Write H2G ensuring visable before descriptor update */ + xe_map_memcpy_to(xe, &map, 0, cmd, cmd_len * sizeof(u32)); + xe_device_wmb(ct_to_xe(ct)); + + /* Update local copies */ + h2g->tail = (tail + cmd_len) % h2g->size; + h2g_reserve_space(ct, cmd_len); + + /* Update descriptor */ + desc_write(xe, h2g, tail, h2g->tail); + + return 0; +} + +static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, + u32 len, u32 g2h_len, u32 num_g2h, + struct g2h_fence *g2h_fence) +{ + int ret; + + XE_BUG_ON(g2h_len && g2h_fence); + XE_BUG_ON(num_g2h && g2h_fence); + XE_BUG_ON(g2h_len && !num_g2h); + XE_BUG_ON(!g2h_len && num_g2h); + lockdep_assert_held(&ct->lock); + + if (unlikely(ct->ctbs.h2g.broken)) { + ret = -EPIPE; + goto out; + } + + if (unlikely(!ct->enabled)) { + ret = -ENODEV; + goto out; + } + + if (g2h_fence) { + g2h_len = GUC_CTB_HXG_MSG_MAX_LEN; + num_g2h = 1; + + if (g2h_fence_needs_alloc(g2h_fence)) { + void *ptr; + + g2h_fence->seqno = (ct->fence_seqno++ & 0xffff); + init_waitqueue_head(&g2h_fence->wq); + ptr = xa_store(&ct->fence_lookup, + g2h_fence->seqno, + g2h_fence, GFP_ATOMIC); + if (IS_ERR(ptr)) { + ret = PTR_ERR(ptr); + goto out; + } + } + } + + xe_device_mem_access_get(ct_to_xe(ct)); +retry: + ret = has_room(ct, len + GUC_CTB_HDR_LEN, g2h_len); + if (unlikely(ret)) + goto put_wa; + + ret = h2g_write(ct, action, len, g2h_fence ? g2h_fence->seqno : 0, + !!g2h_fence); + if (unlikely(ret)) { + if (ret == -EAGAIN) + goto retry; + goto put_wa; + } + + g2h_reserve_space(ct, g2h_len, num_g2h); + xe_guc_notify(ct_to_guc(ct)); +put_wa: + xe_device_mem_access_put(ct_to_xe(ct)); +out: + + return ret; +} + +static void kick_reset(struct xe_guc_ct *ct) +{ + xe_gt_reset_async(ct_to_gt(ct)); +} + +static int dequeue_one_g2h(struct xe_guc_ct *ct); + +static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 g2h_len, u32 num_g2h, + struct g2h_fence *g2h_fence) +{ + struct drm_device *drm = &ct_to_xe(ct)->drm; + struct drm_printer p = drm_info_printer(drm->dev); + unsigned int sleep_period_ms = 1; + int ret; + + XE_BUG_ON(g2h_len && g2h_fence); + lockdep_assert_held(&ct->lock); + +try_again: + ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, + g2h_fence); + + /* + * We wait to try to restore credits for about 1 second before bailing. + * In the case of H2G credits we have no choice but just to wait for the + * GuC to consume H2Gs in the channel so we use a wait / sleep loop. In + * the case of G2H we process any G2H in the channel, hopefully freeing + * credits as we consume the G2H messages. 
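+ *
+ * Worked example of the H2G budget: sleep_period_ms starts at 1 and
+ * doubles each pass, so we sleep 1 + 2 + 4 + ... + 512 = 1023 ms in total
+ * before the 1024 ms cap below declares the channel broken.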
+ */ + if (unlikely(ret == -EBUSY && + !h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) { + struct guc_ctb *h2g = &ct->ctbs.h2g; + + if (sleep_period_ms == 1024) + goto broken; + + trace_xe_guc_ct_h2g_flow_control(h2g->head, h2g->tail, + h2g->size, h2g->space, + len + GUC_CTB_HDR_LEN); + msleep(sleep_period_ms); + sleep_period_ms <<= 1; + + goto try_again; + } else if (unlikely(ret == -EBUSY)) { + struct xe_device *xe = ct_to_xe(ct); + struct guc_ctb *g2h = &ct->ctbs.g2h; + + trace_xe_guc_ct_g2h_flow_control(g2h->head, + desc_read(xe, g2h, tail), + g2h->size, g2h->space, + g2h_fence ? + GUC_CTB_HXG_MSG_MAX_LEN : + g2h_len); + +#define g2h_avail(ct) \ + (desc_read(ct_to_xe(ct), (&ct->ctbs.g2h), tail) != ct->ctbs.g2h.head) + if (!wait_event_timeout(ct->wq, !ct->g2h_outstanding || + g2h_avail(ct), HZ)) + goto broken; +#undef g2h_avail + + if (dequeue_one_g2h(ct) < 0) + goto broken; + + goto try_again; + } + + return ret; + +broken: + drm_err(drm, "No forward process on H2G, reset required"); + xe_guc_ct_print(ct, &p); + ct->ctbs.h2g.broken = true; + + return -EDEADLK; +} + +static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence) +{ + int ret; + + XE_BUG_ON(g2h_len && g2h_fence); + + mutex_lock(&ct->lock); + ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence); + mutex_unlock(&ct->lock); + + return ret; +} + +int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 g2h_len, u32 num_g2h) +{ + int ret; + + ret = guc_ct_send(ct, action, len, g2h_len, num_g2h, NULL); + if (ret == -EDEADLK) + kick_reset(ct); + + return ret; +} + +int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 g2h_len, u32 num_g2h) +{ + int ret; + + ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, NULL); + if (ret == -EDEADLK) + kick_reset(ct); + + return ret; +} + +int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action, u32 len) +{ + int ret; + + lockdep_assert_held(&ct->lock); + + ret = guc_ct_send_locked(ct, action, len, 0, 0, NULL); + if (ret == -EDEADLK) + kick_reset(ct); + + return ret; +} + +/* + * Check if a GT reset is in progress or will occur and if GT reset brought the + * CT back up. Randomly picking 5 seconds for an upper limit to do a GT a reset. + */ +static bool retry_failure(struct xe_guc_ct *ct, int ret) +{ + if (!(ret == -EDEADLK || ret == -EPIPE || ret == -ENODEV)) + return false; + +#define ct_alive(ct) \ + (ct->enabled && !ct->ctbs.h2g.broken && !ct->ctbs.g2h.broken) + if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5)) + return false; +#undef ct_alive + + return true; +} + +static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 *response_buffer, bool no_fail) +{ + struct xe_device *xe = ct_to_xe(ct); + struct g2h_fence g2h_fence; + int ret = 0; + + /* + * We use a fence to implement blocking sends / receiving response data. + * The seqno of the fence is sent in the H2G, returned in the G2H, and + * an xarray is used as storage media with the seqno being to key. + * Fields in the fence hold success, failure, retry status and the + * response data. Safe to allocate on the stack as the xarray is the + * only reference and it cannot be present after this function exits. 
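+ *
+ * For reference, the fence lifecycle is: the seqno is taken from
+ * ct->fence_seqno (16 bits used), the fence is stored in ct->fence_lookup
+ * keyed by that seqno, the seqno travels in the GUC_CTB_MSG_0_FENCE field
+ * of the H2G header, and parse_g2h_response() erases the xarray entry and
+ * wakes this fence's waitqueue when the matching G2H arrives.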
+ */ +retry: + g2h_fence_init(&g2h_fence, response_buffer); +retry_same_fence: + ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence); + if (unlikely(ret == -ENOMEM)) { + void *ptr; + + /* Retry allocation /w GFP_KERNEL */ + ptr = xa_store(&ct->fence_lookup, + g2h_fence.seqno, + &g2h_fence, GFP_KERNEL); + if (IS_ERR(ptr)) { + return PTR_ERR(ptr); + } + + goto retry_same_fence; + } else if (unlikely(ret)) { + if (ret == -EDEADLK) + kick_reset(ct); + + if (no_fail && retry_failure(ct, ret)) + goto retry_same_fence; + + if (!g2h_fence_needs_alloc(&g2h_fence)) + xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno); + + return ret; + } + + ret = wait_event_timeout(g2h_fence.wq, g2h_fence.done, HZ); + if (!ret) { + drm_err(&xe->drm, "Timed out wait for G2H, fence %u, action %04x", + g2h_fence.seqno, action[0]); + xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno); + return -ETIME; + } + + if (g2h_fence.retry) { + drm_warn(&xe->drm, "Send retry, action 0x%04x, reason %d", + action[0], g2h_fence.reason); + goto retry; + } + if (g2h_fence.fail) { + drm_err(&xe->drm, "Send failed, action 0x%04x, error %d, hint %d", + action[0], g2h_fence.error, g2h_fence.hint); + ret = -EIO; + } + + return ret > 0 ? 0 : ret; +} + +int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 *response_buffer) +{ + return guc_ct_send_recv(ct, action, len, response_buffer, false); +} + +int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action, + u32 len, u32 *response_buffer) +{ + return guc_ct_send_recv(ct, action, len, response_buffer, true); +} + +static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len) +{ + u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]); + + lockdep_assert_held(&ct->lock); + + switch (action) { + case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: + case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE: + case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE: + case XE_GUC_ACTION_TLB_INVALIDATION_DONE: + g2h_release_space(ct, len); + } + + return 0; +} + +static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) +{ + struct xe_device *xe = ct_to_xe(ct); + u32 response_len = len - GUC_CTB_MSG_MIN_LEN; + u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, msg[0]); + u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]); + struct g2h_fence *g2h_fence; + + lockdep_assert_held(&ct->lock); + + g2h_fence = xa_erase(&ct->fence_lookup, fence); + if (unlikely(!g2h_fence)) { + /* Don't tear down channel, as send could've timed out */ + drm_warn(&xe->drm, "G2H fence (%u) not found!\n", fence); + g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); + return 0; + } + + XE_WARN_ON(fence != g2h_fence->seqno); + + if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) { + g2h_fence->fail = true; + g2h_fence->error = + FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[0]); + g2h_fence->hint = + FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[0]); + } else if (type == GUC_HXG_TYPE_NO_RESPONSE_RETRY) { + g2h_fence->retry = true; + g2h_fence->reason = + FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, msg[0]); + } else if (g2h_fence->response_buffer) { + g2h_fence->response_len = response_len; + memcpy(g2h_fence->response_buffer, msg + GUC_CTB_MSG_MIN_LEN, + response_len * sizeof(u32)); + } + + g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); + + g2h_fence->done = true; + smp_mb(); + + wake_up(&g2h_fence->wq); + + return 0; +} + +static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) +{ + struct xe_device *xe = ct_to_xe(ct); + u32 header, hxg, origin, type; + int ret; + + lockdep_assert_held(&ct->lock); + + header = msg[0]; + hxg 
= msg[1]; + + origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg); + if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) { + drm_err(&xe->drm, + "G2H channel broken on read, origin=%d, reset required\n", + origin); + ct->ctbs.g2h.broken = true; + + return -EPROTO; + } + + type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg); + switch (type) { + case GUC_HXG_TYPE_EVENT: + ret = parse_g2h_event(ct, msg, len); + break; + case GUC_HXG_TYPE_RESPONSE_SUCCESS: + case GUC_HXG_TYPE_RESPONSE_FAILURE: + case GUC_HXG_TYPE_NO_RESPONSE_RETRY: + ret = parse_g2h_response(ct, msg, len); + break; + default: + drm_err(&xe->drm, + "G2H channel broken on read, type=%d, reset required\n", + type); + ct->ctbs.g2h.broken = true; + + ret = -EOPNOTSUPP; + } + + return ret; +} + +static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) +{ + struct xe_device *xe = ct_to_xe(ct); + struct xe_guc *guc = ct_to_guc(ct); + u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]); + u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN; + u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN; + int ret = 0; + + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT) + return 0; + + switch (action) { + case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: + ret = xe_guc_sched_done_handler(guc, payload, adj_len); + break; + case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE: + ret = xe_guc_deregister_done_handler(guc, payload, adj_len); + break; + case XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION: + ret = xe_guc_engine_reset_handler(guc, payload, adj_len); + break; + case XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION: + ret = xe_guc_engine_reset_failure_handler(guc, payload, + adj_len); + break; + case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE: + /* Selftest only at the moment */ + break; + case XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION: + case XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE: + /* FIXME: Handle this */ + break; + case XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR: + ret = xe_guc_engine_memory_cat_error_handler(guc, payload, + adj_len); + break; + case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC: + ret = xe_guc_pagefault_handler(guc, payload, adj_len); + break; + case XE_GUC_ACTION_TLB_INVALIDATION_DONE: + ret = xe_guc_tlb_invalidation_done_handler(guc, payload, + adj_len); + break; + case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY: + ret = xe_guc_access_counter_notify_handler(guc, payload, + adj_len); + break; + default: + drm_err(&xe->drm, "unexpected action 0x%04x\n", action); + } + + if (ret) + drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n", + action, ret); + + return 0; +} + +static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) +{ + struct xe_device *xe = ct_to_xe(ct); + struct guc_ctb *g2h = &ct->ctbs.g2h; + u32 tail, head, len; + s32 avail; + + lockdep_assert_held(&ct->fast_lock); + + if (!ct->enabled) + return -ENODEV; + + if (g2h->broken) + return -EPIPE; + + /* Calculate DW available to read */ + tail = desc_read(xe, g2h, tail); + avail = tail - g2h->head; + if (unlikely(avail == 0)) + return 0; + + if (avail < 0) + avail += g2h->size; + + /* Read header */ + xe_map_memcpy_from(xe, msg, &g2h->cmds, sizeof(u32) * g2h->head, sizeof(u32)); + len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN; + if (len > avail) { + drm_err(&xe->drm, + "G2H channel broken on read, avail=%d, len=%d, reset required\n", + avail, len); + g2h->broken = true; + + return -EPROTO; + } + + head = (g2h->head + 1) % g2h->size; + avail = len - 1; + + /* Read G2H message */ + if (avail + head > g2h->size) { + u32 avail_til_wrap = g2h->size - head; + + 
xe_map_memcpy_from(xe, msg + 1, + &g2h->cmds, sizeof(u32) * head, + avail_til_wrap * sizeof(u32)); + xe_map_memcpy_from(xe, msg + 1 + avail_til_wrap, + &g2h->cmds, 0, + (avail - avail_til_wrap) * sizeof(u32)); + } else { + xe_map_memcpy_from(xe, msg + 1, + &g2h->cmds, sizeof(u32) * head, + avail * sizeof(u32)); + } + + if (fast_path) { + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT) + return 0; + + switch (FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1])) { + case XE_GUC_ACTION_TLB_INVALIDATION_DONE: + case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC: + break; /* Process these in fast-path */ + default: + return 0; + } + } + + /* Update local / descriptor header */ + g2h->head = (head + avail) % g2h->size; + desc_write(xe, g2h, head, g2h->head); + + return len; +} + +static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len) +{ + struct xe_device *xe = ct_to_xe(ct); + struct xe_guc *guc = ct_to_guc(ct); + u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]); + u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN; + u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN; + int ret = 0; + + switch (action) { + case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC: + ret = xe_guc_pagefault_handler(guc, payload, adj_len); + break; + case XE_GUC_ACTION_TLB_INVALIDATION_DONE: + __g2h_release_space(ct, len); + ret = xe_guc_tlb_invalidation_done_handler(guc, payload, + adj_len); + break; + default: + XE_WARN_ON("NOT_POSSIBLE"); + } + + if (ret) + drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n", + action, ret); +} + +/** + * xe_guc_ct_fast_path - process critical G2H in the IRQ handler + * @ct: GuC CT object + * + * Anything related to page faults is critical for performance, process these + * critical G2H in the IRQ. This is safe as these handlers either just wake up + * waiters or queue another worker. 
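+ *
+ * Only XE_GUC_ACTION_TLB_INVALIDATION_DONE and
+ * XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC events are consumed here; any
+ * other message is left in the G2H buffer for the g2h_worker to process
+ * under ct->lock.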
+ */ +void xe_guc_ct_fast_path(struct xe_guc_ct *ct) +{ + struct xe_device *xe = ct_to_xe(ct); + int len; + + if (!xe_device_in_fault_mode(xe) || !xe_device_mem_access_ongoing(xe)) + return; + + spin_lock(&ct->fast_lock); + do { + len = g2h_read(ct, ct->fast_msg, true); + if (len > 0) + g2h_fast_path(ct, ct->fast_msg, len); + } while (len > 0); + spin_unlock(&ct->fast_lock); +} + +/* Returns less than zero on error, 0 on done, 1 on more available */ +static int dequeue_one_g2h(struct xe_guc_ct *ct) +{ + int len; + int ret; + + lockdep_assert_held(&ct->lock); + + spin_lock_irq(&ct->fast_lock); + len = g2h_read(ct, ct->msg, false); + spin_unlock_irq(&ct->fast_lock); + if (len <= 0) + return len; + + ret = parse_g2h_msg(ct, ct->msg, len); + if (unlikely(ret < 0)) + return ret; + + ret = process_g2h_msg(ct, ct->msg, len); + if (unlikely(ret < 0)) + return ret; + + return 1; +} + +static void g2h_worker_func(struct work_struct *w) +{ + struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker); + int ret; + + xe_device_mem_access_get(ct_to_xe(ct)); + do { + mutex_lock(&ct->lock); + ret = dequeue_one_g2h(ct); + mutex_unlock(&ct->lock); + + if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) { + struct drm_device *drm = &ct_to_xe(ct)->drm; + struct drm_printer p = drm_info_printer(drm->dev); + + xe_guc_ct_print(ct, &p); + kick_reset(ct); + } + } while (ret == 1); + xe_device_mem_access_put(ct_to_xe(ct)); +} + +static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb, + struct drm_printer *p) +{ + u32 head, tail; + + drm_printf(p, "\tsize: %d\n", ctb->size); + drm_printf(p, "\tresv_space: %d\n", ctb->resv_space); + drm_printf(p, "\thead: %d\n", ctb->head); + drm_printf(p, "\ttail: %d\n", ctb->tail); + drm_printf(p, "\tspace: %d\n", ctb->space); + drm_printf(p, "\tbroken: %d\n", ctb->broken); + + head = desc_read(xe, ctb, head); + tail = desc_read(xe, ctb, tail); + drm_printf(p, "\thead (memory): %d\n", head); + drm_printf(p, "\ttail (memory): %d\n", tail); + drm_printf(p, "\tstatus (memory): 0x%x\n", desc_read(xe, ctb, status)); + + if (head != tail) { + struct iosys_map map = + IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32)); + + while (head != tail) { + drm_printf(p, "\tcmd[%d]: 0x%08x\n", head, + xe_map_rd(xe, &map, 0, u32)); + ++head; + if (head == ctb->size) { + head = 0; + map = ctb->cmds; + } else { + iosys_map_incr(&map, sizeof(u32)); + } + } + } +} + +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p) +{ + if (ct->enabled) { + drm_puts(p, "\nH2G CTB (all sizes in DW):\n"); + guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.h2g, p); + + drm_puts(p, "\nG2H CTB (all sizes in DW):\n"); + guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.g2h, p); + drm_printf(p, "\tg2h outstanding: %d\n", ct->g2h_outstanding); + } else { + drm_puts(p, "\nCT disabled\n"); + } +} + +#ifdef XE_GUC_CT_SELFTEST +/* + * Disable G2H processing in IRQ handler to force xe_guc_ct_send to enter flow + * control if enough sent, 8k sends is enough. Verify forward process, verify + * credits expected values on exit. 
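+ *
+ * Rough sizing of the 8k figure: each send reserves 4 DW of G2H credit
+ * against roughly 3k DW of usable G2H space (CTB_G2H_BUFFER_SIZE minus the
+ * reserved room), so with the IRQ handler suppressed the sender exhausts
+ * its credits well before 8192 sends and must take the flow control path.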
+ */ +void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p) +{ + struct guc_ctb *g2h = &ct->ctbs.g2h; + u32 action[] = { XE_GUC_ACTION_SCHED_ENGINE_MODE_SET, 0, 0, 1, }; + u32 bad_action[] = { XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 0, 0, }; + int ret; + int i; + + ct->suppress_irq_handler = true; + drm_puts(p, "Starting GuC CT selftest\n"); + + for (i = 0; i < 8192; ++i) { + ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 4, 1); + if (ret) { + drm_printf(p, "Aborted pass %d, ret %d\n", i, ret); + xe_guc_ct_print(ct, p); + break; + } + } + + ct->suppress_irq_handler = false; + if (!ret) { + xe_guc_ct_irq_handler(ct); + msleep(200); + if (g2h->space != + CIRC_SPACE(0, 0, g2h->size) - g2h->resv_space) { + drm_printf(p, "Mismatch on space %d, %d\n", + g2h->space, + CIRC_SPACE(0, 0, g2h->size) - + g2h->resv_space); + ret = -EIO; + } + if (ct->g2h_outstanding) { + drm_printf(p, "Outstanding G2H, %d\n", + ct->g2h_outstanding); + ret = -EIO; + } + } + + /* Check failure path for blocking CTs too */ + xe_guc_ct_send_block(ct, bad_action, ARRAY_SIZE(bad_action)); + if (g2h->space != + CIRC_SPACE(0, 0, g2h->size) - g2h->resv_space) { + drm_printf(p, "Mismatch on space %d, %d\n", + g2h->space, + CIRC_SPACE(0, 0, g2h->size) - + g2h->resv_space); + ret = -EIO; + } + if (ct->g2h_outstanding) { + drm_printf(p, "Outstanding G2H, %d\n", + ct->g2h_outstanding); + ret = -EIO; + } + + drm_printf(p, "GuC CT selftest done - %s\n", ret ? "FAIL" : "PASS"); +} +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h new file mode 100644 index 000000000000..49fb74f91e4d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_CT_H_ +#define _XE_GUC_CT_H_ + +#include "xe_guc_ct_types.h" + +struct drm_printer; + +int xe_guc_ct_init(struct xe_guc_ct *ct); +int xe_guc_ct_enable(struct xe_guc_ct *ct); +void xe_guc_ct_disable(struct xe_guc_ct *ct); +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p); +void xe_guc_ct_fast_path(struct xe_guc_ct *ct); + +static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct) +{ + wake_up_all(&ct->wq); +#ifdef XE_GUC_CT_SELFTEST + if (!ct->suppress_irq_handler && ct->enabled) + queue_work(system_unbound_wq, &ct->g2h_worker); +#else + if (ct->enabled) + queue_work(system_unbound_wq, &ct->g2h_worker); +#endif + xe_guc_ct_fast_path(ct); +} + +/* Basic CT send / receives */ +int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 g2h_len, u32 num_g2h); +int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 g2h_len, u32 num_g2h); +int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 *response_buffer); +static inline int +xe_guc_ct_send_block(struct xe_guc_ct *ct, const u32 *action, u32 len) +{ + return xe_guc_ct_send_recv(ct, action, len, NULL); +} + +/* This is only version of the send CT you can call from a G2H handler */ +int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action, + u32 len); + +/* Can't fail because a GT reset is in progress */ +int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action, + u32 len, u32 *response_buffer); +static inline int +xe_guc_ct_send_block_no_fail(struct xe_guc_ct *ct, const u32 *action, u32 len) +{ + return xe_guc_ct_send_recv_no_fail(ct, action, len, NULL); +} + +#ifdef XE_GUC_CT_SELFTEST +void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p); +#endif + +#endif 
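A minimal usage sketch of the send interfaces declared above, modeled on the CT selftest in xe_guc_ct.c. It assumes a struct xe_guc *guc is in scope, and the SCHED_ENGINE_MODE_SET payload values are illustrative placeholders rather than a real submission:

	u32 action[] = { XE_GUC_ACTION_SCHED_ENGINE_MODE_SET, 0, 0, 1 };
	int ret;

	/* Non-blocking H2G that expects a G2H completion event: reserve
	 * 4 DW of G2H credit and one outstanding G2H for the _DONE reply.
	 */
	ret = xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 4, 1);

	/* Blocking request/response: waits on the seqno fence for the
	 * matching G2H and returns 0 on success.
	 */
	ret = xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action));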
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h new file mode 100644 index 000000000000..17b148bf3735 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_CT_TYPES_H_ +#define _XE_GUC_CT_TYPES_H_ + +#include +#include +#include +#include +#include + +#include "abi/guc_communication_ctb_abi.h" + +#define XE_GUC_CT_SELFTEST + +struct xe_bo; + +/** + * struct guc_ctb - GuC command transport buffer (CTB) + */ +struct guc_ctb { + /** @desc: dma buffer map for CTB descriptor */ + struct iosys_map desc; + /** @cmds: dma buffer map for CTB commands */ + struct iosys_map cmds; + /** @size: size of CTB commands (DW) */ + u32 size; + /** @resv_space: reserved space of CTB commands (DW) */ + u32 resv_space; + /** @head: head of CTB commands (DW) */ + u32 head; + /** @tail: tail of CTB commands (DW) */ + u32 tail; + /** @space: space in CTB commands (DW) */ + u32 space; + /** @broken: channel broken */ + bool broken; +}; + +/** + * struct xe_guc_ct - GuC command transport (CT) layer + * + * Includes a pair of CT buffers for bi-directional communication and tracking + * for the H2G and G2H requests sent and received through the buffers. + */ +struct xe_guc_ct { + /** @bo: XE BO for CT */ + struct xe_bo *bo; + /** @lock: protects everything in CT layer */ + struct mutex lock; + /** @fast_lock: protects G2H channel and credits */ + spinlock_t fast_lock; + /** @ctbs: buffers for sending and receiving commands */ + struct { + /** @send: Host to GuC (H2G, send) channel */ + struct guc_ctb h2g; + /** @recv: GuC to Host (G2H, receive) channel */ + struct guc_ctb g2h; + } ctbs; + /** @g2h_outstanding: number of outstanding G2H */ + u32 g2h_outstanding; + /** @g2h_worker: worker to process G2H messages */ + struct work_struct g2h_worker; + /** @enabled: CT enabled */ + bool enabled; + /** @fence_seqno: G2H fence seqno - 16 bits used by CT */ + u32 fence_seqno; + /** @fence_context: context for G2H fence */ + u64 fence_context; + /** @fence_lookup: G2H fence lookup */ + struct xarray fence_lookup; + /** @wq: wait queue used for reliable CT sends and freeing G2H credits */ + wait_queue_head_t wq; +#ifdef XE_GUC_CT_SELFTEST + /** @suppress_irq_handler: force flow control to sender */ + bool suppress_irq_handler; +#endif + /** @msg: Message buffer */ + u32 msg[GUC_CTB_MSG_MAX_LEN]; + /** @fast_msg: Message buffer */ + u32 fast_msg[GUC_CTB_MSG_MAX_LEN]; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c new file mode 100644 index 000000000000..916e9633b322 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_guc_debugfs.h" +#include "xe_guc_log.h" +#include "xe_macros.h" + +static struct xe_gt * +guc_to_gt(struct xe_guc *guc) +{ + return container_of(guc, struct xe_gt, uc.guc); +} + +static struct xe_device * +guc_to_xe(struct xe_guc *guc) +{ + return gt_to_xe(guc_to_gt(guc)); +} + +static struct xe_guc *node_to_guc(struct drm_info_node *node) +{ + return node->info_ent->data; +} + +static int guc_info(struct seq_file *m, void *data) +{ + struct xe_guc *guc = node_to_guc(m->private); + struct xe_device *xe = guc_to_xe(guc); + struct drm_printer p = drm_seq_file_printer(m); + + 
xe_device_mem_access_get(xe); + xe_guc_print_info(guc, &p); + xe_device_mem_access_put(xe); + + return 0; +} + +static int guc_log(struct seq_file *m, void *data) +{ + struct xe_guc *guc = node_to_guc(m->private); + struct xe_device *xe = guc_to_xe(guc); + struct drm_printer p = drm_seq_file_printer(m); + + xe_device_mem_access_get(xe); + xe_guc_log_print(&guc->log, &p); + xe_device_mem_access_put(xe); + + return 0; +} + +#ifdef XE_GUC_CT_SELFTEST +static int guc_ct_selftest(struct seq_file *m, void *data) +{ + struct xe_guc *guc = node_to_guc(m->private); + struct xe_device *xe = guc_to_xe(guc); + struct drm_printer p = drm_seq_file_printer(m); + + xe_device_mem_access_get(xe); + xe_guc_ct_selftest(&guc->ct, &p); + xe_device_mem_access_put(xe); + + return 0; +} +#endif + +static const struct drm_info_list debugfs_list[] = { + {"guc_info", guc_info, 0}, + {"guc_log", guc_log, 0}, +#ifdef XE_GUC_CT_SELFTEST + {"guc_ct_selftest", guc_ct_selftest, 0}, +#endif +}; + +void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent) +{ + struct drm_minor *minor = guc_to_xe(guc)->drm.primary; + struct drm_info_list *local; + int i; + +#define DEBUGFS_SIZE ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list) + local = drmm_kmalloc(&guc_to_xe(guc)->drm, DEBUGFS_SIZE, GFP_KERNEL); + if (!local) { + XE_WARN_ON("Couldn't allocate memory"); + return; + } + + memcpy(local, debugfs_list, DEBUGFS_SIZE); +#undef DEBUGFS_SIZE + + for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i) + local[i].data = guc; + + drm_debugfs_create_files(local, + ARRAY_SIZE(debugfs_list), + parent, minor); +} diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.h b/drivers/gpu/drm/xe/xe_guc_debugfs.h new file mode 100644 index 000000000000..4756dff26fca --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_DEBUGFS_H_ +#define _XE_GUC_DEBUGFS_H_ + +struct dentry; +struct xe_guc; + +void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_engine_types.h b/drivers/gpu/drm/xe/xe_guc_engine_types.h new file mode 100644 index 000000000000..512615d1ce8c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_engine_types.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_ENGINE_TYPES_H_ +#define _XE_GUC_ENGINE_TYPES_H_ + +#include +#include + +#include "xe_gpu_scheduler_types.h" + +struct dma_fence; +struct xe_engine; + +/** + * struct xe_guc_engine - GuC specific state for an xe_engine + */ +struct xe_guc_engine { + /** @engine: Backpointer to parent xe_engine */ + struct xe_engine *engine; + /** @sched: GPU scheduler for this xe_engine */ + struct xe_gpu_scheduler sched; + /** @entity: Scheduler entity for this xe_engine */ + struct xe_sched_entity entity; + /** + * @static_msgs: Static messages for this xe_engine, used when a message + * needs to sent through the GPU scheduler but memory allocations are + * not allowed. 
+ */ +#define MAX_STATIC_MSG_TYPE 3 + struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE]; + /** @fini_async: do final fini async from this worker */ + struct work_struct fini_async; + /** @resume_time: time of last resume */ + u64 resume_time; + /** @state: GuC specific state for this xe_engine */ + atomic_t state; + /** @wqi_head: work queue item tail */ + u32 wqi_head; + /** @wqi_tail: work queue item tail */ + u32 wqi_tail; + /** @id: GuC id for this xe_engine */ + u16 id; + /** @suspend_wait: wait queue used to wait on pending suspends */ + wait_queue_head_t suspend_wait; + /** @suspend_pending: a suspend of the engine is pending */ + bool suspend_pending; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h new file mode 100644 index 000000000000..f562404a6cf7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -0,0 +1,392 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_FWIF_H +#define _XE_GUC_FWIF_H + +#include + +#include "abi/guc_actions_abi.h" +#include "abi/guc_actions_slpc_abi.h" +#include "abi/guc_errors_abi.h" +#include "abi/guc_communication_mmio_abi.h" +#include "abi/guc_communication_ctb_abi.h" +#include "abi/guc_klvs_abi.h" +#include "abi/guc_messages_abi.h" + +#define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 4 +#define G2H_LEN_DW_DEREGISTER_CONTEXT 3 +#define G2H_LEN_DW_TLB_INVALIDATE 3 + +#define GUC_CONTEXT_DISABLE 0 +#define GUC_CONTEXT_ENABLE 1 + +#define GUC_CLIENT_PRIORITY_KMD_HIGH 0 +#define GUC_CLIENT_PRIORITY_HIGH 1 +#define GUC_CLIENT_PRIORITY_KMD_NORMAL 2 +#define GUC_CLIENT_PRIORITY_NORMAL 3 +#define GUC_CLIENT_PRIORITY_NUM 4 + +#define GUC_RENDER_ENGINE 0 +#define GUC_VIDEO_ENGINE 1 +#define GUC_BLITTER_ENGINE 2 +#define GUC_VIDEOENHANCE_ENGINE 3 +#define GUC_VIDEO_ENGINE2 4 +#define GUC_MAX_ENGINES_NUM (GUC_VIDEO_ENGINE2 + 1) + +#define GUC_RENDER_CLASS 0 +#define GUC_VIDEO_CLASS 1 +#define GUC_VIDEOENHANCE_CLASS 2 +#define GUC_BLITTER_CLASS 3 +#define GUC_COMPUTE_CLASS 4 +#define GUC_GSC_OTHER_CLASS 5 +#define GUC_LAST_ENGINE_CLASS GUC_GSC_OTHER_CLASS +#define GUC_MAX_ENGINE_CLASSES 16 +#define GUC_MAX_INSTANCES_PER_CLASS 32 + +/* Work item for submitting workloads into work queue of GuC. 
*/ +#define WQ_STATUS_ACTIVE 1 +#define WQ_STATUS_SUSPENDED 2 +#define WQ_STATUS_CMD_ERROR 3 +#define WQ_STATUS_ENGINE_ID_NOT_USED 4 +#define WQ_STATUS_SUSPENDED_FROM_RESET 5 +#define WQ_TYPE_NOOP 0x4 +#define WQ_TYPE_MULTI_LRC 0x5 +#define WQ_TYPE_MASK GENMASK(7, 0) +#define WQ_LEN_MASK GENMASK(26, 16) + +#define WQ_GUC_ID_MASK GENMASK(15, 0) +#define WQ_RING_TAIL_MASK GENMASK(28, 18) + +struct guc_wq_item { + u32 header; + u32 context_desc; + u32 submit_element_info; + u32 fence_id; +} __packed; + +struct guc_sched_wq_desc { + u32 head; + u32 tail; + u32 error_offset; + u32 wq_status; + u32 reserved[28]; +} __packed; + +/* Helper for context registration H2G */ +struct guc_ctxt_registration_info { + u32 flags; + u32 context_idx; + u32 engine_class; + u32 engine_submit_mask; + u32 wq_desc_lo; + u32 wq_desc_hi; + u32 wq_base_lo; + u32 wq_base_hi; + u32 wq_size; + u32 hwlrca_lo; + u32 hwlrca_hi; +}; +#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) + +/* 32-bit KLV structure as used by policy updates and others */ +struct guc_klv_generic_dw_t { + u32 kl; + u32 value; +} __packed; + +/* Format of the UPDATE_CONTEXT_POLICIES H2G data packet */ +struct guc_update_engine_policy_header { + u32 action; + u32 guc_id; +} __packed; + +struct guc_update_engine_policy { + struct guc_update_engine_policy_header header; + struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS]; +} __packed; + +/* GUC_CTL_* - Parameters for loading the GuC */ +#define GUC_CTL_LOG_PARAMS 0 +#define GUC_LOG_VALID BIT(0) +#define GUC_LOG_NOTIFY_ON_HALF_FULL BIT(1) +#define GUC_LOG_CAPTURE_ALLOC_UNITS BIT(2) +#define GUC_LOG_LOG_ALLOC_UNITS BIT(3) +#define GUC_LOG_CRASH_SHIFT 4 +#define GUC_LOG_CRASH_MASK (0x3 << GUC_LOG_CRASH_SHIFT) +#define GUC_LOG_DEBUG_SHIFT 6 +#define GUC_LOG_DEBUG_MASK (0xF << GUC_LOG_DEBUG_SHIFT) +#define GUC_LOG_CAPTURE_SHIFT 10 +#define GUC_LOG_CAPTURE_MASK (0x3 << GUC_LOG_CAPTURE_SHIFT) +#define GUC_LOG_BUF_ADDR_SHIFT 12 + +#define GUC_CTL_WA 1 +#define GUC_WA_GAM_CREDITS BIT(10) +#define GUC_WA_DUAL_QUEUE BIT(11) +#define GUC_WA_RCS_RESET_BEFORE_RC6 BIT(13) +#define GUC_WA_CONTEXT_ISOLATION BIT(15) +#define GUC_WA_PRE_PARSER BIT(14) +#define GUC_WA_HOLD_CCS_SWITCHOUT BIT(17) +#define GUC_WA_POLLCS BIT(18) +#define GUC_WA_RENDER_RST_RC6_EXIT BIT(19) +#define GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21) + +#define GUC_CTL_FEATURE 2 +#define GUC_CTL_ENABLE_SLPC BIT(2) +#define GUC_CTL_DISABLE_SCHEDULER BIT(14) + +#define GUC_CTL_DEBUG 3 +#define GUC_LOG_VERBOSITY_SHIFT 0 +#define GUC_LOG_VERBOSITY_LOW (0 << GUC_LOG_VERBOSITY_SHIFT) +#define GUC_LOG_VERBOSITY_MED (1 << GUC_LOG_VERBOSITY_SHIFT) +#define GUC_LOG_VERBOSITY_HIGH (2 << GUC_LOG_VERBOSITY_SHIFT) +#define GUC_LOG_VERBOSITY_ULTRA (3 << GUC_LOG_VERBOSITY_SHIFT) +#define GUC_LOG_VERBOSITY_MIN 0 +#define GUC_LOG_VERBOSITY_MAX 3 +#define GUC_LOG_VERBOSITY_MASK 0x0000000f +#define GUC_LOG_DESTINATION_MASK (3 << 4) +#define GUC_LOG_DISABLED (1 << 6) +#define GUC_PROFILE_ENABLED (1 << 7) + +#define GUC_CTL_ADS 4 +#define GUC_ADS_ADDR_SHIFT 1 +#define GUC_ADS_ADDR_MASK (0xFFFFF << GUC_ADS_ADDR_SHIFT) + +#define GUC_CTL_DEVID 5 + +#define GUC_CTL_MAX_DWORDS 14 + +/* Scheduling policy settings */ + +#define GLOBAL_POLICY_MAX_NUM_WI 15 + +/* Don't reset an engine upon preemption failure */ +#define GLOBAL_POLICY_DISABLE_ENGINE_RESET BIT(0) + +#define GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000 + +struct guc_policies { + u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES]; + /* In micro seconds. 
How much time to allow before DPC processing is + * called back via interrupt (to prevent DPC queue drain starving). + * Typically 1000s of micro seconds (example only, not granularity). */ + u32 dpc_promote_time; + + /* Must be set to take these new values. */ + u32 is_valid; + + /* Max number of WIs to process per call. A large value may keep CS + * idle. */ + u32 max_num_work_items; + + u32 global_flags; + u32 reserved[4]; +} __packed; + +/* GuC MMIO reg state struct */ +struct guc_mmio_reg { + u32 offset; + u32 value; + u32 flags; + u32 mask; +#define GUC_REGSET_MASKED BIT(0) +#define GUC_REGSET_MASKED_WITH_VALUE BIT(2) +#define GUC_REGSET_RESTORE_ONLY BIT(3) +} __packed; + +/* GuC register sets */ +struct guc_mmio_reg_set { + u32 address; + u16 count; + u16 reserved; +} __packed; + +/* Generic GT SysInfo data types */ +#define GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED 0 +#define GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK 1 +#define GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI 2 +#define GUC_GENERIC_GT_SYSINFO_MAX 16 + +/* HW info */ +struct guc_gt_system_info { + u8 mapping_table[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; + u32 engine_enabled_masks[GUC_MAX_ENGINE_CLASSES]; + u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX]; +} __packed; + +enum { + GUC_CAPTURE_LIST_INDEX_PF = 0, + GUC_CAPTURE_LIST_INDEX_VF = 1, + GUC_CAPTURE_LIST_INDEX_MAX = 2, +}; + +/* GuC Additional Data Struct */ +struct guc_ads { + struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; + u32 reserved0; + u32 scheduler_policies; + u32 gt_system_info; + u32 reserved1; + u32 control_data; + u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES]; + u32 eng_state_size[GUC_MAX_ENGINE_CLASSES]; + u32 private_data; + u32 um_init_data; + u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; + u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; + u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX]; + u32 reserved[14]; +} __packed; + +/* Engine usage stats */ +struct guc_engine_usage_record { + u32 current_context_index; + u32 last_switch_in_stamp; + u32 reserved0; + u32 total_runtime; + u32 reserved1[4]; +} __packed; + +struct guc_engine_usage { + struct guc_engine_usage_record engines[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; +} __packed; + +/* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */ +enum xe_guc_recv_message { + XE_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1), + XE_GUC_RECV_MSG_EXCEPTION = BIT(30), +}; + +/* Page fault structures */ +struct access_counter_desc { + u32 dw0; +#define ACCESS_COUNTER_TYPE BIT(0) +#define ACCESS_COUNTER_SUBG_LO GENMASK(31, 1) + + u32 dw1; +#define ACCESS_COUNTER_SUBG_HI BIT(0) +#define ACCESS_COUNTER_RSVD0 GENMASK(2, 1) +#define ACCESS_COUNTER_ENG_INSTANCE GENMASK(8, 3) +#define ACCESS_COUNTER_ENG_CLASS GENMASK(11, 9) +#define ACCESS_COUNTER_ASID GENMASK(31, 12) + + u32 dw2; +#define ACCESS_COUNTER_VFID GENMASK(5, 0) +#define ACCESS_COUNTER_RSVD1 GENMASK(7, 6) +#define ACCESS_COUNTER_GRANULARITY GENMASK(10, 8) +#define ACCESS_COUNTER_RSVD2 GENMASK(16, 11) +#define ACCESS_COUNTER_VIRTUAL_ADDR_RANGE_LO GENMASK(31, 17) + + u32 dw3; +#define ACCESS_COUNTER_VIRTUAL_ADDR_RANGE_HI GENMASK(31, 0) +} __packed; + +enum guc_um_queue_type { + GUC_UM_HW_QUEUE_PAGE_FAULT = 0, + GUC_UM_HW_QUEUE_PAGE_FAULT_RESPONSE, + GUC_UM_HW_QUEUE_ACCESS_COUNTER, + GUC_UM_HW_QUEUE_MAX +}; + +struct guc_um_queue_params { + u64 base_dpa; + u32 base_ggtt_address; + u32 size_in_bytes; + u32 rsvd[4]; +} __packed; + 
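+/*
+ * Filled out by guc_um_init_params() in xe_guc_ads.c: one entry per
+ * GUC_UM_HW_QUEUE_MAX queue, each GUC_UM_QUEUE_SIZE bytes, carrying both
+ * the device physical (DPA) and GGTT base addresses of the queue within
+ * the ADS BO.
+ */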
+struct guc_um_init_params { + u64 page_response_timeout_in_us; + u32 rsvd[6]; + struct guc_um_queue_params queue_params[GUC_UM_HW_QUEUE_MAX]; +} __packed; + +enum xe_guc_fault_reply_type { + PFR_ACCESS = 0, + PFR_ENGINE, + PFR_VFID, + PFR_ALL, + PFR_INVALID +}; + +enum xe_guc_response_desc_type { + TLB_INVALIDATION_DESC = 0, + FAULT_RESPONSE_DESC +}; + +struct xe_guc_pagefault_desc { + u32 dw0; +#define PFD_FAULT_LEVEL GENMASK(2, 0) +#define PFD_SRC_ID GENMASK(10, 3) +#define PFD_RSVD_0 GENMASK(17, 11) +#define XE2_PFD_TRVA_FAULT BIT(18) +#define PFD_ENG_INSTANCE GENMASK(24, 19) +#define PFD_ENG_CLASS GENMASK(27, 25) +#define PFD_PDATA_LO GENMASK(31, 28) + + u32 dw1; +#define PFD_PDATA_HI GENMASK(11, 0) +#define PFD_PDATA_HI_SHIFT 4 +#define PFD_ASID GENMASK(31, 12) + + u32 dw2; +#define PFD_ACCESS_TYPE GENMASK(1, 0) +#define PFD_FAULT_TYPE GENMASK(3, 2) +#define PFD_VFID GENMASK(9, 4) +#define PFD_RSVD_1 GENMASK(11, 10) +#define PFD_VIRTUAL_ADDR_LO GENMASK(31, 12) +#define PFD_VIRTUAL_ADDR_LO_SHIFT 12 + + u32 dw3; +#define PFD_VIRTUAL_ADDR_HI GENMASK(31, 0) +#define PFD_VIRTUAL_ADDR_HI_SHIFT 32 +} __packed; + +struct xe_guc_pagefault_reply { + u32 dw0; +#define PFR_VALID BIT(0) +#define PFR_SUCCESS BIT(1) +#define PFR_REPLY GENMASK(4, 2) +#define PFR_RSVD_0 GENMASK(9, 5) +#define PFR_DESC_TYPE GENMASK(11, 10) +#define PFR_ASID GENMASK(31, 12) + + u32 dw1; +#define PFR_VFID GENMASK(5, 0) +#define PFR_RSVD_1 BIT(6) +#define PFR_ENG_INSTANCE GENMASK(12, 7) +#define PFR_ENG_CLASS GENMASK(15, 13) +#define PFR_PDATA GENMASK(31, 16) + + u32 dw2; +#define PFR_RSVD_2 GENMASK(31, 0) +} __packed; + +struct xe_guc_acc_desc { + u32 dw0; +#define ACC_TYPE BIT(0) +#define ACC_TRIGGER 0 +#define ACC_NOTIFY 1 +#define ACC_SUBG_LO GENMASK(31, 1) + + u32 dw1; +#define ACC_SUBG_HI BIT(0) +#define ACC_RSVD0 GENMASK(2, 1) +#define ACC_ENG_INSTANCE GENMASK(8, 3) +#define ACC_ENG_CLASS GENMASK(11, 9) +#define ACC_ASID GENMASK(31, 12) + + u32 dw2; +#define ACC_VFID GENMASK(5, 0) +#define ACC_RSVD1 GENMASK(7, 6) +#define ACC_GRANULARITY GENMASK(10, 8) +#define ACC_RSVD2 GENMASK(16, 11) +#define ACC_VIRTUAL_ADDR_RANGE_LO GENMASK(31, 17) + + u32 dw3; +#define ACC_VIRTUAL_ADDR_RANGE_HI GENMASK(31, 0) +} __packed; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c new file mode 100644 index 000000000000..8dfd48f71a7c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_guc.h" +#include "xe_guc_hwconfig.h" +#include "xe_map.h" + +static struct xe_gt * +guc_to_gt(struct xe_guc *guc) +{ + return container_of(guc, struct xe_gt, uc.guc); +} + +static struct xe_device * +guc_to_xe(struct xe_guc *guc) +{ + return gt_to_xe(guc_to_gt(guc)); +} + +static int send_get_hwconfig(struct xe_guc *guc, u32 ggtt_addr, u32 size) +{ + u32 action[] = { + XE_GUC_ACTION_GET_HWCONFIG, + lower_32_bits(ggtt_addr), + upper_32_bits(ggtt_addr), + size, + }; + + return xe_guc_send_mmio(guc, action, ARRAY_SIZE(action)); +} + +static int guc_hwconfig_size(struct xe_guc *guc, u32 *size) +{ + int ret = send_get_hwconfig(guc, 0, 0); + + if (ret < 0) + return ret; + + *size = ret; + return 0; +} + +static int guc_hwconfig_copy(struct xe_guc *guc) +{ + int ret = send_get_hwconfig(guc, xe_bo_ggtt_addr(guc->hwconfig.bo), + guc->hwconfig.size); + + if (ret < 0) + return ret; + + return 0; +} + +static void 
guc_hwconfig_fini(struct drm_device *drm, void *arg) +{ + struct xe_guc *guc = arg; + + xe_bo_unpin_map_no_vm(guc->hwconfig.bo); +} + +int xe_guc_hwconfig_init(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + struct xe_bo *bo; + u32 size; + int err; + + /* Initialization already done */ + if (guc->hwconfig.bo) + return 0; + + /* + * All hwconfig the same across GTs so only GT0 needs to be configured + */ + if (gt->info.id != XE_GT0) + return 0; + + /* ADL_P, DG2+ supports hwconfig table */ + if (GRAPHICS_VERx100(xe) < 1255 && xe->info.platform != XE_ALDERLAKE_P) + return 0; + + err = guc_hwconfig_size(guc, &size); + if (err) + return err; + if (!size) + return -EINVAL; + + bo = xe_bo_create_pin_map(xe, gt, NULL, PAGE_ALIGN(size), + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + guc->hwconfig.bo = bo; + guc->hwconfig.size = size; + + err = drmm_add_action_or_reset(&xe->drm, guc_hwconfig_fini, guc); + if (err) + return err; + + return guc_hwconfig_copy(guc); +} + +u32 xe_guc_hwconfig_size(struct xe_guc *guc) +{ + return !guc->hwconfig.bo ? 0 : guc->hwconfig.size; +} + +void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst) +{ + struct xe_device *xe = guc_to_xe(guc); + + XE_BUG_ON(!guc->hwconfig.bo); + + xe_map_memcpy_from(xe, dst, &guc->hwconfig.bo->vmap, 0, + guc->hwconfig.size); +} diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.h b/drivers/gpu/drm/xe/xe_guc_hwconfig.h new file mode 100644 index 000000000000..b5794d641900 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_HWCONFIG_H_ +#define _XE_GUC_HWCONFIG_H_ + +#include + +struct xe_guc; + +int xe_guc_hwconfig_init(struct xe_guc *guc); +u32 xe_guc_hwconfig_size(struct xe_guc *guc); +void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c new file mode 100644 index 000000000000..7ec1b2bb1f8e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include "xe_bo.h" +#include "xe_gt.h" +#include "xe_guc_log.h" +#include "xe_map.h" +#include "xe_module.h" + +static struct xe_gt * +log_to_gt(struct xe_guc_log *log) +{ + return container_of(log, struct xe_gt, uc.guc.log); +} + +static struct xe_device * +log_to_xe(struct xe_guc_log *log) +{ + return gt_to_xe(log_to_gt(log)); +} + +static size_t guc_log_size(void) +{ + /* + * GuC Log buffer Layout + * + * +===============================+ 00B + * | Crash dump state header | + * +-------------------------------+ 32B + * | Debug state header | + * +-------------------------------+ 64B + * | Capture state header | + * +-------------------------------+ 96B + * | | + * +===============================+ PAGE_SIZE (4KB) + * | Crash Dump logs | + * +===============================+ + CRASH_SIZE + * | Debug logs | + * +===============================+ + DEBUG_SIZE + * | Capture logs | + * +===============================+ + CAPTURE_SIZE + */ + return PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE + + CAPTURE_BUFFER_SIZE; +} + +void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p) +{ + struct xe_device *xe = log_to_xe(log); + size_t size; + int i, j; + + XE_BUG_ON(!log->bo); + + size = log->bo->size; + +#define DW_PER_READ 128 + 
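+	/*
+	 * Dump the log BO as raw hex: copy DW_PER_READ dwords at a time out of
+	 * the (possibly VRAM-backed) buffer with xe_map_memcpy_from() and then
+	 * print them four dwords per line.
+	 */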
XE_BUG_ON(size % (DW_PER_READ * sizeof(u32))); + for (i = 0; i < size / sizeof(u32); i += DW_PER_READ) { + u32 read[DW_PER_READ]; + + xe_map_memcpy_from(xe, read, &log->bo->vmap, i * sizeof(u32), + DW_PER_READ * sizeof(u32)); +#define DW_PER_PRINT 4 + for (j = 0; j < DW_PER_READ / DW_PER_PRINT; ++j) { + u32 *print = read + j * DW_PER_PRINT; + + drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n", + *(print + 0), *(print + 1), + *(print + 2), *(print + 3)); + } + } +} + +static void guc_log_fini(struct drm_device *drm, void *arg) +{ + struct xe_guc_log *log = arg; + + xe_bo_unpin_map_no_vm(log->bo); +} + +int xe_guc_log_init(struct xe_guc_log *log) +{ + struct xe_device *xe = log_to_xe(log); + struct xe_gt *gt = log_to_gt(log); + struct xe_bo *bo; + int err; + + bo = xe_bo_create_pin_map(xe, gt, NULL, guc_log_size(), + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + xe_map_memset(xe, &bo->vmap, 0, 0, guc_log_size()); + log->bo = bo; + log->level = xe_guc_log_level; + + err = drmm_add_action_or_reset(&xe->drm, guc_log_fini, log); + if (err) + return err; + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h new file mode 100644 index 000000000000..2d25ab28b4b3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_log.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_LOG_H_ +#define _XE_GUC_LOG_H_ + +#include "xe_guc_log_types.h" + +struct drm_printer; + +#if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER) +#define CRASH_BUFFER_SIZE SZ_1M +#define DEBUG_BUFFER_SIZE SZ_8M +#define CAPTURE_BUFFER_SIZE SZ_2M +#else +#define CRASH_BUFFER_SIZE SZ_8K +#define DEBUG_BUFFER_SIZE SZ_64K +#define CAPTURE_BUFFER_SIZE SZ_16K +#endif +/* + * While we're using plain log level in i915, GuC controls are much more... + * "elaborate"? We have a couple of bits for verbosity, separate bit for actual + * log enabling, and separate bit for default logging - which "conveniently" + * ignores the enable bit. + */ +#define GUC_LOG_LEVEL_DISABLED 0 +#define GUC_LOG_LEVEL_NON_VERBOSE 1 +#define GUC_LOG_LEVEL_IS_ENABLED(x) ((x) > GUC_LOG_LEVEL_DISABLED) +#define GUC_LOG_LEVEL_IS_VERBOSE(x) ((x) > GUC_LOG_LEVEL_NON_VERBOSE) +#define GUC_LOG_LEVEL_TO_VERBOSITY(x) ({ \ + typeof(x) _x = (x); \ + GUC_LOG_LEVEL_IS_VERBOSE(_x) ? 
_x - 2 : 0; \ +}) +#define GUC_VERBOSITY_TO_LOG_LEVEL(x) ((x) + 2) +#define GUC_LOG_LEVEL_MAX GUC_VERBOSITY_TO_LOG_LEVEL(GUC_LOG_VERBOSITY_MAX) + +int xe_guc_log_init(struct xe_guc_log *log); +void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p); + +static inline u32 +xe_guc_log_get_level(struct xe_guc_log *log) +{ + return log->level; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h new file mode 100644 index 000000000000..125080d138a7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_log_types.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_LOG_TYPES_H_ +#define _XE_GUC_LOG_TYPES_H_ + +#include + +struct xe_bo; + +/** + * struct xe_guc_log - GuC log + */ +struct xe_guc_log { + /** @level: GuC log level */ + u32 level; + /** @bo: XE BO for GuC log */ + struct xe_bo *bo; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c new file mode 100644 index 000000000000..227e30a482e3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -0,0 +1,843 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_types.h" +#include "xe_gt_sysfs.h" +#include "xe_guc_ct.h" +#include "xe_map.h" +#include "xe_mmio.h" +#include "xe_pcode.h" +#include "i915_reg_defs.h" +#include "i915_reg.h" + +#include "intel_mchbar_regs.h" + +/* For GEN6_RP_STATE_CAP.reg to be merged when the definition moves to Xe */ +#define RP0_MASK REG_GENMASK(7, 0) +#define RP1_MASK REG_GENMASK(15, 8) +#define RPN_MASK REG_GENMASK(23, 16) + +#define GEN10_FREQ_INFO_REC _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5ef0) +#define RPE_MASK REG_GENMASK(15, 8) + +#include "gt/intel_gt_regs.h" +/* For GEN6_RPNSWREQ.reg to be merged when the definition moves to Xe */ +#define REQ_RATIO_MASK REG_GENMASK(31, 23) + +/* For GEN6_GT_CORE_STATUS.reg to be merged when the definition moves to Xe */ +#define RCN_MASK REG_GENMASK(2, 0) + +#define GEN12_RPSTAT1 _MMIO(0x1381b4) +#define GEN12_CAGF_MASK REG_GENMASK(19, 11) + +#define GT_FREQUENCY_MULTIPLIER 50 +#define GEN9_FREQ_SCALER 3 + +/** + * DOC: GuC Power Conservation (PC) + * + * GuC Power Conservation (PC) supports multiple features for the most + * efficient and performing use of the GT when GuC submission is enabled, + * including frequency management, Render-C states management, and various + * algorithms for power balancing. + * + * Single Loop Power Conservation (SLPC) is the name given to the suite of + * connected power conservation features in the GuC firmware. The firmware + * exposes a programming interface to the host for the control of SLPC. + * + * Frequency management: + * ===================== + * + * Xe driver enables SLPC with all of its defaults features and frequency + * selection, which varies per platform. + * Xe's GuC PC provides a sysfs API for frequency management: + * + * device/gt#/freq_* *read-only* files: + * - freq_act: The actual resolved frequency decided by PCODE. + * - freq_cur: The current one requested by GuC PC to the Hardware. + * - freq_rpn: The Render Performance (RP) N level, which is the minimal one. + * - freq_rpe: The Render Performance (RP) E level, which is the efficient one. + * - freq_rp0: The Render Performance (RP) 0 level, which is the maximum one. + * + * device/gt#/freq_* *read-write* files: + * - freq_min: GuC PC min request. + * - freq_max: GuC PC max request. 
+ * If max <= min, then freq_min becomes a fixed frequency request. + * + * Render-C States: + * ================ + * + * Render-C states is also a GuC PC feature that is now enabled in Xe for + * all platforms. + * Xe's GuC PC provides a sysfs API for Render-C States: + * + * device/gt#/rc* *read-only* files: + * - rc_status: Provide the actual immediate status of Render-C: (rc0 or rc6) + * - rc6_residency: Provide the rc6_residency counter in units of 1.28 uSec. + * Prone to overflows. + */ + +static struct xe_guc * +pc_to_guc(struct xe_guc_pc *pc) +{ + return container_of(pc, struct xe_guc, pc); +} + +static struct xe_device * +pc_to_xe(struct xe_guc_pc *pc) +{ + struct xe_guc *guc = pc_to_guc(pc); + struct xe_gt *gt = container_of(guc, struct xe_gt, uc.guc); + + return gt_to_xe(gt); +} + +static struct xe_gt * +pc_to_gt(struct xe_guc_pc *pc) +{ + return container_of(pc, struct xe_gt, uc.guc.pc); +} + +static struct xe_guc_pc * +dev_to_pc(struct device *dev) +{ + return &kobj_to_gt(&dev->kobj)->uc.guc.pc; +} + +static struct iosys_map * +pc_to_maps(struct xe_guc_pc *pc) +{ + return &pc->bo->vmap; +} + +#define slpc_shared_data_read(pc_, field_) \ + xe_map_rd_field(pc_to_xe(pc_), pc_to_maps(pc_), 0, \ + struct slpc_shared_data, field_) + +#define slpc_shared_data_write(pc_, field_, val_) \ + xe_map_wr_field(pc_to_xe(pc_), pc_to_maps(pc_), 0, \ + struct slpc_shared_data, field_, val_) + +#define SLPC_EVENT(id, count) \ + (FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID, id) | \ + FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count)) + +static bool pc_is_in_state(struct xe_guc_pc *pc, enum slpc_global_state state) +{ + xe_device_assert_mem_access(pc_to_xe(pc)); + return slpc_shared_data_read(pc, header.global_state) == state; +} + +static int pc_action_reset(struct xe_guc_pc *pc) +{ + struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; + int ret; + u32 action[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_RESET, 2), + xe_bo_ggtt_addr(pc->bo), + 0, + }; + + ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); + if (ret) + drm_err(&pc_to_xe(pc)->drm, "GuC PC reset: %pe", ERR_PTR(ret)); + + return ret; +} + +static int pc_action_shutdown(struct xe_guc_pc *pc) +{ + struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; + int ret; + u32 action[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_SHUTDOWN, 2), + xe_bo_ggtt_addr(pc->bo), + 0, + }; + + ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); + if (ret) + drm_err(&pc_to_xe(pc)->drm, "GuC PC shutdown %pe", + ERR_PTR(ret)); + + return ret; +} + +static int pc_action_query_task_state(struct xe_guc_pc *pc) +{ + struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; + int ret; + u32 action[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2), + xe_bo_ggtt_addr(pc->bo), + 0, + }; + + if (!pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING)) + return -EAGAIN; + + /* Blocking here to ensure the results are ready before reading them */ + ret = xe_guc_ct_send_block(ct, action, ARRAY_SIZE(action)); + if (ret) + drm_err(&pc_to_xe(pc)->drm, + "GuC PC query task state failed: %pe", ERR_PTR(ret)); + + return ret; +} + +static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value) +{ + struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; + int ret; + u32 action[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), + id, + value, + }; + + if (!pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING)) + return -EAGAIN; + + ret = xe_guc_ct_send(ct, action, 
ARRAY_SIZE(action), 0, 0); + if (ret) + drm_err(&pc_to_xe(pc)->drm, "GuC PC set param failed: %pe", + ERR_PTR(ret)); + + return ret; +} + +static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode) +{ + struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; + u32 action[] = { + XE_GUC_ACTION_SETUP_PC_GUCRC, + mode, + }; + int ret; + + ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); + if (ret) + drm_err(&pc_to_xe(pc)->drm, "GuC RC enable failed: %pe", + ERR_PTR(ret)); + return ret; +} + +static u32 decode_freq(u32 raw) +{ + return DIV_ROUND_CLOSEST(raw * GT_FREQUENCY_MULTIPLIER, + GEN9_FREQ_SCALER); +} + +static u32 pc_get_min_freq(struct xe_guc_pc *pc) +{ + u32 freq; + + freq = FIELD_GET(SLPC_MIN_UNSLICE_FREQ_MASK, + slpc_shared_data_read(pc, task_state_data.freq)); + + return decode_freq(freq); +} + +static int pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) +{ + /* + * Let's only check for the rpn-rp0 range. If max < min, + * min becomes a fixed request. + */ + if (freq < pc->rpn_freq || freq > pc->rp0_freq) + return -EINVAL; + + /* + * GuC policy is to elevate minimum frequency to the efficient levels + * Our goal is to have the admin choices respected. + */ + pc_action_set_param(pc, SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, + freq < pc->rpe_freq); + + return pc_action_set_param(pc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + freq); +} + +static int pc_get_max_freq(struct xe_guc_pc *pc) +{ + u32 freq; + + freq = FIELD_GET(SLPC_MAX_UNSLICE_FREQ_MASK, + slpc_shared_data_read(pc, task_state_data.freq)); + + return decode_freq(freq); +} + +static int pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) +{ + /* + * Let's only check for the rpn-rp0 range. If max < min, + * min becomes a fixed request. + * Also, overclocking is not supported. + */ + if (freq < pc->rpn_freq || freq > pc->rp0_freq) + return -EINVAL; + + return pc_action_set_param(pc, + SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ, + freq); +} + +static void pc_update_rp_values(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + struct xe_device *xe = gt_to_xe(gt); + u32 reg; + + /* + * For PVC we still need to use fused RP1 as the approximation for RPe + * For other platforms than PVC we get the resolved RPe directly from + * PCODE at a different register + */ + if (xe->info.platform == XE_PVC) + reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP.reg); + else + reg = xe_mmio_read32(gt, GEN10_FREQ_INFO_REC.reg); + + pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; + + /* + * RPe is decided at runtime by PCODE. In the rare case where that's + * smaller than the fused min, we will trust the PCODE and use that + * as our minimum one. + */ + pc->rpn_freq = min(pc->rpn_freq, pc->rpe_freq); +} + +static ssize_t freq_act_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kobject *kobj = &dev->kobj; + struct xe_gt *gt = kobj_to_gt(kobj); + u32 freq; + ssize_t ret; + + /* + * When in RC6, actual frequency is 0. Let's block RC6 so we are able + * to verify that our freq requests are really happening. 
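+	 * Holding XE_FORCEWAKE_ALL for the duration of the read is what keeps
+	 * the GT out of RC6 here, so the CAGF field below reports a live value.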
+ */ + ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (ret) + return ret; + + xe_device_mem_access_get(gt_to_xe(gt)); + freq = xe_mmio_read32(gt, GEN12_RPSTAT1.reg); + xe_device_mem_access_put(gt_to_xe(gt)); + + freq = REG_FIELD_GET(GEN12_CAGF_MASK, freq); + ret = sysfs_emit(buf, "%d\n", decode_freq(freq)); + + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + return ret; +} +static DEVICE_ATTR_RO(freq_act); + +static ssize_t freq_cur_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kobject *kobj = &dev->kobj; + struct xe_gt *gt = kobj_to_gt(kobj); + u32 freq; + ssize_t ret; + + /* + * GuC SLPC plays with cur freq request when GuCRC is enabled + * Block RC6 for a more reliable read. + */ + ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (ret) + return ret; + + xe_device_mem_access_get(gt_to_xe(gt)); + freq = xe_mmio_read32(gt, GEN6_RPNSWREQ.reg); + xe_device_mem_access_put(gt_to_xe(gt)); + + freq = REG_FIELD_GET(REQ_RATIO_MASK, freq); + ret = sysfs_emit(buf, "%d\n", decode_freq(freq)); + + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + return ret; +} +static DEVICE_ATTR_RO(freq_cur); + +static ssize_t freq_rp0_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + + return sysfs_emit(buf, "%d\n", pc->rp0_freq); +} +static DEVICE_ATTR_RO(freq_rp0); + +static ssize_t freq_rpe_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + + pc_update_rp_values(pc); + return sysfs_emit(buf, "%d\n", pc->rpe_freq); +} +static DEVICE_ATTR_RO(freq_rpe); + +static ssize_t freq_rpn_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + + return sysfs_emit(buf, "%d\n", pc->rpn_freq); +} +static DEVICE_ATTR_RO(freq_rpn); + +static ssize_t freq_min_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + struct xe_gt *gt = pc_to_gt(pc); + ssize_t ret; + + xe_device_mem_access_get(pc_to_xe(pc)); + mutex_lock(&pc->freq_lock); + if (!pc->freq_ready) { + /* Might be in the middle of a gt reset */ + ret = -EAGAIN; + goto out; + } + + /* + * GuC SLPC plays with min freq request when GuCRC is enabled + * Block RC6 for a more reliable read. 
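+	 * The value itself comes from the SLPC shared page, refreshed by a
+	 * blocking QUERY_TASK_STATE H2G before it is parsed below.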
+ */ + ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (ret) + goto out; + + ret = pc_action_query_task_state(pc); + if (ret) + goto fw; + + ret = sysfs_emit(buf, "%d\n", pc_get_min_freq(pc)); + +fw: + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); +out: + mutex_unlock(&pc->freq_lock); + xe_device_mem_access_put(pc_to_xe(pc)); + return ret; +} + +static ssize_t freq_min_store(struct device *dev, struct device_attribute *attr, + const char *buff, size_t count) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; + ssize_t ret; + + ret = kstrtou32(buff, 0, &freq); + if (ret) + return ret; + + xe_device_mem_access_get(pc_to_xe(pc)); + mutex_lock(&pc->freq_lock); + if (!pc->freq_ready) { + /* Might be in the middle of a gt reset */ + ret = -EAGAIN; + goto out; + } + + ret = pc_set_min_freq(pc, freq); + if (ret) + goto out; + + pc->user_requested_min = freq; + +out: + mutex_unlock(&pc->freq_lock); + xe_device_mem_access_put(pc_to_xe(pc)); + return ret ?: count; +} +static DEVICE_ATTR_RW(freq_min); + +static ssize_t freq_max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + ssize_t ret; + + xe_device_mem_access_get(pc_to_xe(pc)); + mutex_lock(&pc->freq_lock); + if (!pc->freq_ready) { + /* Might be in the middle of a gt reset */ + ret = -EAGAIN; + goto out; + } + + ret = pc_action_query_task_state(pc); + if (ret) + goto out; + + ret = sysfs_emit(buf, "%d\n", pc_get_max_freq(pc)); + +out: + mutex_unlock(&pc->freq_lock); + xe_device_mem_access_put(pc_to_xe(pc)); + return ret; +} + +static ssize_t freq_max_store(struct device *dev, struct device_attribute *attr, + const char *buff, size_t count) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; + ssize_t ret; + + ret = kstrtou32(buff, 0, &freq); + if (ret) + return ret; + + xe_device_mem_access_get(pc_to_xe(pc)); + mutex_lock(&pc->freq_lock); + if (!pc->freq_ready) { + /* Might be in the middle of a gt reset */ + ret = -EAGAIN; + goto out; + } + + ret = pc_set_max_freq(pc, freq); + if (ret) + goto out; + + pc->user_requested_max = freq; + +out: + mutex_unlock(&pc->freq_lock); + xe_device_mem_access_put(pc_to_xe(pc)); + return ret ?: count; +} +static DEVICE_ATTR_RW(freq_max); + +static ssize_t rc_status_show(struct device *dev, + struct device_attribute *attr, char *buff) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + struct xe_gt *gt = pc_to_gt(pc); + u32 reg; + + xe_device_mem_access_get(gt_to_xe(gt)); + reg = xe_mmio_read32(gt, GEN6_GT_CORE_STATUS.reg); + xe_device_mem_access_put(gt_to_xe(gt)); + + switch (REG_FIELD_GET(RCN_MASK, reg)) { + case GEN6_RC6: + return sysfs_emit(buff, "rc6\n"); + case GEN6_RC0: + return sysfs_emit(buff, "rc0\n"); + default: + return -ENOENT; + } +} +static DEVICE_ATTR_RO(rc_status); + +static ssize_t rc6_residency_show(struct device *dev, + struct device_attribute *attr, char *buff) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + struct xe_gt *gt = pc_to_gt(pc); + u32 reg; + ssize_t ret; + + ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (ret) + return ret; + + xe_device_mem_access_get(pc_to_xe(pc)); + reg = xe_mmio_read32(gt, GEN6_GT_GFX_RC6.reg); + xe_device_mem_access_put(pc_to_xe(pc)); + + ret = sysfs_emit(buff, "%u\n", reg); + + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + return ret; +} +static DEVICE_ATTR_RO(rc6_residency); + +static const struct attribute *pc_attrs[] = { + &dev_attr_freq_act.attr, + &dev_attr_freq_cur.attr, + &dev_attr_freq_rp0.attr, + &dev_attr_freq_rpe.attr, 
+ &dev_attr_freq_rpn.attr, + &dev_attr_freq_min.attr, + &dev_attr_freq_max.attr, + &dev_attr_rc_status.attr, + &dev_attr_rc6_residency.attr, + NULL +}; + +static void pc_init_fused_rp_values(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + struct xe_device *xe = gt_to_xe(gt); + u32 reg; + + xe_device_assert_mem_access(pc_to_xe(pc)); + + if (xe->info.platform == XE_PVC) + reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP.reg); + else + reg = xe_mmio_read32(gt, GEN6_RP_STATE_CAP.reg); + pc->rp0_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER; + pc->rpn_freq = REG_FIELD_GET(RPN_MASK, reg) * GT_FREQUENCY_MULTIPLIER; +} + +static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) +{ + int ret; + + lockdep_assert_held(&pc->freq_lock); + + ret = pc_action_query_task_state(pc); + if (ret) + return ret; + + /* + * GuC defaults to some RPmax that is not actually achievable without + * overclocking. Let's adjust it to the Hardware RP0, which is the + * regular maximum + */ + if (pc_get_max_freq(pc) > pc->rp0_freq) + pc_set_max_freq(pc, pc->rp0_freq); + + /* + * Same thing happens for Server platforms where min is listed as + * RPMax + */ + if (pc_get_min_freq(pc) > pc->rp0_freq) + pc_set_min_freq(pc, pc->rp0_freq); + + return 0; +} + +static int pc_adjust_requested_freq(struct xe_guc_pc *pc) +{ + int ret = 0; + + lockdep_assert_held(&pc->freq_lock); + + if (pc->user_requested_min != 0) { + ret = pc_set_min_freq(pc, pc->user_requested_min); + if (ret) + return ret; + } + + if (pc->user_requested_max != 0) { + ret = pc_set_max_freq(pc, pc->user_requested_max); + if (ret) + return ret; + } + + return ret; +} + +static int pc_gucrc_disable(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + int ret; + + xe_device_assert_mem_access(pc_to_xe(pc)); + + ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL); + if (ret) + return ret; + + ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (ret) + return ret; + + xe_mmio_write32(gt, GEN9_PG_ENABLE.reg, 0); + xe_mmio_write32(gt, GEN6_RC_CONTROL.reg, 0); + xe_mmio_write32(gt, GEN6_RC_STATE.reg, 0); + + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + return 0; +} + +static void pc_init_pcode_freq(struct xe_guc_pc *pc) +{ + u32 min = DIV_ROUND_CLOSEST(pc->rpn_freq, GT_FREQUENCY_MULTIPLIER); + u32 max = DIV_ROUND_CLOSEST(pc->rp0_freq, GT_FREQUENCY_MULTIPLIER); + + XE_WARN_ON(xe_pcode_init_min_freq_table(pc_to_gt(pc), min, max)); +} + +static int pc_init_freqs(struct xe_guc_pc *pc) +{ + int ret; + + mutex_lock(&pc->freq_lock); + + ret = pc_adjust_freq_bounds(pc); + if (ret) + goto out; + + ret = pc_adjust_requested_freq(pc); + if (ret) + goto out; + + pc_update_rp_values(pc); + + pc_init_pcode_freq(pc); + + /* + * The frequencies are really ready for use only after the user + * requested ones got restored. 
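+	 * freq_ready is flipped under freq_lock, so from this point on the
+	 * sysfs handlers stop returning -EAGAIN and act on requests again.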
+ */ + pc->freq_ready = true; + +out: + mutex_unlock(&pc->freq_lock); + return ret; +} + +/** + * xe_guc_pc_start - Start GuC's Power Conservation component + * @pc: Xe_GuC_PC instance + */ +int xe_guc_pc_start(struct xe_guc_pc *pc) +{ + struct xe_device *xe = pc_to_xe(pc); + struct xe_gt *gt = pc_to_gt(pc); + u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); + int ret; + + XE_WARN_ON(!xe_device_guc_submission_enabled(xe)); + + xe_device_mem_access_get(pc_to_xe(pc)); + + memset(pc->bo->vmap.vaddr, 0, size); + slpc_shared_data_write(pc, header.size, size); + + ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (ret) + return ret; + + ret = pc_action_reset(pc); + if (ret) + goto out; + + if (wait_for(pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING), 5)) { + drm_err(&pc_to_xe(pc)->drm, "GuC PC Start failed\n"); + ret = -EIO; + goto out; + } + + ret = pc_init_freqs(pc); + if (ret) + goto out; + + if (xe->info.platform == XE_PVC) { + pc_gucrc_disable(pc); + ret = 0; + goto out; + } + + ret = pc_action_setup_gucrc(pc, XE_GUCRC_FIRMWARE_CONTROL); + +out: + xe_device_mem_access_put(pc_to_xe(pc)); + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + return ret; +} + +/** + * xe_guc_pc_stop - Stop GuC's Power Conservation component + * @pc: Xe_GuC_PC instance + */ +int xe_guc_pc_stop(struct xe_guc_pc *pc) +{ + int ret; + + xe_device_mem_access_get(pc_to_xe(pc)); + + ret = pc_gucrc_disable(pc); + if (ret) + goto out; + + mutex_lock(&pc->freq_lock); + pc->freq_ready = false; + mutex_unlock(&pc->freq_lock); + + ret = pc_action_shutdown(pc); + if (ret) + goto out; + + if (wait_for(pc_is_in_state(pc, SLPC_GLOBAL_STATE_NOT_RUNNING), 5)) { + drm_err(&pc_to_xe(pc)->drm, "GuC PC Shutdown failed\n"); + ret = -EIO; + } + +out: + xe_device_mem_access_put(pc_to_xe(pc)); + return ret; +} + +static void pc_fini(struct drm_device *drm, void *arg) +{ + struct xe_guc_pc *pc = arg; + + XE_WARN_ON(xe_guc_pc_stop(pc)); + sysfs_remove_files(pc_to_gt(pc)->sysfs, pc_attrs); + xe_bo_unpin_map_no_vm(pc->bo); +} + +/** + * xe_guc_pc_init - Initialize GuC's Power Conservation component + * @pc: Xe_GuC_PC instance + */ +int xe_guc_pc_init(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + struct xe_device *xe = gt_to_xe(gt); + struct xe_bo *bo; + u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); + int err; + + mutex_init(&pc->freq_lock); + + bo = xe_bo_create_pin_map(xe, gt, NULL, size, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_GGTT_BIT); + + if (IS_ERR(bo)) + return PTR_ERR(bo); + + pc->bo = bo; + + pc_init_fused_rp_values(pc); + + err = sysfs_create_files(gt->sysfs, pc_attrs); + if (err) + return err; + + err = drmm_add_action_or_reset(&xe->drm, pc_fini, pc); + if (err) + return err; + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h new file mode 100644 index 000000000000..da29e4934868 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_PC_H_ +#define _XE_GUC_PC_H_ + +#include "xe_guc_pc_types.h" + +int xe_guc_pc_init(struct xe_guc_pc *pc); +int xe_guc_pc_start(struct xe_guc_pc *pc); +int xe_guc_pc_stop(struct xe_guc_pc *pc); + +#endif /* _XE_GUC_PC_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h new file mode 100644 index 000000000000..39548e03acf4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h @@ -0,0 +1,34 @@ +/* 
SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_PC_TYPES_H_ +#define _XE_GUC_PC_TYPES_H_ + +#include +#include + +/** + * struct xe_guc_pc - GuC Power Conservation (PC) + */ +struct xe_guc_pc { + /** @bo: GGTT buffer object that is shared with GuC PC */ + struct xe_bo *bo; + /** @rp0_freq: HW RP0 frequency - The Maximum one */ + u32 rp0_freq; + /** @rpe_freq: HW RPe frequency - The Efficient one */ + u32 rpe_freq; + /** @rpn_freq: HW RPN frequency - The Minimum one */ + u32 rpn_freq; + /** @user_requested_min: Stash the minimum requested freq by user */ + u32 user_requested_min; + /** @user_requested_max: Stash the maximum requested freq by user */ + u32 user_requested_max; + /** @freq_lock: Let's protect the frequencies */ + struct mutex freq_lock; + /** @freq_ready: Only handle freq changes, if they are really ready */ + bool freq_ready; +}; + +#endif /* _XE_GUC_PC_TYPES_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_reg.h b/drivers/gpu/drm/xe/xe_guc_reg.h new file mode 100644 index 000000000000..1e16a9b76ddc --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_reg.h @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_REG_H_ +#define _XE_GUC_REG_H_ + +#include +#include + +#include "i915_reg.h" + +/* Definitions of GuC H/W registers, bits, etc */ + +#define GUC_STATUS _MMIO(0xc000) +#define GS_RESET_SHIFT 0 +#define GS_MIA_IN_RESET (0x01 << GS_RESET_SHIFT) +#define GS_BOOTROM_SHIFT 1 +#define GS_BOOTROM_MASK (0x7F << GS_BOOTROM_SHIFT) +#define GS_BOOTROM_RSA_FAILED (0x50 << GS_BOOTROM_SHIFT) +#define GS_BOOTROM_JUMP_PASSED (0x76 << GS_BOOTROM_SHIFT) +#define GS_UKERNEL_SHIFT 8 +#define GS_UKERNEL_MASK (0xFF << GS_UKERNEL_SHIFT) +#define GS_MIA_SHIFT 16 +#define GS_MIA_MASK (0x07 << GS_MIA_SHIFT) +#define GS_MIA_CORE_STATE (0x01 << GS_MIA_SHIFT) +#define GS_MIA_HALT_REQUESTED (0x02 << GS_MIA_SHIFT) +#define GS_MIA_ISR_ENTRY (0x04 << GS_MIA_SHIFT) +#define GS_AUTH_STATUS_SHIFT 30 +#define GS_AUTH_STATUS_MASK (0x03 << GS_AUTH_STATUS_SHIFT) +#define GS_AUTH_STATUS_BAD (0x01 << GS_AUTH_STATUS_SHIFT) +#define GS_AUTH_STATUS_GOOD (0x02 << GS_AUTH_STATUS_SHIFT) + +#define SOFT_SCRATCH(n) _MMIO(0xc180 + (n) * 4) +#define SOFT_SCRATCH_COUNT 16 + +#define GEN11_SOFT_SCRATCH(n) _MMIO(0x190240 + (n) * 4) +#define GEN11_SOFT_SCRATCH_COUNT 4 + +#define UOS_RSA_SCRATCH(i) _MMIO(0xc200 + (i) * 4) +#define UOS_RSA_SCRATCH_COUNT 64 + +#define DMA_ADDR_0_LOW _MMIO(0xc300) +#define DMA_ADDR_0_HIGH _MMIO(0xc304) +#define DMA_ADDR_1_LOW _MMIO(0xc308) +#define DMA_ADDR_1_HIGH _MMIO(0xc30c) +#define DMA_ADDRESS_SPACE_WOPCM (7 << 16) +#define DMA_ADDRESS_SPACE_GTT (8 << 16) +#define DMA_COPY_SIZE _MMIO(0xc310) +#define DMA_CTRL _MMIO(0xc314) +#define HUC_UKERNEL (1<<9) +#define UOS_MOVE (1<<4) +#define START_DMA (1<<0) +#define DMA_GUC_WOPCM_OFFSET _MMIO(0xc340) +#define GUC_WOPCM_OFFSET_VALID (1<<0) +#define HUC_LOADING_AGENT_VCR (0<<1) +#define HUC_LOADING_AGENT_GUC (1<<1) +#define GUC_WOPCM_OFFSET_SHIFT 14 +#define GUC_WOPCM_OFFSET_MASK (0x3ffff << GUC_WOPCM_OFFSET_SHIFT) +#define GUC_MAX_IDLE_COUNT _MMIO(0xC3E4) + +#define HUC_STATUS2 _MMIO(0xD3B0) +#define HUC_FW_VERIFIED (1<<7) + +#define GEN11_HUC_KERNEL_LOAD_INFO _MMIO(0xC1DC) +#define HUC_LOAD_SUCCESSFUL (1 << 0) + +#define GUC_WOPCM_SIZE _MMIO(0xc050) +#define GUC_WOPCM_SIZE_LOCKED (1<<0) +#define GUC_WOPCM_SIZE_SHIFT 12 +#define GUC_WOPCM_SIZE_MASK (0xfffff << GUC_WOPCM_SIZE_SHIFT) + +#define GEN8_GT_PM_CONFIG _MMIO(0x138140) +#define 
GEN9LP_GT_PM_CONFIG _MMIO(0x138140) +#define GEN9_GT_PM_CONFIG _MMIO(0x13816c) +#define GT_DOORBELL_ENABLE (1<<0) + +#define GEN8_GTCR _MMIO(0x4274) +#define GEN8_GTCR_INVALIDATE (1<<0) + +#define GEN12_GUC_TLB_INV_CR _MMIO(0xcee8) +#define GEN12_GUC_TLB_INV_CR_INVALIDATE (1 << 0) + +#define GUC_ARAT_C6DIS _MMIO(0xA178) + +#define GUC_SHIM_CONTROL _MMIO(0xc064) +#define GUC_DISABLE_SRAM_INIT_TO_ZEROES (1<<0) +#define GUC_ENABLE_READ_CACHE_LOGIC (1<<1) +#define GUC_ENABLE_MIA_CACHING (1<<2) +#define GUC_GEN10_MSGCH_ENABLE (1<<4) +#define GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA (1<<9) +#define GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA (1<<10) +#define GUC_ENABLE_MIA_CLOCK_GATING (1<<15) +#define GUC_GEN10_SHIM_WC_ENABLE (1<<21) + +#define GUC_SEND_INTERRUPT _MMIO(0xc4c8) +#define GUC_SEND_TRIGGER (1<<0) +#define GEN11_GUC_HOST_INTERRUPT _MMIO(0x1901f0) + +#define GUC_NUM_DOORBELLS 256 + +/* format of the HW-monitored doorbell cacheline */ +struct guc_doorbell_info { + u32 db_status; +#define GUC_DOORBELL_DISABLED 0 +#define GUC_DOORBELL_ENABLED 1 + + u32 cookie; + u32 reserved[14]; +} __packed; + +#define GEN8_DRBREGL(x) _MMIO(0x1000 + (x) * 8) +#define GEN8_DRB_VALID (1<<0) +#define GEN8_DRBREGU(x) _MMIO(0x1000 + (x) * 8 + 4) + +#define GEN12_DIST_DBS_POPULATED _MMIO(0xd08) +#define GEN12_DOORBELLS_PER_SQIDI_SHIFT 16 +#define GEN12_DOORBELLS_PER_SQIDI (0xff) +#define GEN12_SQIDIS_DOORBELL_EXIST (0xffff) + +#define DE_GUCRMR _MMIO(0x44054) + +#define GUC_BCS_RCS_IER _MMIO(0xC550) +#define GUC_VCS2_VCS1_IER _MMIO(0xC554) +#define GUC_WD_VECS_IER _MMIO(0xC558) +#define GUC_PM_P24C_IER _MMIO(0xC55C) + +/* GuC Interrupt Vector */ +#define GUC_INTR_GUC2HOST BIT(15) +#define GUC_INTR_EXEC_ERROR BIT(14) +#define GUC_INTR_DISPLAY_EVENT BIT(13) +#define GUC_INTR_SEM_SIG BIT(12) +#define GUC_INTR_IOMMU2GUC BIT(11) +#define GUC_INTR_DOORBELL_RANG BIT(10) +#define GUC_INTR_DMA_DONE BIT(9) +#define GUC_INTR_FATAL_ERROR BIT(8) +#define GUC_INTR_NOTIF_ERROR BIT(7) +#define GUC_INTR_SW_INT_6 BIT(6) +#define GUC_INTR_SW_INT_5 BIT(5) +#define GUC_INTR_SW_INT_4 BIT(4) +#define GUC_INTR_SW_INT_3 BIT(3) +#define GUC_INTR_SW_INT_2 BIT(2) +#define GUC_INTR_SW_INT_1 BIT(1) +#define GUC_INTR_SW_INT_0 BIT(0) + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c new file mode 100644 index 000000000000..e0d424c2b78c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -0,0 +1,1695 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include +#include +#include +#include + +#include + +#include "xe_device.h" +#include "xe_engine.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_guc_engine_types.h" +#include "xe_guc_submit.h" +#include "xe_gt.h" +#include "xe_force_wake.h" +#include "xe_gpu_scheduler.h" +#include "xe_hw_engine.h" +#include "xe_hw_fence.h" +#include "xe_lrc.h" +#include "xe_macros.h" +#include "xe_map.h" +#include "xe_mocs.h" +#include "xe_ring_ops_types.h" +#include "xe_sched_job.h" +#include "xe_trace.h" +#include "xe_vm.h" + +#include "gt/intel_lrc_reg.h" + +static struct xe_gt * +guc_to_gt(struct xe_guc *guc) +{ + return container_of(guc, struct xe_gt, uc.guc); +} + +static struct xe_device * +guc_to_xe(struct xe_guc *guc) +{ + return gt_to_xe(guc_to_gt(guc)); +} + +static struct xe_guc * +engine_to_guc(struct xe_engine *e) +{ + return &e->gt->uc.guc; +} + +/* + * Helpers for engine state, using an atomic as some of the bits can transition + * as the same time (e.g. 
a suspend can be happning at the same time as schedule + * engine done being processed). + */ +#define ENGINE_STATE_REGISTERED (1 << 0) +#define ENGINE_STATE_ENABLED (1 << 1) +#define ENGINE_STATE_PENDING_ENABLE (1 << 2) +#define ENGINE_STATE_PENDING_DISABLE (1 << 3) +#define ENGINE_STATE_DESTROYED (1 << 4) +#define ENGINE_STATE_SUSPENDED (1 << 5) +#define ENGINE_STATE_RESET (1 << 6) +#define ENGINE_STATE_KILLED (1 << 7) + +static bool engine_registered(struct xe_engine *e) +{ + return atomic_read(&e->guc->state) & ENGINE_STATE_REGISTERED; +} + +static void set_engine_registered(struct xe_engine *e) +{ + atomic_or(ENGINE_STATE_REGISTERED, &e->guc->state); +} + +static void clear_engine_registered(struct xe_engine *e) +{ + atomic_and(~ENGINE_STATE_REGISTERED, &e->guc->state); +} + +static bool engine_enabled(struct xe_engine *e) +{ + return atomic_read(&e->guc->state) & ENGINE_STATE_ENABLED; +} + +static void set_engine_enabled(struct xe_engine *e) +{ + atomic_or(ENGINE_STATE_ENABLED, &e->guc->state); +} + +static void clear_engine_enabled(struct xe_engine *e) +{ + atomic_and(~ENGINE_STATE_ENABLED, &e->guc->state); +} + +static bool engine_pending_enable(struct xe_engine *e) +{ + return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_ENABLE; +} + +static void set_engine_pending_enable(struct xe_engine *e) +{ + atomic_or(ENGINE_STATE_PENDING_ENABLE, &e->guc->state); +} + +static void clear_engine_pending_enable(struct xe_engine *e) +{ + atomic_and(~ENGINE_STATE_PENDING_ENABLE, &e->guc->state); +} + +static bool engine_pending_disable(struct xe_engine *e) +{ + return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_DISABLE; +} + +static void set_engine_pending_disable(struct xe_engine *e) +{ + atomic_or(ENGINE_STATE_PENDING_DISABLE, &e->guc->state); +} + +static void clear_engine_pending_disable(struct xe_engine *e) +{ + atomic_and(~ENGINE_STATE_PENDING_DISABLE, &e->guc->state); +} + +static bool engine_destroyed(struct xe_engine *e) +{ + return atomic_read(&e->guc->state) & ENGINE_STATE_DESTROYED; +} + +static void set_engine_destroyed(struct xe_engine *e) +{ + atomic_or(ENGINE_STATE_DESTROYED, &e->guc->state); +} + +static bool engine_banned(struct xe_engine *e) +{ + return (e->flags & ENGINE_FLAG_BANNED); +} + +static void set_engine_banned(struct xe_engine *e) +{ + e->flags |= ENGINE_FLAG_BANNED; +} + +static bool engine_suspended(struct xe_engine *e) +{ + return atomic_read(&e->guc->state) & ENGINE_STATE_SUSPENDED; +} + +static void set_engine_suspended(struct xe_engine *e) +{ + atomic_or(ENGINE_STATE_SUSPENDED, &e->guc->state); +} + +static void clear_engine_suspended(struct xe_engine *e) +{ + atomic_and(~ENGINE_STATE_SUSPENDED, &e->guc->state); +} + +static bool engine_reset(struct xe_engine *e) +{ + return atomic_read(&e->guc->state) & ENGINE_STATE_RESET; +} + +static void set_engine_reset(struct xe_engine *e) +{ + atomic_or(ENGINE_STATE_RESET, &e->guc->state); +} + +static bool engine_killed(struct xe_engine *e) +{ + return atomic_read(&e->guc->state) & ENGINE_STATE_KILLED; +} + +static void set_engine_killed(struct xe_engine *e) +{ + atomic_or(ENGINE_STATE_KILLED, &e->guc->state); +} + +static bool engine_killed_or_banned(struct xe_engine *e) +{ + return engine_killed(e) || engine_banned(e); +} + +static void guc_submit_fini(struct drm_device *drm, void *arg) +{ + struct xe_guc *guc = arg; + + xa_destroy(&guc->submission_state.engine_lookup); + ida_destroy(&guc->submission_state.guc_ids); + bitmap_free(guc->submission_state.guc_ids_bitmap); +} + +#define GUC_ID_MAX 65535 
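+/*
+ * The guc_id space is split in two: single-LRC engines allocate from an ida
+ * over the first GUC_ID_NUMBER_SLRC ids, while parallel (multi-LRC) engines
+ * take contiguous regions from a bitmap whose ids start at GUC_ID_START_MLRC.
+ */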
+#define GUC_ID_NUMBER_MLRC 4096 +#define GUC_ID_NUMBER_SLRC (GUC_ID_MAX - GUC_ID_NUMBER_MLRC) +#define GUC_ID_START_MLRC GUC_ID_NUMBER_SLRC + +static const struct xe_engine_ops guc_engine_ops; + +static void primelockdep(struct xe_guc *guc) +{ + if (!IS_ENABLED(CONFIG_LOCKDEP)) + return; + + fs_reclaim_acquire(GFP_KERNEL); + + mutex_lock(&guc->submission_state.lock); + might_lock(&guc->submission_state.suspend.lock); + mutex_unlock(&guc->submission_state.lock); + + fs_reclaim_release(GFP_KERNEL); +} + +int xe_guc_submit_init(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + int err; + + guc->submission_state.guc_ids_bitmap = + bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL); + if (!guc->submission_state.guc_ids_bitmap) + return -ENOMEM; + + gt->engine_ops = &guc_engine_ops; + + mutex_init(&guc->submission_state.lock); + xa_init(&guc->submission_state.engine_lookup); + ida_init(&guc->submission_state.guc_ids); + + spin_lock_init(&guc->submission_state.suspend.lock); + guc->submission_state.suspend.context = dma_fence_context_alloc(1); + + primelockdep(guc); + + err = drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); + if (err) + return err; + + return 0; +} + +static int alloc_guc_id(struct xe_guc *guc, struct xe_engine *e) +{ + int ret; + void *ptr; + + /* + * Must use GFP_NOWAIT as this lock is in the dma fence signalling path, + * worse case user gets -ENOMEM on engine create and has to try again. + * + * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent + * failure. + */ + lockdep_assert_held(&guc->submission_state.lock); + + if (xe_engine_is_parallel(e)) { + void *bitmap = guc->submission_state.guc_ids_bitmap; + + ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC, + order_base_2(e->width)); + } else { + ret = ida_simple_get(&guc->submission_state.guc_ids, 0, + GUC_ID_NUMBER_SLRC, GFP_NOWAIT); + } + if (ret < 0) + return ret; + + e->guc->id = ret; + if (xe_engine_is_parallel(e)) + e->guc->id += GUC_ID_START_MLRC; + + ptr = xa_store(&guc->submission_state.engine_lookup, + e->guc->id, e, GFP_NOWAIT); + if (IS_ERR(ptr)) { + ret = PTR_ERR(ptr); + goto err_release; + } + + return 0; + +err_release: + ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id); + return ret; +} + +static void release_guc_id(struct xe_guc *guc, struct xe_engine *e) +{ + mutex_lock(&guc->submission_state.lock); + xa_erase(&guc->submission_state.engine_lookup, e->guc->id); + if (xe_engine_is_parallel(e)) + bitmap_release_region(guc->submission_state.guc_ids_bitmap, + e->guc->id - GUC_ID_START_MLRC, + order_base_2(e->width)); + else + ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id); + mutex_unlock(&guc->submission_state.lock); +} + +struct engine_policy { + u32 count; + struct guc_update_engine_policy h2g; +}; + +static u32 __guc_engine_policy_action_size(struct engine_policy *policy) +{ + size_t bytes = sizeof(policy->h2g.header) + + (sizeof(policy->h2g.klv[0]) * policy->count); + + return bytes / sizeof(u32); +} + +static void __guc_engine_policy_start_klv(struct engine_policy *policy, + u16 guc_id) +{ + policy->h2g.header.action = + XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; + policy->h2g.header.guc_id = guc_id; + policy->count = 0; +} + +#define MAKE_ENGINE_POLICY_ADD(func, id) \ +static void __guc_engine_policy_add_##func(struct engine_policy *policy, \ + u32 data) \ +{ \ + XE_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ + \ + policy->h2g.klv[policy->count].kl = \ + 
FIELD_PREP(GUC_KLV_0_KEY, \ + GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ + FIELD_PREP(GUC_KLV_0_LEN, 1); \ + policy->h2g.klv[policy->count].value = data; \ + policy->count++; \ +} + +MAKE_ENGINE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) +MAKE_ENGINE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) +MAKE_ENGINE_POLICY_ADD(priority, SCHEDULING_PRIORITY) +#undef MAKE_ENGINE_POLICY_ADD + +static const int xe_engine_prio_to_guc[] = { + [XE_ENGINE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL, + [XE_ENGINE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL, + [XE_ENGINE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH, + [XE_ENGINE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH, +}; + +static void init_policies(struct xe_guc *guc, struct xe_engine *e) +{ + struct engine_policy policy; + enum xe_engine_priority prio = e->priority; + u32 timeslice_us = e->sched_props.timeslice_us; + u32 preempt_timeout_us = e->sched_props.preempt_timeout_us; + + XE_BUG_ON(!engine_registered(e)); + + __guc_engine_policy_start_klv(&policy, e->guc->id); + __guc_engine_policy_add_priority(&policy, xe_engine_prio_to_guc[prio]); + __guc_engine_policy_add_execution_quantum(&policy, timeslice_us); + __guc_engine_policy_add_preemption_timeout(&policy, preempt_timeout_us); + + xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, + __guc_engine_policy_action_size(&policy), 0, 0); +} + +static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e) +{ + struct engine_policy policy; + + __guc_engine_policy_start_klv(&policy, e->guc->id); + __guc_engine_policy_add_preemption_timeout(&policy, 1); + + xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, + __guc_engine_policy_action_size(&policy), 0, 0); +} + +#define PARALLEL_SCRATCH_SIZE 2048 +#define WQ_SIZE (PARALLEL_SCRATCH_SIZE / 2) +#define WQ_OFFSET (PARALLEL_SCRATCH_SIZE - WQ_SIZE) +#define CACHELINE_BYTES 64 + +struct sync_semaphore { + u32 semaphore; + u8 unused[CACHELINE_BYTES - sizeof(u32)]; +}; + +struct parallel_scratch { + struct guc_sched_wq_desc wq_desc; + + struct sync_semaphore go; + struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE]; + + u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) - + sizeof(struct sync_semaphore) * (XE_HW_ENGINE_MAX_INSTANCE + 1)]; + + u32 wq[WQ_SIZE / sizeof(u32)]; +}; + +#define parallel_read(xe_, map_, field_) \ + xe_map_rd_field(xe_, &map_, 0, struct parallel_scratch, field_) +#define parallel_write(xe_, map_, field_, val_) \ + xe_map_wr_field(xe_, &map_, 0, struct parallel_scratch, field_, val_) + +static void __register_mlrc_engine(struct xe_guc *guc, + struct xe_engine *e, + struct guc_ctxt_registration_info *info) +{ +#define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) + u32 action[MAX_MLRC_REG_SIZE]; + int len = 0; + int i; + + XE_BUG_ON(!xe_engine_is_parallel(e)); + + action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; + action[len++] = info->flags; + action[len++] = info->context_idx; + action[len++] = info->engine_class; + action[len++] = info->engine_submit_mask; + action[len++] = info->wq_desc_lo; + action[len++] = info->wq_desc_hi; + action[len++] = info->wq_base_lo; + action[len++] = info->wq_base_hi; + action[len++] = info->wq_size; + action[len++] = e->width; + action[len++] = info->hwlrca_lo; + action[len++] = info->hwlrca_hi; + + for (i = 1; i < e->width; ++i) { + struct xe_lrc *lrc = e->lrc + i; + + action[len++] = lower_32_bits(xe_lrc_descriptor(lrc)); + action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); + } + + XE_BUG_ON(len > MAX_MLRC_REG_SIZE); +#undef MAX_MLRC_REG_SIZE + + 
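+	/* All dwords packed above; send the multi-LRC registration over CT
+	 * without reserving any G2H space.
+	 */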
xe_guc_ct_send(&guc->ct, action, len, 0, 0); +} + +static void __register_engine(struct xe_guc *guc, + struct guc_ctxt_registration_info *info) +{ + u32 action[] = { + XE_GUC_ACTION_REGISTER_CONTEXT, + info->flags, + info->context_idx, + info->engine_class, + info->engine_submit_mask, + info->wq_desc_lo, + info->wq_desc_hi, + info->wq_base_lo, + info->wq_base_hi, + info->wq_size, + info->hwlrca_lo, + info->hwlrca_hi, + }; + + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); +} + +static void register_engine(struct xe_engine *e) +{ + struct xe_guc *guc = engine_to_guc(e); + struct xe_device *xe = guc_to_xe(guc); + struct xe_lrc *lrc = e->lrc; + struct guc_ctxt_registration_info info; + + XE_BUG_ON(engine_registered(e)); + + memset(&info, 0, sizeof(info)); + info.context_idx = e->guc->id; + info.engine_class = xe_engine_class_to_guc_class(e->class); + info.engine_submit_mask = e->logical_mask; + info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc)); + info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc)); + info.flags = CONTEXT_REGISTRATION_FLAG_KMD; + + if (xe_engine_is_parallel(e)) { + u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); + struct iosys_map map = xe_lrc_parallel_map(lrc); + + info.wq_desc_lo = lower_32_bits(ggtt_addr + + offsetof(struct parallel_scratch, wq_desc)); + info.wq_desc_hi = upper_32_bits(ggtt_addr + + offsetof(struct parallel_scratch, wq_desc)); + info.wq_base_lo = lower_32_bits(ggtt_addr + + offsetof(struct parallel_scratch, wq[0])); + info.wq_base_hi = upper_32_bits(ggtt_addr + + offsetof(struct parallel_scratch, wq[0])); + info.wq_size = WQ_SIZE; + + e->guc->wqi_head = 0; + e->guc->wqi_tail = 0; + xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE); + parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE); + } + + set_engine_registered(e); + trace_xe_engine_register(e); + if (xe_engine_is_parallel(e)) + __register_mlrc_engine(guc, e, &info); + else + __register_engine(guc, &info); + init_policies(guc, e); +} + +static u32 wq_space_until_wrap(struct xe_engine *e) +{ + return (WQ_SIZE - e->guc->wqi_tail); +} + +static int wq_wait_for_space(struct xe_engine *e, u32 wqi_size) +{ + struct xe_guc *guc = engine_to_guc(e); + struct xe_device *xe = guc_to_xe(guc); + struct iosys_map map = xe_lrc_parallel_map(e->lrc); + unsigned int sleep_period_ms = 1; + +#define AVAILABLE_SPACE \ + CIRC_SPACE(e->guc->wqi_tail, e->guc->wqi_head, WQ_SIZE) + if (wqi_size > AVAILABLE_SPACE) { +try_again: + e->guc->wqi_head = parallel_read(xe, map, wq_desc.head); + if (wqi_size > AVAILABLE_SPACE) { + if (sleep_period_ms == 1024) { + xe_gt_reset_async(e->gt); + return -ENODEV; + } + + msleep(sleep_period_ms); + sleep_period_ms <<= 1; + goto try_again; + } + } +#undef AVAILABLE_SPACE + + return 0; +} + +static int wq_noop_append(struct xe_engine *e) +{ + struct xe_guc *guc = engine_to_guc(e); + struct xe_device *xe = guc_to_xe(guc); + struct iosys_map map = xe_lrc_parallel_map(e->lrc); + u32 len_dw = wq_space_until_wrap(e) / sizeof(u32) - 1; + + if (wq_wait_for_space(e, wq_space_until_wrap(e))) + return -ENODEV; + + XE_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); + + parallel_write(xe, map, wq[e->guc->wqi_tail / sizeof(u32)], + FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | + FIELD_PREP(WQ_LEN_MASK, len_dw)); + e->guc->wqi_tail = 0; + + return 0; +} + +static void wq_item_append(struct xe_engine *e) +{ + struct xe_guc *guc = engine_to_guc(e); + struct xe_device *xe = guc_to_xe(guc); + struct iosys_map map = xe_lrc_parallel_map(e->lrc); + u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + 3]; 
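+	/*
+	 * A multi-LRC work item is (width + 3) dwords: a type/length header,
+	 * the parent LRC descriptor, guc_id plus the parent ring tail, one
+	 * zeroed dword, and then one ring-tail dword per child LRC.
+	 */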
+ u32 wqi_size = (e->width + 3) * sizeof(u32); + u32 len_dw = (wqi_size / sizeof(u32)) - 1; + int i = 0, j; + + if (wqi_size > wq_space_until_wrap(e)) { + if (wq_noop_append(e)) + return; + } + if (wq_wait_for_space(e, wqi_size)) + return; + + wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | + FIELD_PREP(WQ_LEN_MASK, len_dw); + wqi[i++] = xe_lrc_descriptor(e->lrc); + wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, e->guc->id) | + FIELD_PREP(WQ_RING_TAIL_MASK, e->lrc->ring.tail / sizeof(u64)); + wqi[i++] = 0; + for (j = 1; j < e->width; ++j) { + struct xe_lrc *lrc = e->lrc + j; + + wqi[i++] = lrc->ring.tail / sizeof(u64); + } + + XE_BUG_ON(i != wqi_size / sizeof(u32)); + + iosys_map_incr(&map, offsetof(struct parallel_scratch, + wq[e->guc->wqi_tail / sizeof(u32)])); + xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); + e->guc->wqi_tail += wqi_size; + XE_BUG_ON(e->guc->wqi_tail > WQ_SIZE); + + xe_device_wmb(xe); + + map = xe_lrc_parallel_map(e->lrc); + parallel_write(xe, map, wq_desc.tail, e->guc->wqi_tail); +} + +#define RESUME_PENDING ~0x0ull +static void submit_engine(struct xe_engine *e) +{ + struct xe_guc *guc = engine_to_guc(e); + struct xe_lrc *lrc = e->lrc; + u32 action[3]; + u32 g2h_len = 0; + u32 num_g2h = 0; + int len = 0; + bool extra_submit = false; + + XE_BUG_ON(!engine_registered(e)); + + if (xe_engine_is_parallel(e)) + wq_item_append(e); + else + xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); + + if (engine_suspended(e) && !xe_engine_is_parallel(e)) + return; + + if (!engine_enabled(e) && !engine_suspended(e)) { + action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET; + action[len++] = e->guc->id; + action[len++] = GUC_CONTEXT_ENABLE; + g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; + num_g2h = 1; + if (xe_engine_is_parallel(e)) + extra_submit = true; + + e->guc->resume_time = RESUME_PENDING; + set_engine_pending_enable(e); + set_engine_enabled(e); + trace_xe_engine_scheduling_enable(e); + } else { + action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; + action[len++] = e->guc->id; + trace_xe_engine_submit(e); + } + + xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h); + + if (extra_submit) { + len = 0; + action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; + action[len++] = e->guc->id; + trace_xe_engine_submit(e); + + xe_guc_ct_send(&guc->ct, action, len, 0, 0); + } +} + +static struct dma_fence * +guc_engine_run_job(struct drm_sched_job *drm_job) +{ + struct xe_sched_job *job = to_xe_sched_job(drm_job); + struct xe_engine *e = job->engine; + + XE_BUG_ON((engine_destroyed(e) || engine_pending_disable(e)) && + !engine_banned(e) && !engine_suspended(e)); + + trace_xe_sched_job_run(job); + + if (!engine_killed_or_banned(e) && !xe_sched_job_is_error(job)) { + if (!engine_registered(e)) + register_engine(e); + e->ring_ops->emit_job(job); + submit_engine(e); + } + + if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) + return job->fence; + else + return dma_fence_get(job->fence); +} + +static void guc_engine_free_job(struct drm_sched_job *drm_job) +{ + struct xe_sched_job *job = to_xe_sched_job(drm_job); + + trace_xe_sched_job_free(job); + xe_sched_job_put(job); +} + +static int guc_read_stopped(struct xe_guc *guc) +{ + return atomic_read(&guc->submission_state.stopped); +} + +#define MAKE_SCHED_CONTEXT_ACTION(e, enable_disable) \ + u32 action[] = { \ + XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \ + e->guc->id, \ + GUC_CONTEXT_##enable_disable, \ + } + +static void disable_scheduling_deregister(struct xe_guc *guc, + struct xe_engine *e) +{ + MAKE_SCHED_CONTEXT_ACTION(e, DISABLE); + int 
ret; + + set_min_preemption_timeout(guc, e); + smp_rmb(); + ret = wait_event_timeout(guc->ct.wq, !engine_pending_enable(e) || + guc_read_stopped(guc), HZ * 5); + if (!ret) { + struct xe_gpu_scheduler *sched = &e->guc->sched; + + XE_WARN_ON("Pending enable failed to respond"); + xe_sched_submission_start(sched); + xe_gt_reset_async(e->gt); + xe_sched_tdr_queue_imm(sched); + return; + } + + clear_engine_enabled(e); + set_engine_pending_disable(e); + set_engine_destroyed(e); + trace_xe_engine_scheduling_disable(e); + + /* + * Reserve space for both G2H here as the 2nd G2H is sent from a G2H + * handler and we are not allowed to reserved G2H space in handlers. + */ + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET + + G2H_LEN_DW_DEREGISTER_CONTEXT, 2); +} + +static void guc_engine_print(struct xe_engine *e, struct drm_printer *p); + +#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE) +static void simple_error_capture(struct xe_engine *e) +{ + struct xe_guc *guc = engine_to_guc(e); + struct drm_printer p = drm_err_printer(""); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + u32 adj_logical_mask = e->logical_mask; + u32 width_mask = (0x1 << e->width) - 1; + int i; + bool cookie; + + if (e->vm && !e->vm->error_capture.capture_once) { + e->vm->error_capture.capture_once = true; + cookie = dma_fence_begin_signalling(); + for (i = 0; e->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) { + if (adj_logical_mask & BIT(i)) { + adj_logical_mask |= width_mask << i; + i += e->width; + } else { + ++i; + } + } + + xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); + xe_guc_ct_print(&guc->ct, &p); + guc_engine_print(e, &p); + for_each_hw_engine(hwe, guc_to_gt(guc), id) { + if (hwe->class != e->hwe->class || + !(BIT(hwe->logical_instance) & adj_logical_mask)) + continue; + xe_hw_engine_print_state(hwe, &p); + } + xe_analyze_vm(&p, e->vm, e->gt->info.id); + xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); + dma_fence_end_signalling(cookie); + } +} +#else +static void simple_error_capture(struct xe_engine *e) +{ +} +#endif + +static enum drm_gpu_sched_stat +guc_engine_timedout_job(struct drm_sched_job *drm_job) +{ + struct xe_sched_job *job = to_xe_sched_job(drm_job); + struct xe_sched_job *tmp_job; + struct xe_engine *e = job->engine; + struct xe_gpu_scheduler *sched = &e->guc->sched; + struct xe_device *xe = guc_to_xe(engine_to_guc(e)); + int err = -ETIME; + int i = 0; + + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { + XE_WARN_ON(e->flags & ENGINE_FLAG_KERNEL); + XE_WARN_ON(e->flags & ENGINE_FLAG_VM && !engine_killed(e)); + + drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx", + xe_sched_job_seqno(job), e->guc->id, e->flags); + simple_error_capture(e); + } else { + drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx", + xe_sched_job_seqno(job), e->guc->id, e->flags); + } + trace_xe_sched_job_timedout(job); + + /* Kill the run_job entry point */ + xe_sched_submission_stop(sched); + + /* + * Kernel jobs should never fail, nor should VM jobs if they do + * somethings has gone wrong and the GT needs a reset + */ + if (e->flags & ENGINE_FLAG_KERNEL || + (e->flags & ENGINE_FLAG_VM && !engine_killed(e))) { + if (!xe_sched_invalidate_job(job, 2)) { + xe_sched_add_pending_job(sched, job); + xe_sched_submission_start(sched); + xe_gt_reset_async(e->gt); + goto out; + } + } + + /* Engine state now stable, disable scheduling if needed */ + if (engine_enabled(e)) { + struct xe_guc *guc = 
engine_to_guc(e); + int ret; + + if (engine_reset(e)) + err = -EIO; + set_engine_banned(e); + xe_engine_get(e); + disable_scheduling_deregister(engine_to_guc(e), e); + + /* + * Must wait for scheduling to be disabled before signalling + * any fences, if GT broken the GT reset code should signal us. + * + * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault + * error) messages which can cause the schedule disable to get + * lost. If this occurs, trigger a GT reset to recover. + */ + smp_rmb(); + ret = wait_event_timeout(guc->ct.wq, + !engine_pending_disable(e) || + guc_read_stopped(guc), HZ * 5); + if (!ret) { + XE_WARN_ON("Schedule disable failed to respond"); + xe_sched_add_pending_job(sched, job); + xe_sched_submission_start(sched); + xe_gt_reset_async(e->gt); + xe_sched_tdr_queue_imm(sched); + goto out; + } + } + + /* Stop fence signaling */ + xe_hw_fence_irq_stop(e->fence_irq); + + /* + * Fence state now stable, stop / start scheduler which cleans up any + * fences that are complete + */ + xe_sched_add_pending_job(sched, job); + xe_sched_submission_start(sched); + xe_sched_tdr_queue_imm(&e->guc->sched); + + /* Mark all outstanding jobs as bad, thus completing them */ + spin_lock(&sched->base.job_list_lock); + list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list) + xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED); + spin_unlock(&sched->base.job_list_lock); + + /* Start fence signaling */ + xe_hw_fence_irq_start(e->fence_irq); + +out: + return DRM_GPU_SCHED_STAT_NOMINAL; +} + +static void __guc_engine_fini_async(struct work_struct *w) +{ + struct xe_guc_engine *ge = + container_of(w, struct xe_guc_engine, fini_async); + struct xe_engine *e = ge->engine; + struct xe_guc *guc = engine_to_guc(e); + + trace_xe_engine_destroy(e); + + if (e->flags & ENGINE_FLAG_PERSISTENT) + xe_device_remove_persitent_engines(gt_to_xe(e->gt), e); + release_guc_id(guc, e); + xe_sched_entity_fini(&ge->entity); + xe_sched_fini(&ge->sched); + + if (!(e->flags & ENGINE_FLAG_KERNEL)) { + kfree(ge); + xe_engine_fini(e); + } +} + +static void guc_engine_fini_async(struct xe_engine *e) +{ + bool kernel = e->flags & ENGINE_FLAG_KERNEL; + + INIT_WORK(&e->guc->fini_async, __guc_engine_fini_async); + queue_work(system_unbound_wq, &e->guc->fini_async); + + /* We must block on kernel engines so slabs are empty on driver unload */ + if (kernel) { + struct xe_guc_engine *ge = e->guc; + + flush_work(&ge->fini_async); + kfree(ge); + xe_engine_fini(e); + } +} + +static void __guc_engine_fini(struct xe_guc *guc, struct xe_engine *e) +{ + /* + * Might be done from within the GPU scheduler, need to do async as we + * fini the scheduler when the engine is fini'd, the scheduler can't + * complete fini within itself (circular dependency). Async resolves + * this we and don't really care when everything is fini'd, just that it + * is. 
+ */ + guc_engine_fini_async(e); +} + +static void __guc_engine_process_msg_cleanup(struct xe_sched_msg *msg) +{ + struct xe_engine *e = msg->private_data; + struct xe_guc *guc = engine_to_guc(e); + + XE_BUG_ON(e->flags & ENGINE_FLAG_KERNEL); + trace_xe_engine_cleanup_entity(e); + + if (engine_registered(e)) + disable_scheduling_deregister(guc, e); + else + __guc_engine_fini(guc, e); +} + +static bool guc_engine_allowed_to_change_state(struct xe_engine *e) +{ + return !engine_killed_or_banned(e) && engine_registered(e); +} + +static void __guc_engine_process_msg_set_sched_props(struct xe_sched_msg *msg) +{ + struct xe_engine *e = msg->private_data; + struct xe_guc *guc = engine_to_guc(e); + + if (guc_engine_allowed_to_change_state(e)) + init_policies(guc, e); + kfree(msg); +} + +static void suspend_fence_signal(struct xe_engine *e) +{ + struct xe_guc *guc = engine_to_guc(e); + + XE_BUG_ON(!engine_suspended(e) && !engine_killed(e) && + !guc_read_stopped(guc)); + XE_BUG_ON(!e->guc->suspend_pending); + + e->guc->suspend_pending = false; + smp_wmb(); + wake_up(&e->guc->suspend_wait); +} + +static void __guc_engine_process_msg_suspend(struct xe_sched_msg *msg) +{ + struct xe_engine *e = msg->private_data; + struct xe_guc *guc = engine_to_guc(e); + + if (guc_engine_allowed_to_change_state(e) && !engine_suspended(e) && + engine_enabled(e)) { + wait_event(guc->ct.wq, e->guc->resume_time != RESUME_PENDING || + guc_read_stopped(guc)); + + if (!guc_read_stopped(guc)) { + MAKE_SCHED_CONTEXT_ACTION(e, DISABLE); + s64 since_resume_ms = + ktime_ms_delta(ktime_get(), + e->guc->resume_time); + s64 wait_ms = e->vm->preempt.min_run_period_ms - + since_resume_ms; + + if (wait_ms > 0 && e->guc->resume_time) + msleep(wait_ms); + + set_engine_suspended(e); + clear_engine_enabled(e); + set_engine_pending_disable(e); + trace_xe_engine_scheduling_disable(e); + + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); + } + } else if (e->guc->suspend_pending) { + set_engine_suspended(e); + suspend_fence_signal(e); + } +} + +static void __guc_engine_process_msg_resume(struct xe_sched_msg *msg) +{ + struct xe_engine *e = msg->private_data; + struct xe_guc *guc = engine_to_guc(e); + + if (guc_engine_allowed_to_change_state(e)) { + MAKE_SCHED_CONTEXT_ACTION(e, ENABLE); + + e->guc->resume_time = RESUME_PENDING; + clear_engine_suspended(e); + set_engine_pending_enable(e); + set_engine_enabled(e); + trace_xe_engine_scheduling_enable(e); + + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); + } else { + clear_engine_suspended(e); + } +} + +#define CLEANUP 1 /* Non-zero values to catch uninitialized msg */ +#define SET_SCHED_PROPS 2 +#define SUSPEND 3 +#define RESUME 4 + +static void guc_engine_process_msg(struct xe_sched_msg *msg) +{ + trace_xe_sched_msg_recv(msg); + + switch (msg->opcode) { + case CLEANUP: + __guc_engine_process_msg_cleanup(msg); + break; + case SET_SCHED_PROPS: + __guc_engine_process_msg_set_sched_props(msg); + break; + case SUSPEND: + __guc_engine_process_msg_suspend(msg); + break; + case RESUME: + __guc_engine_process_msg_resume(msg); + break; + default: + XE_BUG_ON("Unknown message type"); + } +} + +static const struct drm_sched_backend_ops drm_sched_ops = { + .run_job = guc_engine_run_job, + .free_job = guc_engine_free_job, + .timedout_job = guc_engine_timedout_job, +}; + +static const struct xe_sched_backend_ops xe_sched_ops = { + .process_msg = guc_engine_process_msg, +}; + +static int guc_engine_init(struct xe_engine 
*e) +{ + struct xe_gpu_scheduler *sched; + struct xe_guc *guc = engine_to_guc(e); + struct xe_guc_engine *ge; + long timeout; + int err; + + XE_BUG_ON(!xe_device_guc_submission_enabled(guc_to_xe(guc))); + + ge = kzalloc(sizeof(*ge), GFP_KERNEL); + if (!ge) + return -ENOMEM; + + e->guc = ge; + ge->engine = e; + init_waitqueue_head(&ge->suspend_wait); + + timeout = xe_vm_no_dma_fences(e->vm) ? MAX_SCHEDULE_TIMEOUT : HZ * 5; + err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, NULL, + e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, + 64, timeout, guc_to_gt(guc)->ordered_wq, NULL, + e->name, gt_to_xe(e->gt)->drm.dev); + if (err) + goto err_free; + + sched = &ge->sched; + err = xe_sched_entity_init(&ge->entity, sched); + if (err) + goto err_sched; + e->priority = XE_ENGINE_PRIORITY_NORMAL; + + mutex_lock(&guc->submission_state.lock); + + err = alloc_guc_id(guc, e); + if (err) + goto err_entity; + + e->entity = &ge->entity; + + if (guc_read_stopped(guc)) + xe_sched_stop(sched); + + mutex_unlock(&guc->submission_state.lock); + + switch (e->class) { + case XE_ENGINE_CLASS_RENDER: + sprintf(e->name, "rcs%d", e->guc->id); + break; + case XE_ENGINE_CLASS_VIDEO_DECODE: + sprintf(e->name, "vcs%d", e->guc->id); + break; + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + sprintf(e->name, "vecs%d", e->guc->id); + break; + case XE_ENGINE_CLASS_COPY: + sprintf(e->name, "bcs%d", e->guc->id); + break; + case XE_ENGINE_CLASS_COMPUTE: + sprintf(e->name, "ccs%d", e->guc->id); + break; + default: + XE_WARN_ON(e->class); + } + + trace_xe_engine_create(e); + + return 0; + +err_entity: + xe_sched_entity_fini(&ge->entity); +err_sched: + xe_sched_fini(&ge->sched); +err_free: + kfree(ge); + + return err; +} + +static void guc_engine_kill(struct xe_engine *e) +{ + trace_xe_engine_kill(e); + set_engine_killed(e); + xe_sched_tdr_queue_imm(&e->guc->sched); +} + +static void guc_engine_add_msg(struct xe_engine *e, struct xe_sched_msg *msg, + u32 opcode) +{ + INIT_LIST_HEAD(&msg->link); + msg->opcode = opcode; + msg->private_data = e; + + trace_xe_sched_msg_add(msg); + xe_sched_add_msg(&e->guc->sched, msg); +} + +#define STATIC_MSG_CLEANUP 0 +#define STATIC_MSG_SUSPEND 1 +#define STATIC_MSG_RESUME 2 +static void guc_engine_fini(struct xe_engine *e) +{ + struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_CLEANUP; + + if (!(e->flags & ENGINE_FLAG_KERNEL)) + guc_engine_add_msg(e, msg, CLEANUP); + else + __guc_engine_fini(engine_to_guc(e), e); +} + +static int guc_engine_set_priority(struct xe_engine *e, + enum xe_engine_priority priority) +{ + struct xe_sched_msg *msg; + + if (e->priority == priority || engine_killed_or_banned(e)) + return 0; + + msg = kmalloc(sizeof(*msg), GFP_KERNEL); + if (!msg) + return -ENOMEM; + + guc_engine_add_msg(e, msg, SET_SCHED_PROPS); + e->priority = priority; + + return 0; +} + +static int guc_engine_set_timeslice(struct xe_engine *e, u32 timeslice_us) +{ + struct xe_sched_msg *msg; + + if (e->sched_props.timeslice_us == timeslice_us || + engine_killed_or_banned(e)) + return 0; + + msg = kmalloc(sizeof(*msg), GFP_KERNEL); + if (!msg) + return -ENOMEM; + + e->sched_props.timeslice_us = timeslice_us; + guc_engine_add_msg(e, msg, SET_SCHED_PROPS); + + return 0; +} + +static int guc_engine_set_preempt_timeout(struct xe_engine *e, + u32 preempt_timeout_us) +{ + struct xe_sched_msg *msg; + + if (e->sched_props.preempt_timeout_us == preempt_timeout_us || + engine_killed_or_banned(e)) + return 0; + + msg = kmalloc(sizeof(*msg), GFP_KERNEL); + if (!msg) + return -ENOMEM; + + 
e->sched_props.preempt_timeout_us = preempt_timeout_us; + guc_engine_add_msg(e, msg, SET_SCHED_PROPS); + + return 0; +} + +static int guc_engine_set_job_timeout(struct xe_engine *e, u32 job_timeout_ms) +{ + struct xe_gpu_scheduler *sched = &e->guc->sched; + + XE_BUG_ON(engine_registered(e)); + XE_BUG_ON(engine_banned(e)); + XE_BUG_ON(engine_killed(e)); + + sched->base.timeout = job_timeout_ms; + + return 0; +} + +static int guc_engine_suspend(struct xe_engine *e) +{ + struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_SUSPEND; + + if (engine_killed_or_banned(e) || e->guc->suspend_pending) + return -EINVAL; + + e->guc->suspend_pending = true; + guc_engine_add_msg(e, msg, SUSPEND); + + return 0; +} + +static void guc_engine_suspend_wait(struct xe_engine *e) +{ + struct xe_guc *guc = engine_to_guc(e); + + wait_event(e->guc->suspend_wait, !e->guc->suspend_pending || + guc_read_stopped(guc)); +} + +static void guc_engine_resume(struct xe_engine *e) +{ + struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_RESUME; + + XE_BUG_ON(e->guc->suspend_pending); + + xe_mocs_init_engine(e); + guc_engine_add_msg(e, msg, RESUME); +} + +/* + * All of these functions are an abstraction layer which other parts of XE can + * use to trap into the GuC backend. All of these functions, aside from init, + * really shouldn't do much other than trap into the DRM scheduler which + * synchronizes these operations. + */ +static const struct xe_engine_ops guc_engine_ops = { + .init = guc_engine_init, + .kill = guc_engine_kill, + .fini = guc_engine_fini, + .set_priority = guc_engine_set_priority, + .set_timeslice = guc_engine_set_timeslice, + .set_preempt_timeout = guc_engine_set_preempt_timeout, + .set_job_timeout = guc_engine_set_job_timeout, + .suspend = guc_engine_suspend, + .suspend_wait = guc_engine_suspend_wait, + .resume = guc_engine_resume, +}; + +static void guc_engine_stop(struct xe_guc *guc, struct xe_engine *e) +{ + struct xe_gpu_scheduler *sched = &e->guc->sched; + + /* Stop scheduling + flush any DRM scheduler operations */ + xe_sched_submission_stop(sched); + + /* Clean up lost G2H + reset engine state */ + if (engine_destroyed(e) && engine_registered(e)) { + if (engine_banned(e)) + xe_engine_put(e); + else + __guc_engine_fini(guc, e); + } + if (e->guc->suspend_pending) { + set_engine_suspended(e); + suspend_fence_signal(e); + } + atomic_and(ENGINE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED, + &e->guc->state); + e->guc->resume_time = 0; + trace_xe_engine_stop(e); + + /* + * Ban any engine (aside from kernel and engines used for VM ops) with a + * started but not complete job or if a job has gone through a GT reset + * more than twice. + */ + if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM))) { + struct xe_sched_job *job = xe_sched_first_pending_job(sched); + + if (job) { + if ((xe_sched_job_started(job) && + !xe_sched_job_completed(job)) || + xe_sched_invalidate_job(job, 2)) { + trace_xe_sched_job_ban(job); + xe_sched_tdr_queue_imm(&e->guc->sched); + set_engine_banned(e); + } + } + } +} + +int xe_guc_submit_reset_prepare(struct xe_guc *guc) +{ + int ret; + + /* + * Using an atomic here rather than submission_state.lock as this + * function can be called while holding the CT lock (engine reset + * failure). submission_state.lock needs the CT lock to resubmit jobs. + * Atomic is not ideal, but it works to prevent against concurrent reset + * and releasing any TDRs waiting on guc->submission_state.stopped. 
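To make the comment above concrete: atomic_fetch_or() both sets the stopped flag and hands back its previous value, so a caller can tell whether submission had already been stopped without touching submission_state.lock. A minimal C11 sketch of that behaviour (the names loosely mirror the driver but are only illustrative; the kernel's atomic_t is a different API, although the fetch-or operation reports the old value in the same way):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int stopped;	/* stands in for submission_state.stopped */

static int reset_prepare(void)
{
	/* Sets the flag and returns the old value: non-zero means another
	 * reset already stopped submission, so the work is not repeated. */
	return atomic_fetch_or(&stopped, 1);
}

static int read_stopped(void)
{
	return atomic_load(&stopped);
}

int main(void)
{
	printf("first prepare, was stopped: %d\n", reset_prepare());
	printf("second prepare, was stopped: %d\n", reset_prepare());
	printf("read_stopped(): %d\n", read_stopped());
	return 0;
}

This prints 0, then 1, then 1, which is the same information xe_guc_submit_reset_prepare() reports back to its caller.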
+ */ + ret = atomic_fetch_or(1, &guc->submission_state.stopped); + smp_wmb(); + wake_up_all(&guc->ct.wq); + + return ret; +} + +void xe_guc_submit_reset_wait(struct xe_guc *guc) +{ + wait_event(guc->ct.wq, !guc_read_stopped(guc)); +} + +int xe_guc_submit_stop(struct xe_guc *guc) +{ + struct xe_engine *e; + unsigned long index; + + XE_BUG_ON(guc_read_stopped(guc) != 1); + + mutex_lock(&guc->submission_state.lock); + + xa_for_each(&guc->submission_state.engine_lookup, index, e) + guc_engine_stop(guc, e); + + mutex_unlock(&guc->submission_state.lock); + + /* + * No one can enter the backend at this point, aside from new engine + * creation which is protected by guc->submission_state.lock. + */ + + return 0; +} + +static void guc_engine_start(struct xe_engine *e) +{ + struct xe_gpu_scheduler *sched = &e->guc->sched; + + if (!engine_killed_or_banned(e)) { + int i; + + trace_xe_engine_resubmit(e); + for (i = 0; i < e->width; ++i) + xe_lrc_set_ring_head(e->lrc + i, e->lrc[i].ring.tail); + xe_sched_resubmit_jobs(sched); + } + + xe_sched_submission_start(sched); +} + +int xe_guc_submit_start(struct xe_guc *guc) +{ + struct xe_engine *e; + unsigned long index; + + XE_BUG_ON(guc_read_stopped(guc) != 1); + + mutex_lock(&guc->submission_state.lock); + atomic_dec(&guc->submission_state.stopped); + xa_for_each(&guc->submission_state.engine_lookup, index, e) + guc_engine_start(e); + mutex_unlock(&guc->submission_state.lock); + + wake_up_all(&guc->ct.wq); + + return 0; +} + +static struct xe_engine * +g2h_engine_lookup(struct xe_guc *guc, u32 guc_id) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_engine *e; + + if (unlikely(guc_id >= GUC_ID_MAX)) { + drm_err(&xe->drm, "Invalid guc_id %u", guc_id); + return NULL; + } + + e = xa_load(&guc->submission_state.engine_lookup, guc_id); + if (unlikely(!e)) { + drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id); + return NULL; + } + + XE_BUG_ON(e->guc->id != guc_id); + + return e; +} + +static void deregister_engine(struct xe_guc *guc, struct xe_engine *e) +{ + u32 action[] = { + XE_GUC_ACTION_DEREGISTER_CONTEXT, + e->guc->id, + }; + + trace_xe_engine_deregister(e); + + xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action)); +} + +int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_engine *e; + u32 guc_id = msg[0]; + + if (unlikely(len < 2)) { + drm_err(&xe->drm, "Invalid length %u", len); + return -EPROTO; + } + + e = g2h_engine_lookup(guc, guc_id); + if (unlikely(!e)) + return -EPROTO; + + if (unlikely(!engine_pending_enable(e) && + !engine_pending_disable(e))) { + drm_err(&xe->drm, "Unexpected engine state 0x%04x", + atomic_read(&e->guc->state)); + return -EPROTO; + } + + trace_xe_engine_scheduling_done(e); + + if (engine_pending_enable(e)) { + e->guc->resume_time = ktime_get(); + clear_engine_pending_enable(e); + smp_wmb(); + wake_up_all(&guc->ct.wq); + } else { + clear_engine_pending_disable(e); + if (e->guc->suspend_pending) { + suspend_fence_signal(e); + } else { + if (engine_banned(e)) { + smp_wmb(); + wake_up_all(&guc->ct.wq); + } + deregister_engine(guc, e); + } + } + + return 0; +} + +int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_engine *e; + u32 guc_id = msg[0]; + + if (unlikely(len < 1)) { + drm_err(&xe->drm, "Invalid length %u", len); + return -EPROTO; + } + + e = g2h_engine_lookup(guc, guc_id); + if (unlikely(!e)) + return -EPROTO; + + if (!engine_destroyed(e) || 
engine_pending_disable(e) || + engine_pending_enable(e) || engine_enabled(e)) { + drm_err(&xe->drm, "Unexpected engine state 0x%04x", + atomic_read(&e->guc->state)); + return -EPROTO; + } + + trace_xe_engine_deregister_done(e); + + clear_engine_registered(e); + if (engine_banned(e)) + xe_engine_put(e); + else + __guc_engine_fini(guc, e); + + return 0; +} + +int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_engine *e; + u32 guc_id = msg[0]; + + if (unlikely(len < 1)) { + drm_err(&xe->drm, "Invalid length %u", len); + return -EPROTO; + } + + e = g2h_engine_lookup(guc, guc_id); + if (unlikely(!e)) + return -EPROTO; + + drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id); + + /* FIXME: Do error capture, most likely async */ + + trace_xe_engine_reset(e); + + /* + * A banned engine is a NOP at this point (came from + * guc_engine_timedout_job). Otherwise, kick drm scheduler to cancel + * jobs by setting timeout of the job to the minimum value kicking + * guc_engine_timedout_job. + */ + set_engine_reset(e); + if (!engine_banned(e)) + xe_sched_tdr_queue_imm(&e->guc->sched); + + return 0; +} + +int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, + u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_engine *e; + u32 guc_id = msg[0]; + + if (unlikely(len < 1)) { + drm_err(&xe->drm, "Invalid length %u", len); + return -EPROTO; + } + + e = g2h_engine_lookup(guc, guc_id); + if (unlikely(!e)) + return -EPROTO; + + drm_warn(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id); + trace_xe_engine_memory_cat_error(e); + + /* Treat the same as engine reset */ + set_engine_reset(e); + if (!engine_banned(e)) + xe_sched_tdr_queue_imm(&e->guc->sched); + + return 0; +} + +int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + u8 guc_class, instance; + u32 reason; + + if (unlikely(len != 3)) { + drm_err(&xe->drm, "Invalid length %u", len); + return -EPROTO; + } + + guc_class = msg[0]; + instance = msg[1]; + reason = msg[2]; + + /* Unexpected failure of a hardware feature, log an actual error */ + drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X", + guc_class, instance, reason); + + xe_gt_reset_async(guc_to_gt(guc)); + + return 0; +} + +static void guc_engine_wq_print(struct xe_engine *e, struct drm_printer *p) +{ + struct xe_guc *guc = engine_to_guc(e); + struct xe_device *xe = guc_to_xe(guc); + struct iosys_map map = xe_lrc_parallel_map(e->lrc); + int i; + + drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", + e->guc->wqi_head, parallel_read(xe, map, wq_desc.head)); + drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", + e->guc->wqi_tail, parallel_read(xe, map, wq_desc.tail)); + drm_printf(p, "\tWQ status: %u\n", + parallel_read(xe, map, wq_desc.wq_status)); + if (parallel_read(xe, map, wq_desc.head) != + parallel_read(xe, map, wq_desc.tail)) { + for (i = parallel_read(xe, map, wq_desc.head); + i != parallel_read(xe, map, wq_desc.tail); + i = (i + sizeof(u32)) % WQ_SIZE) + drm_printf(p, "\tWQ[%ld]: 0x%08x\n", i / sizeof(u32), + parallel_read(xe, map, wq[i / sizeof(u32)])); + } +} + +static void guc_engine_print(struct xe_engine *e, struct drm_printer *p) +{ + struct xe_gpu_scheduler *sched = &e->guc->sched; + struct xe_sched_job *job; + int i; + + drm_printf(p, "\nGuC ID: %d\n", e->guc->id); + drm_printf(p, "\tName: %s\n", e->name); + drm_printf(p, "\tClass: %d\n", e->class); + drm_printf(p, 
"\tLogical mask: 0x%x\n", e->logical_mask); + drm_printf(p, "\tWidth: %d\n", e->width); + drm_printf(p, "\tRef: %d\n", kref_read(&e->refcount)); + drm_printf(p, "\tTimeout: %ld (ms)\n", sched->base.timeout); + drm_printf(p, "\tTimeslice: %u (us)\n", e->sched_props.timeslice_us); + drm_printf(p, "\tPreempt timeout: %u (us)\n", + e->sched_props.preempt_timeout_us); + for (i = 0; i < e->width; ++i ) { + struct xe_lrc *lrc = e->lrc + i; + + drm_printf(p, "\tHW Context Desc: 0x%08x\n", + lower_32_bits(xe_lrc_ggtt_addr(lrc))); + drm_printf(p, "\tLRC Head: (memory) %u\n", + xe_lrc_ring_head(lrc)); + drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", + lrc->ring.tail, + xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL)); + drm_printf(p, "\tStart seqno: (memory) %d\n", + xe_lrc_start_seqno(lrc)); + drm_printf(p, "\tSeqno: (memory) %d\n", xe_lrc_seqno(lrc)); + } + drm_printf(p, "\tSchedule State: 0x%x\n", atomic_read(&e->guc->state)); + drm_printf(p, "\tFlags: 0x%lx\n", e->flags); + if (xe_engine_is_parallel(e)) + guc_engine_wq_print(e, p); + + spin_lock(&sched->base.job_list_lock); + list_for_each_entry(job, &sched->base.pending_list, drm.list) + drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n", + xe_sched_job_seqno(job), + dma_fence_is_signaled(job->fence) ? 1 : 0, + dma_fence_is_signaled(&job->drm.s_fence->finished) ? + 1 : 0); + spin_unlock(&sched->base.job_list_lock); +} + +void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) +{ + struct xe_engine *e; + unsigned long index; + + if (!xe_device_guc_submission_enabled(guc_to_xe(guc))) + return; + + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.engine_lookup, index, e) + guc_engine_print(e, p); + mutex_unlock(&guc->submission_state.lock); +} diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h new file mode 100644 index 000000000000..8002734d6f24 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_SUBMIT_H_ +#define _XE_GUC_SUBMIT_H_ + +#include + +struct drm_printer; +struct xe_engine; +struct xe_guc; + +int xe_guc_submit_init(struct xe_guc *guc); +void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p); + +int xe_guc_submit_reset_prepare(struct xe_guc *guc); +void xe_guc_submit_reset_wait(struct xe_guc *guc); +int xe_guc_submit_stop(struct xe_guc *guc); +int xe_guc_submit_start(struct xe_guc *guc); + +int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len); +int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len); +int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len); +int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, + u32 len); +int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h new file mode 100644 index 000000000000..ca177853cc12 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_TYPES_H_ +#define _XE_GUC_TYPES_H_ + +#include +#include + +#include "xe_guc_ads_types.h" +#include "xe_guc_ct_types.h" +#include "xe_guc_fwif.h" +#include "xe_guc_log_types.h" +#include "xe_guc_pc_types.h" +#include "xe_uc_fw_types.h" + +/** + * struct xe_guc - Graphic micro controller + */ +struct xe_guc { + /** @fw: Generic uC firmware 
management */ + struct xe_uc_fw fw; + /** @log: GuC log */ + struct xe_guc_log log; + /** @ads: GuC ads */ + struct xe_guc_ads ads; + /** @ct: GuC ct */ + struct xe_guc_ct ct; + /** @pc: GuC Power Conservation */ + struct xe_guc_pc pc; + /** @submission_state: GuC submission state */ + struct { + /** @engine_lookup: Lookup an xe_engine from guc_id */ + struct xarray engine_lookup; + /** @guc_ids: used to allocate new guc_ids, single-lrc */ + struct ida guc_ids; + /** @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc */ + unsigned long *guc_ids_bitmap; + /** @stopped: submissions are stopped */ + atomic_t stopped; + /** @lock: protects submission state */ + struct mutex lock; + /** @suspend: suspend fence state */ + struct { + /** @lock: suspend fences lock */ + spinlock_t lock; + /** @context: suspend fences context */ + u64 context; + /** @seqno: suspend fences seqno */ + u32 seqno; + } suspend; + } submission_state; + /** @hwconfig: Hardware config state */ + struct { + /** @bo: buffer object of the hardware config */ + struct xe_bo *bo; + /** @size: size of the hardware config */ + u32 size; + } hwconfig; + + /** + * @notify_reg: Register which is written to notify GuC of H2G messages + */ + u32 notify_reg; + /** @params: Control params for fw initialization */ + u32 params[GUC_CTL_MAX_DWORDS]; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c new file mode 100644 index 000000000000..93b22fac6e14 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_guc.h" +#include "xe_guc_reg.h" +#include "xe_huc.h" +#include "xe_mmio.h" +#include "xe_uc_fw.h" + +static struct xe_gt * +huc_to_gt(struct xe_huc *huc) +{ + return container_of(huc, struct xe_gt, uc.huc); +} + +static struct xe_device * +huc_to_xe(struct xe_huc *huc) +{ + return gt_to_xe(huc_to_gt(huc)); +} + +static struct xe_guc * +huc_to_guc(struct xe_huc *huc) +{ + return &container_of(huc, struct xe_uc, huc)->guc; +} + +int xe_huc_init(struct xe_huc *huc) +{ + struct xe_device *xe = huc_to_xe(huc); + int ret; + + huc->fw.type = XE_UC_FW_TYPE_HUC; + ret = xe_uc_fw_init(&huc->fw); + if (ret) + goto out; + + xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE); + + return 0; + +out: + if (xe_uc_fw_is_disabled(&huc->fw)) { + drm_info(&xe->drm, "HuC disabled\n"); + return 0; + } + drm_err(&xe->drm, "HuC init failed with %d", ret); + return ret; +} + +int xe_huc_upload(struct xe_huc *huc) +{ + if (xe_uc_fw_is_disabled(&huc->fw)) + return 0; + return xe_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL); +} + +int xe_huc_auth(struct xe_huc *huc) +{ + struct xe_device *xe = huc_to_xe(huc); + struct xe_gt *gt = huc_to_gt(huc); + struct xe_guc *guc = huc_to_guc(huc); + int ret; + if (xe_uc_fw_is_disabled(&huc->fw)) + return 0; + + XE_BUG_ON(xe_uc_fw_is_running(&huc->fw)); + + if (!xe_uc_fw_is_loaded(&huc->fw)) + return -ENOEXEC; + + ret = xe_guc_auth_huc(guc, xe_bo_ggtt_addr(huc->fw.bo) + + xe_uc_fw_rsa_offset(&huc->fw)); + if (ret) { + drm_err(&xe->drm, "HuC: GuC did not ack Auth request %d\n", + ret); + goto fail; + } + + ret = xe_mmio_wait32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg, + HUC_LOAD_SUCCESSFUL, + HUC_LOAD_SUCCESSFUL, 100); + if (ret) { + drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret); + goto fail; + } + + xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING); + drm_dbg(&xe->drm, "HuC 
authenticated\n"); + + return 0; + +fail: + drm_err(&xe->drm, "HuC authentication failed %d\n", ret); + xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOAD_FAIL); + + return ret; +} + +void xe_huc_sanitize(struct xe_huc *huc) +{ + if (xe_uc_fw_is_disabled(&huc->fw)) + return; + xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE); +} + +void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p) +{ + struct xe_gt *gt = huc_to_gt(huc); + int err; + + xe_uc_fw_print(&huc->fw, p); + + if (xe_uc_fw_is_disabled(&huc->fw)) + return; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return; + + drm_printf(p, "\nHuC status: 0x%08x\n", + xe_mmio_read32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg)); + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} diff --git a/drivers/gpu/drm/xe/xe_huc.h b/drivers/gpu/drm/xe/xe_huc.h new file mode 100644 index 000000000000..5802c43b6ce2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_huc.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_HUC_H_ +#define _XE_HUC_H_ + +#include "xe_huc_types.h" + +struct drm_printer; + +int xe_huc_init(struct xe_huc *huc); +int xe_huc_upload(struct xe_huc *huc); +int xe_huc_auth(struct xe_huc *huc); +void xe_huc_sanitize(struct xe_huc *huc); +void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.c b/drivers/gpu/drm/xe/xe_huc_debugfs.c new file mode 100644 index 000000000000..268bac36336a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_huc_debugfs.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_huc.h" +#include "xe_huc_debugfs.h" +#include "xe_macros.h" + +static struct xe_gt * +huc_to_gt(struct xe_huc *huc) +{ + return container_of(huc, struct xe_gt, uc.huc); +} + +static struct xe_device * +huc_to_xe(struct xe_huc *huc) +{ + return gt_to_xe(huc_to_gt(huc)); +} + +static struct xe_huc *node_to_huc(struct drm_info_node *node) +{ + return node->info_ent->data; +} + +static int huc_info(struct seq_file *m, void *data) +{ + struct xe_huc *huc = node_to_huc(m->private); + struct xe_device *xe = huc_to_xe(huc); + struct drm_printer p = drm_seq_file_printer(m); + + xe_device_mem_access_get(xe); + xe_huc_print_info(huc, &p); + xe_device_mem_access_put(xe); + + return 0; +} + +static const struct drm_info_list debugfs_list[] = { + {"huc_info", huc_info, 0}, +}; + +void xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent) +{ + struct drm_minor *minor = huc_to_xe(huc)->drm.primary; + struct drm_info_list *local; + int i; + +#define DEBUGFS_SIZE ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list) + local = drmm_kmalloc(&huc_to_xe(huc)->drm, DEBUGFS_SIZE, GFP_KERNEL); + if (!local) { + XE_WARN_ON("Couldn't allocate memory"); + return; + } + + memcpy(local, debugfs_list, DEBUGFS_SIZE); +#undef DEBUGFS_SIZE + + for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i) + local[i].data = huc; + + drm_debugfs_create_files(local, + ARRAY_SIZE(debugfs_list), + parent, minor); +} diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.h b/drivers/gpu/drm/xe/xe_huc_debugfs.h new file mode 100644 index 000000000000..ec58f1818804 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_huc_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_HUC_DEBUGFS_H_ +#define _XE_HUC_DEBUGFS_H_ + +struct dentry; +struct xe_huc; + +void 
xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent); + +#endif diff --git a/drivers/gpu/drm/xe/xe_huc_types.h b/drivers/gpu/drm/xe/xe_huc_types.h new file mode 100644 index 000000000000..cae6d19097df --- /dev/null +++ b/drivers/gpu/drm/xe/xe_huc_types.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_HUC_TYPES_H_ +#define _XE_HUC_TYPES_H_ + +#include "xe_uc_fw_types.h" + +/** + * struct xe_huc - HuC + */ +struct xe_huc { + /** @fw: Generic uC firmware management */ + struct xe_uc_fw fw; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c new file mode 100644 index 000000000000..fd89dd90131c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -0,0 +1,658 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_hw_engine.h" + +#include + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_execlist.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_gt_topology.h" +#include "xe_hw_fence.h" +#include "xe_lrc.h" +#include "xe_macros.h" +#include "xe_mmio.h" +#include "xe_reg_sr.h" +#include "xe_sched_job.h" +#include "xe_wa.h" + +#include "gt/intel_engine_regs.h" +#include "i915_reg.h" +#include "gt/intel_gt_regs.h" + +#define MAX_MMIO_BASES 3 +struct engine_info { + const char *name; + unsigned int class : 8; + unsigned int instance : 8; + enum xe_force_wake_domains domain; + /* mmio bases table *must* be sorted in reverse graphics_ver order */ + struct engine_mmio_base { + unsigned int graphics_ver : 8; + unsigned int base : 24; + } mmio_bases[MAX_MMIO_BASES]; +}; + +static const struct engine_info engine_infos[] = { + [XE_HW_ENGINE_RCS0] = { + .name = "rcs0", + .class = XE_ENGINE_CLASS_RENDER, + .instance = 0, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 1, .base = RENDER_RING_BASE } + }, + }, + [XE_HW_ENGINE_BCS0] = { + .name = "bcs0", + .class = XE_ENGINE_CLASS_COPY, + .instance = 0, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 6, .base = BLT_RING_BASE } + }, + }, + [XE_HW_ENGINE_BCS1] = { + .name = "bcs1", + .class = XE_ENGINE_CLASS_COPY, + .instance = 1, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS1_RING_BASE } + }, + }, + [XE_HW_ENGINE_BCS2] = { + .name = "bcs2", + .class = XE_ENGINE_CLASS_COPY, + .instance = 2, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS2_RING_BASE } + }, + }, + [XE_HW_ENGINE_BCS3] = { + .name = "bcs3", + .class = XE_ENGINE_CLASS_COPY, + .instance = 3, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS3_RING_BASE } + }, + }, + [XE_HW_ENGINE_BCS4] = { + .name = "bcs4", + .class = XE_ENGINE_CLASS_COPY, + .instance = 4, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS4_RING_BASE } + }, + }, + [XE_HW_ENGINE_BCS5] = { + .name = "bcs5", + .class = XE_ENGINE_CLASS_COPY, + .instance = 5, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS5_RING_BASE } + }, + }, + [XE_HW_ENGINE_BCS6] = { + .name = "bcs6", + .class = XE_ENGINE_CLASS_COPY, + .instance = 6, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS6_RING_BASE } + }, + }, + [XE_HW_ENGINE_BCS7] = { + .name = "bcs7", + .class = XE_ENGINE_CLASS_COPY, + .instance = 7, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS7_RING_BASE } + }, + }, + 
[XE_HW_ENGINE_BCS8] = { + .name = "bcs8", + .class = XE_ENGINE_CLASS_COPY, + .instance = 8, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS8_RING_BASE } + }, + }, + + [XE_HW_ENGINE_VCS0] = { + .name = "vcs0", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 0, + .domain = XE_FW_MEDIA_VDBOX0, + .mmio_bases = { + { .graphics_ver = 11, .base = GEN11_BSD_RING_BASE }, + { .graphics_ver = 6, .base = GEN6_BSD_RING_BASE }, + { .graphics_ver = 4, .base = BSD_RING_BASE } + }, + }, + [XE_HW_ENGINE_VCS1] = { + .name = "vcs1", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 1, + .domain = XE_FW_MEDIA_VDBOX1, + .mmio_bases = { + { .graphics_ver = 11, .base = GEN11_BSD2_RING_BASE }, + { .graphics_ver = 8, .base = GEN8_BSD2_RING_BASE } + }, + }, + [XE_HW_ENGINE_VCS2] = { + .name = "vcs2", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 2, + .domain = XE_FW_MEDIA_VDBOX2, + .mmio_bases = { + { .graphics_ver = 11, .base = GEN11_BSD3_RING_BASE } + }, + }, + [XE_HW_ENGINE_VCS3] = { + .name = "vcs3", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 3, + .domain = XE_FW_MEDIA_VDBOX3, + .mmio_bases = { + { .graphics_ver = 11, .base = GEN11_BSD4_RING_BASE } + }, + }, + [XE_HW_ENGINE_VCS4] = { + .name = "vcs4", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 4, + .domain = XE_FW_MEDIA_VDBOX4, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_BSD5_RING_BASE } + }, + }, + [XE_HW_ENGINE_VCS5] = { + .name = "vcs5", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 5, + .domain = XE_FW_MEDIA_VDBOX5, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_BSD6_RING_BASE } + }, + }, + [XE_HW_ENGINE_VCS6] = { + .name = "vcs6", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 6, + .domain = XE_FW_MEDIA_VDBOX6, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_BSD7_RING_BASE } + }, + }, + [XE_HW_ENGINE_VCS7] = { + .name = "vcs7", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 7, + .domain = XE_FW_MEDIA_VDBOX7, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_BSD8_RING_BASE } + }, + }, + [XE_HW_ENGINE_VECS0] = { + .name = "vecs0", + .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, + .instance = 0, + .domain = XE_FW_MEDIA_VEBOX0, + .mmio_bases = { + { .graphics_ver = 11, .base = GEN11_VEBOX_RING_BASE }, + { .graphics_ver = 7, .base = VEBOX_RING_BASE } + }, + }, + [XE_HW_ENGINE_VECS1] = { + .name = "vecs1", + .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, + .instance = 1, + .domain = XE_FW_MEDIA_VEBOX1, + .mmio_bases = { + { .graphics_ver = 11, .base = GEN11_VEBOX2_RING_BASE } + }, + }, + [XE_HW_ENGINE_VECS2] = { + .name = "vecs2", + .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, + .instance = 2, + .domain = XE_FW_MEDIA_VEBOX2, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_VEBOX3_RING_BASE } + }, + }, + [XE_HW_ENGINE_VECS3] = { + .name = "vecs3", + .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, + .instance = 3, + .domain = XE_FW_MEDIA_VEBOX3, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_VEBOX4_RING_BASE } + }, + }, + [XE_HW_ENGINE_CCS0] = { + .name = "ccs0", + .class = XE_ENGINE_CLASS_COMPUTE, + .instance = 0, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = GEN12_COMPUTE0_RING_BASE }, + }, + }, + [XE_HW_ENGINE_CCS1] = { + .name = "ccs1", + .class = XE_ENGINE_CLASS_COMPUTE, + .instance = 1, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = GEN12_COMPUTE1_RING_BASE }, + }, + }, + [XE_HW_ENGINE_CCS2] = { + .name = "ccs2", + .class = XE_ENGINE_CLASS_COMPUTE, + .instance = 2, + .domain = 
XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = GEN12_COMPUTE2_RING_BASE }, + }, + }, + [XE_HW_ENGINE_CCS3] = { + .name = "ccs3", + .class = XE_ENGINE_CLASS_COMPUTE, + .instance = 3, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = GEN12_COMPUTE3_RING_BASE }, + }, + }, +}; + +static u32 engine_info_mmio_base(const struct engine_info *info, + unsigned int graphics_ver) +{ + int i; + + for (i = 0; i < MAX_MMIO_BASES; i++) + if (graphics_ver >= info->mmio_bases[i].graphics_ver) + break; + + XE_BUG_ON(i == MAX_MMIO_BASES); + XE_BUG_ON(!info->mmio_bases[i].base); + + return info->mmio_bases[i].base; +} + +static void hw_engine_fini(struct drm_device *drm, void *arg) +{ + struct xe_hw_engine *hwe = arg; + + if (hwe->exl_port) + xe_execlist_port_destroy(hwe->exl_port); + xe_lrc_finish(&hwe->kernel_lrc); + + xe_bo_unpin_map_no_vm(hwe->hwsp); + + hwe->gt = NULL; +} + +static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, u32 reg, u32 val) +{ + XE_BUG_ON(reg & hwe->mmio_base); + xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); + + xe_mmio_write32(hwe->gt, reg + hwe->mmio_base, val); +} + +static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, u32 reg) +{ + XE_BUG_ON(reg & hwe->mmio_base); + xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); + + return xe_mmio_read32(hwe->gt, reg + hwe->mmio_base); +} + +void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) +{ + u32 ccs_mask = + xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE); + + if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask & BIT(0)) + xe_mmio_write32(hwe->gt, GEN12_RCU_MODE.reg, + _MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE)); + + hw_engine_mmio_write32(hwe, RING_HWSTAM(0).reg, ~0x0); + hw_engine_mmio_write32(hwe, RING_HWS_PGA(0).reg, + xe_bo_ggtt_addr(hwe->hwsp)); + hw_engine_mmio_write32(hwe, RING_MODE_GEN7(0).reg, + _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); + hw_engine_mmio_write32(hwe, RING_MI_MODE(0).reg, + _MASKED_BIT_DISABLE(STOP_RING)); + hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg); +} + +static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, + enum xe_hw_engine_id id) +{ + struct xe_device *xe = gt_to_xe(gt); + const struct engine_info *info; + + if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name)) + return; + + if (!(gt->info.engine_mask & BIT(id))) + return; + + info = &engine_infos[id]; + + XE_BUG_ON(hwe->gt); + + hwe->gt = gt; + hwe->class = info->class; + hwe->instance = info->instance; + hwe->mmio_base = engine_info_mmio_base(info, GRAPHICS_VER(xe)); + hwe->domain = info->domain; + hwe->name = info->name; + hwe->fence_irq = >->fence_irq[info->class]; + hwe->engine_id = id; + + xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt)); + xe_wa_process_engine(hwe); + + xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt)); + xe_reg_whitelist_process_engine(hwe); +} + +static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, + enum xe_hw_engine_id id) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + + XE_BUG_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name); + XE_BUG_ON(!(gt->info.engine_mask & BIT(id))); + + xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); + xe_reg_sr_apply_whitelist(&hwe->reg_whitelist, hwe->mmio_base, gt); + + hwe->hwsp = xe_bo_create_locked(xe, gt, NULL, SZ_4K, ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(hwe->hwsp)) { + err = PTR_ERR(hwe->hwsp); + goto err_name; + } + + err = 
xe_bo_pin(hwe->hwsp); + if (err) + goto err_unlock_put_hwsp; + + err = xe_bo_vmap(hwe->hwsp); + if (err) + goto err_unpin_hwsp; + + xe_bo_unlock_no_vm(hwe->hwsp); + + err = xe_lrc_init(&hwe->kernel_lrc, hwe, NULL, NULL, SZ_16K); + if (err) + goto err_hwsp; + + if (!xe_device_guc_submission_enabled(xe)) { + hwe->exl_port = xe_execlist_port_create(xe, hwe); + if (IS_ERR(hwe->exl_port)) { + err = PTR_ERR(hwe->exl_port); + goto err_kernel_lrc; + } + } + + if (xe_device_guc_submission_enabled(xe)) + xe_hw_engine_enable_ring(hwe); + + /* We reserve the highest BCS instance for USM */ + if (xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY) + gt->usm.reserved_bcs_instance = hwe->instance; + + err = drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe); + if (err) + return err; + + return 0; + +err_unpin_hwsp: + xe_bo_unpin(hwe->hwsp); +err_unlock_put_hwsp: + xe_bo_unlock_no_vm(hwe->hwsp); + xe_bo_put(hwe->hwsp); +err_kernel_lrc: + xe_lrc_finish(&hwe->kernel_lrc); +err_hwsp: + xe_bo_put(hwe->hwsp); +err_name: + hwe->name = NULL; + + return err; +} + +static void hw_engine_setup_logical_mapping(struct xe_gt *gt) +{ + int class; + + /* FIXME: Doing a simple logical mapping that works for most hardware */ + for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + int logical_instance = 0; + + for_each_hw_engine(hwe, gt, id) + if (hwe->class == class) + hwe->logical_instance = logical_instance++; + } +} + +static void read_fuses(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + u32 media_fuse; + u16 vdbox_mask; + u16 vebox_mask; + u32 bcs_mask; + int i, j; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + + /* + * FIXME: Hack job, thinking we should have table of vfuncs for each + * class which picks the correct vfunc based on IP version. 
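hw_engine_setup_logical_mapping() above hands out consecutive logical instance numbers, per engine class, to whichever physical engines are actually present after fusing. A standalone sketch of that numbering, with a made-up presence mask (nothing here is driver code):

#include <stdio.h>

int main(void)
{
	unsigned int present = 0xb5;	/* invented mask of surviving physical engines */
	int logical = 0;
	int phys;

	for (phys = 0; phys < 8; phys++) {
		if (!(present & (1u << phys)))
			continue;
		/* Present engines get dense logical numbers 0, 1, 2, ...
		 * even when their physical instances have gaps. */
		printf("physical %d -> logical %d\n", phys, logical++);
	}
	return 0;
}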
+ */ + + media_fuse = xe_mmio_read32(gt, GEN11_GT_VEBOX_VDBOX_DISABLE.reg); + if (GRAPHICS_VERx100(xe) < 1250) + media_fuse = ~media_fuse; + + vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK; + vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >> + GEN11_GT_VEBOX_DISABLE_SHIFT; + + for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { + if (!(gt->info.engine_mask & BIT(i))) + continue; + + if (!(BIT(j) & vdbox_mask)) { + gt->info.engine_mask &= ~BIT(i); + drm_info(&xe->drm, "vcs%u fused off\n", j); + } + } + + for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) { + if (!(gt->info.engine_mask & BIT(i))) + continue; + + if (!(BIT(j) & vebox_mask)) { + gt->info.engine_mask &= ~BIT(i); + drm_info(&xe->drm, "vecs%u fused off\n", j); + } + } + + bcs_mask = xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg); + bcs_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, bcs_mask); + + for (i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) { + if (!(gt->info.engine_mask & BIT(i))) + continue; + + if (!(BIT(j/2) & bcs_mask)) { + gt->info.engine_mask &= ~BIT(i); + drm_info(&xe->drm, "bcs%u fused off\n", j); + } + } + + /* TODO: compute engines */ +} + +int xe_hw_engines_init_early(struct xe_gt *gt) +{ + int i; + + read_fuses(gt); + + for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++) + hw_engine_init_early(gt, >->hw_engines[i], i); + + return 0; +} + +int xe_hw_engines_init(struct xe_gt *gt) +{ + int err; + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + for_each_hw_engine(hwe, gt, id) { + err = hw_engine_init(gt, hwe, id); + if (err) + return err; + } + + hw_engine_setup_logical_mapping(gt); + + return 0; +} + +void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec) +{ + wake_up_all(>_to_xe(hwe->gt)->ufence_wq); + + if (hwe->irq_handler) + hwe->irq_handler(hwe, intr_vec); + + if (intr_vec & GT_RENDER_USER_INTERRUPT) + xe_hw_fence_irq_run(hwe->fence_irq); +} + +void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p) +{ + if (!xe_hw_engine_is_valid(hwe)) + return; + + drm_printf(p, "%s (physical), logical instance=%d\n", hwe->name, + hwe->logical_instance); + drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n", + hwe->domain, + xe_force_wake_ref(gt_to_fw(hwe->gt), hwe->domain)); + drm_printf(p, "\tMMIO base: 0x%08x\n", hwe->mmio_base); + + drm_printf(p, "\tHWSTAM: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_HWSTAM(0).reg)); + drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_HWS_PGA(0).reg)); + + drm_printf(p, "\tRING_EXECLIST_STATUS_LO: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0).reg)); + drm_printf(p, "\tRING_EXECLIST_STATUS_HI: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0).reg)); + drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n", + hw_engine_mmio_read32(hwe, + RING_EXECLIST_SQ_CONTENTS(0).reg)); + drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n", + hw_engine_mmio_read32(hwe, + RING_EXECLIST_SQ_CONTENTS(0).reg) + 4); + drm_printf(p, "\tRING_EXECLIST_CONTROL: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0).reg)); + + drm_printf(p, "\tRING_START: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_START(0).reg)); + drm_printf(p, "\tRING_HEAD: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_HEAD(0).reg) & HEAD_ADDR); + drm_printf(p, "\tRING_TAIL: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_TAIL(0).reg) & TAIL_ADDR); + drm_printf(p, "\tRING_CTL: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_CTL(0).reg)); + drm_printf(p, 
"\tRING_MODE: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg)); + drm_printf(p, "\tRING_MODE_GEN7: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_MODE_GEN7(0).reg)); + + drm_printf(p, "\tRING_IMR: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_IMR(0).reg)); + drm_printf(p, "\tRING_ESR: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_ESR(0).reg)); + drm_printf(p, "\tRING_EMR: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_EMR(0).reg)); + drm_printf(p, "\tRING_EIR: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_EIR(0).reg)); + + drm_printf(p, "\tACTHD: 0x%08x_%08x\n", + hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0).reg), + hw_engine_mmio_read32(hwe, RING_ACTHD(0).reg)); + drm_printf(p, "\tBBADDR: 0x%08x_%08x\n", + hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0).reg), + hw_engine_mmio_read32(hwe, RING_BBADDR(0).reg)); + drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n", + hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0).reg), + hw_engine_mmio_read32(hwe, RING_DMA_FADD(0).reg)); + + drm_printf(p, "\tIPEIR: 0x%08x\n", + hw_engine_mmio_read32(hwe, IPEIR(0).reg)); + drm_printf(p, "\tIPEHR: 0x%08x\n\n", + hw_engine_mmio_read32(hwe, IPEHR(0).reg)); + + if (hwe->class == XE_ENGINE_CLASS_COMPUTE) + drm_printf(p, "\tGEN12_RCU_MODE: 0x%08x\n", + xe_mmio_read32(hwe->gt, GEN12_RCU_MODE.reg)); + +} + +u32 xe_hw_engine_mask_per_class(struct xe_gt *gt, + enum xe_engine_class engine_class) +{ + u32 mask = 0; + enum xe_hw_engine_id id; + + for (id = 0; id < XE_NUM_HW_ENGINES; ++id) { + if (engine_infos[id].class == engine_class && + gt->info.engine_mask & BIT(id)) + mask |= BIT(engine_infos[id].instance); + } + return mask; +} + +bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe) +{ + struct xe_gt *gt = hwe->gt; + struct xe_device *xe = gt_to_xe(gt); + + return xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY && + hwe->instance == gt->usm.reserved_bcs_instance; +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h new file mode 100644 index 000000000000..ceab65397256 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_HW_ENGINE_H_ +#define _XE_HW_ENGINE_H_ + +#include "xe_hw_engine_types.h" + +struct drm_printer; + +int xe_hw_engines_init_early(struct xe_gt *gt); +int xe_hw_engines_init(struct xe_gt *gt); +void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec); +void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe); +void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p); +u32 xe_hw_engine_mask_per_class(struct xe_gt *gt, + enum xe_engine_class engine_class); + +bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe); +static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe) +{ + return hwe->name; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h new file mode 100644 index 000000000000..05a2fdc381d7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_HW_ENGINE_TYPES_H_ +#define _XE_HW_ENGINE_TYPES_H_ + +#include "xe_force_wake_types.h" +#include "xe_lrc_types.h" +#include "xe_reg_sr_types.h" + +/* See "Engine ID Definition" struct in the Icelake PRM */ +enum xe_engine_class { + XE_ENGINE_CLASS_RENDER = 0, + XE_ENGINE_CLASS_VIDEO_DECODE = 1, + XE_ENGINE_CLASS_VIDEO_ENHANCE = 2, + XE_ENGINE_CLASS_COPY = 3, + 
XE_ENGINE_CLASS_OTHER = 4, + XE_ENGINE_CLASS_COMPUTE = 5, + XE_ENGINE_CLASS_MAX = 6, +}; + +enum xe_hw_engine_id { + XE_HW_ENGINE_RCS0, + XE_HW_ENGINE_BCS0, + XE_HW_ENGINE_BCS1, + XE_HW_ENGINE_BCS2, + XE_HW_ENGINE_BCS3, + XE_HW_ENGINE_BCS4, + XE_HW_ENGINE_BCS5, + XE_HW_ENGINE_BCS6, + XE_HW_ENGINE_BCS7, + XE_HW_ENGINE_BCS8, + XE_HW_ENGINE_VCS0, + XE_HW_ENGINE_VCS1, + XE_HW_ENGINE_VCS2, + XE_HW_ENGINE_VCS3, + XE_HW_ENGINE_VCS4, + XE_HW_ENGINE_VCS5, + XE_HW_ENGINE_VCS6, + XE_HW_ENGINE_VCS7, + XE_HW_ENGINE_VECS0, + XE_HW_ENGINE_VECS1, + XE_HW_ENGINE_VECS2, + XE_HW_ENGINE_VECS3, + XE_HW_ENGINE_CCS0, + XE_HW_ENGINE_CCS1, + XE_HW_ENGINE_CCS2, + XE_HW_ENGINE_CCS3, + XE_NUM_HW_ENGINES, +}; + +/* FIXME: s/XE_HW_ENGINE_MAX_INSTANCE/XE_HW_ENGINE_MAX_COUNT */ +#define XE_HW_ENGINE_MAX_INSTANCE 9 + +struct xe_bo; +struct xe_execlist_port; +struct xe_gt; + +/** + * struct xe_hw_engine - Hardware engine + * + * Contains all the hardware engine state for physical instances. + */ +struct xe_hw_engine { + /** @gt: graphics tile this hw engine belongs to */ + struct xe_gt *gt; + /** @name: name of this hw engine */ + const char *name; + /** @class: class of this hw engine */ + enum xe_engine_class class; + /** @instance: physical instance of this hw engine */ + u16 instance; + /** @logical_instance: logical instance of this hw engine */ + u16 logical_instance; + /** @mmio_base: MMIO base address of this hw engine*/ + u32 mmio_base; + /** + * @reg_sr: table with registers to be restored on GT init/resume/reset + */ + struct xe_reg_sr reg_sr; + /** + * @reg_whitelist: table with registers to be whitelisted + */ + struct xe_reg_sr reg_whitelist; + /** + * @reg_lrc: LRC workaround registers + */ + struct xe_reg_sr reg_lrc; + /** @domain: force wake domain of this hw engine */ + enum xe_force_wake_domains domain; + /** @hwsp: hardware status page buffer object */ + struct xe_bo *hwsp; + /** @kernel_lrc: Kernel LRC (should be replaced /w an xe_engine) */ + struct xe_lrc kernel_lrc; + /** @exl_port: execlists port */ + struct xe_execlist_port *exl_port; + /** @fence_irq: fence IRQ to run when a hw engine IRQ is received */ + struct xe_hw_fence_irq *fence_irq; + /** @irq_handler: IRQ handler to run when hw engine IRQ is received */ + void (*irq_handler)(struct xe_hw_engine *, u16); + /** @engine_id: id for this hw engine */ + enum xe_hw_engine_id engine_id; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c new file mode 100644 index 000000000000..e56ca2867545 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_hw_fence.h" + +#include +#include + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_hw_engine.h" +#include "xe_macros.h" +#include "xe_map.h" +#include "xe_trace.h" + +static struct kmem_cache *xe_hw_fence_slab; + +int __init xe_hw_fence_module_init(void) +{ + xe_hw_fence_slab = kmem_cache_create("xe_hw_fence", + sizeof(struct xe_hw_fence), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!xe_hw_fence_slab) + return -ENOMEM; + + return 0; +} + +void xe_hw_fence_module_exit(void) +{ + rcu_barrier(); + kmem_cache_destroy(xe_hw_fence_slab); +} + +static struct xe_hw_fence *fence_alloc(void) +{ + return kmem_cache_zalloc(xe_hw_fence_slab, GFP_KERNEL); +} + +static void fence_free(struct rcu_head *rcu) +{ + struct xe_hw_fence *fence = + container_of(rcu, struct xe_hw_fence, dma.rcu); + + if (!WARN_ON_ONCE(!fence)) + 
kmem_cache_free(xe_hw_fence_slab, fence); +} + +static void hw_fence_irq_run_cb(struct irq_work *work) +{ + struct xe_hw_fence_irq *irq = container_of(work, typeof(*irq), work); + struct xe_hw_fence *fence, *next; + bool tmp; + + tmp = dma_fence_begin_signalling(); + spin_lock(&irq->lock); + if (irq->enabled) { + list_for_each_entry_safe(fence, next, &irq->pending, irq_link) { + struct dma_fence *dma_fence = &fence->dma; + + trace_xe_hw_fence_try_signal(fence); + if (dma_fence_is_signaled_locked(dma_fence)) { + trace_xe_hw_fence_signal(fence); + list_del_init(&fence->irq_link); + dma_fence_put(dma_fence); + } + } + } + spin_unlock(&irq->lock); + dma_fence_end_signalling(tmp); +} + +void xe_hw_fence_irq_init(struct xe_hw_fence_irq *irq) +{ + spin_lock_init(&irq->lock); + init_irq_work(&irq->work, hw_fence_irq_run_cb); + INIT_LIST_HEAD(&irq->pending); + irq->enabled = true; +} + +void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq) +{ + struct xe_hw_fence *fence, *next; + unsigned long flags; + int err; + bool tmp; + + if (XE_WARN_ON(!list_empty(&irq->pending))) { + tmp = dma_fence_begin_signalling(); + spin_lock_irqsave(&irq->lock, flags); + list_for_each_entry_safe(fence, next, &irq->pending, irq_link) { + list_del_init(&fence->irq_link); + err = dma_fence_signal_locked(&fence->dma); + dma_fence_put(&fence->dma); + XE_WARN_ON(err); + } + spin_unlock_irqrestore(&irq->lock, flags); + dma_fence_end_signalling(tmp); + } +} + +void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq) +{ + irq_work_queue(&irq->work); +} + +void xe_hw_fence_irq_stop(struct xe_hw_fence_irq *irq) +{ + spin_lock_irq(&irq->lock); + irq->enabled = false; + spin_unlock_irq(&irq->lock); +} + +void xe_hw_fence_irq_start(struct xe_hw_fence_irq *irq) +{ + spin_lock_irq(&irq->lock); + irq->enabled = true; + spin_unlock_irq(&irq->lock); + + irq_work_queue(&irq->work); +} + +void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt, + struct xe_hw_fence_irq *irq, const char *name) +{ + ctx->gt = gt; + ctx->irq = irq; + ctx->dma_fence_ctx = dma_fence_context_alloc(1); + ctx->next_seqno = 1; + sprintf(ctx->name, "%s", name); +} + +void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx) +{ +} + +static struct xe_hw_fence *to_xe_hw_fence(struct dma_fence *fence); + +static struct xe_hw_fence_irq *xe_hw_fence_irq(struct xe_hw_fence *fence) +{ + return container_of(fence->dma.lock, struct xe_hw_fence_irq, lock); +} + +static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence) +{ + struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); + + return dev_name(gt_to_xe(fence->ctx->gt)->drm.dev); +} + +static const char *xe_hw_fence_get_timeline_name(struct dma_fence *dma_fence) +{ + struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); + + return fence->ctx->name; +} + +static bool xe_hw_fence_signaled(struct dma_fence *dma_fence) +{ + struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); + struct xe_device *xe = gt_to_xe(fence->ctx->gt); + u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32); + + return dma_fence->error || + (s32)fence->dma.seqno <= (s32)seqno; +} + +static bool xe_hw_fence_enable_signaling(struct dma_fence *dma_fence) +{ + struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); + struct xe_hw_fence_irq *irq = xe_hw_fence_irq(fence); + + dma_fence_get(dma_fence); + list_add_tail(&fence->irq_link, &irq->pending); + + /* SW completed (no HW IRQ) so kick handler to signal fence */ + if (xe_hw_fence_signaled(dma_fence)) + xe_hw_fence_irq_run(irq); + + return true; +} + +static void 
xe_hw_fence_release(struct dma_fence *dma_fence) +{ + struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); + + trace_xe_hw_fence_free(fence); + XE_BUG_ON(!list_empty(&fence->irq_link)); + call_rcu(&dma_fence->rcu, fence_free); +} + +static const struct dma_fence_ops xe_hw_fence_ops = { + .get_driver_name = xe_hw_fence_get_driver_name, + .get_timeline_name = xe_hw_fence_get_timeline_name, + .enable_signaling = xe_hw_fence_enable_signaling, + .signaled = xe_hw_fence_signaled, + .release = xe_hw_fence_release, +}; + +static struct xe_hw_fence *to_xe_hw_fence(struct dma_fence *fence) +{ + if (XE_WARN_ON(fence->ops != &xe_hw_fence_ops)) + return NULL; + + return container_of(fence, struct xe_hw_fence, dma); +} + +struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx, + struct iosys_map seqno_map) +{ + struct xe_hw_fence *fence; + + fence = fence_alloc(); + if (!fence) + return ERR_PTR(-ENOMEM); + + dma_fence_init(&fence->dma, &xe_hw_fence_ops, &ctx->irq->lock, + ctx->dma_fence_ctx, ctx->next_seqno++); + + fence->ctx = ctx; + fence->seqno_map = seqno_map; + INIT_LIST_HEAD(&fence->irq_link); + + trace_xe_hw_fence_create(fence); + + return fence; +} diff --git a/drivers/gpu/drm/xe/xe_hw_fence.h b/drivers/gpu/drm/xe/xe_hw_fence.h new file mode 100644 index 000000000000..07f202db6526 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_fence.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_HW_FENCE_H_ +#define _XE_HW_FENCE_H_ + +#include "xe_hw_fence_types.h" + +int xe_hw_fence_module_init(void); +void xe_hw_fence_module_exit(void); + +void xe_hw_fence_irq_init(struct xe_hw_fence_irq *irq); +void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq); +void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq); +void xe_hw_fence_irq_stop(struct xe_hw_fence_irq *irq); +void xe_hw_fence_irq_start(struct xe_hw_fence_irq *irq); + +void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt, + struct xe_hw_fence_irq *irq, const char *name); +void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx); + +struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx, + struct iosys_map seqno_map); + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h new file mode 100644 index 000000000000..a78e50eb3cb8 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_HW_FENCE_TYPES_H_ +#define _XE_HW_FENCE_TYPES_H_ + +#include +#include +#include +#include +#include + +struct xe_gt; + +/** + * struct xe_hw_fence_irq - hardware fence IRQ handler + * + * One per engine class, signals completed xe_hw_fences, triggered via hw engine + * interrupt. On each trigger, search list of pending fences and signal. + */ +struct xe_hw_fence_irq { + /** @lock: protects all xe_hw_fences + pending list */ + spinlock_t lock; + /** @work: IRQ worker run to signal the fences */ + struct irq_work work; + /** @pending: list of pending xe_hw_fences */ + struct list_head pending; + /** @enabled: fence signaling enabled */ + bool enabled; +}; + +#define MAX_FENCE_NAME_LEN 16 + +/** + * struct xe_hw_fence_ctx - hardware fence context + * + * The context for a hardware fence. 1 to 1 relationship with xe_engine. Points + * to a xe_hw_fence_irq, maintains serial seqno. 
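As a toy model of the "one context per engine, serial seqno" relationship the comment above describes, a self-contained sketch (the struct and field names are invented; as the real structure below shows, the driver's context also carries the GT pointer, the fence IRQ handler and a fixed-size name):

#include <stdint.h>
#include <stdio.h>

struct fence_ctx {
	uint64_t id;		/* plays the role of a dma-fence context id */
	uint32_t next_seqno;	/* next serial number to hand out */
};

static uint32_t fence_ctx_next_seqno(struct fence_ctx *ctx)
{
	return ctx->next_seqno++;
}

int main(void)
{
	struct fence_ctx ctx = { .id = 1, .next_seqno = 1 };
	int i;

	for (i = 0; i < 3; i++)
		printf("ctx %llu -> fence seqno %u\n",
		       (unsigned long long)ctx.id, fence_ctx_next_seqno(&ctx));
	return 0;
}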
+ */ +struct xe_hw_fence_ctx { + /** @gt: graphics tile of hardware fence context */ + struct xe_gt *gt; + /** @irq: fence irq handler */ + struct xe_hw_fence_irq *irq; + /** @dma_fence_ctx: dma fence context for hardware fence */ + u64 dma_fence_ctx; + /** @next_seqno: next seqno for hardware fence */ + u32 next_seqno; + /** @name: name of hardware fence context */ + char name[MAX_FENCE_NAME_LEN]; +}; + +/** + * struct xe_hw_fence - hardware fence + * + * Used to indicate a xe_sched_job is complete via a seqno written to memory. + * Signals on error or seqno past. + */ +struct xe_hw_fence { + /** @dma: base dma fence for hardware fence context */ + struct dma_fence dma; + /** @ctx: hardware fence context */ + struct xe_hw_fence_ctx *ctx; + /** @seqno_map: I/O map for seqno */ + struct iosys_map seqno_map; + /** @irq_link: Link in struct xe_hw_fence_irq.pending */ + struct list_head irq_link; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c new file mode 100644 index 000000000000..df2e3573201d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -0,0 +1,565 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include + +#include + +#include "xe_device.h" +#include "xe_drv.h" +#include "xe_guc.h" +#include "xe_gt.h" +#include "xe_hw_engine.h" +#include "xe_mmio.h" + +#include "i915_reg.h" +#include "gt/intel_gt_regs.h" + +static void gen3_assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg) +{ + u32 val = xe_mmio_read32(gt, reg.reg); + + if (val == 0) + return; + + drm_WARN(>_to_xe(gt)->drm, 1, + "Interrupt register 0x%x is not zero: 0x%08x\n", + reg.reg, val); + xe_mmio_write32(gt, reg.reg, 0xffffffff); + xe_mmio_read32(gt, reg.reg); + xe_mmio_write32(gt, reg.reg, 0xffffffff); + xe_mmio_read32(gt, reg.reg); +} + +static void gen3_irq_init(struct xe_gt *gt, + i915_reg_t imr, u32 imr_val, + i915_reg_t ier, u32 ier_val, + i915_reg_t iir) +{ + gen3_assert_iir_is_zero(gt, iir); + + xe_mmio_write32(gt, ier.reg, ier_val); + xe_mmio_write32(gt, imr.reg, imr_val); + xe_mmio_read32(gt, imr.reg); +} +#define GEN3_IRQ_INIT(gt, type, imr_val, ier_val) \ + gen3_irq_init((gt), \ + type##IMR, imr_val, \ + type##IER, ier_val, \ + type##IIR) + +static void gen3_irq_reset(struct xe_gt *gt, i915_reg_t imr, i915_reg_t iir, + i915_reg_t ier) +{ + xe_mmio_write32(gt, imr.reg, 0xffffffff); + xe_mmio_read32(gt, imr.reg); + + xe_mmio_write32(gt, ier.reg, 0); + + /* IIR can theoretically queue up two events. Be paranoid. */ + xe_mmio_write32(gt, iir.reg, 0xffffffff); + xe_mmio_read32(gt, iir.reg); + xe_mmio_write32(gt, iir.reg, 0xffffffff); + xe_mmio_read32(gt, iir.reg); +} +#define GEN3_IRQ_RESET(gt, type) \ + gen3_irq_reset((gt), type##IMR, type##IIR, type##IER) + +static u32 gen11_intr_disable(struct xe_gt *gt) +{ + xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, 0); + + /* + * Now with master disabled, get a sample of level indications + * for this interrupt. Indications will be cleared on related acks. + * New indications can and will light up during processing, + * and will generate new interrupt after enabling master. 
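+ * The caller is expected to handle the sampled bits and then re-enable
+ * the master interrupt via gen11_intr_enable().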
+ */ + return xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg); +} + +static u32 +gen11_gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl) +{ + u32 iir; + + if (!(master_ctl & GEN11_GU_MISC_IRQ)) + return 0; + + iir = xe_mmio_read32(gt, GEN11_GU_MISC_IIR.reg); + if (likely(iir)) + xe_mmio_write32(gt, GEN11_GU_MISC_IIR.reg, iir); + + return iir; +} + +static inline void gen11_intr_enable(struct xe_gt *gt, bool stall) +{ + xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, GEN11_MASTER_IRQ); + if (stall) + xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg); +} + +static void gen11_gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) +{ + u32 irqs, dmask, smask; + u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE); + u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY); + + if (xe_device_guc_submission_enabled(xe)) { + irqs = GT_RENDER_USER_INTERRUPT | + GT_RENDER_PIPECTL_NOTIFY_INTERRUPT; + } else { + irqs = GT_RENDER_USER_INTERRUPT | + GT_CS_MASTER_ERROR_INTERRUPT | + GT_CONTEXT_SWITCH_INTERRUPT | + GT_WAIT_SEMAPHORE_INTERRUPT; + } + + dmask = irqs << 16 | irqs; + smask = irqs << 16; + + /* Enable RCS, BCS, VCS and VECS class interrupts. */ + xe_mmio_write32(gt, GEN11_RENDER_COPY_INTR_ENABLE.reg, dmask); + xe_mmio_write32(gt, GEN11_VCS_VECS_INTR_ENABLE.reg, dmask); + if (ccs_mask) + xe_mmio_write32(gt, GEN12_CCS_RSVD_INTR_ENABLE.reg, smask); + + /* Unmask irqs on RCS, BCS, VCS and VECS engines. */ + xe_mmio_write32(gt, GEN11_RCS0_RSVD_INTR_MASK.reg, ~smask); + xe_mmio_write32(gt, GEN11_BCS_RSVD_INTR_MASK.reg, ~smask); + if (bcs_mask & (BIT(1)|BIT(2))) + xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK.reg, ~dmask); + if (bcs_mask & (BIT(3)|BIT(4))) + xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK.reg, ~dmask); + if (bcs_mask & (BIT(5)|BIT(6))) + xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK.reg, ~dmask); + if (bcs_mask & (BIT(7)|BIT(8))) + xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~dmask); + xe_mmio_write32(gt, GEN11_VCS0_VCS1_INTR_MASK.reg, ~dmask); + xe_mmio_write32(gt, GEN11_VCS2_VCS3_INTR_MASK.reg, ~dmask); + //if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5)) + // intel_uncore_write(uncore, GEN12_VCS4_VCS5_INTR_MASK, ~dmask); + //if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7)) + // intel_uncore_write(uncore, GEN12_VCS6_VCS7_INTR_MASK, ~dmask); + xe_mmio_write32(gt, GEN11_VECS0_VECS1_INTR_MASK.reg, ~dmask); + //if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3)) + // intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~dmask); + if (ccs_mask & (BIT(0)|BIT(1))) + xe_mmio_write32(gt, GEN12_CCS0_CCS1_INTR_MASK.reg, ~dmask); + if (ccs_mask & (BIT(2)|BIT(3))) + xe_mmio_write32(gt, GEN12_CCS2_CCS3_INTR_MASK.reg, ~dmask); + + /* + * RPS interrupts will get enabled/disabled on demand when RPS itself + * is enabled/disabled. 
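+ * Until then keep the WGBOXPERF and GuC SG interrupts disabled and fully
+ * masked, as done below.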
+ */ + /* TODO: gt->pm_ier, gt->pm_imr */ + xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_ENABLE.reg, 0); + xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_MASK.reg, ~0); + + /* Same thing for GuC interrupts */ + xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg, 0); + xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg, ~0); +} + +static void gen11_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) +{ + /* TODO: PCH */ + + gen11_gt_irq_postinstall(xe, gt); + + GEN3_IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE, + GEN11_GU_MISC_GSE); + + gen11_intr_enable(gt, true); +} + +static u32 +gen11_gt_engine_identity(struct xe_device *xe, + struct xe_gt *gt, + const unsigned int bank, + const unsigned int bit) +{ + u32 timeout_ts; + u32 ident; + + lockdep_assert_held(&xe->irq.lock); + + xe_mmio_write32(gt, GEN11_IIR_REG_SELECTOR(bank).reg, BIT(bit)); + + /* + * NB: Specs do not specify how long to spin wait, + * so we do ~100us as an educated guess. + */ + timeout_ts = (local_clock() >> 10) + 100; + do { + ident = xe_mmio_read32(gt, GEN11_INTR_IDENTITY_REG(bank).reg); + } while (!(ident & GEN11_INTR_DATA_VALID) && + !time_after32(local_clock() >> 10, timeout_ts)); + + if (unlikely(!(ident & GEN11_INTR_DATA_VALID))) { + drm_err(&xe->drm, "INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n", + bank, bit, ident); + return 0; + } + + xe_mmio_write32(gt, GEN11_INTR_IDENTITY_REG(bank).reg, + GEN11_INTR_DATA_VALID); + + return ident; +} + +#define OTHER_MEDIA_GUC_INSTANCE 16 + +static void +gen11_gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir) +{ + if (instance == OTHER_GUC_INSTANCE && !xe_gt_is_media_type(gt)) + return xe_guc_irq_handler(>->uc.guc, iir); + if (instance == OTHER_MEDIA_GUC_INSTANCE && xe_gt_is_media_type(gt)) + return xe_guc_irq_handler(>->uc.guc, iir); + + if (instance != OTHER_GUC_INSTANCE && + instance != OTHER_MEDIA_GUC_INSTANCE) { + WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n", + instance, iir); + } +} + +static void gen11_gt_irq_handler(struct xe_device *xe, struct xe_gt *gt, + u32 master_ctl, long unsigned int *intr_dw, + u32 *identity) +{ + unsigned int bank, bit; + u16 instance, intr_vec; + enum xe_engine_class class; + struct xe_hw_engine *hwe; + + spin_lock(&xe->irq.lock); + + for (bank = 0; bank < 2; bank++) { + if (!(master_ctl & GEN11_GT_DW_IRQ(bank))) + continue; + + if (!xe_gt_is_media_type(gt)) { + intr_dw[bank] = + xe_mmio_read32(gt, GEN11_GT_INTR_DW(bank).reg); + for_each_set_bit(bit, intr_dw + bank, 32) + identity[bit] = gen11_gt_engine_identity(xe, gt, + bank, + bit); + xe_mmio_write32(gt, GEN11_GT_INTR_DW(bank).reg, + intr_dw[bank]); + } + + for_each_set_bit(bit, intr_dw + bank, 32) { + class = GEN11_INTR_ENGINE_CLASS(identity[bit]); + instance = GEN11_INTR_ENGINE_INSTANCE(identity[bit]); + intr_vec = GEN11_INTR_ENGINE_INTR(identity[bit]); + + if (class == XE_ENGINE_CLASS_OTHER) { + gen11_gt_other_irq_handler(gt, instance, + intr_vec); + continue; + } + + hwe = xe_gt_hw_engine(gt, class, instance, false); + if (!hwe) + continue; + + xe_hw_engine_handle_irq(hwe, intr_vec); + } + } + + spin_unlock(&xe->irq.lock); +} + +static irqreturn_t gen11_irq_handler(int irq, void *arg) +{ + struct xe_device *xe = arg; + struct xe_gt *gt = xe_device_get_gt(xe, 0); /* Only 1 GT here */ + u32 master_ctl, gu_misc_iir; + long unsigned int intr_dw[2]; + u32 identity[32]; + + master_ctl = gen11_intr_disable(gt); + if (!master_ctl) { + gen11_intr_enable(gt, false); + return IRQ_NONE; + } + + gen11_gt_irq_handler(xe, gt, master_ctl, intr_dw, identity); + 
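+ /*
+ * Ack any pending GU_MISC (GSE) bits; the returned value is not
+ * consumed here yet.
+ */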
+ gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl); + + gen11_intr_enable(gt, false); + + return IRQ_HANDLED; +} + +static u32 dg1_intr_disable(struct xe_device *xe) +{ + struct xe_gt *gt = xe_device_get_gt(xe, 0); + u32 val; + + /* First disable interrupts */ + xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, 0); + + /* Get the indication levels and ack the master unit */ + val = xe_mmio_read32(gt, DG1_MSTR_TILE_INTR.reg); + if (unlikely(!val)) + return 0; + + xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, val); + + return val; +} + +static void dg1_intr_enable(struct xe_device *xe, bool stall) +{ + struct xe_gt *gt = xe_device_get_gt(xe, 0); + + xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, DG1_MSTR_IRQ); + if (stall) + xe_mmio_read32(gt, DG1_MSTR_TILE_INTR.reg); +} + +static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) +{ + gen11_gt_irq_postinstall(xe, gt); + + GEN3_IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE, + GEN11_GU_MISC_GSE); + + if (gt->info.id + 1 == xe->info.tile_count) + dg1_intr_enable(xe, true); +} + +static irqreturn_t dg1_irq_handler(int irq, void *arg) +{ + struct xe_device *xe = arg; + struct xe_gt *gt; + u32 master_tile_ctl, master_ctl = 0, gu_misc_iir; + long unsigned int intr_dw[2]; + u32 identity[32]; + u8 id; + + /* TODO: This really shouldn't be copied+pasted */ + + master_tile_ctl = dg1_intr_disable(xe); + if (!master_tile_ctl) { + dg1_intr_enable(xe, false); + return IRQ_NONE; + } + + for_each_gt(gt, xe, id) { + if ((master_tile_ctl & DG1_MSTR_TILE(gt->info.vram_id)) == 0) + continue; + + if (!xe_gt_is_media_type(gt)) + master_ctl = xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg); + + /* + * We might be in irq handler just when PCIe DPC is initiated + * and all MMIO reads will be returned with all 1's. Ignore this + * irq as device is inaccessible. + */ + if (master_ctl == REG_GENMASK(31, 0)) { + dev_dbg(gt_to_xe(gt)->drm.dev, + "Ignore this IRQ as device might be in DPC containment.\n"); + return IRQ_HANDLED; + } + + if (!xe_gt_is_media_type(gt)) + xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, master_ctl); + gen11_gt_irq_handler(xe, gt, master_ctl, intr_dw, identity); + } + + gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl); + + dg1_intr_enable(xe, false); + + return IRQ_HANDLED; +} + +static void gen11_gt_irq_reset(struct xe_gt *gt) +{ + u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE); + u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY); + + /* Disable RCS, BCS, VCS and VECS class engines. */ + xe_mmio_write32(gt, GEN11_RENDER_COPY_INTR_ENABLE.reg, 0); + xe_mmio_write32(gt, GEN11_VCS_VECS_INTR_ENABLE.reg, 0); + if (ccs_mask) + xe_mmio_write32(gt, GEN12_CCS_RSVD_INTR_ENABLE.reg, 0); + + /* Restore masks irqs on RCS, BCS, VCS and VECS engines. 
*/ + xe_mmio_write32(gt, GEN11_RCS0_RSVD_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, GEN11_BCS_RSVD_INTR_MASK.reg, ~0); + if (bcs_mask & (BIT(1)|BIT(2))) + xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK.reg, ~0); + if (bcs_mask & (BIT(3)|BIT(4))) + xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK.reg, ~0); + if (bcs_mask & (BIT(5)|BIT(6))) + xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK.reg, ~0); + if (bcs_mask & (BIT(7)|BIT(8))) + xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, GEN11_VCS0_VCS1_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, GEN11_VCS2_VCS3_INTR_MASK.reg, ~0); +// if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5)) +// xe_mmio_write32(xe, GEN12_VCS4_VCS5_INTR_MASK.reg, ~0); +// if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7)) +// xe_mmio_write32(xe, GEN12_VCS6_VCS7_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, GEN11_VECS0_VECS1_INTR_MASK.reg, ~0); +// if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3)) +// xe_mmio_write32(xe, GEN12_VECS2_VECS3_INTR_MASK.reg, ~0); + if (ccs_mask & (BIT(0)|BIT(1))) + xe_mmio_write32(gt, GEN12_CCS0_CCS1_INTR_MASK.reg, ~0); + if (ccs_mask & (BIT(2)|BIT(3))) + xe_mmio_write32(gt, GEN12_CCS2_CCS3_INTR_MASK.reg, ~0); + + xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_ENABLE.reg, 0); + xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg, 0); + xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg, ~0); +} + +static void gen11_irq_reset(struct xe_gt *gt) +{ + gen11_intr_disable(gt); + + gen11_gt_irq_reset(gt); + + GEN3_IRQ_RESET(gt, GEN11_GU_MISC_); + GEN3_IRQ_RESET(gt, GEN8_PCU_); +} + +static void dg1_irq_reset(struct xe_gt *gt) +{ + if (gt->info.id == 0) + dg1_intr_disable(gt_to_xe(gt)); + + gen11_gt_irq_reset(gt); + + GEN3_IRQ_RESET(gt, GEN11_GU_MISC_); + GEN3_IRQ_RESET(gt, GEN8_PCU_); +} + +void xe_irq_reset(struct xe_device *xe) +{ + struct xe_gt *gt; + u8 id; + + for_each_gt(gt, xe, id) { + if (GRAPHICS_VERx100(xe) >= 1210) { + dg1_irq_reset(gt); + } else if (GRAPHICS_VER(xe) >= 11) { + gen11_irq_reset(gt); + } else { + drm_err(&xe->drm, "No interrupt reset hook"); + } + } +} + +void xe_gt_irq_postinstall(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (GRAPHICS_VERx100(xe) >= 1210) + dg1_irq_postinstall(xe, gt); + else if (GRAPHICS_VER(xe) >= 11) + gen11_irq_postinstall(xe, gt); + else + drm_err(&xe->drm, "No interrupt postinstall hook"); +} + +static void xe_irq_postinstall(struct xe_device *xe) +{ + struct xe_gt *gt; + u8 id; + + for_each_gt(gt, xe, id) + xe_gt_irq_postinstall(gt); +} + +static irq_handler_t xe_irq_handler(struct xe_device *xe) +{ + if (GRAPHICS_VERx100(xe) >= 1210) { + return dg1_irq_handler; + } else if (GRAPHICS_VER(xe) >= 11) { + return gen11_irq_handler; + } else { + return NULL; + } +} + +static void irq_uninstall(struct drm_device *drm, void *arg) +{ + struct xe_device *xe = arg; + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int irq = pdev->irq; + + if (!xe->irq.enabled) + return; + + xe->irq.enabled = false; + xe_irq_reset(xe); + free_irq(irq, xe); + if (pdev->msi_enabled) + pci_disable_msi(pdev); +} + +int xe_irq_install(struct xe_device *xe) +{ + int irq = to_pci_dev(xe->drm.dev)->irq; + static irq_handler_t irq_handler; + int err; + + irq_handler = xe_irq_handler(xe); + if (!irq_handler) { + drm_err(&xe->drm, "No supported interrupt handler"); + return -EINVAL; + } + + xe->irq.enabled = true; + + xe_irq_reset(xe); + + err = request_irq(irq, irq_handler, + IRQF_SHARED, DRIVER_NAME, xe); + if (err < 0) { + xe->irq.enabled = 
false; + return err; + } + + err = drmm_add_action_or_reset(&xe->drm, irq_uninstall, xe); + if (err) + return err; + + return err; +} + +void xe_irq_shutdown(struct xe_device *xe) +{ + irq_uninstall(&xe->drm, xe); +} + +void xe_irq_suspend(struct xe_device *xe) +{ + spin_lock_irq(&xe->irq.lock); + xe->irq.enabled = false; + xe_irq_reset(xe); + spin_unlock_irq(&xe->irq.lock); +} + +void xe_irq_resume(struct xe_device *xe) +{ + spin_lock_irq(&xe->irq.lock); + xe->irq.enabled = true; + xe_irq_reset(xe); + xe_irq_postinstall(xe); + spin_unlock_irq(&xe->irq.lock); +} diff --git a/drivers/gpu/drm/xe/xe_irq.h b/drivers/gpu/drm/xe/xe_irq.h new file mode 100644 index 000000000000..34ecf22b32d3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_irq.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_IRQ_H_ +#define _XE_IRQ_H_ + +struct xe_device; +struct xe_gt; + +int xe_irq_install(struct xe_device *xe); +void xe_gt_irq_postinstall(struct xe_gt *gt); +void xe_irq_shutdown(struct xe_device *xe); +void xe_irq_suspend(struct xe_device *xe); +void xe_irq_resume(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c new file mode 100644 index 000000000000..056c2c5a0b81 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -0,0 +1,841 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_lrc.h" + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_engine_types.h" +#include "xe_gt.h" +#include "xe_map.h" +#include "xe_hw_fence.h" +#include "xe_vm.h" + +#include "i915_reg.h" +#include "gt/intel_gpu_commands.h" +#include "gt/intel_gt_regs.h" +#include "gt/intel_lrc_reg.h" +#include "gt/intel_engine_regs.h" + +#define GEN8_CTX_VALID (1 << 0) +#define GEN8_CTX_L3LLC_COHERENT (1 << 5) +#define GEN8_CTX_PRIVILEGE (1 << 8) +#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3 +#define INTEL_LEGACY_64B_CONTEXT 3 + +#define GEN11_ENGINE_CLASS_SHIFT 61 +#define GEN11_ENGINE_INSTANCE_SHIFT 48 + +static struct xe_device * +lrc_to_xe(struct xe_lrc *lrc) +{ + return gt_to_xe(lrc->fence_ctx.gt); +} + +size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class) +{ + switch (class) { + case XE_ENGINE_CLASS_RENDER: + case XE_ENGINE_CLASS_COMPUTE: + /* 14 pages since graphics_ver == 11 */ + return 14 * SZ_4K; + default: + WARN(1, "Unknown engine class: %d", class); + fallthrough; + case XE_ENGINE_CLASS_COPY: + case XE_ENGINE_CLASS_VIDEO_DECODE: + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + return 2 * SZ_4K; + } +} + +/* + * The per-platform tables are u8-encoded in @data. Decode @data and set the + * addresses' offset and commands in @regs. The following encoding is used + * for each byte. There are 2 steps: decoding commands and decoding addresses. + * + * Commands: + * [7]: create NOPs - number of NOPs are set in lower bits + * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set + * MI_LRI_FORCE_POSTED + * [5:0]: Number of NOPs or registers to set values to in case of + * MI_LOAD_REGISTER_IMM + * + * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count" + * number of registers. They are set by using the REG/REG16 macros: the former + * is used for offsets smaller than 0x200 while the latter is for values bigger + * than that. 
Those macros already set all the bits documented below correctly: + * + * [7]: When a register offset needs more than 6 bits, use additional bytes, to + * follow, for the lower bits + * [6:0]: Register offset, without considering the engine base. + * + * This function only tweaks the commands and register offsets. Values are not + * filled out. + */ +static void set_offsets(u32 *regs, + const u8 *data, + const struct xe_hw_engine *hwe) +#define NOP(x) (BIT(7) | (x)) +#define LRI(count, flags) ((flags) << 6 | (count) | \ + BUILD_BUG_ON_ZERO(count >= BIT(6))) +#define POSTED BIT(0) +#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)) +#define REG16(x) \ + (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ + (((x) >> 2) & 0x7f) +#define END 0 +{ + const u32 base = hwe->mmio_base; + + while (*data) { + u8 count, flags; + + if (*data & BIT(7)) { /* skip */ + count = *data++ & ~BIT(7); + regs += count; + continue; + } + + count = *data & 0x3f; + flags = *data >> 6; + data++; + + *regs = MI_LOAD_REGISTER_IMM(count); + if (flags & POSTED) + *regs |= MI_LRI_FORCE_POSTED; + *regs |= MI_LRI_LRM_CS_MMIO; + regs++; + + XE_BUG_ON(!count); + do { + u32 offset = 0; + u8 v; + + do { + v = *data++; + offset <<= 7; + offset |= v & ~BIT(7); + } while (v & BIT(7)); + + regs[0] = base + (offset << 2); + regs += 2; + } while (--count); + } + + *regs = MI_BATCH_BUFFER_END | BIT(0); +} + +static const u8 gen12_xcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + END +}; + +static const u8 dg2_xcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + REG(0x120), + REG(0x124), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + END +}; + +static const u8 gen12_rcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + NOP(3 + 9 + 1), + + LRI(51, POSTED), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG(0x028), + REG(0x09c), + REG(0x0c0), + REG(0x178), + REG(0x17c), + REG16(0x358), + REG(0x170), + REG(0x150), + REG(0x154), + REG(0x158), + REG16(0x41c), + REG16(0x600), + REG16(0x604), + REG16(0x608), + REG16(0x60c), + REG16(0x610), + REG16(0x614), + REG16(0x618), + REG16(0x61c), + REG16(0x620), + REG16(0x624), + REG16(0x628), + REG16(0x62c), + REG16(0x630), + REG16(0x634), + REG16(0x638), + REG16(0x63c), + REG16(0x640), + REG16(0x644), + REG16(0x648), + REG16(0x64c), + REG16(0x650), + REG16(0x654), + REG16(0x658), + REG16(0x65c), + REG16(0x660), + 
REG16(0x664), + REG16(0x668), + REG16(0x66c), + REG16(0x670), + REG16(0x674), + REG16(0x678), + REG16(0x67c), + REG(0x068), + REG(0x084), + NOP(1), + + END +}; + +static const u8 xehp_rcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END +}; + +static const u8 dg2_rcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + REG(0x120), + REG(0x124), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END +}; + +static const u8 mtl_rcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + REG(0x120), + REG(0x124), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(2), + LRI(2, POSTED), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END +}; + +#undef END +#undef REG16 +#undef REG +#undef LRI +#undef NOP + +static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class) +{ + if (class == XE_ENGINE_CLASS_RENDER) { + if (GRAPHICS_VERx100(xe) >= 1270) + return mtl_rcs_offsets; + else if (GRAPHICS_VERx100(xe) >= 1255) + return dg2_rcs_offsets; + else if (GRAPHICS_VERx100(xe) >= 1250) + return xehp_rcs_offsets; + else + return gen12_rcs_offsets; + } else { + if (GRAPHICS_VERx100(xe) >= 1255) + return dg2_xcs_offsets; + else + return gen12_xcs_offsets; + } +} + +static void set_context_control(u32 * regs, struct xe_hw_engine *hwe) +{ + regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) | + _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | + CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT; + + /* TODO: Timestamp */ +} + +static int lrc_ring_mi_mode(struct xe_hw_engine *hwe) +{ + struct xe_device *xe = gt_to_xe(hwe->gt); + + if (GRAPHICS_VERx100(xe) >= 1250) + return 0x70; + else + return 0x60; +} + +static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe) +{ + int x; + + x = lrc_ring_mi_mode(hwe); + regs[x + 1] &= ~STOP_RING; + regs[x + 1] |= STOP_RING << 16; +} + +static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc) +{ + return 0; +} + +u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) +{ + return lrc->ring.size; +} + +/* Make the magic macros work */ +#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset + +#define LRC_SEQNO_PPHWSP_OFFSET 512 +#define LRC_START_SEQNO_PPHWSP_OFFSET LRC_SEQNO_PPHWSP_OFFSET + 8 +#define LRC_PARALLEL_PPHWSP_OFFSET 2048 +#define LRC_PPHWSP_SIZE SZ_4K + +static size_t lrc_reg_size(struct xe_device *xe) +{ + if (GRAPHICS_VERx100(xe) >= 
1250) + return 96 * sizeof(u32); + else + return 80 * sizeof(u32); +} + +size_t xe_lrc_skip_size(struct xe_device *xe) +{ + return LRC_PPHWSP_SIZE + lrc_reg_size(xe); +} + +static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc) +{ + /* The seqno is stored in the driver-defined portion of PPHWSP */ + return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET; +} + +static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc) +{ + /* The start seqno is stored in the driver-defined portion of PPHWSP */ + return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET; +} + +static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc) +{ + /* The parallel is stored in the driver-defined portion of PPHWSP */ + return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET; +} + +static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc) +{ + return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; +} + +#define DECL_MAP_ADDR_HELPERS(elem) \ +static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \ +{ \ + struct iosys_map map = lrc->bo->vmap; \ +\ + XE_BUG_ON(iosys_map_is_null(&map)); \ + iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \ + return map; \ +} \ +static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \ +{ \ + return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \ +} \ + +DECL_MAP_ADDR_HELPERS(ring) +DECL_MAP_ADDR_HELPERS(pphwsp) +DECL_MAP_ADDR_HELPERS(seqno) +DECL_MAP_ADDR_HELPERS(regs) +DECL_MAP_ADDR_HELPERS(start_seqno) +DECL_MAP_ADDR_HELPERS(parallel) + +#undef DECL_MAP_ADDR_HELPERS + +u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc) +{ + return __xe_lrc_pphwsp_ggtt_addr(lrc); +} + +u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + + map = __xe_lrc_regs_map(lrc); + iosys_map_incr(&map, reg_nr * sizeof(u32)); + return xe_map_read32(xe, &map); +} + +void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + + map = __xe_lrc_regs_map(lrc); + iosys_map_incr(&map, reg_nr * sizeof(u32)); + xe_map_write32(xe, &map, val); +} + +static void *empty_lrc_data(struct xe_hw_engine *hwe) +{ + struct xe_device *xe = gt_to_xe(hwe->gt); + void *data; + u32 *regs; + + data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL); + if (!data) + return NULL; + + /* 1st page: Per-Process of HW status Page */ + regs = data + LRC_PPHWSP_SIZE; + set_offsets(regs, reg_offsets(xe, hwe->class), hwe); + set_context_control(regs, hwe); + reset_stop_ring(regs, hwe); + + return data; +} + +static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) +{ + u64 desc = xe_vm_pdp4_descriptor(vm, lrc->full_gt); + + xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc)); + xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc)); +} + +#define PVC_CTX_ASID (0x2e + 1) +#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) +#define ACC_GRANULARITY_S 20 +#define ACC_NOTIFY_S 16 + +int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, + struct xe_engine *e, struct xe_vm *vm, u32 ring_size) +{ + struct xe_gt *gt = hwe->gt; + struct xe_device *xe = gt_to_xe(gt); + struct iosys_map map; + void *init_data = NULL; + u32 arb_enable; + int err; + + lrc->flags = 0; + + lrc->bo = xe_bo_create_locked(xe, hwe->gt, vm, + ring_size + xe_lrc_size(xe, hwe->class), + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(hwe->gt) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(lrc->bo)) + return PTR_ERR(lrc->bo); + + if (xe_gt_is_media_type(hwe->gt)) 
+ lrc->full_gt = xe_find_full_gt(hwe->gt); + else + lrc->full_gt = hwe->gt; + + /* + * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address + * via VM bind calls. + */ + err = xe_bo_pin(lrc->bo); + if (err) + goto err_unlock_put_bo; + lrc->flags |= XE_LRC_PINNED; + + err = xe_bo_vmap(lrc->bo); + if (err) + goto err_unpin_bo; + + xe_bo_unlock_vm_held(lrc->bo); + + lrc->ring.size = ring_size; + lrc->ring.tail = 0; + + xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, + hwe->fence_irq, hwe->name); + + if (!gt->default_lrc[hwe->class]) { + init_data = empty_lrc_data(hwe); + if (!init_data) { + xe_lrc_finish(lrc); + return -ENOMEM; + } + } + + /* + * Init Per-Process of HW status Page, LRC / context state to known + * values + */ + map = __xe_lrc_pphwsp_map(lrc); + if (!init_data) { + xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ + xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, + gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, + xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE); + } else { + xe_map_memcpy_to(xe, &map, 0, init_data, + xe_lrc_size(xe, hwe->class)); + kfree(init_data); + } + + if (vm) + xe_lrc_set_ppgtt(lrc, vm); + + xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); + xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0); + xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); + xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, + RING_CTL_SIZE(lrc->ring.size) | RING_VALID); + if (xe->info.supports_usm && vm) { + xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, + (e->usm.acc_granularity << + ACC_GRANULARITY_S) | vm->usm.asid); + xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD, + (e->usm.acc_notify << ACC_NOTIFY_S) | + e->usm.acc_trigger); + } + + lrc->desc = GEN8_CTX_VALID; + lrc->desc |= INTEL_LEGACY_64B_CONTEXT << GEN8_CTX_ADDRESSING_MODE_SHIFT; + /* TODO: Priority */ + + /* While this appears to have something about privileged batches or + * some such, it really just means PPGTT mode. 
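+ * Set it whenever the LRC is bound to a ppGTT VM, as done just below.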
+ */ + if (vm) + lrc->desc |= GEN8_CTX_PRIVILEGE; + + if (GRAPHICS_VERx100(xe) < 1250) { + lrc->desc |= (u64)hwe->instance << GEN11_ENGINE_INSTANCE_SHIFT; + lrc->desc |= (u64)hwe->class << GEN11_ENGINE_CLASS_SHIFT; + } + + arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE; + xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable)); + + return 0; + +err_unpin_bo: + if (lrc->flags & XE_LRC_PINNED) + xe_bo_unpin(lrc->bo); +err_unlock_put_bo: + xe_bo_unlock_vm_held(lrc->bo); + xe_bo_put(lrc->bo); + return err; +} + +void xe_lrc_finish(struct xe_lrc *lrc) +{ + struct ww_acquire_ctx ww; + + xe_hw_fence_ctx_finish(&lrc->fence_ctx); + if (lrc->flags & XE_LRC_PINNED) { + if (lrc->bo->vm) + xe_vm_lock(lrc->bo->vm, &ww, 0, false); + else + xe_bo_lock_no_vm(lrc->bo, NULL); + xe_bo_unpin(lrc->bo); + if (lrc->bo->vm) + xe_vm_unlock(lrc->bo->vm, &ww); + else + xe_bo_unlock_no_vm(lrc->bo); + } + xe_bo_put(lrc->bo); +} + +void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head) +{ + xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head); +} + +u32 xe_lrc_ring_head(struct xe_lrc *lrc) +{ + return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR; +} + +u32 xe_lrc_ring_space(struct xe_lrc *lrc) +{ + const u32 head = xe_lrc_ring_head(lrc); + const u32 tail = lrc->ring.tail; + const u32 size = lrc->ring.size; + + return ((head - tail - 1) & (size - 1)) + 1; +} + +static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring, + const void *data, size_t size) +{ + struct xe_device *xe = lrc_to_xe(lrc); + + iosys_map_incr(&ring, lrc->ring.tail); + xe_map_memcpy_to(xe, &ring, 0, data, size); + lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1); +} + +void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size) +{ + struct iosys_map ring; + u32 rhs; + size_t aligned_size; + + XE_BUG_ON(!IS_ALIGNED(size, 4)); + aligned_size = ALIGN(size, 8); + + ring = __xe_lrc_ring_map(lrc); + + XE_BUG_ON(lrc->ring.tail >= lrc->ring.size); + rhs = lrc->ring.size - lrc->ring.tail; + if (size > rhs) { + __xe_lrc_write_ring(lrc, ring, data, rhs); + __xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs); + } else { + __xe_lrc_write_ring(lrc, ring, data, size); + } + + if (aligned_size > size) { + u32 noop = MI_NOOP; + + __xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop)); + } +} + +u64 xe_lrc_descriptor(struct xe_lrc *lrc) +{ + return lrc->desc | xe_lrc_ggtt_addr(lrc); +} + +u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc) +{ + return __xe_lrc_seqno_ggtt_addr(lrc); +} + +struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc) +{ + return &xe_hw_fence_create(&lrc->fence_ctx, + __xe_lrc_seqno_map(lrc))->dma; +} + +s32 xe_lrc_seqno(struct xe_lrc *lrc) +{ + struct iosys_map map = __xe_lrc_seqno_map(lrc); + + return xe_map_read32(lrc_to_xe(lrc), &map); +} + +s32 xe_lrc_start_seqno(struct xe_lrc *lrc) +{ + struct iosys_map map = __xe_lrc_start_seqno_map(lrc); + + return xe_map_read32(lrc_to_xe(lrc), &map); +} + +u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc) +{ + return __xe_lrc_start_seqno_ggtt_addr(lrc); +} + +u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc) +{ + return __xe_lrc_parallel_ggtt_addr(lrc); +} + +struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc) +{ + return __xe_lrc_parallel_map(lrc); +} diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h new file mode 100644 index 000000000000..e37f89e75ef8 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ +#ifndef _XE_LRC_H_ +#define 
_XE_LRC_H_ + +#include "xe_lrc_types.h" + +struct xe_device; +struct xe_engine; +enum xe_engine_class; +struct xe_hw_engine; +struct xe_vm; + +#define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4) + +int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, + struct xe_engine *e, struct xe_vm *vm, u32 ring_size); +void xe_lrc_finish(struct xe_lrc *lrc); + +size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class); +u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc); + +void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head); +u32 xe_lrc_ring_head(struct xe_lrc *lrc); +u32 xe_lrc_ring_space(struct xe_lrc *lrc); +void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size); + +u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc); +u32 *xe_lrc_regs(struct xe_lrc *lrc); + +u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr); +void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val); + +u64 xe_lrc_descriptor(struct xe_lrc *lrc); + +u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc); +struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc); +s32 xe_lrc_seqno(struct xe_lrc *lrc); + +u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc); +s32 xe_lrc_start_seqno(struct xe_lrc *lrc); + +u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc); +struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc); + +size_t xe_lrc_skip_size(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h new file mode 100644 index 000000000000..2827efa2091d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_LRC_TYPES_H_ +#define _XE_LRC_TYPES_H_ + +#include "xe_hw_fence_types.h" + +struct xe_bo; + +/** + * struct xe_lrc - Logical ring context (LRC) and submission ring object + */ +struct xe_lrc { + /** + * @bo: buffer object (memory) for logical ring context, per process HW + * status page, and submission ring. 
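+ * Layout within @bo: the submission ring comes first, followed by the
+ * PPHWSP and then the context register state (see the
+ * __xe_lrc_*_offset() helpers in xe_lrc.c).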
+ */ + struct xe_bo *bo; + + /** @full_gt: full GT which this LRC belongs to */ + struct xe_gt *full_gt; + + /** @flags: LRC flags */ + u32 flags; +#define XE_LRC_PINNED BIT(1) + + /** @ring: submission ring state */ + struct { + /** @size: size of submission ring */ + u32 size; + /** @tail: tail of submission ring */ + u32 tail; + /** @old_tail: shadow of tail */ + u32 old_tail; + } ring; + + /** @desc: LRC descriptor */ + u64 desc; + + /** @fence_ctx: context for hw fence */ + struct xe_hw_fence_ctx fence_ctx; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h new file mode 100644 index 000000000000..0d24c124d202 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_macros.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_MACROS_H_ +#define _XE_MACROS_H_ + +#include + +#define XE_EXTRA_DEBUG 1 +#define XE_WARN_ON WARN_ON +#define XE_BUG_ON BUG_ON + +#define XE_IOCTL_ERR(xe, cond) \ + ((cond) && (drm_info(&(xe)->drm, \ + "Ioctl argument check failed at %s:%d: %s", \ + __FILE__, __LINE__, #cond), 1)) + +#endif diff --git a/drivers/gpu/drm/xe/xe_map.h b/drivers/gpu/drm/xe/xe_map.h new file mode 100644 index 000000000000..0bac1f73a80d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_map.h @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef __XE_MAP_H__ +#define __XE_MAP_H__ + +#include + +#include + +/** + * DOC: Map layer + * + * All access to any memory shared with a device (both sysmem and vram) in the + * XE driver should go through this layer (xe_map). This layer is built on top + * of :ref:`driver-api/device-io:Generalizing Access to System and I/O Memory` + * and with extra hooks into the XE driver that allows adding asserts to memory + * accesses (e.g. for blocking runtime_pm D3Cold on Discrete Graphics). 
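+ *
+ * For example, reading back a seqno the hardware wrote to a mapped page
+ * goes through this layer instead of dereferencing the map directly, as
+ * in xe_hw_fence_signaled():
+ *
+ *	u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32);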
+ */ + +static inline void xe_map_memcpy_to(struct xe_device *xe, struct iosys_map *dst, + size_t dst_offset, const void *src, + size_t len) +{ + xe_device_assert_mem_access(xe); + iosys_map_memcpy_to(dst, dst_offset, src, len); +} + +static inline void xe_map_memcpy_from(struct xe_device *xe, void *dst, + const struct iosys_map *src, + size_t src_offset, size_t len) +{ + xe_device_assert_mem_access(xe); + iosys_map_memcpy_from(dst, src, src_offset, len); +} + +static inline void xe_map_memset(struct xe_device *xe, + struct iosys_map *dst, size_t offset, + int value, size_t len) +{ + xe_device_assert_mem_access(xe); + iosys_map_memset(dst, offset, value, len); +} + +/* FIXME: We likely should kill these two functions sooner or later */ +static inline u32 xe_map_read32(struct xe_device *xe, struct iosys_map *map) +{ + xe_device_assert_mem_access(xe); + + if (map->is_iomem) + return readl(map->vaddr_iomem); + else + return READ_ONCE(*(u32 *)map->vaddr); +} + +static inline void xe_map_write32(struct xe_device *xe, struct iosys_map *map, + u32 val) +{ + xe_device_assert_mem_access(xe); + + if (map->is_iomem) + writel(val, map->vaddr_iomem); + else + *(u32 *)map->vaddr = val; +} + +#define xe_map_rd(xe__, map__, offset__, type__) ({ \ + struct xe_device *__xe = xe__; \ + xe_device_assert_mem_access(__xe); \ + iosys_map_rd(map__, offset__, type__); \ +}) + +#define xe_map_wr(xe__, map__, offset__, type__, val__) ({ \ + struct xe_device *__xe = xe__; \ + xe_device_assert_mem_access(__xe); \ + iosys_map_wr(map__, offset__, type__, val__); \ +}) + +#define xe_map_rd_field(xe__, map__, struct_offset__, struct_type__, field__) ({ \ + struct xe_device *__xe = xe__; \ + xe_device_assert_mem_access(__xe); \ + iosys_map_rd_field(map__, struct_offset__, struct_type__, field__); \ +}) + +#define xe_map_wr_field(xe__, map__, struct_offset__, struct_type__, field__, val__) ({ \ + struct xe_device *__xe = xe__; \ + xe_device_assert_mem_access(__xe); \ + iosys_map_wr_field(map__, struct_offset__, struct_type__, field__, val__); \ +}) + +#endif diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c new file mode 100644 index 000000000000..7fc40e8009c3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -0,0 +1,1168 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ +#include "xe_migrate.h" + +#include "xe_bb.h" +#include "xe_bo.h" +#include "xe_engine.h" +#include "xe_ggtt.h" +#include "xe_gt.h" +#include "xe_hw_engine.h" +#include "xe_lrc.h" +#include "xe_map.h" +#include "xe_mocs.h" +#include "xe_pt.h" +#include "xe_res_cursor.h" +#include "xe_sched_job.h" +#include "xe_sync.h" +#include "xe_trace.h" +#include "xe_vm.h" + +#include +#include +#include +#include + +#include "gt/intel_gpu_commands.h" + +struct xe_migrate { + struct xe_engine *eng; + struct xe_gt *gt; + struct mutex job_mutex; + struct xe_bo *pt_bo; + struct xe_bo *cleared_bo; + u64 batch_base_ofs; + u64 usm_batch_base_ofs; + u64 cleared_vram_ofs; + struct dma_fence *fence; + struct drm_suballoc_manager vm_update_sa; +}; + +#define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. 
*/ +#define NUM_KERNEL_PDE 17 +#define NUM_PT_SLOTS 32 +#define NUM_PT_PER_BLIT (MAX_PREEMPTDISABLE_TRANSFER / SZ_2M) + +struct xe_engine *xe_gt_migrate_engine(struct xe_gt *gt) +{ + return gt->migrate->eng; +} + +static void xe_migrate_fini(struct drm_device *dev, void *arg) +{ + struct xe_migrate *m = arg; + struct ww_acquire_ctx ww; + + xe_vm_lock(m->eng->vm, &ww, 0, false); + xe_bo_unpin(m->pt_bo); + if (m->cleared_bo) + xe_bo_unpin(m->cleared_bo); + xe_vm_unlock(m->eng->vm, &ww); + + dma_fence_put(m->fence); + if (m->cleared_bo) + xe_bo_put(m->cleared_bo); + xe_bo_put(m->pt_bo); + drm_suballoc_manager_fini(&m->vm_update_sa); + mutex_destroy(&m->job_mutex); + xe_vm_close_and_put(m->eng->vm); + xe_engine_put(m->eng); +} + +static u64 xe_migrate_vm_addr(u64 slot, u32 level) +{ + XE_BUG_ON(slot >= NUM_PT_SLOTS); + + /* First slot is reserved for mapping of PT bo and bb, start from 1 */ + return (slot + 1ULL) << xe_pt_shift(level + 1); +} + +static u64 xe_migrate_vram_ofs(u64 addr) +{ + return addr + (256ULL << xe_pt_shift(2)); +} + +/* + * For flat CCS clearing we need a cleared chunk of memory to copy from, + * since the CCS clearing mode of XY_FAST_COLOR_BLT appears to be buggy + * (it clears on only 14 bytes in each chunk of 16). + * If clearing the main surface one can use the part of the main surface + * already cleared, but for clearing as part of copying non-compressed + * data out of system memory, we don't readily have a cleared part of + * VRAM to copy from, so create one to use for that case. + */ +static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm) +{ + struct xe_gt *gt = m->gt; + struct xe_device *xe = vm->xe; + size_t cleared_size; + u64 vram_addr; + bool is_vram; + + if (!xe_device_has_flat_ccs(xe)) + return 0; + + cleared_size = xe_device_ccs_bytes(xe, MAX_PREEMPTDISABLE_TRANSFER); + cleared_size = PAGE_ALIGN(cleared_size); + m->cleared_bo = xe_bo_create_pin_map(xe, gt, vm, cleared_size, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_PINNED_BIT); + if (IS_ERR(m->cleared_bo)) + return PTR_ERR(m->cleared_bo); + + xe_map_memset(xe, &m->cleared_bo->vmap, 0, 0x00, cleared_size); + vram_addr = xe_bo_addr(m->cleared_bo, 0, GEN8_PAGE_SIZE, &is_vram); + XE_BUG_ON(!is_vram); + m->cleared_vram_ofs = xe_migrate_vram_ofs(vram_addr); + + return 0; +} + +static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m, + struct xe_vm *vm) +{ + u8 id = gt->info.id; + u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level; + u32 map_ofs, level, i; + struct xe_device *xe = gt_to_xe(m->gt); + struct xe_bo *bo, *batch = gt->kernel_bb_pool.bo; + u64 entry; + int ret; + + /* Can't bump NUM_PT_SLOTS too high */ + BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/GEN8_PAGE_SIZE); + /* Must be a multiple of 64K to support all platforms */ + BUILD_BUG_ON(NUM_PT_SLOTS * GEN8_PAGE_SIZE % SZ_64K); + /* And one slot reserved for the 4KiB page table updates */ + BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1)); + + /* Need to be sure everything fits in the first PT, or create more */ + XE_BUG_ON(m->batch_base_ofs + batch->size >= SZ_2M); + + bo = xe_bo_create_pin_map(vm->xe, m->gt, vm, + num_entries * GEN8_PAGE_SIZE, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(m->gt) | + XE_BO_CREATE_PINNED_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + ret = xe_migrate_create_cleared_bo(m, vm); + if (ret) { + xe_bo_put(bo); + return ret; + } + + entry = gen8_pde_encode(bo, bo->size - GEN8_PAGE_SIZE, XE_CACHE_WB); + xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry); 
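+ /*
+ * The trailing num_level pages of the BO hold the page-table levels
+ * themselves, starting with level 0 at map_ofs.
+ */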
+ + map_ofs = (num_entries - num_level) * GEN8_PAGE_SIZE; + + /* Map the entire BO in our level 0 pt */ + for (i = 0, level = 0; i < num_entries; level++) { + entry = gen8_pte_encode(NULL, bo, i * GEN8_PAGE_SIZE, + XE_CACHE_WB, 0, 0); + + xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry); + + if (vm->flags & XE_VM_FLAGS_64K) + i += 16; + else + i += 1; + } + + if (!IS_DGFX(xe)) { + XE_BUG_ON(xe->info.supports_usm); + + /* Write out batch too */ + m->batch_base_ofs = NUM_PT_SLOTS * GEN8_PAGE_SIZE; + for (i = 0; i < batch->size; + i += vm->flags & XE_VM_FLAGS_64K ? GEN8_64K_PAGE_SIZE : + GEN8_PAGE_SIZE) { + entry = gen8_pte_encode(NULL, batch, i, + XE_CACHE_WB, 0, 0); + + xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, + entry); + level++; + } + } else { + bool is_lmem; + u64 batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE, &is_lmem); + + m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr); + + if (xe->info.supports_usm) { + batch = gt->usm.bb_pool.bo; + batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE, + &is_lmem); + m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr); + } + } + + for (level = 1; level < num_level; level++) { + u32 flags = 0; + + if (vm->flags & XE_VM_FLAGS_64K && level == 1) + flags = GEN12_PDE_64K; + + entry = gen8_pde_encode(bo, map_ofs + (level - 1) * + GEN8_PAGE_SIZE, XE_CACHE_WB); + xe_map_wr(xe, &bo->vmap, map_ofs + GEN8_PAGE_SIZE * level, u64, + entry | flags); + } + + /* Write PDE's that point to our BO. */ + for (i = 0; i < num_entries - num_level; i++) { + entry = gen8_pde_encode(bo, i * GEN8_PAGE_SIZE, + XE_CACHE_WB); + + xe_map_wr(xe, &bo->vmap, map_ofs + GEN8_PAGE_SIZE + + (i + 1) * 8, u64, entry); + } + + /* Identity map the entire vram at 256GiB offset */ + if (IS_DGFX(xe)) { + u64 pos, ofs, flags; + + level = 2; + ofs = map_ofs + GEN8_PAGE_SIZE * level + 256 * 8; + flags = GEN8_PAGE_RW | GEN8_PAGE_PRESENT | PPAT_CACHED | + GEN12_PPGTT_PTE_LM | GEN8_PDPE_PS_1G; + + /* + * Use 1GB pages, it shouldn't matter the physical amount of + * vram is less, when we don't access it. + */ + for (pos = 0; pos < xe->mem.vram.size; pos += SZ_1G, ofs += 8) + xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags); + } + + /* + * Example layout created above, with root level = 3: + * [PT0...PT7]: kernel PT's for copy/clear; 64 or 4KiB PTE's + * [PT8]: Kernel PT for VM_BIND, 4 KiB PTE's + * [PT9...PT28]: Userspace PT's for VM_BIND, 4 KiB PTE's + * [PT29 = PDE 0] [PT30 = PDE 1] [PT31 = PDE 2] + * + * This makes the lowest part of the VM point to the pagetables. + * Hence the lowest 2M in the vm should point to itself, with a few writes + * and flushes, other parts of the VM can be used either for copying and + * clearing. + * + * For performance, the kernel reserves PDE's, so about 20 are left + * for async VM updates. + * + * To make it easier to work, each scratch PT is put in slot (1 + PT #) + * everywhere, this allows lockless updates to scratch pages by using + * the different addresses in VM. 
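+ *
+ * With the defines below, each VM-update suballocation unit is
+ * GEN8_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE = 4096 / 32 = 128 bytes,
+ * i.e. 16 qword PTE writes per unit.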
+ */ +#define NUM_VMUSA_UNIT_PER_PAGE 32 +#define VM_SA_UPDATE_UNIT_SIZE (GEN8_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE) +#define NUM_VMUSA_WRITES_PER_UNIT (VM_SA_UPDATE_UNIT_SIZE / sizeof(u64)) + drm_suballoc_manager_init(&m->vm_update_sa, + (map_ofs / GEN8_PAGE_SIZE - NUM_KERNEL_PDE) * + NUM_VMUSA_UNIT_PER_PAGE, 0); + + m->pt_bo = bo; + return 0; +} + +struct xe_migrate *xe_migrate_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_migrate *m; + struct xe_vm *vm; + struct ww_acquire_ctx ww; + int err; + + XE_BUG_ON(xe_gt_is_media_type(gt)); + + m = drmm_kzalloc(&xe->drm, sizeof(*m), GFP_KERNEL); + if (!m) + return ERR_PTR(-ENOMEM); + + m->gt = gt; + + /* Special layout, prepared below.. */ + vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION | + XE_VM_FLAG_SET_GT_ID(gt)); + if (IS_ERR(vm)) + return ERR_CAST(vm); + + xe_vm_lock(vm, &ww, 0, false); + err = xe_migrate_prepare_vm(gt, m, vm); + xe_vm_unlock(vm, &ww); + if (err) { + xe_vm_close_and_put(vm); + return ERR_PTR(err); + } + + if (xe->info.supports_usm) { + struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, + XE_ENGINE_CLASS_COPY, + gt->usm.reserved_bcs_instance, + false); + if (!hwe) + return ERR_PTR(-EINVAL); + + m->eng = xe_engine_create(xe, vm, + BIT(hwe->logical_instance), 1, + hwe, ENGINE_FLAG_KERNEL); + } else { + m->eng = xe_engine_create_class(xe, gt, vm, + XE_ENGINE_CLASS_COPY, + ENGINE_FLAG_KERNEL); + } + if (IS_ERR(m->eng)) { + xe_vm_close_and_put(vm); + return ERR_CAST(m->eng); + } + + mutex_init(&m->job_mutex); + + err = drmm_add_action_or_reset(&xe->drm, xe_migrate_fini, m); + if (err) + return ERR_PTR(err); + + return m; +} + +static void emit_arb_clear(struct xe_bb *bb) +{ + /* 1 dword */ + bb->cs[bb->len++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; +} + +static u64 xe_migrate_res_sizes(struct xe_res_cursor *cur) +{ + /* + * For VRAM we use identity mapped pages so we are limited to current + * cursor size. For system we program the pages ourselves so we have no + * such limitation. + */ + return min_t(u64, MAX_PREEMPTDISABLE_TRANSFER, + mem_type_is_vram(cur->mem_type) ? cur->size : + cur->remaining); +} + +static u32 pte_update_size(struct xe_migrate *m, + bool is_vram, + struct xe_res_cursor *cur, + u64 *L0, u64 *L0_ofs, u32 *L0_pt, + u32 cmd_size, u32 pt_ofs, u32 avail_pts) +{ + u32 cmds = 0; + + *L0_pt = pt_ofs; + if (!is_vram) { + /* Clip L0 to available size */ + u64 size = min(*L0, (u64)avail_pts * SZ_2M); + u64 num_4k_pages = DIV_ROUND_UP(size, GEN8_PAGE_SIZE); + + *L0 = size; + *L0_ofs = xe_migrate_vm_addr(pt_ofs, 0); + + /* MI_STORE_DATA_IMM */ + cmds += 3 * DIV_ROUND_UP(num_4k_pages, 0x1ff); + + /* PDE qwords */ + cmds += num_4k_pages * 2; + + /* Each chunk has a single blit command */ + cmds += cmd_size; + } else { + /* Offset into identity map. */ + *L0_ofs = xe_migrate_vram_ofs(cur->start); + cmds += cmd_size; + } + + return cmds; +} + +static void emit_pte(struct xe_migrate *m, + struct xe_bb *bb, u32 at_pt, + bool is_vram, + struct xe_res_cursor *cur, + u32 size, struct xe_bo *bo) +{ + u32 ptes; + u64 ofs = at_pt * GEN8_PAGE_SIZE; + u64 cur_ofs; + + /* + * FIXME: Emitting VRAM PTEs to L0 PTs is forbidden. Currently + * we're only emitting VRAM PTEs during sanity tests, so when + * that's moved to a Kunit test, we should condition VRAM PTEs + * on running tests. 
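+ *
+ * Note that the PTE writes below are chunked to at most 0x1ff entries
+ * per MI_STORE_DATA_IMM.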
+ */ + + ptes = DIV_ROUND_UP(size, GEN8_PAGE_SIZE); + + while (ptes) { + u32 chunk = min(0x1ffU, ptes); + + bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) | + (chunk * 2 + 1); + bb->cs[bb->len++] = ofs; + bb->cs[bb->len++] = 0; + + cur_ofs = ofs; + ofs += chunk * 8; + ptes -= chunk; + + while (chunk--) { + u64 addr; + + XE_BUG_ON(cur->start & (PAGE_SIZE - 1)); + + if (is_vram) { + addr = cur->start; + + /* Is this a 64K PTE entry? */ + if ((m->eng->vm->flags & XE_VM_FLAGS_64K) && + !(cur_ofs & (16 * 8 - 1))) { + XE_WARN_ON(!IS_ALIGNED(addr, SZ_64K)); + addr |= GEN12_PTE_PS64; + } + + addr |= GEN12_PPGTT_PTE_LM; + } else { + addr = xe_res_dma(cur); + } + addr |= PPAT_CACHED | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; + bb->cs[bb->len++] = lower_32_bits(addr); + bb->cs[bb->len++] = upper_32_bits(addr); + + xe_res_next(cur, PAGE_SIZE); + cur_ofs += 8; + } + } +} + +#define EMIT_COPY_CCS_DW 5 +static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb, + u64 dst_ofs, bool dst_is_indirect, + u64 src_ofs, bool src_is_indirect, + u32 size) +{ + u32 *cs = bb->cs + bb->len; + u32 num_ccs_blks; + u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index); + + num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size), + NUM_CCS_BYTES_PER_BLOCK); + XE_BUG_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER); + *cs++ = XY_CTRL_SURF_COPY_BLT | + (src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT | + (dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT | + ((num_ccs_blks - 1) & CCS_SIZE_MASK) << CCS_SIZE_SHIFT; + *cs++ = lower_32_bits(src_ofs); + *cs++ = upper_32_bits(src_ofs) | + FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs); + *cs++ = lower_32_bits(dst_ofs); + *cs++ = upper_32_bits(dst_ofs) | + FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs); + + bb->len = cs - bb->cs; +} + +#define EMIT_COPY_DW 10 +static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, + u64 src_ofs, u64 dst_ofs, unsigned int size, + unsigned pitch) +{ + XE_BUG_ON(size / pitch > S16_MAX); + XE_BUG_ON(pitch / 4 > S16_MAX); + XE_BUG_ON(pitch > U16_MAX); + + bb->cs[bb->len++] = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2); + bb->cs[bb->len++] = BLT_DEPTH_32 | pitch; + bb->cs[bb->len++] = 0; + bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4; + bb->cs[bb->len++] = lower_32_bits(dst_ofs); + bb->cs[bb->len++] = upper_32_bits(dst_ofs); + bb->cs[bb->len++] = 0; + bb->cs[bb->len++] = pitch; + bb->cs[bb->len++] = lower_32_bits(src_ofs); + bb->cs[bb->len++] = upper_32_bits(src_ofs); +} + +static int job_add_deps(struct xe_sched_job *job, struct dma_resv *resv, + enum dma_resv_usage usage) +{ + return drm_sched_job_add_resv_dependencies(&job->drm, resv, usage); +} + +static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm) +{ + return usm ? m->usm_batch_base_ofs : m->batch_base_ofs; +} + +static u32 xe_migrate_ccs_copy(struct xe_migrate *m, + struct xe_bb *bb, + u64 src_ofs, bool src_is_vram, + u64 dst_ofs, bool dst_is_vram, u32 dst_size, + u64 ccs_ofs, bool copy_ccs) +{ + struct xe_gt *gt = m->gt; + u32 flush_flags = 0; + + if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_vram) { + /* + * If the bo doesn't have any CCS metadata attached, we still + * need to clear it for security reasons. + */ + emit_copy_ccs(gt, bb, dst_ofs, true, m->cleared_vram_ofs, false, + dst_size); + flush_flags = MI_FLUSH_DW_CCS; + } else if (copy_ccs) { + if (!src_is_vram) + src_ofs = ccs_ofs; + else if (!dst_is_vram) + dst_ofs = ccs_ofs; + + /* + * At the moment, we don't support copying CCS metadata from + * system to system. 
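+ * The BUG_ON below enforces that at least one side is VRAM.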
+ */ + XE_BUG_ON(!src_is_vram && !dst_is_vram); + + emit_copy_ccs(gt, bb, dst_ofs, dst_is_vram, src_ofs, + src_is_vram, dst_size); + if (dst_is_vram) + flush_flags = MI_FLUSH_DW_CCS; + } + + return flush_flags; +} + +struct dma_fence *xe_migrate_copy(struct xe_migrate *m, + struct xe_bo *bo, + struct ttm_resource *src, + struct ttm_resource *dst) +{ + struct xe_gt *gt = m->gt; + struct xe_device *xe = gt_to_xe(gt); + struct dma_fence *fence = NULL; + u64 size = bo->size; + struct xe_res_cursor src_it, dst_it, ccs_it; + u64 src_L0_ofs, dst_L0_ofs; + u32 src_L0_pt, dst_L0_pt; + u64 src_L0, dst_L0; + int pass = 0; + int err; + bool src_is_vram = mem_type_is_vram(src->mem_type); + bool dst_is_vram = mem_type_is_vram(dst->mem_type); + bool copy_ccs = xe_device_has_flat_ccs(xe) && xe_bo_needs_ccs_pages(bo); + bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram); + + if (!src_is_vram) + xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &src_it); + else + xe_res_first(src, 0, bo->size, &src_it); + if (!dst_is_vram) + xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &dst_it); + else + xe_res_first(dst, 0, bo->size, &dst_it); + + if (copy_system_ccs) + xe_res_first_sg(xe_bo_get_sg(bo), xe_bo_ccs_pages_start(bo), + PAGE_ALIGN(xe_device_ccs_bytes(xe, size)), + &ccs_it); + + while (size) { + u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */ + struct xe_sched_job *job; + struct xe_bb *bb; + u32 flush_flags; + u32 update_idx; + u64 ccs_ofs, ccs_size; + u32 ccs_pt; + bool usm = xe->info.supports_usm; + + src_L0 = xe_migrate_res_sizes(&src_it); + dst_L0 = xe_migrate_res_sizes(&dst_it); + + drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n", + pass++, src_L0, dst_L0); + + src_L0 = min(src_L0, dst_L0); + + batch_size += pte_update_size(m, src_is_vram, &src_it, &src_L0, + &src_L0_ofs, &src_L0_pt, 0, 0, + NUM_PT_PER_BLIT); + + batch_size += pte_update_size(m, dst_is_vram, &dst_it, &src_L0, + &dst_L0_ofs, &dst_L0_pt, 0, + NUM_PT_PER_BLIT, NUM_PT_PER_BLIT); + + if (copy_system_ccs) { + ccs_size = xe_device_ccs_bytes(xe, src_L0); + batch_size += pte_update_size(m, false, &ccs_it, &ccs_size, + &ccs_ofs, &ccs_pt, 0, + 2 * NUM_PT_PER_BLIT, + NUM_PT_PER_BLIT); + } + + /* Add copy commands size here */ + batch_size += EMIT_COPY_DW + + (xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0); + + bb = xe_bb_new(gt, batch_size, usm); + if (IS_ERR(bb)) { + err = PTR_ERR(bb); + goto err_sync; + } + + /* Preemption is enabled again by the ring ops. 
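+ * emit_arb_clear() disables it here whenever PTEs are emitted from
+ * within the batch.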
*/ + if (!src_is_vram || !dst_is_vram) + emit_arb_clear(bb); + + if (!src_is_vram) + emit_pte(m, bb, src_L0_pt, src_is_vram, &src_it, src_L0, + bo); + else + xe_res_next(&src_it, src_L0); + + if (!dst_is_vram) + emit_pte(m, bb, dst_L0_pt, dst_is_vram, &dst_it, src_L0, + bo); + else + xe_res_next(&dst_it, src_L0); + + if (copy_system_ccs) + emit_pte(m, bb, ccs_pt, false, &ccs_it, ccs_size, bo); + + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + update_idx = bb->len; + + emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, GEN8_PAGE_SIZE); + flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_vram, + dst_L0_ofs, dst_is_vram, + src_L0, ccs_ofs, copy_ccs); + + mutex_lock(&m->job_mutex); + job = xe_bb_create_migration_job(m->eng, bb, + xe_migrate_batch_base(m, usm), + update_idx); + if (IS_ERR(job)) { + err = PTR_ERR(job); + goto err; + } + + xe_sched_job_add_migrate_flush(job, flush_flags); + if (!fence) { + err = job_add_deps(job, bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP); + if (err) + goto err_job; + } + + xe_sched_job_arm(job); + dma_fence_put(fence); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + dma_fence_put(m->fence); + m->fence = dma_fence_get(fence); + + mutex_unlock(&m->job_mutex); + + xe_bb_free(bb, fence); + size -= src_L0; + continue; + +err_job: + xe_sched_job_put(job); +err: + mutex_unlock(&m->job_mutex); + xe_bb_free(bb, NULL); + +err_sync: + /* Sync partial copy if any. */ + if (fence) { + dma_fence_wait(fence, false); + dma_fence_put(fence); + } + + return ERR_PTR(err); + } + + return fence; +} + +static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, + u32 size, u32 pitch, u32 value, bool is_vram) +{ + u32 *cs = bb->cs + bb->len; + u32 len = XY_FAST_COLOR_BLT_DW; + u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index); + + if (GRAPHICS_VERx100(gt->xe) < 1250) + len = 11; + + *cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 | + (len - 2); + *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) | + (pitch - 1); + *cs++ = 0; + *cs++ = (size / pitch) << 16 | pitch / 4; + *cs++ = lower_32_bits(src_ofs); + *cs++ = upper_32_bits(src_ofs); + *cs++ = (is_vram ? 0x0 : 0x1) << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT; + *cs++ = value; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + + if (len > 11) { + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + } + + XE_BUG_ON(cs - bb->cs != len + bb->len); + bb->len += len; + + return 0; +} + +struct dma_fence *xe_migrate_clear(struct xe_migrate *m, + struct xe_bo *bo, + struct ttm_resource *dst, + u32 value) +{ + bool clear_vram = mem_type_is_vram(dst->mem_type); + struct xe_gt *gt = m->gt; + struct xe_device *xe = gt_to_xe(gt); + struct dma_fence *fence = NULL; + u64 size = bo->size; + struct xe_res_cursor src_it; + struct ttm_resource *src = dst; + int err; + int pass = 0; + + if (!clear_vram) + xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &src_it); + else + xe_res_first(src, 0, bo->size, &src_it); + + while (size) { + u64 clear_L0_ofs; + u32 clear_L0_pt; + u32 flush_flags = 0; + u64 clear_L0; + struct xe_sched_job *job; + struct xe_bb *bb; + u32 batch_size, update_idx; + bool usm = xe->info.supports_usm; + + clear_L0 = xe_migrate_res_sizes(&src_it); + drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0); + + /* Calculate final sizes and batch size.. 
*/ + batch_size = 2 + + pte_update_size(m, clear_vram, &src_it, + &clear_L0, &clear_L0_ofs, &clear_L0_pt, + XY_FAST_COLOR_BLT_DW, 0, NUM_PT_PER_BLIT); + if (xe_device_has_flat_ccs(xe) && clear_vram) + batch_size += EMIT_COPY_CCS_DW; + + /* Clear commands */ + + if (WARN_ON_ONCE(!clear_L0)) + break; + + bb = xe_bb_new(gt, batch_size, usm); + if (IS_ERR(bb)) { + err = PTR_ERR(bb); + goto err_sync; + } + + size -= clear_L0; + + /* TODO: Add dependencies here */ + + /* Preemption is enabled again by the ring ops. */ + if (!clear_vram) { + emit_arb_clear(bb); + emit_pte(m, bb, clear_L0_pt, clear_vram, &src_it, clear_L0, + bo); + } else { + xe_res_next(&src_it, clear_L0); + } + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + update_idx = bb->len; + + emit_clear(gt, bb, clear_L0_ofs, clear_L0, GEN8_PAGE_SIZE, + value, clear_vram); + if (xe_device_has_flat_ccs(xe) && clear_vram) { + emit_copy_ccs(gt, bb, clear_L0_ofs, true, + m->cleared_vram_ofs, false, clear_L0); + flush_flags = MI_FLUSH_DW_CCS; + } + + mutex_lock(&m->job_mutex); + job = xe_bb_create_migration_job(m->eng, bb, + xe_migrate_batch_base(m, usm), + update_idx); + if (IS_ERR(job)) { + err = PTR_ERR(job); + goto err; + } + + xe_sched_job_add_migrate_flush(job, flush_flags); + + xe_sched_job_arm(job); + dma_fence_put(fence); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + dma_fence_put(m->fence); + m->fence = dma_fence_get(fence); + + mutex_unlock(&m->job_mutex); + + xe_bb_free(bb, fence); + continue; + +err: + mutex_unlock(&m->job_mutex); + xe_bb_free(bb, NULL); +err_sync: + /* Sync partial copies if any. */ + if (fence) { + dma_fence_wait(m->fence, false); + dma_fence_put(fence); + } + + return ERR_PTR(err); + } + + return fence; +} + +static void write_pgtable(struct xe_gt *gt, struct xe_bb *bb, u64 ppgtt_ofs, + const struct xe_vm_pgtable_update *update, + struct xe_migrate_pt_update *pt_update) +{ + const struct xe_migrate_pt_update_ops *ops = pt_update->ops; + u32 chunk; + u32 ofs = update->ofs, size = update->qwords; + + /* + * If we have 512 entries (max), we would populate it ourselves, + * and update the PDE above it to the new pointer. + * The only time this can only happen if we have to update the top + * PDE. This requires a BO that is almost vm->size big. + * + * This shouldn't be possible in practice.. might change when 16K + * pages are used. Hence the BUG_ON. 
+ */ + XE_BUG_ON(update->qwords > 0x1ff); + if (!ppgtt_ofs) { + bool is_lmem; + + ppgtt_ofs = xe_migrate_vram_ofs(xe_bo_addr(update->pt_bo, 0, + GEN8_PAGE_SIZE, + &is_lmem)); + XE_BUG_ON(!is_lmem); + } + + do { + u64 addr = ppgtt_ofs + ofs * 8; + chunk = min(update->qwords, 0x1ffU); + + /* Ensure populatefn can do memset64 by aligning bb->cs */ + if (!(bb->len & 1)) + bb->cs[bb->len++] = MI_NOOP; + + bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) | + (chunk * 2 + 1); + bb->cs[bb->len++] = lower_32_bits(addr); + bb->cs[bb->len++] = upper_32_bits(addr); + ops->populate(pt_update, gt, NULL, bb->cs + bb->len, ofs, chunk, + update); + + bb->len += chunk * 2; + ofs += chunk; + size -= chunk; + } while (size); +} + +struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m) +{ + return xe_vm_get(m->eng->vm); +} + +static struct dma_fence * +xe_migrate_update_pgtables_cpu(struct xe_migrate *m, + struct xe_vm *vm, struct xe_bo *bo, + const struct xe_vm_pgtable_update *updates, + u32 num_updates, bool wait_vm, + struct xe_migrate_pt_update *pt_update) +{ + const struct xe_migrate_pt_update_ops *ops = pt_update->ops; + struct dma_fence *fence; + int err; + u32 i; + + /* Wait on BO moves for 10 ms, then fall back to GPU job */ + if (bo) { + long wait; + + wait = dma_resv_wait_timeout(bo->ttm.base.resv, + DMA_RESV_USAGE_KERNEL, + true, HZ / 100); + if (wait <= 0) + return ERR_PTR(-ETIME); + } + if (wait_vm) { + long wait; + + wait = dma_resv_wait_timeout(&vm->resv, + DMA_RESV_USAGE_BOOKKEEP, + true, HZ / 100); + if (wait <= 0) + return ERR_PTR(-ETIME); + } + + if (ops->pre_commit) { + err = ops->pre_commit(pt_update); + if (err) + return ERR_PTR(err); + } + for (i = 0; i < num_updates; i++) { + const struct xe_vm_pgtable_update *update = &updates[i]; + + ops->populate(pt_update, m->gt, &update->pt_bo->vmap, NULL, + update->ofs, update->qwords, update); + } + + trace_xe_vm_cpu_bind(vm); + xe_device_wmb(vm->xe); + + fence = dma_fence_get_stub(); + + return fence; +} + +static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs) +{ + int i; + + for (i = 0; i < num_syncs; i++) { + struct dma_fence *fence = syncs[i].fence; + + if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &fence->flags)) + return false; + } + + return true; +} + +static bool engine_is_idle(struct xe_engine *e) +{ + return !e || e->lrc[0].fence_ctx.next_seqno == 1 || + xe_lrc_seqno(&e->lrc[0]) == e->lrc[0].fence_ctx.next_seqno; +} + +struct dma_fence * +xe_migrate_update_pgtables(struct xe_migrate *m, + struct xe_vm *vm, + struct xe_bo *bo, + struct xe_engine *eng, + const struct xe_vm_pgtable_update *updates, + u32 num_updates, + struct xe_sync_entry *syncs, u32 num_syncs, + struct xe_migrate_pt_update *pt_update) +{ + const struct xe_migrate_pt_update_ops *ops = pt_update->ops; + struct xe_gt *gt = m->gt; + struct xe_device *xe = gt_to_xe(gt); + struct xe_sched_job *job; + struct dma_fence *fence; + struct drm_suballoc *sa_bo = NULL; + struct xe_vma *vma = pt_update->vma; + struct xe_bb *bb; + u32 i, batch_size, ppgtt_ofs, update_idx, page_ofs = 0; + u64 addr; + int err = 0; + bool usm = !eng && xe->info.supports_usm; + bool first_munmap_rebind = vma && vma->first_munmap_rebind; + + /* Use the CPU if no in syncs and engine is idle */ + if (no_in_syncs(syncs, num_syncs) && engine_is_idle(eng)) { + fence = xe_migrate_update_pgtables_cpu(m, vm, bo, updates, + num_updates, + first_munmap_rebind, + pt_update); + if (!IS_ERR(fence) || fence == ERR_PTR(-EAGAIN)) + return fence; + } + + /* fixed + PTE entries */ + if (IS_DGFX(xe)) + 
batch_size = 2; + else + batch_size = 6 + num_updates * 2; + + for (i = 0; i < num_updates; i++) { + u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, 0x1ff); + + /* align noop + MI_STORE_DATA_IMM cmd prefix */ + batch_size += 4 * num_cmds + updates[i].qwords * 2; + } + + /* + * XXX: Create temp bo to copy from, if batch_size becomes too big? + * + * Worst case: Sum(2 * (each lower level page size) + (top level page size)) + * Should be reasonably bound.. + */ + XE_BUG_ON(batch_size >= SZ_128K); + + bb = xe_bb_new(gt, batch_size, !eng && xe->info.supports_usm); + if (IS_ERR(bb)) + return ERR_CAST(bb); + + /* For sysmem PTE's, need to map them in our hole.. */ + if (!IS_DGFX(xe)) { + ppgtt_ofs = NUM_KERNEL_PDE - 1; + if (eng) { + XE_BUG_ON(num_updates > NUM_VMUSA_WRITES_PER_UNIT); + + sa_bo = drm_suballoc_new(&m->vm_update_sa, 1, + GFP_KERNEL, true, 0); + if (IS_ERR(sa_bo)) { + err = PTR_ERR(sa_bo); + goto err; + } + + ppgtt_ofs = NUM_KERNEL_PDE + + (drm_suballoc_soffset(sa_bo) / + NUM_VMUSA_UNIT_PER_PAGE); + page_ofs = (drm_suballoc_soffset(sa_bo) % + NUM_VMUSA_UNIT_PER_PAGE) * + VM_SA_UPDATE_UNIT_SIZE; + } + + /* Preemption is enabled again by the ring ops. */ + emit_arb_clear(bb); + + /* Map our PT's to gtt */ + bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) | + (num_updates * 2 + 1); + bb->cs[bb->len++] = ppgtt_ofs * GEN8_PAGE_SIZE + page_ofs; + bb->cs[bb->len++] = 0; /* upper_32_bits */ + + for (i = 0; i < num_updates; i++) { + struct xe_bo *pt_bo = updates[i].pt_bo; + + BUG_ON(pt_bo->size != SZ_4K); + + addr = gen8_pte_encode(NULL, pt_bo, 0, XE_CACHE_WB, + 0, 0); + bb->cs[bb->len++] = lower_32_bits(addr); + bb->cs[bb->len++] = upper_32_bits(addr); + } + + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + update_idx = bb->len; + + addr = xe_migrate_vm_addr(ppgtt_ofs, 0) + + (page_ofs / sizeof(u64)) * GEN8_PAGE_SIZE; + for (i = 0; i < num_updates; i++) + write_pgtable(m->gt, bb, addr + i * GEN8_PAGE_SIZE, + &updates[i], pt_update); + } else { + /* phys pages, no preamble required */ + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + update_idx = bb->len; + + /* Preemption is enabled again by the ring ops. 
*/ + emit_arb_clear(bb); + for (i = 0; i < num_updates; i++) + write_pgtable(m->gt, bb, 0, &updates[i], pt_update); + } + + if (!eng) + mutex_lock(&m->job_mutex); + + job = xe_bb_create_migration_job(eng ?: m->eng, bb, + xe_migrate_batch_base(m, usm), + update_idx); + if (IS_ERR(job)) { + err = PTR_ERR(job); + goto err_bb; + } + + /* Wait on BO move */ + if (bo) { + err = job_add_deps(job, bo->ttm.base.resv, + DMA_RESV_USAGE_KERNEL); + if (err) + goto err_job; + } + + /* + * Munmap style VM unbind, need to wait for all jobs to be complete / + * trigger preempts before moving forward + */ + if (first_munmap_rebind) { + err = job_add_deps(job, &vm->resv, + DMA_RESV_USAGE_BOOKKEEP); + if (err) + goto err_job; + } + + for (i = 0; !err && i < num_syncs; i++) + err = xe_sync_entry_add_deps(&syncs[i], job); + + if (err) + goto err_job; + + if (ops->pre_commit) { + err = ops->pre_commit(pt_update); + if (err) + goto err_job; + } + xe_sched_job_arm(job); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + if (!eng) + mutex_unlock(&m->job_mutex); + + xe_bb_free(bb, fence); + drm_suballoc_free(sa_bo, fence); + + return fence; + +err_job: + xe_sched_job_put(job); +err_bb: + if (!eng) + mutex_unlock(&m->job_mutex); + xe_bb_free(bb, NULL); +err: + drm_suballoc_free(sa_bo, NULL); + return ERR_PTR(err); +} + +void xe_migrate_wait(struct xe_migrate *m) +{ + if (m->fence) + dma_fence_wait(m->fence, false); +} + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_migrate.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h new file mode 100644 index 000000000000..267057a3847f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __XE_MIGRATE__ +#define __XE_MIGRATE__ + +#include + +struct dma_fence; +struct iosys_map; +struct ttm_resource; + +struct xe_bo; +struct xe_gt; +struct xe_engine; +struct xe_migrate; +struct xe_migrate_pt_update; +struct xe_sync_entry; +struct xe_pt; +struct xe_vm; +struct xe_vm_pgtable_update; +struct xe_vma; + +struct xe_migrate_pt_update_ops { + /** + * populate() - Populate a command buffer or page-table with ptes. + * @pt_update: Embeddable callback argument. + * @gt: The gt for the current operation. + * @map: struct iosys_map into the memory to be populated. + * @pos: If @map is NULL, map into the memory to be populated. + * @ofs: qword offset into @map, unused if @map is NULL. + * @num_qwords: Number of qwords to write. + * @update: Information about the PTEs to be inserted. + * + * This interface is intended to be used as a callback into the + * page-table system to populate command buffers or shared + * page-tables with PTEs. + */ + void (*populate)(struct xe_migrate_pt_update *pt_update, + struct xe_gt *gt, struct iosys_map *map, + void *pos, u32 ofs, u32 num_qwords, + const struct xe_vm_pgtable_update *update); + + /** + * pre_commit(): Callback to be called just before arming the + * sched_job. + * @pt_update: Pointer to embeddable callback argument. + * + * Return: 0 on success, negative error code on error. 
+ */
+	int (*pre_commit)(struct xe_migrate_pt_update *pt_update);
+};
+
+struct xe_migrate_pt_update {
+	const struct xe_migrate_pt_update_ops *ops;
+	struct xe_vma *vma;
+};
+
+struct xe_migrate *xe_migrate_init(struct xe_gt *gt);
+
+struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
+				  struct xe_bo *bo,
+				  struct ttm_resource *src,
+				  struct ttm_resource *dst);
+
+struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
+				   struct xe_bo *bo,
+				   struct ttm_resource *dst,
+				   u32 value);
+
+struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m);
+
+struct dma_fence *
+xe_migrate_update_pgtables(struct xe_migrate *m,
+			   struct xe_vm *vm,
+			   struct xe_bo *bo,
+			   struct xe_engine *eng,
+			   const struct xe_vm_pgtable_update *updates,
+			   u32 num_updates,
+			   struct xe_sync_entry *syncs, u32 num_syncs,
+			   struct xe_migrate_pt_update *pt_update);
+
+void xe_migrate_wait(struct xe_migrate *m);
+
+struct xe_engine *xe_gt_migrate_engine(struct xe_gt *gt);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate_doc.h b/drivers/gpu/drm/xe/xe_migrate_doc.h
new file mode 100644
index 000000000000..6a68fdff08dc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_migrate_doc.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_MIGRATE_DOC_H_
+#define _XE_MIGRATE_DOC_H_
+
+/**
+ * DOC: Migrate Layer
+ *
+ * The XE migrate layer is used to generate jobs which can copy memory
+ * (eviction), clear memory, or program tables (binds). This layer exists in
+ * every GT, has a migrate engine, and uses a special VM for all generated jobs.
+ *
+ * Special VM details
+ * ==================
+ *
+ * The special VM is configured with a page structure where we can dynamically
+ * map BOs which need to be copied and cleared, dynamically map other VM's page
+ * table BOs for updates, and identity map the entire device's VRAM with 1 GB
+ * pages.
+ *
+ * Currently the page structure consists of 48 physical pages, with 16 reserved
+ * for BO mappings during copies and clears, 1 reserved for kernel binds,
+ * several needed to set up the identity mappings (the exact number depends on
+ * how many bits of address space the device has), and the rest reserved for
+ * user bind operations.
+ *
+ * TODO: Diagram of layout
+ *
+ * Bind jobs
+ * =========
+ *
+ * A bind job consists of two batches and runs either on the migrate engine
+ * (kernel binds) or the bind engine passed in (user binds). In both cases the
+ * VM of the engine is the migrate VM.
+ *
+ * The first batch is used to update the migration VM page structure to point to
+ * the bind VM page table BOs which need to be updated. A physical page is
+ * required for this. If it is a user bind, the page is allocated from a pool of
+ * pages reserved for user bind operations, with drm_suballoc managing this
+ * pool. If it is a kernel bind, the page reserved for kernel binds is used.
+ *
+ * The first batch is only required for devices without VRAM, as when the device
+ * has VRAM the bind VM page table BOs are in VRAM and the identity mapping can
+ * be used.
+ *
+ * The second batch is used to program the page table updates in the bind VM.
+ * Why not just one batch? Well, the TLBs need to be invalidated between these
+ * two batches and that can only be done from the ring.
+ *
+ * When the bind job completes, the allocated page is returned to the pool of
+ * pages reserved for user bind operations if it was a user bind. There is no
+ * need to do this for kernel binds, as the reserved kernel page is serially
+ * used by each job.
+ *
+ * Copy / clear jobs
+ * =================
+ *
+ * A copy or clear job consists of two batches and runs on the migrate engine.
+ *
+ * Like binds, the first batch is used to update the migration VM page
+ * structure. In copy jobs, we need to map the source and destination of the BO
+ * into the page structure. In clear jobs, we just need to add one mapping of
+ * the BO into the page structure. We use the 16 reserved pages in the migration
+ * VM for these mappings, which gives us a maximum copy size of 16 MB and a
+ * maximum clear size of 32 MB.
+ *
+ * The second batch is used to do either the copy or the clear. Again, similar
+ * to binds, two batches are required as the TLBs need to be invalidated from
+ * the ring between the batches.
+ *
+ * More than one job will be generated if the BO is larger than the maximum
+ * copy / clear size.
+ *
+ * Future work
+ * ===========
+ *
+ * Update copy and clear code to use identity mapped VRAM.
+ *
+ * Can we rework the use of the pages for async binds to use all the entries in
+ * each page?
+ *
+ * Using large pages for sysmem mappings.
+ *
+ * Is it possible to identity map the sysmem? We should explore this.
+ */
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
new file mode 100644
index 000000000000..42e2405f2f48
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -0,0 +1,466 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_mmio.h"
+
+#include
+#include
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_macros.h"
+#include "xe_module.h"
+
+#include "i915_reg.h"
+#include "gt/intel_engine_regs.h"
+#include "gt/intel_gt_regs.h"
+
+#define XEHP_MTCFG_ADDR		_MMIO(0x101800)
+#define TILE_COUNT		REG_GENMASK(15, 8)
+#define GEN12_LMEM_BAR		2
+
+static int xe_set_dma_info(struct xe_device *xe)
+{
+	unsigned int mask_size = xe->info.dma_mask_size;
+	int err;
+
+	/*
+	 * We don't have a max segment size, so set it to the max so sg's
+	 * debugging layer doesn't complain
+	 */
+	dma_set_max_seg_size(xe->drm.dev, UINT_MAX);
+
+	err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+	if (err)
+		goto mask_err;
+
+	err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
+	if (err)
+		goto mask_err;
+
+	return 0;
+
+mask_err:
+	drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err);
+	return err;
+}
+
+#ifdef CONFIG_64BIT
+static int
+_resize_bar(struct xe_device *xe, int resno, resource_size_t size)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	int bar_size = pci_rebar_bytes_to_size(size);
+	int ret;
+
+	if (pci_resource_len(pdev, resno))
+		pci_release_resource(pdev, resno);
+
+	ret = pci_resize_resource(pdev, resno, bar_size);
+	if (ret) {
+		drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe)\n",
+			 resno, 1 << bar_size, ERR_PTR(ret));
+		return -1;
+	}
+
+	drm_info(&xe->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size);
+	return 1;
+}
+
+static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	struct pci_bus *root = pdev->bus;
+	struct resource *root_res;
+	resource_size_t rebar_size;
+	resource_size_t current_size;
+	u32 pci_cmd;
+	int i;
+	int ret;
+	u64 force_lmem_bar_size = xe_force_lmem_bar_size;
+
+	current_size = roundup_pow_of_two(pci_resource_len(pdev, GEN12_LMEM_BAR));
+
+	if (force_lmem_bar_size) {
+		u32 bar_sizes;
+
+		rebar_size = force_lmem_bar_size * (resource_size_t)SZ_1M;
+		bar_sizes = pci_rebar_get_possible_sizes(pdev, GEN12_LMEM_BAR);
+
+		if
(rebar_size == current_size) + return 0; + + if (!(bar_sizes & BIT(pci_rebar_bytes_to_size(rebar_size))) || + rebar_size >= roundup_pow_of_two(lmem_size)) { + rebar_size = lmem_size; + drm_info(&xe->drm, + "Given bar size is not within supported size, setting it to default: %llu\n", + (u64)lmem_size >> 20); + } + } else { + rebar_size = current_size; + + if (rebar_size != roundup_pow_of_two(lmem_size)) + rebar_size = lmem_size; + else + return 0; + } + + while (root->parent) + root = root->parent; + + pci_bus_for_each_resource(root, root_res, i) { + if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && + root_res->start > 0x100000000ull) + break; + } + + if (!root_res) { + drm_info(&xe->drm, "Can't resize LMEM BAR - platform support is missing\n"); + return -1; + } + + pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); + pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY); + + ret = _resize_bar(xe, GEN12_LMEM_BAR, rebar_size); + + pci_assign_unassigned_bus_resources(pdev->bus); + pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); + return ret; +} +#else +static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size) { return 0; } +#endif + +static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar) +{ + if (!pci_resource_flags(pdev, bar)) + return false; + + if (pci_resource_flags(pdev, bar) & IORESOURCE_UNSET) + return false; + + if (!pci_resource_len(pdev, bar)) + return false; + + return true; +} + +int xe_mmio_probe_vram(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct xe_gt *gt; + u8 id; + u64 lmem_size; + u64 original_size; + u64 current_size; + u64 flat_ccs_base; + int resize_result; + + if (!IS_DGFX(xe)) { + xe->mem.vram.mapping = 0; + xe->mem.vram.size = 0; + xe->mem.vram.io_start = 0; + + for_each_gt(gt, xe, id) { + gt->mem.vram.mapping = 0; + gt->mem.vram.size = 0; + gt->mem.vram.io_start = 0; + } + return 0; + } + + if (!xe_pci_resource_valid(pdev, GEN12_LMEM_BAR)) { + drm_err(&xe->drm, "pci resource is not valid\n"); + return -ENXIO; + } + + gt = xe_device_get_gt(xe, 0); + lmem_size = xe_mmio_read64(gt, GEN12_GSMBASE.reg); + + original_size = pci_resource_len(pdev, GEN12_LMEM_BAR); + + if (xe->info.has_flat_ccs) { + int err; + u32 reg; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return err; + reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE0_ADDR_RANGE); + lmem_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G; + reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); + flat_ccs_base = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K; + + drm_info(&xe->drm, "lmem_size: 0x%llx flat_ccs_base: 0x%llx\n", + lmem_size, flat_ccs_base); + + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + if (err) + return err; + } else { + flat_ccs_base = lmem_size; + } + + resize_result = xe_resize_lmem_bar(xe, lmem_size); + current_size = pci_resource_len(pdev, GEN12_LMEM_BAR); + xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR); + + xe->mem.vram.size = min(current_size, lmem_size); + + if (!xe->mem.vram.size) + return -EIO; + + if (resize_result > 0) + drm_info(&xe->drm, "Successfully resize LMEM from %lluMiB to %lluMiB\n", + (u64)original_size >> 20, + (u64)current_size >> 20); + else if (xe->mem.vram.size < lmem_size && !xe_force_lmem_bar_size) + drm_info(&xe->drm, "Using a reduced BAR size of %lluMiB. 
Consider enabling 'Resizable BAR' support in your BIOS.\n", + (u64)xe->mem.vram.size >> 20); + if (xe->mem.vram.size < lmem_size) + drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n", + lmem_size, xe->mem.vram.size); + +#ifdef CONFIG_64BIT + xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.size); +#endif + + xe->mem.vram.size = min_t(u64, xe->mem.vram.size, flat_ccs_base); + + drm_info(&xe->drm, "TOTAL VRAM: %pa, %pa\n", &xe->mem.vram.io_start, &xe->mem.vram.size); + + /* FIXME: Assuming equally partitioned VRAM, incorrect */ + if (xe->info.tile_count > 1) { + u8 adj_tile_count = xe->info.tile_count; + resource_size_t size, io_start; + + for_each_gt(gt, xe, id) + if (xe_gt_is_media_type(gt)) + --adj_tile_count; + + XE_BUG_ON(!adj_tile_count); + + size = xe->mem.vram.size / adj_tile_count; + io_start = xe->mem.vram.io_start; + + for_each_gt(gt, xe, id) { + if (id && !xe_gt_is_media_type(gt)) + io_start += size; + + gt->mem.vram.size = size; + gt->mem.vram.io_start = io_start; + gt->mem.vram.mapping = xe->mem.vram.mapping + + (io_start - xe->mem.vram.io_start); + + drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n", + id, gt->info.vram_id, >->mem.vram.io_start, + >->mem.vram.size); + } + } else { + gt->mem.vram.size = xe->mem.vram.size; + gt->mem.vram.io_start = xe->mem.vram.io_start; + gt->mem.vram.mapping = xe->mem.vram.mapping; + + drm_info(&xe->drm, "VRAM: %pa\n", >->mem.vram.size); + } + return 0; +} + +static void xe_mmio_probe_tiles(struct xe_device *xe) +{ + struct xe_gt *gt = xe_device_get_gt(xe, 0); + u32 mtcfg; + u8 adj_tile_count; + u8 id; + + if (xe->info.tile_count == 1) + return; + + mtcfg = xe_mmio_read64(gt, XEHP_MTCFG_ADDR.reg); + adj_tile_count = xe->info.tile_count = + REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; + if (xe->info.media_ver >= 13) + xe->info.tile_count *= 2; + + drm_info(&xe->drm, "tile_count: %d, adj_tile_count %d\n", + xe->info.tile_count, adj_tile_count); + + if (xe->info.tile_count > 1) { + const int mmio_bar = 0; + size_t size; + void *regs; + + if (adj_tile_count > 1) { + pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs); + xe->mmio.size = SZ_16M * adj_tile_count; + xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), + mmio_bar, xe->mmio.size); + } + + size = xe->mmio.size / adj_tile_count; + regs = xe->mmio.regs; + + for_each_gt(gt, xe, id) { + if (id && !xe_gt_is_media_type(gt)) + regs += size; + gt->mmio.size = size; + gt->mmio.regs = regs; + } + } +} + +static void mmio_fini(struct drm_device *drm, void *arg) +{ + struct xe_device *xe = arg; + + pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs); + if (xe->mem.vram.mapping) + iounmap(xe->mem.vram.mapping); +} + +int xe_mmio_init(struct xe_device *xe) +{ + struct xe_gt *gt = xe_device_get_gt(xe, 0); + const int mmio_bar = 0; + int err; + + /* + * Map the entire BAR, which includes registers (0-4MB), reserved space + * (4MB-8MB), and GGTT (8MB-16MB). Other parts of the driver (GTs, + * GGTTs) will derive the pointers they need from the mapping in the + * device structure. 
+ */ + xe->mmio.size = SZ_16M; + xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), mmio_bar, + xe->mmio.size); + if (xe->mmio.regs == NULL) { + drm_err(&xe->drm, "failed to map registers\n"); + return -EIO; + } + + err = drmm_add_action_or_reset(&xe->drm, mmio_fini, xe); + if (err) + return err; + + /* 1 GT for now, 1 to 1 mapping, may change on multi-GT devices */ + gt->mmio.size = xe->mmio.size; + gt->mmio.regs = xe->mmio.regs; + + /* + * The boot firmware initializes local memory and assesses its health. + * If memory training fails, the punit will have been instructed to + * keep the GT powered down; we won't be able to communicate with it + * and we should not continue with driver initialization. + */ + if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL.reg) & LMEM_INIT)) { + drm_err(&xe->drm, "LMEM not initialized by firmware\n"); + return -ENODEV; + } + + err = xe_set_dma_info(xe); + if (err) + return err; + + xe_mmio_probe_tiles(xe); + + return 0; +} + +#define VALID_MMIO_FLAGS (\ + DRM_XE_MMIO_BITS_MASK |\ + DRM_XE_MMIO_READ |\ + DRM_XE_MMIO_WRITE) + +static const i915_reg_t mmio_read_whitelist[] = { + RING_TIMESTAMP(RENDER_RING_BASE), +}; + +int xe_mmio_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct drm_xe_mmio *args = data; + unsigned int bits_flag, bytes; + bool allowed; + int ret = 0; + + if (XE_IOCTL_ERR(xe, args->extensions)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->flags & ~VALID_MMIO_FLAGS)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_MMIO_WRITE) && args->value)) + return -EINVAL; + + allowed = capable(CAP_SYS_ADMIN); + if (!allowed && ((args->flags & ~DRM_XE_MMIO_BITS_MASK) == DRM_XE_MMIO_READ)) { + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(mmio_read_whitelist); i++) { + if (mmio_read_whitelist[i].reg == args->addr) { + allowed = true; + break; + } + } + } + + if (XE_IOCTL_ERR(xe, !allowed)) + return -EPERM; + + bits_flag = args->flags & DRM_XE_MMIO_BITS_MASK; + bytes = 1 << bits_flag; + if (XE_IOCTL_ERR(xe, args->addr + bytes > xe->mmio.size)) + return -EINVAL; + + xe_force_wake_get(gt_to_fw(&xe->gt[0]), XE_FORCEWAKE_ALL); + + if (args->flags & DRM_XE_MMIO_WRITE) { + switch (bits_flag) { + case DRM_XE_MMIO_8BIT: + return -EINVAL; /* TODO */ + case DRM_XE_MMIO_16BIT: + return -EINVAL; /* TODO */ + case DRM_XE_MMIO_32BIT: + if (XE_IOCTL_ERR(xe, args->value > U32_MAX)) + return -EINVAL; + xe_mmio_write32(to_gt(xe), args->addr, args->value); + break; + case DRM_XE_MMIO_64BIT: + xe_mmio_write64(to_gt(xe), args->addr, args->value); + break; + default: + drm_WARN(&xe->drm, 1, "Invalid MMIO bit size"); + ret = -EINVAL; + goto exit; + } + } + + if (args->flags & DRM_XE_MMIO_READ) { + switch (bits_flag) { + case DRM_XE_MMIO_8BIT: + return -EINVAL; /* TODO */ + case DRM_XE_MMIO_16BIT: + return -EINVAL; /* TODO */ + case DRM_XE_MMIO_32BIT: + args->value = xe_mmio_read32(to_gt(xe), args->addr); + break; + case DRM_XE_MMIO_64BIT: + args->value = xe_mmio_read64(to_gt(xe), args->addr); + break; + default: + drm_WARN(&xe->drm, 1, "Invalid MMIO bit size"); + ret = -EINVAL; + } + } + +exit: + xe_force_wake_put(gt_to_fw(&xe->gt[0]), XE_FORCEWAKE_ALL); + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h new file mode 100644 index 000000000000..09d24467096f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_MMIO_H_ +#define 
_XE_MMIO_H_ + +#include + +#include "xe_gt_types.h" + +/* + * FIXME: This header has been deemed evil and we need to kill it. Temporarily + * including so we can use 'wait_for' and unblock initial development. A follow + * should replace 'wait_for' with a sane version and drop including this header. + */ +#include "i915_utils.h" + +struct drm_device; +struct drm_file; +struct xe_device; + +int xe_mmio_init(struct xe_device *xe); + +static inline u8 xe_mmio_read8(struct xe_gt *gt, u32 reg) +{ + if (reg < gt->mmio.adj_limit) + reg += gt->mmio.adj_offset; + + return readb(gt->mmio.regs + reg); +} + +static inline void xe_mmio_write32(struct xe_gt *gt, + u32 reg, u32 val) +{ + if (reg < gt->mmio.adj_limit) + reg += gt->mmio.adj_offset; + + writel(val, gt->mmio.regs + reg); +} + +static inline u32 xe_mmio_read32(struct xe_gt *gt, u32 reg) +{ + if (reg < gt->mmio.adj_limit) + reg += gt->mmio.adj_offset; + + return readl(gt->mmio.regs + reg); +} + +static inline u32 xe_mmio_rmw32(struct xe_gt *gt, u32 reg, u32 mask, + u32 val) +{ + u32 old, reg_val; + + old = xe_mmio_read32(gt, reg); + reg_val = (old & mask) | val; + xe_mmio_write32(gt, reg, reg_val); + + return old; +} + +static inline void xe_mmio_write64(struct xe_gt *gt, + u32 reg, u64 val) +{ + if (reg < gt->mmio.adj_limit) + reg += gt->mmio.adj_offset; + + writeq(val, gt->mmio.regs + reg); +} + +static inline u64 xe_mmio_read64(struct xe_gt *gt, u32 reg) +{ + if (reg < gt->mmio.adj_limit) + reg += gt->mmio.adj_offset; + + return readq(gt->mmio.regs + reg); +} + +static inline int xe_mmio_write32_and_verify(struct xe_gt *gt, + u32 reg, u32 val, + u32 mask, u32 eval) +{ + u32 reg_val; + + xe_mmio_write32(gt, reg, val); + reg_val = xe_mmio_read32(gt, reg); + + return (reg_val & mask) != eval ? -EINVAL : 0; +} + +static inline int xe_mmio_wait32(struct xe_gt *gt, + u32 reg, u32 val, + u32 mask, u32 timeout_ms) +{ + return wait_for((xe_mmio_read32(gt, reg) & mask) == val, + timeout_ms); +} + +int xe_mmio_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); + +static inline bool xe_mmio_in_range(const struct xe_mmio_range *range, u32 reg) +{ + return range && reg >= range->start && reg <= range->end; +} + +int xe_mmio_probe_vram(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c new file mode 100644 index 000000000000..86b966fffbe5 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -0,0 +1,557 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_engine.h" +#include "xe_gt.h" +#include "xe_platform_types.h" +#include "xe_mmio.h" +#include "xe_mocs.h" +#include "xe_step_types.h" + +#include "gt/intel_gt_regs.h" + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +#define mocs_dbg drm_dbg +#else +__printf(2, 3) +static inline void mocs_dbg(const struct drm_device *dev, + const char *format, ...) +{ /* noop */ } +#endif + +/* + * MOCS indexes used for GPU surfaces, defining the cacheability of the + * surface data and the coherency for this data wrt. CPU vs. GPU accesses. + */ +enum xe_mocs_info_index { + /* + * Not cached anywhere, coherency between CPU and GPU accesses is + * guaranteed. + */ + XE_MOCS_UNCACHED, + /* + * Cacheability and coherency controlled by the kernel automatically + * based on the xxxx IOCTL setting and the current + * usage of the surface (used for display scanout or not). + */ + XE_MOCS_PTE, + /* + * Cached in all GPU caches available on the platform. 
+ * Coherency between CPU and GPU accesses to the surface is not + * guaranteed without extra synchronization. + */ + XE_MOCS_CACHED, +}; + +enum { + HAS_GLOBAL_MOCS = BIT(0), + HAS_RENDER_L3CC = BIT(1), +}; + +struct xe_mocs_entry { + u32 control_value; + u16 l3cc_value; + u16 used; +}; + +struct xe_mocs_info { + unsigned int size; + unsigned int n_entries; + const struct xe_mocs_entry *table; + u8 uc_index; + u8 wb_index; + u8 unused_entries_index; +}; + +/* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ +#define _LE_CACHEABILITY(value) ((value) << 0) +#define _LE_TGT_CACHE(value) ((value) << 2) +#define LE_LRUM(value) ((value) << 4) +#define LE_AOM(value) ((value) << 6) +#define LE_RSC(value) ((value) << 7) +#define LE_SCC(value) ((value) << 8) +#define LE_PFM(value) ((value) << 11) +#define LE_SCF(value) ((value) << 14) +#define LE_COS(value) ((value) << 15) +#define LE_SSE(value) ((value) << 17) + +/* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ +#define L3_ESC(value) ((value) << 0) +#define L3_SCC(value) ((value) << 1) +#define _L3_CACHEABILITY(value) ((value) << 4) +#define L3_GLBGO(value) ((value) << 6) +#define L3_LKUP(value) ((value) << 7) + +/* Helper defines */ +#define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ +#define PVC_NUM_MOCS_ENTRIES 3 +#define MTL_NUM_MOCS_ENTRIES 16 + +/* (e)LLC caching options */ +/* + * Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means + * the same as LE_UC + */ +#define LE_0_PAGETABLE _LE_CACHEABILITY(0) +#define LE_1_UC _LE_CACHEABILITY(1) +#define LE_2_WT _LE_CACHEABILITY(2) +#define LE_3_WB _LE_CACHEABILITY(3) + +/* Target cache */ +#define LE_TC_0_PAGETABLE _LE_TGT_CACHE(0) +#define LE_TC_1_LLC _LE_TGT_CACHE(1) +#define LE_TC_2_LLC_ELLC _LE_TGT_CACHE(2) +#define LE_TC_3_LLC_ELLC_ALT _LE_TGT_CACHE(3) + +/* L3 caching options */ +#define L3_0_DIRECT _L3_CACHEABILITY(0) +#define L3_1_UC _L3_CACHEABILITY(1) +#define L3_2_RESERVED _L3_CACHEABILITY(2) +#define L3_3_WB _L3_CACHEABILITY(3) + +#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \ + [__idx] = { \ + .control_value = __control_value, \ + .l3cc_value = __l3cc_value, \ + .used = 1, \ + } + +/* + * MOCS tables + * + * These are the MOCS tables that are programmed across all the rings. + * The control value is programmed to all the rings that support the + * MOCS registers. While the l3cc_values are only programmed to the + * LNCFCMOCS0 - LNCFCMOCS32 registers. + * + * These tables are intended to be kept reasonably consistent across + * HW platforms, and for ICL+, be identical across OSes. To achieve + * that, for Icelake and above, list of entries is published as part + * of bspec. + * + * Entries not part of the following tables are undefined as far as + * userspace is concerned and shouldn't be relied upon. For Gen < 12 + * they will be initialized to PTE. Gen >= 12 don't have a setting for + * PTE and those platforms except TGL/RKL will be initialized L3 WB to + * catch accidental use of reserved and unused mocs indexes. + * + * The last few entries are reserved by the hardware. For ICL+ they + * should be initialized according to bspec and never used, for older + * platforms they should never be written to. + * + * NOTE1: These tables are part of bspec and defined as part of hardware + * interface for ICL+. For older platforms, they are part of kernel + * ABI. 
It is expected that, for specific hardware platform, existing + * entries will remain constant and the table will only be updated by + * adding new entries, filling unused positions. + * + * NOTE2: For GEN >= 12 except TGL and RKL, reserved and unspecified MOCS + * indices have been set to L3 WB. These reserved entries should never + * be used, they may be changed to low performant variants with better + * coherency in the future if more entries are needed. + * For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC. + */ + +#define GEN11_MOCS_ENTRIES \ + /* Entries 0 and 1 are defined per-platform */ \ + /* Base - L3 + LLC */ \ + MOCS_ENTRY(2, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_3_WB), \ + /* Base - Uncached */ \ + MOCS_ENTRY(3, \ + LE_1_UC | LE_TC_1_LLC, \ + L3_1_UC), \ + /* Base - L3 */ \ + MOCS_ENTRY(4, \ + LE_1_UC | LE_TC_1_LLC, \ + L3_3_WB), \ + /* Base - LLC */ \ + MOCS_ENTRY(5, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_1_UC), \ + /* Age 0 - LLC */ \ + MOCS_ENTRY(6, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ + L3_1_UC), \ + /* Age 0 - L3 + LLC */ \ + MOCS_ENTRY(7, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ + L3_3_WB), \ + /* Age: Don't Chg. - LLC */ \ + MOCS_ENTRY(8, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ + L3_1_UC), \ + /* Age: Don't Chg. - L3 + LLC */ \ + MOCS_ENTRY(9, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ + L3_3_WB), \ + /* No AOM - LLC */ \ + MOCS_ENTRY(10, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ + L3_1_UC), \ + /* No AOM - L3 + LLC */ \ + MOCS_ENTRY(11, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ + L3_3_WB), \ + /* No AOM; Age 0 - LLC */ \ + MOCS_ENTRY(12, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ + L3_1_UC), \ + /* No AOM; Age 0 - L3 + LLC */ \ + MOCS_ENTRY(13, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ + L3_3_WB), \ + /* No AOM; Age:DC - LLC */ \ + MOCS_ENTRY(14, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ + L3_1_UC), \ + /* No AOM; Age:DC - L3 + LLC */ \ + MOCS_ENTRY(15, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ + L3_3_WB), \ + /* Self-Snoop - L3 + LLC */ \ + MOCS_ENTRY(18, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(12.5%) */ \ + MOCS_ENTRY(19, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(25%) */ \ + MOCS_ENTRY(20, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(50%) */ \ + MOCS_ENTRY(21, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(75%) */ \ + MOCS_ENTRY(22, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(87.5%) */ \ + MOCS_ENTRY(23, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \ + L3_3_WB), \ + /* HW Reserved - SW program but never use */ \ + MOCS_ENTRY(62, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_1_UC), \ + /* HW Reserved - SW program but never use */ \ + MOCS_ENTRY(63, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_1_UC) + +static const struct xe_mocs_entry tgl_mocs_desc[] = { + /* + * NOTE: + * Reserved and unspecified MOCS indices have been set to (L3 + LCC). + * These reserved entries should never be used, they may be changed + * to low performant variants with better coherency in the future if + * more entries are needed. We are programming index XE_MOCS_PTE(1) + * only, __init_mocs_table() take care to program unused index with + * this entry. 
+ */ + MOCS_ENTRY(XE_MOCS_PTE, + LE_0_PAGETABLE | LE_TC_0_PAGETABLE, + L3_1_UC), + GEN11_MOCS_ENTRIES, + + /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ + MOCS_ENTRY(48, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + L3 */ + MOCS_ENTRY(49, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + LLC */ + MOCS_ENTRY(50, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* Implicitly enable L1 - HDC:L1 */ + MOCS_ENTRY(51, + LE_1_UC | LE_TC_1_LLC, + L3_1_UC), + /* HW Special Case (CCS) */ + MOCS_ENTRY(60, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* HW Special Case (Displayable) */ + MOCS_ENTRY(61, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), +}; + +static const struct xe_mocs_entry dg1_mocs_desc[] = { + /* UC */ + MOCS_ENTRY(1, 0, L3_1_UC), + /* WB - L3 */ + MOCS_ENTRY(5, 0, L3_3_WB), + /* WB - L3 50% */ + MOCS_ENTRY(6, 0, L3_ESC(1) | L3_SCC(1) | L3_3_WB), + /* WB - L3 25% */ + MOCS_ENTRY(7, 0, L3_ESC(1) | L3_SCC(3) | L3_3_WB), + /* WB - L3 12.5% */ + MOCS_ENTRY(8, 0, L3_ESC(1) | L3_SCC(7) | L3_3_WB), + + /* HDC:L1 + L3 */ + MOCS_ENTRY(48, 0, L3_3_WB), + /* HDC:L1 */ + MOCS_ENTRY(49, 0, L3_1_UC), + + /* HW Reserved */ + MOCS_ENTRY(60, 0, L3_1_UC), + MOCS_ENTRY(61, 0, L3_1_UC), + MOCS_ENTRY(62, 0, L3_1_UC), + MOCS_ENTRY(63, 0, L3_1_UC), +}; + +static const struct xe_mocs_entry gen12_mocs_desc[] = { + GEN11_MOCS_ENTRIES, + /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ + MOCS_ENTRY(48, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + L3 */ + MOCS_ENTRY(49, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + LLC */ + MOCS_ENTRY(50, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* Implicitly enable L1 - HDC:L1 */ + MOCS_ENTRY(51, + LE_1_UC | LE_TC_1_LLC, + L3_1_UC), + /* HW Special Case (CCS) */ + MOCS_ENTRY(60, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* HW Special Case (Displayable) */ + MOCS_ENTRY(61, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), +}; + +static const struct xe_mocs_entry dg2_mocs_desc[] = { + /* UC - Coherent; GO:L3 */ + MOCS_ENTRY(0, 0, L3_1_UC | L3_LKUP(1)), + /* UC - Coherent; GO:Memory */ + MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Non-Coherent; GO:Memory */ + MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), + + /* WB - LC */ + MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), +}; + +static const struct xe_mocs_entry dg2_mocs_desc_g10_ax[] = { + /* Wa_14011441408: Set Go to Memory for MOCS#0 */ + MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Coherent; GO:Memory */ + MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Non-Coherent; GO:Memory */ + MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), + + /* WB - LC */ + MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), +}; + +static const struct xe_mocs_entry pvc_mocs_desc[] = { + /* Error */ + MOCS_ENTRY(0, 0, L3_3_WB), + + /* UC */ + MOCS_ENTRY(1, 0, L3_1_UC), + + /* WB */ + MOCS_ENTRY(2, 0, L3_3_WB), +}; + +static unsigned int get_mocs_settings(struct xe_device *xe, + struct xe_mocs_info *info) +{ + unsigned int flags; + + memset(info, 0, sizeof(struct xe_mocs_info)); + + info->unused_entries_index = XE_MOCS_PTE; + switch (xe->info.platform) { + case XE_PVC: + info->size = ARRAY_SIZE(pvc_mocs_desc); + info->table = pvc_mocs_desc; + info->n_entries = PVC_NUM_MOCS_ENTRIES; + info->uc_index = 1; + info->wb_index = 2; + info->unused_entries_index = 2; + break; + case XE_METEORLAKE: + info->size = ARRAY_SIZE(dg2_mocs_desc); + info->table = dg2_mocs_desc; + info->n_entries = 
MTL_NUM_MOCS_ENTRIES; + info->uc_index = 1; + info->unused_entries_index = 3; + break; + case XE_DG2: + if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10 && + xe->info.step.graphics >= STEP_A0 && + xe->info.step.graphics <= STEP_B0) { + info->size = ARRAY_SIZE(dg2_mocs_desc_g10_ax); + info->table = dg2_mocs_desc_g10_ax; + } else { + info->size = ARRAY_SIZE(dg2_mocs_desc); + info->table = dg2_mocs_desc; + } + info->uc_index = 1; + info->n_entries = GEN9_NUM_MOCS_ENTRIES; + info->unused_entries_index = 3; + break; + case XE_DG1: + info->size = ARRAY_SIZE(dg1_mocs_desc); + info->table = dg1_mocs_desc; + info->uc_index = 1; + info->n_entries = GEN9_NUM_MOCS_ENTRIES; + info->uc_index = 1; + info->unused_entries_index = 5; + break; + case XE_TIGERLAKE: + info->size = ARRAY_SIZE(tgl_mocs_desc); + info->table = tgl_mocs_desc; + info->n_entries = GEN9_NUM_MOCS_ENTRIES; + info->uc_index = 3; + break; + case XE_ALDERLAKE_S: + case XE_ALDERLAKE_P: + info->size = ARRAY_SIZE(gen12_mocs_desc); + info->table = gen12_mocs_desc; + info->n_entries = GEN9_NUM_MOCS_ENTRIES; + info->uc_index = 3; + info->unused_entries_index = 2; + break; + default: + drm_err(&xe->drm, "Platform that should have a MOCS table does not.\n"); + return 0; + } + + if (XE_WARN_ON(info->size > info->n_entries)) + return 0; + + flags = HAS_RENDER_L3CC; + if (!IS_DGFX(xe)) + flags |= HAS_GLOBAL_MOCS; + + return flags; +} + +/* + * Get control_value from MOCS entry taking into account when it's not used + * then if unused_entries_index is non-zero then its value will be returned + * otherwise XE_MOCS_PTE's value is returned in this case. + */ +static u32 get_entry_control(const struct xe_mocs_info *info, + unsigned int index) +{ + if (index < info->size && info->table[index].used) + return info->table[index].control_value; + return info->table[info->unused_entries_index].control_value; +} + +static void __init_mocs_table(struct xe_gt *gt, + const struct xe_mocs_info *info, + u32 addr) +{ + struct xe_device *xe = gt_to_xe(gt); + + unsigned int i; + u32 mocs; + + mocs_dbg(>->xe->drm, "entries:%d\n", info->n_entries); + drm_WARN_ONCE(&xe->drm, !info->unused_entries_index, + "Unused entries index should have been defined\n"); + for (i = 0; + i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0; + i++) { + mocs_dbg(>->xe->drm, "%d 0x%x 0x%x\n", i, _MMIO(addr + i * 4).reg, mocs); + xe_mmio_write32(gt, _MMIO(addr + i * 4).reg, mocs); + } +} + +/* + * Get l3cc_value from MOCS entry taking into account when it's not used + * then if unused_entries_index is not zero then its value will be returned + * otherwise I915_MOCS_PTE's value is returned in this case. + */ +static u16 get_entry_l3cc(const struct xe_mocs_info *info, + unsigned int index) +{ + if (index < info->size && info->table[index].used) + return info->table[index].l3cc_value; + return info->table[info->unused_entries_index].l3cc_value; +} + +static u32 l3cc_combine(u16 low, u16 high) +{ + return low | (u32)high << 16; +} + +static void init_l3cc_table(struct xe_gt *gt, + const struct xe_mocs_info *info) +{ + unsigned int i; + u32 l3cc; + + mocs_dbg(>->xe->drm, "entries:%d\n", info->n_entries); + for (i = 0; + i < (info->n_entries + 1) / 2 ? 
+ (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i), + get_entry_l3cc(info, 2 * i + 1))), 1 : 0; + i++) { + mocs_dbg(>->xe->drm, "%d 0x%x 0x%x\n", i, GEN9_LNCFCMOCS(i).reg, l3cc); + xe_mmio_write32(gt, GEN9_LNCFCMOCS(i).reg, l3cc); + } +} + +void xe_mocs_init_engine(const struct xe_engine *engine) +{ + struct xe_mocs_info table; + unsigned int flags; + + flags = get_mocs_settings(engine->gt->xe, &table); + if (!flags) + return; + + if (flags & HAS_RENDER_L3CC && engine->class == XE_ENGINE_CLASS_RENDER) + init_l3cc_table(engine->gt, &table); +} + +void xe_mocs_init(struct xe_gt *gt) +{ + struct xe_mocs_info table; + unsigned int flags; + + /* + * LLC and eDRAM control values are not applicable to dgfx + */ + flags = get_mocs_settings(gt->xe, &table); + mocs_dbg(>->xe->drm, "flag:0x%x\n", flags); + gt->mocs.uc_index = table.uc_index; + gt->mocs.wb_index = table.wb_index; + + if (flags & HAS_GLOBAL_MOCS) + __init_mocs_table(gt, &table, GEN12_GLOBAL_MOCS(0).reg); + + /* + * Initialize the L3CC table as part of mocs initalization to make + * sure the LNCFCMOCSx registers are programmed for the subsequent + * memory transactions including guc transactions + */ + if (flags & HAS_RENDER_L3CC) + init_l3cc_table(gt, &table); +} diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h new file mode 100644 index 000000000000..aba1abe216ab --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mocs.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_MOCS_H_ +#define _XE_MOCS_H_ + +#include + +struct xe_engine; +struct xe_gt; + +void xe_mocs_init_engine(const struct xe_engine *engine); +void xe_mocs_init(struct xe_gt *gt); + +/** + * xe_mocs_index_to_value - Translate mocs index to the mocs value exected by + * most blitter commands. + * @mocs_index: index into the mocs tables + * + * Return: The corresponding mocs value to be programmed. + */ +static inline u32 xe_mocs_index_to_value(u32 mocs_index) +{ + return mocs_index << 1; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c new file mode 100644 index 000000000000..cc862553a252 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_module.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include +#include + +#include "xe_drv.h" +#include "xe_hw_fence.h" +#include "xe_module.h" +#include "xe_pci.h" +#include "xe_sched_job.h" + +bool enable_guc = true; +module_param_named_unsafe(enable_guc, enable_guc, bool, 0444); +MODULE_PARM_DESC(enable_guc, "Enable GuC submission"); + +u32 xe_force_lmem_bar_size; +module_param_named(lmem_bar_size, xe_force_lmem_bar_size, uint, 0600); +MODULE_PARM_DESC(lmem_bar_size, "Set the lmem bar size(in MiB)"); + +int xe_guc_log_level = 5; +module_param_named(guc_log_level, xe_guc_log_level, int, 0600); +MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1..5=enable with verbosity min..max)"); + +char *xe_param_force_probe = CONFIG_DRM_XE_FORCE_PROBE; +module_param_named_unsafe(force_probe, xe_param_force_probe, charp, 0400); +MODULE_PARM_DESC(force_probe, + "Force probe options for specified devices. 
See CONFIG_DRM_XE_FORCE_PROBE for details."); + +struct init_funcs { + int (*init)(void); + void (*exit)(void); +}; +#define MAKE_INIT_EXIT_FUNCS(name) \ + { .init = xe_##name##_module_init, \ + .exit = xe_##name##_module_exit, } +static const struct init_funcs init_funcs[] = { + MAKE_INIT_EXIT_FUNCS(hw_fence), + MAKE_INIT_EXIT_FUNCS(sched_job), +}; + +static int __init xe_init(void) +{ + int err, i; + + for (i = 0; i < ARRAY_SIZE(init_funcs); i++) { + err = init_funcs[i].init(); + if (err) { + while (i--) + init_funcs[i].exit(); + return err; + } + } + + return xe_register_pci_driver(); +} + +static void __exit xe_exit(void) +{ + int i; + + xe_unregister_pci_driver(); + + for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--) + init_funcs[i].exit(); +} + +module_init(xe_init); +module_exit(xe_exit); + +MODULE_AUTHOR("Intel Corporation"); + +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL and additional rights"); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h new file mode 100644 index 000000000000..2c6ee46f5595 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_module.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include + +/* Module modprobe variables */ +extern bool enable_guc; +extern bool enable_display; +extern u32 xe_force_lmem_bar_size; +extern int xe_guc_log_level; +extern char *xe_param_force_probe; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c new file mode 100644 index 000000000000..55d8a597a068 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -0,0 +1,651 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_pci.h" + +#include +#include +#include +#include + +#include +#include +#include + +#include "xe_drv.h" +#include "xe_device.h" +#include "xe_macros.h" +#include "xe_module.h" +#include "xe_pm.h" +#include "xe_step.h" + +#include "i915_reg.h" + +#define DEV_INFO_FOR_EACH_FLAG(func) \ + func(require_force_probe); \ + func(is_dgfx); \ + /* Keep has_* in alphabetical order */ \ + +struct xe_subplatform_desc { + enum xe_subplatform subplatform; + const char *name; + const u16 *pciidlist; +}; + +struct xe_gt_desc { + enum xe_gt_type type; + u8 vram_id; + u64 engine_mask; + u32 mmio_adj_limit; + u32 mmio_adj_offset; +}; + +struct xe_device_desc { + u8 graphics_ver; + u8 graphics_rel; + u8 media_ver; + u8 media_rel; + + u64 platform_engine_mask; /* Engines supported by the HW */ + + enum xe_platform platform; + const char *platform_name; + const struct xe_subplatform_desc *subplatforms; + const struct xe_gt_desc *extra_gts; + + u8 dma_mask_size; /* available DMA address bits */ + + u8 gt; /* GT number, 0 if undefined */ + +#define DEFINE_FLAG(name) u8 name:1 + DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); +#undef DEFINE_FLAG + + u8 vram_flags; + u8 max_tiles; + u8 vm_max_level; + + bool supports_usm; + bool has_flat_ccs; + bool has_4tile; +}; + +#define PLATFORM(x) \ + .platform = (x), \ + .platform_name = #x + +#define NOP(x) x + +/* Keep in gen based order, and chronological order within a gen */ +#define GEN12_FEATURES \ + .require_force_probe = true, \ + .graphics_ver = 12, \ + .media_ver = 12, \ + .dma_mask_size = 39, \ + .max_tiles = 1, \ + .vm_max_level = 3, \ + .vram_flags = 0 + +static const struct xe_device_desc tgl_desc = { + GEN12_FEATURES, + PLATFORM(XE_TIGERLAKE), + .platform_engine_mask = + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | + BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) | + BIT(XE_HW_ENGINE_VCS2), +}; 
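The descriptor blocks in xe_pci.c compose per-platform settings from shared feature macros: GEN12_FEATURES fills in the common defaults, PLATFORM() records the platform enum and name, and the remaining fields (engine mask, subplatform ID lists) are set per device before the descriptor is bound to PCI IDs in pciidlist further down. The following is only a rough, hypothetical sketch of that pattern; XE_NEWPLAT, XE_SUBPLATFORM_NEWPLAT_A, newplat_ids and the device IDs below are illustrative placeholders and are not defined anywhere in this patch.

	/* Hypothetical example, not part of this patch. */
	static const u16 newplat_ids[] = { 0x9a99, 0x9a9a, 0 };	/* placeholder PCI device IDs */

	static const struct xe_device_desc newplat_desc = {
		GEN12_FEATURES,			/* shared defaults defined above */
		PLATFORM(XE_NEWPLAT),		/* sets .platform / .platform_name */
		.platform_engine_mask =
			BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) |
			BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0),
		.subplatforms = (const struct xe_subplatform_desc[]) {
			/* fields: subplatform enum, name, PCI ID list */
			{ XE_SUBPLATFORM_NEWPLAT_A, "A", newplat_ids },
			{},
		},
	};

A matching INTEL_VGA_DEVICE entry in pciidlist would then tie those PCI IDs to &newplat_desc, which is how xe_pci_probe() later retrieves the descriptor through ent->driver_data.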
+ +static const struct xe_device_desc adl_s_desc = { + GEN12_FEATURES, + PLATFORM(XE_ALDERLAKE_S), + .platform_engine_mask = + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | + BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) | + BIT(XE_HW_ENGINE_VCS2), +}; + +static const u16 adlp_rplu_ids[] = { XE_RPLU_IDS(NOP), 0 }; + +static const struct xe_device_desc adl_p_desc = { + GEN12_FEATURES, + PLATFORM(XE_ALDERLAKE_P), + .platform_engine_mask = + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | + BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) | + BIT(XE_HW_ENGINE_VCS2), + .subplatforms = (const struct xe_subplatform_desc[]) { + { XE_SUBPLATFORM_ADLP_RPLU, "RPLU", adlp_rplu_ids }, + {}, + }, +}; + +#define DGFX_FEATURES \ + .is_dgfx = 1 + +static const struct xe_device_desc dg1_desc = { + GEN12_FEATURES, + DGFX_FEATURES, + .graphics_rel = 10, + PLATFORM(XE_DG1), + .platform_engine_mask = + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | + BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) | + BIT(XE_HW_ENGINE_VCS2), +}; + +#define XE_HP_FEATURES \ + .require_force_probe = true, \ + .graphics_ver = 12, \ + .graphics_rel = 50, \ + .has_flat_ccs = true, \ + .dma_mask_size = 46, \ + .max_tiles = 1, \ + .vm_max_level = 3 + +#define XE_HPM_FEATURES \ + .media_ver = 12, \ + .media_rel = 50 + +static const u16 dg2_g10_ids[] = { XE_DG2_G10_IDS(NOP), XE_ATS_M150_IDS(NOP), 0 }; +static const u16 dg2_g11_ids[] = { XE_DG2_G11_IDS(NOP), XE_ATS_M75_IDS(NOP), 0 }; +static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 }; + +#define DG2_FEATURES \ + DGFX_FEATURES, \ + .graphics_rel = 55, \ + .media_rel = 55, \ + PLATFORM(XE_DG2), \ + .subplatforms = (const struct xe_subplatform_desc[]) { \ + { XE_SUBPLATFORM_DG2_G10, "G10", dg2_g10_ids }, \ + { XE_SUBPLATFORM_DG2_G11, "G11", dg2_g11_ids }, \ + { XE_SUBPLATFORM_DG2_G12, "G12", dg2_g12_ids }, \ + { } \ + }, \ + .platform_engine_mask = \ + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | \ + BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VECS1) | \ + BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | \ + BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | \ + BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), \ + .require_force_probe = true, \ + .vram_flags = XE_VRAM_FLAGS_NEED64K, \ + .has_4tile = 1 + +static const struct xe_device_desc ats_m_desc = { + XE_HP_FEATURES, + XE_HPM_FEATURES, + + DG2_FEATURES, +}; + +static const struct xe_device_desc dg2_desc = { + XE_HP_FEATURES, + XE_HPM_FEATURES, + + DG2_FEATURES, +}; + +#define PVC_ENGINES \ + BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_BCS1) | \ + BIT(XE_HW_ENGINE_BCS2) | BIT(XE_HW_ENGINE_BCS3) | \ + BIT(XE_HW_ENGINE_BCS4) | BIT(XE_HW_ENGINE_BCS5) | \ + BIT(XE_HW_ENGINE_BCS6) | BIT(XE_HW_ENGINE_BCS7) | \ + BIT(XE_HW_ENGINE_BCS8) | \ + BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS1) | \ + BIT(XE_HW_ENGINE_VCS2) | \ + BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | \ + BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3) + +static const struct xe_gt_desc pvc_gts[] = { + { + .type = XE_GT_TYPE_REMOTE, + .vram_id = 1, + .engine_mask = PVC_ENGINES, + .mmio_adj_limit = 0, + .mmio_adj_offset = 0, + }, +}; + +static const __maybe_unused struct xe_device_desc pvc_desc = { + XE_HP_FEATURES, + XE_HPM_FEATURES, + DGFX_FEATURES, + PLATFORM(XE_PVC), + .extra_gts = pvc_gts, + .graphics_rel = 60, + .has_flat_ccs = 0, + .media_rel = 60, + .platform_engine_mask = PVC_ENGINES, + .vram_flags = XE_VRAM_FLAGS_NEED64K, + .dma_mask_size = 52, + .max_tiles = 2, + .vm_max_level = 4, + .supports_usm = true, +}; + +#define MTL_MEDIA_ENGINES \ 
+ BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | \ + BIT(XE_HW_ENGINE_VECS0) /* TODO: GSC0 */ + +static const struct xe_gt_desc xelpmp_gts[] = { + { + .type = XE_GT_TYPE_MEDIA, + .vram_id = 0, + .engine_mask = MTL_MEDIA_ENGINES, + .mmio_adj_limit = 0x40000, + .mmio_adj_offset = 0x380000, + }, +}; + +#define MTL_MAIN_ENGINES \ + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | \ + BIT(XE_HW_ENGINE_CCS0) + +static const struct xe_device_desc mtl_desc = { + /* + * Real graphics IP version will be obtained from hardware GMD_ID + * register. Value provided here is just for sanity checking. + */ + .require_force_probe = true, + .graphics_ver = 12, + .graphics_rel = 70, + .dma_mask_size = 46, + .max_tiles = 2, + .vm_max_level = 3, + .media_ver = 13, + PLATFORM(XE_METEORLAKE), + .extra_gts = xelpmp_gts, + .platform_engine_mask = MTL_MAIN_ENGINES, +}; + +#undef PLATFORM + +#define INTEL_VGA_DEVICE(id, info) { \ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, id), \ + PCI_BASE_CLASS_DISPLAY << 16, 0xff << 16, \ + (unsigned long) info } + +/* + * Make sure any device matches here are from most specific to most + * general. For example, since the Quanta match is based on the subsystem + * and subvendor IDs, we need it to come before the more general IVB + * PCI ID matches, otherwise we'll use the wrong info struct above. + */ +static const struct pci_device_id pciidlist[] = { + XE_TGL_GT2_IDS(INTEL_VGA_DEVICE, &tgl_desc), + XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc), + XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc), + XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc), + XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc), + XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc), + XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc), + { } +}; +MODULE_DEVICE_TABLE(pci, pciidlist); + +#undef INTEL_VGA_DEVICE + +/* is device_id present in comma separated list of ids */ +static bool device_id_in_list(u16 device_id, const char *devices, bool negative) +{ + char *s, *p, *tok; + bool ret; + + if (!devices || !*devices) + return false; + + /* match everything */ + if (negative && strcmp(devices, "!*") == 0) + return true; + if (!negative && strcmp(devices, "*") == 0) + return true; + + s = kstrdup(devices, GFP_KERNEL); + if (!s) + return false; + + for (p = s, ret = false; (tok = strsep(&p, ",")) != NULL; ) { + u16 val; + + if (negative && tok[0] == '!') + tok++; + else if ((negative && tok[0] != '!') || + (!negative && tok[0] == '!')) + continue; + + if (kstrtou16(tok, 16, &val) == 0 && val == device_id) { + ret = true; + break; + } + } + + kfree(s); + + return ret; +} + +static bool id_forced(u16 device_id) +{ + return device_id_in_list(device_id, xe_param_force_probe, false); +} + +static bool id_blocked(u16 device_id) +{ + return device_id_in_list(device_id, xe_param_force_probe, true); +} + +static const struct xe_subplatform_desc * +subplatform_get(const struct xe_device *xe, const struct xe_device_desc *desc) +{ + const struct xe_subplatform_desc *sp; + const u16 *id; + + for (sp = desc->subplatforms; sp && sp->subplatform; sp++) + for (id = sp->pciidlist; *id; id++) + if (*id == xe->info.devid) + return sp; + + return NULL; +} + +static void xe_pci_remove(struct pci_dev *pdev) +{ + struct xe_device *xe; + + xe = pci_get_drvdata(pdev); + if (!xe) /* driver load aborted, nothing to cleanup */ + return; + + xe_device_remove(xe); + pci_set_drvdata(pdev, NULL); +} + +static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + const struct xe_device_desc *desc = (void *)ent->driver_data; + const struct xe_subplatform_desc *spd; + struct 
xe_device *xe; + struct xe_gt *gt; + u8 id; + int err; + + if (desc->require_force_probe && !id_forced(pdev->device)) { + dev_info(&pdev->dev, + "Your graphics device %04x is not officially supported\n" + "by xe driver in this kernel version. To force Xe probe,\n" + "use xe.force_probe='%04x' and i915.force_probe='!%04x'\n" + "module parameters or CONFIG_DRM_XE_FORCE_PROBE='%04x' and\n" + "CONFIG_DRM_I915_FORCE_PROBE='!%04x' configuration options.\n", + pdev->device, pdev->device, pdev->device, + pdev->device, pdev->device); + return -ENODEV; + } + + if (id_blocked(pdev->device)) { + dev_info(&pdev->dev, "Probe blocked for device [%04x:%04x].\n", + pdev->vendor, pdev->device); + return -ENODEV; + } + + xe = xe_device_create(pdev, ent); + if (IS_ERR(xe)) + return PTR_ERR(xe); + + xe->info.graphics_verx100 = desc->graphics_ver * 100 + + desc->graphics_rel; + xe->info.media_verx100 = desc->media_ver * 100 + + desc->media_rel; + xe->info.is_dgfx = desc->is_dgfx; + xe->info.platform = desc->platform; + xe->info.dma_mask_size = desc->dma_mask_size; + xe->info.vram_flags = desc->vram_flags; + xe->info.tile_count = desc->max_tiles; + xe->info.vm_max_level = desc->vm_max_level; + xe->info.media_ver = desc->media_ver; + xe->info.supports_usm = desc->supports_usm; + xe->info.has_flat_ccs = desc->has_flat_ccs; + xe->info.has_4tile = desc->has_4tile; + + spd = subplatform_get(xe, desc); + xe->info.subplatform = spd ? spd->subplatform : XE_SUBPLATFORM_NONE; + xe->info.step = xe_step_get(xe); + + for (id = 0; id < xe->info.tile_count; ++id) { + gt = xe->gt + id; + gt->info.id = id; + gt->xe = xe; + + if (id == 0) { + gt->info.type = XE_GT_TYPE_MAIN; + gt->info.vram_id = id; + gt->info.engine_mask = desc->platform_engine_mask; + gt->mmio.adj_limit = 0; + gt->mmio.adj_offset = 0; + } else { + gt->info.type = desc->extra_gts[id - 1].type; + gt->info.vram_id = desc->extra_gts[id - 1].vram_id; + gt->info.engine_mask = + desc->extra_gts[id - 1].engine_mask; + gt->mmio.adj_limit = + desc->extra_gts[id - 1].mmio_adj_limit; + gt->mmio.adj_offset = + desc->extra_gts[id - 1].mmio_adj_offset; + } + } + + drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx100:%d media100:%d dma_m_s:%d tc:%d", + desc->platform_name, spd ? 
spd->name : "", + xe->info.devid, xe->info.revid, + xe->info.is_dgfx, xe->info.graphics_verx100, + xe->info.media_verx100, + xe->info.dma_mask_size, xe->info.tile_count); + + drm_dbg(&xe->drm, "Stepping = (G:%s, M:%s, D:%s, B:%s)\n", + xe_step_name(xe->info.step.graphics), + xe_step_name(xe->info.step.media), + xe_step_name(xe->info.step.display), + xe_step_name(xe->info.step.basedie)); + + pci_set_drvdata(pdev, xe); + err = pci_enable_device(pdev); + if (err) { + drm_dev_put(&xe->drm); + return err; + } + + pci_set_master(pdev); + + if (pci_enable_msi(pdev) < 0) + drm_dbg(&xe->drm, "can't enable MSI"); + + err = xe_device_probe(xe); + if (err) { + pci_disable_device(pdev); + return err; + } + + xe_pm_runtime_init(xe); + + return 0; +} + +static void xe_pci_shutdown(struct pci_dev *pdev) +{ + xe_device_shutdown(pdev_to_xe_device(pdev)); +} + +#ifdef CONFIG_PM_SLEEP +static int xe_pci_suspend(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int err; + + err = xe_pm_suspend(pdev_to_xe_device(pdev)); + if (err) + return err; + + pci_save_state(pdev); + pci_disable_device(pdev); + + err = pci_set_power_state(pdev, PCI_D3hot); + if (err) + return err; + + return 0; +} + +static int xe_pci_resume(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int err; + + err = pci_set_power_state(pdev, PCI_D0); + if (err) + return err; + + pci_restore_state(pdev); + + err = pci_enable_device(pdev); + if (err) + return err; + + pci_set_master(pdev); + + err = xe_pm_resume(pdev_to_xe_device(pdev)); + if (err) + return err; + + return 0; +} +#endif + +static int xe_pci_runtime_suspend(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct xe_device *xe = pdev_to_xe_device(pdev); + int err; + + err = xe_pm_runtime_suspend(xe); + if (err) + return err; + + pci_save_state(pdev); + + if (xe->d3cold_allowed) { + pci_disable_device(pdev); + pci_ignore_hotplug(pdev); + pci_set_power_state(pdev, PCI_D3cold); + } else { + pci_set_power_state(pdev, PCI_D3hot); + } + + return 0; +} + +static int xe_pci_runtime_resume(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct xe_device *xe = pdev_to_xe_device(pdev); + int err; + + err = pci_set_power_state(pdev, PCI_D0); + if (err) + return err; + + pci_restore_state(pdev); + + if (xe->d3cold_allowed) { + err = pci_enable_device(pdev); + if (err) + return err; + + pci_set_master(pdev); + } + + return xe_pm_runtime_resume(xe); +} + +static int xe_pci_runtime_idle(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct xe_device *xe = pdev_to_xe_device(pdev); + + /* + * FIXME: d3cold should be allowed (true) if + * (IS_DGFX(xe) && !xe_device_mem_access_ongoing(xe)) + * however the change to the buddy allocator broke the + * xe_bo_restore_kernel when the pci device is disabled + */ + xe->d3cold_allowed = false; + + return 0; +} + +static const struct dev_pm_ops xe_pm_ops = { + .suspend = xe_pci_suspend, + .resume = xe_pci_resume, + .freeze = xe_pci_suspend, + .thaw = xe_pci_resume, + .poweroff = xe_pci_suspend, + .restore = xe_pci_resume, + .runtime_suspend = xe_pci_runtime_suspend, + .runtime_resume = xe_pci_runtime_resume, + .runtime_idle = xe_pci_runtime_idle, +}; + +static struct pci_driver xe_pci_driver = { + .name = DRIVER_NAME, + .id_table = pciidlist, + .probe = xe_pci_probe, + .remove = xe_pci_remove, + .shutdown = xe_pci_shutdown, + .driver.pm = &xe_pm_ops, +}; + +int xe_register_pci_driver(void) +{ + return pci_register_driver(&xe_pci_driver); +} + +void 
xe_unregister_pci_driver(void) +{ + pci_unregister_driver(&xe_pci_driver); +} + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +static int dev_to_xe_device_fn(struct device *dev, void *data) + +{ + struct drm_device *drm = dev_get_drvdata(dev); + int (*xe_fn)(struct xe_device *xe) = data; + int ret = 0; + int idx; + + if (drm_dev_enter(drm, &idx)) + ret = xe_fn(to_xe_device(dev_get_drvdata(dev))); + drm_dev_exit(idx); + + return ret; +} + +/** + * xe_call_for_each_device - Iterate over all devices this driver binds to + * @xe_fn: Function to call for each device. + * + * This function iterated over all devices this driver binds to, and calls + * @xe_fn: for each one of them. If the called function returns anything else + * than 0, iteration is stopped and the return value is returned by this + * function. Across each function call, drm_dev_enter() / drm_dev_exit() is + * called for the corresponding drm device. + * + * Return: Zero or the error code of a call to @xe_fn returning an error + * code. + */ +int xe_call_for_each_device(xe_device_fn xe_fn) +{ + return driver_for_each_device(&xe_pci_driver.driver, NULL, + xe_fn, dev_to_xe_device_fn); +} +#endif diff --git a/drivers/gpu/drm/xe/xe_pci.h b/drivers/gpu/drm/xe/xe_pci.h new file mode 100644 index 000000000000..9e3089549d5f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pci.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_PCI_H_ +#define _XE_PCI_H_ + +#include "tests/xe_test.h" + +int xe_register_pci_driver(void); +void xe_unregister_pci_driver(void); + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +struct xe_device; + +typedef int (*xe_device_fn)(struct xe_device *); + +int xe_call_for_each_device(xe_device_fn xe_fn); +#endif +#endif diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c new file mode 100644 index 000000000000..236159c8a6c0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_pcode_api.h" +#include "xe_pcode.h" + +#include "xe_gt.h" +#include "xe_mmio.h" + +#include + +/** + * DOC: PCODE + * + * Xe PCODE is the component responsible for interfacing with the PCODE + * firmware. + * It shall provide a very simple ABI to other Xe components, but be the + * single and consolidated place that will communicate with PCODE. All read + * and write operations to PCODE will be internal and private to this component. 
+ * + * What's next: + * - PCODE hw metrics + * - PCODE for display operations + */ + +static int pcode_mailbox_status(struct xe_gt *gt) +{ + u32 err; + static const struct pcode_err_decode err_decode[] = { + [PCODE_ILLEGAL_CMD] = {-ENXIO, "Illegal Command"}, + [PCODE_TIMEOUT] = {-ETIMEDOUT, "Timed out"}, + [PCODE_ILLEGAL_DATA] = {-EINVAL, "Illegal Data"}, + [PCODE_ILLEGAL_SUBCOMMAND] = {-ENXIO, "Illegal Subcommand"}, + [PCODE_LOCKED] = {-EBUSY, "PCODE Locked"}, + [PCODE_GT_RATIO_OUT_OF_RANGE] = {-EOVERFLOW, + "GT ratio out of range"}, + [PCODE_REJECTED] = {-EACCES, "PCODE Rejected"}, + [PCODE_ERROR_MASK] = {-EPROTO, "Unknown"}, + }; + + lockdep_assert_held(>->pcode.lock); + + err = xe_mmio_read32(gt, PCODE_MAILBOX.reg) & PCODE_ERROR_MASK; + if (err) { + drm_err(>_to_xe(gt)->drm, "PCODE Mailbox failed: %d %s", err, + err_decode[err].str ?: "Unknown"); + return err_decode[err].errno ?: -EPROTO; + } + + return 0; +} + +static bool pcode_mailbox_done(struct xe_gt *gt) +{ + lockdep_assert_held(>->pcode.lock); + return (xe_mmio_read32(gt, PCODE_MAILBOX.reg) & PCODE_READY) == 0; +} + +static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, + unsigned int timeout, bool return_data, bool atomic) +{ + lockdep_assert_held(>->pcode.lock); + + if (!pcode_mailbox_done(gt)) + return -EAGAIN; + + xe_mmio_write32(gt, PCODE_DATA0.reg, *data0); + xe_mmio_write32(gt, PCODE_DATA1.reg, data1 ? *data1 : 0); + xe_mmio_write32(gt, PCODE_MAILBOX.reg, PCODE_READY | mbox); + + if (atomic) + _wait_for_atomic(pcode_mailbox_done(gt), timeout * 1000, 1); + else + wait_for(pcode_mailbox_done(gt), timeout); + + if (return_data) { + *data0 = xe_mmio_read32(gt, PCODE_DATA0.reg); + if (data1) + *data1 = xe_mmio_read32(gt, PCODE_DATA1.reg); + } + + return pcode_mailbox_status(gt); +} + +int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 data, int timeout) +{ + int err; + + mutex_lock(>->pcode.lock); + err = pcode_mailbox_rw(gt, mbox, &data, NULL, timeout, false, false); + mutex_unlock(>->pcode.lock); + + return err; +} + +int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1) +{ + int err; + + mutex_lock(>->pcode.lock); + err = pcode_mailbox_rw(gt, mbox, val, val1, 1, true, false); + mutex_unlock(>->pcode.lock); + + return err; +} + +static bool xe_pcode_try_request(struct xe_gt *gt, u32 mbox, + u32 request, u32 reply_mask, u32 reply, + u32 *status, bool atomic) +{ + *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, atomic); + + return (*status == 0) && ((request & reply_mask) == reply); +} + +/** + * xe_pcode_request - send PCODE request until acknowledgment + * @gt: gt + * @mbox: PCODE mailbox ID the request is targeted for + * @request: request ID + * @reply_mask: mask used to check for request acknowledgment + * @reply: value used to check for request acknowledgment + * @timeout_base_ms: timeout for polling with preemption enabled + * + * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE + * reports an error or an overall timeout of @timeout_base_ms+50 ms expires. + * The request is acknowledged once the PCODE reply dword equals @reply after + * applying @reply_mask. Polling is first attempted with preemption enabled + * for @timeout_base_ms and if this times out for another 50 ms with + * preemption disabled. + * + * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some + * other error as reported by PCODE. 
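
As a rough, self-contained sketch of the request/acknowledge pattern documented above (not taken from the patch: the simulated mailbox, the attempt limit and all values are invented, and the real code uses timed waits rather than a retry counter):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static int attempts;

    /* Pretend PCODE only acknowledges on the third try. */
    static uint32_t mailbox_send(uint32_t request)
    {
        return (++attempts >= 3) ? request : 0;
    }

    static bool pcode_try_request(uint32_t request, uint32_t reply_mask,
                                  uint32_t reply)
    {
        return (mailbox_send(request) & reply_mask) == reply;
    }

    int main(void)
    {
        int tries;

        for (tries = 1; tries <= 10; tries++) {
            if (pcode_try_request(0x1, 0x1, 0x1)) {
                printf("acknowledged after %d tries\n", tries);
                return 0;
            }
        }
        printf("timed out\n");
        return 1;
    }
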
+ */ +int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_base_ms) +{ + u32 status; + int ret; + bool atomic = false; + + mutex_lock(>->pcode.lock); + +#define COND \ + xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status, atomic) + + /* + * Prime the PCODE by doing a request first. Normally it guarantees + * that a subsequent request, at most @timeout_base_ms later, succeeds. + * _wait_for() doesn't guarantee when its passed condition is evaluated + * first, so send the first request explicitly. + */ + if (COND) { + ret = 0; + goto out; + } + ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10); + if (!ret) + goto out; + + /* + * The above can time out if the number of requests was low (2 in the + * worst case) _and_ PCODE was busy for some reason even after a + * (queued) request and @timeout_base_ms delay. As a workaround retry + * the poll with preemption disabled to maximize the number of + * requests. Increase the timeout from @timeout_base_ms to 50ms to + * account for interrupts that could reduce the number of these + * requests, and for any quirks of the PCODE firmware that delays + * the request completion. + */ + drm_err(>_to_xe(gt)->drm, + "PCODE timeout, retrying with preemption disabled\n"); + drm_WARN_ON_ONCE(>_to_xe(gt)->drm, timeout_base_ms > 1); + preempt_disable(); + atomic = true; + ret = wait_for_atomic(COND, 50); + atomic = false; + preempt_enable(); + +out: + mutex_unlock(>->pcode.lock); + return status ? status : ret; +#undef COND +} +/** + * xe_pcode_init_min_freq_table - Initialize PCODE's QOS frequency table + * @gt: gt instance + * @min_gt_freq: Minimal (RPn) GT frequency in units of 50MHz. + * @max_gt_freq: Maximal (RP0) GT frequency in units of 50MHz. + * + * This function initialize PCODE's QOS frequency table for a proper minimal + * frequency/power steering decision, depending on the current requested GT + * frequency. For older platforms this was a more complete table including + * the IA freq. However for the latest platforms this table become a simple + * 1-1 Ring vs GT frequency. Even though, without setting it, PCODE might + * not take the right decisions for some memory frequencies and affect latency. 
+ * + * It returns 0 on success, and -ERROR number on failure, -EINVAL if max + * frequency is higher then the minimal, and other errors directly translated + * from the PCODE Error returs: + * - -ENXIO: "Illegal Command" + * - -ETIMEDOUT: "Timed out" + * - -EINVAL: "Illegal Data" + * - -ENXIO, "Illegal Subcommand" + * - -EBUSY: "PCODE Locked" + * - -EOVERFLOW, "GT ratio out of range" + * - -EACCES, "PCODE Rejected" + * - -EPROTO, "Unknown" + */ +int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq, + u32 max_gt_freq) +{ + int ret; + u32 freq; + + if (IS_DGFX(gt_to_xe(gt))) + return 0; + + if (max_gt_freq <= min_gt_freq) + return -EINVAL; + + mutex_lock(>->pcode.lock); + for (freq = min_gt_freq; freq <= max_gt_freq; freq++) { + u32 data = freq << PCODE_FREQ_RING_RATIO_SHIFT | freq; + + ret = pcode_mailbox_rw(gt, PCODE_WRITE_MIN_FREQ_TABLE, + &data, NULL, 1, false, false); + if (ret) + goto unlock; + } + +unlock: + mutex_unlock(>->pcode.lock); + return ret; +} + +static bool pcode_dgfx_status_complete(struct xe_gt *gt) +{ + u32 data = DGFX_GET_INIT_STATUS; + int status = pcode_mailbox_rw(gt, DGFX_PCODE_STATUS, + &data, NULL, 1, true, false); + + return status == 0 && + (data & DGFX_INIT_STATUS_COMPLETE) == DGFX_INIT_STATUS_COMPLETE; +} + +/** + * xe_pcode_init - Ensure PCODE is initialized + * @gt: gt instance + * + * This function ensures that PCODE is properly initialized. To be called during + * probe and resume paths. + * + * It returns 0 on success, and -error number on failure. + */ +int xe_pcode_init(struct xe_gt *gt) +{ + int timeout = 180000; /* 3 min */ + int ret; + + if (!IS_DGFX(gt_to_xe(gt))) + return 0; + + mutex_lock(>->pcode.lock); + ret = wait_for(pcode_dgfx_status_complete(gt), timeout); + mutex_unlock(>->pcode.lock); + + if (ret) + drm_err(>_to_xe(gt)->drm, + "PCODE initialization timedout after: %d min\n", + timeout / 60000); + + return ret; +} + +/** + * xe_pcode_probe - Prepare xe_pcode and also ensure PCODE is initialized. + * @gt: gt instance + * + * This function initializes the xe_pcode component, and when needed, it ensures + * that PCODE has properly performed its initialization and it is really ready + * to go. To be called once only during probe. + * + * It returns 0 on success, and -error number on failure. 
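
A small stand-alone illustration of how each QOS-table payload built above is composed, with the same frequency (in 50 MHz units) placed in both the ring-ratio field and the GT field; the shift matches PCODE_FREQ_RING_RATIO_SHIFT from xe_pcode_api.h further down in this patch, while the frequency range is made up:

    #include <stdint.h>
    #include <stdio.h>

    #define PCODE_FREQ_RING_RATIO_SHIFT 16

    int main(void)
    {
        uint32_t min_gt_freq = 6;   /* e.g. 300 MHz in 50 MHz units */
        uint32_t max_gt_freq = 24;  /* e.g. 1200 MHz in 50 MHz units */
        uint32_t freq;

        for (freq = min_gt_freq; freq <= max_gt_freq; freq++) {
            /* same composition as the loop in xe_pcode_init_min_freq_table() */
            uint32_t data = freq << PCODE_FREQ_RING_RATIO_SHIFT | freq;

            printf("freq %2u -> mailbox data 0x%08x\n", freq, data);
        }
        return 0;
    }
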
+ */ +int xe_pcode_probe(struct xe_gt *gt) +{ + mutex_init(>->pcode.lock); + + if (!IS_DGFX(gt_to_xe(gt))) + return 0; + + return xe_pcode_init(gt); +} diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h new file mode 100644 index 000000000000..3b4aa8c1a3ba --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pcode.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_PCODE_H_ +#define _XE_PCODE_H_ + +#include +struct xe_gt; + +int xe_pcode_probe(struct xe_gt *gt); +int xe_pcode_init(struct xe_gt *gt); +int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq, + u32 max_gt_freq); +int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1); +int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 val, + int timeout_ms); +#define xe_pcode_write(gt, mbox, val) \ + xe_pcode_write_timeout(gt, mbox, val, 1) + +int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_ms); + +#endif diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h new file mode 100644 index 000000000000..0762c8a912c7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +/* Internal to xe_pcode */ + +#define PCODE_MAILBOX _MMIO(0x138124) +#define PCODE_READY REG_BIT(31) +#define PCODE_MB_PARAM2 REG_GENMASK(23, 16) +#define PCODE_MB_PARAM1 REG_GENMASK(15, 8) +#define PCODE_MB_COMMAND REG_GENMASK(7, 0) +#define PCODE_ERROR_MASK 0xFF +#define PCODE_SUCCESS 0x0 +#define PCODE_ILLEGAL_CMD 0x1 +#define PCODE_TIMEOUT 0x2 +#define PCODE_ILLEGAL_DATA 0x3 +#define PCODE_ILLEGAL_SUBCOMMAND 0x4 +#define PCODE_LOCKED 0x6 +#define PCODE_GT_RATIO_OUT_OF_RANGE 0x10 +#define PCODE_REJECTED 0x11 + +#define PCODE_DATA0 _MMIO(0x138128) +#define PCODE_DATA1 _MMIO(0x13812C) + +/* Min Freq QOS Table */ +#define PCODE_WRITE_MIN_FREQ_TABLE 0x8 +#define PCODE_READ_MIN_FREQ_TABLE 0x9 +#define PCODE_FREQ_RING_RATIO_SHIFT 16 + +/* PCODE Init */ +#define DGFX_PCODE_STATUS 0x7E +#define DGFX_GET_INIT_STATUS 0x0 +#define DGFX_INIT_STATUS_COMPLETE 0x1 + +struct pcode_err_decode { + int errno; + const char *str; +}; + diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h new file mode 100644 index 000000000000..72612c832e88 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_platform_types.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_PLATFORM_INFO_TYPES_H_ +#define _XE_PLATFORM_INFO_TYPES_H_ + +/* Keep in gen based order, and chronological order within a gen */ +enum xe_platform { + XE_PLATFORM_UNINITIALIZED = 0, + /* gen12 */ + XE_TIGERLAKE, + XE_ROCKETLAKE, + XE_DG1, + XE_DG2, + XE_PVC, + XE_ALDERLAKE_S, + XE_ALDERLAKE_P, + XE_METEORLAKE, +}; + +enum xe_subplatform { + XE_SUBPLATFORM_UNINITIALIZED = 0, + XE_SUBPLATFORM_NONE, + XE_SUBPLATFORM_DG2_G10, + XE_SUBPLATFORM_DG2_G11, + XE_SUBPLATFORM_DG2_G12, + XE_SUBPLATFORM_ADLP_RPLU, +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c new file mode 100644 index 000000000000..fb0355530e7b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -0,0 +1,207 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include + +#include "xe_bo.h" +#include "xe_bo_evict.h" +#include "xe_device.h" +#include "xe_pm.h" +#include "xe_gt.h" +#include "xe_ggtt.h" +#include "xe_irq.h" 
+#include "xe_pcode.h" + +/** + * DOC: Xe Power Management + * + * Xe PM shall be guided by the simplicity. + * Use the simplest hook options whenever possible. + * Let's not reinvent the runtime_pm references and hooks. + * Shall have a clear separation of display and gt underneath this component. + * + * What's next: + * + * For now s2idle and s3 are only working in integrated devices. The next step + * is to iterate through all VRAM's BO backing them up into the system memory + * before allowing the system suspend. + * + * Also runtime_pm needs to be here from the beginning. + * + * RC6/RPS are also critical PM features. Let's start with GuCRC and GuC SLPC + * and no wait boost. Frequency optimizations should come on a next stage. + */ + +/** + * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle + * @xe: xe device instance + * + * Return: 0 on success + */ +int xe_pm_suspend(struct xe_device *xe) +{ + struct xe_gt *gt; + u8 id; + int err; + + for_each_gt(gt, xe, id) + xe_gt_suspend_prepare(gt); + + /* FIXME: Super racey... */ + err = xe_bo_evict_all(xe); + if (err) + return err; + + for_each_gt(gt, xe, id) { + err = xe_gt_suspend(gt); + if (err) + return err; + } + + xe_irq_suspend(xe); + + return 0; +} + +/** + * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0 + * @xe: xe device instance + * + * Return: 0 on success + */ +int xe_pm_resume(struct xe_device *xe) +{ + struct xe_gt *gt; + u8 id; + int err; + + for_each_gt(gt, xe, id) { + err = xe_pcode_init(gt); + if (err) + return err; + } + + /* + * This only restores pinned memory which is the memory required for the + * GT(s) to resume. + */ + err = xe_bo_restore_kernel(xe); + if (err) + return err; + + xe_irq_resume(xe); + + for_each_gt(gt, xe, id) + xe_gt_resume(gt); + + err = xe_bo_restore_user(xe); + if (err) + return err; + + return 0; +} + +void xe_pm_runtime_init(struct xe_device *xe) +{ + struct device *dev = xe->drm.dev; + + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, 1000); + pm_runtime_set_active(dev); + pm_runtime_allow(dev); + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); +} + +int xe_pm_runtime_suspend(struct xe_device *xe) +{ + struct xe_gt *gt; + u8 id; + int err; + + if (xe->d3cold_allowed) { + if (xe_device_mem_access_ongoing(xe)) + return -EBUSY; + + err = xe_bo_evict_all(xe); + if (err) + return err; + } + + for_each_gt(gt, xe, id) { + err = xe_gt_suspend(gt); + if (err) + return err; + } + + xe_irq_suspend(xe); + + return 0; +} + +int xe_pm_runtime_resume(struct xe_device *xe) +{ + struct xe_gt *gt; + u8 id; + int err; + + if (xe->d3cold_allowed) { + for_each_gt(gt, xe, id) { + err = xe_pcode_init(gt); + if (err) + return err; + } + + /* + * This only restores pinned memory which is the memory + * required for the GT(s) to resume. 
+ */ + err = xe_bo_restore_kernel(xe); + if (err) + return err; + } + + xe_irq_resume(xe); + + for_each_gt(gt, xe, id) + xe_gt_resume(gt); + + if (xe->d3cold_allowed) { + err = xe_bo_restore_user(xe); + if (err) + return err; + } + + return 0; +} + +int xe_pm_runtime_get(struct xe_device *xe) +{ + return pm_runtime_get_sync(xe->drm.dev); +} + +int xe_pm_runtime_put(struct xe_device *xe) +{ + pm_runtime_mark_last_busy(xe->drm.dev); + return pm_runtime_put_autosuspend(xe->drm.dev); +} + +/* Return true if resume operation happened and usage count was increased */ +bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe) +{ + /* In case we are suspended we need to immediately wake up */ + if (pm_runtime_suspended(xe->drm.dev)) + return !pm_runtime_resume_and_get(xe->drm.dev); + + return false; +} + +int xe_pm_runtime_get_if_active(struct xe_device *xe) +{ + WARN_ON(pm_runtime_suspended(xe->drm.dev)); + return pm_runtime_get_if_active(xe->drm.dev, true); +} diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h new file mode 100644 index 000000000000..b8c5f9558e26 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_PM_H_ +#define _XE_PM_H_ + +#include + +struct xe_device; + +int xe_pm_suspend(struct xe_device *xe); +int xe_pm_resume(struct xe_device *xe); + +void xe_pm_runtime_init(struct xe_device *xe); +int xe_pm_runtime_suspend(struct xe_device *xe); +int xe_pm_runtime_resume(struct xe_device *xe); +int xe_pm_runtime_get(struct xe_device *xe); +int xe_pm_runtime_put(struct xe_device *xe); +bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe); +int xe_pm_runtime_get_if_active(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c new file mode 100644 index 000000000000..6ab9ff442766 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include "xe_engine.h" +#include "xe_preempt_fence.h" +#include "xe_vm.h" + +static void preempt_fence_work_func(struct work_struct *w) +{ + bool cookie = dma_fence_begin_signalling(); + struct xe_preempt_fence *pfence = + container_of(w, typeof(*pfence), preempt_work); + struct xe_engine *e = pfence->engine; + + if (pfence->error) + dma_fence_set_error(&pfence->base, pfence->error); + else + e->ops->suspend_wait(e); + + dma_fence_signal(&pfence->base); + dma_fence_end_signalling(cookie); + + queue_work(system_unbound_wq, &e->vm->preempt.rebind_work); + + xe_engine_put(e); +} + +static const char * +preempt_fence_get_driver_name(struct dma_fence *fence) +{ + return "xe"; +} + +static const char * +preempt_fence_get_timeline_name(struct dma_fence *fence) +{ + return "preempt"; +} + +static bool preempt_fence_enable_signaling(struct dma_fence *fence) +{ + struct xe_preempt_fence *pfence = + container_of(fence, typeof(*pfence), base); + struct xe_engine *e = pfence->engine; + + pfence->error = e->ops->suspend(e); + queue_work(system_unbound_wq, &pfence->preempt_work); + return true; +} + +static const struct dma_fence_ops preempt_fence_ops = { + .get_driver_name = preempt_fence_get_driver_name, + .get_timeline_name = preempt_fence_get_timeline_name, + .enable_signaling = preempt_fence_enable_signaling, +}; + +/** + * xe_preempt_fence_alloc() - Allocate a preempt fence with minimal + * initialization + * + * Allocate a preempt fence, and 
initialize its list head. + * If the preempt_fence allocated has been armed with + * xe_preempt_fence_arm(), it must be freed using dma_fence_put(). If not, + * it must be freed using xe_preempt_fence_free(). + * + * Return: A struct xe_preempt_fence pointer used for calling into + * xe_preempt_fence_arm() or xe_preempt_fence_free(). + * An error pointer on error. + */ +struct xe_preempt_fence *xe_preempt_fence_alloc(void) +{ + struct xe_preempt_fence *pfence; + + pfence = kmalloc(sizeof(*pfence), GFP_KERNEL); + if (!pfence) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&pfence->link); + INIT_WORK(&pfence->preempt_work, preempt_fence_work_func); + + return pfence; +} + +/** + * xe_preempt_fence_free() - Free a preempt fence allocated using + * xe_preempt_fence_alloc(). + * @pfence: pointer obtained from xe_preempt_fence_alloc(); + * + * Free a preempt fence that has not yet been armed. + */ +void xe_preempt_fence_free(struct xe_preempt_fence *pfence) +{ + list_del(&pfence->link); + kfree(pfence); +} + +/** + * xe_preempt_fence_arm() - Arm a preempt fence allocated using + * xe_preempt_fence_alloc(). + * @pfence: The struct xe_preempt_fence pointer returned from + * xe_preempt_fence_alloc(). + * @e: The struct xe_engine used for arming. + * @context: The dma-fence context used for arming. + * @seqno: The dma-fence seqno used for arming. + * + * Inserts the preempt fence into @context's timeline, takes @link off any + * list, and registers the struct xe_engine as the xe_engine to be preempted. + * + * Return: A pointer to a struct dma_fence embedded into the preempt fence. + * This function doesn't error. + */ +struct dma_fence * +xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_engine *e, + u64 context, u32 seqno) +{ + list_del_init(&pfence->link); + pfence->engine = xe_engine_get(e); + dma_fence_init(&pfence->base, &preempt_fence_ops, + &e->compute.lock, context, seqno); + + return &pfence->base; +} + +/** + * xe_preempt_fence_create() - Helper to create and arm a preempt fence. + * @e: The struct xe_engine used for arming. + * @context: The dma-fence context used for arming. + * @seqno: The dma-fence seqno used for arming. + * + * Allocates and inserts the preempt fence into @context's timeline, + * and registers @e as the struct xe_engine to be preempted. + * + * Return: A pointer to the resulting struct dma_fence on success. An error + * pointer on error. 
In particular if allocation fails it returns + * ERR_PTR(-ENOMEM); + */ +struct dma_fence * +xe_preempt_fence_create(struct xe_engine *e, + u64 context, u32 seqno) +{ + struct xe_preempt_fence *pfence; + + pfence = xe_preempt_fence_alloc(); + if (IS_ERR(pfence)) + return ERR_CAST(pfence); + + return xe_preempt_fence_arm(pfence, e, context, seqno); +} + +bool xe_fence_is_xe_preempt(const struct dma_fence *fence) +{ + return fence->ops == &preempt_fence_ops; +} diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.h b/drivers/gpu/drm/xe/xe_preempt_fence.h new file mode 100644 index 000000000000..4f3966103203 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_preempt_fence.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_PREEMPT_FENCE_H_ +#define _XE_PREEMPT_FENCE_H_ + +#include "xe_preempt_fence_types.h" + +struct list_head; + +struct dma_fence * +xe_preempt_fence_create(struct xe_engine *e, + u64 context, u32 seqno); + +struct xe_preempt_fence *xe_preempt_fence_alloc(void); + +void xe_preempt_fence_free(struct xe_preempt_fence *pfence); + +struct dma_fence * +xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_engine *e, + u64 context, u32 seqno); + +static inline struct xe_preempt_fence * +to_preempt_fence(struct dma_fence *fence) +{ + return container_of(fence, struct xe_preempt_fence, base); +} + +/** + * xe_preempt_fence_link() - Return a link used to keep unarmed preempt + * fences on a list. + * @pfence: Pointer to the preempt fence. + * + * The link is embedded in the struct xe_preempt_fence. Use + * link_to_preempt_fence() to convert back to the preempt fence. + * + * Return: A pointer to an embedded struct list_head. + */ +static inline struct list_head * +xe_preempt_fence_link(struct xe_preempt_fence *pfence) +{ + return &pfence->link; +} + +/** + * to_preempt_fence_from_link() - Convert back to a preempt fence pointer + * from a link obtained with xe_preempt_fence_link(). + * @link: The struct list_head obtained from xe_preempt_fence_link(). + * + * Return: A pointer to the embedding struct xe_preempt_fence. + */ +static inline struct xe_preempt_fence * +to_preempt_fence_from_link(struct list_head *link) +{ + return container_of(link, struct xe_preempt_fence, link); +} + +bool xe_fence_is_xe_preempt(const struct dma_fence *fence); +#endif diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h new file mode 100644 index 000000000000..9d9efd8ff0ed --- /dev/null +++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_PREEMPT_FENCE_TYPES_H_ +#define _XE_PREEMPT_FENCE_TYPES_H_ + +#include +#include + +struct xe_engine; + +/** + * struct xe_preempt_fence - XE preempt fence + * + * A preemption fence which suspends the execution of an xe_engine on the + * hardware and triggers a callback once the xe_engine is complete. 
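
The link helpers above rely on the usual embedded-list-head idiom: the list only ever stores the embedded member, and container_of() recovers the owning fence. A self-contained illustration (the struct names and the seqno field are invented):

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct list_head { struct list_head *next, *prev; };

    struct preempt_fence {
        int seqno;
        struct list_head link;  /* embedded, like xe_preempt_fence.link */
    };

    int main(void)
    {
        struct preempt_fence f = { .seqno = 42 };
        struct list_head *link = &f.link;   /* what a pending list would hold */
        struct preempt_fence *back =
            container_of(link, struct preempt_fence, link);

        printf("recovered seqno %d\n", back->seqno);
        return 0;
    }
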
+ */ +struct xe_preempt_fence { + /** @base: dma fence base */ + struct dma_fence base; + /** @link: link into list of pending preempt fences */ + struct list_head link; + /** @engine: xe engine for this preempt fence */ + struct xe_engine *engine; + /** @preempt_work: work struct which issues preemption */ + struct work_struct preempt_work; + /** @error: preempt fence is in error state */ + int error; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c new file mode 100644 index 000000000000..81193ddd0af7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -0,0 +1,1542 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_migrate.h" +#include "xe_pt.h" +#include "xe_pt_types.h" +#include "xe_pt_walk.h" +#include "xe_vm.h" +#include "xe_res_cursor.h" + +struct xe_pt_dir { + struct xe_pt pt; + /** @dir: Directory structure for the xe_pt_walk functionality */ + struct xe_ptw_dir dir; +}; + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) +#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr)) +#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr) +#else +#define xe_pt_set_addr(__xe_pt, __addr) +#define xe_pt_addr(__xe_pt) 0ull +#endif + +static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48}; +static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48}; + +#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1) + +static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) +{ + return container_of(pt, struct xe_pt_dir, pt); +} + +static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) +{ + return container_of(pt_dir->dir.entries[index], struct xe_pt, base); +} + +/** + * gen8_pde_encode() - Encode a page-table directory entry pointing to + * another page-table. + * @bo: The page-table bo of the page-table to point to. + * @bo_offset: Offset in the page-table bo to point to. + * @level: The cache level indicating the caching of @bo. + * + * TODO: Rename. + * + * Return: An encoded page directory entry. No errors. 
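
The two shift tables defined above determine how much address space a single entry spans at each level; the only difference is the 64 KiB leaf granularity of the compact layout. A stand-alone illustration:

    #include <stdint.h>
    #include <stdio.h>

    static const uint64_t xe_normal_pt_shifts[]  = {12, 21, 30, 39, 48};
    static const uint64_t xe_compact_pt_shifts[] = {16, 21, 30, 39, 48};

    int main(void)
    {
        int level;

        for (level = 0; level < 5; level++)
            printf("level %d: normal entry spans 0x%llx bytes, compact 0x%llx bytes\n",
                   level,
                   1ull << xe_normal_pt_shifts[level],
                   1ull << xe_compact_pt_shifts[level]);
        return 0;
    }
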
+ */ +u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset, + const enum xe_cache_level level) +{ + u64 pde; + bool is_lmem; + + pde = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_lmem); + pde |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW; + + XE_WARN_ON(IS_DGFX(xe_bo_device(bo)) && !is_lmem); + + /* FIXME: I don't think the PPAT handling is correct for MTL */ + + if (level != XE_CACHE_NONE) + pde |= PPAT_CACHED_PDE; + else + pde |= PPAT_UNCACHED; + + return pde; +} + +static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset, + size_t page_size, bool *is_lmem) +{ + if (xe_vma_is_userptr(vma)) { + struct xe_res_cursor cur; + u64 page; + + *is_lmem = false; + page = offset >> PAGE_SHIFT; + offset &= (PAGE_SIZE - 1); + + xe_res_first_sg(vma->userptr.sg, page << PAGE_SHIFT, page_size, + &cur); + return xe_res_dma(&cur) + offset; + } else { + return xe_bo_addr(vma->bo, offset, page_size, is_lmem); + } +} + +static u64 __gen8_pte_encode(u64 pte, enum xe_cache_level cache, u32 flags, + u32 pt_level) +{ + pte |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW; + + if (unlikely(flags & PTE_READ_ONLY)) + pte &= ~GEN8_PAGE_RW; + + /* FIXME: I don't think the PPAT handling is correct for MTL */ + + switch (cache) { + case XE_CACHE_NONE: + pte |= PPAT_UNCACHED; + break; + case XE_CACHE_WT: + pte |= PPAT_DISPLAY_ELLC; + break; + default: + pte |= PPAT_CACHED; + break; + } + + if (pt_level == 1) + pte |= GEN8_PDE_PS_2M; + else if (pt_level == 2) + pte |= GEN8_PDPE_PS_1G; + + /* XXX: Does hw support 1 GiB pages? */ + XE_BUG_ON(pt_level > 2); + + return pte; +} + +/** + * gen8_pte_encode() - Encode a page-table entry pointing to memory. + * @vma: The vma representing the memory to point to. + * @bo: If @vma is NULL, representing the memory to point to. + * @offset: The offset into @vma or @bo. + * @cache: The cache level indicating + * @flags: Currently only supports PTE_READ_ONLY for read-only access. + * @pt_level: The page-table level of the page-table into which the entry + * is to be inserted. + * + * TODO: Rename. + * + * Return: An encoded page-table entry. No errors. + */ +u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo, + u64 offset, enum xe_cache_level cache, + u32 flags, u32 pt_level) +{ + u64 pte; + bool is_vram; + + if (vma) + pte = vma_addr(vma, offset, GEN8_PAGE_SIZE, &is_vram); + else + pte = xe_bo_addr(bo, offset, GEN8_PAGE_SIZE, &is_vram); + + if (is_vram) { + pte |= GEN12_PPGTT_PTE_LM; + if (vma && vma->use_atomic_access_pte_bit) + pte |= GEN12_USM_PPGTT_PTE_AE; + } + + return __gen8_pte_encode(pte, cache, flags, pt_level); +} + +static u64 __xe_pt_empty_pte(struct xe_gt *gt, struct xe_vm *vm, + unsigned int level) +{ + u8 id = gt->info.id; + + XE_BUG_ON(xe_gt_is_media_type(gt)); + + if (!vm->scratch_bo[id]) + return 0; + + if (level == 0) { + u64 empty = gen8_pte_encode(NULL, vm->scratch_bo[id], 0, + XE_CACHE_WB, 0, 0); + if (vm->flags & XE_VM_FLAGS_64K) + empty |= GEN12_PTE_PS64; + + return empty; + } else { + return gen8_pde_encode(vm->scratch_pt[id][level - 1]->bo, 0, + XE_CACHE_WB); + } +} + +/** + * xe_pt_create() - Create a page-table. + * @vm: The vm to create for. + * @gt: The gt to create for. + * @level: The page-table level. + * + * Allocate and initialize a single struct xe_pt metadata structure. Also + * create the corresponding page-table bo, but don't initialize it. If the + * level is grater than zero, then it's assumed to be a directory page- + * table and the directory structure is also allocated and initialized to + * NULL pointers. 
+ * + * Return: A valid struct xe_pt pointer on success, Pointer error code on + * error. + */ +struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt, + unsigned int level) +{ + struct xe_pt *pt; + struct xe_bo *bo; + size_t size; + int err; + + size = !level ? sizeof(struct xe_pt) : sizeof(struct xe_pt_dir) + + GEN8_PDES * sizeof(struct xe_ptw *); + pt = kzalloc(size, GFP_KERNEL); + if (!pt) + return ERR_PTR(-ENOMEM); + + bo = xe_bo_create_pin_map(vm->xe, gt, vm, SZ_4K, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT | + XE_BO_CREATE_PINNED_BIT); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + goto err_kfree; + } + pt->bo = bo; + pt->level = level; + pt->base.dir = level ? &as_xe_pt_dir(pt)->dir : NULL; + + XE_BUG_ON(level > XE_VM_MAX_LEVEL); + + return pt; + +err_kfree: + kfree(pt); + return ERR_PTR(err); +} + +/** + * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero + * entries. + * @gt: The gt the scratch pagetable of which to use. + * @vm: The vm we populate for. + * @pt: The pagetable the bo of which to initialize. + * + * Populate the page-table bo of @pt with entries pointing into the gt's + * scratch page-table tree if any. Otherwise populate with zeros. + */ +void xe_pt_populate_empty(struct xe_gt *gt, struct xe_vm *vm, + struct xe_pt *pt) +{ + struct iosys_map *map = &pt->bo->vmap; + u64 empty; + int i; + + XE_BUG_ON(xe_gt_is_media_type(gt)); + + if (!vm->scratch_bo[gt->info.id]) { + /* + * FIXME: Some memory is allocated already allocated to zero? + * Find out which memory that is and avoid this memset... + */ + xe_map_memset(vm->xe, map, 0, 0, SZ_4K); + } else { + empty = __xe_pt_empty_pte(gt, vm, pt->level); + for (i = 0; i < GEN8_PDES; i++) + xe_pt_write(vm->xe, map, i, empty); + } +} + +/** + * xe_pt_shift() - Return the ilog2 value of the size of the address range of + * a page-table at a certain level. + * @level: The level. + * + * Return: The ilog2 value of the size of the address range of a page-table + * at level @level. + */ +unsigned int xe_pt_shift(unsigned int level) +{ + return GEN8_PTE_SHIFT + GEN8_PDE_SHIFT * level; +} + +/** + * xe_pt_destroy() - Destroy a page-table tree. + * @pt: The root of the page-table tree to destroy. + * @flags: vm flags. Currently unused. + * @deferred: List head of lockless list for deferred putting. NULL for + * immediate putting. + * + * Puts the page-table bo, recursively calls xe_pt_destroy on all children + * and finally frees @pt. TODO: Can we remove the @flags argument? + */ +void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) +{ + int i; + + if (!pt) + return; + + XE_BUG_ON(!list_empty(&pt->bo->vmas)); + xe_bo_unpin(pt->bo); + xe_bo_put_deferred(pt->bo, deferred); + + if (pt->level > 0 && pt->num_live) { + struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); + + for (i = 0; i < GEN8_PDES; i++) { + if (xe_pt_entry(pt_dir, i)) + xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, + deferred); + } + } + kfree(pt); +} + +/** + * xe_pt_create_scratch() - Setup a scratch memory pagetable tree for the + * given gt and vm. + * @xe: xe device. + * @gt: gt to set up for. + * @vm: vm to set up for. + * + * Sets up a pagetable tree with one page-table per level and a single + * leaf bo. All pagetable entries point to the single page-table or, + * for L0, the single bo one level below. + * + * Return: 0 on success, negative error code on error. 
+ */ +int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm) +{ + u8 id = gt->info.id; + int i; + + vm->scratch_bo[id] = xe_bo_create(xe, gt, vm, SZ_4K, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT | + XE_BO_CREATE_PINNED_BIT); + if (IS_ERR(vm->scratch_bo[id])) + return PTR_ERR(vm->scratch_bo[id]); + xe_bo_pin(vm->scratch_bo[id]); + + for (i = 0; i < vm->pt_root[id]->level; i++) { + vm->scratch_pt[id][i] = xe_pt_create(vm, gt, i); + if (IS_ERR(vm->scratch_pt[id][i])) + return PTR_ERR(vm->scratch_pt[id][i]); + + xe_pt_populate_empty(gt, vm, vm->scratch_pt[id][i]); + } + + return 0; +} + +/** + * DOC: Pagetable building + * + * Below we use the term "page-table" for both page-directories, containing + * pointers to lower level page-directories or page-tables, and level 0 + * page-tables that contain only page-table-entries pointing to memory pages. + * + * When inserting an address range in an already existing page-table tree + * there will typically be a set of page-tables that are shared with other + * address ranges, and a set that are private to this address range. + * The set of shared page-tables can be at most two per level, + * and those can't be updated immediately because the entries of those + * page-tables may still be in use by the gpu for other mappings. Therefore + * when inserting entries into those, we instead stage those insertions by + * adding insertion data into struct xe_vm_pgtable_update structures. This + * data, (subtrees for the cpu and page-table-entries for the gpu) is then + * added in a separate commit step. CPU-data is committed while still under the + * vm lock, the object lock and for userptr, the notifier lock in read mode. + * The GPU async data is committed either by the GPU or CPU after fulfilling + * relevant dependencies. + * For non-shared page-tables (and, in fact, for shared ones that aren't + * existing at the time of staging), we add the data in-place without the + * special update structures. This private part of the page-table tree will + * remain disconnected from the vm page-table tree until data is committed to + * the shared page tables of the vm tree in the commit phase. + */ + +struct xe_pt_update { + /** @update: The update structure we're building for this parent. */ + struct xe_vm_pgtable_update *update; + /** @parent: The parent. Used to detect a parent change. */ + struct xe_pt *parent; + /** @preexisting: Whether the parent was pre-existing or allocated */ + bool preexisting; +}; + +struct xe_pt_stage_bind_walk { + /** base: The base class. */ + struct xe_pt_walk base; + + /* Input parameters for the walk */ + /** @vm: The vm we're building for. */ + struct xe_vm *vm; + /** @gt: The gt we're building for. */ + struct xe_gt *gt; + /** @cache: Desired cache level for the ptes */ + enum xe_cache_level cache; + /** @default_pte: PTE flag only template. No address is associated */ + u64 default_pte; + /** @dma_offset: DMA offset to add to the PTE. */ + u64 dma_offset; + /** + * @needs_64k: This address range enforces 64K alignment and + * granularity. + */ + bool needs_64K; + /** + * @pte_flags: Flags determining PTE setup. These are not flags + * encoded directly in the PTE. See @default_pte for those. + */ + u32 pte_flags; + + /* Also input, but is updated during the walk*/ + /** @curs: The DMA address cursor. 
*/ + struct xe_res_cursor *curs; + /** @va_curs_start: The Virtual address coresponding to @curs->start */ + u64 va_curs_start; + + /* Output */ + struct xe_walk_update { + /** @wupd.entries: Caller provided storage. */ + struct xe_vm_pgtable_update *entries; + /** @wupd.num_used_entries: Number of update @entries used. */ + unsigned int num_used_entries; + /** @wupd.updates: Tracks the update entry at a given level */ + struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1]; + } wupd; + + /* Walk state */ + /** + * @l0_end_addr: The end address of the current l0 leaf. Used for + * 64K granularity detection. + */ + u64 l0_end_addr; + /** @addr_64K: The start address of the current 64K chunk. */ + u64 addr_64K; + /** @found_64: Whether @add_64K actually points to a 64K chunk. */ + bool found_64K; +}; + +static int +xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent, + pgoff_t offset, bool alloc_entries) +{ + struct xe_pt_update *upd = &wupd->updates[parent->level]; + struct xe_vm_pgtable_update *entry; + + /* + * For *each level*, we could only have one active + * struct xt_pt_update at any one time. Once we move on to a + * new parent and page-directory, the old one is complete, and + * updates are either already stored in the build tree or in + * @wupd->entries + */ + if (likely(upd->parent == parent)) + return 0; + + upd->parent = parent; + upd->preexisting = true; + + if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1) + return -EINVAL; + + entry = wupd->entries + wupd->num_used_entries++; + upd->update = entry; + entry->ofs = offset; + entry->pt_bo = parent->bo; + entry->pt = parent; + entry->flags = 0; + entry->qwords = 0; + + if (alloc_entries) { + entry->pt_entries = kmalloc_array(GEN8_PDES, + sizeof(*entry->pt_entries), + GFP_KERNEL); + if (!entry->pt_entries) + return -ENOMEM; + } + + return 0; +} + +/* + * NOTE: This is a very frequently called function so we allow ourselves + * to annotate (using branch prediction hints) the fastpath of updating a + * non-pre-existing pagetable with leaf ptes. + */ +static int +xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, + pgoff_t offset, struct xe_pt *xe_child, u64 pte) +{ + struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level]; + struct xe_pt_update *child_upd = xe_child ? + &xe_walk->wupd.updates[xe_child->level] : NULL; + int ret; + + ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true); + if (unlikely(ret)) + return ret; + + /* + * Register this new pagetable so that it won't be recognized as + * a shared pagetable by a subsequent insertion. + */ + if (unlikely(child_upd)) { + child_upd->update = NULL; + child_upd->parent = xe_child; + child_upd->preexisting = false; + } + + if (likely(!upd->preexisting)) { + /* Continue building a non-connected subtree. */ + struct iosys_map *map = &parent->bo->vmap; + + if (unlikely(xe_child)) + parent->base.dir->entries[offset] = &xe_child->base; + + xe_pt_write(xe_walk->vm->xe, map, offset, pte); + parent->num_live++; + } else { + /* Shared pt. Stage update. */ + unsigned int idx; + struct xe_vm_pgtable_update *entry = upd->update; + + idx = offset - entry->ofs; + entry->pt_entries[idx].pt = xe_child; + entry->pt_entries[idx].pte = pte; + entry->qwords++; + } + + return 0; +} + +static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, + struct xe_pt_stage_bind_walk *xe_walk) +{ + u64 size, dma; + + /* Does the virtual range requested cover a huge pte? 
*/ + if (!xe_pt_covers(addr, next, level, &xe_walk->base)) + return false; + + /* Does the DMA segment cover the whole pte? */ + if (next - xe_walk->va_curs_start > xe_walk->curs->size) + return false; + + /* Is the DMA address huge PTE size aligned? */ + size = next - addr; + dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs); + + return IS_ALIGNED(dma, size); +} + +/* + * Scan the requested mapping to check whether it can be done entirely + * with 64K PTEs. + */ +static bool +xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) +{ + struct xe_res_cursor curs = *xe_walk->curs; + + if (!IS_ALIGNED(addr, SZ_64K)) + return false; + + if (next > xe_walk->l0_end_addr) + return false; + + xe_res_next(&curs, addr - xe_walk->va_curs_start); + for (; addr < next; addr += SZ_64K) { + if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K) + return false; + + xe_res_next(&curs, SZ_64K); + } + + return addr == next; +} + +/* + * For non-compact "normal" 4K level-0 pagetables, we want to try to group + * addresses together in 64K-contigous regions to add a 64K TLB hint for the + * device to the PTE. + * This function determines whether the address is part of such a + * segment. For VRAM in normal pagetables, this is strictly necessary on + * some devices. + */ +static bool +xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) +{ + /* Address is within an already found 64k region */ + if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K) + return true; + + xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk); + xe_walk->addr_64K = addr; + + return xe_walk->found_64K; +} + +static int +xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, + unsigned int level, u64 addr, u64 next, + struct xe_ptw **child, + enum page_walk_action *action, + struct xe_pt_walk *walk) +{ + struct xe_pt_stage_bind_walk *xe_walk = + container_of(walk, typeof(*xe_walk), base); + struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); + struct xe_pt *xe_child; + bool covers; + int ret = 0; + u64 pte; + + /* Is this a leaf entry ?*/ + if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { + struct xe_res_cursor *curs = xe_walk->curs; + + XE_WARN_ON(xe_walk->va_curs_start != addr); + + pte = __gen8_pte_encode(xe_res_dma(curs) + xe_walk->dma_offset, + xe_walk->cache, xe_walk->pte_flags, + level); + pte |= xe_walk->default_pte; + + /* + * Set the GEN12_PTE_PS64 hint if possible, otherwise if + * this device *requires* 64K PTE size for VRAM, fail. + */ + if (level == 0 && !xe_parent->is_compact) { + if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) + pte |= GEN12_PTE_PS64; + else if (XE_WARN_ON(xe_walk->needs_64K)) + return -EINVAL; + } + + ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte); + if (unlikely(ret)) + return ret; + + xe_res_next(curs, next - addr); + xe_walk->va_curs_start = next; + *action = ACTION_CONTINUE; + + return ret; + } + + /* + * Descending to lower level. Determine if we need to allocate a + * new page table or -directory, which we do if there is no + * previous one or there is one we can completely replace. 
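
A self-contained sketch of the alignment reasoning used by the helpers above: a huge (or 64K) PTE is only considered when both the virtual span and the backing DMA address line up with the requested size. The check is simplified (the real helpers also consult the walk bounds and the DMA cursor) and all addresses are invented:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* assumes a power-of-two size, as page-table spans always are */
    static bool is_aligned(uint64_t x, uint64_t a)
    {
        return (x & (a - 1)) == 0;
    }

    static bool hugepte_possible(uint64_t va, uint64_t next, uint64_t dma)
    {
        uint64_t size = next - va;

        return is_aligned(va, size) && is_aligned(dma, size);
    }

    int main(void)
    {
        /* 2 MiB virtual span backed at a 2 MiB aligned DMA address: OK */
        printf("%d\n", hugepte_possible(0x200000, 0x400000, 0x40200000));
        /* same span backed at a 64 KiB aligned address: no huge PTE */
        printf("%d\n", hugepte_possible(0x200000, 0x400000, 0x40010000));
        return 0;
    }
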
+ */ + if (level == 1) { + walk->shifts = xe_normal_pt_shifts; + xe_walk->l0_end_addr = next; + } + + covers = xe_pt_covers(addr, next, level, &xe_walk->base); + if (covers || !*child) { + u64 flags = 0; + + xe_child = xe_pt_create(xe_walk->vm, xe_walk->gt, level - 1); + if (IS_ERR(xe_child)) + return PTR_ERR(xe_child); + + xe_pt_set_addr(xe_child, + round_down(addr, 1ull << walk->shifts[level])); + + if (!covers) + xe_pt_populate_empty(xe_walk->gt, xe_walk->vm, xe_child); + + *child = &xe_child->base; + + /* + * Prefer the compact pagetable layout for L0 if possible. + * TODO: Suballocate the pt bo to avoid wasting a lot of + * memory. + */ + if (GRAPHICS_VERx100(xe_walk->gt->xe) >= 1250 && level == 1 && + covers && xe_pt_scan_64K(addr, next, xe_walk)) { + walk->shifts = xe_compact_pt_shifts; + flags |= GEN12_PDE_64K; + xe_child->is_compact = true; + } + + pte = gen8_pde_encode(xe_child->bo, 0, xe_walk->cache) | flags; + ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, + pte); + } + + *action = ACTION_SUBTREE; + return ret; +} + +static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { + .pt_entry = xe_pt_stage_bind_entry, +}; + +/** + * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address + * range. + * @gt: The gt we're building for. + * @vma: The vma indicating the address range. + * @entries: Storage for the update entries used for connecting the tree to + * the main tree at commit time. + * @num_entries: On output contains the number of @entries used. + * + * This function builds a disconnected page-table tree for a given address + * range. The tree is connected to the main vm tree for the gpu using + * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind(). + * The function builds xe_vm_pgtable_update structures for already existing + * shared page-tables, and non-existing shared and non-shared page-tables + * are built and populated directly. + * + * Return 0 on success, negative error code on error. 
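
The compact-layout check above compares GRAPHICS_VERx100() against 1250, i.e. the flattened version value that xe_pci.c computes as graphics_ver * 100 + graphics_rel. A trivial stand-alone illustration using release numbers taken from the descriptors earlier in this patch:

    #include <stdio.h>

    static int verx100(int ver, int rel)
    {
        return ver * 100 + rel;
    }

    int main(void)
    {
        printf("DG1 (12.10)             -> %d\n", verx100(12, 10));
        printf("XE_HP_FEATURES (12.50)  -> %d\n", verx100(12, 50));
        printf("DG2 (12.55)             -> %d\n", verx100(12, 55));
        printf("MTL (12.70)             -> %d\n", verx100(12, 70));
        return 0;
    }
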
+ */ +static int +xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma, + struct xe_vm_pgtable_update *entries, u32 *num_entries) +{ + struct xe_bo *bo = vma->bo; + bool is_vram = !xe_vma_is_userptr(vma) && bo && xe_bo_is_vram(bo); + struct xe_res_cursor curs; + struct xe_pt_stage_bind_walk xe_walk = { + .base = { + .ops = &xe_pt_stage_bind_ops, + .shifts = xe_normal_pt_shifts, + .max_level = XE_PT_HIGHEST_LEVEL, + }, + .vm = vma->vm, + .gt = gt, + .curs = &curs, + .va_curs_start = vma->start, + .pte_flags = vma->pte_flags, + .wupd.entries = entries, + .needs_64K = (vma->vm->flags & XE_VM_FLAGS_64K) && is_vram, + }; + struct xe_pt *pt = vma->vm->pt_root[gt->info.id]; + int ret; + + if (is_vram) { + xe_walk.default_pte = GEN12_PPGTT_PTE_LM; + if (vma && vma->use_atomic_access_pte_bit) + xe_walk.default_pte |= GEN12_USM_PPGTT_PTE_AE; + xe_walk.dma_offset = gt->mem.vram.io_start - + gt_to_xe(gt)->mem.vram.io_start; + xe_walk.cache = XE_CACHE_WB; + } else { + if (!xe_vma_is_userptr(vma) && bo->flags & XE_BO_SCANOUT_BIT) + xe_walk.cache = XE_CACHE_WT; + else + xe_walk.cache = XE_CACHE_WB; + } + + xe_bo_assert_held(bo); + if (xe_vma_is_userptr(vma)) + xe_res_first_sg(vma->userptr.sg, 0, vma->end - vma->start + 1, + &curs); + else if (xe_bo_is_vram(bo)) + xe_res_first(bo->ttm.resource, vma->bo_offset, + vma->end - vma->start + 1, &curs); + else + xe_res_first_sg(xe_bo_get_sg(bo), vma->bo_offset, + vma->end - vma->start + 1, &curs); + + ret = xe_pt_walk_range(&pt->base, pt->level, vma->start, vma->end + 1, + &xe_walk.base); + + *num_entries = xe_walk.wupd.num_used_entries; + return ret; +} + +/** + * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a + * shared pagetable. + * @addr: The start address within the non-shared pagetable. + * @end: The end address within the non-shared pagetable. + * @level: The level of the non-shared pagetable. + * @walk: Walk info. The function adjusts the walk action. + * @action: next action to perform (see enum page_walk_action) + * @offset: Ignored on input, First non-shared entry on output. + * @end_offset: Ignored on input, Last non-shared entry + 1 on output. + * + * A non-shared page-table has some entries that belong to the address range + * and others that don't. This function determines the entries that belong + * fully to the address range. Depending on level, some entries may + * partially belong to the address range (that can't happen at level 0). + * The function detects that and adjust those offsets to not include those + * partial entries. Iff it does detect partial entries, we know that there must + * be shared page tables also at lower levels, so it adjusts the walk action + * accordingly. + * + * Return: true if there were non-shared entries, false otherwise. 
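
A stand-alone model of the offset trimming described above: entries that only partially overlap the address range at this level are skipped, since they are reached through shared lower-level tables instead. The helper below mirrors the idea with plain divisions; sizes and addresses are made up:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool nonshared_offsets(uint64_t addr, uint64_t end, uint64_t entry_size,
                                  uint64_t *first, uint64_t *last)
    {
        *first = addr / entry_size;
        *last = (end + entry_size - 1) / entry_size;  /* one past the last */

        if (addr % entry_size)
            (*first)++;     /* partially covered first entry is shared */
        if (end % entry_size)
            (*last)--;      /* partially covered last entry is shared */

        return *last > *first;
    }

    int main(void)
    {
        uint64_t first, last;
        /* 0x180000..0x680000 against 2 MiB entries: only two are fully covered */
        bool any = nonshared_offsets(0x180000, 0x680000, 0x200000, &first, &last);

        printf("any fully covered entries: %d, range [%llu, %llu)\n",
               any, (unsigned long long)first, (unsigned long long)last);
        return 0;
    }
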
+ */ +static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level, + struct xe_pt_walk *walk, + enum page_walk_action *action, + pgoff_t *offset, pgoff_t *end_offset) +{ + u64 size = 1ull << walk->shifts[level]; + + *offset = xe_pt_offset(addr, level, walk); + *end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset; + + if (!level) + return true; + + /* + * If addr or next are not size aligned, there are shared pts at lower + * level, so in that case traverse down the subtree + */ + *action = ACTION_CONTINUE; + if (!IS_ALIGNED(addr, size)) { + *action = ACTION_SUBTREE; + (*offset)++; + } + + if (!IS_ALIGNED(end, size)) { + *action = ACTION_SUBTREE; + (*end_offset)--; + } + + return *end_offset > *offset; +} + +struct xe_pt_zap_ptes_walk { + /** @base: The walk base-class */ + struct xe_pt_walk base; + + /* Input parameters for the walk */ + /** @gt: The gt we're building for */ + struct xe_gt *gt; + + /* Output */ + /** @needs_invalidate: Whether we need to invalidate TLB*/ + bool needs_invalidate; +}; + +static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset, + unsigned int level, u64 addr, u64 next, + struct xe_ptw **child, + enum page_walk_action *action, + struct xe_pt_walk *walk) +{ + struct xe_pt_zap_ptes_walk *xe_walk = + container_of(walk, typeof(*xe_walk), base); + struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); + pgoff_t end_offset; + + XE_BUG_ON(!*child); + XE_BUG_ON(!level && xe_child->is_compact); + + /* + * Note that we're called from an entry callback, and we're dealing + * with the child of that entry rather than the parent, so need to + * adjust level down. + */ + if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset, + &end_offset)) { + xe_map_memset(gt_to_xe(xe_walk->gt), &xe_child->bo->vmap, + offset * sizeof(u64), 0, + (end_offset - offset) * sizeof(u64)); + xe_walk->needs_invalidate = true; + } + + return 0; +} + +static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = { + .pt_entry = xe_pt_zap_ptes_entry, +}; + +/** + * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range + * @gt: The gt we're zapping for. + * @vma: GPU VMA detailing address range. + * + * Eviction and Userptr invalidation needs to be able to zap the + * gpu ptes of a given address range in pagefaulting mode. + * In order to be able to do that, that function needs access to the shared + * page-table entrieaso it can either clear the leaf PTEs or + * clear the pointers to lower-level page-tables. The caller is required + * to hold the necessary locks to ensure neither the page-table connectivity + * nor the page-table entries of the range is updated from under us. + * + * Return: Whether ptes were actually updated and a TLB invalidation is + * required. 
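+ *
+ * Note that this function only zeroes the page-table memory; when it
+ * returns true, issuing the actual TLB invalidation is left to the
+ * caller.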
+ */ +bool xe_pt_zap_ptes(struct xe_gt *gt, struct xe_vma *vma) +{ + struct xe_pt_zap_ptes_walk xe_walk = { + .base = { + .ops = &xe_pt_zap_ptes_ops, + .shifts = xe_normal_pt_shifts, + .max_level = XE_PT_HIGHEST_LEVEL, + }, + .gt = gt, + }; + struct xe_pt *pt = vma->vm->pt_root[gt->info.id]; + + if (!(vma->gt_present & BIT(gt->info.id))) + return false; + + (void)xe_pt_walk_shared(&pt->base, pt->level, vma->start, vma->end + 1, + &xe_walk.base); + + return xe_walk.needs_invalidate; +} + +static void +xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_gt *gt, + struct iosys_map *map, void *data, + u32 qword_ofs, u32 num_qwords, + const struct xe_vm_pgtable_update *update) +{ + struct xe_pt_entry *ptes = update->pt_entries; + u64 *ptr = data; + u32 i; + + XE_BUG_ON(xe_gt_is_media_type(gt)); + + for (i = 0; i < num_qwords; i++) { + if (map) + xe_map_wr(gt_to_xe(gt), map, (qword_ofs + i) * + sizeof(u64), u64, ptes[i].pte); + else + ptr[i] = ptes[i].pte; + } +} + +static void xe_pt_abort_bind(struct xe_vma *vma, + struct xe_vm_pgtable_update *entries, + u32 num_entries) +{ + u32 i, j; + + for (i = 0; i < num_entries; i++) { + if (!entries[i].pt_entries) + continue; + + for (j = 0; j < entries[i].qwords; j++) + xe_pt_destroy(entries[i].pt_entries[j].pt, vma->vm->flags, NULL); + kfree(entries[i].pt_entries); + } +} + +static void xe_pt_commit_locks_assert(struct xe_vma *vma) +{ + struct xe_vm *vm = vma->vm; + + lockdep_assert_held(&vm->lock); + + if (xe_vma_is_userptr(vma)) + lockdep_assert_held_read(&vm->userptr.notifier_lock); + else + dma_resv_assert_held(vma->bo->ttm.base.resv); + + dma_resv_assert_held(&vm->resv); +} + +static void xe_pt_commit_bind(struct xe_vma *vma, + struct xe_vm_pgtable_update *entries, + u32 num_entries, bool rebind, + struct llist_head *deferred) +{ + u32 i, j; + + xe_pt_commit_locks_assert(vma); + + for (i = 0; i < num_entries; i++) { + struct xe_pt *pt = entries[i].pt; + struct xe_pt_dir *pt_dir; + + if (!rebind) + pt->num_live += entries[i].qwords; + + if (!pt->level) { + kfree(entries[i].pt_entries); + continue; + } + + pt_dir = as_xe_pt_dir(pt); + for (j = 0; j < entries[i].qwords; j++) { + u32 j_ = j + entries[i].ofs; + struct xe_pt *newpte = entries[i].pt_entries[j].pt; + + if (xe_pt_entry(pt_dir, j_)) + xe_pt_destroy(xe_pt_entry(pt_dir, j_), + vma->vm->flags, deferred); + + pt_dir->dir.entries[j_] = &newpte->base; + } + kfree(entries[i].pt_entries); + } +} + +static int +xe_pt_prepare_bind(struct xe_gt *gt, struct xe_vma *vma, + struct xe_vm_pgtable_update *entries, u32 *num_entries, + bool rebind) +{ + int err; + + *num_entries = 0; + err = xe_pt_stage_bind(gt, vma, entries, num_entries); + if (!err) + BUG_ON(!*num_entries); + else /* abort! 
*/ + xe_pt_abort_bind(vma, entries, *num_entries); + + return err; +} + +static void xe_vm_dbg_print_entries(struct xe_device *xe, + const struct xe_vm_pgtable_update *entries, + unsigned int num_entries) +#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) +{ + unsigned int i; + + vm_dbg(&xe->drm, "%u entries to update\n", num_entries); + for (i = 0; i < num_entries; i++) { + const struct xe_vm_pgtable_update *entry = &entries[i]; + struct xe_pt *xe_pt = entry->pt; + u64 page_size = 1ull << xe_pt_shift(xe_pt->level); + u64 end; + u64 start; + + XE_BUG_ON(entry->pt->is_compact); + start = entry->ofs * page_size; + end = start + page_size * entry->qwords; + vm_dbg(&xe->drm, + "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n", + i, xe_pt->level, entry->ofs, entry->qwords, + xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0); + } +} +#else +{} +#endif + +#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT + +static int xe_pt_userptr_inject_eagain(struct xe_vma *vma) +{ + u32 divisor = vma->userptr.divisor ? vma->userptr.divisor : 2; + static u32 count; + + if (count++ % divisor == divisor - 1) { + struct xe_vm *vm = vma->vm; + + vma->userptr.divisor = divisor << 1; + spin_lock(&vm->userptr.invalidated_lock); + list_move_tail(&vma->userptr.invalidate_link, + &vm->userptr.invalidated); + spin_unlock(&vm->userptr.invalidated_lock); + return true; + } + + return false; +} + +#else + +static bool xe_pt_userptr_inject_eagain(struct xe_vma *vma) +{ + return false; +} + +#endif + +/** + * struct xe_pt_migrate_pt_update - Callback argument for pre-commit callbacks + * @base: Base we derive from. + * @bind: Whether this is a bind or an unbind operation. A bind operation + * makes the pre-commit callback error with -EAGAIN if it detects a + * pending invalidation. + * @locked: Whether the pre-commit callback locked the userptr notifier lock + * and it needs unlocking. + */ +struct xe_pt_migrate_pt_update { + struct xe_migrate_pt_update base; + bool bind; + bool locked; +}; + +static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) +{ + struct xe_pt_migrate_pt_update *userptr_update = + container_of(pt_update, typeof(*userptr_update), base); + struct xe_vma *vma = pt_update->vma; + unsigned long notifier_seq = vma->userptr.notifier_seq; + struct xe_vm *vm = vma->vm; + + userptr_update->locked = false; + + /* + * Wait until nobody is running the invalidation notifier, and + * since we're exiting the loop holding the notifier lock, + * nobody can proceed invalidating either. + * + * Note that we don't update the vma->userptr.notifier_seq since + * we don't update the userptr pages. + */ + do { + down_read(&vm->userptr.notifier_lock); + if (!mmu_interval_read_retry(&vma->userptr.notifier, + notifier_seq)) + break; + + up_read(&vm->userptr.notifier_lock); + + if (userptr_update->bind) + return -EAGAIN; + + notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier); + } while (true); + + /* Inject errors to test_whether they are handled correctly */ + if (userptr_update->bind && xe_pt_userptr_inject_eagain(vma)) { + up_read(&vm->userptr.notifier_lock); + return -EAGAIN; + } + + userptr_update->locked = true; + + return 0; +} + +static const struct xe_migrate_pt_update_ops bind_ops = { + .populate = xe_vm_populate_pgtable, +}; + +static const struct xe_migrate_pt_update_ops userptr_bind_ops = { + .populate = xe_vm_populate_pgtable, + .pre_commit = xe_pt_userptr_pre_commit, +}; + +/** + * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma + * address range. 
+ * @gt: The gt to bind for. + * @vma: The vma to bind. + * @e: The engine with which to do pipelined page-table updates. + * @syncs: Entries to sync on before binding the built tree to the live vm tree. + * @num_syncs: Number of @sync entries. + * @rebind: Whether we're rebinding this vma to the same address range without + * an unbind in-between. + * + * This function builds a page-table tree (see xe_pt_stage_bind() for more + * information on page-table building), and the xe_vm_pgtable_update entries + * abstracting the operations needed to attach it to the main vm tree. It + * then takes the relevant locks and updates the metadata side of the main + * vm tree and submits the operations for pipelined attachment of the + * gpu page-table to the vm main tree, (which can be done either by the + * cpu and the GPU). + * + * Return: A valid dma-fence representing the pipelined attachment operation + * on success, an error pointer on error. + */ +struct dma_fence * +__xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, + struct xe_sync_entry *syncs, u32 num_syncs, + bool rebind) +{ + struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; + struct xe_pt_migrate_pt_update bind_pt_update = { + .base = { + .ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops, + .vma = vma, + }, + .bind = true, + }; + struct xe_vm *vm = vma->vm; + u32 num_entries; + struct dma_fence *fence; + int err; + + bind_pt_update.locked = false; + xe_bo_assert_held(vma->bo); + xe_vm_assert_held(vm); + XE_BUG_ON(xe_gt_is_media_type(gt)); + + vm_dbg(&vma->vm->xe->drm, + "Preparing bind, with range [%llx...%llx) engine %p.\n", + vma->start, vma->end, e); + + err = xe_pt_prepare_bind(gt, vma, entries, &num_entries, rebind); + if (err) + goto err; + XE_BUG_ON(num_entries > ARRAY_SIZE(entries)); + + xe_vm_dbg_print_entries(gt_to_xe(gt), entries, num_entries); + + fence = xe_migrate_update_pgtables(gt->migrate, + vm, vma->bo, + e ? e : vm->eng[gt->info.id], + entries, num_entries, + syncs, num_syncs, + &bind_pt_update.base); + if (!IS_ERR(fence)) { + LLIST_HEAD(deferred); + + /* add shared fence now for pagetable delayed destroy */ + dma_resv_add_fence(&vm->resv, fence, !rebind && + vma->last_munmap_rebind ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); + + if (!xe_vma_is_userptr(vma) && !vma->bo->vm) + dma_resv_add_fence(vma->bo->ttm.base.resv, fence, + DMA_RESV_USAGE_BOOKKEEP); + xe_pt_commit_bind(vma, entries, num_entries, rebind, + bind_pt_update.locked ? &deferred : NULL); + + /* This vma is live (again?) now */ + vma->gt_present |= BIT(gt->info.id); + + if (bind_pt_update.locked) { + vma->userptr.initial_bind = true; + up_read(&vm->userptr.notifier_lock); + xe_bo_put_commit(&deferred); + } + if (!rebind && vma->last_munmap_rebind && + xe_vm_in_compute_mode(vm)) + queue_work(vm->xe->ordered_wq, + &vm->preempt.rebind_work); + } else { + if (bind_pt_update.locked) + up_read(&vm->userptr.notifier_lock); + xe_pt_abort_bind(vma, entries, num_entries); + } + + return fence; + +err: + return ERR_PTR(err); +} + +struct xe_pt_stage_unbind_walk { + /** @base: The pagewalk base-class. */ + struct xe_pt_walk base; + + /* Input parameters for the walk */ + /** @gt: The gt we're unbinding from. */ + struct xe_gt *gt; + + /** + * @modified_start: Walk range start, modified to include any + * shared pagetables that we're the only user of and can thus + * treat as private. + */ + u64 modified_start; + /** @modified_end: Walk range start, modified like @modified_start. 
*/ + u64 modified_end; + + /* Output */ + /* @wupd: Structure to track the page-table updates we're building */ + struct xe_walk_update wupd; +}; + +/* + * Check whether this range is the only one populating this pagetable, + * and in that case, update the walk range checks so that higher levels don't + * view us as a shared pagetable. + */ +static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level, + const struct xe_pt *child, + enum page_walk_action *action, + struct xe_pt_walk *walk) +{ + struct xe_pt_stage_unbind_walk *xe_walk = + container_of(walk, typeof(*xe_walk), base); + unsigned int shift = walk->shifts[level]; + u64 size = 1ull << shift; + + if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) && + ((next - addr) >> shift) == child->num_live) { + u64 size = 1ull << walk->shifts[level + 1]; + + *action = ACTION_CONTINUE; + + if (xe_walk->modified_start >= addr) + xe_walk->modified_start = round_down(addr, size); + if (xe_walk->modified_end <= next) + xe_walk->modified_end = round_up(next, size); + + return true; + } + + return false; +} + +static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, + unsigned int level, u64 addr, u64 next, + struct xe_ptw **child, + enum page_walk_action *action, + struct xe_pt_walk *walk) +{ + struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); + + XE_BUG_ON(!*child); + XE_BUG_ON(!level && xe_child->is_compact); + + xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); + + return 0; +} + +static int +xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset, + unsigned int level, u64 addr, u64 next, + struct xe_ptw **child, + enum page_walk_action *action, + struct xe_pt_walk *walk) +{ + struct xe_pt_stage_unbind_walk *xe_walk = + container_of(walk, typeof(*xe_walk), base); + struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); + pgoff_t end_offset; + u64 size = 1ull << walk->shifts[--level]; + + if (!IS_ALIGNED(addr, size)) + addr = xe_walk->modified_start; + if (!IS_ALIGNED(next, size)) + next = xe_walk->modified_end; + + /* Parent == *child is the root pt. Don't kill it. */ + if (parent != *child && + xe_pt_check_kill(addr, next, level, xe_child, action, walk)) + return 0; + + if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset, + &end_offset)) + return 0; + + (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false); + xe_walk->wupd.updates[level].update->qwords = end_offset - offset; + + return 0; +} + +static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = { + .pt_entry = xe_pt_stage_unbind_entry, + .pt_post_descend = xe_pt_stage_unbind_post_descend, +}; + +/** + * xe_pt_stage_unbind() - Build page-table update structures for an unbind + * operation + * @gt: The gt we're unbinding for. + * @vma: The vma we're unbinding. + * @entries: Caller-provided storage for the update structures. + * + * Builds page-table update structures for an unbind operation. The function + * will attempt to remove all page-tables that we're the only user + * of, and for that to work, the unbind operation must be committed in the + * same critical section that blocks racing binds to the same page-table tree. + * + * Return: The number of entries used. 
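+ *
+ * As with xe_pt_stage_bind(), @entries must be sized for the worst case;
+ * __xe_pt_unbind_vma() below uses an on-stack array of
+ * XE_VM_MAX_LEVEL * 2 + 1 elements and asserts that the returned count
+ * stays within it.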
+ */ +static unsigned int xe_pt_stage_unbind(struct xe_gt *gt, struct xe_vma *vma, + struct xe_vm_pgtable_update *entries) +{ + struct xe_pt_stage_unbind_walk xe_walk = { + .base = { + .ops = &xe_pt_stage_unbind_ops, + .shifts = xe_normal_pt_shifts, + .max_level = XE_PT_HIGHEST_LEVEL, + }, + .gt = gt, + .modified_start = vma->start, + .modified_end = vma->end + 1, + .wupd.entries = entries, + }; + struct xe_pt *pt = vma->vm->pt_root[gt->info.id]; + + (void)xe_pt_walk_shared(&pt->base, pt->level, vma->start, vma->end + 1, + &xe_walk.base); + + return xe_walk.wupd.num_used_entries; +} + +static void +xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update, + struct xe_gt *gt, struct iosys_map *map, + void *ptr, u32 qword_ofs, u32 num_qwords, + const struct xe_vm_pgtable_update *update) +{ + struct xe_vma *vma = pt_update->vma; + u64 empty = __xe_pt_empty_pte(gt, vma->vm, update->pt->level); + int i; + + XE_BUG_ON(xe_gt_is_media_type(gt)); + + if (map && map->is_iomem) + for (i = 0; i < num_qwords; ++i) + xe_map_wr(gt_to_xe(gt), map, (qword_ofs + i) * + sizeof(u64), u64, empty); + else if (map) + memset64(map->vaddr + qword_ofs * sizeof(u64), empty, + num_qwords); + else + memset64(ptr, empty, num_qwords); +} + +static void +xe_pt_commit_unbind(struct xe_vma *vma, + struct xe_vm_pgtable_update *entries, u32 num_entries, + struct llist_head *deferred) +{ + u32 j; + + xe_pt_commit_locks_assert(vma); + + for (j = 0; j < num_entries; ++j) { + struct xe_vm_pgtable_update *entry = &entries[j]; + struct xe_pt *pt = entry->pt; + + pt->num_live -= entry->qwords; + if (pt->level) { + struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); + u32 i; + + for (i = entry->ofs; i < entry->ofs + entry->qwords; + i++) { + if (xe_pt_entry(pt_dir, i)) + xe_pt_destroy(xe_pt_entry(pt_dir, i), + vma->vm->flags, deferred); + + pt_dir->dir.entries[i] = NULL; + } + } + } +} + +static const struct xe_migrate_pt_update_ops unbind_ops = { + .populate = xe_migrate_clear_pgtable_callback, +}; + +static const struct xe_migrate_pt_update_ops userptr_unbind_ops = { + .populate = xe_migrate_clear_pgtable_callback, + .pre_commit = xe_pt_userptr_pre_commit, +}; + +/** + * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma + * address range. + * @gt: The gt to unbind for. + * @vma: The vma to unbind. + * @e: The engine with which to do pipelined page-table updates. + * @syncs: Entries to sync on before disconnecting the tree to be destroyed. + * @num_syncs: Number of @sync entries. + * + * This function builds a the xe_vm_pgtable_update entries abstracting the + * operations needed to detach the page-table tree to be destroyed from the + * man vm tree. + * It then takes the relevant locks and submits the operations for + * pipelined detachment of the gpu page-table from the vm main tree, + * (which can be done either by the cpu and the GPU), Finally it frees the + * detached page-table tree. + * + * Return: A valid dma-fence representing the pipelined detachment operation + * on success, an error pointer on error. + */ +struct dma_fence * +__xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, + struct xe_sync_entry *syncs, u32 num_syncs) +{ + struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; + struct xe_pt_migrate_pt_update unbind_pt_update = { + .base = { + .ops = xe_vma_is_userptr(vma) ? 
&userptr_unbind_ops : + &unbind_ops, + .vma = vma, + }, + }; + struct xe_vm *vm = vma->vm; + u32 num_entries; + struct dma_fence *fence = NULL; + LLIST_HEAD(deferred); + + xe_bo_assert_held(vma->bo); + xe_vm_assert_held(vm); + XE_BUG_ON(xe_gt_is_media_type(gt)); + + vm_dbg(&vma->vm->xe->drm, + "Preparing unbind, with range [%llx...%llx) engine %p.\n", + vma->start, vma->end, e); + + num_entries = xe_pt_stage_unbind(gt, vma, entries); + XE_BUG_ON(num_entries > ARRAY_SIZE(entries)); + + xe_vm_dbg_print_entries(gt_to_xe(gt), entries, num_entries); + + /* + * Even if we were already evicted and unbind to destroy, we need to + * clear again here. The eviction may have updated pagetables at a + * lower level, because it needs to be more conservative. + */ + fence = xe_migrate_update_pgtables(gt->migrate, + vm, NULL, e ? e : + vm->eng[gt->info.id], + entries, num_entries, + syncs, num_syncs, + &unbind_pt_update.base); + if (!IS_ERR(fence)) { + /* add shared fence now for pagetable delayed destroy */ + dma_resv_add_fence(&vm->resv, fence, + DMA_RESV_USAGE_BOOKKEEP); + + /* This fence will be installed by caller when doing eviction */ + if (!xe_vma_is_userptr(vma) && !vma->bo->vm) + dma_resv_add_fence(vma->bo->ttm.base.resv, fence, + DMA_RESV_USAGE_BOOKKEEP); + xe_pt_commit_unbind(vma, entries, num_entries, + unbind_pt_update.locked ? &deferred : NULL); + vma->gt_present &= ~BIT(gt->info.id); + } + + if (!vma->gt_present) + list_del_init(&vma->rebind_link); + + if (unbind_pt_update.locked) { + XE_WARN_ON(!xe_vma_is_userptr(vma)); + + if (!vma->gt_present) { + spin_lock(&vm->userptr.invalidated_lock); + list_del_init(&vma->userptr.invalidate_link); + spin_unlock(&vm->userptr.invalidated_lock); + } + up_read(&vm->userptr.notifier_lock); + xe_bo_put_commit(&deferred); + } + + return fence; +} diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h new file mode 100644 index 000000000000..1152043e5c63 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pt.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ +#ifndef _XE_PT_H_ +#define _XE_PT_H_ + +#include + +#include "xe_pt_types.h" + +struct dma_fence; +struct xe_bo; +struct xe_device; +struct xe_engine; +struct xe_gt; +struct xe_sync_entry; +struct xe_vm; +struct xe_vma; + +#define xe_pt_write(xe, map, idx, data) \ + xe_map_wr(xe, map, (idx) * sizeof(u64), u64, data) + +unsigned int xe_pt_shift(unsigned int level); + +struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt, + unsigned int level); + +int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm); + +void xe_pt_populate_empty(struct xe_gt *gt, struct xe_vm *vm, + struct xe_pt *pt); + +void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred); + +struct dma_fence * +__xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, + struct xe_sync_entry *syncs, u32 num_syncs, + bool rebind); + +struct dma_fence * +__xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, + struct xe_sync_entry *syncs, u32 num_syncs); + +bool xe_pt_zap_ptes(struct xe_gt *gt, struct xe_vma *vma); + +u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset, + const enum xe_cache_level level); + +u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo, + u64 offset, enum xe_cache_level cache, + u32 flags, u32 pt_level); +#endif diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h new file mode 100644 index 000000000000..2ed64c0a4485 --- /dev/null +++ 
b/drivers/gpu/drm/xe/xe_pt_types.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_PT_TYPES_H_ +#define _XE_PT_TYPES_H_ + +#include "xe_pt_walk.h" + +enum xe_cache_level { + XE_CACHE_NONE, + XE_CACHE_WT, + XE_CACHE_WB, +}; + +#define XE_VM_MAX_LEVEL 4 + +struct xe_pt { + struct xe_ptw base; + struct xe_bo *bo; + unsigned int level; + unsigned int num_live; + bool rebind; + bool is_compact; +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) + /** addr: Virtual address start address of the PT. */ + u64 addr; +#endif +}; + +struct xe_pt_entry { + struct xe_pt *pt; + u64 pte; +}; + +struct xe_vm_pgtable_update { + /** @bo: page table bo to write to */ + struct xe_bo *pt_bo; + + /** @ofs: offset inside this PTE to begin writing to (in qwords) */ + u32 ofs; + + /** @qwords: number of PTE's to write */ + u32 qwords; + + /** @pt: opaque pointer useful for the caller of xe_migrate_update_pgtables */ + struct xe_pt *pt; + + /** @pt_entries: Newly added pagetable entries */ + struct xe_pt_entry *pt_entries; + + /** @flags: Target flags */ + u32 flags; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_pt_walk.c b/drivers/gpu/drm/xe/xe_pt_walk.c new file mode 100644 index 000000000000..0def89af4372 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pt_walk.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2022 Intel Corporation + */ +#include "xe_pt_walk.h" + +/** + * DOC: GPU page-table tree walking. + * The utilities in this file are similar to the CPU page-table walk + * utilities in mm/pagewalk.c. The main difference is that we distinguish + * the various levels of a page-table tree with an unsigned integer rather + * than by name. 0 is the lowest level, and page-tables with level 0 can + * not be directories pointing to lower levels, whereas all other levels + * can. The user of the utilities determines the highest level. + * + * Nomenclature: + * Each struct xe_ptw, regardless of level is referred to as a page table, and + * multiple page tables typically form a page table tree with page tables at + * intermediate levels being page directories pointing at page tables at lower + * levels. A shared page table for a given address range is a page-table which + * is neither fully within nor fully outside the address range and that can + * thus be shared by two or more address ranges. + * + * Please keep this code generic so that it can used as a drm-wide page- + * table walker should other drivers find use for it. + */ +static u64 xe_pt_addr_end(u64 addr, u64 end, unsigned int level, + const struct xe_pt_walk *walk) +{ + u64 size = 1ull << walk->shifts[level]; + u64 tmp = round_up(addr + 1, size); + + return min_t(u64, tmp, end); +} + +static bool xe_pt_next(pgoff_t *offset, u64 *addr, u64 next, u64 end, + unsigned int level, const struct xe_pt_walk *walk) +{ + pgoff_t step = 1; + + /* Shared pt walk skips to the last pagetable */ + if (unlikely(walk->shared_pt_mode)) { + unsigned int shift = walk->shifts[level]; + u64 skip_to = round_down(end, 1ull << shift); + + if (skip_to > next) { + step += (skip_to - next) >> shift; + next = skip_to; + } + } + + *addr = next; + *offset += step; + + return next != end; +} + +/** + * xe_pt_walk_range() - Walk a range of a gpu page table tree with callbacks + * for each page-table entry in all levels. + * @parent: The root page table for walk start. + * @level: The root page table level. + * @addr: Virtual address start. + * @end: Virtual address end + 1. + * @walk: Walk info. 
+ * + * Similar to the CPU page-table walker, this is a helper to walk + * a gpu page table and call a provided callback function for each entry. + * + * Return: 0 on success, negative error code on error. The error is + * propagated from the callback and on error the walk is terminated. + */ +int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level, + u64 addr, u64 end, struct xe_pt_walk *walk) +{ + pgoff_t offset = xe_pt_offset(addr, level, walk); + struct xe_ptw **entries = parent->dir ? parent->dir->entries : NULL; + const struct xe_pt_walk_ops *ops = walk->ops; + enum page_walk_action action; + struct xe_ptw *child; + int err = 0; + u64 next; + + do { + next = xe_pt_addr_end(addr, end, level, walk); + if (walk->shared_pt_mode && xe_pt_covers(addr, next, level, + walk)) + continue; +again: + action = ACTION_SUBTREE; + child = entries ? entries[offset] : NULL; + err = ops->pt_entry(parent, offset, level, addr, next, + &child, &action, walk); + if (err) + break; + + /* Probably not needed yet for gpu pagetable walk. */ + if (unlikely(action == ACTION_AGAIN)) + goto again; + + if (likely(!level || !child || action == ACTION_CONTINUE)) + continue; + + err = xe_pt_walk_range(child, level - 1, addr, next, walk); + + if (!err && ops->pt_post_descend) + err = ops->pt_post_descend(parent, offset, level, addr, + next, &child, &action, walk); + if (err) + break; + + } while (xe_pt_next(&offset, &addr, next, end, level, walk)); + + return err; +} + +/** + * xe_pt_walk_shared() - Walk shared page tables of a page-table tree. + * @parent: Root page table directory. + * @level: Level of the root. + * @addr: Start address. + * @end: Last address + 1. + * @walk: Walk info. + * + * This function is similar to xe_pt_walk_range() but it skips page tables + * that are private to the range. Since the root (or @parent) page table is + * typically also a shared page table this function is different in that it + * calls the pt_entry callback and the post_descend callback also for the + * root. The root can be detected in the callbacks by checking whether + * parent == *child. + * Walking only the shared page tables is common for unbind-type operations + * where the page-table entries for an address range are cleared or detached + * from the main page-table tree. + * + * Return: 0 on success, negative error code on error: If a callback + * returns an error, the walk will be terminated and the error returned by + * this function. + */ +int xe_pt_walk_shared(struct xe_ptw *parent, unsigned int level, + u64 addr, u64 end, struct xe_pt_walk *walk) +{ + const struct xe_pt_walk_ops *ops = walk->ops; + enum page_walk_action action = ACTION_SUBTREE; + struct xe_ptw *child = parent; + int err; + + walk->shared_pt_mode = true; + err = walk->ops->pt_entry(parent, 0, level + 1, addr, end, + &child, &action, walk); + + if (err || action != ACTION_SUBTREE) + return err; + + err = xe_pt_walk_range(parent, level, addr, end, walk); + if (!err && ops->pt_post_descend) { + err = ops->pt_post_descend(parent, 0, level + 1, addr, end, + &child, &action, walk); + } + return err; +} diff --git a/drivers/gpu/drm/xe/xe_pt_walk.h b/drivers/gpu/drm/xe/xe_pt_walk.h new file mode 100644 index 000000000000..42c51fa601ec --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pt_walk.h @@ -0,0 +1,161 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright © 2022 Intel Corporation + */ +#ifndef __XE_PT_WALK__ +#define __XE_PT_WALK__ + +#include +#include + +struct xe_ptw_dir; + +/** + * struct xe_ptw - base class for driver pagetable subclassing. 
+ * @dir: Pointer to an array of children if any. + * + * Drivers could subclass this, and if it's a page-directory, typically + * embed the xe_ptw_dir::entries array in the same allocation. + */ +struct xe_ptw { + struct xe_ptw_dir *dir; +}; + +/** + * struct xe_ptw_dir - page directory structure + * @entries: Array holding page directory children. + * + * It is the responsibility of the user to ensure @entries is + * correctly sized. + */ +struct xe_ptw_dir { + struct xe_ptw *entries[0]; +}; + +/** + * struct xe_pt_walk - Embeddable struct for walk parameters + */ +struct xe_pt_walk { + /** @ops: The walk ops used for the pagewalk */ + const struct xe_pt_walk_ops *ops; + /** + * @shifts: Array of page-table entry shifts used for the + * different levels, starting out with the leaf level 0 + * page-shift as the first entry. It's legal for this pointer to be + * changed during the walk. + */ + const u64 *shifts; + /** @max_level: Highest populated level in @sizes */ + unsigned int max_level; + /** + * @shared_pt_mode: Whether to skip all entries that are private + * to the address range and called only for entries that are + * shared with other address ranges. Such entries are referred to + * as shared pagetables. + */ + bool shared_pt_mode; +}; + +/** + * typedef xe_pt_entry_fn - gpu page-table-walk callback-function + * @parent: The parent page.table. + * @offset: The offset (number of entries) into the page table. + * @level: The level of @parent. + * @addr: The virtual address. + * @next: The virtual address for the next call, or end address. + * @child: Pointer to pointer to child page-table at this @offset. The + * function may modify the value pointed to if, for example, allocating a + * child page table. + * @action: The walk action to take upon return. See . + * @walk: The walk parameters. + */ +typedef int (*xe_pt_entry_fn)(struct xe_ptw *parent, pgoff_t offset, + unsigned int level, u64 addr, u64 next, + struct xe_ptw **child, + enum page_walk_action *action, + struct xe_pt_walk *walk); + +/** + * struct xe_pt_walk_ops - Walk callbacks. + */ +struct xe_pt_walk_ops { + /** + * @pt_entry: Callback to be called for each page table entry prior + * to descending to the next level. The returned value of the action + * function parameter is honored. + */ + xe_pt_entry_fn pt_entry; + /** + * @pt_post_descend: Callback to be called for each page table entry + * after return from descending to the next level. The returned value + * of the action function parameter is ignored. + */ + xe_pt_entry_fn pt_post_descend; +}; + +int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level, + u64 addr, u64 end, struct xe_pt_walk *walk); + +int xe_pt_walk_shared(struct xe_ptw *parent, unsigned int level, + u64 addr, u64 end, struct xe_pt_walk *walk); + +/** + * xe_pt_covers - Whether the address range covers an entire entry in @level + * @addr: Start of the range. + * @end: End of range + 1. + * @level: Page table level. + * @walk: Page table walk info. + * + * This function is a helper to aid in determining whether a leaf page table + * entry can be inserted at this @level. + * + * Return: Whether the range provided covers exactly an entry at this level. + */ +static inline bool xe_pt_covers(u64 addr, u64 end, unsigned int level, + const struct xe_pt_walk *walk) +{ + u64 pt_size = 1ull << walk->shifts[level]; + + return end - addr == pt_size && IS_ALIGNED(addr, pt_size); +} + +/** + * xe_pt_num_entries: Number of page-table entries of a given range at this + * level + * @addr: Start address. 
+ * @end: End address. + * @level: Page table level. + * @walk: Walk info. + * + * Return: The number of page table entries at this level between @start and + * @end. + */ +static inline pgoff_t +xe_pt_num_entries(u64 addr, u64 end, unsigned int level, + const struct xe_pt_walk *walk) +{ + u64 pt_size = 1ull << walk->shifts[level]; + + return (round_up(end, pt_size) - round_down(addr, pt_size)) >> + walk->shifts[level]; +} + +/** + * xe_pt_offset: Offset of the page-table entry for a given address. + * @addr: The address. + * @level: Page table level. + * @walk: Walk info. + * + * Return: The page table entry offset for the given address in a + * page table with size indicated by @level. + */ +static inline pgoff_t +xe_pt_offset(u64 addr, unsigned int level, const struct xe_pt_walk *walk) +{ + if (level < walk->max_level) + addr &= ((1ull << walk->shifts[level + 1]) - 1); + + return addr >> walk->shifts[level]; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c new file mode 100644 index 000000000000..6e904e97f456 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_query.c @@ -0,0 +1,387 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include +#include + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_macros.h" +#include "xe_query.h" +#include "xe_ggtt.h" +#include "xe_guc_hwconfig.h" + +static const enum xe_engine_class xe_to_user_engine_class[] = { + [XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER, + [XE_ENGINE_CLASS_COPY] = DRM_XE_ENGINE_CLASS_COPY, + [XE_ENGINE_CLASS_VIDEO_DECODE] = DRM_XE_ENGINE_CLASS_VIDEO_DECODE, + [XE_ENGINE_CLASS_VIDEO_ENHANCE] = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE, + [XE_ENGINE_CLASS_COMPUTE] = DRM_XE_ENGINE_CLASS_COMPUTE, +}; + +static size_t calc_hw_engine_info_size(struct xe_device *xe) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct xe_gt *gt; + u8 gt_id; + int i = 0; + + for_each_gt(gt, xe, gt_id) + for_each_hw_engine(hwe, gt, id) { + if (xe_hw_engine_is_reserved(hwe)) + continue; + i++; + } + + return i * sizeof(struct drm_xe_engine_class_instance); +} + +static int query_engines(struct xe_device *xe, + struct drm_xe_device_query *query) +{ + size_t size = calc_hw_engine_info_size(xe); + struct drm_xe_engine_class_instance __user *query_ptr = + u64_to_user_ptr(query->data); + struct drm_xe_engine_class_instance *hw_engine_info; + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct xe_gt *gt; + u8 gt_id; + int i = 0; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_ERR(xe, query->size != size)) { + return -EINVAL; + } + + hw_engine_info = kmalloc(size, GFP_KERNEL); + if (XE_IOCTL_ERR(xe, !hw_engine_info)) + return -ENOMEM; + + for_each_gt(gt, xe, gt_id) + for_each_hw_engine(hwe, gt, id) { + if (xe_hw_engine_is_reserved(hwe)) + continue; + + hw_engine_info[i].engine_class = + xe_to_user_engine_class[hwe->class]; + hw_engine_info[i].engine_instance = + hwe->logical_instance; + hw_engine_info[i++].gt_id = gt->info.id; + } + + if (copy_to_user(query_ptr, hw_engine_info, size)) { + kfree(hw_engine_info); + return -EFAULT; + } + kfree(hw_engine_info); + + return 0; +} + +static size_t calc_memory_usage_size(struct xe_device *xe) +{ + u32 num_managers = 1; + int i; + + for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) + if (ttm_manager_type(&xe->ttm, i)) + num_managers++; + + return offsetof(struct drm_xe_query_mem_usage, regions[num_managers]); +} + +static int query_memory_usage(struct xe_device 
*xe, + struct drm_xe_device_query *query) +{ + size_t size = calc_memory_usage_size(xe); + struct drm_xe_query_mem_usage *usage; + struct drm_xe_query_mem_usage __user *query_ptr = + u64_to_user_ptr(query->data); + struct ttm_resource_manager *man; + int ret, i; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_ERR(xe, query->size != size)) { + return -EINVAL; + } + + usage = kmalloc(size, GFP_KERNEL); + if (XE_IOCTL_ERR(xe, !usage)) + return -ENOMEM; + + usage->pad = 0; + + man = ttm_manager_type(&xe->ttm, XE_PL_TT); + usage->regions[0].mem_class = XE_MEM_REGION_CLASS_SYSMEM; + usage->regions[0].instance = 0; + usage->regions[0].pad = 0; + usage->regions[0].min_page_size = PAGE_SIZE; + usage->regions[0].max_page_size = PAGE_SIZE; + usage->regions[0].total_size = man->size << PAGE_SHIFT; + usage->regions[0].used = ttm_resource_manager_usage(man); + usage->num_regions = 1; + + for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) { + man = ttm_manager_type(&xe->ttm, i); + if (man) { + usage->regions[usage->num_regions].mem_class = + XE_MEM_REGION_CLASS_VRAM; + usage->regions[usage->num_regions].instance = + usage->num_regions; + usage->regions[usage->num_regions].pad = 0; + usage->regions[usage->num_regions].min_page_size = + xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? + SZ_64K : PAGE_SIZE; + usage->regions[usage->num_regions].max_page_size = + SZ_1G; + usage->regions[usage->num_regions].total_size = + man->size; + usage->regions[usage->num_regions++].used = + ttm_resource_manager_usage(man); + } + } + + if (!copy_to_user(query_ptr, usage, size)) + ret = 0; + else + ret = -ENOSPC; + + kfree(usage); + return ret; +} + +static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) +{ + u32 num_params = XE_QUERY_CONFIG_NUM_PARAM; + size_t size = + sizeof(struct drm_xe_query_config) + num_params * sizeof(u64); + struct drm_xe_query_config __user *query_ptr = + u64_to_user_ptr(query->data); + struct drm_xe_query_config *config; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_ERR(xe, query->size != size)) { + return -EINVAL; + } + + config = kzalloc(size, GFP_KERNEL); + if (XE_IOCTL_ERR(xe, !config)) + return -ENOMEM; + + config->num_params = num_params; + config->info[XE_QUERY_CONFIG_REV_AND_DEVICE_ID] = + xe->info.devid | (xe->info.revid << 16); + if (to_gt(xe)->mem.vram.size) + config->info[XE_QUERY_CONFIG_FLAGS] = + XE_QUERY_CONFIG_FLAGS_HAS_VRAM; + if (xe->info.enable_guc) + config->info[XE_QUERY_CONFIG_FLAGS] |= + XE_QUERY_CONFIG_FLAGS_USE_GUC; + config->info[XE_QUERY_CONFIG_MIN_ALIGNEMENT] = + xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? 
SZ_64K : SZ_4K; + config->info[XE_QUERY_CONFIG_VA_BITS] = 12 + + (9 * (xe->info.vm_max_level + 1)); + config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.tile_count; + config->info[XE_QUERY_CONFIG_MEM_REGION_COUNT] = + hweight_long(xe->info.mem_region_mask); + + if (copy_to_user(query_ptr, config, size)) { + kfree(config); + return -EFAULT; + } + kfree(config); + + return 0; +} + +static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query) +{ + struct xe_gt *gt; + size_t size = sizeof(struct drm_xe_query_gts) + + xe->info.tile_count * sizeof(struct drm_xe_query_gt); + struct drm_xe_query_gts __user *query_ptr = + u64_to_user_ptr(query->data); + struct drm_xe_query_gts *gts; + u8 id; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_ERR(xe, query->size != size)) { + return -EINVAL; + } + + gts = kzalloc(size, GFP_KERNEL); + if (XE_IOCTL_ERR(xe, !gts)) + return -ENOMEM; + + gts->num_gt = xe->info.tile_count; + for_each_gt(gt, xe, id) { + if (id == 0) + gts->gts[id].type = XE_QUERY_GT_TYPE_MAIN; + else if (xe_gt_is_media_type(gt)) + gts->gts[id].type = XE_QUERY_GT_TYPE_MEDIA; + else + gts->gts[id].type = XE_QUERY_GT_TYPE_REMOTE; + gts->gts[id].instance = id; + gts->gts[id].clock_freq = gt->info.clock_freq; + if (!IS_DGFX(xe)) + gts->gts[id].native_mem_regions = 0x1; + else + gts->gts[id].native_mem_regions = + BIT(gt->info.vram_id) << 1; + gts->gts[id].slow_mem_regions = xe->info.mem_region_mask ^ + gts->gts[id].native_mem_regions; + } + + if (copy_to_user(query_ptr, gts, size)) { + kfree(gts); + return -EFAULT; + } + kfree(gts); + + return 0; +} + +static int query_hwconfig(struct xe_device *xe, + struct drm_xe_device_query *query) +{ + struct xe_gt *gt = xe_device_get_gt(xe, 0); + size_t size = xe_guc_hwconfig_size(>->uc.guc); + void __user *query_ptr = u64_to_user_ptr(query->data); + void *hwconfig; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_ERR(xe, query->size != size)) { + return -EINVAL; + } + + hwconfig = kzalloc(size, GFP_KERNEL); + if (XE_IOCTL_ERR(xe, !hwconfig)) + return -ENOMEM; + + xe_device_mem_access_get(xe); + xe_guc_hwconfig_copy(>->uc.guc, hwconfig); + xe_device_mem_access_put(xe); + + if (copy_to_user(query_ptr, hwconfig, size)) { + kfree(hwconfig); + return -EFAULT; + } + kfree(hwconfig); + + return 0; +} + +static size_t calc_topo_query_size(struct xe_device *xe) +{ + return xe->info.tile_count * + (3 * sizeof(struct drm_xe_query_topology_mask) + + sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) + + sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) + + sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss)); +} + +static void __user *copy_mask(void __user *ptr, + struct drm_xe_query_topology_mask *topo, + void *mask, size_t mask_size) +{ + topo->num_bytes = mask_size; + + if (copy_to_user(ptr, topo, sizeof(*topo))) + return ERR_PTR(-EFAULT); + ptr += sizeof(topo); + + if (copy_to_user(ptr, mask, mask_size)) + return ERR_PTR(-EFAULT); + ptr += mask_size; + + return ptr; +} + +static int query_gt_topology(struct xe_device *xe, + struct drm_xe_device_query *query) +{ + void __user *query_ptr = u64_to_user_ptr(query->data); + size_t size = calc_topo_query_size(xe); + struct drm_xe_query_topology_mask topo; + struct xe_gt *gt; + int id; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_ERR(xe, query->size != size)) { + return -EINVAL; + } + + for_each_gt(gt, xe, id) { + topo.gt_id = id; + + topo.type = XE_TOPO_DSS_GEOMETRY; + query_ptr = 
copy_mask(query_ptr, &topo, + gt->fuse_topo.g_dss_mask, + sizeof(gt->fuse_topo.g_dss_mask)); + if (IS_ERR(query_ptr)) + return PTR_ERR(query_ptr); + + topo.type = XE_TOPO_DSS_COMPUTE; + query_ptr = copy_mask(query_ptr, &topo, + gt->fuse_topo.c_dss_mask, + sizeof(gt->fuse_topo.c_dss_mask)); + if (IS_ERR(query_ptr)) + return PTR_ERR(query_ptr); + + topo.type = XE_TOPO_EU_PER_DSS; + query_ptr = copy_mask(query_ptr, &topo, + gt->fuse_topo.eu_mask_per_dss, + sizeof(gt->fuse_topo.eu_mask_per_dss)); + if (IS_ERR(query_ptr)) + return PTR_ERR(query_ptr); + } + + return 0; +} + +static int (* const xe_query_funcs[])(struct xe_device *xe, + struct drm_xe_device_query *query) = { + query_engines, + query_memory_usage, + query_config, + query_gts, + query_hwconfig, + query_gt_topology, +}; + +int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct drm_xe_device_query *query = data; + u32 idx; + + if (XE_IOCTL_ERR(xe, query->extensions != 0)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, query->query > ARRAY_SIZE(xe_query_funcs))) + return -EINVAL; + + idx = array_index_nospec(query->query, ARRAY_SIZE(xe_query_funcs)); + if (XE_IOCTL_ERR(xe, !xe_query_funcs[idx])) + return -EINVAL; + + return xe_query_funcs[idx](xe, query); +} diff --git a/drivers/gpu/drm/xe/xe_query.h b/drivers/gpu/drm/xe/xe_query.h new file mode 100644 index 000000000000..beeb7a8192b4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_query.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_QUERY_H_ +#define _XE_QUERY_H_ + +struct drm_device; +struct drm_file; + +int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file); + +#endif diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c new file mode 100644 index 000000000000..16e025dcf2cc --- /dev/null +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_reg_sr.h" + +#include +#include +#include + +#include +#include + +#include "xe_rtp_types.h" +#include "xe_device_types.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_gt_mcr.h" +#include "xe_macros.h" +#include "xe_mmio.h" + +#include "gt/intel_engine_regs.h" +#include "gt/intel_gt_regs.h" + +#define XE_REG_SR_GROW_STEP_DEFAULT 16 + +static void reg_sr_fini(struct drm_device *drm, void *arg) +{ + struct xe_reg_sr *sr = arg; + + xa_destroy(&sr->xa); + kfree(sr->pool.arr); + memset(&sr->pool, 0, sizeof(sr->pool)); +} + +int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe) +{ + xa_init(&sr->xa); + memset(&sr->pool, 0, sizeof(sr->pool)); + sr->pool.grow_step = XE_REG_SR_GROW_STEP_DEFAULT; + sr->name = name; + + return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr); +} + +int xe_reg_sr_dump_kv(struct xe_reg_sr *sr, + struct xe_reg_sr_kv **dst) +{ + struct xe_reg_sr_kv *iter; + struct xe_reg_sr_entry *entry; + unsigned long idx; + + if (xa_empty(&sr->xa)) { + *dst = NULL; + return 0; + } + + *dst = kmalloc_array(sr->pool.used, sizeof(**dst), GFP_KERNEL); + if (!*dst) + return -ENOMEM; + + iter = *dst; + xa_for_each(&sr->xa, idx, entry) { + iter->k = idx; + iter->v = *entry; + iter++; + } + + return 0; +} + +static struct xe_reg_sr_entry *alloc_entry(struct xe_reg_sr *sr) +{ + if (sr->pool.used == sr->pool.allocated) { + struct xe_reg_sr_entry *arr; + + arr = krealloc_array(sr->pool.arr, + ALIGN(sr->pool.allocated + 1, 
sr->pool.grow_step), + sizeof(*arr), GFP_KERNEL); + if (!arr) + return NULL; + + sr->pool.arr = arr; + sr->pool.allocated += sr->pool.grow_step; + } + + return &sr->pool.arr[sr->pool.used++]; +} + +static bool compatible_entries(const struct xe_reg_sr_entry *e1, + const struct xe_reg_sr_entry *e2) +{ + /* + * Don't allow overwriting values: clr_bits/set_bits should be disjoint + * when operating in the same register + */ + if (e1->clr_bits & e2->clr_bits || e1->set_bits & e2->set_bits || + e1->clr_bits & e2->set_bits || e1->set_bits & e2->clr_bits) + return false; + + if (e1->masked_reg != e2->masked_reg) + return false; + + if (e1->reg_type != e2->reg_type) + return false; + + return true; +} + +int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg, + const struct xe_reg_sr_entry *e) +{ + unsigned long idx = reg; + struct xe_reg_sr_entry *pentry = xa_load(&sr->xa, idx); + int ret; + + if (pentry) { + if (!compatible_entries(pentry, e)) { + ret = -EINVAL; + goto fail; + } + + pentry->clr_bits |= e->clr_bits; + pentry->set_bits |= e->set_bits; + pentry->read_mask |= e->read_mask; + + return 0; + } + + pentry = alloc_entry(sr); + if (!pentry) { + ret = -ENOMEM; + goto fail; + } + + *pentry = *e; + ret = xa_err(xa_store(&sr->xa, idx, pentry, GFP_KERNEL)); + if (ret) + goto fail; + + return 0; + +fail: + DRM_ERROR("Discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s): ret=%d\n", + idx, e->clr_bits, e->set_bits, + str_yes_no(e->masked_reg), ret); + + return ret; +} + +static void apply_one_mmio(struct xe_gt *gt, u32 reg, + struct xe_reg_sr_entry *entry) +{ + struct xe_device *xe = gt_to_xe(gt); + u32 val; + + /* + * If this is a masked register, need to figure what goes on the upper + * 16 bits: it's either the clr_bits (when using FIELD_SET and WR) or + * the set_bits, when using SET. + * + * When it's not masked, we have to read it from hardware, unless we are + * supposed to set all bits. + */ + if (entry->masked_reg) + val = (entry->clr_bits ?: entry->set_bits << 16); + else if (entry->clr_bits + 1) + val = (entry->reg_type == XE_RTP_REG_MCR ? 
+ xe_gt_mcr_unicast_read_any(gt, MCR_REG(reg)) : + xe_mmio_read32(gt, reg)) & (~entry->clr_bits); + else + val = 0; + + /* + * TODO: add selftest to validate all tables, regardless of platform: + * - Masked registers can't have set_bits with upper bits set + * - set_bits must be contained in clr_bits + */ + val |= entry->set_bits; + + drm_dbg(&xe->drm, "REG[0x%x] = 0x%08x", reg, val); + + if (entry->reg_type == XE_RTP_REG_MCR) + xe_gt_mcr_multicast_write(gt, MCR_REG(reg), val); + else + xe_mmio_write32(gt, reg, val); +} + +void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_reg_sr_entry *entry; + unsigned long reg; + int err; + + drm_dbg(&xe->drm, "Applying %s save-restore MMIOs\n", sr->name); + + err = xe_force_wake_get(>->mmio.fw, XE_FORCEWAKE_ALL); + if (err) + goto err_force_wake; + + xa_for_each(&sr->xa, reg, entry) + apply_one_mmio(gt, reg, entry); + + err = xe_force_wake_put(>->mmio.fw, XE_FORCEWAKE_ALL); + XE_WARN_ON(err); + + return; + +err_force_wake: + drm_err(&xe->drm, "Failed to apply, err=%d\n", err); +} + +void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base, + struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_reg_sr_entry *entry; + unsigned long reg; + unsigned int slot = 0; + int err; + + drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name); + + err = xe_force_wake_get(>->mmio.fw, XE_FORCEWAKE_ALL); + if (err) + goto err_force_wake; + + xa_for_each(&sr->xa, reg, entry) { + xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot).reg, + reg | entry->set_bits); + slot++; + } + + /* And clear the rest just in case of garbage */ + for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) + xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot).reg, + RING_NOPID(mmio_base).reg); + + err = xe_force_wake_put(>->mmio.fw, XE_FORCEWAKE_ALL); + XE_WARN_ON(err); + + return; + +err_force_wake: + drm_err(&xe->drm, "Failed to apply, err=%d\n", err); +} diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h new file mode 100644 index 000000000000..c3a9db251e92 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_reg_sr.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_REG_SR_ +#define _XE_REG_SR_ + +#include "xe_reg_sr_types.h" + +/* + * Reg save/restore bookkeeping + */ + +struct xe_device; +struct xe_gt; + +int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe); +int xe_reg_sr_dump_kv(struct xe_reg_sr *sr, + struct xe_reg_sr_kv **dst); + +int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg, + const struct xe_reg_sr_entry *e); +void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt); +void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base, + struct xe_gt *gt); + +#endif diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h b/drivers/gpu/drm/xe/xe_reg_sr_types.h new file mode 100644 index 000000000000..2fa7ff3966ba --- /dev/null +++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_REG_SR_TYPES_ +#define _XE_REG_SR_TYPES_ + +#include +#include + +#include "i915_reg_defs.h" + +struct xe_reg_sr_entry { + u32 clr_bits; + u32 set_bits; + /* Mask for bits to consider when reading value back */ + u32 read_mask; + /* + * "Masked registers" are marked in spec as register with the upper 16 + * bits as a mask for the bits that is being updated on the lower 16 + * bits when 
writing to it. + */ + u8 masked_reg; + u8 reg_type; +}; + +struct xe_reg_sr_kv { + u32 k; + struct xe_reg_sr_entry v; +}; + +struct xe_reg_sr { + struct { + struct xe_reg_sr_entry *arr; + unsigned int used; + unsigned int allocated; + unsigned int grow_step; + } pool; + struct xarray xa; + const char *name; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c new file mode 100644 index 000000000000..2e0c87b72395 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "xe_reg_whitelist.h" + +#include "xe_platform_types.h" +#include "xe_gt_types.h" +#include "xe_rtp.h" + +#include "../i915/gt/intel_engine_regs.h" +#include "../i915/gt/intel_gt_regs.h" + +#undef _MMIO +#undef MCR_REG +#define _MMIO(x) _XE_RTP_REG(x) +#define MCR_REG(x) _XE_RTP_MCR_REG(x) + +static bool match_not_render(const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + return hwe->class != XE_ENGINE_CLASS_RENDER; +} + +static const struct xe_rtp_entry register_whitelist[] = { + { XE_RTP_NAME("WaAllowPMDepthAndInvocationCountAccessFromUMD, 1408556865"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), + XE_WHITELIST_REGISTER(PS_INVOCATION_COUNT, + RING_FORCE_TO_NONPRIV_ACCESS_RD | + RING_FORCE_TO_NONPRIV_RANGE_4) + }, + { XE_RTP_NAME("1508744258, 14012131227, 1808121037"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), + XE_WHITELIST_REGISTER(GEN7_COMMON_SLICE_CHICKEN1, 0) + }, + { XE_RTP_NAME("1806527549"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), + XE_WHITELIST_REGISTER(HIZ_CHICKEN, 0) + }, + { XE_RTP_NAME("allow_read_ctx_timestamp"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1260), FUNC(match_not_render)), + XE_WHITELIST_REGISTER(RING_CTX_TIMESTAMP(0), + RING_FORCE_TO_NONPRIV_ACCESS_RD, + XE_RTP_FLAG(ENGINE_BASE)) + }, + { XE_RTP_NAME("16014440446_part_1"), + XE_RTP_RULES(PLATFORM(PVC)), + XE_WHITELIST_REGISTER(_MMIO(0x4400), + RING_FORCE_TO_NONPRIV_DENY | + RING_FORCE_TO_NONPRIV_RANGE_64) + }, + { XE_RTP_NAME("16014440446_part_2"), + XE_RTP_RULES(PLATFORM(PVC)), + XE_WHITELIST_REGISTER(_MMIO(0x4500), + RING_FORCE_TO_NONPRIV_DENY | + RING_FORCE_TO_NONPRIV_RANGE_64) + }, + {} +}; + +/** + * xe_reg_whitelist_process_engine - process table of registers to whitelist + * @hwe: engine instance to process whitelist for + * + * Process wwhitelist table for this platform, saving in @hwe all the + * registers that need to be whitelisted by the hardware so they can be accessed + * by userspace. 
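+ *
+ * The collected entries are expected to be programmed into the
+ * RING_FORCE_TO_NONPRIV registers later on, see
+ * xe_reg_sr_apply_whitelist().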
+ */ +void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe) +{ + xe_rtp_process(register_whitelist, &hwe->reg_whitelist, hwe->gt, hwe); +} diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.h b/drivers/gpu/drm/xe/xe_reg_whitelist.h new file mode 100644 index 000000000000..6e861b1bdb01 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_REG_WHITELIST_ +#define _XE_REG_WHITELIST_ + +struct xe_hw_engine; + +void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h new file mode 100644 index 000000000000..f54409850d74 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_res_cursor.h @@ -0,0 +1,226 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XE_RES_CURSOR_H__ +#define __XE_RES_CURSOR_H__ + +#include + +#include +#include +#include +#include +#include + +#include "xe_bo.h" +#include "xe_macros.h" +#include "xe_ttm_vram_mgr.h" + +/* state back for walking over vram_mgr and gtt_mgr allocations */ +struct xe_res_cursor { + u64 start; + u64 size; + u64 remaining; + void *node; + u32 mem_type; + struct scatterlist *sgl; +}; + +/** + * xe_res_first - initialize a xe_res_cursor + * + * @res: TTM resource object to walk + * @start: Start of the range + * @size: Size of the range + * @cur: cursor object to initialize + * + * Start walking over the range of allocations between @start and @size. 
+ */ +static inline void xe_res_first(struct ttm_resource *res, + u64 start, u64 size, + struct xe_res_cursor *cur) +{ + struct drm_buddy_block *block; + struct list_head *head, *next; + + cur->sgl = NULL; + if (!res) + goto fallback; + + XE_BUG_ON(start + size > res->size); + + cur->mem_type = res->mem_type; + + switch (cur->mem_type) { + case XE_PL_VRAM0: + case XE_PL_VRAM1: + head = &to_xe_ttm_vram_mgr_resource(res)->blocks; + + block = list_first_entry_or_null(head, + struct drm_buddy_block, + link); + if (!block) + goto fallback; + + while (start >= xe_ttm_vram_mgr_block_size(block)) { + start -= xe_ttm_vram_mgr_block_size(block); + + next = block->link.next; + if (next != head) + block = list_entry(next, struct drm_buddy_block, + link); + } + + cur->start = xe_ttm_vram_mgr_block_start(block) + start; + cur->size = min(xe_ttm_vram_mgr_block_size(block) - start, + size); + cur->remaining = size; + cur->node = block; + break; + default: + goto fallback; + } + + return; + +fallback: + cur->start = start; + cur->size = size; + cur->remaining = size; + cur->node = NULL; + cur->mem_type = XE_PL_TT; + XE_WARN_ON(res && start + size > res->size); + return; +} + +static inline void __xe_res_sg_next(struct xe_res_cursor *cur) +{ + struct scatterlist *sgl = cur->sgl; + u64 start = cur->start; + + while (start >= sg_dma_len(sgl)) { + start -= sg_dma_len(sgl); + sgl = sg_next(sgl); + XE_BUG_ON(!sgl); + } + + cur->start = start; + cur->size = sg_dma_len(sgl) - start; + cur->sgl = sgl; +} + +/** + * xe_res_first_sg - initialize a xe_res_cursor with a scatter gather table + * + * @sg: scatter gather table to walk + * @start: Start of the range + * @size: Size of the range + * @cur: cursor object to initialize + * + * Start walking over the range of allocations between @start and @size. + */ +static inline void xe_res_first_sg(const struct sg_table *sg, + u64 start, u64 size, + struct xe_res_cursor *cur) +{ + XE_BUG_ON(!sg); + XE_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE) || + !IS_ALIGNED(size, PAGE_SIZE)); + cur->node = NULL; + cur->start = start; + cur->remaining = size; + cur->size = 0; + cur->sgl = sg->sgl; + cur->mem_type = XE_PL_TT; + __xe_res_sg_next(cur); +} + +/** + * xe_res_next - advance the cursor + * + * @cur: the cursor to advance + * @size: number of bytes to move forward + * + * Move the cursor @size bytes forward, walking to the next node if necessary.
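+ *
+ * A hedged sketch of a typical walk combining xe_res_first(), xe_res_next()
+ * and xe_res_dma() (editor's illustration; program_pte_range() is a
+ * hypothetical consumer, not an API added by this patch):
+ *
+ * .. code-block:: c
+ *
+ *	struct xe_res_cursor cur;
+ *
+ *	xe_res_first(res, 0, res->size, &cur);
+ *	while (cur.remaining) {
+ *		program_pte_range(xe_res_dma(&cur), cur.size);
+ *		xe_res_next(&cur, cur.size);
+ *	}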
+ */ +static inline void xe_res_next(struct xe_res_cursor *cur, u64 size) +{ + struct drm_buddy_block *block; + struct list_head *next; + u64 start; + + XE_BUG_ON(size > cur->remaining); + + cur->remaining -= size; + if (!cur->remaining) + return; + + if (cur->size > size) { + cur->size -= size; + cur->start += size; + return; + } + + if (cur->sgl) { + cur->start += size; + __xe_res_sg_next(cur); + return; + } + + switch (cur->mem_type) { + case XE_PL_VRAM0: + case XE_PL_VRAM1: + start = size - cur->size; + block = cur->node; + + next = block->link.next; + block = list_entry(next, struct drm_buddy_block, link); + + + while (start >= xe_ttm_vram_mgr_block_size(block)) { + start -= xe_ttm_vram_mgr_block_size(block); + + next = block->link.next; + block = list_entry(next, struct drm_buddy_block, link); + } + + cur->start = xe_ttm_vram_mgr_block_start(block) + start; + cur->size = min(xe_ttm_vram_mgr_block_size(block) - start, + cur->remaining); + cur->node = block; + break; + default: + return; + } +} + +/** + * xe_res_dma - return dma address of cursor at current position + * + * @cur: the cursor to return the dma address from + */ +static inline u64 xe_res_dma(const struct xe_res_cursor *cur) +{ + return cur->sgl ? sg_dma_address(cur->sgl) + cur->start : cur->start; +} +#endif diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c new file mode 100644 index 000000000000..fda7978a63e0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -0,0 +1,373 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_engine_types.h" +#include "xe_gt.h" +#include "xe_lrc.h" +#include "xe_macros.h" +#include "xe_ring_ops.h" +#include "xe_sched_job.h" +#include "xe_vm_types.h" + +#include "i915_reg.h" +#include "gt/intel_gpu_commands.h" +#include "gt/intel_gt_regs.h" +#include "gt/intel_lrc_reg.h" + +static u32 preparser_disable(bool state) +{ + return MI_ARB_CHECK | BIT(8) | state; +} + +static int emit_aux_table_inv(struct xe_gt *gt, u32 addr, u32 *dw, int i) +{ + dw[i++] = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN; + dw[i++] = addr + gt->mmio.adj_offset; + dw[i++] = AUX_INV; + dw[i++] = MI_NOOP; + + return i; +} + +static int emit_user_interrupt(u32 *dw, int i) +{ + dw[i++] = MI_USER_INTERRUPT; + dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE; + dw[i++] = MI_ARB_CHECK; + + return i; +} + +static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i) +{ + dw[i++] = MI_STORE_DATA_IMM | BIT(22) /* GGTT */ | 2; + dw[i++] = addr; + dw[i++] = 0; + dw[i++] = value; + + return i; +} + +static int emit_flush_imm_ggtt(u32 addr, u32 value, u32 *dw, int i) +{ + dw[i++] = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + dw[i++] = addr | MI_FLUSH_DW_USE_GTT; + dw[i++] = 0; + dw[i++] = value; + + return i; +} + +static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i) +{ + dw[i++] = MI_BATCH_BUFFER_START_GEN8 | ppgtt_flag; + dw[i++] = lower_32_bits(batch_addr); + dw[i++] = upper_32_bits(batch_addr); + + return i; +} + +static int emit_flush_invalidate(u32 flag, u32 *dw, int i) +{ + dw[i] = MI_FLUSH_DW + 1; + dw[i] |= flag; + dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | + MI_FLUSH_DW_STORE_INDEX; + + dw[i++] = LRC_PPHWSP_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; + dw[i++] = 0; + dw[i++] = ~0U; + + return i; +} + +static int emit_pipe_invalidate(u32 mask_flags, u32 *dw, int i) +{ + u32 flags = PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_COMMAND_CACHE_INVALIDATE | + PIPE_CONTROL_TLB_INVALIDATE | + 
PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE | + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | + PIPE_CONTROL_VF_CACHE_INVALIDATE | + PIPE_CONTROL_CONST_CACHE_INVALIDATE | + PIPE_CONTROL_STATE_CACHE_INVALIDATE | + PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_STORE_DATA_INDEX; + + flags &= ~mask_flags; + + dw[i++] = GFX_OP_PIPE_CONTROL(6); + dw[i++] = flags; + dw[i++] = LRC_PPHWSP_SCRATCH_ADDR; + dw[i++] = 0; + dw[i++] = 0; + dw[i++] = 0; + + return i; +} + +#define MI_STORE_QWORD_IMM_GEN8_POSTED (MI_INSTR(0x20, 3) | (1 << 21)) + +static int emit_store_imm_ppgtt_posted(u64 addr, u64 value, + u32 *dw, int i) +{ + dw[i++] = MI_STORE_QWORD_IMM_GEN8_POSTED; + dw[i++] = lower_32_bits(addr); + dw[i++] = upper_32_bits(addr); + dw[i++] = lower_32_bits(value); + dw[i++] = upper_32_bits(value); + + return i; +} + +static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw, + int i) +{ + dw[i++] = GFX_OP_PIPE_CONTROL(6); + dw[i++] = (stall_only ? PIPE_CONTROL_CS_STALL : + PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL) | + PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE; + dw[i++] = addr; + dw[i++] = 0; + dw[i++] = value; + dw[i++] = 0; /* We're thrashing one extra dword. */ + + return i; +} + +static u32 get_ppgtt_flag(struct xe_sched_job *job) +{ + return !(job->engine->flags & ENGINE_FLAG_WA) ? BIT(8) : 0; +} + +static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc, + u64 batch_addr, u32 seqno) +{ + u32 dw[MAX_JOB_SIZE_DW], i = 0; + u32 ppgtt_flag = get_ppgtt_flag(job); + + /* XXX: Conditional flushing possible */ + dw[i++] = preparser_disable(true); + i = emit_flush_invalidate(0, dw, i); + dw[i++] = preparser_disable(false); + + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), + seqno, dw, i); + + i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + + if (job->user_fence.used) + i = emit_store_imm_ppgtt_posted(job->user_fence.addr, + job->user_fence.value, + dw, i); + + i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i); + + i = emit_user_interrupt(dw, i); + + XE_BUG_ON(i > MAX_JOB_SIZE_DW); + + xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); +} + +static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, + u64 batch_addr, u32 seqno) +{ + u32 dw[MAX_JOB_SIZE_DW], i = 0; + u32 ppgtt_flag = get_ppgtt_flag(job); + struct xe_gt *gt = job->engine->gt; + struct xe_device *xe = gt_to_xe(gt); + bool decode = job->engine->class == XE_ENGINE_CLASS_VIDEO_DECODE; + + /* XXX: Conditional flushing possible */ + dw[i++] = preparser_disable(true); + i = emit_flush_invalidate(decode ? MI_INVALIDATE_BSD : 0, dw, i); + /* Wa_1809175790 */ + if (!xe->info.has_flat_ccs) { + if (decode) + i = emit_aux_table_inv(gt, GEN12_VD0_AUX_INV.reg, dw, i); + else + i = emit_aux_table_inv(gt, GEN12_VE0_AUX_INV.reg, dw, i); + } + dw[i++] = preparser_disable(false); + + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), + seqno, dw, i); + + i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + + if (job->user_fence.used) + i = emit_store_imm_ppgtt_posted(job->user_fence.addr, + job->user_fence.value, + dw, i); + + i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i); + + i = emit_user_interrupt(dw, i); + + XE_BUG_ON(i > MAX_JOB_SIZE_DW); + + xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); +} + +/* + * 3D-related flags that can't be set on _engines_ that lack access to the 3D + * pipeline (i.e., CCS engines). 
+ */ +#define PIPE_CONTROL_3D_ENGINE_FLAGS (\ + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \ + PIPE_CONTROL_DEPTH_CACHE_FLUSH | \ + PIPE_CONTROL_TILE_CACHE_FLUSH | \ + PIPE_CONTROL_DEPTH_STALL | \ + PIPE_CONTROL_STALL_AT_SCOREBOARD | \ + PIPE_CONTROL_PSD_SYNC | \ + PIPE_CONTROL_AMFS_FLUSH | \ + PIPE_CONTROL_VF_CACHE_INVALIDATE | \ + PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET) + +/* 3D-related flags that can't be set on _platforms_ that lack a 3D pipeline */ +#define PIPE_CONTROL_3D_ARCH_FLAGS ( \ + PIPE_CONTROL_3D_ENGINE_FLAGS | \ + PIPE_CONTROL_INDIRECT_STATE_DISABLE | \ + PIPE_CONTROL_FLUSH_ENABLE | \ + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \ + PIPE_CONTROL_DC_FLUSH_ENABLE) + +static void __emit_job_gen12_render_compute(struct xe_sched_job *job, + struct xe_lrc *lrc, + u64 batch_addr, u32 seqno) +{ + u32 dw[MAX_JOB_SIZE_DW], i = 0; + u32 ppgtt_flag = get_ppgtt_flag(job); + struct xe_gt *gt = job->engine->gt; + struct xe_device *xe = gt_to_xe(gt); + bool pvc = xe->info.platform == XE_PVC; + u32 mask_flags = 0; + + /* XXX: Conditional flushing possible */ + dw[i++] = preparser_disable(true); + if (pvc) + mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS; + else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE) + mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS; + i = emit_pipe_invalidate(mask_flags, dw, i); + /* Wa_1809175790 */ + if (!xe->info.has_flat_ccs) + i = emit_aux_table_inv(gt, GEN12_CCS_AUX_INV.reg, dw, i); + dw[i++] = preparser_disable(false); + + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), + seqno, dw, i); + + i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + + if (job->user_fence.used) + i = emit_store_imm_ppgtt_posted(job->user_fence.addr, + job->user_fence.value, + dw, i); + + i = emit_pipe_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, pvc, dw, i); + + i = emit_user_interrupt(dw, i); + + XE_BUG_ON(i > MAX_JOB_SIZE_DW); + + xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); +} + +static void emit_migration_job_gen12(struct xe_sched_job *job, + struct xe_lrc *lrc, u32 seqno) +{ + u32 dw[MAX_JOB_SIZE_DW], i = 0; + + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), + seqno, dw, i); + + i = emit_bb_start(job->batch_addr[0], BIT(8), dw, i); + + dw[i++] = preparser_disable(true); + i = emit_flush_invalidate(0, dw, i); + dw[i++] = preparser_disable(false); + + i = emit_bb_start(job->batch_addr[1], BIT(8), dw, i); + + dw[i++] = (MI_FLUSH_DW | MI_INVALIDATE_TLB | job->migrate_flush_flags | + MI_FLUSH_DW_OP_STOREDW) + 1; + dw[i++] = xe_lrc_seqno_ggtt_addr(lrc) | MI_FLUSH_DW_USE_GTT; + dw[i++] = 0; + dw[i++] = seqno; /* value */ + + i = emit_user_interrupt(dw, i); + + XE_BUG_ON(i > MAX_JOB_SIZE_DW); + + xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); +} + +static void emit_job_gen12_copy(struct xe_sched_job *job) +{ + int i; + + if (xe_sched_job_is_migration(job->engine)) { + emit_migration_job_gen12(job, job->engine->lrc, + xe_sched_job_seqno(job)); + return; + } + + for (i = 0; i < job->engine->width; ++i) + __emit_job_gen12_copy(job, job->engine->lrc + i, + job->batch_addr[i], + xe_sched_job_seqno(job)); +} + +static void emit_job_gen12_video(struct xe_sched_job *job) +{ + int i; + + /* FIXME: Not doing parallel handshake for now */ + for (i = 0; i < job->engine->width; ++i) + __emit_job_gen12_video(job, job->engine->lrc + i, + job->batch_addr[i], + xe_sched_job_seqno(job)); +} + +static void emit_job_gen12_render_compute(struct xe_sched_job *job) +{ + int i; + + for (i = 0; i < job->engine->width; ++i) + __emit_job_gen12_render_compute(job, job->engine->lrc + i, + job->batch_addr[i], + 
xe_sched_job_seqno(job)); +} + +static const struct xe_ring_ops ring_ops_gen12_copy = { + .emit_job = emit_job_gen12_copy, +}; + +static const struct xe_ring_ops ring_ops_gen12_video = { + .emit_job = emit_job_gen12_video, +}; + +static const struct xe_ring_ops ring_ops_gen12_render_compute = { + .emit_job = emit_job_gen12_render_compute, +}; + +const struct xe_ring_ops * +xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class) +{ + switch (class) { + case XE_ENGINE_CLASS_COPY: + return &ring_ops_gen12_copy; + case XE_ENGINE_CLASS_VIDEO_DECODE: + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + return &ring_ops_gen12_video; + case XE_ENGINE_CLASS_RENDER: + case XE_ENGINE_CLASS_COMPUTE: + return &ring_ops_gen12_render_compute; + default: + return NULL; + } +} diff --git a/drivers/gpu/drm/xe/xe_ring_ops.h b/drivers/gpu/drm/xe/xe_ring_ops.h new file mode 100644 index 000000000000..e942735d76a6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ring_ops.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_RING_OPS_H_ +#define _XE_RING_OPS_H_ + +#include "xe_hw_engine_types.h" +#include "xe_ring_ops_types.h" + +struct xe_gt; + +const struct xe_ring_ops * +xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class); + +#endif diff --git a/drivers/gpu/drm/xe/xe_ring_ops_types.h b/drivers/gpu/drm/xe/xe_ring_ops_types.h new file mode 100644 index 000000000000..1ae56e2ee7b4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ring_ops_types.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_RING_OPS_TYPES_H_ +#define _XE_RING_OPS_TYPES_H_ + +struct xe_sched_job; + +#define MAX_JOB_SIZE_DW 48 +#define MAX_JOB_SIZE_BYTES (MAX_JOB_SIZE_DW * 4) + +/** + * struct xe_ring_ops - Ring operations + */ +struct xe_ring_ops { + /** @emit_job: Write job to ring */ + void (*emit_job)(struct xe_sched_job *job); +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c new file mode 100644 index 000000000000..9e8d0e43c643 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_rtp.h" + +#include + +#include "xe_gt.h" +#include "xe_macros.h" +#include "xe_reg_sr.h" + +/** + * DOC: Register Table Processing + * + * Internal infrastructure to define how registers should be updated based on + * rules and actions. This can be used to define tables with multiple entries + * (one per register) that will be walked over at some point in time to apply + * the values to the registers that have matching rules. 
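+ *
+ * As a hedged illustration only (the table, register and value below are made
+ * up for the example and are not added by this patch; the sr, gt and hwe
+ * arguments are assumed to be in scope):
+ *
+ * .. code-block:: c
+ *
+ *	static const struct xe_rtp_entry example_entries[] = {
+ *		{ XE_RTP_NAME("example-tuning"),
+ *		  XE_RTP_RULES(PLATFORM(DG1), ENGINE_CLASS(RENDER)),
+ *		  XE_RTP_SET(HIZ_CHICKEN, BIT(3))
+ *		},
+ *		{}
+ *	};
+ *
+ *	xe_rtp_process(example_entries, sr, gt, hwe);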
+ */ + +static bool rule_matches(struct xe_gt *gt, + struct xe_hw_engine *hwe, + const struct xe_rtp_entry *entry) +{ + const struct xe_device *xe = gt_to_xe(gt); + const struct xe_rtp_rule *r; + unsigned int i; + bool match; + + for (r = entry->rules, i = 0; i < entry->n_rules; + r = &entry->rules[++i]) { + switch (r->match_type) { + case XE_RTP_MATCH_PLATFORM: + match = xe->info.platform == r->platform; + break; + case XE_RTP_MATCH_SUBPLATFORM: + match = xe->info.platform == r->platform && + xe->info.subplatform == r->subplatform; + break; + case XE_RTP_MATCH_GRAPHICS_VERSION: + /* TODO: match display */ + match = xe->info.graphics_verx100 == r->ver_start; + break; + case XE_RTP_MATCH_GRAPHICS_VERSION_RANGE: + match = xe->info.graphics_verx100 >= r->ver_start && + xe->info.graphics_verx100 <= r->ver_end; + break; + case XE_RTP_MATCH_MEDIA_VERSION: + match = xe->info.media_verx100 == r->ver_start; + break; + case XE_RTP_MATCH_MEDIA_VERSION_RANGE: + match = xe->info.media_verx100 >= r->ver_start && + xe->info.media_verx100 <= r->ver_end; + break; + case XE_RTP_MATCH_STEP: + /* TODO: match media/display */ + match = xe->info.step.graphics >= r->step_start && + xe->info.step.graphics < r->step_end; + break; + case XE_RTP_MATCH_ENGINE_CLASS: + match = hwe->class == r->engine_class; + break; + case XE_RTP_MATCH_NOT_ENGINE_CLASS: + match = hwe->class != r->engine_class; + break; + case XE_RTP_MATCH_FUNC: + match = r->match_func(gt, hwe); + break; + case XE_RTP_MATCH_INTEGRATED: + match = !xe->info.is_dgfx; + break; + case XE_RTP_MATCH_DISCRETE: + match = xe->info.is_dgfx; + break; + + default: + XE_WARN_ON(r->match_type); + } + + if (!match) + return false; + } + + return true; +} + +static void rtp_add_sr_entry(const struct xe_rtp_entry *entry, + struct xe_gt *gt, + u32 mmio_base, + struct xe_reg_sr *sr) +{ + u32 reg = entry->regval.reg + mmio_base; + struct xe_reg_sr_entry sr_entry = { + .clr_bits = entry->regval.clr_bits, + .set_bits = entry->regval.set_bits, + .read_mask = entry->regval.read_mask, + .masked_reg = entry->regval.flags & XE_RTP_FLAG_MASKED_REG, + .reg_type = entry->regval.reg_type, + }; + + xe_reg_sr_add(sr, reg, &sr_entry); +} + +/** + * xe_rtp_process - Process all rtp @entries, adding the matching ones to @sr + * @entries: Table with RTP definitions + * @sr: Where to add an entry to with the values for matching. This can be + * viewed as the "coalesced view" of multiple the tables. The bits for each + * register set are expected not to collide with previously added entries + * @gt: The GT to be used for matching rules + * @hwe: Engine instance to use for matching rules and as mmio base + * + * Walk the table pointed by @entries (with an empty sentinel) and add all + * entries with matching rules to @sr. 
If @hwe is not NULL, its mmio_base is + * used to calculate the right register offset + */ +void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr, + struct xe_gt *gt, struct xe_hw_engine *hwe) +{ + const struct xe_rtp_entry *entry; + + for (entry = entries; entry && entry->name; entry++) { + u32 mmio_base = 0; + + if (entry->regval.flags & XE_RTP_FLAG_FOREACH_ENGINE) { + struct xe_hw_engine *each_hwe; + enum xe_hw_engine_id id; + + for_each_hw_engine(each_hwe, gt, id) { + mmio_base = each_hwe->mmio_base; + + if (rule_matches(gt, each_hwe, entry)) + rtp_add_sr_entry(entry, gt, mmio_base, sr); + } + } else if (rule_matches(gt, hwe, entry)) { + if (entry->regval.flags & XE_RTP_FLAG_ENGINE_BASE) + mmio_base = hwe->mmio_base; + + rtp_add_sr_entry(entry, gt, mmio_base, sr); + } + } +} diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h new file mode 100644 index 000000000000..d4e11fdde77f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -0,0 +1,340 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_RTP_ +#define _XE_RTP_ + +#include +#include + +#include "xe_rtp_types.h" + +#include "i915_reg_defs.h" + +/* + * Register table poke infrastructure + */ + +struct xe_hw_engine; +struct xe_gt; +struct xe_reg_sr; + +/* + * Helper macros - not to be used outside this header. + */ +/* This counts to 12. Any more, it will return 13th argument. */ +#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n +#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +#define __CONCAT(a, b) a ## b +#define CONCATENATE(a, b) __CONCAT(a, b) + +#define __CALL_FOR_EACH_1(MACRO_, x, ...) MACRO_(x) +#define __CALL_FOR_EACH_2(MACRO_, x, ...) \ + MACRO_(x) __CALL_FOR_EACH_1(MACRO_, ##__VA_ARGS__) +#define __CALL_FOR_EACH_3(MACRO_, x, ...) \ + MACRO_(x) __CALL_FOR_EACH_2(MACRO_, ##__VA_ARGS__) +#define __CALL_FOR_EACH_4(MACRO_, x, ...) \ + MACRO_(x) __CALL_FOR_EACH_3(MACRO_, ##__VA_ARGS__) + +#define _CALL_FOR_EACH(NARGS_, MACRO_, x, ...) \ + CONCATENATE(__CALL_FOR_EACH_, NARGS_)(MACRO_, x, ##__VA_ARGS__) +#define CALL_FOR_EACH(MACRO_, x, ...) \ + _CALL_FOR_EACH(COUNT_ARGS(x, ##__VA_ARGS__), MACRO_, x, ##__VA_ARGS__) + +#define _XE_RTP_REG(x_) (x_), \ + .reg_type = XE_RTP_REG_REGULAR +#define _XE_RTP_MCR_REG(x_) (x_), \ + .reg_type = XE_RTP_REG_MCR + +/* + * Helper macros for concatenating prefix - do not use them directly outside + * this header + */ +#define __ADD_XE_RTP_FLAG_PREFIX(x) CONCATENATE(XE_RTP_FLAG_, x) | +#define __ADD_XE_RTP_RULE_PREFIX(x) CONCATENATE(XE_RTP_RULE_, x) , + +/* + * Macros to encode rules to match against platform, IP version, stepping, etc. + * Shouldn't be used directly - see XE_RTP_RULES() + */ + +#define _XE_RTP_RULE_PLATFORM(plat__) \ + { .match_type = XE_RTP_MATCH_PLATFORM, .platform = plat__ } + +#define _XE_RTP_RULE_SUBPLATFORM(plat__, sub__) \ + { .match_type = XE_RTP_MATCH_SUBPLATFORM, \ + .platform = plat__, .subplatform = sub__ } + +#define _XE_RTP_RULE_STEP(start__, end__) \ + { .match_type = XE_RTP_MATCH_STEP, \ + .step_start = start__, .step_end = end__ } + +#define _XE_RTP_RULE_ENGINE_CLASS(cls__) \ + { .match_type = XE_RTP_MATCH_ENGINE_CLASS, \ + .engine_class = (cls__) } + +/** + * XE_RTP_RULE_PLATFORM - Create rule matching platform + * @plat_: platform to match + * + * Refer to XE_RTP_RULES() for expected usage. 
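+ *
+ * For instance, following the helper definitions in this header (shown purely
+ * for illustration), ``XE_RTP_RULE_PLATFORM(DG1)`` expands to:
+ *
+ * .. code-block:: c
+ *
+ *	{ .match_type = XE_RTP_MATCH_PLATFORM, .platform = XE_DG1 }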
+ */ +#define XE_RTP_RULE_PLATFORM(plat_) \ + _XE_RTP_RULE_PLATFORM(XE_##plat_) + +/** + * XE_RTP_RULE_SUBPLATFORM - Create rule matching platform and sub-platform + * @plat_: platform to match + * @sub_: sub-platform to match + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_SUBPLATFORM(plat_, sub_) \ + _XE_RTP_RULE_SUBPLATFORM(XE_##plat_, XE_SUBPLATFORM_##plat_##_##sub_) + +/** + * XE_RTP_RULE_STEP - Create rule matching platform stepping + * @start_: First stepping matching the rule + * @end_: First stepping that does not match the rule + * + * Note that the range matching this rule is [ @start_, @end_ ), i.e. inclusive on + * the left, exclusive on the right. + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_STEP(start_, end_) \ + _XE_RTP_RULE_STEP(STEP_##start_, STEP_##end_) + +/** + * XE_RTP_RULE_ENGINE_CLASS - Create rule matching an engine class + * @cls_: Engine class to match + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_ENGINE_CLASS(cls_) \ + _XE_RTP_RULE_ENGINE_CLASS(XE_ENGINE_CLASS_##cls_) + +/** + * XE_RTP_RULE_FUNC - Create rule using callback function for match + * @func__: Function to call to decide if rule matches + * + * This allows more complex checks to be performed. The ``XE_RTP`` + * infrastructure will simply call the function @func__ passed to decide if this + * rule matches the device. + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_FUNC(func__) \ + { .match_type = XE_RTP_MATCH_FUNC, \ + .match_func = (func__) } + +/** + * XE_RTP_RULE_GRAPHICS_VERSION - Create rule matching graphics version + * @ver__: Graphics IP version to match + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_GRAPHICS_VERSION(ver__) \ + { .match_type = XE_RTP_MATCH_GRAPHICS_VERSION, \ + .ver_start = ver__, } + +/** + * XE_RTP_RULE_GRAPHICS_VERSION_RANGE - Create rule matching a range of graphics version + * @ver_start__: First graphics IP version to match + * @ver_end__: Last graphics IP version to match + * + * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e. + * inclusive on both sides + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_GRAPHICS_VERSION_RANGE(ver_start__, ver_end__) \ + { .match_type = XE_RTP_MATCH_GRAPHICS_VERSION_RANGE, \ + .ver_start = ver_start__, .ver_end = ver_end__, } + +/** + * XE_RTP_RULE_MEDIA_VERSION - Create rule matching media version + * @ver__: Media IP version to match + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_MEDIA_VERSION(ver__) \ + { .match_type = XE_RTP_MATCH_MEDIA_VERSION, \ + .ver_start = ver__, } + +/** + * XE_RTP_RULE_MEDIA_VERSION_RANGE - Create rule matching a range of media version + * @ver_start__: First media IP version to match + * @ver_end__: Last media IP version to match + * + * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e. + * inclusive on both sides + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_MEDIA_VERSION_RANGE(ver_start__, ver_end__) \ + { .match_type = XE_RTP_MATCH_MEDIA_VERSION_RANGE, \ + .ver_start = ver_start__, .ver_end = ver_end__, } + +/** + * XE_RTP_RULE_IS_INTEGRATED - Create a rule matching integrated graphics devices + * + * Refer to XE_RTP_RULES() for expected usage.
+ */ +#define XE_RTP_RULE_IS_INTEGRATED \ + { .match_type = XE_RTP_MATCH_INTEGRATED } + +/** + * XE_RTP_RULE_IS_DISCRETE - Create a rule matching discrete graphics devices + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_IS_DISCRETE \ + { .match_type = XE_RTP_MATCH_DISCRETE } + +/** + * XE_RTP_WR - Helper to write a value to the register, overriding all the bits + * @reg_: Register + * @val_: Value to set + * @...: Additional fields to override in the struct xe_rtp_regval entry + * + * The correspondent notation in bspec is: + * + * REGNAME = VALUE + */ +#define XE_RTP_WR(reg_, val_, ...) \ + .regval = { .reg = reg_, .clr_bits = ~0u, .set_bits = (val_), \ + .read_mask = (~0u), ##__VA_ARGS__ } + +/** + * XE_RTP_SET - Set bits from @val_ in the register. + * @reg_: Register + * @val_: Bits to set in the register + * @...: Additional fields to override in the struct xe_rtp_regval entry + * + * For masked registers this translates to a single write, while for other + * registers it's a RMW. The correspondent bspec notation is (example for bits 2 + * and 5, but could be any): + * + * REGNAME[2] = 1 + * REGNAME[5] = 1 + */ +#define XE_RTP_SET(reg_, val_, ...) \ + .regval = { .reg = reg_, .clr_bits = (val_), .set_bits = (val_), \ + .read_mask = (val_), ##__VA_ARGS__ } + +/** + * XE_RTP_CLR: Clear bits from @val_ in the register. + * @reg_: Register + * @val_: Bits to clear in the register + * @...: Additional fields to override in the struct xe_rtp_regval entry + * + * For masked registers this translates to a single write, while for other + * registers it's a RMW. The correspondent bspec notation is (example for bits 2 + * and 5, but could be any): + * + * REGNAME[2] = 0 + * REGNAME[5] = 0 + */ +#define XE_RTP_CLR(reg_, val_, ...) \ + .regval = { .reg = reg_, .clr_bits = (val_), .set_bits = 0, \ + .read_mask = (val_), ##__VA_ARGS__ } + +/** + * XE_RTP_FIELD_SET: Set a bit range, defined by @mask_bits_, to the value in @val_ + * @reg_: Register + * @mask_bits_: Mask of bits to be changed in the register, forming a field + * @val_: Value to set in the field denoted by @mask_bits_ + * @...: Additional fields to override in the struct xe_rtp_regval entry + * + * For masked registers this translates to a single write, while for other + * registers it's a RMW. The correspondent bspec notation is: + * + * REGNAME[:] = VALUE + */ +#define XE_RTP_FIELD_SET(reg_, mask_bits_, val_, ...) \ + .regval = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\ + .read_mask = (mask_bits_), ##__VA_ARGS__ } + +#define XE_RTP_FIELD_SET_NO_READ_MASK(reg_, mask_bits_, val_, ...) \ + .regval = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\ + .read_mask = 0, ##__VA_ARGS__ } + +/** + * XE_WHITELIST_REGISTER - Add register to userspace whitelist + * @reg_: Register + * @flags_: Whitelist-specific flags to set + * @...: Additional fields to override in the struct xe_rtp_regval entry + * + * Add a register to the whitelist, allowing userspace to modify the register with + * regular user privileges. + */ +#define XE_WHITELIST_REGISTER(reg_, flags_, ...) \ + /* TODO fail build if ((flags) & ~(RING_FORCE_TO_NONPRIV_MASK_VALID)) */\ + .regval = { .reg = reg_, .set_bits = (flags_), \ + .clr_bits = RING_FORCE_TO_NONPRIV_MASK_VALID, \ + ##__VA_ARGS__ } + +/** + * XE_RTP_NAME - Helper to set the name in xe_rtp_entry + * @s_: Name describing this rule, often a HW-specific number + * + * TODO: maybe move this behind a debug config?
+ */ +#define XE_RTP_NAME(s_) .name = (s_) + +/** + * XE_RTP_FLAG - Helper to add multiple flags to a struct xe_rtp_regval entry + * @f1_: Last part of a ``XE_RTP_FLAG_*`` + * @...: Additional flags, defined like @f1_ + * + * Helper to automatically add a ``XE_RTP_FLAG_`` prefix to @f1_ so it can be + * easily used to define struct xe_rtp_regval entries. Example: + * + * .. code-block:: c + * + * const struct xe_rtp_entry wa_entries[] = { + * ... + * { XE_RTP_NAME("test-entry"), + * XE_RTP_FLAG(FOREACH_ENGINE, MASKED_REG), + * ... + * }, + * ... + * }; + */ +#define XE_RTP_FLAG(f1_, ...) \ + .flags = (CALL_FOR_EACH(__ADD_XE_RTP_FLAG_PREFIX, f1_, ##__VA_ARGS__) 0) + +/** + * XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry entry + * @r1: Last part of XE_RTP_MATCH_* + * @...: Additional rules, defined like @r1 + * + * At least one rule is needed and up to 4 are supported. Multiple rules are + * AND'ed together, i.e. all the rules must evaluate to true for the entry to + * be processed. See XE_RTP_MATCH_* for the possible match rules. Example: + * + * .. code-block:: c + * + * const struct xe_rtp_entry wa_entries[] = { + * ... + * { XE_RTP_NAME("test-entry"), + * XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + * ... + * }, + * ... + * }; + */ +#define XE_RTP_RULES(r1, ...) \ + .n_rules = COUNT_ARGS(r1, ##__VA_ARGS__), \ + .rules = (struct xe_rtp_rule[]) { \ + CALL_FOR_EACH(__ADD_XE_RTP_RULE_PREFIX, r1, ##__VA_ARGS__) \ + } + +void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr, + struct xe_gt *gt, struct xe_hw_engine *hwe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h new file mode 100644 index 000000000000..b55b556a2495 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_RTP_TYPES_ +#define _XE_RTP_TYPES_ + +#include + +#include "i915_reg_defs.h" + +struct xe_hw_engine; +struct xe_gt; + +enum { + XE_RTP_REG_REGULAR, + XE_RTP_REG_MCR, +}; + +/** + * struct xe_rtp_regval - register and value for rtp table + */ +struct xe_rtp_regval { + /** @reg: Register */ + u32 reg; + /* + * TODO: maybe we need a union here with a func pointer for cases + * that are too specific to be generalized + */ + /** @clr_bits: bits to clear when updating register */ + u32 clr_bits; + /** @set_bits: bits to set when updating register */ + u32 set_bits; +#define XE_RTP_NOCHECK .read_mask = 0 + /** @read_mask: mask for bits to consider when reading value back */ + u32 read_mask; +#define XE_RTP_FLAG_FOREACH_ENGINE BIT(0) +#define XE_RTP_FLAG_MASKED_REG BIT(1) +#define XE_RTP_FLAG_ENGINE_BASE BIT(2) + /** @flags: flags to apply on rule evaluation or action */ + u8 flags; + /** @reg_type: register type, see ``XE_RTP_REG_*`` */ + u8 reg_type; +}; + +enum { + XE_RTP_MATCH_PLATFORM, + XE_RTP_MATCH_SUBPLATFORM, + XE_RTP_MATCH_GRAPHICS_VERSION, + XE_RTP_MATCH_GRAPHICS_VERSION_RANGE, + XE_RTP_MATCH_MEDIA_VERSION, + XE_RTP_MATCH_MEDIA_VERSION_RANGE, + XE_RTP_MATCH_INTEGRATED, + XE_RTP_MATCH_DISCRETE, + XE_RTP_MATCH_STEP, + XE_RTP_MATCH_ENGINE_CLASS, + XE_RTP_MATCH_NOT_ENGINE_CLASS, + XE_RTP_MATCH_FUNC, +}; + +/** struct xe_rtp_rule - match rule for processing entry */ +struct xe_rtp_rule { + u8 match_type; + + /* match filters */ + union { + /* MATCH_PLATFORM / MATCH_SUBPLATFORM */ + struct { + u8 platform; + u8 subplatform; + }; + /* + * MATCH_GRAPHICS_VERSION / XE_RTP_MATCH_GRAPHICS_VERSION_RANGE / + * 
MATCH_MEDIA_VERSION / XE_RTP_MATCH_MEDIA_VERSION_RANGE + */ + struct { + u32 ver_start; +#define XE_RTP_END_VERSION_UNDEFINED U32_MAX + u32 ver_end; + }; + /* MATCH_STEP */ + struct { + u8 step_start; + u8 step_end; + }; + /* MATCH_ENGINE_CLASS / MATCH_NOT_ENGINE_CLASS */ + struct { + u8 engine_class; + }; + /* MATCH_FUNC */ + bool (*match_func)(const struct xe_gt *gt, + const struct xe_hw_engine *hwe); + }; +}; + +/** struct xe_rtp_entry - Entry in an rtp table */ +struct xe_rtp_entry { + const char *name; + const struct xe_rtp_regval regval; + const struct xe_rtp_rule *rules; + unsigned int n_rules; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c new file mode 100644 index 000000000000..7403410cd806 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_map.h" +#include "xe_sa.h" + +static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg) +{ + struct xe_sa_manager *sa_manager = arg; + struct xe_bo *bo = sa_manager->bo; + + if (!bo) { + drm_err(drm, "no bo for sa manager\n"); + return; + } + + drm_suballoc_manager_fini(&sa_manager->base); + + if (bo->vmap.is_iomem) + kvfree(sa_manager->cpu_ptr); + + xe_bo_unpin_map_no_vm(bo); + sa_manager->bo = NULL; +} + +int xe_sa_bo_manager_init(struct xe_gt *gt, + struct xe_sa_manager *sa_manager, + u32 size, u32 align) +{ + struct xe_device *xe = gt_to_xe(gt); + u32 managed_size = size - SZ_4K; + struct xe_bo *bo; + + sa_manager->bo = NULL; + + bo = xe_bo_create_pin_map(xe, gt, NULL, size, ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) { + drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n", + PTR_ERR(bo)); + return PTR_ERR(bo); + } + sa_manager->bo = bo; + + drm_suballoc_manager_init(&sa_manager->base, managed_size, align); + sa_manager->gpu_addr = xe_bo_ggtt_addr(bo); + + if (bo->vmap.is_iomem) { + sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL); + if (!sa_manager->cpu_ptr) { + xe_bo_unpin_map_no_vm(sa_manager->bo); + sa_manager->bo = NULL; + return -ENOMEM; + } + } else { + sa_manager->cpu_ptr = bo->vmap.vaddr; + memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size); + } + + return drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini, + sa_manager); +} + +struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, + unsigned size) +{ + return drm_suballoc_new(&sa_manager->base, size, GFP_KERNEL, true, 0); +} + +void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo) +{ + struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager); + struct xe_device *xe = gt_to_xe(sa_manager->bo->gt); + + if (!sa_manager->bo->vmap.is_iomem) + return; + + xe_map_memcpy_to(xe, &sa_manager->bo->vmap, drm_suballoc_soffset(sa_bo), + xe_sa_bo_cpu_addr(sa_bo), + drm_suballoc_size(sa_bo)); +} + +void xe_sa_bo_free(struct drm_suballoc *sa_bo, + struct dma_fence *fence) +{ + drm_suballoc_free(sa_bo, fence); +} diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h new file mode 100644 index 000000000000..742282ef7179 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sa.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ +#ifndef _XE_SA_H_ +#define _XE_SA_H_ + +#include "xe_sa_types.h" + +struct dma_fence; +struct xe_bo; +struct xe_gt; + +int xe_sa_bo_manager_init(struct xe_gt *gt, + struct 
xe_sa_manager *sa_manager, + u32 size, u32 align); + +struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, + u32 size); +void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo); +void xe_sa_bo_free(struct drm_suballoc *sa_bo, + struct dma_fence *fence); + +static inline struct xe_sa_manager * +to_xe_sa_manager(struct drm_suballoc_manager *mng) +{ + return container_of(mng, struct xe_sa_manager, base); +} + +static inline u64 xe_sa_bo_gpu_addr(struct drm_suballoc *sa) +{ + return to_xe_sa_manager(sa->manager)->gpu_addr + + drm_suballoc_soffset(sa); +} + +static inline void *xe_sa_bo_cpu_addr(struct drm_suballoc *sa) +{ + return to_xe_sa_manager(sa->manager)->cpu_ptr + + drm_suballoc_soffset(sa); +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h new file mode 100644 index 000000000000..2ef896aeca1d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sa_types.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ +#ifndef _XE_SA_TYPES_H_ +#define _XE_SA_TYPES_H_ + +#include + +struct xe_bo; + +struct xe_sa_manager { + struct drm_suballoc_manager base; + struct xe_bo *bo; + u64 gpu_addr; + void *cpu_ptr; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c new file mode 100644 index 000000000000..ab81bfe17e8a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_sched_job.h" + +#include +#include + +#include "xe_device_types.h" +#include "xe_engine.h" +#include "xe_gt.h" +#include "xe_hw_engine_types.h" +#include "xe_hw_fence.h" +#include "xe_lrc.h" +#include "xe_macros.h" +#include "xe_trace.h" +#include "xe_vm.h" + +static struct kmem_cache *xe_sched_job_slab; +static struct kmem_cache *xe_sched_job_parallel_slab; + +int __init xe_sched_job_module_init(void) +{ + xe_sched_job_slab = + kmem_cache_create("xe_sched_job", + sizeof(struct xe_sched_job) + + sizeof(u64), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!xe_sched_job_slab) + return -ENOMEM; + + xe_sched_job_parallel_slab = + kmem_cache_create("xe_sched_job_parallel", + sizeof(struct xe_sched_job) + + sizeof(u64) * + XE_HW_ENGINE_MAX_INSTANCE , 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!xe_sched_job_parallel_slab) { + kmem_cache_destroy(xe_sched_job_slab); + return -ENOMEM; + } + + return 0; +} + +void xe_sched_job_module_exit(void) +{ + kmem_cache_destroy(xe_sched_job_slab); + kmem_cache_destroy(xe_sched_job_parallel_slab); +} + +static struct xe_sched_job *job_alloc(bool parallel) +{ + return kmem_cache_zalloc(parallel ? xe_sched_job_parallel_slab : + xe_sched_job_slab, GFP_KERNEL); +} + +bool xe_sched_job_is_migration(struct xe_engine *e) +{ + return e->vm && (e->vm->flags & XE_VM_FLAG_MIGRATION) && + !(e->flags & ENGINE_FLAG_WA); +} + +static void job_free(struct xe_sched_job *job) +{ + struct xe_engine *e = job->engine; + bool is_migration = xe_sched_job_is_migration(e); + + kmem_cache_free(xe_engine_is_parallel(job->engine) || is_migration ? 
+ xe_sched_job_parallel_slab : xe_sched_job_slab, job); +} + +struct xe_sched_job *xe_sched_job_create(struct xe_engine *e, + u64 *batch_addr) +{ + struct xe_sched_job *job; + struct dma_fence **fences; + bool is_migration = xe_sched_job_is_migration(e); + int err; + int i, j; + u32 width; + + /* Migration and kernel engines have their own locking */ + if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM | + ENGINE_FLAG_WA))) { + lockdep_assert_held(&e->vm->lock); + if (!xe_vm_no_dma_fences(e->vm)) + xe_vm_assert_held(e->vm); + } + + job = job_alloc(xe_engine_is_parallel(e) || is_migration); + if (!job) + return ERR_PTR(-ENOMEM); + + job->engine = e; + kref_init(&job->refcount); + xe_engine_get(job->engine); + + err = drm_sched_job_init(&job->drm, e->entity, 1, NULL); + if (err) + goto err_free; + + if (!xe_engine_is_parallel(e)) { + job->fence = xe_lrc_create_seqno_fence(e->lrc); + if (IS_ERR(job->fence)) { + err = PTR_ERR(job->fence); + goto err_sched_job; + } + } else { + struct dma_fence_array *cf; + + fences = kmalloc_array(e->width, sizeof(*fences), GFP_KERNEL); + if (!fences) { + err = -ENOMEM; + goto err_sched_job; + } + + for (j = 0; j < e->width; ++j) { + fences[j] = xe_lrc_create_seqno_fence(e->lrc + j); + if (IS_ERR(fences[j])) { + err = PTR_ERR(fences[j]); + goto err_fences; + } + } + + cf = dma_fence_array_create(e->width, fences, + e->parallel.composite_fence_ctx, + e->parallel.composite_fence_seqno++, + false); + if (!cf) { + --e->parallel.composite_fence_seqno; + err = -ENOMEM; + goto err_fences; + } + + /* Sanity check */ + for (j = 0; j < e->width; ++j) + XE_BUG_ON(cf->base.seqno != fences[j]->seqno); + + job->fence = &cf->base; + } + + width = e->width; + if (is_migration) + width = 2; + + for (i = 0; i < width; ++i) + job->batch_addr[i] = batch_addr[i]; + + trace_xe_sched_job_create(job); + return job; + +err_fences: + for (j = j - 1; j >= 0; --j) { + --e->lrc[j].fence_ctx.next_seqno; + dma_fence_put(fences[j]); + } + kfree(fences); +err_sched_job: + drm_sched_job_cleanup(&job->drm); +err_free: + xe_engine_put(e); + job_free(job); + return ERR_PTR(err); +} + +/** + * xe_sched_job_destroy - Destroy XE schedule job + * @ref: reference to XE schedule job + * + * Called when ref == 0, drop a reference to job's xe_engine + fence, cleanup + * base DRM schedule job, and free memory for XE schedule job. 
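+ *
+ * In practice this is reached through kref_put() rather than called directly,
+ * for example (illustrative only):
+ *
+ * .. code-block:: c
+ *
+ *	kref_put(&job->refcount, xe_sched_job_destroy);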
+ */ +void xe_sched_job_destroy(struct kref *ref) +{ + struct xe_sched_job *job = + container_of(ref, struct xe_sched_job, refcount); + + xe_engine_put(job->engine); + dma_fence_put(job->fence); + drm_sched_job_cleanup(&job->drm); + job_free(job); +} + +void xe_sched_job_set_error(struct xe_sched_job *job, int error) +{ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) + return; + + dma_fence_set_error(job->fence, error); + + if (dma_fence_is_array(job->fence)) { + struct dma_fence_array *array = + to_dma_fence_array(job->fence); + struct dma_fence **child = array->fences; + unsigned int nchild = array->num_fences; + + do { + struct dma_fence *current_fence = *child++; + + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &current_fence->flags)) + continue; + dma_fence_set_error(current_fence, error); + } while (--nchild); + } + + trace_xe_sched_job_set_error(job); + + dma_fence_enable_sw_signaling(job->fence); + xe_hw_fence_irq_run(job->engine->fence_irq); +} + +bool xe_sched_job_started(struct xe_sched_job *job) +{ + struct xe_lrc *lrc = job->engine->lrc; + + return xe_lrc_start_seqno(lrc) >= xe_sched_job_seqno(job); +} + +bool xe_sched_job_completed(struct xe_sched_job *job) +{ + struct xe_lrc *lrc = job->engine->lrc; + + /* + * Can safely check just LRC[0] seqno as that is last seqno written when + * parallel handshake is done. + */ + + return xe_lrc_seqno(lrc) >= xe_sched_job_seqno(job); +} + +void xe_sched_job_arm(struct xe_sched_job *job) +{ + drm_sched_job_arm(&job->drm); +} + +void xe_sched_job_push(struct xe_sched_job *job) +{ + xe_sched_job_get(job); + trace_xe_sched_job_exec(job); + drm_sched_entity_push_job(&job->drm); + xe_sched_job_put(job); +} diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h new file mode 100644 index 000000000000..5315ad8656c2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sched_job.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_SCHED_JOB_H_ +#define _XE_SCHED_JOB_H_ + +#include "xe_sched_job_types.h" + +#define XE_SCHED_HANG_LIMIT 1 +#define XE_SCHED_JOB_TIMEOUT LONG_MAX + +int xe_sched_job_module_init(void); +void xe_sched_job_module_exit(void); + +struct xe_sched_job *xe_sched_job_create(struct xe_engine *e, + u64 *batch_addr); +void xe_sched_job_destroy(struct kref *ref); + +/** + * xe_sched_job_get - get reference to XE schedule job + * @job: XE schedule job object + * + * Increment XE schedule job's reference count + */ +static inline struct xe_sched_job *xe_sched_job_get(struct xe_sched_job *job) +{ + kref_get(&job->refcount); + return job; +} + +/** + * xe_sched_job_put - put reference to XE schedule job + * @job: XE schedule job object + * + * Decrement XE schedule job's reference count, call xe_sched_job_destroy when + * reference count == 0.
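+ *
+ * A minimal, illustrative pairing with xe_sched_job_get() (editor's sketch;
+ * use_job() is a hypothetical placeholder, not an API from this patch):
+ *
+ * .. code-block:: c
+ *
+ *	struct xe_sched_job *ref = xe_sched_job_get(job);
+ *
+ *	use_job(ref);
+ *	xe_sched_job_put(ref);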
+ */ +static inline void xe_sched_job_put(struct xe_sched_job *job) +{ + kref_put(&job->refcount, xe_sched_job_destroy); +} + +void xe_sched_job_set_error(struct xe_sched_job *job, int error); +static inline bool xe_sched_job_is_error(struct xe_sched_job *job) +{ + return job->fence->error < 0; +} + +bool xe_sched_job_started(struct xe_sched_job *job); +bool xe_sched_job_completed(struct xe_sched_job *job); + +void xe_sched_job_arm(struct xe_sched_job *job); +void xe_sched_job_push(struct xe_sched_job *job); + +static inline struct xe_sched_job * +to_xe_sched_job(struct drm_sched_job *drm) +{ + return container_of(drm, struct xe_sched_job, drm); +} + +static inline u32 xe_sched_job_seqno(struct xe_sched_job *job) +{ + return job->fence->seqno; +} + +static inline void +xe_sched_job_add_migrate_flush(struct xe_sched_job *job, u32 flags) +{ + job->migrate_flush_flags = flags; +} + +bool xe_sched_job_is_migration(struct xe_engine *e); + +#endif diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h new file mode 100644 index 000000000000..fd1d75996127 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_SCHED_JOB_TYPES_H_ +#define _XE_SCHED_JOB_TYPES_H_ + +#include + +#include + +struct xe_engine; + +/** + * struct xe_sched_job - XE schedule job (batch buffer tracking) + */ +struct xe_sched_job { + /** @drm: base DRM scheduler job */ + struct drm_sched_job drm; + /** @engine: XE submission engine */ + struct xe_engine *engine; + /** @refcount: ref count of this job */ + struct kref refcount; + /** + * @fence: dma fence to indicate completion. 1 way relationship - job + * can safely reference fence, fence cannot safely reference job. + */ +#define JOB_FLAG_SUBMIT DMA_FENCE_FLAG_USER_BITS + struct dma_fence *fence; + /** @user_fence: write back value when BB is complete */ + struct { + /** @used: user fence is used */ + bool used; + /** @addr: address to write to */ + u64 addr; + /** @value: write back value */ + u64 value; + } user_fence; + /** @migrate_flush_flags: Additional flush flags for migration jobs */ + u32 migrate_flush_flags; + /** @batch_addr: batch buffer address of job */ + u64 batch_addr[0]; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c new file mode 100644 index 000000000000..ca77d0971529 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_step.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_step.h" + +#include "xe_device.h" +#include "xe_platform_types.h" + +/* + * Provide mapping between PCI's revision ID to the individual GMD + * (Graphics/Media/Display) stepping values that can be compared numerically. + * + * Some platforms may have unusual ways of mapping PCI revision ID to GMD + * steppings. E.g., in some cases a higher PCI revision may translate to a + * lower stepping of the GT and/or display IP. + * + * Also note that some revisions/steppings may have been set aside as + * placeholders but never materialized in real hardware; in those cases there + * may be jumps in the revision IDs or stepping values in the tables below. + */ + +/* + * Some platforms always have the same stepping value for GT and display; + * use a macro to define these to make it easier to identify the platforms + * where the two steppings can deviate. 
+ */ +#define COMMON_GT_MEDIA_STEP(x_) \ + .graphics = STEP_##x_, \ + .media = STEP_##x_ + +#define COMMON_STEP(x_) \ + COMMON_GT_MEDIA_STEP(x_), \ + .graphics = STEP_##x_, \ + .media = STEP_##x_, \ + .display = STEP_##x_ + +__diag_push(); +__diag_ignore_all("-Woverride-init", "Allow field overrides in table"); + +/* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */ +static const struct xe_step_info tgl_revids[] = { + [0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 }, + [1] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_D0 }, +}; + +static const struct xe_step_info dg1_revids[] = { + [0] = { COMMON_STEP(A0) }, + [1] = { COMMON_STEP(B0) }, +}; + +static const struct xe_step_info adls_revids[] = { + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, + [0x1] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A2 }, + [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 }, + [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_B0 }, + [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 }, +}; + +static const struct xe_step_info dg2_g10_revid_step_tbl[] = { + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, + [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_A0 }, + [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 }, + [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_C0 }, +}; + +static const struct xe_step_info dg2_g11_revid_step_tbl[] = { + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 }, + [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_C0 }, + [0x5] = { COMMON_GT_MEDIA_STEP(B1), .display = STEP_C0 }, +}; + +static const struct xe_step_info dg2_g12_revid_step_tbl[] = { + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_C0 }, + [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_C0 }, +}; + +static const struct xe_step_info pvc_revid_step_tbl[] = { + [0x3] = { .graphics = STEP_A0 }, + [0x5] = { .graphics = STEP_B0 }, + [0x6] = { .graphics = STEP_B1 }, + [0x7] = { .graphics = STEP_C0 }, +}; + +static const int pvc_basedie_subids[] = { + [0x0] = STEP_A0, + [0x3] = STEP_B0, + [0x4] = STEP_B1, + [0x5] = STEP_B3, +}; + +__diag_pop(); + +struct xe_step_info xe_step_get(struct xe_device *xe) +{ + const struct xe_step_info *revids = NULL; + struct xe_step_info step = {}; + u16 revid = xe->info.revid; + int size = 0; + const int *basedie_info = NULL; + int basedie_size = 0; + int baseid = 0; + + if (xe->info.platform == XE_PVC) { + baseid = FIELD_GET(GENMASK(5, 3), xe->info.revid); + revid = FIELD_GET(GENMASK(2, 0), xe->info.revid); + revids = pvc_revid_step_tbl; + size = ARRAY_SIZE(pvc_revid_step_tbl); + basedie_info = pvc_basedie_subids; + basedie_size = ARRAY_SIZE(pvc_basedie_subids); + } else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10) { + revids = dg2_g10_revid_step_tbl; + size = ARRAY_SIZE(dg2_g10_revid_step_tbl); + } else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G11) { + revids = dg2_g11_revid_step_tbl; + size = ARRAY_SIZE(dg2_g11_revid_step_tbl); + } else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G12) { + revids = dg2_g12_revid_step_tbl; + size = ARRAY_SIZE(dg2_g12_revid_step_tbl); + } else if (xe->info.platform == XE_ALDERLAKE_S) { + revids = adls_revids; + size = ARRAY_SIZE(adls_revids); + } else if (xe->info.platform == XE_DG1) { + revids = dg1_revids; + size = ARRAY_SIZE(dg1_revids); + } else if (xe->info.platform == XE_TIGERLAKE) { + revids = tgl_revids; + size = ARRAY_SIZE(tgl_revids); + } + + /* Not using the stepping scheme for the platform yet. 
*/ + if (!revids) + return step; + + if (revid < size && revids[revid].graphics != STEP_NONE) { + step = revids[revid]; + } else { + drm_warn(&xe->drm, "Unknown revid 0x%02x\n", revid); + + /* + * If we hit a gap in the revid array, use the information for + * the next revid. + * + * This may be wrong in all sorts of ways, especially if the + * steppings in the array are not monotonically increasing, but + * it's better than defaulting to 0. + */ + while (revid < size && revids[revid].graphics == STEP_NONE) + revid++; + + if (revid < size) { + drm_dbg(&xe->drm, "Using steppings for revid 0x%02x\n", + revid); + step = revids[revid]; + } else { + drm_dbg(&xe->drm, "Using future steppings\n"); + step.graphics = STEP_FUTURE; + step.display = STEP_FUTURE; + } + } + + drm_WARN_ON(&xe->drm, step.graphics == STEP_NONE); + + if (basedie_info && basedie_size) { + if (baseid < basedie_size && basedie_info[baseid] != STEP_NONE) { + step.basedie = basedie_info[baseid]; + } else { + drm_warn(&xe->drm, "Unknown baseid 0x%02x\n", baseid); + step.basedie = STEP_FUTURE; + } + } + + return step; +} + +#define STEP_NAME_CASE(name) \ + case STEP_##name: \ + return #name; + +const char *xe_step_name(enum xe_step step) +{ + switch (step) { + STEP_NAME_LIST(STEP_NAME_CASE); + + default: + return "**"; + } +} diff --git a/drivers/gpu/drm/xe/xe_step.h b/drivers/gpu/drm/xe/xe_step.h new file mode 100644 index 000000000000..0c596c8579fb --- /dev/null +++ b/drivers/gpu/drm/xe/xe_step.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_STEP_H_ +#define _XE_STEP_H_ + +#include + +#include "xe_step_types.h" + +struct xe_device; + +struct xe_step_info xe_step_get(struct xe_device *xe); +const char *xe_step_name(enum xe_step step); + +#endif diff --git a/drivers/gpu/drm/xe/xe_step_types.h b/drivers/gpu/drm/xe/xe_step_types.h new file mode 100644 index 000000000000..b7859f9647ca --- /dev/null +++ b/drivers/gpu/drm/xe/xe_step_types.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_STEP_TYPES_H_ +#define _XE_STEP_TYPES_H_ + +#include + +struct xe_step_info { + u8 graphics; + u8 media; + u8 display; + u8 basedie; +}; + +#define STEP_ENUM_VAL(name) STEP_##name, + +#define STEP_NAME_LIST(func) \ + func(A0) \ + func(A1) \ + func(A2) \ + func(B0) \ + func(B1) \ + func(B2) \ + func(B3) \ + func(C0) \ + func(C1) \ + func(D0) \ + func(D1) \ + func(E0) \ + func(F0) \ + func(G0) \ + func(H0) \ + func(I0) \ + func(I1) \ + func(J0) + +/* + * Symbolic steppings that do not match the hardware. These are valid both as gt + * and display steppings as symbolic names. 
+ */ +enum xe_step { + STEP_NONE = 0, + STEP_NAME_LIST(STEP_ENUM_VAL) + STEP_FUTURE, + STEP_FOREVER, +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c new file mode 100644 index 000000000000..0fbd8d0978cf --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_sync.h" + +#include +#include +#include +#include +#include +#include + +#include "xe_device_types.h" +#include "xe_sched_job_types.h" +#include "xe_macros.h" + +#define SYNC_FLAGS_TYPE_MASK 0x3 +#define SYNC_FLAGS_FENCE_INSTALLED 0x10000 + +struct user_fence { + struct xe_device *xe; + struct kref refcount; + struct dma_fence_cb cb; + struct work_struct worker; + struct mm_struct *mm; + u64 __user *addr; + u64 value; +}; + +static void user_fence_destroy(struct kref *kref) +{ + struct user_fence *ufence = container_of(kref, struct user_fence, + refcount); + + mmdrop(ufence->mm); + kfree(ufence); +} + +static void user_fence_get(struct user_fence *ufence) +{ + kref_get(&ufence->refcount); +} + +static void user_fence_put(struct user_fence *ufence) +{ + kref_put(&ufence->refcount, user_fence_destroy); +} + +static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr, + u64 value) +{ + struct user_fence *ufence; + + ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); + if (!ufence) + return NULL; + + ufence->xe = xe; + kref_init(&ufence->refcount); + ufence->addr = u64_to_user_ptr(addr); + ufence->value = value; + ufence->mm = current->mm; + mmgrab(ufence->mm); + + return ufence; +} + +static void user_fence_worker(struct work_struct *w) +{ + struct user_fence *ufence = container_of(w, struct user_fence, worker); + + if (mmget_not_zero(ufence->mm)) { + kthread_use_mm(ufence->mm); + if (copy_to_user(ufence->addr, &ufence->value, sizeof(ufence->value))) + XE_WARN_ON("Copy to user failed"); + kthread_unuse_mm(ufence->mm); + mmput(ufence->mm); + } + + wake_up_all(&ufence->xe->ufence_wq); + user_fence_put(ufence); +} + +static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence) +{ + INIT_WORK(&ufence->worker, user_fence_worker); + queue_work(ufence->xe->ordered_wq, &ufence->worker); + dma_fence_put(fence); +} + +static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct user_fence *ufence = container_of(cb, struct user_fence, cb); + + kick_ufence(ufence, fence); +} + +int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, + struct xe_sync_entry *sync, + struct drm_xe_sync __user *sync_user, + bool exec, bool no_dma_fences) +{ + struct drm_xe_sync sync_in; + int err; + + if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user))) + return -EFAULT; + + if (XE_IOCTL_ERR(xe, sync_in.flags & + ~(SYNC_FLAGS_TYPE_MASK | DRM_XE_SYNC_SIGNAL))) + return -EINVAL; + + switch (sync_in.flags & SYNC_FLAGS_TYPE_MASK) { + case DRM_XE_SYNC_SYNCOBJ: + if (XE_IOCTL_ERR(xe, no_dma_fences)) + return -ENOTSUPP; + + if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr))) + return -EINVAL; + + sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle); + if (XE_IOCTL_ERR(xe, !sync->syncobj)) + return -ENOENT; + + if (!(sync_in.flags & DRM_XE_SYNC_SIGNAL)) { + sync->fence = drm_syncobj_fence_get(sync->syncobj); + if (XE_IOCTL_ERR(xe, !sync->fence)) + return -EINVAL; + } + break; + + case DRM_XE_SYNC_TIMELINE_SYNCOBJ: + if (XE_IOCTL_ERR(xe, no_dma_fences)) + return -ENOTSUPP; + + if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr))) + return -EINVAL; + + if 
(XE_IOCTL_ERR(xe, sync_in.timeline_value == 0)) + return -EINVAL; + + sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle); + if (XE_IOCTL_ERR(xe, !sync->syncobj)) + return -ENOENT; + + if (sync_in.flags & DRM_XE_SYNC_SIGNAL) { + sync->chain_fence = dma_fence_chain_alloc(); + if (!sync->chain_fence) + return -ENOMEM; + } else { + sync->fence = drm_syncobj_fence_get(sync->syncobj); + if (XE_IOCTL_ERR(xe, !sync->fence)) + return -EINVAL; + + err = dma_fence_chain_find_seqno(&sync->fence, + sync_in.timeline_value); + if (err) + return err; + } + break; + + case DRM_XE_SYNC_DMA_BUF: + if (XE_IOCTL_ERR(xe, "TODO")) + return -EINVAL; + break; + + case DRM_XE_SYNC_USER_FENCE: + if (XE_IOCTL_ERR(xe, !(sync_in.flags & DRM_XE_SYNC_SIGNAL))) + return -ENOTSUPP; + + if (XE_IOCTL_ERR(xe, sync_in.addr & 0x7)) + return -EINVAL; + + if (exec) { + sync->addr = sync_in.addr; + } else { + sync->ufence = user_fence_create(xe, sync_in.addr, + sync_in.timeline_value); + if (XE_IOCTL_ERR(xe, !sync->ufence)) + return -ENOMEM; + } + + break; + + default: + return -EINVAL; + } + + sync->flags = sync_in.flags; + sync->timeline_value = sync_in.timeline_value; + + return 0; +} + +int xe_sync_entry_wait(struct xe_sync_entry *sync) +{ + if (sync->fence) + dma_fence_wait(sync->fence, true); + + return 0; +} + +int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job) +{ + int err; + + if (sync->fence) { + err = drm_sched_job_add_dependency(&job->drm, + dma_fence_get(sync->fence)); + if (err) { + dma_fence_put(sync->fence); + return err; + } + } + + return 0; +} + +bool xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job, + struct dma_fence *fence) +{ + if (!(sync->flags & DRM_XE_SYNC_SIGNAL) || + sync->flags & SYNC_FLAGS_FENCE_INSTALLED) + return false; + + if (sync->chain_fence) { + drm_syncobj_add_point(sync->syncobj, sync->chain_fence, + fence, sync->timeline_value); + /* + * The chain's ownership is transferred to the + * timeline. + */ + sync->chain_fence = NULL; + } else if (sync->syncobj) { + drm_syncobj_replace_fence(sync->syncobj, fence); + } else if (sync->ufence) { + int err; + + dma_fence_get(fence); + user_fence_get(sync->ufence); + err = dma_fence_add_callback(fence, &sync->ufence->cb, + user_fence_cb); + if (err == -ENOENT) { + kick_ufence(sync->ufence, fence); + } else if (err) { + XE_WARN_ON("failed to add user fence"); + user_fence_put(sync->ufence); + dma_fence_put(fence); + } + } else if ((sync->flags & SYNC_FLAGS_TYPE_MASK) == + DRM_XE_SYNC_USER_FENCE) { + job->user_fence.used = true; + job->user_fence.addr = sync->addr; + job->user_fence.value = sync->timeline_value; + } + + /* TODO: external BO? 
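(note: DRM_XE_SYNC_DMA_BUF is still rejected with -EINVAL in xe_sync_entry_parse())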
*/ + + sync->flags |= SYNC_FLAGS_FENCE_INSTALLED; + + return true; +} + +void xe_sync_entry_cleanup(struct xe_sync_entry *sync) +{ + if (sync->syncobj) + drm_syncobj_put(sync->syncobj); + if (sync->fence) + dma_fence_put(sync->fence); + if (sync->chain_fence) + dma_fence_put(&sync->chain_fence->base); + if (sync->ufence) + user_fence_put(sync->ufence); +} diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h new file mode 100644 index 000000000000..4cbcf7a19911 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_SYNC_H_ +#define _XE_SYNC_H_ + +#include "xe_sync_types.h" + +struct xe_device; +struct xe_file; +struct xe_sched_job; + +int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, + struct xe_sync_entry *sync, + struct drm_xe_sync __user *sync_user, + bool exec, bool compute_mode); +int xe_sync_entry_wait(struct xe_sync_entry *sync); +int xe_sync_entry_add_deps(struct xe_sync_entry *sync, + struct xe_sched_job *job); +bool xe_sync_entry_signal(struct xe_sync_entry *sync, + struct xe_sched_job *job, + struct dma_fence *fence); +void xe_sync_entry_cleanup(struct xe_sync_entry *sync); + +#endif diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h new file mode 100644 index 000000000000..24fccc26cb53 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sync_types.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_SYNC_TYPES_H_ +#define _XE_SYNC_TYPES_H_ + +#include + +struct drm_syncobj; +struct dma_fence; +struct dma_fence_chain; +struct drm_xe_sync; +struct user_fence; + +struct xe_sync_entry { + struct drm_syncobj *syncobj; + struct dma_fence *fence; + struct dma_fence_chain *chain_fence; + struct user_fence *ufence; + u64 addr; + u64 timeline_value; + u32 flags; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_trace.c b/drivers/gpu/drm/xe/xe_trace.c new file mode 100644 index 000000000000..2570c0b859c4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_trace.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "xe_trace.h" +#endif diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h new file mode 100644 index 000000000000..a5f963f1f6eb --- /dev/null +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -0,0 +1,513 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright © 2022 Intel Corporation + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM xe + +#if !defined(_XE_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _XE_TRACE_H_ + +#include +#include + +#include "xe_bo_types.h" +#include "xe_engine_types.h" +#include "xe_gpu_scheduler_types.h" +#include "xe_gt_types.h" +#include "xe_guc_engine_types.h" +#include "xe_sched_job.h" +#include "xe_vm_types.h" + +DECLARE_EVENT_CLASS(xe_bo, + TP_PROTO(struct xe_bo *bo), + TP_ARGS(bo), + + TP_STRUCT__entry( + __field(size_t, size) + __field(u32, flags) + __field(u64, vm) + ), + + TP_fast_assign( + __entry->size = bo->size; + __entry->flags = bo->flags; + __entry->vm = (u64)bo->vm; + ), + + TP_printk("size=%ld, flags=0x%02x, vm=0x%016llx", + __entry->size, __entry->flags, __entry->vm) +); + +DEFINE_EVENT(xe_bo, xe_bo_cpu_fault, + TP_PROTO(struct xe_bo *bo), + TP_ARGS(bo) +); + +DEFINE_EVENT(xe_bo, xe_bo_move, + TP_PROTO(struct xe_bo *bo), + TP_ARGS(bo) +); + +DECLARE_EVENT_CLASS(xe_engine, + 
TP_PROTO(struct xe_engine *e), + TP_ARGS(e), + + TP_STRUCT__entry( + __field(enum xe_engine_class, class) + __field(u32, logical_mask) + __field(u8, gt_id) + __field(u16, width) + __field(u16, guc_id) + __field(u32, guc_state) + __field(u32, flags) + ), + + TP_fast_assign( + __entry->class = e->class; + __entry->logical_mask = e->logical_mask; + __entry->gt_id = e->gt->info.id; + __entry->width = e->width; + __entry->guc_id = e->guc->id; + __entry->guc_state = atomic_read(&e->guc->state); + __entry->flags = e->flags; + ), + + TP_printk("%d:0x%x, gt=%d, width=%d, guc_id=%d, guc_state=0x%x, flags=0x%x", + __entry->class, __entry->logical_mask, + __entry->gt_id, __entry->width, __entry->guc_id, + __entry->guc_state, __entry->flags) +); + +DEFINE_EVENT(xe_engine, xe_engine_create, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_supress_resume, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_submit, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_scheduling_enable, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_scheduling_disable, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_scheduling_done, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_register, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_deregister, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_deregister_done, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_close, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_kill, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_cleanup_entity, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_destroy, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_reset, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_memory_cat_error, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_stop, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_resubmit, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DECLARE_EVENT_CLASS(xe_sched_job, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job), + + TP_STRUCT__entry( + __field(u32, seqno) + __field(u16, guc_id) + __field(u32, guc_state) + __field(u32, flags) + __field(int, error) + __field(u64, fence) + __field(u64, batch_addr) + ), + + TP_fast_assign( + __entry->seqno = xe_sched_job_seqno(job); + __entry->guc_id = job->engine->guc->id; + __entry->guc_state = + atomic_read(&job->engine->guc->state); + __entry->flags = job->engine->flags; + __entry->error = job->fence->error; + __entry->fence = (u64)job->fence; + __entry->batch_addr = (u64)job->batch_addr[0]; + ), + + TP_printk("fence=0x%016llx, seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", + __entry->fence, __entry->seqno, __entry->guc_id, + __entry->batch_addr, __entry->guc_state, + __entry->flags, __entry->error) +); + +DEFINE_EVENT(xe_sched_job, xe_sched_job_create, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job) +); + +DEFINE_EVENT(xe_sched_job, xe_sched_job_exec, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job) +); + +DEFINE_EVENT(xe_sched_job, xe_sched_job_run, 
+ TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job) +); + +DEFINE_EVENT(xe_sched_job, xe_sched_job_free, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job) +); + +DEFINE_EVENT(xe_sched_job, xe_sched_job_timedout, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job) +); + +DEFINE_EVENT(xe_sched_job, xe_sched_job_set_error, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job) +); + +DEFINE_EVENT(xe_sched_job, xe_sched_job_ban, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job) +); + +DECLARE_EVENT_CLASS(xe_sched_msg, + TP_PROTO(struct xe_sched_msg *msg), + TP_ARGS(msg), + + TP_STRUCT__entry( + __field(u32, opcode) + __field(u16, guc_id) + ), + + TP_fast_assign( + __entry->opcode = msg->opcode; + __entry->guc_id = + ((struct xe_engine *)msg->private_data)->guc->id; + ), + + TP_printk("guc_id=%d, opcode=%u", __entry->guc_id, + __entry->opcode) +); + +DEFINE_EVENT(xe_sched_msg, xe_sched_msg_add, + TP_PROTO(struct xe_sched_msg *msg), + TP_ARGS(msg) +); + +DEFINE_EVENT(xe_sched_msg, xe_sched_msg_recv, + TP_PROTO(struct xe_sched_msg *msg), + TP_ARGS(msg) +); + +DECLARE_EVENT_CLASS(xe_hw_fence, + TP_PROTO(struct xe_hw_fence *fence), + TP_ARGS(fence), + + TP_STRUCT__entry( + __field(u64, ctx) + __field(u32, seqno) + __field(u64, fence) + ), + + TP_fast_assign( + __entry->ctx = fence->dma.context; + __entry->seqno = fence->dma.seqno; + __entry->fence = (u64)fence; + ), + + TP_printk("ctx=0x%016llx, fence=0x%016llx, seqno=%u", + __entry->ctx, __entry->fence, __entry->seqno) +); + +DEFINE_EVENT(xe_hw_fence, xe_hw_fence_create, + TP_PROTO(struct xe_hw_fence *fence), + TP_ARGS(fence) +); + +DEFINE_EVENT(xe_hw_fence, xe_hw_fence_signal, + TP_PROTO(struct xe_hw_fence *fence), + TP_ARGS(fence) +); + +DEFINE_EVENT(xe_hw_fence, xe_hw_fence_try_signal, + TP_PROTO(struct xe_hw_fence *fence), + TP_ARGS(fence) +); + +DEFINE_EVENT(xe_hw_fence, xe_hw_fence_free, + TP_PROTO(struct xe_hw_fence *fence), + TP_ARGS(fence) +); + +DECLARE_EVENT_CLASS(xe_vma, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma), + + TP_STRUCT__entry( + __field(u64, vma) + __field(u32, asid) + __field(u64, start) + __field(u64, end) + __field(u64, ptr) + ), + + TP_fast_assign( + __entry->vma = (u64)vma; + __entry->asid = vma->vm->usm.asid; + __entry->start = vma->start; + __entry->end = vma->end; + __entry->ptr = (u64)vma->userptr.ptr; + ), + + TP_printk("vma=0x%016llx, asid=0x%05x, start=0x%012llx, end=0x%012llx, ptr=0x%012llx,", + __entry->vma, __entry->asid, __entry->start, + __entry->end, __entry->ptr) +) + +DEFINE_EVENT(xe_vma, xe_vma_flush, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_pagefault, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_acc, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_fail, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_bind, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_pf_bind, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_unbind, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_worker, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_exec, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_rebind_worker, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_rebind_exec, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, 
xe_vma_userptr_invalidate, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_usm_invalidate, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_evict, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate_complete, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DECLARE_EVENT_CLASS(xe_vm, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm), + + TP_STRUCT__entry( + __field(u64, vm) + __field(u32, asid) + ), + + TP_fast_assign( + __entry->vm = (u64)vm; + __entry->asid = vm->usm.asid; + ), + + TP_printk("vm=0x%016llx, asid=0x%05x", __entry->vm, + __entry->asid) +); + +DEFINE_EVENT(xe_vm, xe_vm_create, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_free, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_cpu_bind, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_restart, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_enter, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_retry, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +TRACE_EVENT(xe_guc_ct_h2g_flow_control, + TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), + TP_ARGS(_head, _tail, size, space, len), + + TP_STRUCT__entry( + __field(u32, _head) + __field(u32, _tail) + __field(u32, size) + __field(u32, space) + __field(u32, len) + ), + + TP_fast_assign( + __entry->_head = _head; + __entry->_tail = _tail; + __entry->size = size; + __entry->space = space; + __entry->len = len; + ), + + TP_printk("head=%u, tail=%u, size=%u, space=%u, len=%u", + __entry->_head, __entry->_tail, __entry->size, + __entry->space, __entry->len) +); + +TRACE_EVENT(xe_guc_ct_g2h_flow_control, + TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), + TP_ARGS(_head, _tail, size, space, len), + + TP_STRUCT__entry( + __field(u32, _head) + __field(u32, _tail) + __field(u32, size) + __field(u32, space) + __field(u32, len) + ), + + TP_fast_assign( + __entry->_head = _head; + __entry->_tail = _tail; + __entry->size = size; + __entry->space = space; + __entry->len = len; + ), + + TP_printk("head=%u, tail=%u, size=%u, space=%u, len=%u", + __entry->_head, __entry->_tail, __entry->size, + __entry->space, __entry->len) +); + +#endif + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe +#define TRACE_INCLUDE_FILE xe_trace +#include diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c new file mode 100644 index 000000000000..a0ba8bba84d1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021-2022 Intel Corporation + * Copyright (C) 2021-2002 Red Hat + */ + +#include + +#include +#include +#include + +#include "xe_bo.h" +#include "xe_gt.h" +#include "xe_ttm_gtt_mgr.h" + +struct xe_ttm_gtt_node { + struct ttm_buffer_object *tbo; + struct ttm_range_mgr_node base; +}; + +static inline struct xe_ttm_gtt_mgr * +to_gtt_mgr(struct ttm_resource_manager *man) +{ + return container_of(man, struct xe_ttm_gtt_mgr, manager); +} + +static inline struct xe_ttm_gtt_node * +to_xe_ttm_gtt_node(struct ttm_resource *res) +{ + return container_of(res, struct xe_ttm_gtt_node, base.base); +} + +static int 
xe_ttm_gtt_mgr_new(struct ttm_resource_manager *man, + struct ttm_buffer_object *tbo, + const struct ttm_place *place, + struct ttm_resource **res) +{ + struct xe_ttm_gtt_node *node; + int r; + + node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL); + if (!node) + return -ENOMEM; + + node->tbo = tbo; + ttm_resource_init(tbo, place, &node->base.base); + + if (!(place->flags & TTM_PL_FLAG_TEMPORARY) && + ttm_resource_manager_usage(man) > (man->size << PAGE_SHIFT)) { + r = -ENOSPC; + goto err_fini; + } + + node->base.mm_nodes[0].start = 0; + node->base.mm_nodes[0].size = PFN_UP(node->base.base.size); + node->base.base.start = XE_BO_INVALID_OFFSET; + + *res = &node->base.base; + + return 0; + +err_fini: + ttm_resource_fini(man, &node->base.base); + kfree(node); + return r; +} + +static void xe_ttm_gtt_mgr_del(struct ttm_resource_manager *man, + struct ttm_resource *res) +{ + struct xe_ttm_gtt_node *node = to_xe_ttm_gtt_node(res); + + ttm_resource_fini(man, res); + kfree(node); +} + +static void xe_ttm_gtt_mgr_debug(struct ttm_resource_manager *man, + struct drm_printer *printer) +{ + +} + +static const struct ttm_resource_manager_func xe_ttm_gtt_mgr_func = { + .alloc = xe_ttm_gtt_mgr_new, + .free = xe_ttm_gtt_mgr_del, + .debug = xe_ttm_gtt_mgr_debug +}; + +static void ttm_gtt_mgr_fini(struct drm_device *drm, void *arg) +{ + struct xe_ttm_gtt_mgr *mgr = arg; + struct xe_device *xe = gt_to_xe(mgr->gt); + struct ttm_resource_manager *man = &mgr->manager; + int err; + + ttm_resource_manager_set_used(man, false); + + err = ttm_resource_manager_evict_all(&xe->ttm, man); + if (err) + return; + + ttm_resource_manager_cleanup(man); + ttm_set_driver_manager(&xe->ttm, XE_PL_TT, NULL); +} + +int xe_ttm_gtt_mgr_init(struct xe_gt *gt, struct xe_ttm_gtt_mgr *mgr, + u64 gtt_size) +{ + struct xe_device *xe = gt_to_xe(gt); + struct ttm_resource_manager *man = &mgr->manager; + int err; + + XE_BUG_ON(xe_gt_is_media_type(gt)); + + mgr->gt = gt; + man->use_tt = true; + man->func = &xe_ttm_gtt_mgr_func; + + ttm_resource_manager_init(man, &xe->ttm, gtt_size >> PAGE_SHIFT); + + ttm_set_driver_manager(&xe->ttm, XE_PL_TT, &mgr->manager); + ttm_resource_manager_set_used(man, true); + + err = drmm_add_action_or_reset(&xe->drm, ttm_gtt_mgr_fini, mgr); + if (err) + return err; + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h new file mode 100644 index 000000000000..d1d57cb9c2b8 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_TTGM_GTT_MGR_H_ +#define _XE_TTGM_GTT_MGR_H_ + +#include "xe_ttm_gtt_mgr_types.h" + +struct xe_gt; + +int xe_ttm_gtt_mgr_init(struct xe_gt *gt, struct xe_ttm_gtt_mgr *mgr, + u64 gtt_size); + +#endif diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h new file mode 100644 index 000000000000..c66737488326 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_TTM_GTT_MGR_TYPES_H_ +#define _XE_TTM_GTT_MGR_TYPES_H_ + +#include + +struct xe_gt; + +struct xe_ttm_gtt_mgr { + struct xe_gt *gt; + struct ttm_resource_manager manager; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c new file mode 100644 index 000000000000..e391e81d3640 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -0,0 +1,403 
@@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021-2022 Intel Corporation + * Copyright (C) 2021-2002 Red Hat + */ + +#include + +#include +#include + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_res_cursor.h" +#include "xe_ttm_vram_mgr.h" + +static inline struct xe_ttm_vram_mgr * +to_vram_mgr(struct ttm_resource_manager *man) +{ + return container_of(man, struct xe_ttm_vram_mgr, manager); +} + +static inline struct xe_gt * +mgr_to_gt(struct xe_ttm_vram_mgr *mgr) +{ + return mgr->gt; +} + +static inline struct drm_buddy_block * +xe_ttm_vram_mgr_first_block(struct list_head *list) +{ + return list_first_entry_or_null(list, struct drm_buddy_block, link); +} + +static inline bool xe_is_vram_mgr_blocks_contiguous(struct list_head *head) +{ + struct drm_buddy_block *block; + u64 start, size; + + block = xe_ttm_vram_mgr_first_block(head); + if (!block) + return false; + + while (head != block->link.next) { + start = xe_ttm_vram_mgr_block_start(block); + size = xe_ttm_vram_mgr_block_size(block); + + block = list_entry(block->link.next, struct drm_buddy_block, + link); + if (start + size != xe_ttm_vram_mgr_block_start(block)) + return false; + } + + return true; +} + +static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, + struct ttm_buffer_object *tbo, + const struct ttm_place *place, + struct ttm_resource **res) +{ + u64 max_bytes, cur_size, min_block_size; + struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man); + struct xe_ttm_vram_mgr_resource *vres; + u64 size, remaining_size, lpfn, fpfn; + struct drm_buddy *mm = &mgr->mm; + struct drm_buddy_block *block; + unsigned long pages_per_block; + int r; + + lpfn = (u64)place->lpfn << PAGE_SHIFT; + if (!lpfn) + lpfn = man->size; + + fpfn = (u64)place->fpfn << PAGE_SHIFT; + + max_bytes = mgr->gt->mem.vram.size; + if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { + pages_per_block = ~0ul; + } else { +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + pages_per_block = HPAGE_PMD_NR; +#else + /* default to 2MB */ + pages_per_block = 2UL << (20UL - PAGE_SHIFT); +#endif + + pages_per_block = max_t(uint32_t, pages_per_block, + tbo->page_alignment); + } + + vres = kzalloc(sizeof(*vres), GFP_KERNEL); + if (!vres) + return -ENOMEM; + + ttm_resource_init(tbo, place, &vres->base); + remaining_size = vres->base.size; + + /* bail out quickly if there's likely not enough VRAM for this BO */ + if (ttm_resource_manager_usage(man) > max_bytes) { + r = -ENOSPC; + goto error_fini; + } + + INIT_LIST_HEAD(&vres->blocks); + + if (place->flags & TTM_PL_FLAG_TOPDOWN) + vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION; + + if (fpfn || lpfn != man->size) + /* Allocate blocks in desired range */ + vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; + + mutex_lock(&mgr->lock); + while (remaining_size) { + if (tbo->page_alignment) + min_block_size = tbo->page_alignment << PAGE_SHIFT; + else + min_block_size = mgr->default_page_size; + + XE_BUG_ON(min_block_size < mm->chunk_size); + + /* Limit maximum size to 2GiB due to SG table limitations */ + size = min(remaining_size, 2ULL << 30); + + if (size >= pages_per_block << PAGE_SHIFT) + min_block_size = pages_per_block << PAGE_SHIFT; + + cur_size = size; + + if (fpfn + size != place->lpfn << PAGE_SHIFT) { + /* + * Except for actual range allocation, modify the size and + * min_block_size conforming to continuous flag enablement + */ + if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { + size = roundup_pow_of_two(size); + min_block_size = size; + /* + * Modify the size value if size is not + * aligned with min_block_size + */ 
+ } else if (!IS_ALIGNED(size, min_block_size)) { + size = round_up(size, min_block_size); + } + } + + r = drm_buddy_alloc_blocks(mm, fpfn, + lpfn, + size, + min_block_size, + &vres->blocks, + vres->flags); + if (unlikely(r)) + goto error_free_blocks; + + if (size > remaining_size) + remaining_size = 0; + else + remaining_size -= size; + } + mutex_unlock(&mgr->lock); + + if (cur_size != size) { + struct drm_buddy_block *block; + struct list_head *trim_list; + u64 original_size; + LIST_HEAD(temp); + + trim_list = &vres->blocks; + original_size = vres->base.size; + + /* + * If size value is rounded up to min_block_size, trim the last + * block to the required size + */ + if (!list_is_singular(&vres->blocks)) { + block = list_last_entry(&vres->blocks, typeof(*block), link); + list_move_tail(&block->link, &temp); + trim_list = &temp; + /* + * Compute the original_size value by subtracting the + * last block size with (aligned size - original size) + */ + original_size = xe_ttm_vram_mgr_block_size(block) - + (size - cur_size); + } + + mutex_lock(&mgr->lock); + drm_buddy_block_trim(mm, + original_size, + trim_list); + mutex_unlock(&mgr->lock); + + if (!list_empty(&temp)) + list_splice_tail(trim_list, &vres->blocks); + } + + vres->base.start = 0; + list_for_each_entry(block, &vres->blocks, link) { + unsigned long start; + + start = xe_ttm_vram_mgr_block_start(block) + + xe_ttm_vram_mgr_block_size(block); + start >>= PAGE_SHIFT; + + if (start > PFN_UP(vres->base.size)) + start -= PFN_UP(vres->base.size); + else + start = 0; + vres->base.start = max(vres->base.start, start); + } + + if (xe_is_vram_mgr_blocks_contiguous(&vres->blocks)) + vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS; + + *res = &vres->base; + return 0; + +error_free_blocks: + drm_buddy_free_list(mm, &vres->blocks); + mutex_unlock(&mgr->lock); +error_fini: + ttm_resource_fini(man, &vres->base); + kfree(vres); + + return r; +} + +static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man, + struct ttm_resource *res) +{ + struct xe_ttm_vram_mgr_resource *vres = + to_xe_ttm_vram_mgr_resource(res); + struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man); + struct drm_buddy *mm = &mgr->mm; + + mutex_lock(&mgr->lock); + drm_buddy_free_list(mm, &vres->blocks); + mutex_unlock(&mgr->lock); + + ttm_resource_fini(man, res); + + kfree(vres); +} + +static void xe_ttm_vram_mgr_debug(struct ttm_resource_manager *man, + struct drm_printer *printer) +{ + struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man); + struct drm_buddy *mm = &mgr->mm; + + mutex_lock(&mgr->lock); + drm_buddy_print(mm, printer); + mutex_unlock(&mgr->lock); + drm_printf(printer, "man size:%llu\n", man->size); +} + +static const struct ttm_resource_manager_func xe_ttm_vram_mgr_func = { + .alloc = xe_ttm_vram_mgr_new, + .free = xe_ttm_vram_mgr_del, + .debug = xe_ttm_vram_mgr_debug +}; + +static void ttm_vram_mgr_fini(struct drm_device *drm, void *arg) +{ + struct xe_ttm_vram_mgr *mgr = arg; + struct xe_device *xe = gt_to_xe(mgr->gt); + struct ttm_resource_manager *man = &mgr->manager; + int err; + + ttm_resource_manager_set_used(man, false); + + err = ttm_resource_manager_evict_all(&xe->ttm, man); + if (err) + return; + + drm_buddy_fini(&mgr->mm); + + ttm_resource_manager_cleanup(man); + ttm_set_driver_manager(&xe->ttm, XE_PL_VRAM0 + mgr->gt->info.vram_id, + NULL); +} + +int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr) +{ + struct xe_device *xe = gt_to_xe(gt); + struct ttm_resource_manager *man = &mgr->manager; + int err; + + XE_BUG_ON(xe_gt_is_media_type(gt)); + + 
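/* Back this tile's VRAM with a drm_buddy allocator and register it with TTM */ +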
mgr->gt = gt; + man->func = &xe_ttm_vram_mgr_func; + + ttm_resource_manager_init(man, &xe->ttm, gt->mem.vram.size); + err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE); + if (err) + return err; + + mutex_init(&mgr->lock); + mgr->default_page_size = PAGE_SIZE; + + ttm_set_driver_manager(&xe->ttm, XE_PL_VRAM0 + gt->info.vram_id, + &mgr->manager); + ttm_resource_manager_set_used(man, true); + + err = drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr); + if (err) + return err; + + return 0; +} + +int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, + struct ttm_resource *res, + u64 offset, u64 length, + struct device *dev, + enum dma_data_direction dir, + struct sg_table **sgt) +{ + struct xe_gt *gt = xe_device_get_gt(xe, res->mem_type - XE_PL_VRAM0); + struct xe_res_cursor cursor; + struct scatterlist *sg; + int num_entries = 0; + int i, r; + + *sgt = kmalloc(sizeof(**sgt), GFP_KERNEL); + if (!*sgt) + return -ENOMEM; + + /* Determine the number of DRM_BUDDY blocks to export */ + xe_res_first(res, offset, length, &cursor); + while (cursor.remaining) { + num_entries++; + xe_res_next(&cursor, cursor.size); + } + + r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL); + if (r) + goto error_free; + + /* Initialize scatterlist nodes of sg_table */ + for_each_sgtable_sg((*sgt), sg, i) + sg->length = 0; + + /* + * Walk down DRM_BUDDY blocks to populate scatterlist nodes + * @note: Use iterator api to get first the DRM_BUDDY block + * and the number of bytes from it. Access the following + * DRM_BUDDY block(s) if more buffer needs to exported + */ + xe_res_first(res, offset, length, &cursor); + for_each_sgtable_sg((*sgt), sg, i) { + phys_addr_t phys = cursor.start + gt->mem.vram.io_start; + size_t size = cursor.size; + dma_addr_t addr; + + addr = dma_map_resource(dev, phys, size, dir, + DMA_ATTR_SKIP_CPU_SYNC); + r = dma_mapping_error(dev, addr); + if (r) + goto error_unmap; + + sg_set_page(sg, NULL, size, 0); + sg_dma_address(sg) = addr; + sg_dma_len(sg) = size; + + xe_res_next(&cursor, cursor.size); + } + + return 0; + +error_unmap: + for_each_sgtable_sg((*sgt), sg, i) { + if (!sg->length) + continue; + + dma_unmap_resource(dev, sg->dma_address, + sg->length, dir, + DMA_ATTR_SKIP_CPU_SYNC); + } + sg_free_table(*sgt); + +error_free: + kfree(*sgt); + return r; +} + +void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir, + struct sg_table *sgt) +{ + struct scatterlist *sg; + int i; + + for_each_sgtable_sg(sgt, sg, i) + dma_unmap_resource(dev, sg->dma_address, + sg->length, dir, + DMA_ATTR_SKIP_CPU_SYNC); + sg_free_table(sgt); + kfree(sgt); +} diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h new file mode 100644 index 000000000000..537fccec4318 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_TTM_VRAM_MGR_H_ +#define _XE_TTM_VRAM_MGR_H_ + +#include "xe_ttm_vram_mgr_types.h" + +enum dma_data_direction; +struct xe_device; +struct xe_gt; + +int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr); +int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, + struct ttm_resource *res, + u64 offset, u64 length, + struct device *dev, + enum dma_data_direction dir, + struct sg_table **sgt); +void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir, + struct sg_table *sgt); + +static inline u64 xe_ttm_vram_mgr_block_start(struct drm_buddy_block *block) +{ + return 
drm_buddy_block_offset(block); +} + +static inline u64 xe_ttm_vram_mgr_block_size(struct drm_buddy_block *block) +{ + return PAGE_SIZE << drm_buddy_block_order(block); +} + +static inline struct xe_ttm_vram_mgr_resource * +to_xe_ttm_vram_mgr_resource(struct ttm_resource *res) +{ + return container_of(res, struct xe_ttm_vram_mgr_resource, base); +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h new file mode 100644 index 000000000000..39b93c71c21b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_TTM_VRAM_MGR_TYPES_H_ +#define _XE_TTM_VRAM_MGR_TYPES_H_ + +#include +#include + +struct xe_gt; + +/** + * struct xe_ttm_vram_mgr - XE TTM VRAM manager + * + * Manages placement of TTM resource in VRAM. + */ +struct xe_ttm_vram_mgr { + /** @gt: Graphics tile which the VRAM belongs to */ + struct xe_gt *gt; + /** @manager: Base TTM resource manager */ + struct ttm_resource_manager manager; + /** @mm: DRM buddy allocator which manages the VRAM */ + struct drm_buddy mm; + /** @default_page_size: default page size */ + u64 default_page_size; + /** @lock: protects allocations of VRAM */ + struct mutex lock; +}; + +/** + * struct xe_ttm_vram_mgr_resource - XE TTM VRAM resource + */ +struct xe_ttm_vram_mgr_resource { + /** @base: Base TTM resource */ + struct ttm_resource base; + /** @blocks: list of DRM buddy blocks */ + struct list_head blocks; + /** @flags: flags associated with the resource */ + unsigned long flags; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c new file mode 100644 index 000000000000..e043db037368 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_wa.h" + +#include "xe_platform_types.h" +#include "xe_gt_types.h" +#include "xe_rtp.h" + +#include "gt/intel_gt_regs.h" + +#undef _MMIO +#undef MCR_REG +#define _MMIO(x) _XE_RTP_REG(x) +#define MCR_REG(x) _XE_RTP_MCR_REG(x) + +static const struct xe_rtp_entry gt_tunings[] = { + { XE_RTP_NAME("Tuning: 32B Access Enable"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_SET(XEHP_SQCM, EN_32B_ACCESS) + }, + {} +}; + +static const struct xe_rtp_entry context_tunings[] = { + { XE_RTP_NAME("1604555607"), + XE_RTP_RULES(GRAPHICS_VERSION(1200)), + XE_RTP_FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2, FF_MODE2_TDS_TIMER_MASK, + FF_MODE2_TDS_TIMER_128) + }, + {} +}; + +void xe_tuning_process_gt(struct xe_gt *gt) +{ + xe_rtp_process(gt_tunings, >->reg_sr, gt, NULL); +} diff --git a/drivers/gpu/drm/xe/xe_tuning.h b/drivers/gpu/drm/xe/xe_tuning.h new file mode 100644 index 000000000000..66dbc93192bd --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tuning.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_TUNING_ +#define _XE_TUNING_ + +struct xe_gt; + +void xe_tuning_process_gt(struct xe_gt *gt); + +#endif diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c new file mode 100644 index 000000000000..938d14698003 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_device.h" +#include "xe_huc.h" +#include "xe_gt.h" +#include "xe_guc.h" +#include "xe_guc_pc.h" +#include "xe_guc_submit.h" +#include "xe_uc.h" +#include "xe_uc_fw.h" +#include 
"xe_wopcm.h" + +static struct xe_gt * +uc_to_gt(struct xe_uc *uc) +{ + return container_of(uc, struct xe_gt, uc); +} + +static struct xe_device * +uc_to_xe(struct xe_uc *uc) +{ + return gt_to_xe(uc_to_gt(uc)); +} + +/* Should be called once at driver load only */ +int xe_uc_init(struct xe_uc *uc) +{ + int ret; + + /* GuC submission not enabled, nothing to do */ + if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + return 0; + + ret = xe_guc_init(&uc->guc); + if (ret) + goto err; + + ret = xe_huc_init(&uc->huc); + if (ret) + goto err; + + ret = xe_wopcm_init(&uc->wopcm); + if (ret) + goto err; + + ret = xe_guc_submit_init(&uc->guc); + if (ret) + goto err; + + return 0; + +err: + /* If any uC firmwares not found, fall back to execlists */ + xe_device_guc_submission_disable(uc_to_xe(uc)); + + return ret; +} + +/** + * xe_uc_init_post_hwconfig - init Uc post hwconfig load + * @uc: The UC object + * + * Return: 0 on success, negative error code on error. + */ +int xe_uc_init_post_hwconfig(struct xe_uc *uc) +{ + /* GuC submission not enabled, nothing to do */ + if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + return 0; + + return xe_guc_init_post_hwconfig(&uc->guc); +} + +static int uc_reset(struct xe_uc *uc) +{ + struct xe_device *xe = uc_to_xe(uc); + int ret; + + ret = xe_guc_reset(&uc->guc); + if (ret) { + drm_err(&xe->drm, "Failed to reset GuC, ret = %d\n", ret); + return ret; + } + + return 0; +} + +static int uc_sanitize(struct xe_uc *uc) +{ + xe_huc_sanitize(&uc->huc); + xe_guc_sanitize(&uc->guc); + + return uc_reset(uc); +} + +/** + * xe_uc_init_hwconfig - minimally init Uc, read and parse hwconfig + * @uc: The UC object + * + * Return: 0 on success, negative error code on error. + */ +int xe_uc_init_hwconfig(struct xe_uc *uc) +{ + int ret; + + /* GuC submission not enabled, nothing to do */ + if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + return 0; + + ret = xe_guc_min_load_for_hwconfig(&uc->guc); + if (ret) + return ret; + + return 0; +} + +/* + * Should be called during driver load, after every GT reset, and after every + * suspend to reload / auth the firmwares. 
+ */ +int xe_uc_init_hw(struct xe_uc *uc) +{ + int ret; + + /* GuC submission not enabled, nothing to do */ + if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + return 0; + + ret = uc_sanitize(uc); + if (ret) + return ret; + + ret = xe_huc_upload(&uc->huc); + if (ret) + return ret; + + ret = xe_guc_upload(&uc->guc); + if (ret) + return ret; + + ret = xe_guc_enable_communication(&uc->guc); + if (ret) + return ret; + + ret = xe_gt_record_default_lrcs(uc_to_gt(uc)); + if (ret) + return ret; + + ret = xe_guc_post_load_init(&uc->guc); + if (ret) + return ret; + + ret = xe_guc_pc_start(&uc->guc.pc); + if (ret) + return ret; + + /* We don't fail the driver load if HuC fails to auth, but let's warn */ + ret = xe_huc_auth(&uc->huc); + XE_WARN_ON(ret); + + return 0; +} + +int xe_uc_reset_prepare(struct xe_uc *uc) +{ + /* GuC submission not enabled, nothing to do */ + if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + return 0; + + return xe_guc_reset_prepare(&uc->guc); +} + +void xe_uc_stop_prepare(struct xe_uc *uc) +{ + xe_guc_stop_prepare(&uc->guc); +} + +int xe_uc_stop(struct xe_uc *uc) +{ + /* GuC submission not enabled, nothing to do */ + if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + return 0; + + return xe_guc_stop(&uc->guc); +} + +int xe_uc_start(struct xe_uc *uc) +{ + /* GuC submission not enabled, nothing to do */ + if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + return 0; + + return xe_guc_start(&uc->guc); +} + +static void uc_reset_wait(struct xe_uc *uc) +{ + int ret; + +again: + xe_guc_reset_wait(&uc->guc); + + ret = xe_uc_reset_prepare(uc); + if (ret) + goto again; +} + +int xe_uc_suspend(struct xe_uc *uc) +{ + int ret; + + /* GuC submission not enabled, nothing to do */ + if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + return 0; + + uc_reset_wait(uc); + + ret = xe_uc_stop(uc); + if (ret) + return ret; + + return xe_guc_suspend(&uc->guc); +} diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h new file mode 100644 index 000000000000..380e722f95fc --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_UC_H_ +#define _XE_UC_H_ + +#include "xe_uc_types.h" + +int xe_uc_init(struct xe_uc *uc); +int xe_uc_init_hwconfig(struct xe_uc *uc); +int xe_uc_init_post_hwconfig(struct xe_uc *uc); +int xe_uc_init_hw(struct xe_uc *uc); +int xe_uc_reset_prepare(struct xe_uc *uc); +void xe_uc_stop_prepare(struct xe_uc *uc); +int xe_uc_stop(struct xe_uc *uc); +int xe_uc_start(struct xe_uc *uc); +int xe_uc_suspend(struct xe_uc *uc); + +#endif diff --git a/drivers/gpu/drm/xe/xe_uc_debugfs.c b/drivers/gpu/drm/xe/xe_uc_debugfs.c new file mode 100644 index 000000000000..0a39ec5a6e99 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc_debugfs.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include "xe_gt.h" +#include "xe_guc_debugfs.h" +#include "xe_huc_debugfs.h" +#include "xe_macros.h" +#include "xe_uc_debugfs.h" + +void xe_uc_debugfs_register(struct xe_uc *uc, struct dentry *parent) +{ + struct dentry *root; + + root = debugfs_create_dir("uc", parent); + if (IS_ERR(root)) { + XE_WARN_ON("Create UC directory failed"); + return; + } + + xe_guc_debugfs_register(&uc->guc, root); + xe_huc_debugfs_register(&uc->huc, root); +} diff --git a/drivers/gpu/drm/xe/xe_uc_debugfs.h b/drivers/gpu/drm/xe/xe_uc_debugfs.h new file mode 100644 index 000000000000..a13382df2bd7 --- /dev/null +++ 
b/drivers/gpu/drm/xe/xe_uc_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_UC_DEBUGFS_H_ +#define _XE_UC_DEBUGFS_H_ + +struct dentry; +struct xe_uc; + +void xe_uc_debugfs_register(struct xe_uc *uc, struct dentry *parent); + +#endif diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c new file mode 100644 index 000000000000..86c47b7f0901 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -0,0 +1,406 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include + +#include + +#include "xe_bo.h" +#include "xe_device_types.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_guc_reg.h" +#include "xe_map.h" +#include "xe_mmio.h" +#include "xe_uc_fw.h" + +static struct xe_gt * +__uc_fw_to_gt(struct xe_uc_fw *uc_fw, enum xe_uc_fw_type type) +{ + if (type == XE_UC_FW_TYPE_GUC) + return container_of(uc_fw, struct xe_gt, uc.guc.fw); + + XE_BUG_ON(type != XE_UC_FW_TYPE_HUC); + return container_of(uc_fw, struct xe_gt, uc.huc.fw); +} + +static struct xe_gt *uc_fw_to_gt(struct xe_uc_fw *uc_fw) +{ + return __uc_fw_to_gt(uc_fw, uc_fw->type); +} + +static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw) +{ + return gt_to_xe(uc_fw_to_gt(uc_fw)); +} + +/* + * List of required GuC and HuC binaries per-platform. + * Must be ordered based on platform + revid, from newer to older. + */ +#define XE_GUC_FIRMWARE_DEFS(fw_def, guc_def) \ + fw_def(METEORLAKE, 0, guc_def(mtl, 70, 5, 2)) \ + fw_def(ALDERLAKE_P, 0, guc_def(adlp, 70, 5, 2)) \ + fw_def(ALDERLAKE_S, 0, guc_def(tgl, 70, 5, 2)) \ + fw_def(PVC, 0, guc_def(pvc, 70, 5, 2)) \ + fw_def(DG2, 0, guc_def(dg2, 70, 5, 2)) \ + fw_def(DG1, 0, guc_def(dg1, 70, 5, 2)) \ + fw_def(TIGERLAKE, 0, guc_def(tgl, 70, 5, 2)) + +#define XE_HUC_FIRMWARE_DEFS(fw_def, huc_def) \ + fw_def(DG1, 0, huc_def(dg1, 7, 9, 3)) \ + fw_def(TIGERLAKE, 0, huc_def(tgl, 7, 9, 3)) + +#define __MAKE_UC_FW_PATH_MAJOR(prefix_, name_, major_) \ + "xe/" \ + __stringify(prefix_) "_" name_ "_" \ + __stringify(major_) ".bin" + +#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \ + "xe/" \ + __stringify(prefix_) name_ \ + __stringify(major_) "." \ + __stringify(minor_) "." 
\ + __stringify(patch_) ".bin" + +#define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \ + __MAKE_UC_FW_PATH_MAJOR(prefix_, "guc", major_) + +#define MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_) \ + __MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_) + +/* All blobs need to be declared via MODULE_FIRMWARE() */ +#define XE_UC_MODULE_FW(platform_, revid_, uc_) \ + MODULE_FIRMWARE(uc_); + +XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_GUC_FW_PATH) +XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_HUC_FW_PATH) + +/* The below structs and macros are used to iterate across the list of blobs */ +struct __packed uc_fw_blob { + u8 major; + u8 minor; + const char *path; +}; + +#define UC_FW_BLOB(major_, minor_, path_) \ + { .major = major_, .minor = minor_, .path = path_ } + +#define GUC_FW_BLOB(prefix_, major_, minor_, patch_) \ + UC_FW_BLOB(major_, minor_, \ + MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_)) + +#define HUC_FW_BLOB(prefix_, major_, minor_, bld_num_) \ + UC_FW_BLOB(major_, minor_, \ + MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_)) + +struct __packed uc_fw_platform_requirement { + enum xe_platform p; + u8 rev; /* first platform rev using this FW */ + const struct uc_fw_blob blob; +}; + +#define MAKE_FW_LIST(platform_, revid_, uc_) \ +{ \ + .p = XE_##platform_, \ + .rev = revid_, \ + .blob = uc_, \ +}, + +struct fw_blobs_by_type { + const struct uc_fw_platform_requirement *blobs; + u32 count; +}; + +static void +uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw) +{ + static const struct uc_fw_platform_requirement blobs_guc[] = { + XE_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB) + }; + static const struct uc_fw_platform_requirement blobs_huc[] = { + XE_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB) + }; + static const struct fw_blobs_by_type blobs_all[XE_UC_FW_NUM_TYPES] = { + [XE_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) }, + [XE_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) }, + }; + static const struct uc_fw_platform_requirement *fw_blobs; + enum xe_platform p = xe->info.platform; + u32 fw_count; + u8 rev = xe->info.revid; + int i; + + XE_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all)); + fw_blobs = blobs_all[uc_fw->type].blobs; + fw_count = blobs_all[uc_fw->type].count; + + for (i = 0; i < fw_count && p <= fw_blobs[i].p; i++) { + if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) { + const struct uc_fw_blob *blob = &fw_blobs[i].blob; + + uc_fw->path = blob->path; + uc_fw->major_ver_wanted = blob->major; + uc_fw->minor_ver_wanted = blob->minor; + break; + } + } +} + +/** + * xe_uc_fw_copy_rsa - copy fw RSA to buffer + * + * @uc_fw: uC firmware + * @dst: dst buffer + * @max_len: max number of bytes to copy + * + * Return: number of copied bytes. 
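+ * + * The RSA data sits right after the CSS header and uCode payload in the blob + * (see xe_uc_fw_rsa_offset()).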
+ */ +size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len) +{ + struct xe_device *xe = uc_fw_to_xe(uc_fw); + u32 size = min_t(u32, uc_fw->rsa_size, max_len); + + XE_BUG_ON(size % 4); + XE_BUG_ON(!xe_uc_fw_is_available(uc_fw)); + + xe_map_memcpy_from(xe, dst, &uc_fw->bo->vmap, + xe_uc_fw_rsa_offset(uc_fw), size); + + return size; +} + +static void uc_fw_fini(struct drm_device *drm, void *arg) +{ + struct xe_uc_fw *uc_fw = arg; + + if (!xe_uc_fw_is_available(uc_fw)) + return; + + xe_bo_unpin_map_no_vm(uc_fw->bo); + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED); +} + +int xe_uc_fw_init(struct xe_uc_fw *uc_fw) +{ + struct xe_device *xe = uc_fw_to_xe(uc_fw); + struct xe_gt *gt = uc_fw_to_gt(uc_fw); + struct device *dev = xe->drm.dev; + const struct firmware *fw = NULL; + struct uc_css_header *css; + struct xe_bo *obj; + size_t size; + int err; + + /* + * We use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status + * before we've looked at the HW caps to see if we have uC support. + */ + BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED); + XE_BUG_ON(uc_fw->status); + XE_BUG_ON(uc_fw->path); + + uc_fw_auto_select(xe, uc_fw); + xe_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ? + XE_UC_FIRMWARE_SELECTED : + XE_UC_FIRMWARE_DISABLED : + XE_UC_FIRMWARE_NOT_SUPPORTED); + + /* Treat a HuC that is absent from the list as firmware disabled */ + if (uc_fw->type == XE_UC_FW_TYPE_HUC && !xe_uc_fw_is_supported(uc_fw)) { + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED); + err = -ENOPKG; + return err; + } + err = request_firmware(&fw, uc_fw->path, dev); + if (err) + goto fail; + + /* Check the size of the blob before examining buffer contents */ + if (unlikely(fw->size < sizeof(struct uc_css_header))) { + drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, + fw->size, sizeof(struct uc_css_header)); + err = -ENODATA; + goto fail; + } + + css = (struct uc_css_header *)fw->data; + + /* Check integrity of size values inside CSS header */ + size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw - + css->exponent_size_dw) * sizeof(u32); + if (unlikely(size != sizeof(struct uc_css_header))) { + drm_warn(&xe->drm, + "%s firmware %s: unexpected header size: %zu != %zu\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, + fw->size, sizeof(struct uc_css_header)); + err = -EPROTO; + goto fail; + } + + /* uCode size must be calculated from the other sizes */ + uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); + + /* now RSA */ + uc_fw->rsa_size = css->key_size_dw * sizeof(u32); + + /* The blob must contain at least the header, uCode and RSA; compute the combined size of all three.
*/ + size = sizeof(struct uc_css_header) + uc_fw->ucode_size + + uc_fw->rsa_size; + if (unlikely(fw->size < size)) { + drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, + fw->size, size); + err = -ENOEXEC; + goto fail; + } + + /* Get version numbers from the CSS header */ + uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, + css->sw_version); + uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR, + css->sw_version); + + if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || + uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { + drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); + if (!xe_uc_fw_is_overridden(uc_fw)) { + err = -ENOEXEC; + goto fail; + } + } + + if (uc_fw->type == XE_UC_FW_TYPE_GUC) + uc_fw->private_data_size = css->private_data_size; + + obj = xe_bo_create_from_data(xe, gt, fw->data, fw->size, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(obj)) { + drm_notice(&xe->drm, "%s firmware %s: failed to create / populate bo", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path); + err = PTR_ERR(obj); + goto fail; + } + + uc_fw->bo = obj; + uc_fw->size = fw->size; + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_AVAILABLE); + + release_firmware(fw); + + err = drmm_add_action_or_reset(&xe->drm, uc_fw_fini, uc_fw); + if (err) + return err; + + return 0; + +fail: + xe_uc_fw_change_status(uc_fw, err == -ENOENT ? + XE_UC_FIRMWARE_MISSING : + XE_UC_FIRMWARE_ERROR); + + drm_notice(&xe->drm, "%s firmware %s: fetch failed with error %d\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); + drm_info(&xe->drm, "%s firmware(s) can be downloaded from %s\n", + xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL); + + release_firmware(fw); /* OK even if fw is NULL */ + return err; +} + +static u32 uc_fw_ggtt_offset(struct xe_uc_fw *uc_fw) +{ + return xe_bo_ggtt_addr(uc_fw->bo); +} + +static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags) +{ + struct xe_device *xe = uc_fw_to_xe(uc_fw); + struct xe_gt *gt = uc_fw_to_gt(uc_fw); + u32 src_offset; + int ret; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + + /* Set the source address for the uCode */ + src_offset = uc_fw_ggtt_offset(uc_fw); + xe_mmio_write32(gt, DMA_ADDR_0_LOW.reg, lower_32_bits(src_offset)); + xe_mmio_write32(gt, DMA_ADDR_0_HIGH.reg, upper_32_bits(src_offset)); + + /* Set the DMA destination */ + xe_mmio_write32(gt, DMA_ADDR_1_LOW.reg, offset); + xe_mmio_write32(gt, DMA_ADDR_1_HIGH.reg, DMA_ADDRESS_SPACE_WOPCM); + + /* + * Set the transfer size. 
The header plus uCode will be copied to WOPCM + * via DMA, excluding any other components + */ + xe_mmio_write32(gt, DMA_COPY_SIZE.reg, + sizeof(struct uc_css_header) + uc_fw->ucode_size); + + /* Start the DMA */ + xe_mmio_write32(gt, DMA_CTRL.reg, + _MASKED_BIT_ENABLE(dma_flags | START_DMA)); + + /* Wait for DMA to finish */ + ret = xe_mmio_wait32(gt, DMA_CTRL.reg, 0, START_DMA, 100); + if (ret) + drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n", + xe_uc_fw_type_repr(uc_fw->type), + xe_mmio_read32(gt, DMA_CTRL.reg)); + + /* Disable the bits once DMA is over */ + xe_mmio_write32(gt, DMA_CTRL.reg, _MASKED_BIT_DISABLE(dma_flags)); + + return ret; +} + +int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags) +{ + struct xe_device *xe = uc_fw_to_xe(uc_fw); + int err; + + /* make sure the status was cleared the last time we reset the uc */ + XE_BUG_ON(xe_uc_fw_is_loaded(uc_fw)); + + if (!xe_uc_fw_is_loadable(uc_fw)) + return -ENOEXEC; + + /* Call custom loader */ + err = uc_fw_xfer(uc_fw, offset, dma_flags); + if (err) + goto fail; + + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_TRANSFERRED); + return 0; + +fail: + drm_err(&xe->drm, "Failed to load %s firmware %s (%d)\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, + err); + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_LOAD_FAIL); + return err; +} + + +void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p) +{ + drm_printf(p, "%s firmware: %s\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path); + drm_printf(p, "\tstatus: %s\n", + xe_uc_fw_status_repr(uc_fw->status)); + drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n", + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted, + uc_fw->major_ver_found, uc_fw->minor_ver_found); + drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size); + drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size); +} diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h new file mode 100644 index 000000000000..b0df5064b27d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc_fw.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_UC_FW_H_ +#define _XE_UC_FW_H_ + +#include + +#include "xe_uc_fw_types.h" +#include "xe_uc_fw_abi.h" +#include "xe_macros.h" + +struct drm_printer; + +int xe_uc_fw_init(struct xe_uc_fw *uc_fw); +size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len); +int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags); +void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p); + +static inline u32 xe_uc_fw_rsa_offset(struct xe_uc_fw *uc_fw) +{ + return sizeof(struct uc_css_header) + uc_fw->ucode_size; +} + +static inline void xe_uc_fw_change_status(struct xe_uc_fw *uc_fw, + enum xe_uc_fw_status status) +{ + uc_fw->__status = status; +} + +static inline +const char *xe_uc_fw_status_repr(enum xe_uc_fw_status status) +{ + switch (status) { + case XE_UC_FIRMWARE_NOT_SUPPORTED: + return "N/A"; + case XE_UC_FIRMWARE_UNINITIALIZED: + return "UNINITIALIZED"; + case XE_UC_FIRMWARE_DISABLED: + return "DISABLED"; + case XE_UC_FIRMWARE_SELECTED: + return "SELECTED"; + case XE_UC_FIRMWARE_MISSING: + return "MISSING"; + case XE_UC_FIRMWARE_ERROR: + return "ERROR"; + case XE_UC_FIRMWARE_AVAILABLE: + return "AVAILABLE"; + case XE_UC_FIRMWARE_INIT_FAIL: + return "INIT FAIL"; + case XE_UC_FIRMWARE_LOADABLE: + return "LOADABLE"; + case XE_UC_FIRMWARE_LOAD_FAIL: + return "LOAD FAIL"; + case XE_UC_FIRMWARE_TRANSFERRED: + return "TRANSFERRED"; + case 
XE_UC_FIRMWARE_RUNNING: + return "RUNNING"; + } + return ""; +} + +static inline int xe_uc_fw_status_to_error(enum xe_uc_fw_status status) +{ + switch (status) { + case XE_UC_FIRMWARE_NOT_SUPPORTED: + return -ENODEV; + case XE_UC_FIRMWARE_UNINITIALIZED: + return -EACCES; + case XE_UC_FIRMWARE_DISABLED: + return -EPERM; + case XE_UC_FIRMWARE_MISSING: + return -ENOENT; + case XE_UC_FIRMWARE_ERROR: + return -ENOEXEC; + case XE_UC_FIRMWARE_INIT_FAIL: + case XE_UC_FIRMWARE_LOAD_FAIL: + return -EIO; + case XE_UC_FIRMWARE_SELECTED: + return -ESTALE; + case XE_UC_FIRMWARE_AVAILABLE: + case XE_UC_FIRMWARE_LOADABLE: + case XE_UC_FIRMWARE_TRANSFERRED: + case XE_UC_FIRMWARE_RUNNING: + return 0; + } + return -EINVAL; +} + +static inline const char *xe_uc_fw_type_repr(enum xe_uc_fw_type type) +{ + switch (type) { + case XE_UC_FW_TYPE_GUC: + return "GuC"; + case XE_UC_FW_TYPE_HUC: + return "HuC"; + } + return "uC"; +} + +static inline enum xe_uc_fw_status +__xe_uc_fw_status(struct xe_uc_fw *uc_fw) +{ + /* shouldn't call this before checking hw/blob availability */ + XE_BUG_ON(uc_fw->status == XE_UC_FIRMWARE_UNINITIALIZED); + return uc_fw->status; +} + +static inline bool xe_uc_fw_is_supported(struct xe_uc_fw *uc_fw) +{ + return __xe_uc_fw_status(uc_fw) != XE_UC_FIRMWARE_NOT_SUPPORTED; +} + +static inline bool xe_uc_fw_is_enabled(struct xe_uc_fw *uc_fw) +{ + return __xe_uc_fw_status(uc_fw) > XE_UC_FIRMWARE_DISABLED; +} + +static inline bool xe_uc_fw_is_disabled(struct xe_uc_fw *uc_fw) +{ + return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_DISABLED; +} + +static inline bool xe_uc_fw_is_available(struct xe_uc_fw *uc_fw) +{ + return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_AVAILABLE; +} + +static inline bool xe_uc_fw_is_loadable(struct xe_uc_fw *uc_fw) +{ + return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_LOADABLE; +} + +static inline bool xe_uc_fw_is_loaded(struct xe_uc_fw *uc_fw) +{ + return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_TRANSFERRED; +} + +static inline bool xe_uc_fw_is_running(struct xe_uc_fw *uc_fw) +{ + return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_RUNNING; +} + +static inline bool xe_uc_fw_is_overridden(const struct xe_uc_fw *uc_fw) +{ + return uc_fw->user_overridden; +} + +static inline void xe_uc_fw_sanitize(struct xe_uc_fw *uc_fw) +{ + if (xe_uc_fw_is_loaded(uc_fw)) + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_LOADABLE); +} + +static inline u32 __xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw) +{ + return sizeof(struct uc_css_header) + uc_fw->ucode_size; +} + +/** + * xe_uc_fw_get_upload_size() - Get size of firmware needed to be uploaded. + * @uc_fw: uC firmware. + * + * Get the size of the firmware and header that will be uploaded to WOPCM. + * + * Return: Upload firmware size, or zero on firmware fetch failure. 
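+ * + * Only the CSS header plus uCode are DMA'd to WOPCM (see uc_fw_xfer()); the + * RSA signature is copied out separately via xe_uc_fw_copy_rsa().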
+ */
+static inline u32 xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw)
+{
+	if (!xe_uc_fw_is_available(uc_fw))
+		return 0;
+
+	return __xe_uc_fw_get_upload_size(uc_fw);
+}
+
+#define XE_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/xe"
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
new file mode 100644
index 000000000000..dafd26cb0c41
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_FW_ABI_H
+#define _XE_UC_FW_ABI_H
+
+#include
+#include
+
+/**
+ * DOC: Firmware Layout
+ *
+ * The GuC/HuC firmware layout looks like this::
+ *
+ *      +======================================================================+
+ *      |  Firmware blob                                                       |
+ *      +===============+===============+============+============+============+
+ *      |  CSS header   |     uCode     |  RSA key   |  modulus   |  exponent  |
+ *      +===============+===============+============+============+============+
+ *       <-header size->                 <---header size continued ----------->
+ *       <--- size ----------------------------------------------------------->
+ *                                       <-key size->
+ *                                                    <-mod size->
+ *                                                                 <-exp size->
+ *
+ * The firmware may or may not have modulus key and exponent data. The header,
+ * uCode and RSA signature are must-have components that will be used by the
+ * driver. The length of each component, in dwords, can be found in the header.
+ * If the modulus and exponent are not present in the firmware (a.k.a. a
+ * truncated image), the length values still appear in the header.
+ *
+ * The driver does some basic firmware size validation based on these rules:
+ *
+ * 1. The header, uCode and RSA are must-have components.
+ * 2. All firmware components, if present, are in the sequence illustrated in
+ *    the layout table above.
+ * 3. The length of each component can be found in the header, in dwords.
+ * 4. The modulus and exponent key are not required by the driver and may not
+ *    appear in the firmware, in which case the driver loads a truncated image.
+ */
+
+struct uc_css_header {
+	u32 module_type;
+	/*
+	 * header_size includes all non-uCode bits, including css_header, rsa
+	 * key, modulus key and exponent data.
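+	 * Given the layout documented above, the uCode payload size follows
+	 * as (a sketch of the arithmetic, in bytes):
+	 *
+	 *	ucode_size = (size_dw - header_size_dw) * sizeof(u32);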
+ */ + u32 header_size_dw; + u32 header_version; + u32 module_id; + u32 module_vendor; + u32 date; +#define CSS_DATE_DAY (0xFF << 0) +#define CSS_DATE_MONTH (0xFF << 8) +#define CSS_DATE_YEAR (0xFFFF << 16) + u32 size_dw; /* uCode plus header_size_dw */ + u32 key_size_dw; + u32 modulus_size_dw; + u32 exponent_size_dw; + u32 time; +#define CSS_TIME_HOUR (0xFF << 0) +#define CSS_DATE_MIN (0xFF << 8) +#define CSS_DATE_SEC (0xFFFF << 16) + char username[8]; + char buildnumber[12]; + u32 sw_version; +#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16) +#define CSS_SW_VERSION_UC_MINOR (0xFF << 8) +#define CSS_SW_VERSION_UC_PATCH (0xFF << 0) + u32 reserved0[13]; + union { + u32 private_data_size; /* only applies to GuC */ + u32 reserved1; + }; + u32 header_info; +} __packed; +static_assert(sizeof(struct uc_css_header) == 128); + +#endif diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h new file mode 100644 index 000000000000..1cfd30a655df --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_UC_FW_TYPES_H_ +#define _XE_UC_FW_TYPES_H_ + +#include + +struct xe_bo; + +/* + * +------------+---------------------------------------------------+ + * | PHASE | FIRMWARE STATUS TRANSITIONS | + * +============+===================================================+ + * | | UNINITIALIZED | + * +------------+- / | \ -+ + * | | DISABLED <--/ | \--> NOT_SUPPORTED | + * | init_early | V | + * | | SELECTED | + * +------------+- / | \ -+ + * | | MISSING <--/ | \--> ERROR | + * | fetch | V | + * | | AVAILABLE | + * +------------+- | \ -+ + * | | | \--> INIT FAIL | + * | init | V | + * | | /------> LOADABLE <----<-----------\ | + * +------------+- \ / \ \ \ -+ + * | | LOAD FAIL <--< \--> TRANSFERRED \ | + * | upload | \ / \ / | + * | | \---------/ \--> RUNNING | + * +------------+---------------------------------------------------+ + */ + +/* + * FIXME: Ported from the i915 and this is state machine is way too complicated. + * Circle back and simplify this. 
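+ *
+ * Until that happens, the xe_uc_fw_is_*() and xe_uc_fw_status_to_error()
+ * helpers in xe_uc_fw.h can be used to query the state from outside; e.g.
+ * a caller gating on a fully booted uC might do (sketch only, "uc_fw" being
+ * whatever struct xe_uc_fw pointer the caller holds):
+ *
+ *	if (!xe_uc_fw_is_running(uc_fw))
+ *		return xe_uc_fw_status_to_error(__xe_uc_fw_status(uc_fw));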
+ */
+enum xe_uc_fw_status {
+	XE_UC_FIRMWARE_NOT_SUPPORTED = -1, /* no uc HW */
+	XE_UC_FIRMWARE_UNINITIALIZED = 0, /* used to catch checks done too early */
+	XE_UC_FIRMWARE_DISABLED, /* disabled */
+	XE_UC_FIRMWARE_SELECTED, /* selected the blob we want to load */
+	XE_UC_FIRMWARE_MISSING, /* blob not found on the system */
+	XE_UC_FIRMWARE_ERROR, /* invalid format or version */
+	XE_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */
+	XE_UC_FIRMWARE_INIT_FAIL, /* failed to prepare fw objects for load */
+	XE_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */
+	XE_UC_FIRMWARE_LOAD_FAIL, /* failed to xfer or init/auth the fw */
+	XE_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */
+	XE_UC_FIRMWARE_RUNNING /* init/auth done */
+};
+
+enum xe_uc_fw_type {
+	XE_UC_FW_TYPE_GUC = 0,
+	XE_UC_FW_TYPE_HUC
+};
+#define XE_UC_FW_NUM_TYPES 2
+
+/**
+ * struct xe_uc_fw - XE micro controller firmware
+ */
+struct xe_uc_fw {
+	/** @type: type of uC firmware */
+	enum xe_uc_fw_type type;
+	union {
+		/** @status: firmware load status */
+		const enum xe_uc_fw_status status;
+		/**
+		 * @__status: private firmware load status - only to be used
+		 * by firmware loading code
+		 */
+		enum xe_uc_fw_status __status;
+	};
+	/** @path: path to uC firmware */
+	const char *path;
+	/** @user_overridden: user provided path to uC firmware via modparam */
+	bool user_overridden;
+	/** @size: size of uC firmware including css header */
+	size_t size;
+
+	/** @bo: XE BO for uC firmware */
+	struct xe_bo *bo;
+
+	/*
+	 * The firmware build process will generate a version header file with
+	 * major and minor version defined. The versions are built into the CSS
+	 * header of the firmware. The xe kernel driver sets the minimal
+	 * firmware version required per platform.
+	 */
+
+	/** @major_ver_wanted: major firmware version wanted by platform */
+	u16 major_ver_wanted;
+	/** @minor_ver_wanted: minor firmware version wanted by platform */
+	u16 minor_ver_wanted;
+	/** @major_ver_found: major version found in firmware blob */
+	u16 major_ver_found;
+	/** @minor_ver_found: minor version found in firmware blob */
+	u16 minor_ver_found;
+
+	/** @rsa_size: RSA size */
+	u32 rsa_size;
+	/** @ucode_size: micro kernel size */
+	u32 ucode_size;
+
+	/** @private_data_size: size of private data found in uC css header */
+	u32 private_data_size;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_types.h b/drivers/gpu/drm/xe/xe_uc_types.h
new file mode 100644
index 000000000000..49bef6498b85
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_uc_types.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _XE_UC_TYPES_H_
+#define _XE_UC_TYPES_H_
+
+#include "xe_guc_types.h"
+#include "xe_huc_types.h"
+#include "xe_wopcm_types.h"
+
+/**
+ * struct xe_uc - XE micro controllers
+ */
+struct xe_uc {
+	/** @guc: Graphics micro controller */
+	struct xe_guc guc;
+	/** @huc: HuC */
+	struct xe_huc huc;
+	/** @wopcm: WOPCM */
+	struct xe_wopcm wopcm;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
new file mode 100644
index 000000000000..d47a8617c5b6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -0,0 +1,3407 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "xe_vm.h"
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_engine.h"
+#include "xe_gt.h"
+#include "xe_gt_pagefault.h"
+#include "xe_migrate.h"
+#include "xe_pm.h"
+#include
"xe_preempt_fence.h" +#include "xe_pt.h" +#include "xe_res_cursor.h" +#include "xe_trace.h" +#include "xe_sync.h" + +#define TEST_VM_ASYNC_OPS_ERROR + +/** + * xe_vma_userptr_check_repin() - Advisory check for repin needed + * @vma: The userptr vma + * + * Check if the userptr vma has been invalidated since last successful + * repin. The check is advisory only and can the function can be called + * without the vm->userptr.notifier_lock held. There is no guarantee that the + * vma userptr will remain valid after a lockless check, so typically + * the call needs to be followed by a proper check under the notifier_lock. + * + * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended. + */ +int xe_vma_userptr_check_repin(struct xe_vma *vma) +{ + return mmu_interval_check_retry(&vma->userptr.notifier, + vma->userptr.notifier_seq) ? + -EAGAIN : 0; +} + +int xe_vma_userptr_pin_pages(struct xe_vma *vma) +{ + struct xe_vm *vm = vma->vm; + struct xe_device *xe = vm->xe; + const unsigned long num_pages = + (vma->end - vma->start + 1) >> PAGE_SHIFT; + struct page **pages; + bool in_kthread = !current->mm; + unsigned long notifier_seq; + int pinned, ret, i; + bool read_only = vma->pte_flags & PTE_READ_ONLY; + + lockdep_assert_held(&vm->lock); + XE_BUG_ON(!xe_vma_is_userptr(vma)); +retry: + if (vma->destroyed) + return 0; + + notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier); + if (notifier_seq == vma->userptr.notifier_seq) + return 0; + + pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + if (vma->userptr.sg) { + dma_unmap_sgtable(xe->drm.dev, + vma->userptr.sg, + read_only ? DMA_TO_DEVICE : + DMA_BIDIRECTIONAL, 0); + sg_free_table(vma->userptr.sg); + vma->userptr.sg = NULL; + } + + pinned = ret = 0; + if (in_kthread) { + if (!mmget_not_zero(vma->userptr.notifier.mm)) { + ret = -EFAULT; + goto mm_closed; + } + kthread_use_mm(vma->userptr.notifier.mm); + } + + while (pinned < num_pages) { + ret = get_user_pages_fast(vma->userptr.ptr + pinned * PAGE_SIZE, + num_pages - pinned, + read_only ? 0 : FOLL_WRITE, + &pages[pinned]); + if (ret < 0) { + if (in_kthread) + ret = 0; + break; + } + + pinned += ret; + ret = 0; + } + + if (in_kthread) { + kthread_unuse_mm(vma->userptr.notifier.mm); + mmput(vma->userptr.notifier.mm); + } +mm_closed: + if (ret) + goto out; + + ret = sg_alloc_table_from_pages(&vma->userptr.sgt, pages, pinned, + 0, (u64)pinned << PAGE_SHIFT, + GFP_KERNEL); + if (ret) { + vma->userptr.sg = NULL; + goto out; + } + vma->userptr.sg = &vma->userptr.sgt; + + ret = dma_map_sgtable(xe->drm.dev, vma->userptr.sg, + read_only ? DMA_TO_DEVICE : + DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC | + DMA_ATTR_NO_KERNEL_MAPPING); + if (ret) { + sg_free_table(vma->userptr.sg); + vma->userptr.sg = NULL; + goto out; + } + + for (i = 0; i < pinned; ++i) { + if (!read_only) { + lock_page(pages[i]); + set_page_dirty(pages[i]); + unlock_page(pages[i]); + } + + mark_page_accessed(pages[i]); + } + +out: + release_pages(pages, pinned); + kvfree(pages); + + if (!(ret < 0)) { + vma->userptr.notifier_seq = notifier_seq; + if (xe_vma_userptr_check_repin(vma) == -EAGAIN) + goto retry; + } + + return ret < 0 ? 
ret : 0; +} + +static bool preempt_fences_waiting(struct xe_vm *vm) +{ + struct xe_engine *e; + + lockdep_assert_held(&vm->lock); + xe_vm_assert_held(vm); + + list_for_each_entry(e, &vm->preempt.engines, compute.link) { + if (!e->compute.pfence || (e->compute.pfence && + test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &e->compute.pfence->flags))) { + return true; + } + } + + return false; +} + +static void free_preempt_fences(struct list_head *list) +{ + struct list_head *link, *next; + + list_for_each_safe(link, next, list) + xe_preempt_fence_free(to_preempt_fence_from_link(link)); +} + +static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, + unsigned int *count) +{ + lockdep_assert_held(&vm->lock); + xe_vm_assert_held(vm); + + if (*count >= vm->preempt.num_engines) + return 0; + + for (; *count < vm->preempt.num_engines; ++(*count)) { + struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); + + if (IS_ERR(pfence)) + return PTR_ERR(pfence); + + list_move_tail(xe_preempt_fence_link(pfence), list); + } + + return 0; +} + +static int wait_for_existing_preempt_fences(struct xe_vm *vm) +{ + struct xe_engine *e; + + xe_vm_assert_held(vm); + + list_for_each_entry(e, &vm->preempt.engines, compute.link) { + if (e->compute.pfence) { + long timeout = dma_fence_wait(e->compute.pfence, false); + + if (timeout < 0) + return -ETIME; + dma_fence_put(e->compute.pfence); + e->compute.pfence = NULL; + } + } + + return 0; +} + +static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) +{ + struct list_head *link; + struct xe_engine *e; + + list_for_each_entry(e, &vm->preempt.engines, compute.link) { + struct dma_fence *fence; + + link = list->next; + XE_BUG_ON(link == list); + + fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), + e, e->compute.context, + ++e->compute.seqno); + dma_fence_put(e->compute.pfence); + e->compute.pfence = fence; + } +} + +static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) +{ + struct xe_engine *e; + struct ww_acquire_ctx ww; + int err; + + err = xe_bo_lock(bo, &ww, vm->preempt.num_engines, true); + if (err) + return err; + + list_for_each_entry(e, &vm->preempt.engines, compute.link) + if (e->compute.pfence) { + dma_resv_add_fence(bo->ttm.base.resv, + e->compute.pfence, + DMA_RESV_USAGE_BOOKKEEP); + } + + xe_bo_unlock(bo, &ww); + return 0; +} + +/** + * xe_vm_fence_all_extobjs() - Add a fence to vm's external objects' resv + * @vm: The vm. + * @fence: The fence to add. + * @usage: The resv usage for the fence. + * + * Loops over all of the vm's external object bindings and adds a @fence + * with the given @usage to all of the external object's reservation + * objects. 
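+ *
+ * A typical call, as issued by the preempt-fence reinstall path below:
+ *
+ *	xe_vm_fence_all_extobjs(vm, e->compute.pfence,
+ *				DMA_RESV_USAGE_BOOKKEEP);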
+ */ +void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence, + enum dma_resv_usage usage) +{ + struct xe_vma *vma; + + list_for_each_entry(vma, &vm->extobj.list, extobj.link) + dma_resv_add_fence(vma->bo->ttm.base.resv, fence, usage); +} + +static void resume_and_reinstall_preempt_fences(struct xe_vm *vm) +{ + struct xe_engine *e; + + lockdep_assert_held(&vm->lock); + xe_vm_assert_held(vm); + + list_for_each_entry(e, &vm->preempt.engines, compute.link) { + e->ops->resume(e); + + dma_resv_add_fence(&vm->resv, e->compute.pfence, + DMA_RESV_USAGE_BOOKKEEP); + xe_vm_fence_all_extobjs(vm, e->compute.pfence, + DMA_RESV_USAGE_BOOKKEEP); + } +} + +int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e) +{ + struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV]; + struct ttm_validate_buffer *tv; + struct ww_acquire_ctx ww; + struct list_head objs; + struct dma_fence *pfence; + int err; + bool wait; + + XE_BUG_ON(!xe_vm_in_compute_mode(vm)); + + down_write(&vm->lock); + + err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs, true, 1); + if (err) + goto out_unlock_outer; + + pfence = xe_preempt_fence_create(e, e->compute.context, + ++e->compute.seqno); + if (!pfence) { + err = -ENOMEM; + goto out_unlock; + } + + list_add(&e->compute.link, &vm->preempt.engines); + ++vm->preempt.num_engines; + e->compute.pfence = pfence; + + down_read(&vm->userptr.notifier_lock); + + dma_resv_add_fence(&vm->resv, pfence, + DMA_RESV_USAGE_BOOKKEEP); + + xe_vm_fence_all_extobjs(vm, pfence, DMA_RESV_USAGE_BOOKKEEP); + + /* + * Check to see if a preemption on VM is in flight or userptr + * invalidation, if so trigger this preempt fence to sync state with + * other preempt fences on the VM. + */ + wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm); + if (wait) + dma_fence_enable_sw_signaling(pfence); + + up_read(&vm->userptr.notifier_lock); + +out_unlock: + xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs); +out_unlock_outer: + up_write(&vm->lock); + + return err; +} + +/** + * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs + * that need repinning. + * @vm: The VM. + * + * This function checks for whether the VM has userptrs that need repinning, + * and provides a release-type barrier on the userptr.notifier_lock after + * checking. + * + * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are. + */ +int __xe_vm_userptr_needs_repin(struct xe_vm *vm) +{ + lockdep_assert_held_read(&vm->userptr.notifier_lock); + + return (list_empty(&vm->userptr.repin_list) && + list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN; +} + +/** + * xe_vm_lock_dma_resv() - Lock the vm dma_resv object and the dma_resv + * objects of the vm's external buffer objects. + * @vm: The vm. + * @ww: Pointer to a struct ww_acquire_ctx locking context. + * @tv_onstack: Array size XE_ONSTACK_TV of storage for the struct + * ttm_validate_buffers used for locking. + * @tv: Pointer to a pointer that on output contains the actual storage used. + * @objs: List head for the buffer objects locked. + * @intr: Whether to lock interruptible. + * @num_shared: Number of dma-fence slots to reserve in the locked objects. + * + * Locks the vm dma-resv objects and all the dma-resv objects of the + * buffer objects on the vm external object list. The TTM utilities require + * a list of struct ttm_validate_buffers pointing to the actual buffer + * objects to lock. 
Storage for those struct ttm_validate_buffers should + * be provided in @tv_onstack, and is typically reserved on the stack + * of the caller. If the size of @tv_onstack isn't sufficient, then + * storage will be allocated internally using kvmalloc(). + * + * The function performs deadlock handling internally, and after a + * successful return the ww locking transaction should be considered + * sealed. + * + * Return: 0 on success, Negative error code on error. In particular if + * @intr is set to true, -EINTR or -ERESTARTSYS may be returned. In case + * of error, any locking performed has been reverted. + */ +int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww, + struct ttm_validate_buffer *tv_onstack, + struct ttm_validate_buffer **tv, + struct list_head *objs, + bool intr, + unsigned int num_shared) +{ + struct ttm_validate_buffer *tv_vm, *tv_bo; + struct xe_vma *vma, *next; + LIST_HEAD(dups); + int err; + + lockdep_assert_held(&vm->lock); + + if (vm->extobj.entries < XE_ONSTACK_TV) { + tv_vm = tv_onstack; + } else { + tv_vm = kvmalloc_array(vm->extobj.entries + 1, sizeof(*tv_vm), + GFP_KERNEL); + if (!tv_vm) + return -ENOMEM; + } + tv_bo = tv_vm + 1; + + INIT_LIST_HEAD(objs); + list_for_each_entry(vma, &vm->extobj.list, extobj.link) { + tv_bo->num_shared = num_shared; + tv_bo->bo = &vma->bo->ttm; + + list_add_tail(&tv_bo->head, objs); + tv_bo++; + } + tv_vm->num_shared = num_shared; + tv_vm->bo = xe_vm_ttm_bo(vm); + list_add_tail(&tv_vm->head, objs); + err = ttm_eu_reserve_buffers(ww, objs, intr, &dups); + if (err) + goto out_err; + + spin_lock(&vm->notifier.list_lock); + list_for_each_entry_safe(vma, next, &vm->notifier.rebind_list, + notifier.rebind_link) { + xe_bo_assert_held(vma->bo); + + list_del_init(&vma->notifier.rebind_link); + if (vma->gt_present && !vma->destroyed) + list_move_tail(&vma->rebind_link, &vm->rebind_list); + } + spin_unlock(&vm->notifier.list_lock); + + *tv = tv_vm; + return 0; + +out_err: + if (tv_vm != tv_onstack) + kvfree(tv_vm); + + return err; +} + +/** + * xe_vm_unlock_dma_resv() - Unlock reservation objects locked by + * xe_vm_lock_dma_resv() + * @vm: The vm. + * @tv_onstack: The @tv_onstack array given to xe_vm_lock_dma_resv(). + * @tv: The value of *@tv given by xe_vm_lock_dma_resv(). + * @ww: The ww_acquire_context used for locking. + * @objs: The list returned from xe_vm_lock_dma_resv(). + * + * Unlocks the reservation objects and frees any memory allocated by + * xe_vm_lock_dma_resv(). + */ +void xe_vm_unlock_dma_resv(struct xe_vm *vm, + struct ttm_validate_buffer *tv_onstack, + struct ttm_validate_buffer *tv, + struct ww_acquire_ctx *ww, + struct list_head *objs) +{ + /* + * Nothing should've been able to enter the list while we were locked, + * since we've held the dma-resvs of all the vm's external objects, + * and holding the dma_resv of an object is required for list + * addition, and we shouldn't add ourselves. 
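+	 * (The notifier rebind list was already drained under
+	 * vm->notifier.list_lock in xe_vm_lock_dma_resv(), so it is expected
+	 * to be empty here.)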
+ */ + XE_WARN_ON(!list_empty(&vm->notifier.rebind_list)); + + ttm_eu_backoff_reservation(ww, objs); + if (tv && tv != tv_onstack) + kvfree(tv); +} + +static void preempt_rebind_work_func(struct work_struct *w) +{ + struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); + struct xe_vma *vma; + struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV]; + struct ttm_validate_buffer *tv; + struct ww_acquire_ctx ww; + struct list_head objs; + struct dma_fence *rebind_fence; + unsigned int fence_count = 0; + LIST_HEAD(preempt_fences); + int err; + long wait; + int __maybe_unused tries = 0; + + XE_BUG_ON(!xe_vm_in_compute_mode(vm)); + trace_xe_vm_rebind_worker_enter(vm); + + if (xe_vm_is_closed(vm)) { + trace_xe_vm_rebind_worker_exit(vm); + return; + } + + down_write(&vm->lock); + +retry: + if (vm->async_ops.error) + goto out_unlock_outer; + + /* + * Extreme corner where we exit a VM error state with a munmap style VM + * unbind inflight which requires a rebind. In this case the rebind + * needs to install some fences into the dma-resv slots. The worker to + * do this queued, let that worker make progress by dropping vm->lock + * and trying this again. + */ + if (vm->async_ops.munmap_rebind_inflight) { + up_write(&vm->lock); + flush_work(&vm->async_ops.work); + goto retry; + } + + if (xe_vm_userptr_check_repin(vm)) { + err = xe_vm_userptr_pin(vm); + if (err) + goto out_unlock_outer; + } + + err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs, + false, vm->preempt.num_engines); + if (err) + goto out_unlock_outer; + + /* Fresh preempt fences already installed. Everyting is running. */ + if (!preempt_fences_waiting(vm)) + goto out_unlock; + + /* + * This makes sure vm is completely suspended and also balances + * xe_engine suspend- and resume; we resume *all* vm engines below. + */ + err = wait_for_existing_preempt_fences(vm); + if (err) + goto out_unlock; + + err = alloc_preempt_fences(vm, &preempt_fences, &fence_count); + if (err) + goto out_unlock; + + list_for_each_entry(vma, &vm->rebind_list, rebind_link) { + if (xe_vma_is_userptr(vma) || vma->destroyed) + continue; + + err = xe_bo_validate(vma->bo, vm, false); + if (err) + goto out_unlock; + } + + rebind_fence = xe_vm_rebind(vm, true); + if (IS_ERR(rebind_fence)) { + err = PTR_ERR(rebind_fence); + goto out_unlock; + } + + if (rebind_fence) { + dma_fence_wait(rebind_fence, false); + dma_fence_put(rebind_fence); + } + + /* Wait on munmap style VM unbinds */ + wait = dma_resv_wait_timeout(&vm->resv, + DMA_RESV_USAGE_KERNEL, + false, MAX_SCHEDULE_TIMEOUT); + if (wait <= 0) { + err = -ETIME; + goto out_unlock; + } + +#define retry_required(__tries, __vm) \ + (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \ + (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \ + __xe_vm_userptr_needs_repin(__vm)) + + down_read(&vm->userptr.notifier_lock); + if (retry_required(tries, vm)) { + up_read(&vm->userptr.notifier_lock); + err = -EAGAIN; + goto out_unlock; + } + +#undef retry_required + + /* Point of no return. 
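+	 * The preempt fences are armed and the engines resumed below; there
+	 * are no error paths past this point.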
*/ + arm_preempt_fences(vm, &preempt_fences); + resume_and_reinstall_preempt_fences(vm); + up_read(&vm->userptr.notifier_lock); + +out_unlock: + xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs); +out_unlock_outer: + if (err == -EAGAIN) { + trace_xe_vm_rebind_worker_retry(vm); + goto retry; + } + up_write(&vm->lock); + + free_preempt_fences(&preempt_fences); + + XE_WARN_ON(err < 0); /* TODO: Kill VM or put in error state */ + trace_xe_vm_rebind_worker_exit(vm); +} + +struct async_op_fence; +static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, + struct xe_engine *e, struct xe_sync_entry *syncs, + u32 num_syncs, struct async_op_fence *afence); + +static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier); + struct xe_vm *vm = vma->vm; + struct dma_resv_iter cursor; + struct dma_fence *fence; + long err; + + XE_BUG_ON(!xe_vma_is_userptr(vma)); + trace_xe_vma_userptr_invalidate(vma); + + if (!mmu_notifier_range_blockable(range)) + return false; + + down_write(&vm->userptr.notifier_lock); + mmu_interval_set_seq(mni, cur_seq); + + /* No need to stop gpu access if the userptr is not yet bound. */ + if (!vma->userptr.initial_bind) { + up_write(&vm->userptr.notifier_lock); + return true; + } + + /* + * Tell exec and rebind worker they need to repin and rebind this + * userptr. + */ + if (!xe_vm_in_fault_mode(vm) && !vma->destroyed && vma->gt_present) { + spin_lock(&vm->userptr.invalidated_lock); + list_move_tail(&vma->userptr.invalidate_link, + &vm->userptr.invalidated); + spin_unlock(&vm->userptr.invalidated_lock); + } + + up_write(&vm->userptr.notifier_lock); + + /* + * Preempt fences turn into schedule disables, pipeline these. + * Note that even in fault mode, we need to wait for binds and + * unbinds to complete, and those are attached as BOOKMARK fences + * to the vm. + */ + dma_resv_iter_begin(&cursor, &vm->resv, + DMA_RESV_USAGE_BOOKKEEP); + dma_resv_for_each_fence_unlocked(&cursor, fence) + dma_fence_enable_sw_signaling(fence); + dma_resv_iter_end(&cursor); + + err = dma_resv_wait_timeout(&vm->resv, + DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); + XE_WARN_ON(err <= 0); + + if (xe_vm_in_fault_mode(vm)) { + err = xe_vm_invalidate_vma(vma); + XE_WARN_ON(err); + } + + trace_xe_vma_userptr_invalidate_complete(vma); + + return true; +} + +static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { + .invalidate = vma_userptr_invalidate, +}; + +int xe_vm_userptr_pin(struct xe_vm *vm) +{ + struct xe_vma *vma, *next; + int err = 0; + LIST_HEAD(tmp_evict); + + lockdep_assert_held_write(&vm->lock); + + /* Collect invalidated userptrs */ + spin_lock(&vm->userptr.invalidated_lock); + list_for_each_entry_safe(vma, next, &vm->userptr.invalidated, + userptr.invalidate_link) { + list_del_init(&vma->userptr.invalidate_link); + list_move_tail(&vma->userptr_link, &vm->userptr.repin_list); + } + spin_unlock(&vm->userptr.invalidated_lock); + + /* Pin and move to temporary list */ + list_for_each_entry_safe(vma, next, &vm->userptr.repin_list, userptr_link) { + err = xe_vma_userptr_pin_pages(vma); + if (err < 0) + goto out_err; + + list_move_tail(&vma->userptr_link, &tmp_evict); + } + + /* Take lock and move to rebind_list for rebinding. 
*/ + err = dma_resv_lock_interruptible(&vm->resv, NULL); + if (err) + goto out_err; + + list_for_each_entry_safe(vma, next, &tmp_evict, userptr_link) { + list_del_init(&vma->userptr_link); + list_move_tail(&vma->rebind_link, &vm->rebind_list); + } + + dma_resv_unlock(&vm->resv); + + return 0; + +out_err: + list_splice_tail(&tmp_evict, &vm->userptr.repin_list); + + return err; +} + +/** + * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs + * that need repinning. + * @vm: The VM. + * + * This function does an advisory check for whether the VM has userptrs that + * need repinning. + * + * Return: 0 if there are no indications of userptrs needing repinning, + * -EAGAIN if there are. + */ +int xe_vm_userptr_check_repin(struct xe_vm *vm) +{ + return (list_empty_careful(&vm->userptr.repin_list) && + list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN; +} + +static struct dma_fence * +xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e, + struct xe_sync_entry *syncs, u32 num_syncs); + +struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) +{ + struct dma_fence *fence = NULL; + struct xe_vma *vma, *next; + + lockdep_assert_held(&vm->lock); + if (xe_vm_no_dma_fences(vm) && !rebind_worker) + return NULL; + + xe_vm_assert_held(vm); + list_for_each_entry_safe(vma, next, &vm->rebind_list, rebind_link) { + XE_WARN_ON(!vma->gt_present); + + list_del_init(&vma->rebind_link); + dma_fence_put(fence); + if (rebind_worker) + trace_xe_vma_rebind_worker(vma); + else + trace_xe_vma_rebind_exec(vma); + fence = xe_vm_bind_vma(vma, NULL, NULL, 0); + if (IS_ERR(fence)) + return fence; + } + + return fence; +} + +static struct xe_vma *xe_vma_create(struct xe_vm *vm, + struct xe_bo *bo, + u64 bo_offset_or_userptr, + u64 start, u64 end, + bool read_only, + u64 gt_mask) +{ + struct xe_vma *vma; + struct xe_gt *gt; + u8 id; + + XE_BUG_ON(start >= end); + XE_BUG_ON(end >= vm->size); + + vma = kzalloc(sizeof(*vma), GFP_KERNEL); + if (!vma) { + vma = ERR_PTR(-ENOMEM); + return vma; + } + + INIT_LIST_HEAD(&vma->rebind_link); + INIT_LIST_HEAD(&vma->unbind_link); + INIT_LIST_HEAD(&vma->userptr_link); + INIT_LIST_HEAD(&vma->userptr.invalidate_link); + INIT_LIST_HEAD(&vma->notifier.rebind_link); + INIT_LIST_HEAD(&vma->extobj.link); + + vma->vm = vm; + vma->start = start; + vma->end = end; + if (read_only) + vma->pte_flags = PTE_READ_ONLY; + + if (gt_mask) { + vma->gt_mask = gt_mask; + } else { + for_each_gt(gt, vm->xe, id) + if (!xe_gt_is_media_type(gt)) + vma->gt_mask |= 0x1 << id; + } + + if (vm->xe->info.platform == XE_PVC) + vma->use_atomic_access_pte_bit = true; + + if (bo) { + xe_bo_assert_held(bo); + vma->bo_offset = bo_offset_or_userptr; + vma->bo = xe_bo_get(bo); + list_add_tail(&vma->bo_link, &bo->vmas); + } else /* userptr */ { + u64 size = end - start + 1; + int err; + + vma->userptr.ptr = bo_offset_or_userptr; + + err = mmu_interval_notifier_insert(&vma->userptr.notifier, + current->mm, + vma->userptr.ptr, size, + &vma_userptr_notifier_ops); + if (err) { + kfree(vma); + vma = ERR_PTR(err); + return vma; + } + + vma->userptr.notifier_seq = LONG_MAX; + xe_vm_get(vm); + } + + return vma; +} + +static bool vm_remove_extobj(struct xe_vma *vma) +{ + if (!list_empty(&vma->extobj.link)) { + vma->vm->extobj.entries--; + list_del_init(&vma->extobj.link); + return true; + } + return false; +} + +static void xe_vma_destroy_late(struct xe_vma *vma) +{ + struct xe_vm *vm = vma->vm; + struct xe_device *xe = vm->xe; + bool read_only = vma->pte_flags & PTE_READ_ONLY; + + if 
(xe_vma_is_userptr(vma)) { + if (vma->userptr.sg) { + dma_unmap_sgtable(xe->drm.dev, + vma->userptr.sg, + read_only ? DMA_TO_DEVICE : + DMA_BIDIRECTIONAL, 0); + sg_free_table(vma->userptr.sg); + vma->userptr.sg = NULL; + } + + /* + * Since userptr pages are not pinned, we can't remove + * the notifer until we're sure the GPU is not accessing + * them anymore + */ + mmu_interval_notifier_remove(&vma->userptr.notifier); + xe_vm_put(vm); + } else { + xe_bo_put(vma->bo); + } + + kfree(vma); +} + +static void vma_destroy_work_func(struct work_struct *w) +{ + struct xe_vma *vma = + container_of(w, struct xe_vma, destroy_work); + + xe_vma_destroy_late(vma); +} + +static struct xe_vma * +bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm, + struct xe_vma *ignore) +{ + struct xe_vma *vma; + + list_for_each_entry(vma, &bo->vmas, bo_link) { + if (vma != ignore && vma->vm == vm && !vma->destroyed) + return vma; + } + + return NULL; +} + +static bool bo_has_vm_references(struct xe_bo *bo, struct xe_vm *vm, + struct xe_vma *ignore) +{ + struct ww_acquire_ctx ww; + bool ret; + + xe_bo_lock(bo, &ww, 0, false); + ret = !!bo_has_vm_references_locked(bo, vm, ignore); + xe_bo_unlock(bo, &ww); + + return ret; +} + +static void __vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma) +{ + list_add(&vma->extobj.link, &vm->extobj.list); + vm->extobj.entries++; +} + +static void vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma) +{ + struct xe_bo *bo = vma->bo; + + lockdep_assert_held_write(&vm->lock); + + if (bo_has_vm_references(bo, vm, vma)) + return; + + __vm_insert_extobj(vm, vma); +} + +static void vma_destroy_cb(struct dma_fence *fence, + struct dma_fence_cb *cb) +{ + struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb); + + INIT_WORK(&vma->destroy_work, vma_destroy_work_func); + queue_work(system_unbound_wq, &vma->destroy_work); +} + +static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) +{ + struct xe_vm *vm = vma->vm; + + lockdep_assert_held_write(&vm->lock); + XE_BUG_ON(!list_empty(&vma->unbind_link)); + + if (xe_vma_is_userptr(vma)) { + XE_WARN_ON(!vma->destroyed); + spin_lock(&vm->userptr.invalidated_lock); + list_del_init(&vma->userptr.invalidate_link); + spin_unlock(&vm->userptr.invalidated_lock); + list_del(&vma->userptr_link); + } else { + xe_bo_assert_held(vma->bo); + list_del(&vma->bo_link); + + spin_lock(&vm->notifier.list_lock); + list_del(&vma->notifier.rebind_link); + spin_unlock(&vm->notifier.list_lock); + + if (!vma->bo->vm && vm_remove_extobj(vma)) { + struct xe_vma *other; + + other = bo_has_vm_references_locked(vma->bo, vm, NULL); + + if (other) + __vm_insert_extobj(vm, other); + } + } + + xe_vm_assert_held(vm); + if (!list_empty(&vma->rebind_link)) + list_del(&vma->rebind_link); + + if (fence) { + int ret = dma_fence_add_callback(fence, &vma->destroy_cb, + vma_destroy_cb); + + if (ret) { + XE_WARN_ON(ret != -ENOENT); + xe_vma_destroy_late(vma); + } + } else { + xe_vma_destroy_late(vma); + } +} + +static void xe_vma_destroy_unlocked(struct xe_vma *vma) +{ + struct ttm_validate_buffer tv[2]; + struct ww_acquire_ctx ww; + struct xe_bo *bo = vma->bo; + LIST_HEAD(objs); + LIST_HEAD(dups); + int err; + + memset(tv, 0, sizeof(tv)); + tv[0].bo = xe_vm_ttm_bo(vma->vm); + list_add(&tv[0].head, &objs); + + if (bo) { + tv[1].bo = &xe_bo_get(bo)->ttm; + list_add(&tv[1].head, &objs); + } + err = ttm_eu_reserve_buffers(&ww, &objs, false, &dups); + XE_WARN_ON(err); + + xe_vma_destroy(vma, NULL); + + ttm_eu_backoff_reservation(&ww, &objs); + if (bo) + 
xe_bo_put(bo); +} + +static struct xe_vma *to_xe_vma(const struct rb_node *node) +{ + BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0); + return (struct xe_vma *)node; +} + +static int xe_vma_cmp(const struct xe_vma *a, const struct xe_vma *b) +{ + if (a->end < b->start) { + return -1; + } else if (b->end < a->start) { + return 1; + } else { + return 0; + } +} + +static bool xe_vma_less_cb(struct rb_node *a, const struct rb_node *b) +{ + return xe_vma_cmp(to_xe_vma(a), to_xe_vma(b)) < 0; +} + +int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node) +{ + struct xe_vma *cmp = to_xe_vma(node); + const struct xe_vma *own = key; + + if (own->start > cmp->end) + return 1; + + if (own->end < cmp->start) + return -1; + + return 0; +} + +struct xe_vma * +xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma) +{ + struct rb_node *node; + + if (xe_vm_is_closed(vm)) + return NULL; + + XE_BUG_ON(vma->end >= vm->size); + lockdep_assert_held(&vm->lock); + + node = rb_find(vma, &vm->vmas, xe_vma_cmp_vma_cb); + + return node ? to_xe_vma(node) : NULL; +} + +static void xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) +{ + XE_BUG_ON(vma->vm != vm); + lockdep_assert_held(&vm->lock); + + rb_add(&vma->vm_node, &vm->vmas, xe_vma_less_cb); +} + +static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) +{ + XE_BUG_ON(vma->vm != vm); + lockdep_assert_held(&vm->lock); + + rb_erase(&vma->vm_node, &vm->vmas); + if (vm->usm.last_fault_vma == vma) + vm->usm.last_fault_vma = NULL; +} + +static void async_op_work_func(struct work_struct *w); +static void vm_destroy_work_func(struct work_struct *w); + +struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) +{ + struct xe_vm *vm; + int err, i = 0, number_gts = 0; + struct xe_gt *gt; + u8 id; + + vm = kzalloc(sizeof(*vm), GFP_KERNEL); + if (!vm) + return ERR_PTR(-ENOMEM); + + vm->xe = xe; + kref_init(&vm->refcount); + dma_resv_init(&vm->resv); + + vm->size = 1ull << xe_pt_shift(xe->info.vm_max_level + 1); + + vm->vmas = RB_ROOT; + vm->flags = flags; + + init_rwsem(&vm->lock); + + INIT_LIST_HEAD(&vm->rebind_list); + + INIT_LIST_HEAD(&vm->userptr.repin_list); + INIT_LIST_HEAD(&vm->userptr.invalidated); + init_rwsem(&vm->userptr.notifier_lock); + spin_lock_init(&vm->userptr.invalidated_lock); + + INIT_LIST_HEAD(&vm->notifier.rebind_list); + spin_lock_init(&vm->notifier.list_lock); + + INIT_LIST_HEAD(&vm->async_ops.pending); + INIT_WORK(&vm->async_ops.work, async_op_work_func); + spin_lock_init(&vm->async_ops.lock); + + INIT_WORK(&vm->destroy_work, vm_destroy_work_func); + + INIT_LIST_HEAD(&vm->preempt.engines); + vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ + + INIT_LIST_HEAD(&vm->extobj.list); + + if (!(flags & XE_VM_FLAG_MIGRATION)) { + /* We need to immeditatelly exit from any D3 state */ + xe_pm_runtime_get(xe); + xe_device_mem_access_get(xe); + } + + err = dma_resv_lock_interruptible(&vm->resv, NULL); + if (err) + goto err_put; + + if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) + vm->flags |= XE_VM_FLAGS_64K; + + for_each_gt(gt, xe, id) { + if (xe_gt_is_media_type(gt)) + continue; + + if (flags & XE_VM_FLAG_MIGRATION && + gt->info.id != XE_VM_FLAG_GT_ID(flags)) + continue; + + vm->pt_root[id] = xe_pt_create(vm, gt, xe->info.vm_max_level); + if (IS_ERR(vm->pt_root[id])) { + err = PTR_ERR(vm->pt_root[id]); + vm->pt_root[id] = NULL; + goto err_destroy_root; + } + } + + if (flags & XE_VM_FLAG_SCRATCH_PAGE) { + for_each_gt(gt, xe, id) { + if (!vm->pt_root[id]) + continue; + + err = 
xe_pt_create_scratch(xe, gt, vm); + if (err) + goto err_scratch_pt; + } + } + + if (flags & DRM_XE_VM_CREATE_COMPUTE_MODE) { + INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); + vm->flags |= XE_VM_FLAG_COMPUTE_MODE; + } + + if (flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS) { + vm->async_ops.fence.context = dma_fence_context_alloc(1); + vm->flags |= XE_VM_FLAG_ASYNC_BIND_OPS; + } + + /* Fill pt_root after allocating scratch tables */ + for_each_gt(gt, xe, id) { + if (!vm->pt_root[id]) + continue; + + xe_pt_populate_empty(gt, vm, vm->pt_root[id]); + } + dma_resv_unlock(&vm->resv); + + /* Kernel migration VM shouldn't have a circular loop.. */ + if (!(flags & XE_VM_FLAG_MIGRATION)) { + for_each_gt(gt, xe, id) { + struct xe_vm *migrate_vm; + struct xe_engine *eng; + + if (!vm->pt_root[id]) + continue; + + migrate_vm = xe_migrate_get_vm(gt->migrate); + eng = xe_engine_create_class(xe, gt, migrate_vm, + XE_ENGINE_CLASS_COPY, + ENGINE_FLAG_VM); + xe_vm_put(migrate_vm); + if (IS_ERR(eng)) { + xe_vm_close_and_put(vm); + return ERR_CAST(eng); + } + vm->eng[id] = eng; + number_gts++; + } + } + + if (number_gts > 1) + vm->composite_fence_ctx = dma_fence_context_alloc(1); + + mutex_lock(&xe->usm.lock); + if (flags & XE_VM_FLAG_FAULT_MODE) + xe->usm.num_vm_in_fault_mode++; + else if (!(flags & XE_VM_FLAG_MIGRATION)) + xe->usm.num_vm_in_non_fault_mode++; + mutex_unlock(&xe->usm.lock); + + trace_xe_vm_create(vm); + + return vm; + +err_scratch_pt: + for_each_gt(gt, xe, id) { + if (!vm->pt_root[id]) + continue; + + i = vm->pt_root[id]->level; + while (i) + if (vm->scratch_pt[id][--i]) + xe_pt_destroy(vm->scratch_pt[id][i], + vm->flags, NULL); + xe_bo_unpin(vm->scratch_bo[id]); + xe_bo_put(vm->scratch_bo[id]); + } +err_destroy_root: + for_each_gt(gt, xe, id) { + if (vm->pt_root[id]) + xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); + } + dma_resv_unlock(&vm->resv); +err_put: + dma_resv_fini(&vm->resv); + kfree(vm); + if (!(flags & XE_VM_FLAG_MIGRATION)) { + xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); + } + return ERR_PTR(err); +} + +static void flush_async_ops(struct xe_vm *vm) +{ + queue_work(system_unbound_wq, &vm->async_ops.work); + flush_work(&vm->async_ops.work); +} + +static void vm_error_capture(struct xe_vm *vm, int err, + u32 op, u64 addr, u64 size) +{ + struct drm_xe_vm_bind_op_error_capture capture; + u64 __user *address = + u64_to_user_ptr(vm->async_ops.error_capture.addr); + bool in_kthread = !current->mm; + + capture.error = err; + capture.op = op; + capture.addr = addr; + capture.size = size; + + if (in_kthread) { + if (!mmget_not_zero(vm->async_ops.error_capture.mm)) + goto mm_closed; + kthread_use_mm(vm->async_ops.error_capture.mm); + } + + if (copy_to_user(address, &capture, sizeof(capture))) + XE_WARN_ON("Copy to user failed"); + + if (in_kthread) { + kthread_unuse_mm(vm->async_ops.error_capture.mm); + mmput(vm->async_ops.error_capture.mm); + } + +mm_closed: + wake_up_all(&vm->async_ops.error_capture.wq); +} + +void xe_vm_close_and_put(struct xe_vm *vm) +{ + struct rb_root contested = RB_ROOT; + struct ww_acquire_ctx ww; + struct xe_device *xe = vm->xe; + struct xe_gt *gt; + u8 id; + + XE_BUG_ON(vm->preempt.num_engines); + + vm->size = 0; + smp_mb(); + flush_async_ops(vm); + if (xe_vm_in_compute_mode(vm)) + flush_work(&vm->preempt.rebind_work); + + for_each_gt(gt, xe, id) { + if (vm->eng[id]) { + xe_engine_kill(vm->eng[id]); + xe_engine_put(vm->eng[id]); + vm->eng[id] = NULL; + } + } + + down_write(&vm->lock); + xe_vm_lock(vm, &ww, 0, false); + while (vm->vmas.rb_node) 
{ + struct xe_vma *vma = to_xe_vma(vm->vmas.rb_node); + + if (xe_vma_is_userptr(vma)) { + down_read(&vm->userptr.notifier_lock); + vma->destroyed = true; + up_read(&vm->userptr.notifier_lock); + } + + rb_erase(&vma->vm_node, &vm->vmas); + + /* easy case, remove from VMA? */ + if (xe_vma_is_userptr(vma) || vma->bo->vm) { + xe_vma_destroy(vma, NULL); + continue; + } + + rb_add(&vma->vm_node, &contested, xe_vma_less_cb); + } + + /* + * All vm operations will add shared fences to resv. + * The only exception is eviction for a shared object, + * but even so, the unbind when evicted would still + * install a fence to resv. Hence it's safe to + * destroy the pagetables immediately. + */ + for_each_gt(gt, xe, id) { + if (vm->scratch_bo[id]) { + u32 i; + + xe_bo_unpin(vm->scratch_bo[id]); + xe_bo_put(vm->scratch_bo[id]); + for (i = 0; i < vm->pt_root[id]->level; i++) + xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, + NULL); + } + } + xe_vm_unlock(vm, &ww); + + if (contested.rb_node) { + + /* + * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL + * Since we hold a refcount to the bo, we can remove and free + * the members safely without locking. + */ + while (contested.rb_node) { + struct xe_vma *vma = to_xe_vma(contested.rb_node); + + rb_erase(&vma->vm_node, &contested); + xe_vma_destroy_unlocked(vma); + } + } + + if (vm->async_ops.error_capture.addr) + wake_up_all(&vm->async_ops.error_capture.wq); + + XE_WARN_ON(!list_empty(&vm->extobj.list)); + up_write(&vm->lock); + + xe_vm_put(vm); +} + +static void vm_destroy_work_func(struct work_struct *w) +{ + struct xe_vm *vm = + container_of(w, struct xe_vm, destroy_work); + struct ww_acquire_ctx ww; + struct xe_device *xe = vm->xe; + struct xe_gt *gt; + u8 id; + void *lookup; + + /* xe_vm_close_and_put was not called? */ + XE_WARN_ON(vm->size); + + if (!(vm->flags & XE_VM_FLAG_MIGRATION)) { + xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); + + mutex_lock(&xe->usm.lock); + lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); + XE_WARN_ON(lookup != vm); + mutex_unlock(&xe->usm.lock); + } + + /* + * XXX: We delay destroying the PT root until the VM if freed as PT root + * is needed for xe_vm_lock to work. If we remove that dependency this + * can be moved to xe_vm_close_and_put. 
+ */ + xe_vm_lock(vm, &ww, 0, false); + for_each_gt(gt, xe, id) { + if (vm->pt_root[id]) { + xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); + vm->pt_root[id] = NULL; + } + } + xe_vm_unlock(vm, &ww); + + mutex_lock(&xe->usm.lock); + if (vm->flags & XE_VM_FLAG_FAULT_MODE) + xe->usm.num_vm_in_fault_mode--; + else if (!(vm->flags & XE_VM_FLAG_MIGRATION)) + xe->usm.num_vm_in_non_fault_mode--; + mutex_unlock(&xe->usm.lock); + + trace_xe_vm_free(vm); + dma_fence_put(vm->rebind_fence); + dma_resv_fini(&vm->resv); + kfree(vm); + +} + +void xe_vm_free(struct kref *ref) +{ + struct xe_vm *vm = container_of(ref, struct xe_vm, refcount); + + /* To destroy the VM we need to be able to sleep */ + queue_work(system_unbound_wq, &vm->destroy_work); +} + +struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) +{ + struct xe_vm *vm; + + mutex_lock(&xef->vm.lock); + vm = xa_load(&xef->vm.xa, id); + mutex_unlock(&xef->vm.lock); + + if (vm) + xe_vm_get(vm); + + return vm; +} + +u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_gt *full_gt) +{ + XE_BUG_ON(xe_gt_is_media_type(full_gt)); + + return gen8_pde_encode(vm->pt_root[full_gt->info.id]->bo, 0, + XE_CACHE_WB); +} + +static struct dma_fence * +xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e, + struct xe_sync_entry *syncs, u32 num_syncs) +{ + struct xe_gt *gt; + struct dma_fence *fence = NULL; + struct dma_fence **fences = NULL; + struct dma_fence_array *cf = NULL; + struct xe_vm *vm = vma->vm; + int cur_fence = 0, i; + int number_gts = hweight_long(vma->gt_present); + int err; + u8 id; + + trace_xe_vma_unbind(vma); + + if (number_gts > 1) { + fences = kmalloc_array(number_gts, sizeof(*fences), + GFP_KERNEL); + if (!fences) + return ERR_PTR(-ENOMEM); + } + + for_each_gt(gt, vm->xe, id) { + if (!(vma->gt_present & BIT(id))) + goto next; + + XE_BUG_ON(xe_gt_is_media_type(gt)); + + fence = __xe_pt_unbind_vma(gt, vma, e, syncs, num_syncs); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + goto err_fences; + } + + if (fences) + fences[cur_fence++] = fence; + +next: + if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list)) + e = list_next_entry(e, multi_gt_list); + } + + if (fences) { + cf = dma_fence_array_create(number_gts, fences, + vm->composite_fence_ctx, + vm->composite_fence_seqno++, + false); + if (!cf) { + --vm->composite_fence_seqno; + err = -ENOMEM; + goto err_fences; + } + } + + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base : fence); + + return cf ? &cf->base : !fence ? dma_fence_get_stub() : fence; + +err_fences: + if (fences) { + while (cur_fence) { + /* FIXME: Rewind the previous binds? 
*/ + dma_fence_put(fences[--cur_fence]); + } + kfree(fences); + } + + return ERR_PTR(err); +} + +static struct dma_fence * +xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e, + struct xe_sync_entry *syncs, u32 num_syncs) +{ + struct xe_gt *gt; + struct dma_fence *fence; + struct dma_fence **fences = NULL; + struct dma_fence_array *cf = NULL; + struct xe_vm *vm = vma->vm; + int cur_fence = 0, i; + int number_gts = hweight_long(vma->gt_mask); + int err; + u8 id; + + trace_xe_vma_bind(vma); + + if (number_gts > 1) { + fences = kmalloc_array(number_gts, sizeof(*fences), + GFP_KERNEL); + if (!fences) + return ERR_PTR(-ENOMEM); + } + + for_each_gt(gt, vm->xe, id) { + if (!(vma->gt_mask & BIT(id))) + goto next; + + XE_BUG_ON(xe_gt_is_media_type(gt)); + fence = __xe_pt_bind_vma(gt, vma, e, syncs, num_syncs, + vma->gt_present & BIT(id)); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + goto err_fences; + } + + if (fences) + fences[cur_fence++] = fence; + +next: + if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list)) + e = list_next_entry(e, multi_gt_list); + } + + if (fences) { + cf = dma_fence_array_create(number_gts, fences, + vm->composite_fence_ctx, + vm->composite_fence_seqno++, + false); + if (!cf) { + --vm->composite_fence_seqno; + err = -ENOMEM; + goto err_fences; + } + } + + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base : fence); + + return cf ? &cf->base : fence; + +err_fences: + if (fences) { + while (cur_fence) { + /* FIXME: Rewind the previous binds? */ + dma_fence_put(fences[--cur_fence]); + } + kfree(fences); + } + + return ERR_PTR(err); +} + +struct async_op_fence { + struct dma_fence fence; + struct dma_fence_cb cb; + struct xe_vm *vm; + wait_queue_head_t wq; + bool started; +}; + +static const char *async_op_fence_get_driver_name(struct dma_fence *dma_fence) +{ + return "xe"; +} + +static const char * +async_op_fence_get_timeline_name(struct dma_fence *dma_fence) +{ + return "async_op_fence"; +} + +static const struct dma_fence_ops async_op_fence_ops = { + .get_driver_name = async_op_fence_get_driver_name, + .get_timeline_name = async_op_fence_get_timeline_name, +}; + +static void async_op_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct async_op_fence *afence = + container_of(cb, struct async_op_fence, cb); + + dma_fence_signal(&afence->fence); + xe_vm_put(afence->vm); + dma_fence_put(&afence->fence); +} + +static void add_async_op_fence_cb(struct xe_vm *vm, + struct dma_fence *fence, + struct async_op_fence *afence) +{ + int ret; + + if (!xe_vm_no_dma_fences(vm)) { + afence->started = true; + smp_wmb(); + wake_up_all(&afence->wq); + } + + afence->vm = xe_vm_get(vm); + dma_fence_get(&afence->fence); + ret = dma_fence_add_callback(fence, &afence->cb, async_op_fence_cb); + if (ret == -ENOENT) + dma_fence_signal(&afence->fence); + if (ret) { + xe_vm_put(vm); + dma_fence_put(&afence->fence); + } + XE_WARN_ON(ret && ret != -ENOENT); +} + +int xe_vm_async_fence_wait_start(struct dma_fence *fence) +{ + if (fence->ops == &async_op_fence_ops) { + struct async_op_fence *afence = + container_of(fence, struct async_op_fence, fence); + + XE_BUG_ON(xe_vm_no_dma_fences(afence->vm)); + + smp_rmb(); + return wait_event_interruptible(afence->wq, afence->started); + } + + return 0; +} + +static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, + struct xe_engine *e, struct xe_sync_entry *syncs, + u32 num_syncs, struct async_op_fence *afence) +{ + struct dma_fence *fence; + + xe_vm_assert_held(vm); + + fence = 
xe_vm_bind_vma(vma, e, syncs, num_syncs); + if (IS_ERR(fence)) + return PTR_ERR(fence); + if (afence) + add_async_op_fence_cb(vm, fence, afence); + + dma_fence_put(fence); + return 0; +} + +static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_engine *e, + struct xe_bo *bo, struct xe_sync_entry *syncs, + u32 num_syncs, struct async_op_fence *afence) +{ + int err; + + xe_vm_assert_held(vm); + xe_bo_assert_held(bo); + + if (bo) { + err = xe_bo_validate(bo, vm, true); + if (err) + return err; + } + + return __xe_vm_bind(vm, vma, e, syncs, num_syncs, afence); +} + +static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, + struct xe_engine *e, struct xe_sync_entry *syncs, + u32 num_syncs, struct async_op_fence *afence) +{ + struct dma_fence *fence; + + xe_vm_assert_held(vm); + xe_bo_assert_held(vma->bo); + + fence = xe_vm_unbind_vma(vma, e, syncs, num_syncs); + if (IS_ERR(fence)) + return PTR_ERR(fence); + if (afence) + add_async_op_fence_cb(vm, fence, afence); + + xe_vma_destroy(vma, fence); + dma_fence_put(fence); + + return 0; +} + +static int vm_set_error_capture_address(struct xe_device *xe, struct xe_vm *vm, + u64 value) +{ + if (XE_IOCTL_ERR(xe, !value)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) + return -ENOTSUPP; + + if (XE_IOCTL_ERR(xe, vm->async_ops.error_capture.addr)) + return -ENOTSUPP; + + vm->async_ops.error_capture.mm = current->mm; + vm->async_ops.error_capture.addr = value; + init_waitqueue_head(&vm->async_ops.error_capture.wq); + + return 0; +} + +typedef int (*xe_vm_set_property_fn)(struct xe_device *xe, struct xe_vm *vm, + u64 value); + +static const xe_vm_set_property_fn vm_set_property_funcs[] = { + [XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS] = + vm_set_error_capture_address, +}; + +static int vm_user_ext_set_property(struct xe_device *xe, struct xe_vm *vm, + u64 extension) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_ext_vm_set_property ext; + int err; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_ERR(xe, err)) + return -EFAULT; + + if (XE_IOCTL_ERR(xe, ext.property >= + ARRAY_SIZE(vm_set_property_funcs))) + return -EINVAL; + + return vm_set_property_funcs[ext.property](xe, vm, ext.value); +} + +typedef int (*xe_vm_user_extension_fn)(struct xe_device *xe, struct xe_vm *vm, + u64 extension); + +static const xe_vm_set_property_fn vm_user_extension_funcs[] = { + [XE_VM_EXTENSION_SET_PROPERTY] = vm_user_ext_set_property, +}; + +#define MAX_USER_EXTENSIONS 16 +static int vm_user_extensions(struct xe_device *xe, struct xe_vm *vm, + u64 extensions, int ext_number) +{ + u64 __user *address = u64_to_user_ptr(extensions); + struct xe_user_extension ext; + int err; + + if (XE_IOCTL_ERR(xe, ext_number >= MAX_USER_EXTENSIONS)) + return -E2BIG; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_ERR(xe, err)) + return -EFAULT; + + if (XE_IOCTL_ERR(xe, ext.name >= + ARRAY_SIZE(vm_user_extension_funcs))) + return -EINVAL; + + err = vm_user_extension_funcs[ext.name](xe, vm, extensions); + if (XE_IOCTL_ERR(xe, err)) + return err; + + if (ext.next_extension) + return vm_user_extensions(xe, vm, ext.next_extension, + ++ext_number); + + return 0; +} + +#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_SCRATCH_PAGE | \ + DRM_XE_VM_CREATE_COMPUTE_MODE | \ + DRM_XE_VM_CREATE_ASYNC_BIND_OPS | \ + DRM_XE_VM_CREATE_FAULT_MODE) + +int xe_vm_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); 
+ struct xe_file *xef = to_xe_file(file); + struct drm_xe_vm_create *args = data; + struct xe_vm *vm; + u32 id, asid; + int err; + u32 flags = 0; + + if (XE_IOCTL_ERR(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE && + args->flags & DRM_XE_VM_CREATE_FAULT_MODE)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE && + args->flags & DRM_XE_VM_CREATE_FAULT_MODE)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE && + xe_device_in_non_fault_mode(xe))) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_VM_CREATE_FAULT_MODE) && + xe_device_in_fault_mode(xe))) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE && + !xe->info.supports_usm)) + return -EINVAL; + + if (args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE) + flags |= XE_VM_FLAG_SCRATCH_PAGE; + if (args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE) + flags |= XE_VM_FLAG_COMPUTE_MODE; + if (args->flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS) + flags |= XE_VM_FLAG_ASYNC_BIND_OPS; + if (args->flags & DRM_XE_VM_CREATE_FAULT_MODE) + flags |= XE_VM_FLAG_FAULT_MODE; + + vm = xe_vm_create(xe, flags); + if (IS_ERR(vm)) + return PTR_ERR(vm); + + if (args->extensions) { + err = vm_user_extensions(xe, vm, args->extensions, 0); + if (XE_IOCTL_ERR(xe, err)) { + xe_vm_close_and_put(vm); + return err; + } + } + + mutex_lock(&xef->vm.lock); + err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); + mutex_unlock(&xef->vm.lock); + if (err) { + xe_vm_close_and_put(vm); + return err; + } + + mutex_lock(&xe->usm.lock); + err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, + XA_LIMIT(0, XE_MAX_ASID - 1), + &xe->usm.next_asid, GFP_KERNEL); + mutex_unlock(&xe->usm.lock); + if (err) { + xe_vm_close_and_put(vm); + return err; + } + vm->usm.asid = asid; + + args->vm_id = id; + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) + /* Warning: Security issue - never enable by default */ + args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, GEN8_PAGE_SIZE); +#endif + + return 0; +} + +int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_vm_destroy *args = data; + struct xe_vm *vm; + + if (XE_IOCTL_ERR(xe, args->pad)) + return -EINVAL; + + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_ERR(xe, !vm)) + return -ENOENT; + xe_vm_put(vm); + + /* FIXME: Extend this check to non-compute mode VMs */ + if (XE_IOCTL_ERR(xe, vm->preempt.num_engines)) + return -EBUSY; + + mutex_lock(&xef->vm.lock); + xa_erase(&xef->vm.xa, args->vm_id); + mutex_unlock(&xef->vm.lock); + + xe_vm_close_and_put(vm); + + return 0; +} + +static const u32 region_to_mem_type[] = { + XE_PL_TT, + XE_PL_VRAM0, + XE_PL_VRAM1, +}; + +static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, + struct xe_engine *e, u32 region, + struct xe_sync_entry *syncs, u32 num_syncs, + struct async_op_fence *afence) +{ + int err; + + XE_BUG_ON(region > ARRAY_SIZE(region_to_mem_type)); + + if (!xe_vma_is_userptr(vma)) { + err = xe_bo_migrate(vma->bo, region_to_mem_type[region]); + if (err) + return err; + } + + if (vma->gt_mask != (vma->gt_present & ~vma->usm.gt_invalidated)) { + return xe_vm_bind(vm, vma, e, vma->bo, syncs, num_syncs, + afence); + } else { + int i; + + /* Nothing to do, signal fences now */ + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], NULL, + 
dma_fence_get_stub()); + if (afence) + dma_fence_signal(&afence->fence); + return 0; + } +} + +#define VM_BIND_OP(op) (op & 0xffff) + +static int __vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma, + struct xe_engine *e, struct xe_bo *bo, u32 op, + u32 region, struct xe_sync_entry *syncs, + u32 num_syncs, struct async_op_fence *afence) +{ + switch (VM_BIND_OP(op)) { + case XE_VM_BIND_OP_MAP: + return xe_vm_bind(vm, vma, e, bo, syncs, num_syncs, afence); + case XE_VM_BIND_OP_UNMAP: + case XE_VM_BIND_OP_UNMAP_ALL: + return xe_vm_unbind(vm, vma, e, syncs, num_syncs, afence); + case XE_VM_BIND_OP_MAP_USERPTR: + return xe_vm_bind(vm, vma, e, NULL, syncs, num_syncs, afence); + case XE_VM_BIND_OP_PREFETCH: + return xe_vm_prefetch(vm, vma, e, region, syncs, num_syncs, + afence); + break; + default: + XE_BUG_ON("NOT POSSIBLE"); + return -EINVAL; + } +} + +struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm) +{ + int idx = vm->flags & XE_VM_FLAG_MIGRATION ? + XE_VM_FLAG_GT_ID(vm->flags) : 0; + + /* Safe to use index 0 as all BO in the VM share a single dma-resv lock */ + return &vm->pt_root[idx]->bo->ttm; +} + +static void xe_vm_tv_populate(struct xe_vm *vm, struct ttm_validate_buffer *tv) +{ + tv->num_shared = 1; + tv->bo = xe_vm_ttm_bo(vm); +} + +static bool is_map_op(u32 op) +{ + return VM_BIND_OP(op) == XE_VM_BIND_OP_MAP || + VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR; +} + +static bool is_unmap_op(u32 op) +{ + return VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP || + VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL; +} + +static int vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma, + struct xe_engine *e, struct xe_bo *bo, + struct drm_xe_vm_bind_op *bind_op, + struct xe_sync_entry *syncs, u32 num_syncs, + struct async_op_fence *afence) +{ + LIST_HEAD(objs); + LIST_HEAD(dups); + struct ttm_validate_buffer tv_bo, tv_vm; + struct ww_acquire_ctx ww; + struct xe_bo *vbo; + int err, i; + + lockdep_assert_held(&vm->lock); + XE_BUG_ON(!list_empty(&vma->unbind_link)); + + /* Binds deferred to faults, signal fences now */ + if (xe_vm_in_fault_mode(vm) && is_map_op(bind_op->op) && + !(bind_op->op & XE_VM_BIND_FLAG_IMMEDIATE)) { + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], NULL, + dma_fence_get_stub()); + if (afence) + dma_fence_signal(&afence->fence); + return 0; + } + + xe_vm_tv_populate(vm, &tv_vm); + list_add_tail(&tv_vm.head, &objs); + vbo = vma->bo; + if (vbo) { + /* + * An unbind can drop the last reference to the BO and + * the BO is needed for ttm_eu_backoff_reservation so + * take a reference here. 
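+		 * The reference is dropped with xe_bo_put(vbo) at the end of
+		 * this function, after the reservation has been backed off.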
+ */ + xe_bo_get(vbo); + + tv_bo.bo = &vbo->ttm; + tv_bo.num_shared = 1; + list_add(&tv_bo.head, &objs); + } + +again: + err = ttm_eu_reserve_buffers(&ww, &objs, true, &dups); + if (!err) { + err = __vm_bind_ioctl(vm, vma, e, bo, + bind_op->op, bind_op->region, syncs, + num_syncs, afence); + ttm_eu_backoff_reservation(&ww, &objs); + if (err == -EAGAIN && xe_vma_is_userptr(vma)) { + lockdep_assert_held_write(&vm->lock); + err = xe_vma_userptr_pin_pages(vma); + if (!err) + goto again; + } + } + xe_bo_put(vbo); + + return err; +} + +struct async_op { + struct xe_vma *vma; + struct xe_engine *engine; + struct xe_bo *bo; + struct drm_xe_vm_bind_op bind_op; + struct xe_sync_entry *syncs; + u32 num_syncs; + struct list_head link; + struct async_op_fence *fence; +}; + +static void async_op_cleanup(struct xe_vm *vm, struct async_op *op) +{ + while (op->num_syncs--) + xe_sync_entry_cleanup(&op->syncs[op->num_syncs]); + kfree(op->syncs); + xe_bo_put(op->bo); + if (op->engine) + xe_engine_put(op->engine); + xe_vm_put(vm); + if (op->fence) + dma_fence_put(&op->fence->fence); + kfree(op); +} + +static struct async_op *next_async_op(struct xe_vm *vm) +{ + return list_first_entry_or_null(&vm->async_ops.pending, + struct async_op, link); +} + +static void vm_set_async_error(struct xe_vm *vm, int err) +{ + lockdep_assert_held(&vm->lock); + vm->async_ops.error = err; +} + +static void async_op_work_func(struct work_struct *w) +{ + struct xe_vm *vm = container_of(w, struct xe_vm, async_ops.work); + + for (;;) { + struct async_op *op; + int err; + + if (vm->async_ops.error && !xe_vm_is_closed(vm)) + break; + + spin_lock_irq(&vm->async_ops.lock); + op = next_async_op(vm); + if (op) + list_del_init(&op->link); + spin_unlock_irq(&vm->async_ops.lock); + + if (!op) + break; + + if (!xe_vm_is_closed(vm)) { + bool first, last; + + down_write(&vm->lock); +again: + first = op->vma->first_munmap_rebind; + last = op->vma->last_munmap_rebind; +#ifdef TEST_VM_ASYNC_OPS_ERROR +#define FORCE_ASYNC_OP_ERROR BIT(31) + if (!(op->bind_op.op & FORCE_ASYNC_OP_ERROR)) { + err = vm_bind_ioctl(vm, op->vma, op->engine, + op->bo, &op->bind_op, + op->syncs, op->num_syncs, + op->fence); + } else { + err = -ENOMEM; + op->bind_op.op &= ~FORCE_ASYNC_OP_ERROR; + } +#else + err = vm_bind_ioctl(vm, op->vma, op->engine, op->bo, + &op->bind_op, op->syncs, + op->num_syncs, op->fence); +#endif + /* + * In order for the fencing to work (stall behind + * existing jobs / prevent new jobs from running) all + * the dma-resv slots need to be programmed in a batch + * relative to execs / the rebind worker. The vm->lock + * ensure this. 
+ */ + if (!err && ((first && VM_BIND_OP(op->bind_op.op) == + XE_VM_BIND_OP_UNMAP) || + vm->async_ops.munmap_rebind_inflight)) { + if (last) { + op->vma->last_munmap_rebind = false; + vm->async_ops.munmap_rebind_inflight = + false; + } else { + vm->async_ops.munmap_rebind_inflight = + true; + + async_op_cleanup(vm, op); + + spin_lock_irq(&vm->async_ops.lock); + op = next_async_op(vm); + XE_BUG_ON(!op); + list_del_init(&op->link); + spin_unlock_irq(&vm->async_ops.lock); + + goto again; + } + } + if (err) { + trace_xe_vma_fail(op->vma); + drm_warn(&vm->xe->drm, "Async VM op(%d) failed with %d", + VM_BIND_OP(op->bind_op.op), + err); + + spin_lock_irq(&vm->async_ops.lock); + list_add(&op->link, &vm->async_ops.pending); + spin_unlock_irq(&vm->async_ops.lock); + + vm_set_async_error(vm, err); + up_write(&vm->lock); + + if (vm->async_ops.error_capture.addr) + vm_error_capture(vm, err, + op->bind_op.op, + op->bind_op.addr, + op->bind_op.range); + break; + } + up_write(&vm->lock); + } else { + trace_xe_vma_flush(op->vma); + + if (is_unmap_op(op->bind_op.op)) { + down_write(&vm->lock); + xe_vma_destroy_unlocked(op->vma); + up_write(&vm->lock); + } + + if (op->fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &op->fence->fence.flags)) { + if (!xe_vm_no_dma_fences(vm)) { + op->fence->started = true; + smp_wmb(); + wake_up_all(&op->fence->wq); + } + dma_fence_signal(&op->fence->fence); + } + } + + async_op_cleanup(vm, op); + } +} + +static int __vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma, + struct xe_engine *e, struct xe_bo *bo, + struct drm_xe_vm_bind_op *bind_op, + struct xe_sync_entry *syncs, u32 num_syncs) +{ + struct async_op *op; + bool installed = false; + u64 seqno; + int i; + + lockdep_assert_held(&vm->lock); + + op = kmalloc(sizeof(*op), GFP_KERNEL); + if (!op) { + return -ENOMEM; + } + + if (num_syncs) { + op->fence = kmalloc(sizeof(*op->fence), GFP_KERNEL); + if (!op->fence) { + kfree(op); + return -ENOMEM; + } + + seqno = e ? ++e->bind.fence_seqno : ++vm->async_ops.fence.seqno; + dma_fence_init(&op->fence->fence, &async_op_fence_ops, + &vm->async_ops.lock, e ? 
e->bind.fence_ctx : + vm->async_ops.fence.context, seqno); + + if (!xe_vm_no_dma_fences(vm)) { + op->fence->vm = vm; + op->fence->started = false; + init_waitqueue_head(&op->fence->wq); + } + } else { + op->fence = NULL; + } + op->vma = vma; + op->engine = e; + op->bo = bo; + op->bind_op = *bind_op; + op->syncs = syncs; + op->num_syncs = num_syncs; + INIT_LIST_HEAD(&op->link); + + for (i = 0; i < num_syncs; i++) + installed |= xe_sync_entry_signal(&syncs[i], NULL, + &op->fence->fence); + + if (!installed && op->fence) + dma_fence_signal(&op->fence->fence); + + spin_lock_irq(&vm->async_ops.lock); + list_add_tail(&op->link, &vm->async_ops.pending); + spin_unlock_irq(&vm->async_ops.lock); + + if (!vm->async_ops.error) + queue_work(system_unbound_wq, &vm->async_ops.work); + + return 0; +} + +static int vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma, + struct xe_engine *e, struct xe_bo *bo, + struct drm_xe_vm_bind_op *bind_op, + struct xe_sync_entry *syncs, u32 num_syncs) +{ + struct xe_vma *__vma, *next; + struct list_head rebind_list; + struct xe_sync_entry *in_syncs = NULL, *out_syncs = NULL; + u32 num_in_syncs = 0, num_out_syncs = 0; + bool first = true, last; + int err; + int i; + + lockdep_assert_held(&vm->lock); + + /* Not a linked list of unbinds + rebinds, easy */ + if (list_empty(&vma->unbind_link)) + return __vm_bind_ioctl_async(vm, vma, e, bo, bind_op, + syncs, num_syncs); + + /* + * Linked list of unbinds + rebinds, decompose syncs into 'in / out' + * passing the 'in' to the first operation and 'out' to the last. Also + * the reference counting is a little tricky, increment the VM / bind + * engine ref count on all but the last operation and increment the BOs + * ref count on each rebind. + */ + + XE_BUG_ON(VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP && + VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP_ALL && + VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_PREFETCH); + + /* Decompose syncs */ + if (num_syncs) { + in_syncs = kmalloc(sizeof(*in_syncs) * num_syncs, GFP_KERNEL); + out_syncs = kmalloc(sizeof(*out_syncs) * num_syncs, GFP_KERNEL); + if (!in_syncs || !out_syncs) { + err = -ENOMEM; + goto out_error; + } + + for (i = 0; i < num_syncs; ++i) { + bool signal = syncs[i].flags & DRM_XE_SYNC_SIGNAL; + + if (signal) + out_syncs[num_out_syncs++] = syncs[i]; + else + in_syncs[num_in_syncs++] = syncs[i]; + } + } + + /* Do unbinds + move rebinds to new list */ + INIT_LIST_HEAD(&rebind_list); + list_for_each_entry_safe(__vma, next, &vma->unbind_link, unbind_link) { + if (__vma->destroyed || + VM_BIND_OP(bind_op->op) == XE_VM_BIND_OP_PREFETCH) { + list_del_init(&__vma->unbind_link); + xe_bo_get(bo); + err = __vm_bind_ioctl_async(xe_vm_get(vm), __vma, + e ? xe_engine_get(e) : NULL, + bo, bind_op, first ? + in_syncs : NULL, + first ? num_in_syncs : 0); + if (err) { + xe_bo_put(bo); + xe_vm_put(vm); + if (e) + xe_engine_put(e); + goto out_error; + } + in_syncs = NULL; + first = false; + } else { + list_move_tail(&__vma->unbind_link, &rebind_list); + } + } + last = list_empty(&rebind_list); + if (!last) { + xe_vm_get(vm); + if (e) + xe_engine_get(e); + } + err = __vm_bind_ioctl_async(vm, vma, e, + bo, bind_op, + first ? in_syncs : + last ? out_syncs : NULL, + first ? num_in_syncs : + last ? 
num_out_syncs : 0); + if (err) { + if (!last) { + xe_vm_put(vm); + if (e) + xe_engine_put(e); + } + goto out_error; + } + in_syncs = NULL; + + /* Do rebinds */ + list_for_each_entry_safe(__vma, next, &rebind_list, unbind_link) { + list_del_init(&__vma->unbind_link); + last = list_empty(&rebind_list); + + if (xe_vma_is_userptr(__vma)) { + bind_op->op = XE_VM_BIND_FLAG_ASYNC | + XE_VM_BIND_OP_MAP_USERPTR; + } else { + bind_op->op = XE_VM_BIND_FLAG_ASYNC | + XE_VM_BIND_OP_MAP; + xe_bo_get(__vma->bo); + } + + if (!last) { + xe_vm_get(vm); + if (e) + xe_engine_get(e); + } + + err = __vm_bind_ioctl_async(vm, __vma, e, + __vma->bo, bind_op, last ? + out_syncs : NULL, + last ? num_out_syncs : 0); + if (err) { + if (!last) { + xe_vm_put(vm); + if (e) + xe_engine_put(e); + } + goto out_error; + } + } + + kfree(syncs); + return 0; + +out_error: + kfree(in_syncs); + kfree(out_syncs); + kfree(syncs); + + return err; +} + +static int __vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo, + u64 addr, u64 range, u32 op) +{ + struct xe_device *xe = vm->xe; + struct xe_vma *vma, lookup; + bool async = !!(op & XE_VM_BIND_FLAG_ASYNC); + + lockdep_assert_held(&vm->lock); + + lookup.start = addr; + lookup.end = addr + range - 1; + + switch (VM_BIND_OP(op)) { + case XE_VM_BIND_OP_MAP: + case XE_VM_BIND_OP_MAP_USERPTR: + vma = xe_vm_find_overlapping_vma(vm, &lookup); + if (XE_IOCTL_ERR(xe, vma)) + return -EBUSY; + break; + case XE_VM_BIND_OP_UNMAP: + case XE_VM_BIND_OP_PREFETCH: + vma = xe_vm_find_overlapping_vma(vm, &lookup); + if (XE_IOCTL_ERR(xe, !vma) || + XE_IOCTL_ERR(xe, (vma->start != addr || + vma->end != addr + range - 1) && !async)) + return -EINVAL; + break; + case XE_VM_BIND_OP_UNMAP_ALL: + break; + default: + XE_BUG_ON("NOT POSSIBLE"); + return -EINVAL; + } + + return 0; +} + +static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma) +{ + down_read(&vm->userptr.notifier_lock); + vma->destroyed = true; + up_read(&vm->userptr.notifier_lock); + xe_vm_remove_vma(vm, vma); +} + +static int prep_replacement_vma(struct xe_vm *vm, struct xe_vma *vma) +{ + int err; + + if (vma->bo && !vma->bo->vm) { + vm_insert_extobj(vm, vma); + err = add_preempt_fences(vm, vma->bo); + if (err) + return err; + } + + return 0; +} + +/* + * Find all overlapping VMAs in lookup range and add to a list in the returned + * VMA, all of VMAs found will be unbound. Also possibly add 2 new VMAs that + * need to be bound if first / last VMAs are not fully unbound. This is akin to + * how munmap works. 
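+ *
+ * For example (mirroring the munmap example in xe_vm_doc.h): with existing
+ * mappings 0x0000-0x2000 and 0x3000-0x5000, an unbind of 0x1000-0x4000 puts
+ * both VMAs on the returned unbind list and creates two new VMAs covering
+ * 0x0000-0x1000 and 0x4000-0x5000 which are rebound at the edges.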
+ */ +static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm, + struct xe_vma *lookup) +{ + struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup); + struct rb_node *node; + struct xe_vma *first = vma, *last = vma, *new_first = NULL, + *new_last = NULL, *__vma, *next; + int err = 0; + bool first_munmap_rebind = false; + + lockdep_assert_held(&vm->lock); + XE_BUG_ON(!vma); + + node = &vma->vm_node; + while ((node = rb_next(node))) { + if (!xe_vma_cmp_vma_cb(lookup, node)) { + __vma = to_xe_vma(node); + list_add_tail(&__vma->unbind_link, &vma->unbind_link); + last = __vma; + } else { + break; + } + } + + node = &vma->vm_node; + while ((node = rb_prev(node))) { + if (!xe_vma_cmp_vma_cb(lookup, node)) { + __vma = to_xe_vma(node); + list_add(&__vma->unbind_link, &vma->unbind_link); + first = __vma; + } else { + break; + } + } + + if (first->start != lookup->start) { + struct ww_acquire_ctx ww; + + if (first->bo) + err = xe_bo_lock(first->bo, &ww, 0, true); + if (err) + goto unwind; + new_first = xe_vma_create(first->vm, first->bo, + first->bo ? first->bo_offset : + first->userptr.ptr, + first->start, + lookup->start - 1, + (first->pte_flags & PTE_READ_ONLY), + first->gt_mask); + if (first->bo) + xe_bo_unlock(first->bo, &ww); + if (!new_first) { + err = -ENOMEM; + goto unwind; + } + if (!first->bo) { + err = xe_vma_userptr_pin_pages(new_first); + if (err) + goto unwind; + } + err = prep_replacement_vma(vm, new_first); + if (err) + goto unwind; + } + + if (last->end != lookup->end) { + struct ww_acquire_ctx ww; + u64 chunk = lookup->end + 1 - last->start; + + if (last->bo) + err = xe_bo_lock(last->bo, &ww, 0, true); + if (err) + goto unwind; + new_last = xe_vma_create(last->vm, last->bo, + last->bo ? last->bo_offset + chunk : + last->userptr.ptr + chunk, + last->start + chunk, + last->end, + (last->pte_flags & PTE_READ_ONLY), + last->gt_mask); + if (last->bo) + xe_bo_unlock(last->bo, &ww); + if (!new_last) { + err = -ENOMEM; + goto unwind; + } + if (!last->bo) { + err = xe_vma_userptr_pin_pages(new_last); + if (err) + goto unwind; + } + err = prep_replacement_vma(vm, new_last); + if (err) + goto unwind; + } + + prep_vma_destroy(vm, vma); + if (list_empty(&vma->unbind_link) && (new_first || new_last)) + vma->first_munmap_rebind = true; + list_for_each_entry(__vma, &vma->unbind_link, unbind_link) { + if ((new_first || new_last) && !first_munmap_rebind) { + __vma->first_munmap_rebind = true; + first_munmap_rebind = true; + } + prep_vma_destroy(vm, __vma); + } + if (new_first) { + xe_vm_insert_vma(vm, new_first); + list_add_tail(&new_first->unbind_link, &vma->unbind_link); + if (!new_last) + new_first->last_munmap_rebind = true; + } + if (new_last) { + xe_vm_insert_vma(vm, new_last); + list_add_tail(&new_last->unbind_link, &vma->unbind_link); + new_last->last_munmap_rebind = true; + } + + return vma; + +unwind: + list_for_each_entry_safe(__vma, next, &vma->unbind_link, unbind_link) + list_del_init(&__vma->unbind_link); + if (new_last) { + prep_vma_destroy(vm, new_last); + xe_vma_destroy_unlocked(new_last); + } + if (new_first) { + prep_vma_destroy(vm, new_first); + xe_vma_destroy_unlocked(new_first); + } + + return ERR_PTR(err); +} + +/* + * Similar to vm_unbind_lookup_vmas, find all VMAs in lookup range to prefetch + */ +static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm, + struct xe_vma *lookup, + u32 region) +{ + struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup), *__vma, + *next; + struct rb_node *node; + + if (!xe_vma_is_userptr(vma)) { + if 
(!xe_bo_can_migrate(vma->bo, region_to_mem_type[region])) + return ERR_PTR(-EINVAL); + } + + node = &vma->vm_node; + while ((node = rb_next(node))) { + if (!xe_vma_cmp_vma_cb(lookup, node)) { + __vma = to_xe_vma(node); + if (!xe_vma_is_userptr(__vma)) { + if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region])) + goto flush_list; + } + list_add_tail(&__vma->unbind_link, &vma->unbind_link); + } else { + break; + } + } + + node = &vma->vm_node; + while ((node = rb_prev(node))) { + if (!xe_vma_cmp_vma_cb(lookup, node)) { + __vma = to_xe_vma(node); + if (!xe_vma_is_userptr(__vma)) { + if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region])) + goto flush_list; + } + list_add(&__vma->unbind_link, &vma->unbind_link); + } else { + break; + } + } + + return vma; + +flush_list: + list_for_each_entry_safe(__vma, next, &vma->unbind_link, + unbind_link) + list_del_init(&__vma->unbind_link); + + return ERR_PTR(-EINVAL); +} + +static struct xe_vma *vm_unbind_all_lookup_vmas(struct xe_vm *vm, + struct xe_bo *bo) +{ + struct xe_vma *first = NULL, *vma; + + lockdep_assert_held(&vm->lock); + xe_bo_assert_held(bo); + + list_for_each_entry(vma, &bo->vmas, bo_link) { + if (vma->vm != vm) + continue; + + prep_vma_destroy(vm, vma); + if (!first) + first = vma; + else + list_add_tail(&vma->unbind_link, &first->unbind_link); + } + + return first; +} + +static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm, + struct xe_bo *bo, + u64 bo_offset_or_userptr, + u64 addr, u64 range, u32 op, + u64 gt_mask, u32 region) +{ + struct ww_acquire_ctx ww; + struct xe_vma *vma, lookup; + int err; + + lockdep_assert_held(&vm->lock); + + lookup.start = addr; + lookup.end = addr + range - 1; + + switch (VM_BIND_OP(op)) { + case XE_VM_BIND_OP_MAP: + XE_BUG_ON(!bo); + + err = xe_bo_lock(bo, &ww, 0, true); + if (err) + return ERR_PTR(err); + vma = xe_vma_create(vm, bo, bo_offset_or_userptr, addr, + addr + range - 1, + op & XE_VM_BIND_FLAG_READONLY, + gt_mask); + xe_bo_unlock(bo, &ww); + if (!vma) + return ERR_PTR(-ENOMEM); + + xe_vm_insert_vma(vm, vma); + if (!bo->vm) { + vm_insert_extobj(vm, vma); + err = add_preempt_fences(vm, bo); + if (err) { + prep_vma_destroy(vm, vma); + xe_vma_destroy_unlocked(vma); + + return ERR_PTR(err); + } + } + break; + case XE_VM_BIND_OP_UNMAP: + vma = vm_unbind_lookup_vmas(vm, &lookup); + break; + case XE_VM_BIND_OP_PREFETCH: + vma = vm_prefetch_lookup_vmas(vm, &lookup, region); + break; + case XE_VM_BIND_OP_UNMAP_ALL: + XE_BUG_ON(!bo); + + err = xe_bo_lock(bo, &ww, 0, true); + if (err) + return ERR_PTR(err); + vma = vm_unbind_all_lookup_vmas(vm, bo); + if (!vma) + vma = ERR_PTR(-EINVAL); + xe_bo_unlock(bo, &ww); + break; + case XE_VM_BIND_OP_MAP_USERPTR: + XE_BUG_ON(bo); + + vma = xe_vma_create(vm, NULL, bo_offset_or_userptr, addr, + addr + range - 1, + op & XE_VM_BIND_FLAG_READONLY, + gt_mask); + if (!vma) + return ERR_PTR(-ENOMEM); + + err = xe_vma_userptr_pin_pages(vma); + if (err) { + xe_vma_destroy(vma, NULL); + + return ERR_PTR(err); + } else { + xe_vm_insert_vma(vm, vma); + } + break; + default: + XE_BUG_ON("NOT POSSIBLE"); + vma = ERR_PTR(-EINVAL); + } + + return vma; +} + +#ifdef TEST_VM_ASYNC_OPS_ERROR +#define SUPPORTED_FLAGS \ + (FORCE_ASYNC_OP_ERROR | XE_VM_BIND_FLAG_ASYNC | \ + XE_VM_BIND_FLAG_READONLY | XE_VM_BIND_FLAG_IMMEDIATE | 0xffff) +#else +#define SUPPORTED_FLAGS \ + (XE_VM_BIND_FLAG_ASYNC | XE_VM_BIND_FLAG_READONLY | \ + XE_VM_BIND_FLAG_IMMEDIATE | 0xffff) +#endif +#define XE_64K_PAGE_MASK 0xffffull + +#define MAX_BINDS 512 /* FIXME: Picking random upper limit 
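+ * on the number of binds in a single IOCTL; it mainly bounds the
+ * bind_ops / bos / vmas allocations in the bind IOCTL below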
*/ + +static int vm_bind_ioctl_check_args(struct xe_device *xe, + struct drm_xe_vm_bind *args, + struct drm_xe_vm_bind_op **bind_ops, + bool *async) +{ + int err; + int i; + + if (XE_IOCTL_ERR(xe, args->extensions) || + XE_IOCTL_ERR(xe, !args->num_binds) || + XE_IOCTL_ERR(xe, args->num_binds > MAX_BINDS)) + return -EINVAL; + + if (args->num_binds > 1) { + u64 __user *bind_user = + u64_to_user_ptr(args->vector_of_binds); + + *bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) * + args->num_binds, GFP_KERNEL); + if (!*bind_ops) + return -ENOMEM; + + err = __copy_from_user(*bind_ops, bind_user, + sizeof(struct drm_xe_vm_bind_op) * + args->num_binds); + if (XE_IOCTL_ERR(xe, err)) { + err = -EFAULT; + goto free_bind_ops; + } + } else { + *bind_ops = &args->bind; + } + + for (i = 0; i < args->num_binds; ++i) { + u64 range = (*bind_ops)[i].range; + u64 addr = (*bind_ops)[i].addr; + u32 op = (*bind_ops)[i].op; + u32 obj = (*bind_ops)[i].obj; + u64 obj_offset = (*bind_ops)[i].obj_offset; + u32 region = (*bind_ops)[i].region; + + if (i == 0) { + *async = !!(op & XE_VM_BIND_FLAG_ASYNC); + } else if (XE_IOCTL_ERR(xe, !*async) || + XE_IOCTL_ERR(xe, !(op & XE_VM_BIND_FLAG_ASYNC)) || + XE_IOCTL_ERR(xe, VM_BIND_OP(op) == + XE_VM_BIND_OP_RESTART)) { + err = -EINVAL; + goto free_bind_ops; + } + + if (XE_IOCTL_ERR(xe, !*async && + VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL)) { + err = -EINVAL; + goto free_bind_ops; + } + + if (XE_IOCTL_ERR(xe, !*async && + VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH)) { + err = -EINVAL; + goto free_bind_ops; + } + + if (XE_IOCTL_ERR(xe, VM_BIND_OP(op) > + XE_VM_BIND_OP_PREFETCH) || + XE_IOCTL_ERR(xe, op & ~SUPPORTED_FLAGS) || + XE_IOCTL_ERR(xe, !obj && + VM_BIND_OP(op) == XE_VM_BIND_OP_MAP) || + XE_IOCTL_ERR(xe, !obj && + VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) || + XE_IOCTL_ERR(xe, addr && + VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) || + XE_IOCTL_ERR(xe, range && + VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) || + XE_IOCTL_ERR(xe, obj && + VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR) || + XE_IOCTL_ERR(xe, obj && + VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH) || + XE_IOCTL_ERR(xe, region && + VM_BIND_OP(op) != XE_VM_BIND_OP_PREFETCH) || + XE_IOCTL_ERR(xe, !(BIT(region) & + xe->info.mem_region_mask)) || + XE_IOCTL_ERR(xe, obj && + VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP)) { + err = -EINVAL; + goto free_bind_ops; + } + + if (XE_IOCTL_ERR(xe, obj_offset & ~PAGE_MASK) || + XE_IOCTL_ERR(xe, addr & ~PAGE_MASK) || + XE_IOCTL_ERR(xe, range & ~PAGE_MASK) || + XE_IOCTL_ERR(xe, !range && VM_BIND_OP(op) != + XE_VM_BIND_OP_RESTART && + VM_BIND_OP(op) != XE_VM_BIND_OP_UNMAP_ALL)) { + err = -EINVAL; + goto free_bind_ops; + } + } + + return 0; + +free_bind_ops: + if (args->num_binds > 1) + kfree(*bind_ops); + return err; +} + +int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_vm_bind *args = data; + struct drm_xe_sync __user *syncs_user; + struct xe_bo **bos = NULL; + struct xe_vma **vmas = NULL; + struct xe_vm *vm; + struct xe_engine *e = NULL; + u32 num_syncs; + struct xe_sync_entry *syncs = NULL; + struct drm_xe_vm_bind_op *bind_ops; + bool async; + int err; + int i, j = 0; + + err = vm_bind_ioctl_check_args(xe, args, &bind_ops, &async); + if (err) + return err; + + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_ERR(xe, !vm)) { + err = -EINVAL; + goto free_objs; + } + + if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) { + DRM_ERROR("VM closed while we began 
looking up?\n"); + err = -ENOENT; + goto put_vm; + } + + if (args->engine_id) { + e = xe_engine_lookup(xef, args->engine_id); + if (XE_IOCTL_ERR(xe, !e)) { + err = -ENOENT; + goto put_vm; + } + if (XE_IOCTL_ERR(xe, !(e->flags & ENGINE_FLAG_VM))) { + err = -EINVAL; + goto put_engine; + } + } + + if (VM_BIND_OP(bind_ops[0].op) == XE_VM_BIND_OP_RESTART) { + if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) + err = -ENOTSUPP; + if (XE_IOCTL_ERR(xe, !err && args->num_syncs)) + err = EINVAL; + if (XE_IOCTL_ERR(xe, !err && !vm->async_ops.error)) + err = -EPROTO; + + if (!err) { + down_write(&vm->lock); + trace_xe_vm_restart(vm); + vm_set_async_error(vm, 0); + up_write(&vm->lock); + + queue_work(system_unbound_wq, &vm->async_ops.work); + + /* Rebinds may have been blocked, give worker a kick */ + if (xe_vm_in_compute_mode(vm)) + queue_work(vm->xe->ordered_wq, + &vm->preempt.rebind_work); + } + + goto put_engine; + } + + if (XE_IOCTL_ERR(xe, !vm->async_ops.error && + async != !!(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) { + err = -ENOTSUPP; + goto put_engine; + } + + for (i = 0; i < args->num_binds; ++i) { + u64 range = bind_ops[i].range; + u64 addr = bind_ops[i].addr; + + if (XE_IOCTL_ERR(xe, range > vm->size) || + XE_IOCTL_ERR(xe, addr > vm->size - range)) { + err = -EINVAL; + goto put_engine; + } + + if (bind_ops[i].gt_mask) { + u64 valid_gts = BIT(xe->info.tile_count) - 1; + + if (XE_IOCTL_ERR(xe, bind_ops[i].gt_mask & + ~valid_gts)) { + err = -EINVAL; + goto put_engine; + } + } + } + + bos = kzalloc(sizeof(*bos) * args->num_binds, GFP_KERNEL); + if (!bos) { + err = -ENOMEM; + goto put_engine; + } + + vmas = kzalloc(sizeof(*vmas) * args->num_binds, GFP_KERNEL); + if (!vmas) { + err = -ENOMEM; + goto put_engine; + } + + for (i = 0; i < args->num_binds; ++i) { + struct drm_gem_object *gem_obj; + u64 range = bind_ops[i].range; + u64 addr = bind_ops[i].addr; + u32 obj = bind_ops[i].obj; + u64 obj_offset = bind_ops[i].obj_offset; + + if (!obj) + continue; + + gem_obj = drm_gem_object_lookup(file, obj); + if (XE_IOCTL_ERR(xe, !gem_obj)) { + err = -ENOENT; + goto put_obj; + } + bos[i] = gem_to_xe_bo(gem_obj); + + if (XE_IOCTL_ERR(xe, range > bos[i]->size) || + XE_IOCTL_ERR(xe, obj_offset > + bos[i]->size - range)) { + err = -EINVAL; + goto put_obj; + } + + if (bos[i]->flags & XE_BO_INTERNAL_64K) { + if (XE_IOCTL_ERR(xe, obj_offset & + XE_64K_PAGE_MASK) || + XE_IOCTL_ERR(xe, addr & XE_64K_PAGE_MASK) || + XE_IOCTL_ERR(xe, range & XE_64K_PAGE_MASK)) { + err = -EINVAL; + goto put_obj; + } + } + } + + if (args->num_syncs) { + syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); + if (!syncs) { + err = -ENOMEM; + goto put_obj; + } + } + + syncs_user = u64_to_user_ptr(args->syncs); + for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { + err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], + &syncs_user[num_syncs], false, + xe_vm_no_dma_fences(vm)); + if (err) + goto free_syncs; + } + + err = down_write_killable(&vm->lock); + if (err) + goto free_syncs; + + /* Do some error checking first to make the unwind easier */ + for (i = 0; i < args->num_binds; ++i) { + u64 range = bind_ops[i].range; + u64 addr = bind_ops[i].addr; + u32 op = bind_ops[i].op; + + err = __vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op); + if (err) + goto release_vm_lock; + } + + for (i = 0; i < args->num_binds; ++i) { + u64 range = bind_ops[i].range; + u64 addr = bind_ops[i].addr; + u32 op = bind_ops[i].op; + u64 obj_offset = bind_ops[i].obj_offset; + u64 gt_mask = bind_ops[i].gt_mask; + u32 region 
= bind_ops[i].region; + + vmas[i] = vm_bind_ioctl_lookup_vma(vm, bos[i], obj_offset, + addr, range, op, gt_mask, + region); + if (IS_ERR(vmas[i])) { + err = PTR_ERR(vmas[i]); + vmas[i] = NULL; + goto destroy_vmas; + } + } + + for (j = 0; j < args->num_binds; ++j) { + struct xe_sync_entry *__syncs; + u32 __num_syncs = 0; + bool first_or_last = j == 0 || j == args->num_binds - 1; + + if (args->num_binds == 1) { + __num_syncs = num_syncs; + __syncs = syncs; + } else if (first_or_last && num_syncs) { + bool first = j == 0; + + __syncs = kmalloc(sizeof(*__syncs) * num_syncs, + GFP_KERNEL); + if (!__syncs) { + err = ENOMEM; + break; + } + + /* in-syncs on first bind, out-syncs on last bind */ + for (i = 0; i < num_syncs; ++i) { + bool signal = syncs[i].flags & + DRM_XE_SYNC_SIGNAL; + + if ((first && !signal) || (!first && signal)) + __syncs[__num_syncs++] = syncs[i]; + } + } else { + __num_syncs = 0; + __syncs = NULL; + } + + if (async) { + bool last = j == args->num_binds - 1; + + /* + * Each pass of async worker drops the ref, take a ref + * here, 1 set of refs taken above + */ + if (!last) { + if (e) + xe_engine_get(e); + xe_vm_get(vm); + } + + err = vm_bind_ioctl_async(vm, vmas[j], e, bos[j], + bind_ops + j, __syncs, + __num_syncs); + if (err && !last) { + if (e) + xe_engine_put(e); + xe_vm_put(vm); + } + if (err) + break; + } else { + XE_BUG_ON(j != 0); /* Not supported */ + err = vm_bind_ioctl(vm, vmas[j], e, bos[j], + bind_ops + j, __syncs, + __num_syncs, NULL); + break; /* Needed so cleanup loops work */ + } + } + + /* Most of cleanup owned by the async bind worker */ + if (async && !err) { + up_write(&vm->lock); + if (args->num_binds > 1) + kfree(syncs); + goto free_objs; + } + +destroy_vmas: + for (i = j; err && i < args->num_binds; ++i) { + u32 op = bind_ops[i].op; + struct xe_vma *vma, *next; + + if (!vmas[i]) + break; + + list_for_each_entry_safe(vma, next, &vma->unbind_link, + unbind_link) { + list_del_init(&vma->unbind_link); + if (!vma->destroyed) { + prep_vma_destroy(vm, vma); + xe_vma_destroy_unlocked(vma); + } + } + + switch (VM_BIND_OP(op)) { + case XE_VM_BIND_OP_MAP: + prep_vma_destroy(vm, vmas[i]); + xe_vma_destroy_unlocked(vmas[i]); + break; + case XE_VM_BIND_OP_MAP_USERPTR: + prep_vma_destroy(vm, vmas[i]); + xe_vma_destroy_unlocked(vmas[i]); + break; + } + } +release_vm_lock: + up_write(&vm->lock); +free_syncs: + while (num_syncs--) { + if (async && j && + !(syncs[num_syncs].flags & DRM_XE_SYNC_SIGNAL)) + continue; /* Still in async worker */ + xe_sync_entry_cleanup(&syncs[num_syncs]); + } + + kfree(syncs); +put_obj: + for (i = j; i < args->num_binds; ++i) + xe_bo_put(bos[i]); +put_engine: + if (e) + xe_engine_put(e); +put_vm: + xe_vm_put(vm); +free_objs: + kfree(bos); + kfree(vmas); + if (args->num_binds > 1) + kfree(bind_ops); + return err; +} + +/* + * XXX: Using the TTM wrappers for now, likely can call into dma-resv code + * directly to optimize. Also this likely should be an inline function. 
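+ *
+ * A direct dma-resv variant would presumably look something like the sketch
+ * below (illustration only, not validated against this tree;
+ * dma_resv_lock_interruptible() would be used when intr is set), mirroring
+ * the dma_resv_unlock() / ww_acquire_fini() pair in xe_vm_unlock():
+ *
+ *	ww_acquire_init(ww, &reservation_ww_class);
+ *	err = dma_resv_lock(&vm->resv, ww);
+ *	if (!err)
+ *		err = dma_resv_reserve_fences(&vm->resv, num_resv);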
+ */ +int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww, + int num_resv, bool intr) +{ + struct ttm_validate_buffer tv_vm; + LIST_HEAD(objs); + LIST_HEAD(dups); + + XE_BUG_ON(!ww); + + tv_vm.num_shared = num_resv; + tv_vm.bo = xe_vm_ttm_bo(vm);; + list_add_tail(&tv_vm.head, &objs); + + return ttm_eu_reserve_buffers(ww, &objs, intr, &dups); +} + +void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww) +{ + dma_resv_unlock(&vm->resv); + ww_acquire_fini(ww); +} + +/** + * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock + * @vma: VMA to invalidate + * + * Walks a list of page tables leaves which it memset the entries owned by this + * VMA to zero, invalidates the TLBs, and block until TLBs invalidation is + * complete. + * + * Returns 0 for success, negative error code otherwise. + */ +int xe_vm_invalidate_vma(struct xe_vma *vma) +{ + struct xe_device *xe = vma->vm->xe; + struct xe_gt *gt; + u32 gt_needs_invalidate = 0; + int seqno[XE_MAX_GT]; + u8 id; + int ret; + + XE_BUG_ON(!xe_vm_in_fault_mode(vma->vm)); + trace_xe_vma_usm_invalidate(vma); + + /* Check that we don't race with page-table updates */ + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + if (xe_vma_is_userptr(vma)) { + WARN_ON_ONCE(!mmu_interval_check_retry + (&vma->userptr.notifier, + vma->userptr.notifier_seq)); + WARN_ON_ONCE(!dma_resv_test_signaled(&vma->vm->resv, + DMA_RESV_USAGE_BOOKKEEP)); + + } else { + xe_bo_assert_held(vma->bo); + } + } + + for_each_gt(gt, xe, id) { + if (xe_pt_zap_ptes(gt, vma)) { + gt_needs_invalidate |= BIT(id); + xe_device_wmb(xe); + seqno[id] = xe_gt_tlb_invalidation(gt); + if (seqno[id] < 0) + return seqno[id]; + } + } + + for_each_gt(gt, xe, id) { + if (gt_needs_invalidate & BIT(id)) { + ret = xe_gt_tlb_invalidation_wait(gt, seqno[id]); + if (ret < 0) + return ret; + } + } + + vma->usm.gt_invalidated = vma->gt_mask; + + return 0; +} + +#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE) +int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) +{ + struct rb_node *node; + bool is_lmem; + uint64_t addr; + + if (!down_read_trylock(&vm->lock)) { + drm_printf(p, " Failed to acquire VM lock to dump capture"); + return 0; + } + if (vm->pt_root[gt_id]) { + addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, GEN8_PAGE_SIZE, &is_lmem); + drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_lmem ? "LMEM" : "SYS"); + } + + for (node = rb_first(&vm->vmas); node; node = rb_next(node)) { + struct xe_vma *vma = to_xe_vma(node); + bool is_userptr = xe_vma_is_userptr(vma); + + if (is_userptr) { + struct xe_res_cursor cur; + + xe_res_first_sg(vma->userptr.sg, 0, GEN8_PAGE_SIZE, &cur); + addr = xe_res_dma(&cur); + } else { + addr = xe_bo_addr(vma->bo, 0, GEN8_PAGE_SIZE, &is_lmem); + } + drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n", + vma->start, vma->end, vma->end - vma->start + 1ull, + addr, is_userptr ? "USR" : is_lmem ? 
"VRAM" : "SYS"); + } + up_read(&vm->lock); + + return 0; +} +#else +int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) +{ + return 0; +} +#endif diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h new file mode 100644 index 000000000000..3468ed9d0528 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_VM_H_ +#define _XE_VM_H_ + +#include "xe_macros.h" +#include "xe_map.h" +#include "xe_vm_types.h" + +struct drm_device; +struct drm_printer; +struct drm_file; + +struct ttm_buffer_object; +struct ttm_validate_buffer; + +struct xe_engine; +struct xe_file; +struct xe_sync_entry; + +struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags); +void xe_vm_free(struct kref *ref); + +struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id); +int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node); + +static inline struct xe_vm *xe_vm_get(struct xe_vm *vm) +{ + kref_get(&vm->refcount); + return vm; +} + +static inline void xe_vm_put(struct xe_vm *vm) +{ + kref_put(&vm->refcount, xe_vm_free); +} + +int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww, + int num_resv, bool intr); + +void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww); + +static inline bool xe_vm_is_closed(struct xe_vm *vm) +{ + /* Only guaranteed not to change when vm->resv is held */ + return !vm->size; +} + +struct xe_vma * +xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma); + +#define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv) + +u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_gt *full_gt); + +int xe_vm_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_vm_bind_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); + +void xe_vm_close_and_put(struct xe_vm *vm); + +static inline bool xe_vm_in_compute_mode(struct xe_vm *vm) +{ + return vm->flags & XE_VM_FLAG_COMPUTE_MODE; +} + +static inline bool xe_vm_in_fault_mode(struct xe_vm *vm) +{ + return vm->flags & XE_VM_FLAG_FAULT_MODE; +} + +static inline bool xe_vm_no_dma_fences(struct xe_vm *vm) +{ + return xe_vm_in_compute_mode(vm) || xe_vm_in_fault_mode(vm); +} + +int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e); + +int xe_vm_userptr_pin(struct xe_vm *vm); + +int __xe_vm_userptr_needs_repin(struct xe_vm *vm); + +int xe_vm_userptr_check_repin(struct xe_vm *vm); + +struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); + +int xe_vm_invalidate_vma(struct xe_vma *vma); + +int xe_vm_async_fence_wait_start(struct dma_fence *fence); + +extern struct ttm_device_funcs xe_ttm_funcs; + +struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm); + +static inline bool xe_vma_is_userptr(struct xe_vma *vma) +{ + return !vma->bo; +} + +int xe_vma_userptr_pin_pages(struct xe_vma *vma); + +int xe_vma_userptr_check_repin(struct xe_vma *vma); + +/* + * XE_ONSTACK_TV is used to size the tv_onstack array that is input + * to xe_vm_lock_dma_resv() and xe_vm_unlock_dma_resv(). 
+ */ +#define XE_ONSTACK_TV 20 +int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww, + struct ttm_validate_buffer *tv_onstack, + struct ttm_validate_buffer **tv, + struct list_head *objs, + bool intr, + unsigned int num_shared); + +void xe_vm_unlock_dma_resv(struct xe_vm *vm, + struct ttm_validate_buffer *tv_onstack, + struct ttm_validate_buffer *tv, + struct ww_acquire_ctx *ww, + struct list_head *objs); + +void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence, + enum dma_resv_usage usage); + +int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id); + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) +#define vm_dbg drm_dbg +#else +__printf(2, 3) +static inline void vm_dbg(const struct drm_device *dev, + const char *format, ...) +{ /* noop */ } +#endif +#endif diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h new file mode 100644 index 000000000000..5b6216964c45 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vm_doc.h @@ -0,0 +1,555 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_VM_DOC_H_ +#define _XE_VM_DOC_H_ + +/** + * DOC: XE VM (user address space) + * + * VM creation + * =========== + * + * Allocate a physical page for root of the page table structure, create default + * bind engine, and return a handle to the user. + * + * Scratch page + * ------------ + * + * If the VM is created with the flag, DRM_XE_VM_CREATE_SCRATCH_PAGE, set the + * entire page table structure defaults pointing to blank page allocated by the + * VM. Invalid memory access rather than fault just read / write to this page. + * + * VM bind (create GPU mapping for a BO or userptr) + * ================================================ + * + * Creates GPU mapings for a BO or userptr within a VM. VM binds uses the same + * in / out fence interface (struct drm_xe_sync) as execs which allows users to + * think of binds and execs as more or less the same operation. + * + * Operations + * ---------- + * + * XE_VM_BIND_OP_MAP - Create mapping for a BO + * XE_VM_BIND_OP_UNMAP - Destroy mapping for a BO / userptr + * XE_VM_BIND_OP_MAP_USERPTR - Create mapping for userptr + * + * Implementation details + * ~~~~~~~~~~~~~~~~~~~~~~ + * + * All bind operations are implemented via a hybrid approach of using the CPU + * and GPU to modify page tables. If a new physical page is allocated in the + * page table structure we populate that page via the CPU and insert that new + * page into the existing page table structure via a GPU job. Also any existing + * pages in the page table structure that need to be modified also are updated + * via the GPU job. As the root physical page is prealloced on VM creation our + * GPU job will always have at least 1 update. The in / out fences are passed to + * this job so again this is conceptually the same as an exec. + * + * Very simple example of few binds on an empty VM with 48 bits of address space + * and the resulting operations: + * + * .. 
code-block::
+ *
+ *	bind BO0 0x0-0x1000
+ *	  alloc page level 3a, program PTE[0] to BO0 phys address (CPU)
+ *	  alloc page level 2, program PDE[0] page level 3a phys address (CPU)
+ *	  alloc page level 1, program PDE[0] page level 2 phys address (CPU)
+ *	  update root PDE[0] to page level 1 phys address (GPU)
+ *
+ *	bind BO1 0x201000-0x202000
+ *	  alloc page level 3b, program PTE[1] to BO1 phys address (CPU)
+ *	  update page level 2 PDE[1] to page level 3b phys address (GPU)
+ *
+ *	bind BO2 0x1ff000-0x201000
+ *	  update page level 3a PTE[511] to BO2 phys address (GPU)
+ *	  update page level 3b PTE[0] to BO2 phys address + 0x1000 (GPU)
+ *
+ * GPU bypass
+ * ~~~~~~~~~~
+ *
+ * In the above example the steps using the GPU can be converted to CPU if the
+ * bind can be done immediately (all in-fences satisfied, VM dma-resv kernel
+ * slot is idle).
+ *
+ * Address space
+ * -------------
+ *
+ * Depending on platform either 48 or 57 bits of address space are supported.
+ *
+ * Page sizes
+ * ----------
+ *
+ * The minimum page size is either 4k or 64k depending on platform and memory
+ * placement (sysmem vs. VRAM). We enforce that binds must be aligned to the
+ * minimum page size.
+ *
+ * Larger pages (2M or 1GB) can be used for BOs in VRAM, the BO physical address
+ * is aligned to the larger page size, and the VA is aligned to the larger page
+ * size. Larger pages for userptrs / BOs in sysmem should be possible but are
+ * not yet implemented.
+ *
+ * Sync error handling mode
+ * ------------------------
+ *
+ * In both modes the user input is validated during the bind IOCTL. In sync
+ * error handling mode the newly bound BO is validated (potentially moved back
+ * to a region of memory where it can be used), page tables are updated by the
+ * CPU and the job to do the GPU binds is created in the IOCTL itself. This step
+ * can fail due to memory pressure. The user can recover by freeing memory and
+ * trying this operation again.
+ *
+ * Async error handling mode
+ * -------------------------
+ *
+ * In async error handling the steps of validating the BO, updating page tables,
+ * and generating a job are deferred to an async worker. As this step can now
+ * fail after the IOCTL has reported success, we need an error handling flow
+ * from which the user can recover.
+ *
+ * The solution is for the user to register a user address with the VM which the
+ * VM uses to report errors to. The ufence wait interface can be used to wait on
+ * a VM going into an error state. Once an error is reported the VM's async
+ * worker is paused. While the VM's async worker is paused, sync
+ * XE_VM_BIND_OP_UNMAP operations are allowed (this can free memory). Once the
+ * user believes the error state is fixed, the async worker can be resumed via
+ * the XE_VM_BIND_OP_RESTART operation. When VM async bind work is restarted,
+ * the first operation processed is the operation that caused the original
+ * error.
+ *
+ * Bind queues / engines
+ * ---------------------
+ *
+ * Think of the case where we have two bind operations A + B which are submitted
+ * in that order. A has in fences while B has none. If using a single bind
+ * queue, B is now blocked on A's in fences even though it is ready to run. This
+ * example is a real use case for VK sparse binding. We work around this
+ * limitation by implementing bind engines.
+ *
+ * In the bind IOCTL the user can optionally pass in an engine ID which must map
+ * to an engine which is of the special class DRM_XE_ENGINE_CLASS_VM_BIND.
+ * Underneath this is a really virtual engine that can run on any of the copy
+ * hardware engines. The job(s) created by each IOCTL are inserted into this
+ * engine's ring. In the example above, if A and B have different bind engines,
+ * B is free to pass A. If the engine ID field is omitted, the default bind
+ * queue for the VM is used.
+ *
+ * TODO: Explain race in issue 41 and how we solve it
+ *
+ * Array of bind operations
+ * ------------------------
+ *
+ * The uAPI allows multiple bind operations to be passed in via a user array of
+ * struct drm_xe_vm_bind_op in a single VM bind IOCTL. This interface matches
+ * the VK sparse binding API. The implementation is rather simple: parse the
+ * array into a list of operations, pass the in fences to the first operation,
+ * and pass the out fences to the last operation. The ordered nature of a bind
+ * engine makes this possible.
+ *
+ * Munmap semantics for unbinds
+ * ----------------------------
+ *
+ * Munmap allows things like:
+ *
+ * .. code-block::
+ *
+ *	0x0000-0x2000 and 0x3000-0x5000 have mappings
+ *	Munmap 0x1000-0x4000, results in mappings 0x0000-0x1000 and 0x4000-0x5000
+ *
+ * To support this semantic in the above example we decompose the above example
+ * into 4 operations:
+ *
+ * .. code-block::
+ *
+ *	unbind 0x0000-0x2000
+ *	unbind 0x3000-0x5000
+ *	rebind 0x0000-0x1000
+ *	rebind 0x4000-0x5000
+ *
+ * Why not just do a partial unbind of 0x1000-0x2000 and 0x3000-0x4000? This
+ * falls apart when using large pages at the edges and the unbind forces us to
+ * use a smaller page size. For simplicity we always issue a set of unbinds
+ * unmapping anything in the range and at most 2 rebinds on the edges.
+ *
+ * Similar to an array of binds, in fences are passed to the first operation and
+ * out fences are signaled on the last operation.
+ *
+ * In this example there is a window of time where 0x0000-0x1000 and
+ * 0x4000-0x5000 are invalid but the user didn't ask for these addresses to be
+ * removed from the mapping. To work around this we treat any munmap style
+ * unbinds which require a rebind as kernel operations (BO eviction or userptr
+ * invalidation). The first operation waits on the VM's
+ * DMA_RESV_USAGE_PREEMPT_FENCE slots (waits for all pending jobs on the VM to
+ * complete / triggers preempt fences) and the last operation is installed in
+ * the VM's DMA_RESV_USAGE_KERNEL slot (blocks future jobs / resumes the compute
+ * mode VM). The caveat is that all dma-resv slots must be updated atomically
+ * with respect to execs and the compute mode rebind worker. To accomplish this,
+ * hold the vm->lock in write mode from the first operation until the last.
+ *
+ * Deferred binds in fault mode
+ * ----------------------------
+ *
+ * If a VM is in fault mode (TODO: link to fault mode), new bind operations that
+ * create mappings are by default deferred to the page fault handler (first
+ * use). This behavior can be overridden by setting the flag
+ * XE_VM_BIND_FLAG_IMMEDIATE, which indicates that the mapping should be created
+ * immediately.
+ *
+ * User pointer
+ * ============
+ *
+ * User pointers are user allocated memory (malloc'd, mmap'd, etc..) for which
+ * the user wants to create a GPU mapping. Typically in other DRM drivers a
+ * dummy BO was created and then a binding was created. We bypass creating a
+ * dummy BO in XE and simply create a binding directly from the userptr.
+ *
+ * Invalidation
+ * ------------
+ *
+ * Since this is core kernel managed memory the kernel can move this memory
+ * whenever it wants. We register an invalidation MMU notifier to alert XE when
+ * a user pointer is about to move. The invalidation notifier needs to block
+ * until all pending users (jobs or compute mode engines) of the userptr are
+ * idle to ensure no faults. This is done by waiting on all of the VM's dma-resv
+ * slots.
+ *
+ * Rebinds
+ * -------
+ *
+ * Either the next exec (non-compute) or rebind worker (compute mode) will
+ * rebind the userptr. The invalidation MMU notifier kicks the rebind worker
+ * after the VM dma-resv wait if the VM is in compute mode.
+ *
+ * Compute mode
+ * ============
+ *
+ * A VM in compute mode enables long running workloads and ultra low latency
+ * submission (ULLS). ULLS is implemented via a continuously running batch +
+ * semaphores. This enables the user to insert jump-to-new-batch commands
+ * into the continuously running batch. In both cases these batches exceed the
+ * time a dma fence is allowed to exist for before signaling, as such dma fences
+ * are not used when a VM is in compute mode. User fences (TODO: link user fence
+ * doc) are used instead to signal an operation's completion.
+ *
+ * Preempt fences
+ * --------------
+ *
+ * If the kernel decides to move memory around (either userptr invalidate, BO
+ * eviction, or munmap style unbind which results in a rebind) and a batch is
+ * running on an engine, that batch can fault or cause memory corruption as
+ * page tables for the moved memory are no longer valid. To work around this we
+ * introduce the concept of preempt fences. When sw signaling is enabled on a
+ * preempt fence it tells the submission backend to kick that engine off the
+ * hardware and the preempt fence signals when the engine is off the hardware.
+ * Once all preempt fences are signaled for a VM the kernel can safely move the
+ * memory and kick the rebind worker which resumes all the engines' execution.
+ *
+ * A preempt fence, for every engine using the VM, is installed in the VM's
+ * dma-resv DMA_RESV_USAGE_PREEMPT_FENCE slot. The same preempt fence, for every
+ * engine using the VM, is also installed into the same dma-resv slot of every
+ * external BO mapped in the VM.
+ *
+ * Rebind worker
+ * -------------
+ *
+ * The rebind worker is very similar to an exec. It is responsible for rebinding
+ * evicted BOs or userptrs, waiting on those operations, installing new preempt
+ * fences, and finally resuming execution of the engines in the VM.
+ *
+ * Flow
+ * ~~~~
+ *
+ * ..
code-block:: + * + * <----------------------------------------------------------------------| + * Check if VM is closed, if so bail out | + * Lock VM global lock in read mode | + * Pin userptrs (also finds userptr invalidated since last rebind worker) | + * Lock VM dma-resv and external BOs dma-resv | + * Validate BOs that have been evicted | + * Wait on and allocate new preempt fences for every engine using the VM | + * Rebind invalidated userptrs + evicted BOs | + * Wait on last rebind fence | + * Wait VM's DMA_RESV_USAGE_KERNEL dma-resv slot | + * Install preeempt fences and issue resume for every engine using the VM | + * Check if any userptrs invalidated since pin | + * Squash resume for all engines | + * Unlock all | + * Wait all VM's dma-resv slots | + * Retry ---------------------------------------------------------- + * Release all engines waiting to resume + * Unlock all + * + * Timeslicing + * ----------- + * + * In order to prevent an engine from continuously being kicked off the hardware + * and making no forward progress an engine has a period of time it allowed to + * run after resume before it can be kicked off again. This effectively gives + * each engine a timeslice. + * + * Handling multiple GTs + * ===================== + * + * If a GT has slower access to some regions and the page table structure are in + * the slow region, the performance on that GT could adversely be affected. To + * work around this we allow a VM page tables to be shadowed in multiple GTs. + * When VM is created, a default bind engine and PT table structure are created + * on each GT. + * + * Binds can optionally pass in a mask of GTs where a mapping should be created, + * if this mask is zero then default to all the GTs where the VM has page + * tables. + * + * The implementation for this breaks down into a bunch for_each_gt loops in + * various places plus exporting a composite fence for multi-GT binds to the + * user. + * + * Fault mode (unified shared memory) + * ================================== + * + * A VM in fault mode can be enabled on devices that support page faults. If + * page faults are enabled, using dma fences can potentially induce a deadlock: + * A pending page fault can hold up the GPU work which holds up the dma fence + * signaling, and memory allocation is usually required to resolve a page + * fault, but memory allocation is not allowed to gate dma fence signaling. As + * such, dma fences are not allowed when VM is in fault mode. Because dma-fences + * are not allowed, long running workloads and ULLS are enabled on a faulting + * VM. + * + * Defered VM binds + * ---------------- + * + * By default, on a faulting VM binds just allocate the VMA and the actual + * updating of the page tables is defered to the page fault handler. This + * behavior can be overridden by setting the flag XE_VM_BIND_FLAG_IMMEDIATE in + * the VM bind which will then do the bind immediately. + * + * Page fault handler + * ------------------ + * + * Page faults are received in the G2H worker under the CT lock which is in the + * path of dma fences (no memory allocations are allowed, faults require memory + * allocations) thus we cannot process faults under the CT lock. Another issue + * is faults issue TLB invalidations which require G2H credits and we cannot + * allocate G2H credits in the G2H handlers without deadlocking. Lastly, we do + * not want the CT lock to be an outer lock of the VM global lock (VM global + * lock required to fault processing). 
+ * + * To work around the above issue with processing faults in the G2H worker, we + * sink faults to a buffer which is large enough to sink all possible faults on + * the GT (1 per hardware engine) and kick a worker to process the faults. Since + * the page faults G2H are already received in a worker, kicking another worker + * adds more latency to a critical performance path. We add a fast path in the + * G2H irq handler which looks at first G2H and if it is a page fault we sink + * the fault to the buffer and kick the worker to process the fault. TLB + * invalidation responses are also in the critical path so these can also be + * processed in this fast path. + * + * Multiple buffers and workers are used and hashed over based on the ASID so + * faults from different VMs can be processed in parallel. + * + * The page fault handler itself is rather simple, flow is below. + * + * .. code-block:: + * + * Lookup VM from ASID in page fault G2H + * Lock VM global lock in read mode + * Lookup VMA from address in page fault G2H + * Check if VMA is valid, if not bail + * Check if VMA's BO has backing store, if not allocate + * <----------------------------------------------------------------------| + * If userptr, pin pages | + * Lock VM & BO dma-resv locks | + * If atomic fault, migrate to VRAM, else validate BO location | + * Issue rebind | + * Wait on rebind to complete | + * Check if userptr invalidated since pin | + * Drop VM & BO dma-resv locks | + * Retry ---------------------------------------------------------- + * Unlock all + * Issue blocking TLB invalidation | + * Send page fault response to GuC + * + * Access counters + * --------------- + * + * Access counters can be configured to trigger a G2H indicating the device is + * accessing VMAs in system memory frequently as hint to migrate those VMAs to + * VRAM. + * + * Same as the page fault handler, access counters G2H cannot be processed the + * G2H worker under the CT lock. Again we use a buffer to sink access counter + * G2H. Unlike page faults there is no upper bound so if the buffer is full we + * simply drop the G2H. Access counters are a best case optimization and it is + * safe to drop these unlike page faults. + * + * The access counter handler itself is rather simple flow is below. + * + * .. code-block:: + * + * Lookup VM from ASID in access counter G2H + * Lock VM global lock in read mode + * Lookup VMA from address in access counter G2H + * If userptr, bail nothing to do + * Lock VM & BO dma-resv locks + * Issue migration to VRAM + * Unlock all + * + * Notice no rebind is issued in the access counter handler as the rebind will + * be issued on next page fault. + * + * Cavets with eviction / user pointer invalidation + * ------------------------------------------------ + * + * In the case of eviction and user pointer invalidation on a faulting VM, there + * is no need to issue a rebind rather we just need to blow away the page tables + * for the VMAs and the page fault handler will rebind the VMAs when they fault. + * The cavet is to update / read the page table structure the VM global lock is + * neeeed. In both the case of eviction and user pointer invalidation locks are + * held which make acquiring the VM global lock impossible. To work around this + * every VMA maintains a list of leaf page table entries which should be written + * to zero to blow away the VMA's page tables. After writing zero to these + * entries a blocking TLB invalidate is issued. 
At this point it is safe for the
+ * kernel to move the VMA's memory around. This is a necessary lockless
+ * algorithm and is safe as leaves cannot be changed while either an eviction
+ * or userptr invalidation is occurring.
+ *
+ * Locking
+ * =======
+ *
+ * VM locking protects all of the core data paths (bind operations, execs,
+ * evictions, and compute mode rebind worker) in XE.
+ *
+ * Locks
+ * -----
+ *
+ * VM global lock (vm->lock) - rw semaphore lock. Outermost lock which protects
+ * the list of userptrs mapped in the VM, the list of engines using this VM, and
+ * the array of external BOs mapped in the VM. Anything adding or removing any
+ * of the aforementioned state from the VM should acquire this lock in write
+ * mode. The VM bind path also acquires this lock in write mode while the exec /
+ * compute mode rebind worker acquires this lock in read mode.
+ *
+ * VM dma-resv lock (vm->ttm.base.resv->lock) - WW lock. Protects VM dma-resv
+ * slots which are shared with any private BO in the VM. Expected to be acquired
+ * during VM binds, execs, and the compute mode rebind worker. This lock is also
+ * held when private BOs are being evicted.
+ *
+ * external BO dma-resv lock (bo->ttm.base.resv->lock) - WW lock. Protects
+ * external BO dma-resv slots. Expected to be acquired during VM binds (in
+ * addition to the VM dma-resv lock). All external BO dma-resv locks within a VM
+ * are expected to be acquired (in addition to the VM dma-resv lock) during
+ * execs and the compute mode rebind worker. This lock is also held when an
+ * external BO is being evicted.
+ *
+ * Putting it all together
+ * -----------------------
+ *
+ * 1. An exec and bind operation with the same VM can't be executing at the same
+ * time (vm->lock).
+ *
+ * 2. A compute mode rebind worker and bind operation with the same VM can't be
+ * executing at the same time (vm->lock).
+ *
+ * 3. We can't add / remove userptrs or external BOs to a VM while an exec with
+ * the same VM is executing (vm->lock).
+ *
+ * 4. We can't add / remove userptrs, external BOs, or engines to a VM while a
+ * compute mode rebind worker with the same VM is executing (vm->lock).
+ *
+ * 5. Evictions within a VM can't happen while an exec with the same VM is
+ * executing (dma-resv locks).
+ *
+ * 6. Evictions within a VM can't happen while a compute mode rebind worker
+ * with the same VM is executing (dma-resv locks).
+ *
+ * dma-resv usage
+ * ==============
+ *
+ * As previously stated, to enforce the ordering of kernel ops (eviction,
+ * userptr invalidation, munmap style unbinds which result in a rebind), rebinds
+ * during execs, execs, and resumes in the rebind worker, we use both the VM's
+ * and external BOs' dma-resv slots. Let's try to make this as clear as
+ * possible.
+ *
+ * Slot installation
+ * -----------------
+ *
+ * 1. Jobs from kernel ops install themselves into the DMA_RESV_USAGE_KERNEL
+ * slot of either an external BO or VM (depends on if the kernel op is operating
+ * on an external or private BO)
+ *
+ * 2. In non-compute mode, jobs from execs install themselves into the
+ * DMA_RESV_USAGE_BOOKKEEP slot of the VM
+ *
+ * 3. In non-compute mode, jobs from execs install themselves into the
+ * DMA_RESV_USAGE_WRITE slot of all external BOs in the VM
+ *
+ * 4. Jobs from binds install themselves into the DMA_RESV_USAGE_BOOKKEEP slot
+ * of the VM
+ *
+ * 5. Jobs from binds install themselves into the DMA_RESV_USAGE_BOOKKEEP slot
+ * of the external BO (if the bind is to an external BO, this is in addition to
+ * #4)
+ *
+ * 6. Every engine using a compute mode VM has a preempt fence installed in
+ * the DMA_RESV_USAGE_PREEMPT_FENCE slot of the VM
+ *
+ * 7. Every engine using a compute mode VM has a preempt fence installed in
+ * the DMA_RESV_USAGE_PREEMPT_FENCE slot of all the external BOs in the VM
+ *
+ * Slot waiting
+ * ------------
+ *
+ * 1. The execution of all jobs from kernel ops shall wait on all slots
+ * (DMA_RESV_USAGE_PREEMPT_FENCE) of either an external BO or VM (depends on if
+ * the kernel op is operating on an external or private BO)
+ *
+ * 2. In non-compute mode, the execution of all jobs from rebinds in execs shall
+ * wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO or VM
+ * (depends on if the rebind is operating on an external or private BO)
+ *
+ * 3. In non-compute mode, the execution of all jobs from execs shall wait on
+ * the last rebind job
+ *
+ * 4. In compute mode, the execution of all jobs from rebinds in the rebind
+ * worker shall wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO
+ * or VM (depends on if the rebind is operating on an external or private BO)
+ *
+ * 5. In compute mode, resumes in the rebind worker shall wait on the last
+ * rebind fence
+ *
+ * 6. In compute mode, resumes in the rebind worker shall wait on the
+ * DMA_RESV_USAGE_KERNEL slot of the VM
+ *
+ * Putting it all together
+ * -----------------------
+ *
+ * 1. New jobs from kernel ops are blocked behind any existing jobs from
+ * non-compute mode execs
+ *
+ * 2. New jobs from non-compute mode execs are blocked behind any existing jobs
+ * from kernel ops and rebinds
+ *
+ * 3. New jobs from kernel ops are blocked behind all preempt fences signaling
+ * in compute mode
+ *
+ * 4. Compute mode engine resumes are blocked behind any existing jobs from
+ * kernel ops and rebinds
+ *
+ * Future work
+ * ===========
+ *
+ * Support large pages for sysmem and userptr.
+ *
+ * Update page faults to handle BOs at page level granularity (e.g. part of a BO
+ * could be in system memory while another part could be in VRAM).
+ *
+ * The page fault handler can likely be optimized a bit more (e.g. rebinds
+ * always wait on the dma-resv kernel slots of the VM or BO, technically we only
+ * have to wait on the BO moving. If using a job to do the rebind, we could
+ * avoid blocking in the page fault handler and rather attach a callback to the
+ * fence of the rebind job to signal page fault completion. Our handling of
+ * short circuiting atomic faults for bound VMAs could be better. etc...). We
+ * can tune all of this once we have benchmarks / performance numbers from
+ * workloads up and running.
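+ *
+ * dma-resv usage sketch
+ * =====================
+ *
+ * As a rough, illustrative sketch of the slot scheme above (not this driver's
+ * actual code; in practice the waits are typically expressed as scheduler job
+ * dependencies rather than blocking CPU waits, and DMA_RESV_USAGE_PREEMPT_FENCE
+ * is specific to this series, so the catch-all DMA_RESV_USAGE_BOOKKEEP usage
+ * stands in for it below), a kernel op against a private BO boils down to
+ * something like the following, with the VM's reservation object locked and
+ * kernel_op_fence standing in for the op's job fence:
+ *
+ * .. code-block::
+ *
+ *	dma_resv_wait_timeout(&vm->resv, DMA_RESV_USAGE_BOOKKEEP, false,
+ *			      MAX_SCHEDULE_TIMEOUT);
+ *	dma_resv_reserve_fences(&vm->resv, 1);
+ *	dma_resv_add_fence(&vm->resv, kernel_op_fence, DMA_RESV_USAGE_KERNEL);
+ *
+ * A non-compute exec's rebinds wait on the DMA_RESV_USAGE_KERNEL fences, and
+ * the exec then installs its own job fence with DMA_RESV_USAGE_BOOKKEEP.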
+ */ + +#endif diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c new file mode 100644 index 000000000000..4498aa2fbd47 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -0,0 +1,347 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include +#include +#include + +#include "xe_bo.h" +#include "xe_vm.h" +#include "xe_vm_madvise.h" + +static int madvise_preferred_mem_class(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, + u64 value) +{ + int i, err; + + if (XE_IOCTL_ERR(xe, value > XE_MEM_REGION_CLASS_VRAM)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, value == XE_MEM_REGION_CLASS_VRAM && + !xe->info.is_dgfx)) + return -EINVAL; + + for (i = 0; i < num_vmas; ++i) { + struct xe_bo *bo; + struct ww_acquire_ctx ww; + + bo = vmas[i]->bo; + + err = xe_bo_lock(bo, &ww, 0, true); + if (err) + return err; + bo->props.preferred_mem_class = value; + xe_bo_placement_for_flags(xe, bo, bo->flags); + xe_bo_unlock(bo, &ww); + } + + return 0; +} + +static int madvise_preferred_gt(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, u64 value) +{ + int i, err; + + if (XE_IOCTL_ERR(xe, value > xe->info.tile_count)) + return -EINVAL; + + for (i = 0; i < num_vmas; ++i) { + struct xe_bo *bo; + struct ww_acquire_ctx ww; + + bo = vmas[i]->bo; + + err = xe_bo_lock(bo, &ww, 0, true); + if (err) + return err; + bo->props.preferred_gt = value; + xe_bo_placement_for_flags(xe, bo, bo->flags); + xe_bo_unlock(bo, &ww); + } + + return 0; +} + +static int madvise_preferred_mem_class_gt(struct xe_device *xe, + struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, + u64 value) +{ + int i, err; + u32 gt_id = upper_32_bits(value); + u32 mem_class = lower_32_bits(value); + + if (XE_IOCTL_ERR(xe, mem_class > XE_MEM_REGION_CLASS_VRAM)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, mem_class == XE_MEM_REGION_CLASS_VRAM && + !xe->info.is_dgfx)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, gt_id > xe->info.tile_count)) + return -EINVAL; + + for (i = 0; i < num_vmas; ++i) { + struct xe_bo *bo; + struct ww_acquire_ctx ww; + + bo = vmas[i]->bo; + + err = xe_bo_lock(bo, &ww, 0, true); + if (err) + return err; + bo->props.preferred_mem_class = mem_class; + bo->props.preferred_gt = gt_id; + xe_bo_placement_for_flags(xe, bo, bo->flags); + xe_bo_unlock(bo, &ww); + } + + return 0; +} + +static int madvise_cpu_atomic(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, u64 value) +{ + int i, err; + + for (i = 0; i < num_vmas; ++i) { + struct xe_bo *bo; + struct ww_acquire_ctx ww; + + bo = vmas[i]->bo; + if (XE_IOCTL_ERR(xe, !(bo->flags & XE_BO_CREATE_SYSTEM_BIT))) + return -EINVAL; + + err = xe_bo_lock(bo, &ww, 0, true); + if (err) + return err; + bo->props.cpu_atomic = !!value; + + /* + * All future CPU accesses must be from system memory only, we + * just invalidate the CPU page tables which will trigger a + * migration on next access. 
+ */ + if (bo->props.cpu_atomic) + ttm_bo_unmap_virtual(&bo->ttm); + xe_bo_unlock(bo, &ww); + } + + return 0; +} + +static int madvise_device_atomic(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, u64 value) +{ + int i, err; + + for (i = 0; i < num_vmas; ++i) { + struct xe_bo *bo; + struct ww_acquire_ctx ww; + + bo = vmas[i]->bo; + if (XE_IOCTL_ERR(xe, !(bo->flags & XE_BO_CREATE_VRAM0_BIT) && + !(bo->flags & XE_BO_CREATE_VRAM1_BIT))) + return -EINVAL; + + err = xe_bo_lock(bo, &ww, 0, true); + if (err) + return err; + bo->props.device_atomic = !!value; + xe_bo_unlock(bo, &ww); + } + + return 0; +} + +static int madvise_priority(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, u64 value) +{ + int i, err; + + if (XE_IOCTL_ERR(xe, value > DRM_XE_VMA_PRIORITY_HIGH)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, value == DRM_XE_VMA_PRIORITY_HIGH && + !capable(CAP_SYS_NICE))) + return -EPERM; + + for (i = 0; i < num_vmas; ++i) { + struct xe_bo *bo; + struct ww_acquire_ctx ww; + + bo = vmas[i]->bo; + + err = xe_bo_lock(bo, &ww, 0, true); + if (err) + return err; + bo->ttm.priority = value; + ttm_bo_move_to_lru_tail(&bo->ttm); + xe_bo_unlock(bo, &ww); + } + + return 0; +} + +static int madvise_pin(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, u64 value) +{ + XE_WARN_ON("NIY"); + return 0; +} + +typedef int (*madvise_func)(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, u64 value); + +static const madvise_func madvise_funcs[] = { + [DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS] = madvise_preferred_mem_class, + [DRM_XE_VM_MADVISE_PREFERRED_GT] = madvise_preferred_gt, + [DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS_GT] = + madvise_preferred_mem_class_gt, + [DRM_XE_VM_MADVISE_CPU_ATOMIC] = madvise_cpu_atomic, + [DRM_XE_VM_MADVISE_DEVICE_ATOMIC] = madvise_device_atomic, + [DRM_XE_VM_MADVISE_PRIORITY] = madvise_priority, + [DRM_XE_VM_MADVISE_PIN] = madvise_pin, +}; + +static struct xe_vma *node_to_vma(const struct rb_node *node) +{ + BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0); + return (struct xe_vma *)node; +} + +static struct xe_vma ** +get_vmas(struct xe_vm *vm, int *num_vmas, u64 addr, u64 range) +{ + struct xe_vma **vmas; + struct xe_vma *vma, *__vma, lookup; + int max_vmas = 8; + struct rb_node *node; + + lockdep_assert_held(&vm->lock); + + vmas = kmalloc(max_vmas * sizeof(*vmas), GFP_KERNEL); + if (!vmas) + return NULL; + + lookup.start = addr; + lookup.end = addr + range - 1; + + vma = xe_vm_find_overlapping_vma(vm, &lookup); + if (!vma) + return vmas; + + if (!xe_vma_is_userptr(vma)) { + vmas[*num_vmas] = vma; + *num_vmas += 1; + } + + node = &vma->vm_node; + while ((node = rb_next(node))) { + if (!xe_vma_cmp_vma_cb(&lookup, node)) { + __vma = node_to_vma(node); + if (xe_vma_is_userptr(__vma)) + continue; + + if (*num_vmas == max_vmas) { + struct xe_vma **__vmas = + krealloc(vmas, max_vmas * sizeof(*vmas), + GFP_KERNEL); + + if (!__vmas) + return NULL; + vmas = __vmas; + } + vmas[*num_vmas] = __vma; + *num_vmas += 1; + } else { + break; + } + } + + node = &vma->vm_node; + while ((node = rb_prev(node))) { + if (!xe_vma_cmp_vma_cb(&lookup, node)) { + __vma = node_to_vma(node); + if (xe_vma_is_userptr(__vma)) + continue; + + if (*num_vmas == max_vmas) { + struct xe_vma **__vmas = + krealloc(vmas, max_vmas * sizeof(*vmas), + GFP_KERNEL); + + if (!__vmas) + return NULL; + vmas = __vmas; + } + vmas[*num_vmas] = __vma; + *num_vmas += 1; + } else { + break; + } + } + + return vmas; +} + +int 
xe_vm_madvise_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_vm_madvise *args = data; + struct xe_vm *vm; + struct xe_vma **vmas = NULL; + int num_vmas = 0, err = 0, idx; + + if (XE_IOCTL_ERR(xe, args->extensions)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->property > ARRAY_SIZE(madvise_funcs))) + return -EINVAL; + + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_ERR(xe, !vm)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) { + err = -ENOENT; + goto put_vm; + } + + if (XE_IOCTL_ERR(xe, !xe_vm_in_fault_mode(vm))) { + err = -EINVAL; + goto put_vm; + } + + down_read(&vm->lock); + + vmas = get_vmas(vm, &num_vmas, args->addr, args->range); + if (XE_IOCTL_ERR(xe, err)) + goto unlock_vm; + + if (XE_IOCTL_ERR(xe, !vmas)) { + err = -ENOMEM; + goto unlock_vm; + } + + if (XE_IOCTL_ERR(xe, !num_vmas)) { + err = -EINVAL; + goto unlock_vm; + } + + idx = array_index_nospec(args->property, ARRAY_SIZE(madvise_funcs)); + err = madvise_funcs[idx](xe, vm, vmas, num_vmas, args->value); + +unlock_vm: + up_read(&vm->lock); +put_vm: + xe_vm_put(vm); + kfree(vmas); + return err; +} diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.h b/drivers/gpu/drm/xe/xe_vm_madvise.h new file mode 100644 index 000000000000..eecd33acd248 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vm_madvise.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_VM_MADVISE_H_ +#define _XE_VM_MADVISE_H_ + +struct drm_device; +struct drm_file; + +int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); + +#endif diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h new file mode 100644 index 000000000000..2a3b911ab358 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -0,0 +1,337 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_VM_TYPES_H_ +#define _XE_VM_TYPES_H_ + +#include +#include +#include +#include + +#include "xe_device_types.h" +#include "xe_pt_types.h" + +struct xe_bo; +struct xe_vm; + +struct xe_vma { + struct rb_node vm_node; + /** @vm: VM which this VMA belongs to */ + struct xe_vm *vm; + + /** + * @start: start address of this VMA within its address domain, end - + * start + 1 == VMA size + */ + u64 start; + /** @end: end address of this VMA within its address domain */ + u64 end; + /** @pte_flags: pte flags for this VMA */ + u32 pte_flags; + + /** @bo: BO if not a userptr, must be NULL is userptr */ + struct xe_bo *bo; + /** @bo_offset: offset into BO if not a userptr, unused for userptr */ + u64 bo_offset; + + /** @gt_mask: GT mask of where to create binding for this VMA */ + u64 gt_mask; + + /** + * @gt_present: GT mask of binding are present for this VMA. + * protected by vm->lock, vm->resv and for userptrs, + * vm->userptr.notifier_lock for writing. Needs either for reading, + * but if reading is done under the vm->lock only, it needs to be held + * in write mode. + */ + u64 gt_present; + + /** + * @destroyed: VMA is destroyed, in the sense that it shouldn't be + * subject to rebind anymore. This field must be written under + * the vm lock in write mode and the userptr.notifier_lock in + * either mode. Read under the vm lock or the userptr.notifier_lock in + * write mode. 
+ */ + bool destroyed; + + /** + * @first_munmap_rebind: VMA is first in a sequence of ops that triggers + * a rebind (munmap style VM unbinds). This indicates the operation + * using this VMA must wait on all dma-resv slots (wait for pending jobs + * / trigger preempt fences). + */ + bool first_munmap_rebind; + + /** + * @last_munmap_rebind: VMA is first in a sequence of ops that triggers + * a rebind (munmap style VM unbinds). This indicates the operation + * using this VMA must install itself into kernel dma-resv slot (blocks + * future jobs) and kick the rebind work in compute mode. + */ + bool last_munmap_rebind; + + /** @use_atomic_access_pte_bit: Set atomic access bit in PTE */ + bool use_atomic_access_pte_bit; + + union { + /** @bo_link: link into BO if not a userptr */ + struct list_head bo_link; + /** @userptr_link: link into VM repin list if userptr */ + struct list_head userptr_link; + }; + + /** + * @rebind_link: link into VM if this VMA needs rebinding, and + * if it's a bo (not userptr) needs validation after a possible + * eviction. Protected by the vm's resv lock. + */ + struct list_head rebind_link; + + /** + * @unbind_link: link or list head if an unbind of multiple VMAs, in + * single unbind op, is being done. + */ + struct list_head unbind_link; + + /** @destroy_cb: callback to destroy VMA when unbind job is done */ + struct dma_fence_cb destroy_cb; + + /** @destroy_work: worker to destroy this BO */ + struct work_struct destroy_work; + + /** @userptr: user pointer state */ + struct { + /** @ptr: user pointer */ + uintptr_t ptr; + /** @invalidate_link: Link for the vm::userptr.invalidated list */ + struct list_head invalidate_link; + /** + * @notifier: MMU notifier for user pointer (invalidation call back) + */ + struct mmu_interval_notifier notifier; + /** @sgt: storage for a scatter gather table */ + struct sg_table sgt; + /** @sg: allocated scatter gather table */ + struct sg_table *sg; + /** @notifier_seq: notifier sequence number */ + unsigned long notifier_seq; + /** + * @initial_bind: user pointer has been bound at least once. + * write: vm->userptr.notifier_lock in read mode and vm->resv held. + * read: vm->userptr.notifier_lock in write mode or vm->resv held. + */ + bool initial_bind; +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) + u32 divisor; +#endif + } userptr; + + /** @usm: unified shared memory state */ + struct { + /** @gt_invalidated: VMA has been invalidated */ + u64 gt_invalidated; + } usm; + + struct { + struct list_head rebind_link; + } notifier; + + struct { + /** + * @extobj.link: Link into vm's external object list. + * protected by the vm lock. 
+ */ + struct list_head link; + } extobj; +}; + +struct xe_device; + +#define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv) + +struct xe_vm { + struct xe_device *xe; + + struct kref refcount; + + /* engine used for (un)binding vma's */ + struct xe_engine *eng[XE_MAX_GT]; + + /** Protects @rebind_list and the page-table structures */ + struct dma_resv resv; + + u64 size; + struct rb_root vmas; + + struct xe_pt *pt_root[XE_MAX_GT]; + struct xe_bo *scratch_bo[XE_MAX_GT]; + struct xe_pt *scratch_pt[XE_MAX_GT][XE_VM_MAX_LEVEL]; + + /** @flags: flags for this VM, statically setup a creation time */ +#define XE_VM_FLAGS_64K BIT(0) +#define XE_VM_FLAG_COMPUTE_MODE BIT(1) +#define XE_VM_FLAG_ASYNC_BIND_OPS BIT(2) +#define XE_VM_FLAG_MIGRATION BIT(3) +#define XE_VM_FLAG_SCRATCH_PAGE BIT(4) +#define XE_VM_FLAG_FAULT_MODE BIT(5) +#define XE_VM_FLAG_GT_ID(flags) (((flags) >> 6) & 0x3) +#define XE_VM_FLAG_SET_GT_ID(gt) ((gt)->info.id << 6) + unsigned long flags; + + /** @composite_fence_ctx: context composite fence */ + u64 composite_fence_ctx; + /** @composite_fence_seqno: seqno for composite fence */ + u32 composite_fence_seqno; + + /** + * @lock: outer most lock, protects objects of anything attached to this + * VM + */ + struct rw_semaphore lock; + + /** + * @rebind_list: list of VMAs that need rebinding, and if they are + * bos (not userptr), need validation after a possible eviction. The + * list is protected by @resv. + */ + struct list_head rebind_list; + + /** @rebind_fence: rebind fence from execbuf */ + struct dma_fence *rebind_fence; + + /** + * @destroy_work: worker to destroy VM, needed as a dma_fence signaling + * from an irq context can be last put and the destroy needs to be able + * to sleep. + */ + struct work_struct destroy_work; + + /** @extobj: bookkeeping for external objects. Protected by the vm lock */ + struct { + /** @enties: number of external BOs attached this VM */ + u32 entries; + /** @list: list of vmas with external bos attached */ + struct list_head list; + } extobj; + + /** @async_ops: async VM operations (bind / unbinds) */ + struct { + /** @list: list of pending async VM ops */ + struct list_head pending; + /** @work: worker to execute async VM ops */ + struct work_struct work; + /** @lock: protects list of pending async VM ops and fences */ + spinlock_t lock; + /** @error_capture: error capture state */ + struct { + /** @mm: user MM */ + struct mm_struct *mm; + /** + * @addr: user pointer to copy error capture state too + */ + u64 addr; + /** @wq: user fence wait queue for VM errors */ + wait_queue_head_t wq; + } error_capture; + /** @fence: fence state */ + struct { + /** @context: context of async fence */ + u64 context; + /** @seqno: seqno of async fence */ + u32 seqno; + } fence; + /** @error: error state for async VM ops */ + int error; + /** + * @munmap_rebind_inflight: an munmap style VM bind is in the + * middle of a set of ops which requires a rebind at the end. + */ + bool munmap_rebind_inflight; + } async_ops; + + /** @userptr: user pointer state */ + struct { + /** + * @userptr.repin_list: list of VMAs which are user pointers, + * and needs repinning. Protected by @lock. + */ + struct list_head repin_list; + /** + * @notifier_lock: protects notifier in write mode and + * submission in read mode. + */ + struct rw_semaphore notifier_lock; + /** + * @userptr.invalidated_lock: Protects the + * @userptr.invalidated list. 
+ */ + spinlock_t invalidated_lock; + /** + * @userptr.invalidated: List of invalidated userptrs, not yet + * picked + * up for revalidation. Protected from access with the + * @invalidated_lock. Removing items from the list + * additionally requires @lock in write mode, and adding + * items to the list requires the @userptr.notifer_lock in + * write mode. + */ + struct list_head invalidated; + } userptr; + + /** @preempt: preempt state */ + struct { + /** + * @min_run_period_ms: The minimum run period before preempting + * an engine again + */ + s64 min_run_period_ms; + /** @engines: list of engines attached to this VM */ + struct list_head engines; + /** @num_engines: number user engines attached to this VM */ + int num_engines; + /** + * @rebind_work: worker to rebind invalidated userptrs / evicted + * BOs + */ + struct work_struct rebind_work; + } preempt; + + /** @um: unified memory state */ + struct { + /** @asid: address space ID, unique to each VM */ + u32 asid; + /** + * @last_fault_vma: Last fault VMA, used for fast lookup when we + * get a flood of faults to the same VMA + */ + struct xe_vma *last_fault_vma; + } usm; + + /** + * @notifier: Lists and locks for temporary usage within notifiers where + * we either can't grab the vm lock or the vm resv. + */ + struct { + /** @notifier.list_lock: lock protecting @rebind_list */ + spinlock_t list_lock; + /** + * @notifier.rebind_list: list of vmas that we want to put on the + * main @rebind_list. This list is protected for writing by both + * notifier.list_lock, and the resv of the bo the vma points to, + * and for reading by the notifier.list_lock only. + */ + struct list_head rebind_list; + } notifier; + + /** @error_capture: allow to track errors */ + struct { + /** @capture_once: capture only one error per VM */ + bool capture_once; + } error_capture; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c new file mode 100644 index 000000000000..b56141ba7145 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -0,0 +1,326 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_wa.h" + +#include + +#include "xe_device_types.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_hw_engine_types.h" +#include "xe_mmio.h" +#include "xe_platform_types.h" +#include "xe_rtp.h" +#include "xe_step.h" + +#include "gt/intel_engine_regs.h" +#include "gt/intel_gt_regs.h" +#include "i915_reg.h" + +/** + * DOC: Hardware workarounds + * + * Hardware workarounds are register programming documented to be executed in + * the driver that fall outside of the normal programming sequences for a + * platform. There are some basic categories of workarounds, depending on + * how/when they are applied: + * + * - LRC workarounds: workarounds that touch registers that are + * saved/restored to/from the HW context image. The list is emitted (via Load + * Register Immediate commands) once when initializing the device and saved in + * the default context. That default context is then used on every context + * creation to have a "primed golden context", i.e. a context image that + * already contains the changes needed to all the registers. + * + * TODO: Although these workarounds are maintained here, they are not + * currently being applied. + * + * - Engine workarounds: the list of these WAs is applied whenever the specific + * engine is reset. It's also possible that a set of engine classes share a + * common power domain and they are reset together. 
This happens on some + * platforms with render and compute engines. In this case (at least) one of + * them need to keeep the workaround programming: the approach taken in the + * driver is to tie those workarounds to the first compute/render engine that + * is registered. When executing with GuC submission, engine resets are + * outside of kernel driver control, hence the list of registers involved in + * written once, on engine initialization, and then passed to GuC, that + * saves/restores their values before/after the reset takes place. See + * ``drivers/gpu/drm/xe/xe_guc_ads.c`` for reference. + * + * - GT workarounds: the list of these WAs is applied whenever these registers + * revert to their default values: on GPU reset, suspend/resume [1]_, etc. + * + * - Register whitelist: some workarounds need to be implemented in userspace, + * but need to touch privileged registers. The whitelist in the kernel + * instructs the hardware to allow the access to happen. From the kernel side, + * this is just a special case of a MMIO workaround (as we write the list of + * these to/be-whitelisted registers to some special HW registers). + * + * - Workaround batchbuffers: buffers that get executed automatically by the + * hardware on every HW context restore. These buffers are created and + * programmed in the default context so the hardware always go through those + * programming sequences when switching contexts. The support for workaround + * batchbuffers is enabled these hardware mechanisms: + * + * #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default + * context, pointing the hardware to jump to that location when that offset + * is reached in the context restore. Workaround batchbuffer in the driver + * currently uses this mechanism for all platforms. + * + * #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context, + * pointing the hardware to a buffer to continue executing after the + * engine registers are restored in a context restore sequence. This is + * currently not used in the driver. + * + * - Other: There are WAs that, due to their nature, cannot be applied from a + * central place. Those are peppered around the rest of the code, as needed. + * Workarounds related to the display IP are the main example. + * + * .. [1] Technically, some registers are powercontext saved & restored, so they + * survive a suspend/resume. In practice, writing them again is not too + * costly and simplifies things, so it's the approach taken in the driver. + * + * .. note:: + * Hardware workarounds in xe work the same way as in i915, with the + * difference of how they are maintained in the code. In xe it uses the + * xe_rtp infrastructure so the workarounds can be kept in tables, following + * a more declarative approach rather than procedural. 
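+ *
+ * As a rough usage sketch (the actual call sites live elsewhere in the
+ * driver's GT / engine initialization paths), the entry points defined at the
+ * bottom of this file walk the tables below and save the matching register
+ * programming for later application::
+ *
+ *	xe_wa_process_gt(gt);
+ *	xe_wa_process_engine(hwe);
+ *	xe_wa_process_lrc(hwe);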
+ */ + +#undef _MMIO +#undef MCR_REG +#define _MMIO(x) _XE_RTP_REG(x) +#define MCR_REG(x) _XE_RTP_MCR_REG(x) + +static bool match_14011060649(const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + return hwe->instance % 2 == 0; +} + +static const struct xe_rtp_entry gt_was[] = { + { XE_RTP_NAME("14011060649"), + XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255), + ENGINE_CLASS(VIDEO_DECODE), + FUNC(match_14011060649)), + XE_RTP_SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS, + XE_RTP_FLAG(FOREACH_ENGINE)) + }, + { XE_RTP_NAME("16010515920"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), + STEP(A0, B0), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS, + XE_RTP_FLAG(FOREACH_ENGINE)) + }, + { XE_RTP_NAME("22010523718"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10)), + XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE, CG3DDISCFEG_CLKGATE_DIS) + }, + { XE_RTP_NAME("14011006942"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10)), + XE_RTP_SET(GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS) + }, + { XE_RTP_NAME("14010948348"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS) + }, + { XE_RTP_NAME("14011037102"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_SET(UNSLCGCTL9444, LTCDD_CLKGATE_DIS) + }, + { XE_RTP_NAME("14011371254"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_SET(GEN11_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS) + }, + { XE_RTP_NAME("14011431319/0"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_SET(UNSLCGCTL9440, + GAMTLBOACS_CLKGATE_DIS | + GAMTLBVDBOX7_CLKGATE_DIS | GAMTLBVDBOX6_CLKGATE_DIS | + GAMTLBVDBOX5_CLKGATE_DIS | GAMTLBVDBOX4_CLKGATE_DIS | + GAMTLBVDBOX3_CLKGATE_DIS | GAMTLBVDBOX2_CLKGATE_DIS | + GAMTLBVDBOX1_CLKGATE_DIS | GAMTLBVDBOX0_CLKGATE_DIS | + GAMTLBKCR_CLKGATE_DIS | GAMTLBGUC_CLKGATE_DIS | + GAMTLBBLT_CLKGATE_DIS) + }, + { XE_RTP_NAME("14011431319/1"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_SET(UNSLCGCTL9444, + GAMTLBGFXA0_CLKGATE_DIS | GAMTLBGFXA1_CLKGATE_DIS | + GAMTLBCOMPA0_CLKGATE_DIS | GAMTLBCOMPA1_CLKGATE_DIS | + GAMTLBCOMPB0_CLKGATE_DIS | GAMTLBCOMPB1_CLKGATE_DIS | + GAMTLBCOMPC0_CLKGATE_DIS | GAMTLBCOMPC1_CLKGATE_DIS | + GAMTLBCOMPD0_CLKGATE_DIS | GAMTLBCOMPD1_CLKGATE_DIS | + GAMTLBMERT_CLKGATE_DIS | + GAMTLBVEBOX3_CLKGATE_DIS | GAMTLBVEBOX2_CLKGATE_DIS | + GAMTLBVEBOX1_CLKGATE_DIS | GAMTLBVEBOX0_CLKGATE_DIS) + }, + { XE_RTP_NAME("14010569222"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE, GAMEDIA_CLKGATE_DIS) + }, + { XE_RTP_NAME("14011028019"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS) + }, + { XE_RTP_NAME("14014830051"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_CLR(SARB_CHICKEN1, COMP_CKN_IN) + }, + { XE_RTP_NAME("14015795083"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE) + }, + { XE_RTP_NAME("14011059788"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), + XE_RTP_SET(GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE) + }, + { XE_RTP_NAME("1409420604"), + XE_RTP_RULES(PLATFORM(DG1)), + XE_RTP_SET(SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS) + }, + { XE_RTP_NAME("1408615072"), + XE_RTP_RULES(PLATFORM(DG1)), + XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL) + }, + {} +}; + +static const struct xe_rtp_entry engine_was[] = { + { XE_RTP_NAME("14015227452"), + XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), + XE_RTP_SET(GEN9_ROW_CHICKEN4, 
XEHP_DIS_BBL_SYSPIPE, + XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("1606931601"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), + XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ, + XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("22010931296, 18011464164, 14010919138"), + XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)), + XE_RTP_SET(GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE) + }, + { XE_RTP_NAME("14010826681, 1606700617, 22010271021"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), + XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, + XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("18019627453"), + XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), + XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, + XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("1409804808"), + XE_RTP_RULES(GRAPHICS_VERSION(1200), + ENGINE_CLASS(RENDER), + IS_INTEGRATED), + XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS, + XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("14010229206, 1409085225"), + XE_RTP_RULES(GRAPHICS_VERSION(1200), + ENGINE_CLASS(RENDER), + IS_INTEGRATED), + XE_RTP_SET(GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH, + XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("1607297627, 1607030317, 1607186500"), + XE_RTP_RULES(PLATFORM(TIGERLAKE), ENGINE_CLASS(RENDER)), + XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE), + GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | + GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("1607297627, 1607030317, 1607186500"), + XE_RTP_RULES(PLATFORM(ROCKETLAKE), ENGINE_CLASS(RENDER)), + XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE), + GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | + GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("1406941453"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), + XE_RTP_SET(GEN10_SAMPLER_MODE, ENABLE_SMALLPL, XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)), + XE_RTP_SET(GEN7_FF_SLICE_CS_CHICKEN1, GEN9_FFSC_PERCTX_PREEMPT_CTRL, + XE_RTP_FLAG(MASKED_REG)) + }, + {} +}; + +static const struct xe_rtp_entry lrc_was[] = { + { XE_RTP_NAME("1409342910, 14010698770, 14010443199, 1408979724, 1409178076, 1409207793, 1409217633, 1409252684, 1409347922, 1409142259"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), + XE_RTP_SET(GEN11_COMMON_SLICE_CHICKEN3, + GEN12_DISABLE_CPS_AWARE_COLOR_PIPE, + XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("WaDisableGPGPUMidThreadPreemption"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), + XE_RTP_FIELD_SET(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, + GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL, + XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("16011163337"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), + /* read verification is ignored due to 1608008084. 
*/ + XE_RTP_FIELD_SET_NO_READ_MASK(GEN12_FF_MODE2, FF_MODE2_GS_TIMER_MASK, + FF_MODE2_GS_TIMER_224) + }, + { XE_RTP_NAME("1409044764"), + XE_RTP_RULES(PLATFORM(DG1)), + XE_RTP_CLR(GEN11_COMMON_SLICE_CHICKEN3, + DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN, + XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("22010493298"), + XE_RTP_RULES(PLATFORM(DG1)), + XE_RTP_SET(HIZ_CHICKEN, + DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE, + XE_RTP_FLAG(MASKED_REG)) + }, + {} +}; + +/** + * xe_wa_process_gt - process GT workaround table + * @gt: GT instance to process workarounds for + * + * Process GT workaround table for this platform, saving in @gt all the + * workarounds that need to be applied at the GT level. + */ +void xe_wa_process_gt(struct xe_gt *gt) +{ + xe_rtp_process(gt_was, >->reg_sr, gt, NULL); +} + +/** + * xe_wa_process_engine - process engine workaround table + * @hwe: engine instance to process workarounds for + * + * Process engine workaround table for this platform, saving in @hwe all the + * workarounds that need to be applied at the engine level that match this + * engine. + */ +void xe_wa_process_engine(struct xe_hw_engine *hwe) +{ + xe_rtp_process(engine_was, &hwe->reg_sr, hwe->gt, hwe); +} + +/** + * xe_wa_process_lrc - process context workaround table + * @hwe: engine instance to process workarounds for + * + * Process context workaround table for this platform, saving in @hwe all the + * workarounds that need to be applied on context restore. These are workarounds + * touching registers that are part of the HW context image. + */ +void xe_wa_process_lrc(struct xe_hw_engine *hwe) +{ + xe_rtp_process(lrc_was, &hwe->reg_lrc, hwe->gt, hwe); +} diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h new file mode 100644 index 000000000000..cd2307d58795 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_wa.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_WA_ +#define _XE_WA_ + +struct xe_gt; +struct xe_hw_engine; + +void xe_wa_process_gt(struct xe_gt *gt); +void xe_wa_process_engine(struct xe_hw_engine *hwe); +void xe_wa_process_lrc(struct xe_hw_engine *hwe); + +void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c new file mode 100644 index 000000000000..8a8d814a0e7a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include +#include + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_macros.h" +#include "xe_vm.h" + +static int do_compare(u64 addr, u64 value, u64 mask, u16 op) +{ + u64 rvalue; + int err; + bool passed; + + err = copy_from_user(&rvalue, u64_to_user_ptr(addr), sizeof(rvalue)); + if (err) + return -EFAULT; + + switch (op) { + case DRM_XE_UFENCE_WAIT_EQ: + passed = (rvalue & mask) == (value & mask); + break; + case DRM_XE_UFENCE_WAIT_NEQ: + passed = (rvalue & mask) != (value & mask); + break; + case DRM_XE_UFENCE_WAIT_GT: + passed = (rvalue & mask) > (value & mask); + break; + case DRM_XE_UFENCE_WAIT_GTE: + passed = (rvalue & mask) >= (value & mask); + break; + case DRM_XE_UFENCE_WAIT_LT: + passed = (rvalue & mask) < (value & mask); + break; + case DRM_XE_UFENCE_WAIT_LTE: + passed = (rvalue & mask) <= (value & mask); + break; + default: + XE_BUG_ON("Not possible"); + } + + return passed ? 
0 : 1; +} + +static const enum xe_engine_class user_to_xe_engine_class[] = { + [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, + [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, + [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, + [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, + [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, +}; + +int check_hw_engines(struct xe_device *xe, + struct drm_xe_engine_class_instance *eci, + int num_engines) +{ + int i; + + for (i = 0; i < num_engines; ++i) { + enum xe_engine_class user_class = + user_to_xe_engine_class[eci[i].engine_class]; + + if (eci[i].gt_id >= xe->info.tile_count) + return -EINVAL; + + if (!xe_gt_hw_engine(xe_device_get_gt(xe, eci[i].gt_id), + user_class, eci[i].engine_instance, true)) + return -EINVAL; + } + + return 0; +} + +#define VALID_FLAGS (DRM_XE_UFENCE_WAIT_SOFT_OP | \ + DRM_XE_UFENCE_WAIT_ABSTIME | \ + DRM_XE_UFENCE_WAIT_VM_ERROR) +#define MAX_OP DRM_XE_UFENCE_WAIT_LTE + +int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + DEFINE_WAIT_FUNC(w_wait, woken_wake_function); + struct drm_xe_wait_user_fence *args = data; + struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE]; + struct drm_xe_engine_class_instance __user *user_eci = + u64_to_user_ptr(args->instances); + struct xe_vm *vm = NULL; + u64 addr = args->addr; + int err; + bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_SOFT_OP || + args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR; + unsigned long timeout = args->timeout; + + if (XE_IOCTL_ERR(xe, args->extensions)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->flags & ~VALID_FLAGS)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->op > MAX_OP)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, no_engines && + (args->num_engines || args->instances))) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, !no_engines && !args->num_engines)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) && + addr & 0x7)) + return -EINVAL; + + if (!no_engines) { + err = copy_from_user(eci, user_eci, + sizeof(struct drm_xe_engine_class_instance) * + args->num_engines); + if (XE_IOCTL_ERR(xe, err)) + return -EFAULT; + + if (XE_IOCTL_ERR(xe, check_hw_engines(xe, eci, + args->num_engines))) + return -EINVAL; + } + + if (args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) { + if (XE_IOCTL_ERR(xe, args->vm_id >> 32)) + return -EINVAL; + + vm = xe_vm_lookup(to_xe_file(file), args->vm_id); + if (XE_IOCTL_ERR(xe, !vm)) + return -ENOENT; + + if (XE_IOCTL_ERR(xe, !vm->async_ops.error_capture.addr)) { + xe_vm_put(vm); + return -ENOTSUPP; + } + + addr = vm->async_ops.error_capture.addr; + } + + if (XE_IOCTL_ERR(xe, timeout > MAX_SCHEDULE_TIMEOUT)) + return -EINVAL; + + /* + * FIXME: Very simple implementation at the moment, single wait queue + * for everything. Could be optimized to have a wait queue for every + * hardware engine. Open coding as 'do_compare' can sleep which doesn't + * work with the wait_event_* macros. 
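+	 * Hence the open-coded add_wait_queue() / wait_woken() /
+	 * remove_wait_queue() loop below.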
+ */ + if (vm) + add_wait_queue(&vm->async_ops.error_capture.wq, &w_wait); + else + add_wait_queue(&xe->ufence_wq, &w_wait); + for (;;) { + if (vm && xe_vm_is_closed(vm)) { + err = -ENODEV; + break; + } + err = do_compare(addr, args->value, args->mask, args->op); + if (err <= 0) + break; + + if (signal_pending(current)) { + err = -ERESTARTSYS; + break; + } + + if (!timeout) { + err = -ETIME; + break; + } + + timeout = wait_woken(&w_wait, TASK_INTERRUPTIBLE, timeout); + } + if (vm) { + remove_wait_queue(&vm->async_ops.error_capture.wq, &w_wait); + xe_vm_put(vm); + } else { + remove_wait_queue(&xe->ufence_wq, &w_wait); + } + if (XE_IOCTL_ERR(xe, err < 0)) + return err; + else if (XE_IOCTL_ERR(xe, !timeout)) + return -ETIME; + + /* + * Again very simple, return the time in jiffies that has past, may need + * a more precision + */ + if (args->flags & DRM_XE_UFENCE_WAIT_ABSTIME) + args->timeout = args->timeout - timeout; + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.h b/drivers/gpu/drm/xe/xe_wait_user_fence.h new file mode 100644 index 000000000000..0e268978f9e6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_WAIT_USER_FENCE_H_ +#define _XE_WAIT_USER_FENCE_H_ + +struct drm_device; +struct drm_file; + +int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); + +#endif diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c new file mode 100644 index 000000000000..e4a8d4a1899e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_wopcm.c @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_device.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_guc_reg.h" +#include "xe_mmio.h" +#include "xe_uc_fw.h" +#include "xe_wopcm.h" + +#include "i915_utils.h" + +/** + * DOC: Write Once Protected Content Memory (WOPCM) Layout + * + * The layout of the WOPCM will be fixed after writing to GuC WOPCM size and + * offset registers whose values are calculated and determined by HuC/GuC + * firmware size and set of hardware requirements/restrictions as shown below: + * + * :: + * + * +=========> +====================+ <== WOPCM Top + * ^ | HW contexts RSVD | + * | +===> +====================+ <== GuC WOPCM Top + * | ^ | | + * | | | | + * | | | | + * | GuC | | + * | WOPCM | | + * | Size +--------------------+ + * WOPCM | | GuC FW RSVD | + * | | +--------------------+ + * | | | GuC Stack RSVD | + * | | +------------------- + + * | v | GuC WOPCM RSVD | + * | +===> +====================+ <== GuC WOPCM base + * | | WOPCM RSVD | + * | +------------------- + <== HuC Firmware Top + * v | HuC FW | + * +=========> +====================+ <== WOPCM Base + * + * GuC accessible WOPCM starts at GuC WOPCM base and ends at GuC WOPCM top. + * The top part of the WOPCM is reserved for hardware contexts (e.g. RC6 + * context). + */ + +/* Default WOPCM size is 2MB from Gen11, 1MB on previous platforms */ +#define DGFX_WOPCM_SIZE SZ_4M /* FIXME: Larger size require + for 2 tile PVC, do a proper + probe sooner or later */ +#define MTL_WOPCM_SIZE SZ_4M /* FIXME: Larger size require + for MTL, do a proper probe + sooner or later */ +#define GEN11_WOPCM_SIZE SZ_2M +/* 16KB WOPCM (RSVD WOPCM) is reserved from HuC firmware top. */ +#define WOPCM_RESERVED_SIZE SZ_16K + +/* 16KB reserved at the beginning of GuC WOPCM. 
*/ +#define GUC_WOPCM_RESERVED SZ_16K +/* 8KB from GUC_WOPCM_RESERVED is reserved for GuC stack. */ +#define GUC_WOPCM_STACK_RESERVED SZ_8K + +/* GuC WOPCM Offset value needs to be aligned to 16KB. */ +#define GUC_WOPCM_OFFSET_ALIGNMENT (1UL << GUC_WOPCM_OFFSET_SHIFT) + +/* 36KB WOPCM reserved at the end of WOPCM on GEN11. */ +#define GEN11_WOPCM_HW_CTX_RESERVED (SZ_32K + SZ_4K) + +static inline struct xe_gt *wopcm_to_gt(struct xe_wopcm *wopcm) +{ + return container_of(wopcm, struct xe_gt, uc.wopcm); +} + +static inline struct xe_device *wopcm_to_xe(struct xe_wopcm *wopcm) +{ + return gt_to_xe(wopcm_to_gt(wopcm)); +} + +static u32 context_reserved_size(void) +{ + return GEN11_WOPCM_HW_CTX_RESERVED; +} + +static bool __check_layout(struct xe_device *xe, u32 wopcm_size, + u32 guc_wopcm_base, u32 guc_wopcm_size, + u32 guc_fw_size, u32 huc_fw_size) +{ + const u32 ctx_rsvd = context_reserved_size(); + u32 size; + + size = wopcm_size - ctx_rsvd; + if (unlikely(range_overflows(guc_wopcm_base, guc_wopcm_size, size))) { + drm_err(&xe->drm, + "WOPCM: invalid GuC region layout: %uK + %uK > %uK\n", + guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K, + size / SZ_1K); + return false; + } + + size = guc_fw_size + GUC_WOPCM_RESERVED + GUC_WOPCM_STACK_RESERVED; + if (unlikely(guc_wopcm_size < size)) { + drm_err(&xe->drm, "WOPCM: no space for %s: %uK < %uK\n", + xe_uc_fw_type_repr(XE_UC_FW_TYPE_GUC), + guc_wopcm_size / SZ_1K, size / SZ_1K); + return false; + } + + size = huc_fw_size + WOPCM_RESERVED_SIZE; + if (unlikely(guc_wopcm_base < size)) { + drm_err(&xe->drm, "WOPCM: no space for %s: %uK < %uK\n", + xe_uc_fw_type_repr(XE_UC_FW_TYPE_HUC), + guc_wopcm_base / SZ_1K, size / SZ_1K); + return false; + } + + return true; +} + +static bool __wopcm_regs_locked(struct xe_gt *gt, + u32 *guc_wopcm_base, u32 *guc_wopcm_size) +{ + u32 reg_base = xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET.reg); + u32 reg_size = xe_mmio_read32(gt, GUC_WOPCM_SIZE.reg); + + if (!(reg_size & GUC_WOPCM_SIZE_LOCKED) || + !(reg_base & GUC_WOPCM_OFFSET_VALID)) + return false; + + *guc_wopcm_base = reg_base & GUC_WOPCM_OFFSET_MASK; + *guc_wopcm_size = reg_size & GUC_WOPCM_SIZE_MASK; + return true; +} + +static int __wopcm_init_regs(struct xe_device *xe, struct xe_gt *gt, + struct xe_wopcm *wopcm) +{ + u32 base = wopcm->guc.base; + u32 size = wopcm->guc.size; + u32 huc_agent = xe_uc_fw_is_disabled(>->uc.huc.fw) ? 0 : + HUC_LOADING_AGENT_GUC; + u32 mask; + int err; + + XE_BUG_ON(!(base & GUC_WOPCM_OFFSET_MASK)); + XE_BUG_ON(base & ~GUC_WOPCM_OFFSET_MASK); + XE_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK)); + XE_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK); + + mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED; + err = xe_mmio_write32_and_verify(gt, GUC_WOPCM_SIZE.reg, size, mask, + size | GUC_WOPCM_SIZE_LOCKED); + if (err) + goto err_out; + + mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent; + err = xe_mmio_write32_and_verify(gt, DMA_GUC_WOPCM_OFFSET.reg, + base | huc_agent, mask, + base | huc_agent | + GUC_WOPCM_OFFSET_VALID); + if (err) + goto err_out; + + return 0; + +err_out: + drm_notice(&xe->drm, "Failed to init uC WOPCM registers!\n"); + drm_notice(&xe->drm, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET", + DMA_GUC_WOPCM_OFFSET.reg, + xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET.reg)); + drm_notice(&xe->drm, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE", + GUC_WOPCM_SIZE.reg, + xe_mmio_read32(gt, GUC_WOPCM_SIZE.reg)); + + return err; +} + +u32 xe_wopcm_size(struct xe_device *xe) +{ + return IS_DGFX(xe) ? DGFX_WOPCM_SIZE : + xe->info.platform == XE_METEORLAKE ? 
MTL_WOPCM_SIZE : + GEN11_WOPCM_SIZE; +} + +/** + * xe_wopcm_init() - Initialize the WOPCM structure. + * @wopcm: pointer to xe_wopcm. + * + * This function will partition WOPCM space based on GuC and HuC firmware sizes + * and will allocate max remaining for use by GuC. This function will also + * enforce platform dependent hardware restrictions on GuC WOPCM offset and + * size. It will fail the WOPCM init if any of these checks fail, so that the + * following WOPCM registers setup and GuC firmware uploading would be aborted. + */ +int xe_wopcm_init(struct xe_wopcm *wopcm) +{ + struct xe_device *xe = wopcm_to_xe(wopcm); + struct xe_gt *gt = wopcm_to_gt(wopcm); + u32 guc_fw_size = xe_uc_fw_get_upload_size(>->uc.guc.fw); + u32 huc_fw_size = xe_uc_fw_get_upload_size(>->uc.huc.fw); + u32 ctx_rsvd = context_reserved_size(); + u32 guc_wopcm_base; + u32 guc_wopcm_size; + bool locked; + int ret = 0; + + if (!guc_fw_size) + return -EINVAL; + + wopcm->size = xe_wopcm_size(xe); + drm_dbg(&xe->drm, "WOPCM: %uK\n", wopcm->size / SZ_1K); + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + XE_BUG_ON(guc_fw_size >= wopcm->size); + XE_BUG_ON(huc_fw_size >= wopcm->size); + XE_BUG_ON(ctx_rsvd + WOPCM_RESERVED_SIZE >= wopcm->size); + + locked = __wopcm_regs_locked(gt, &guc_wopcm_base, &guc_wopcm_size); + if (locked) { + drm_dbg(&xe->drm, "GuC WOPCM is already locked [%uK, %uK)\n", + guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K); + goto check; + } + + /* + * Aligned value of guc_wopcm_base will determine available WOPCM space + * for HuC firmware and mandatory reserved area. + */ + guc_wopcm_base = huc_fw_size + WOPCM_RESERVED_SIZE; + guc_wopcm_base = ALIGN(guc_wopcm_base, GUC_WOPCM_OFFSET_ALIGNMENT); + + /* + * Need to clamp guc_wopcm_base now to make sure the following math is + * correct. Formal check of whole WOPCM layout will be done below. + */ + guc_wopcm_base = min(guc_wopcm_base, wopcm->size - ctx_rsvd); + + /* Aligned remainings of usable WOPCM space can be assigned to GuC. */ + guc_wopcm_size = wopcm->size - ctx_rsvd - guc_wopcm_base; + guc_wopcm_size &= GUC_WOPCM_SIZE_MASK; + + drm_dbg(&xe->drm, "Calculated GuC WOPCM [%uK, %uK)\n", + guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K); + +check: + if (__check_layout(xe, wopcm->size, guc_wopcm_base, guc_wopcm_size, + guc_fw_size, huc_fw_size)) { + wopcm->guc.base = guc_wopcm_base; + wopcm->guc.size = guc_wopcm_size; + XE_BUG_ON(!wopcm->guc.base); + XE_BUG_ON(!wopcm->guc.size); + } else { + drm_notice(&xe->drm, "Unsuccessful WOPCM partitioning\n"); + return -E2BIG; + } + + if (!locked) + ret = __wopcm_init_regs(xe, gt, wopcm); + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_wopcm.h b/drivers/gpu/drm/xe/xe_wopcm.h new file mode 100644 index 000000000000..0197a282460b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_wopcm.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_WOPCM_H_ +#define _XE_WOPCM_H_ + +#include "xe_wopcm_types.h" + +struct xe_device; + +int xe_wopcm_init(struct xe_wopcm *wopcm); +u32 xe_wopcm_size(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_wopcm_types.h b/drivers/gpu/drm/xe/xe_wopcm_types.h new file mode 100644 index 000000000000..486d850c4084 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_wopcm_types.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_WOPCM_TYPES_H_ +#define _XE_WOPCM_TYPES_H_ + +#include + +/** + * struct xe_wopcm - Overall WOPCM info and WOPCM regions. 
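+ *
+ * @guc describes the GuC-accessible region carved out of the overall WOPCM;
+ * see the layout diagram in xe_wopcm.c.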
+ */ +struct xe_wopcm { + /** @size: Size of overall WOPCM */ + u32 size; + /** @guc: GuC WOPCM Region info */ + struct { + /** @base: GuC WOPCM base which is offset from WOPCM base */ + u32 base; + /** @size: Size of the GuC WOPCM region */ + u32 size; + } guc; +}; + +#endif diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h new file mode 100644 index 000000000000..e539594ed939 --- /dev/null +++ b/include/drm/xe_pciids.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_PCIIDS_H_ +#define _XE_PCIIDS_H_ + +/* + * Lists below can be turned into initializers for a struct pci_device_id + * by defining INTEL_VGA_DEVICE: + * + * #define INTEL_VGA_DEVICE(id, info) { \ + * 0x8086, id, \ + * ~0, ~0, \ + * 0x030000, 0xff0000, \ + * (unsigned long) info } + * + * And then calling like: + * + * XE_TGL_12_GT1_IDS(INTEL_VGA_DEVICE, ## __VA_ARGS__) + * + * To turn them into something else, just provide a different macro passed as + * first argument. + */ + +/* TGL */ +#define XE_TGL_GT1_IDS(MACRO__, ...) \ + MACRO__(0x9A60, ## __VA_ARGS__), \ + MACRO__(0x9A68, ## __VA_ARGS__), \ + MACRO__(0x9A70, ## __VA_ARGS__) + +#define XE_TGL_GT2_IDS(MACRO__, ...) \ + MACRO__(0x9A40, ## __VA_ARGS__), \ + MACRO__(0x9A49, ## __VA_ARGS__), \ + MACRO__(0x9A59, ## __VA_ARGS__), \ + MACRO__(0x9A78, ## __VA_ARGS__), \ + MACRO__(0x9AC0, ## __VA_ARGS__), \ + MACRO__(0x9AC9, ## __VA_ARGS__), \ + MACRO__(0x9AD9, ## __VA_ARGS__), \ + MACRO__(0x9AF8, ## __VA_ARGS__) + +#define XE_TGL_IDS(MACRO__, ...) \ + XE_TGL_GT1_IDS(MACRO__, ...), \ + XE_TGL_GT2_IDS(MACRO__, ...) + +/* RKL */ +#define XE_RKL_IDS(MACRO__, ...) \ + MACRO__(0x4C80, ## __VA_ARGS__), \ + MACRO__(0x4C8A, ## __VA_ARGS__), \ + MACRO__(0x4C8B, ## __VA_ARGS__), \ + MACRO__(0x4C8C, ## __VA_ARGS__), \ + MACRO__(0x4C90, ## __VA_ARGS__), \ + MACRO__(0x4C9A, ## __VA_ARGS__) + +/* DG1 */ +#define XE_DG1_IDS(MACRO__, ...) \ + MACRO__(0x4905, ## __VA_ARGS__), \ + MACRO__(0x4906, ## __VA_ARGS__), \ + MACRO__(0x4907, ## __VA_ARGS__), \ + MACRO__(0x4908, ## __VA_ARGS__), \ + MACRO__(0x4909, ## __VA_ARGS__) + +/* ADL-S */ +#define XE_ADLS_IDS(MACRO__, ...) \ + MACRO__(0x4680, ## __VA_ARGS__), \ + MACRO__(0x4682, ## __VA_ARGS__), \ + MACRO__(0x4688, ## __VA_ARGS__), \ + MACRO__(0x468A, ## __VA_ARGS__), \ + MACRO__(0x4690, ## __VA_ARGS__), \ + MACRO__(0x4692, ## __VA_ARGS__), \ + MACRO__(0x4693, ## __VA_ARGS__) + +/* ADL-P */ +#define XE_ADLP_IDS(MACRO__, ...) \ + MACRO__(0x46A0, ## __VA_ARGS__), \ + MACRO__(0x46A1, ## __VA_ARGS__), \ + MACRO__(0x46A2, ## __VA_ARGS__), \ + MACRO__(0x46A3, ## __VA_ARGS__), \ + MACRO__(0x46A6, ## __VA_ARGS__), \ + MACRO__(0x46A8, ## __VA_ARGS__), \ + MACRO__(0x46AA, ## __VA_ARGS__), \ + MACRO__(0x462A, ## __VA_ARGS__), \ + MACRO__(0x4626, ## __VA_ARGS__), \ + MACRO__(0x4628, ## __VA_ARGS__), \ + MACRO__(0x46B0, ## __VA_ARGS__), \ + MACRO__(0x46B1, ## __VA_ARGS__), \ + MACRO__(0x46B2, ## __VA_ARGS__), \ + MACRO__(0x46B3, ## __VA_ARGS__), \ + MACRO__(0x46C0, ## __VA_ARGS__), \ + MACRO__(0x46C1, ## __VA_ARGS__), \ + MACRO__(0x46C2, ## __VA_ARGS__), \ + MACRO__(0x46C3, ## __VA_ARGS__) + +/* ADL-N */ +#define XE_ADLN_IDS(MACRO__, ...) \ + MACRO__(0x46D0, ## __VA_ARGS__), \ + MACRO__(0x46D1, ## __VA_ARGS__), \ + MACRO__(0x46D2, ## __VA_ARGS__) + +/* RPL-S */ +#define XE_RPLS_IDS(MACRO__, ...) 
\ + MACRO__(0xA780, ## __VA_ARGS__), \ + MACRO__(0xA781, ## __VA_ARGS__), \ + MACRO__(0xA782, ## __VA_ARGS__), \ + MACRO__(0xA783, ## __VA_ARGS__), \ + MACRO__(0xA788, ## __VA_ARGS__), \ + MACRO__(0xA789, ## __VA_ARGS__), \ + MACRO__(0xA78A, ## __VA_ARGS__), \ + MACRO__(0xA78B, ## __VA_ARGS__) + +/* RPL-U */ +#define XE_RPLU_IDS(MACRO__, ...) \ + MACRO__(0xA721, ## __VA_ARGS__), \ + MACRO__(0xA7A1, ## __VA_ARGS__), \ + MACRO__(0xA7A9, ## __VA_ARGS__) + +/* RPL-P */ +#define XE_RPLP_IDS(MACRO__, ...) \ + MACRO__(0xA720, ## __VA_ARGS__), \ + MACRO__(0xA7A0, ## __VA_ARGS__), \ + MACRO__(0xA7A8, ## __VA_ARGS__) + +/* DG2 */ +#define XE_DG2_G10_IDS(MACRO__, ...) \ + MACRO__(0x5690, ## __VA_ARGS__), \ + MACRO__(0x5691, ## __VA_ARGS__), \ + MACRO__(0x5692, ## __VA_ARGS__), \ + MACRO__(0x56A0, ## __VA_ARGS__), \ + MACRO__(0x56A1, ## __VA_ARGS__), \ + MACRO__(0x56A2, ## __VA_ARGS__) + +#define XE_DG2_G11_IDS(MACRO__, ...) \ + MACRO__(0x5693, ## __VA_ARGS__), \ + MACRO__(0x5694, ## __VA_ARGS__), \ + MACRO__(0x5695, ## __VA_ARGS__), \ + MACRO__(0x5698, ## __VA_ARGS__), \ + MACRO__(0x56A5, ## __VA_ARGS__), \ + MACRO__(0x56A6, ## __VA_ARGS__), \ + MACRO__(0x56B0, ## __VA_ARGS__), \ + MACRO__(0x56B1, ## __VA_ARGS__) + +#define XE_DG2_G12_IDS(MACRO__, ...) \ + MACRO__(0x5696, ## __VA_ARGS__), \ + MACRO__(0x5697, ## __VA_ARGS__), \ + MACRO__(0x56A3, ## __VA_ARGS__), \ + MACRO__(0x56A4, ## __VA_ARGS__), \ + MACRO__(0x56B2, ## __VA_ARGS__), \ + MACRO__(0x56B3, ## __VA_ARGS__) + +#define XE_DG2_IDS(MACRO__, ...) \ + XE_DG2_G10_IDS(MACRO__, ## __VA_ARGS__),\ + XE_DG2_G11_IDS(MACRO__, ## __VA_ARGS__),\ + XE_DG2_G12_IDS(MACRO__, ## __VA_ARGS__) + +#define XE_ATS_M150_IDS(MACRO__, ...) \ + MACRO__(0x56C0, ## __VA_ARGS__) + +#define XE_ATS_M75_IDS(MACRO__, ...) \ + MACRO__(0x56C1, ## __VA_ARGS__) + +#define XE_ATS_M_IDS(MACRO__, ...) \ + XE_ATS_M150_IDS(MACRO__, ## __VA_ARGS__),\ + XE_ATS_M75_IDS(MACRO__, ## __VA_ARGS__) + +/* MTL */ +#define XE_MTL_M_IDS(MACRO__, ...) \ + MACRO__(0x7D40, ## __VA_ARGS__), \ + MACRO__(0x7D43, ## __VA_ARGS__), \ + MACRO__(0x7DC0, ## __VA_ARGS__) + +#define XE_MTL_P_IDS(MACRO__, ...) \ + MACRO__(0x7D45, ## __VA_ARGS__), \ + MACRO__(0x7D47, ## __VA_ARGS__), \ + MACRO__(0x7D50, ## __VA_ARGS__), \ + MACRO__(0x7D55, ## __VA_ARGS__), \ + MACRO__(0x7DC5, ## __VA_ARGS__), \ + MACRO__(0x7DD0, ## __VA_ARGS__), \ + MACRO__(0x7DD5, ## __VA_ARGS__) + +#define XE_MTL_S_IDS(MACRO__, ...) \ + MACRO__(0x7D60, ## __VA_ARGS__), \ + MACRO__(0x7DE0, ## __VA_ARGS__) + +#define XE_ARL_IDS(MACRO__, ...) \ + MACRO__(0x7D66, ## __VA_ARGS__), \ + MACRO__(0x7D76, ## __VA_ARGS__) + +#define XE_MTL_IDS(MACRO__, ...) \ + XE_MTL_M_IDS(MACRO__, ## __VA_ARGS__), \ + XE_MTL_P_IDS(MACRO__, ## __VA_ARGS__), \ + XE_MTL_S_IDS(MACRO__, ## __VA_ARGS__), \ + XE_ARL_IDS(MACRO__, ## __VA_ARGS__) + +#endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h new file mode 100644 index 000000000000..f64b1c785fad --- /dev/null +++ b/include/uapi/drm/xe_drm.h @@ -0,0 +1,787 @@ +/* + * Copyright 2021 Intel Corporation. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef _UAPI_XE_DRM_H_ +#define _UAPI_XE_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Please note that modifications to all structs defined here are + * subject to backwards-compatibility constraints. + */ + +/** + * struct i915_user_extension - Base class for defining a chain of extensions + * + * Many interfaces need to grow over time. In most cases we can simply + * extend the struct and have userspace pass in more data. Another option, + * as demonstrated by Vulkan's approach to providing extensions for forward + * and backward compatibility, is to use a list of optional structs to + * provide those extra details. + * + * The key advantage to using an extension chain is that it allows us to + * redefine the interface more easily than an ever growing struct of + * increasing complexity, and for large parts of that interface to be + * entirely optional. The downside is more pointer chasing; chasing across + * the __user boundary with pointers encapsulated inside u64. + * + * Example chaining: + * + * .. code-block:: C + * + * struct i915_user_extension ext3 { + * .next_extension = 0, // end + * .name = ..., + * }; + * struct i915_user_extension ext2 { + * .next_extension = (uintptr_t)&ext3, + * .name = ..., + * }; + * struct i915_user_extension ext1 { + * .next_extension = (uintptr_t)&ext2, + * .name = ..., + * }; + * + * Typically the struct i915_user_extension would be embedded in some uAPI + * struct, and in this case we would feed it the head of the chain(i.e ext1), + * which would then apply all of the above extensions. + * + */ +struct xe_user_extension { + /** + * @next_extension: + * + * Pointer to the next struct i915_user_extension, or zero if the end. + */ + __u64 next_extension; + /** + * @name: Name of the extension. + * + * Note that the name here is just some integer. + * + * Also note that the name space for this is not global for the whole + * driver, but rather its scope/meaning is limited to the specific piece + * of uAPI which has embedded the struct i915_user_extension. + */ + __u32 name; + /** + * @flags: MBZ + * + * All undefined bits must be zero. + */ + __u32 pad; +}; + +/* + * i915 specific ioctls. + * + * The device specific ioctl range is [DRM_COMMAND_BASE, DRM_COMMAND_END) ie + * [0x40, 0xa0) (a0 is excluded). 
The numbers below are defined as offset + * against DRM_COMMAND_BASE and should be between [0x0, 0x60). + */ +#define DRM_XE_DEVICE_QUERY 0x00 +#define DRM_XE_GEM_CREATE 0x01 +#define DRM_XE_GEM_MMAP_OFFSET 0x02 +#define DRM_XE_VM_CREATE 0x03 +#define DRM_XE_VM_DESTROY 0x04 +#define DRM_XE_VM_BIND 0x05 +#define DRM_XE_ENGINE_CREATE 0x06 +#define DRM_XE_ENGINE_DESTROY 0x07 +#define DRM_XE_EXEC 0x08 +#define DRM_XE_MMIO 0x09 +#define DRM_XE_ENGINE_SET_PROPERTY 0x0a +#define DRM_XE_WAIT_USER_FENCE 0x0b +#define DRM_XE_VM_MADVISE 0x0c + +/* Must be kept compact -- no holes */ +#define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query) +#define DRM_IOCTL_XE_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_CREATE, struct drm_xe_gem_create) +#define DRM_IOCTL_XE_GEM_MMAP_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_MMAP_OFFSET, struct drm_xe_gem_mmap_offset) +#define DRM_IOCTL_XE_VM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_CREATE, struct drm_xe_vm_create) +#define DRM_IOCTL_XE_VM_DESTROY DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy) +#define DRM_IOCTL_XE_VM_BIND DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind) +#define DRM_IOCTL_XE_ENGINE_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_ENGINE_CREATE, struct drm_xe_engine_create) +#define DRM_IOCTL_XE_ENGINE_DESTROY DRM_IOW( DRM_COMMAND_BASE + DRM_XE_ENGINE_DESTROY, struct drm_xe_engine_destroy) +#define DRM_IOCTL_XE_EXEC DRM_IOW( DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) +#define DRM_IOCTL_XE_MMIO DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_MMIO, struct drm_xe_mmio) +#define DRM_IOCTL_XE_ENGINE_SET_PROPERTY DRM_IOW( DRM_COMMAND_BASE + DRM_XE_ENGINE_SET_PROPERTY, struct drm_xe_engine_set_property) +#define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) +#define DRM_IOCTL_XE_VM_MADVISE DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise) + +struct drm_xe_engine_class_instance { + __u16 engine_class; + +#define DRM_XE_ENGINE_CLASS_RENDER 0 +#define DRM_XE_ENGINE_CLASS_COPY 1 +#define DRM_XE_ENGINE_CLASS_VIDEO_DECODE 2 +#define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE 3 +#define DRM_XE_ENGINE_CLASS_COMPUTE 4 + /* + * Kernel only class (not actual hardware engine class). Used for + * creating ordered queues of VM bind operations. + */ +#define DRM_XE_ENGINE_CLASS_VM_BIND 5 + + __u16 engine_instance; + __u16 gt_id; +}; + +#define XE_MEM_REGION_CLASS_SYSMEM 0 +#define XE_MEM_REGION_CLASS_VRAM 1 + +struct drm_xe_query_mem_usage { + __u32 num_regions; + __u32 pad; + + struct drm_xe_query_mem_region { + __u16 mem_class; + __u16 instance; /* unique ID even among different classes */ + __u32 pad; + __u32 min_page_size; + __u32 max_page_size; + __u64 total_size; + __u64 used; + __u64 reserved[8]; + } regions[]; +}; + +struct drm_xe_query_config { + __u32 num_params; + __u32 pad; +#define XE_QUERY_CONFIG_REV_AND_DEVICE_ID 0 +#define XE_QUERY_CONFIG_FLAGS 1 + #define XE_QUERY_CONFIG_FLAGS_HAS_VRAM (0x1 << 0) + #define XE_QUERY_CONFIG_FLAGS_USE_GUC (0x1 << 1) +#define XE_QUERY_CONFIG_MIN_ALIGNEMENT 2 +#define XE_QUERY_CONFIG_VA_BITS 3 +#define XE_QUERY_CONFIG_GT_COUNT 4 +#define XE_QUERY_CONFIG_MEM_REGION_COUNT 5 +#define XE_QUERY_CONFIG_NUM_PARAM XE_QUERY_CONFIG_MEM_REGION_COUNT + 1 + __u64 info[]; +}; + +struct drm_xe_query_gts { + __u32 num_gt; + __u32 pad; + + /* + * TODO: Perhaps info about every mem region relative to this GT? e.g. + * bandwidth between this GT and remote region? 
+ */ + + struct drm_xe_query_gt { +#define XE_QUERY_GT_TYPE_MAIN 0 +#define XE_QUERY_GT_TYPE_REMOTE 1 +#define XE_QUERY_GT_TYPE_MEDIA 2 + __u16 type; + __u16 instance; + __u32 clock_freq; + __u64 features; + __u64 native_mem_regions; /* bit mask of instances from drm_xe_query_mem_usage */ + __u64 slow_mem_regions; /* bit mask of instances from drm_xe_query_mem_usage */ + __u64 inaccessible_mem_regions; /* bit mask of instances from drm_xe_query_mem_usage */ + __u64 reserved[8]; + } gts[]; +}; + +struct drm_xe_query_topology_mask { + /** @gt_id: GT ID the mask is associated with */ + __u16 gt_id; + + /** @type: type of mask */ + __u16 type; +#define XE_TOPO_DSS_GEOMETRY (1 << 0) +#define XE_TOPO_DSS_COMPUTE (1 << 1) +#define XE_TOPO_EU_PER_DSS (1 << 2) + + /** @num_bytes: number of bytes in requested mask */ + __u32 num_bytes; + + /** @mask: little-endian mask of @num_bytes */ + __u8 mask[]; +}; + +struct drm_xe_device_query { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @query: The type of data to query */ + __u32 query; + +#define DRM_XE_DEVICE_QUERY_ENGINES 0 +#define DRM_XE_DEVICE_QUERY_MEM_USAGE 1 +#define DRM_XE_DEVICE_QUERY_CONFIG 2 +#define DRM_XE_DEVICE_QUERY_GTS 3 +#define DRM_XE_DEVICE_QUERY_HWCONFIG 4 +#define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY 5 + + /** @size: Size of the queried data */ + __u32 size; + + /** @data: Queried data is placed here */ + __u64 data; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_gem_create { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** + * @size: Requested size for the object + * + * The (page-aligned) allocated size for the object will be returned. + */ + __u64 size; + + /** + * @flags: Flags, currently a mask of memory instances of where BO can + * be placed + */ +#define XE_GEM_CREATE_FLAG_DEFER_BACKING (0x1 << 24) +#define XE_GEM_CREATE_FLAG_SCANOUT (0x1 << 25) + __u32 flags; + + /** + * @vm_id: Attached VM, if any + * + * If a VM is specified, this BO must: + * + * 1. Only ever be bound to that VM. + * + * 2. Cannot be exported as a PRIME fd. + */ + __u32 vm_id; + + /** + * @handle: Returned handle for the object. + * + * Object handles are nonzero. + */ + __u32 handle; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_gem_mmap_offset { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @handle: Handle for the object being mapped. 
*/ + __u32 handle; + + /** @flags: Must be zero */ + __u32 flags; + + /** @offset: The fake offset to use for subsequent mmap call */ + __u64 offset; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +/** + * struct drm_xe_vm_bind_op_error_capture - format of VM bind op error capture + */ +struct drm_xe_vm_bind_op_error_capture { + /** @error: errno that occured */ + __s32 error; + /** @op: operation that encounter an error */ + __u32 op; + /** @addr: address of bind op */ + __u64 addr; + /** @size: size of bind */ + __u64 size; +}; + +/** struct drm_xe_ext_vm_set_property - VM set property extension */ +struct drm_xe_ext_vm_set_property { + /** @base: base user extension */ + struct xe_user_extension base; + + /** @property: property to set */ +#define XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS 0 + __u32 property; + + /** @value: property value */ + __u64 value; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_vm_create { + /** @extensions: Pointer to the first extension struct, if any */ +#define XE_VM_EXTENSION_SET_PROPERTY 0 + __u64 extensions; + + /** @flags: Flags */ + __u32 flags; + +#define DRM_XE_VM_CREATE_SCRATCH_PAGE (0x1 << 0) +#define DRM_XE_VM_CREATE_COMPUTE_MODE (0x1 << 1) +#define DRM_XE_VM_CREATE_ASYNC_BIND_OPS (0x1 << 2) +#define DRM_XE_VM_CREATE_FAULT_MODE (0x1 << 3) + + /** @vm_id: Returned VM ID */ + __u32 vm_id; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_vm_destroy { + /** @vm_id: VM ID */ + __u32 vm_id; + + /** @pad: MBZ */ + __u32 pad; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_vm_bind_op { + /** + * @obj: GEM object to operate on, MBZ for MAP_USERPTR, MBZ for UNMAP + */ + __u32 obj; + + union { + /** + * @obj_offset: Offset into the object, MBZ for CLEAR_RANGE, + * ignored for unbind + */ + __u64 obj_offset; + /** @userptr: user pointer to bind on */ + __u64 userptr; + }; + + /** + * @range: Number of bytes from the object to bind to addr, MBZ for UNMAP_ALL + */ + __u64 range; + + /** @addr: Address to operate on, MBZ for UNMAP_ALL */ + __u64 addr; + + /** + * @gt_mask: Mask for which GTs to create binds for, 0 == All GTs, + * only applies to creating new VMAs + */ + __u64 gt_mask; + + /** @op: Operation to perform (lower 16 bits) and flags (upper 16 bits) */ + __u32 op; + + /** @mem_region: Memory region to prefetch VMA to, instance not a mask */ + __u32 region; + +#define XE_VM_BIND_OP_MAP 0x0 +#define XE_VM_BIND_OP_UNMAP 0x1 +#define XE_VM_BIND_OP_MAP_USERPTR 0x2 +#define XE_VM_BIND_OP_RESTART 0x3 +#define XE_VM_BIND_OP_UNMAP_ALL 0x4 +#define XE_VM_BIND_OP_PREFETCH 0x5 + +#define XE_VM_BIND_FLAG_READONLY (0x1 << 16) + /* + * A bind ops completions are always async, hence the support for out + * sync. This flag indicates the allocation of the memory for new page + * tables and the job to program the pages tables is asynchronous + * relative to the IOCTL. That part of a bind operation can fail under + * memory pressure, the job in practice can't fail unless the system is + * totally shot. + * + * If this flag is clear and the IOCTL doesn't return an error, in + * practice the bind op is good and will complete. + * + * If this flag is set and doesn't return return an error, the bind op + * can still fail and recovery is needed. If configured, the bind op that + * caused the error will be captured in drm_xe_vm_bind_op_error_capture. 
+ * Once the user sees the error (via a ufence + + * XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS), it should free memory + * via non-async unbinds, and then restart all queue'd async binds op via + * XE_VM_BIND_OP_RESTART. Or alternatively the user should destroy the + * VM. + * + * This flag is only allowed when DRM_XE_VM_CREATE_ASYNC_BIND_OPS is + * configured in the VM and must be set if the VM is configured with + * DRM_XE_VM_CREATE_ASYNC_BIND_OPS and not in an error state. + */ +#define XE_VM_BIND_FLAG_ASYNC (0x1 << 17) + /* + * Valid on a faulting VM only, do the MAP operation immediately rather + * than differing the MAP to the page fault handler. + */ +#define XE_VM_BIND_FLAG_IMMEDIATE (0x1 << 18) + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_vm_bind { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @vm_id: The ID of the VM to bind to */ + __u32 vm_id; + + /** + * @engine_id: engine_id, must be of class DRM_XE_ENGINE_CLASS_VM_BIND + * and engine must have same vm_id. If zero, the default VM bind engine + * is used. + */ + __u32 engine_id; + + /** @num_binds: number of binds in this IOCTL */ + __u32 num_binds; + + union { + /** @bind: used if num_binds == 1 */ + struct drm_xe_vm_bind_op bind; + /** + * @vector_of_binds: userptr to array of struct + * drm_xe_vm_bind_op if num_binds > 1 + */ + __u64 vector_of_binds; + }; + + /** @num_syncs: amount of syncs to wait on */ + __u32 num_syncs; + + /** @syncs: pointer to struct drm_xe_sync array */ + __u64 syncs; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +/** struct drm_xe_ext_engine_set_property - engine set property extension */ +struct drm_xe_ext_engine_set_property { + /** @base: base user extension */ + struct xe_user_extension base; + + /** @property: property to set */ + __u32 property; + + /** @value: property value */ + __u64 value; +}; + +/** + * struct drm_xe_engine_set_property - engine set property + * + * Same namespace for extensions as drm_xe_engine_create + */ +struct drm_xe_engine_set_property { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @engine_id: Engine ID */ + __u32 engine_id; + + /** @property: property to set */ +#define XE_ENGINE_PROPERTY_PRIORITY 0 +#define XE_ENGINE_PROPERTY_TIMESLICE 1 +#define XE_ENGINE_PROPERTY_PREEMPTION_TIMEOUT 2 + /* + * Long running or ULLS engine mode. DMA fences not allowed in this + * mode. Must match the value of DRM_XE_VM_CREATE_COMPUTE_MODE, serves + * as a sanity check the UMD knows what it is doing. Can only be set at + * engine create time. 
+ */ +#define XE_ENGINE_PROPERTY_COMPUTE_MODE 3 +#define XE_ENGINE_PROPERTY_PERSISTENCE 4 +#define XE_ENGINE_PROPERTY_JOB_TIMEOUT 5 +#define XE_ENGINE_PROPERTY_ACC_TRIGGER 6 +#define XE_ENGINE_PROPERTY_ACC_NOTIFY 7 +#define XE_ENGINE_PROPERTY_ACC_GRANULARITY 8 + __u32 property; + + /** @value: property value */ + __u64 value; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_engine_create { + /** @extensions: Pointer to the first extension struct, if any */ +#define XE_ENGINE_EXTENSION_SET_PROPERTY 0 + __u64 extensions; + + /** @width: submission width (number BB per exec) for this engine */ + __u16 width; + + /** @num_placements: number of valid placements for this engine */ + __u16 num_placements; + + /** @vm_id: VM to use for this engine */ + __u32 vm_id; + + /** @flags: MBZ */ + __u32 flags; + + /** @engine_id: Returned engine ID */ + __u32 engine_id; + + /** + * @instances: user pointer to a 2-d array of struct + * drm_xe_engine_class_instance + * + * length = width (i) * num_placements (j) + * index = j + i * width + */ + __u64 instances; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_engine_destroy { + /** @vm_id: VM ID */ + __u32 engine_id; + + /** @pad: MBZ */ + __u32 pad; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_sync { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + __u32 flags; + +#define DRM_XE_SYNC_SYNCOBJ 0x0 +#define DRM_XE_SYNC_TIMELINE_SYNCOBJ 0x1 +#define DRM_XE_SYNC_DMA_BUF 0x2 +#define DRM_XE_SYNC_USER_FENCE 0x3 +#define DRM_XE_SYNC_SIGNAL 0x10 + + union { + __u32 handle; + /** + * @addr: Address of user fence. When sync passed in via exec + * IOCTL this a GPU address in the VM. When sync passed in via + * VM bind IOCTL this is a user pointer. In either case, it is + * the users responsibility that this address is present and + * mapped when the user fence is signalled. Must be qword + * aligned. + */ + __u64 addr; + }; + + __u64 timeline_value; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_exec { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @engine_id: Engine ID for the batch buffer */ + __u32 engine_id; + + /** @num_syncs: Amount of struct drm_xe_sync in array. */ + __u32 num_syncs; + + /** @syncs: Pointer to struct drm_xe_sync array. */ + __u64 syncs; + + /** + * @address: address of batch buffer if num_batch_buffer == 1 or an + * array of batch buffer addresses + */ + __u64 address; + + /** + * @num_batch_buffer: number of batch buffer in this exec, must match + * the width of the engine + */ + __u16 num_batch_buffer; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_mmio { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + __u32 addr; + + __u32 flags; + +#define DRM_XE_MMIO_8BIT 0x0 +#define DRM_XE_MMIO_16BIT 0x1 +#define DRM_XE_MMIO_32BIT 0x2 +#define DRM_XE_MMIO_64BIT 0x3 +#define DRM_XE_MMIO_BITS_MASK 0x3 +#define DRM_XE_MMIO_READ 0x4 +#define DRM_XE_MMIO_WRITE 0x8 + + __u64 value; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +/** + * struct drm_xe_wait_user_fence - wait user fence + * + * Wait on user fence, XE will wakeup on every HW engine interrupt in the + * instances list and check if user fence is complete: + * (*addr & MASK) OP (VALUE & MASK) + * + * Returns to user on user fence completion or timeout. 
+ */ +struct drm_xe_wait_user_fence { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + union { + /** + * @addr: user pointer address to wait on, must qword aligned + */ + __u64 addr; + /** + * @vm_id: The ID of the VM which encounter an error used with + * DRM_XE_UFENCE_WAIT_VM_ERROR. Upper 32 bits must be clear. + */ + __u64 vm_id; + }; + /** @op: wait operation (type of comparison) */ +#define DRM_XE_UFENCE_WAIT_EQ 0 +#define DRM_XE_UFENCE_WAIT_NEQ 1 +#define DRM_XE_UFENCE_WAIT_GT 2 +#define DRM_XE_UFENCE_WAIT_GTE 3 +#define DRM_XE_UFENCE_WAIT_LT 4 +#define DRM_XE_UFENCE_WAIT_LTE 5 + __u16 op; + /** @flags: wait flags */ +#define DRM_XE_UFENCE_WAIT_SOFT_OP (1 << 0) /* e.g. Wait on VM bind */ +#define DRM_XE_UFENCE_WAIT_ABSTIME (1 << 1) +#define DRM_XE_UFENCE_WAIT_VM_ERROR (1 << 2) + __u16 flags; + /** @value: compare value */ + __u64 value; + /** @mask: comparison mask */ +#define DRM_XE_UFENCE_WAIT_U8 0xffu +#define DRM_XE_UFENCE_WAIT_U16 0xffffu +#define DRM_XE_UFENCE_WAIT_U32 0xffffffffu +#define DRM_XE_UFENCE_WAIT_U64 0xffffffffffffffffu + __u64 mask; + /** @timeout: how long to wait before bailing, value in jiffies */ + __s64 timeout; + /** + * @num_engines: number of engine instances to wait on, must be zero + * when DRM_XE_UFENCE_WAIT_SOFT_OP set + */ + __u64 num_engines; + /** + * @instances: user pointer to array of drm_xe_engine_class_instance to + * wait on, must be NULL when DRM_XE_UFENCE_WAIT_SOFT_OP set + */ + __u64 instances; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_vm_madvise { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @vm_id: The ID VM in which the VMA exists */ + __u32 vm_id; + + /** @range: Number of bytes in the VMA */ + __u64 range; + + /** @addr: Address of the VMA to operation on */ + __u64 addr; + + /* + * Setting the preferred location will trigger a migrate of the VMA + * backing store to new location if the backing store is already + * allocated. + */ +#define DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS 0 +#define DRM_XE_VM_MADVISE_PREFERRED_GT 1 + /* + * In this case lower 32 bits are mem class, upper 32 are GT. + * Combination provides a single IOCTL plus migrate VMA to preferred + * location. + */ +#define DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS_GT 2 + /* + * The CPU will do atomic memory operations to this VMA. Must be set on + * some devices for atomics to behave correctly. + */ +#define DRM_XE_VM_MADVISE_CPU_ATOMIC 3 + /* + * The device will do atomic memory operations to this VMA. Must be set + * on some devices for atomics to behave correctly. + */ +#define DRM_XE_VM_MADVISE_DEVICE_ATOMIC 4 + /* + * Priority WRT to eviction (moving from preferred memory location due + * to memory pressure). The lower the priority, the more likely to be + * evicted. 
+ */ +#define DRM_XE_VM_MADVISE_PRIORITY 5 +#define DRM_XE_VMA_PRIORITY_LOW 0 +#define DRM_XE_VMA_PRIORITY_NORMAL 1 /* Default */ +#define DRM_XE_VMA_PRIORITY_HIGH 2 /* Must be elevated user */ + /* Pin the VMA in memory, must be elevated user */ +#define DRM_XE_VM_MADVISE_PIN 6 + + /** @property: property to set */ + __u32 property; + + /** @value: property value */ + __u64 value; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _UAPI_XE_DRM_H_ */ -- cgit v1.2.3 From 37c476d68d29051f333944bd784d1054b495c5a8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 12 Dec 2023 20:32:26 -0800 Subject: drm/drm_modeset_helper_vtables.h: fix typos/spellos Fix spelling problems as identified by codespell. Signed-off-by: Randy Dunlap Cc: David Airlie Cc: Daniel Vetter Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Reviewed-by: Jani Nikula Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20231213043226.10046-1-rdunlap@infradead.org --- include/drm/drm_modeset_helper_vtables.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/drm') diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h index a33cf7488737..881b03e4dc28 100644 --- a/include/drm/drm_modeset_helper_vtables.h +++ b/include/drm/drm_modeset_helper_vtables.h @@ -134,7 +134,7 @@ struct drm_crtc_helper_funcs { * Since this function is both called from the check phase of an atomic * commit, and the mode validation in the probe paths it is not allowed * to look at anything else but the passed-in mode, and validate it - * against configuration-invariant hardward constraints. Any further + * against configuration-invariant hardware constraints. Any further * limits which depend upon the configuration can only be checked in * @mode_fixup or @atomic_check. * @@ -550,7 +550,7 @@ struct drm_encoder_helper_funcs { * Since this function is both called from the check phase of an atomic * commit, and the mode validation in the probe paths it is not allowed * to look at anything else but the passed-in mode, and validate it - * against configuration-invariant hardward constraints. Any further + * against configuration-invariant hardware constraints. Any further * limits which depend upon the configuration can only be checked in * @mode_fixup or @atomic_check. * @@ -1474,7 +1474,7 @@ struct drm_mode_config_helper_funcs { * swapped into the various state pointers. The passed in state * therefore contains copies of the old/previous state. This hook should * commit the new state into hardware. Note that the helpers have - * already waited for preceeding atomic commits and fences, but drivers + * already waited for preceding atomic commits and fences, but drivers * can add more waiting calls at the start of their implementation, e.g. * to wait for driver-internal request for implicit syncing, before * starting to commit the update to the hardware. -- cgit v1.2.3 From 1e13c5644c443dee727ac1330bc118c909a1cf07 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 16 Nov 2023 18:57:41 -0100 Subject: drm/drm_mode_object: increase max objects to accommodate new color props DRM_OBJECT_MAX_PROPERTY limits the number of properties to be attached and we are increasing that value all time we add a new property (generic or driver-specific). 
In this series, we are adding 13 new KMS driver-specific properties for AMD color manage: - CRTC Gamma enumerated Transfer Function - Plane: Degamma LUT+size+TF, HDR multiplier, shaper LUT+size+TF, 3D LUT+size, blend LUT+size+TF (12) Therefore, just increase DRM_OBJECT_MAX_PROPERTY to a number (64) that accomodates these new properties and gives some room for others, avoiding change this number everytime we add a new KMS property. Reviewed-by: Harry Wentland Reviewed-by: Simon Ser Signed-off-by: Melissa Wen Reviewed-by: Abhinav Kumar Acked-by: Maxime Ripard Signed-off-by: Alex Deucher --- include/drm/drm_mode_object.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/drm') diff --git a/include/drm/drm_mode_object.h b/include/drm/drm_mode_object.h index 912f1e415685..08d7a7f0188f 100644 --- a/include/drm/drm_mode_object.h +++ b/include/drm/drm_mode_object.h @@ -60,7 +60,7 @@ struct drm_mode_object { void (*free_cb)(struct kref *kref); }; -#define DRM_OBJECT_MAX_PROPERTY 24 +#define DRM_OBJECT_MAX_PROPERTY 64 /** * struct drm_object_properties - property tracking for &drm_mode_object */ -- cgit v1.2.3 From 601603105325ad4ec62db95c9bc428202ece2c8f Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 16 Nov 2023 18:57:42 -0100 Subject: drm/drm_property: make replace_property_blob_from_id a DRM helper Place it in drm_property where drm_property_replace_blob and drm_property_lookup_blob live. Then we can use the DRM helper for driver-specific KMS properties too. Reviewed-by: Harry Wentland Reviewed-by: Liviu Dudau Signed-off-by: Melissa Wen Acked-by: Maxime Ripard Signed-off-by: Alex Deucher --- drivers/gpu/drm/arm/malidp_crtc.c | 2 +- drivers/gpu/drm/drm_atomic_uapi.c | 52 ++++------------------------------ drivers/gpu/drm/drm_property.c | 59 +++++++++++++++++++++++++++++++++++++++ include/drm/drm_property.h | 6 ++++ 4 files changed, 71 insertions(+), 48 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c index dc01c43f6193..d72c22dcf685 100644 --- a/drivers/gpu/drm/arm/malidp_crtc.c +++ b/drivers/gpu/drm/arm/malidp_crtc.c @@ -221,7 +221,7 @@ static int malidp_crtc_atomic_check_ctm(struct drm_crtc *crtc, /* * The size of the ctm is checked in - * drm_atomic_replace_property_blob_from_id. + * drm_property_replace_blob_from_id. 
*/ ctm = (struct drm_color_ctm *)state->ctm->data; for (i = 0; i < ARRAY_SIZE(ctm->matrix); ++i) { diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index aee4a65d4959..29d4940188d4 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -362,48 +362,6 @@ static s32 __user *get_out_fence_for_connector(struct drm_atomic_state *state, return fence_ptr; } -static int -drm_atomic_replace_property_blob_from_id(struct drm_device *dev, - struct drm_property_blob **blob, - uint64_t blob_id, - ssize_t expected_size, - ssize_t expected_elem_size, - bool *replaced) -{ - struct drm_property_blob *new_blob = NULL; - - if (blob_id != 0) { - new_blob = drm_property_lookup_blob(dev, blob_id); - if (new_blob == NULL) { - drm_dbg_atomic(dev, - "cannot find blob ID %llu\n", blob_id); - return -EINVAL; - } - - if (expected_size > 0 && - new_blob->length != expected_size) { - drm_dbg_atomic(dev, - "[BLOB:%d] length %zu different from expected %zu\n", - new_blob->base.id, new_blob->length, expected_size); - drm_property_blob_put(new_blob); - return -EINVAL; - } - if (expected_elem_size > 0 && - new_blob->length % expected_elem_size != 0) { - drm_dbg_atomic(dev, - "[BLOB:%d] length %zu not divisible by element size %zu\n", - new_blob->base.id, new_blob->length, expected_elem_size); - drm_property_blob_put(new_blob); - return -EINVAL; - } - } - - *replaced |= drm_property_replace_blob(blob, new_blob); - drm_property_blob_put(new_blob); - - return 0; -} - static int drm_atomic_crtc_set_property(struct drm_crtc *crtc, struct drm_crtc_state *state, struct drm_property *property, uint64_t val) @@ -424,7 +382,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc, } else if (property == config->prop_vrr_enabled) { state->vrr_enabled = val; } else if (property == config->degamma_lut_property) { - ret = drm_atomic_replace_property_blob_from_id(dev, + ret = drm_property_replace_blob_from_id(dev, &state->degamma_lut, val, -1, sizeof(struct drm_color_lut), @@ -432,7 +390,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc, state->color_mgmt_changed |= replaced; return ret; } else if (property == config->ctm_property) { - ret = drm_atomic_replace_property_blob_from_id(dev, + ret = drm_property_replace_blob_from_id(dev, &state->ctm, val, sizeof(struct drm_color_ctm), -1, @@ -440,7 +398,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc, state->color_mgmt_changed |= replaced; return ret; } else if (property == config->gamma_lut_property) { - ret = drm_atomic_replace_property_blob_from_id(dev, + ret = drm_property_replace_blob_from_id(dev, &state->gamma_lut, val, -1, sizeof(struct drm_color_lut), @@ -581,7 +539,7 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane, } else if (property == plane->color_range_property) { state->color_range = val; } else if (property == config->prop_fb_damage_clips) { - ret = drm_atomic_replace_property_blob_from_id(dev, + ret = drm_property_replace_blob_from_id(dev, &state->fb_damage_clips, val, -1, @@ -778,7 +736,7 @@ static int drm_atomic_connector_set_property(struct drm_connector *connector, if (state->link_status != DRM_LINK_STATUS_GOOD) state->link_status = val; } else if (property == config->hdr_output_metadata_property) { - ret = drm_atomic_replace_property_blob_from_id(dev, + ret = drm_property_replace_blob_from_id(dev, &state->hdr_output_metadata, val, sizeof(struct hdr_output_metadata), -1, diff --git a/drivers/gpu/drm/drm_property.c 
b/drivers/gpu/drm/drm_property.c index dfec479830e4..596272149a35 100644 --- a/drivers/gpu/drm/drm_property.c +++ b/drivers/gpu/drm/drm_property.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "drm_crtc_internal.h" @@ -751,6 +752,64 @@ bool drm_property_replace_blob(struct drm_property_blob **blob, } EXPORT_SYMBOL(drm_property_replace_blob); +/** + * drm_property_replace_blob_from_id - replace a blob property taking a reference + * @dev: DRM device + * @blob: a pointer to the member blob to be replaced + * @blob_id: the id of the new blob to replace with + * @expected_size: expected size of the blob property + * @expected_elem_size: expected size of an element in the blob property + * @replaced: if the blob was in fact replaced + * + * Look up the new blob from id, take its reference, check expected sizes of + * the blob and its element and replace the old blob by the new one. Advertise + * if the replacement operation was successful. + * + * Return: true if the blob was in fact replaced. -EINVAL if the new blob was + * not found or sizes don't match. + */ +int drm_property_replace_blob_from_id(struct drm_device *dev, + struct drm_property_blob **blob, + uint64_t blob_id, + ssize_t expected_size, + ssize_t expected_elem_size, + bool *replaced) +{ + struct drm_property_blob *new_blob = NULL; + + if (blob_id != 0) { + new_blob = drm_property_lookup_blob(dev, blob_id); + if (new_blob == NULL) { + drm_dbg_atomic(dev, + "cannot find blob ID %llu\n", blob_id); + return -EINVAL; + } + + if (expected_size > 0 && + new_blob->length != expected_size) { + drm_dbg_atomic(dev, + "[BLOB:%d] length %zu different from expected %zu\n", + new_blob->base.id, new_blob->length, expected_size); + drm_property_blob_put(new_blob); + return -EINVAL; + } + if (expected_elem_size > 0 && + new_blob->length % expected_elem_size != 0) { + drm_dbg_atomic(dev, + "[BLOB:%d] length %zu not divisible by element size %zu\n", + new_blob->base.id, new_blob->length, expected_elem_size); + drm_property_blob_put(new_blob); + return -EINVAL; + } + } + + *replaced |= drm_property_replace_blob(blob, new_blob); + drm_property_blob_put(new_blob); + + return 0; +} +EXPORT_SYMBOL(drm_property_replace_blob_from_id); + int drm_mode_getblob_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { diff --git a/include/drm/drm_property.h b/include/drm/drm_property.h index 65bc9710a470..082f29156b3e 100644 --- a/include/drm/drm_property.h +++ b/include/drm/drm_property.h @@ -279,6 +279,12 @@ struct drm_property_blob *drm_property_create_blob(struct drm_device *dev, const void *data); struct drm_property_blob *drm_property_lookup_blob(struct drm_device *dev, uint32_t id); +int drm_property_replace_blob_from_id(struct drm_device *dev, + struct drm_property_blob **blob, + uint64_t blob_id, + ssize_t expected_size, + ssize_t expected_elem_size, + bool *replaced); int drm_property_replace_global_blob(struct drm_device *dev, struct drm_property_blob **replace, size_t length, -- cgit v1.2.3 From 24013b9301349881c9fcd27e7edacc672e0bf6d3 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 16 Nov 2023 18:57:43 -0100 Subject: drm/drm_plane: track color mgmt changes per plane We will add color mgmt properties to DRM planes in the next patches and we want to track when one of this properties change to define atomic commit behaviors. Using a similar approach from CRTC color props, we set a color_mgmt_changed boolean whenever a plane color prop changes. 
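A rough sketch of how a driver could combine the helper from the previous patch with this new flag in a plane's atomic_set_property hook; struct my_plane, struct my_plane_state, the degamma_lut_property pointer and the to_my_*() casts are hypothetical driver names used only for illustration, not DRM core API:

/*
 * Hypothetical driver code, for illustration only: a blob-backed plane
 * degamma LUT property set via drm_property_replace_blob_from_id(), with
 * color_mgmt_changed recording whether the blob actually changed.
 */
static int my_plane_atomic_set_property(struct drm_plane *plane,
					struct drm_plane_state *state,
					struct drm_property *property,
					uint64_t val)
{
	struct my_plane *mp = to_my_plane(plane);               /* hypothetical */
	struct my_plane_state *mps = to_my_plane_state(state);  /* hypothetical */
	bool replaced = false;
	int ret;

	if (property == mp->degamma_lut_property) {
		ret = drm_property_replace_blob_from_id(plane->dev,
							&mps->degamma_lut,
							val,
							-1, /* no fixed total size */
							sizeof(struct drm_color_lut),
							&replaced);
		state->color_mgmt_changed |= replaced;
		return ret;
	}

	return -EINVAL;
}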
Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen Acked-by: Maxime Ripard Signed-off-by: Alex Deucher --- drivers/gpu/drm/drm_atomic.c | 1 + drivers/gpu/drm/drm_atomic_state_helper.c | 1 + include/drm/drm_plane.h | 7 +++++++ 3 files changed, 9 insertions(+) (limited to 'include/drm') diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index c31fc0b48c31..a91737adf8e7 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -733,6 +733,7 @@ static void drm_atomic_plane_print_state(struct drm_printer *p, drm_get_color_encoding_name(state->color_encoding)); drm_printf(p, "\tcolor-range=%s\n", drm_get_color_range_name(state->color_range)); + drm_printf(p, "\tcolor_mgmt_changed=%d\n", state->color_mgmt_changed); if (plane->funcs->atomic_print_state) plane->funcs->atomic_print_state(p, state); diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c index 54975de44a0e..519228eb1095 100644 --- a/drivers/gpu/drm/drm_atomic_state_helper.c +++ b/drivers/gpu/drm/drm_atomic_state_helper.c @@ -352,6 +352,7 @@ void __drm_atomic_helper_plane_duplicate_state(struct drm_plane *plane, state->fence = NULL; state->commit = NULL; state->fb_damage_clips = NULL; + state->color_mgmt_changed = false; } EXPORT_SYMBOL(__drm_atomic_helper_plane_duplicate_state); diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index c6565a6f9324..641fe298052d 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -251,6 +251,13 @@ struct drm_plane_state { /** @state: backpointer to global drm_atomic_state */ struct drm_atomic_state *state; + + /** + * @color_mgmt_changed: Color management properties have changed. Used + * by the atomic helpers and drivers to steer the atomic commit control + * flow. + */ + bool color_mgmt_changed : 1; }; static inline struct drm_rect -- cgit v1.2.3 From 933a2a376fb3f22ba4774f74233571504ac56b02 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 19 Dec 2023 14:57:34 +1100 Subject: drm: using mul_u32_u32() requires linux/math64.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some pending include file cleanups produced this error: In file included from include/linux/kernel.h:27, from drivers/gpu/ipu-v3/ipu-dp.c:7: include/drm/drm_color_mgmt.h: In function 'drm_color_lut_extract': include/drm/drm_color_mgmt.h:45:46: error: implicit declaration of function 'mul_u32_u32' [-Werror=implicit-function-declaration] 45 | return DIV_ROUND_CLOSEST_ULL(mul_u32_u32(user_input, (1 << bit_precision) - 1), | ^~~~~~~~~~~ Fixes: c6fbb6bca108 ("drm: Fix color LUT rounding") Signed-off-by: Stephen Rothwell Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231219145734.13e40e1e@canb.auug.org.au --- include/drm/drm_color_mgmt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/drm') diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index 54b2b2467bfd..ed81741036d7 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -24,6 +24,7 @@ #define __DRM_COLOR_MGMT_H__ #include +#include #include struct drm_crtc; -- cgit v1.2.3 From 780637e28783af505864151da78e713f62ed64ae Mon Sep 17 00:00:00 2001 From: Carlos Santa Date: Wed, 15 Feb 2023 12:34:25 -0800 Subject: drm/xe: Update the list of devices to add even more TGL devices The list of GTs got splitted a while back between GT1 and GT2 on TGL. 
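Besides adding the GT1 IDs to pciidlist, the diff below also fixes XE_TGL_IDS() to forward its trailing arguments with ## __VA_ARGS__ instead of re-spelling "...", so the per-platform descriptor actually reaches every entry. A small standalone sketch of that X-macro forwarding idiom, using placeholder names and IDs rather than the real Xe tables:

/*
 * Illustration of the X-macro forwarding pattern used by the XE_*_IDS()
 * tables: the variadic arguments (here a descriptor pointer) must be
 * forwarded with ## __VA_ARGS__; writing "..." in the expansion would
 * paste the literal token instead of the caller's argument.
 */
#include <stdio.h>

struct demo_desc { const char *name; };

#define DEMO_GT1_IDS(MACRO__, ...) \
	MACRO__(0x1111, ## __VA_ARGS__), \
	MACRO__(0x2222, ## __VA_ARGS__)

#define DEMO_GT2_IDS(MACRO__, ...) \
	MACRO__(0x3333, ## __VA_ARGS__)

#define DEMO_IDS(MACRO__, ...) \
	DEMO_GT1_IDS(MACRO__, ## __VA_ARGS__), \
	DEMO_GT2_IDS(MACRO__, ## __VA_ARGS__)

#define DEMO_ENTRY(id, desc) { (id), (desc) }

static const struct demo_desc demo = { "demo-gpu" };

static const struct {
	unsigned int id;
	const struct demo_desc *desc;
} demo_table[] = {
	DEMO_IDS(DEMO_ENTRY, &demo),
};

int main(void)
{
	for (size_t i = 0; i < sizeof(demo_table) / sizeof(demo_table[0]); i++)
		printf("0x%04x -> %s\n", demo_table[i].id, demo_table[i].desc->name);

	return 0;
}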
References: https://patchwork.freedesktop.org/patch/388414/ CC: Rodrigo Vivi Signed-off-by: Carlos Santa Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 2 +- include/drm/xe_pciids.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/drm') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index c159306e04cf..2b1c80bb6e46 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -279,7 +279,7 @@ static const struct xe_device_desc mtl_desc = { * PCI ID matches, otherwise we'll use the wrong info struct above. */ static const struct pci_device_id pciidlist[] = { - XE_TGL_GT2_IDS(INTEL_VGA_DEVICE, &tgl_desc), + XE_TGL_IDS(INTEL_VGA_DEVICE, &tgl_desc), XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc), XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc), XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc), diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h index e539594ed939..b9e9f5b2b0ac 100644 --- a/include/drm/xe_pciids.h +++ b/include/drm/xe_pciids.h @@ -41,8 +41,8 @@ MACRO__(0x9AF8, ## __VA_ARGS__) #define XE_TGL_IDS(MACRO__, ...) \ - XE_TGL_GT1_IDS(MACRO__, ...), \ - XE_TGL_GT2_IDS(MACRO__, ...) + XE_TGL_GT1_IDS(MACRO__, ## __VA_ARGS__),\ + XE_TGL_GT2_IDS(MACRO__, ## __VA_ARGS__) /* RKL */ #define XE_RKL_IDS(MACRO__, ...) \ -- cgit v1.2.3 From de4651d6dd04d173b50fa8631a9a3cdd897434c4 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 4 Aug 2023 16:17:09 -0700 Subject: drm/xe: Update ARL-S DevIDs to the latest BSpec BSpec changed with regard the DevIDs for ARL-S. Update the define accordingly. Bspec: 55420 Reviewed-by: Niranjana Vishwanathapura Link: https://lore.kernel.org/r/20230804231709.1065087-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- include/drm/xe_pciids.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/drm') diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h index b9e9f5b2b0ac..0d0cf80eb0ba 100644 --- a/include/drm/xe_pciids.h +++ b/include/drm/xe_pciids.h @@ -183,8 +183,7 @@ MACRO__(0x7DE0, ## __VA_ARGS__) #define XE_ARL_IDS(MACRO__, ...) \ - MACRO__(0x7D66, ## __VA_ARGS__), \ - MACRO__(0x7D76, ## __VA_ARGS__) + MACRO__(0x7D67, ## __VA_ARGS__) #define XE_MTL_IDS(MACRO__, ...) \ XE_MTL_M_IDS(MACRO__, ## __VA_ARGS__), \ -- cgit v1.2.3 From 3330361543fca2a60b71ebf02cd5e56bb417b159 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 11 Aug 2023 09:06:16 -0700 Subject: drm/xe/lnl: Add LNL platform definition LNL is an integrated GPU based on the Xe2 architecture. 
Bspec: 70821 Signed-off-by: Matt Roper Signed-off-by: Lucas De Marchi Reviewed-by: Balasubramani Vivekanandan Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 6 ++++++ drivers/gpu/drm/xe/xe_platform_types.h | 1 + include/drm/xe_pciids.h | 5 +++++ 3 files changed, 12 insertions(+) (limited to 'include/drm') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 8512cd451887..6c2c6723b1b2 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -319,6 +319,11 @@ static const struct xe_device_desc mtl_desc = { PLATFORM(XE_METEORLAKE), }; +static const struct xe_device_desc lnl_desc = { + PLATFORM(XE_LUNARLAKE), + .require_force_probe = true, +}; + #undef PLATFORM /* Map of GMD_ID values to graphics IP */ @@ -356,6 +361,7 @@ static const struct pci_device_id pciidlist[] = { XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc), XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc), XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc), + XE_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc), { } }; MODULE_DEVICE_TABLE(pci, pciidlist); diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h index abbb8a1f29a8..e378a64a0f86 100644 --- a/drivers/gpu/drm/xe/xe_platform_types.h +++ b/drivers/gpu/drm/xe/xe_platform_types.h @@ -21,6 +21,7 @@ enum xe_platform { XE_DG2, XE_PVC, XE_METEORLAKE, + XE_LUNARLAKE, }; enum xe_subplatform { diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h index 0d0cf80eb0ba..f6ac6d9772ee 100644 --- a/include/drm/xe_pciids.h +++ b/include/drm/xe_pciids.h @@ -191,4 +191,9 @@ XE_MTL_S_IDS(MACRO__, ## __VA_ARGS__), \ XE_ARL_IDS(MACRO__, ## __VA_ARGS__) +#define XE_LNL_IDS(MACRO__, ...) \ + MACRO__(0x6420, ## __VA_ARGS__), \ + MACRO__(0x64A0, ## __VA_ARGS__), \ + MACRO__(0x64B0, ## __VA_ARGS__) + #endif -- cgit v1.2.3 From bbd52b6153731908e52f68d7c797bef7c42af4f7 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 31 Aug 2023 07:58:44 -0700 Subject: drm/gpuva: Add drm_gpuva_for_each_op_reverse Add a helper to walk op list in reverse. Xe will make use of this when unwinding GPUVA operations. 
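A rough sketch of the kind of unwind loop the new iterator below enables; drv_unwind_op() is a hypothetical per-operation rollback helper, not part of the DRM API:

/*
 * Illustration only: walk a drm_gpuva_ops list backwards so operations
 * that were already applied can be undone in reverse order after a
 * failure part-way through.
 */
#include <drm/drm_gpuvm.h>

static void drv_unwind_op(struct drm_gpuva_op *op); /* hypothetical driver helper */

static void drv_unwind_ops(struct drm_gpuva_ops *ops)
{
	struct drm_gpuva_op *op;

	drm_gpuva_for_each_op_reverse(op, ops)
		drv_unwind_op(op);
}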
v2: (Rodrigo) reword commit message Reviewed-by: Rodrigo Vivi Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- include/drm/drm_gpuvm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/drm') diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index 6258849382e1..48311e6d664c 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -1007,6 +1007,16 @@ struct drm_gpuva_ops { #define drm_gpuva_for_each_op_from_reverse(op, ops) \ list_for_each_entry_from_reverse(op, &(ops)->list, entry) +/** + * drm_gpuva_for_each_op_reverse - iterator to walk over &drm_gpuva_ops in reverse + * @op: &drm_gpuva_op to assign in each iteration step + * @ops: &drm_gpuva_ops to walk + * + * This iterator walks over all ops within a given list of operations in reverse + */ +#define drm_gpuva_for_each_op_reverse(op, ops) \ + list_for_each_entry_reverse(op, &(ops)->list, entry) + /** * drm_gpuva_first_op() - returns the first &drm_gpuva_op from &drm_gpuva_ops * @ops: the &drm_gpuva_ops to get the fist &drm_gpuva_op from -- cgit v1.2.3 From 3445166655cdcdcf18f10ffa124e6ae0ee3018c6 Mon Sep 17 00:00:00 2001 From: Shekhar Chauhan Date: Wed, 11 Oct 2023 10:44:18 +0530 Subject: drm/xe: Add new DG2 PCI IDs Add recently added PCI IDs for DG2 BSpec: 44477 Signed-off-by: Shekhar Chauhan Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231011051418.2767145-1-shekhar.chauhan@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- include/drm/xe_pciids.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/drm') diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h index f6ac6d9772ee..d105527df0c4 100644 --- a/include/drm/xe_pciids.h +++ b/include/drm/xe_pciids.h @@ -138,7 +138,11 @@ MACRO__(0x56A5, ## __VA_ARGS__), \ MACRO__(0x56A6, ## __VA_ARGS__), \ MACRO__(0x56B0, ## __VA_ARGS__), \ - MACRO__(0x56B1, ## __VA_ARGS__) + MACRO__(0x56B1, ## __VA_ARGS__), \ + MACRO__(0x56BA, ## __VA_ARGS__), \ + MACRO__(0x56BB, ## __VA_ARGS__), \ + MACRO__(0x56BC, ## __VA_ARGS__), \ + MACRO__(0x56BD, ## __VA_ARGS__) #define XE_DG2_G12_IDS(MACRO__, ...) \ MACRO__(0x5696, ## __VA_ARGS__), \ -- cgit v1.2.3 From cd0adf746527dc2d1410adf5bf09ee6f4cd22a79 Mon Sep 17 00:00:00 2001 From: Shekhar Chauhan Date: Wed, 11 Oct 2023 21:15:26 +0530 Subject: drm/xe/dg2: Remove one PCI ID The bspec was recently updated to remove PCI ID 0x5698; this ID is actually reserved for future use and should not be treated as DG2-G11. 
BSpec: 44477 Signed-off-by: Shekhar Chauhan Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231011154526.2819754-1-shekhar.chauhan@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- include/drm/xe_pciids.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/drm') diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h index d105527df0c4..11deefceffd0 100644 --- a/include/drm/xe_pciids.h +++ b/include/drm/xe_pciids.h @@ -134,7 +134,6 @@ MACRO__(0x5693, ## __VA_ARGS__), \ MACRO__(0x5694, ## __VA_ARGS__), \ MACRO__(0x5695, ## __VA_ARGS__), \ - MACRO__(0x5698, ## __VA_ARGS__), \ MACRO__(0x56A5, ## __VA_ARGS__), \ MACRO__(0x56A6, ## __VA_ARGS__), \ MACRO__(0x56B0, ## __VA_ARGS__), \ -- cgit v1.2.3 From baf9089c800c46f224f14e2a681ba3a7c1b09374 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Fri, 17 Nov 2023 09:29:28 -0800 Subject: drm/xe: Include RPL-U to pciidlist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RPL-U is defined as a subplatform but those PCI ids were not included in pciidlist so Xe KMD would never probe device with those ids. This is following what i915 does to include RPL-U to PCI ids probe list. v2: - change order to match i915 Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Signed-off-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- include/drm/xe_pciids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/drm') diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h index 11deefceffd0..47ff7472c8bd 100644 --- a/include/drm/xe_pciids.h +++ b/include/drm/xe_pciids.h @@ -117,6 +117,7 @@ /* RPL-P */ #define XE_RPLP_IDS(MACRO__, ...) \ + XE_RPLU_IDS(MACRO__, ## __VA_ARGS__), \ MACRO__(0xA720, ## __VA_ARGS__), \ MACRO__(0xA7A0, ## __VA_ARGS__), \ MACRO__(0xA7A8, ## __VA_ARGS__) -- cgit v1.2.3 From 1bec833316fffa110259093671d27be137be454d Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Mon, 20 Nov 2023 10:00:35 -0800 Subject: drm/xe: Add missing RPL and ADL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those are ids present in i915 but missing in Xe. Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Signed-off-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- include/drm/xe_pciids.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/drm') diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h index 47ff7472c8bd..84fb596ab8f5 100644 --- a/include/drm/xe_pciids.h +++ b/include/drm/xe_pciids.h @@ -67,6 +67,7 @@ MACRO__(0x4682, ## __VA_ARGS__), \ MACRO__(0x4688, ## __VA_ARGS__), \ MACRO__(0x468A, ## __VA_ARGS__), \ + MACRO__(0x468B, ## __VA_ARGS__), \ MACRO__(0x4690, ## __VA_ARGS__), \ MACRO__(0x4692, ## __VA_ARGS__), \ MACRO__(0x4693, ## __VA_ARGS__) @@ -113,14 +114,18 @@ #define XE_RPLU_IDS(MACRO__, ...) \ MACRO__(0xA721, ## __VA_ARGS__), \ MACRO__(0xA7A1, ## __VA_ARGS__), \ - MACRO__(0xA7A9, ## __VA_ARGS__) + MACRO__(0xA7A9, ## __VA_ARGS__), \ + MACRO__(0xA7AC, ## __VA_ARGS__), \ + MACRO__(0xA7AD, ## __VA_ARGS__) /* RPL-P */ #define XE_RPLP_IDS(MACRO__, ...) \ XE_RPLU_IDS(MACRO__, ## __VA_ARGS__), \ MACRO__(0xA720, ## __VA_ARGS__), \ MACRO__(0xA7A0, ## __VA_ARGS__), \ - MACRO__(0xA7A8, ## __VA_ARGS__) + MACRO__(0xA7A8, ## __VA_ARGS__), \ + MACRO__(0xA7AA, ## __VA_ARGS__), \ + MACRO__(0xA7AB, ## __VA_ARGS__) /* DG2 */ #define XE_DG2_G10_IDS(MACRO__, ...) 
\ -- cgit v1.2.3 From 1a3d4d76bacee545c620f5935a5bf4677ad88d4c Mon Sep 17 00:00:00 2001 From: Haridhar Kalvala Date: Mon, 20 Nov 2023 12:25:07 +0530 Subject: drm/xe: ATS-M device ID update ATS-M device ID update. BSpec: 44477 Signed-off-by: Haridhar Kalvala Reviewed-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231120065507.1543676-1-haridhar.kalvala@intel.com Signed-off-by: Rodrigo Vivi --- include/drm/xe_pciids.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/drm') diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h index 84fb596ab8f5..6c3ac3b5ec94 100644 --- a/include/drm/xe_pciids.h +++ b/include/drm/xe_pciids.h @@ -163,7 +163,8 @@ XE_DG2_G12_IDS(MACRO__, ## __VA_ARGS__) #define XE_ATS_M150_IDS(MACRO__, ...) \ - MACRO__(0x56C0, ## __VA_ARGS__) + MACRO__(0x56C0, ## __VA_ARGS__), \ + MACRO__(0x56C2, ## __VA_ARGS__) #define XE_ATS_M75_IDS(MACRO__, ...) \ MACRO__(0x56C1, ## __VA_ARGS__) -- cgit v1.2.3 From 812ec747a354e00f5e789f3cdcfbc80f98f1d71d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 21 Nov 2023 11:52:09 -0800 Subject: drm/xe: Sync MTL PCI IDs with i915 For Xe1 platforms, it's better to follow the way i915 adds the PCI IDs to the header, so it's easier to catch up when there is an update. This brings the same logic applied in commit 2e3c369f23a7 ("drm/i915/mtl: Eliminate subplatforms") to the equivalent xe header. The end result of this header for Xe1 platforms is now in sync with i915 as of commit 5032c607e886 ("drm/i915: ATS-M device ID update"). This can be seen by $ git show 5032c607e886:include/drm/i915_pciids.h > a.h $ git diff --color-words --no-index a.h include/drm/xe_pciids.h Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231121195209.802235-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- include/drm/xe_pciids.h | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) (limited to 'include/drm') diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h index 6c3ac3b5ec94..de1a344737bc 100644 --- a/include/drm/xe_pciids.h +++ b/include/drm/xe_pciids.h @@ -173,33 +173,14 @@ XE_ATS_M150_IDS(MACRO__, ## __VA_ARGS__),\ XE_ATS_M75_IDS(MACRO__, ## __VA_ARGS__) -/* MTL */ -#define XE_MTL_M_IDS(MACRO__, ...) \ +/* MTL / ARL */ +#define XE_MTL_IDS(MACRO__, ...) \ MACRO__(0x7D40, ## __VA_ARGS__), \ - MACRO__(0x7D43, ## __VA_ARGS__), \ - MACRO__(0x7DC0, ## __VA_ARGS__) - -#define XE_MTL_P_IDS(MACRO__, ...) \ MACRO__(0x7D45, ## __VA_ARGS__), \ - MACRO__(0x7D47, ## __VA_ARGS__), \ - MACRO__(0x7D50, ## __VA_ARGS__), \ MACRO__(0x7D55, ## __VA_ARGS__), \ - MACRO__(0x7DC5, ## __VA_ARGS__), \ - MACRO__(0x7DD0, ## __VA_ARGS__), \ - MACRO__(0x7DD5, ## __VA_ARGS__) - -#define XE_MTL_S_IDS(MACRO__, ...) \ MACRO__(0x7D60, ## __VA_ARGS__), \ - MACRO__(0x7DE0, ## __VA_ARGS__) - -#define XE_ARL_IDS(MACRO__, ...) \ - MACRO__(0x7D67, ## __VA_ARGS__) - -#define XE_MTL_IDS(MACRO__, ...) \ - XE_MTL_M_IDS(MACRO__, ## __VA_ARGS__), \ - XE_MTL_P_IDS(MACRO__, ## __VA_ARGS__), \ - XE_MTL_S_IDS(MACRO__, ## __VA_ARGS__), \ - XE_ARL_IDS(MACRO__, ## __VA_ARGS__) + MACRO__(0x7D67, ## __VA_ARGS__), \ + MACRO__(0x7DD5, ## __VA_ARGS__) #define XE_LNL_IDS(MACRO__, ...) 
\ MACRO__(0x6420, ## __VA_ARGS__), \ -- cgit v1.2.3 From 200a6b3af05918ddb18832fa4d5a8f15c9dd99e0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 30 Dec 2023 21:48:56 -0800 Subject: drm/gpuvm: fix all kernel-doc warnings in include/drm/drm_gpuvm.h Update kernel-doc comments in to correct all kernel-doc warnings: drm_gpuvm.h:148: warning: Excess struct member 'addr' description in 'drm_gpuva' drm_gpuvm.h:148: warning: Excess struct member 'offset' description in 'drm_gpuva' drm_gpuvm.h:148: warning: Excess struct member 'obj' description in 'drm_gpuva' drm_gpuvm.h:148: warning: Excess struct member 'entry' description in 'drm_gpuva' drm_gpuvm.h:148: warning: Excess struct member '__subtree_last' description in 'drm_gpuva' drm_gpuvm.h:192: warning: No description found for return value of 'drm_gpuva_invalidated' drm_gpuvm.h:331: warning: Excess struct member 'tree' description in 'drm_gpuvm' drm_gpuvm.h:331: warning: Excess struct member 'list' description in 'drm_gpuvm' drm_gpuvm.h:331: warning: Excess struct member 'list' description in 'drm_gpuvm' drm_gpuvm.h:331: warning: Excess struct member 'local_list' description in 'drm_gpuvm' drm_gpuvm.h:331: warning: Excess struct member 'lock' description in 'drm_gpuvm' drm_gpuvm.h:331: warning: Excess struct member 'list' description in 'drm_gpuvm' drm_gpuvm.h:331: warning: Excess struct member 'local_list' description in 'drm_gpuvm' drm_gpuvm.h:331: warning: Excess struct member 'lock' description in 'drm_gpuvm' drm_gpuvm.h:352: warning: No description found for return value of 'drm_gpuvm_get' drm_gpuvm.h:545: warning: Excess struct member 'fn' description in 'drm_gpuvm_exec' drm_gpuvm.h:545: warning: Excess struct member 'priv' description in 'drm_gpuvm_exec' drm_gpuvm.h:597: warning: missing initial short description on line: * drm_gpuvm_exec_resv_add_fence() drm_gpuvm.h:616: warning: missing initial short description on line: * drm_gpuvm_exec_validate() drm_gpuvm.h:623: warning: No description found for return value of 'drm_gpuvm_exec_validate' drm_gpuvm.h:698: warning: Excess struct member 'gpuva' description in 'drm_gpuvm_bo' drm_gpuvm.h:698: warning: Excess struct member 'entry' description in 'drm_gpuvm_bo' drm_gpuvm.h:698: warning: Excess struct member 'gem' description in 'drm_gpuvm_bo' drm_gpuvm.h:698: warning: Excess struct member 'evict' description in 'drm_gpuvm_bo' drm_gpuvm.h:726: warning: No description found for return value of 'drm_gpuvm_bo_get' drm_gpuvm.h:738: warning: missing initial short description on line: * drm_gpuvm_bo_gem_evict() drm_gpuvm.h:740: warning: missing initial short description on line: * drm_gpuvm_bo_gem_evict() drm_gpuvm.h:698: warning: Excess struct member 'evict' description in 'drm_gpuvm_bo' drm_gpuvm.h:844: warning: Excess struct member 'addr' description in 'drm_gpuva_op_map' drm_gpuvm.h:844: warning: Excess struct member 'range' description in 'drm_gpuva_op_map' drm_gpuvm.h:844: warning: Excess struct member 'offset' description in 'drm_gpuva_op_map' drm_gpuvm.h:844: warning: Excess struct member 'obj' description in 'drm_gpuva_op_map' Signed-off-by: Randy Dunlap Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Cc: Jonathan Corbet Cc: Vegard Nossum Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20231231054856.31786-1-rdunlap@infradead.org --- include/drm/drm_gpuvm.h | 80 +++++++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 35 deletions(-) 
(limited to 'include/drm') diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index 6258849382e1..f3fdb810989a 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -92,7 +92,7 @@ struct drm_gpuva { */ struct { /** - * @addr: the start address + * @va.addr: the start address */ u64 addr; @@ -107,17 +107,17 @@ struct drm_gpuva { */ struct { /** - * @offset: the offset within the &drm_gem_object + * @gem.offset: the offset within the &drm_gem_object */ u64 offset; /** - * @obj: the mapped &drm_gem_object + * @gem.obj: the mapped &drm_gem_object */ struct drm_gem_object *obj; /** - * @entry: the &list_head to attach this object to a &drm_gpuvm_bo + * @gem.entry: the &list_head to attach this object to a &drm_gpuvm_bo */ struct list_head entry; } gem; @@ -127,12 +127,12 @@ struct drm_gpuva { */ struct { /** - * @rb: the rb-tree node + * @rb.node: the rb-tree node */ struct rb_node node; /** - * @entry: The &list_head to additionally connect &drm_gpuvas + * @rb.entry: The &list_head to additionally connect &drm_gpuvas * in the same order they appear in the interval tree. This is * useful to keep iterating &drm_gpuvas from a start node found * through the rb-tree while doing modifications on the rb-tree @@ -141,7 +141,7 @@ struct drm_gpuva { struct list_head entry; /** - * @__subtree_last: needed by the interval tree, holding last-in-subtree + * @rb.__subtree_last: needed by the interval tree, holding last-in-subtree */ u64 __subtree_last; } rb; @@ -187,6 +187,8 @@ static inline void drm_gpuva_invalidate(struct drm_gpuva *va, bool invalidate) * drm_gpuva_invalidated() - indicates whether the backing BO of this &drm_gpuva * is invalidated * @va: the &drm_gpuva to check + * + * Returns: %true if the GPU VA is invalidated, %false otherwise */ static inline bool drm_gpuva_invalidated(struct drm_gpuva *va) { @@ -252,12 +254,12 @@ struct drm_gpuvm { */ struct { /** - * @tree: the rb-tree to track GPU VA mappings + * @rb.tree: the rb-tree to track GPU VA mappings */ struct rb_root_cached tree; /** - * @list: the &list_head to track GPU VA mappings + * @rb.list: the &list_head to track GPU VA mappings */ struct list_head list; } rb; @@ -290,19 +292,19 @@ struct drm_gpuvm { */ struct { /** - * @list: &list_head storing &drm_gpuvm_bos serving as + * @extobj.list: &list_head storing &drm_gpuvm_bos serving as * external object */ struct list_head list; /** - * @local_list: pointer to the local list temporarily storing - * entries from the external object list + * @extobj.local_list: pointer to the local list temporarily + * storing entries from the external object list */ struct list_head *local_list; /** - * @lock: spinlock to protect the extobj list + * @extobj.lock: spinlock to protect the extobj list */ spinlock_t lock; } extobj; @@ -312,19 +314,19 @@ struct drm_gpuvm { */ struct { /** - * @list: &list_head storing &drm_gpuvm_bos currently being - * evicted + * @evict.list: &list_head storing &drm_gpuvm_bos currently + * being evicted */ struct list_head list; /** - * @local_list: pointer to the local list temporarily storing - * entries from the evicted object list + * @evict.local_list: pointer to the local list temporarily + * storing entries from the evicted object list */ struct list_head *local_list; /** - * @lock: spinlock to protect the evict list + * @evict.lock: spinlock to protect the evict list */ spinlock_t lock; } evict; @@ -344,6 +346,8 @@ void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, * * This function acquires an additional reference to 
@gpuvm. It is illegal to * call this without already holding a reference. No locks required. + * + * Returns: the &struct drm_gpuvm pointer */ static inline struct drm_gpuvm * drm_gpuvm_get(struct drm_gpuvm *gpuvm) @@ -533,12 +537,13 @@ struct drm_gpuvm_exec { */ struct { /** - * @fn: The driver callback to lock additional &drm_gem_objects. + * @extra.fn: The driver callback to lock additional + * &drm_gem_objects. */ int (*fn)(struct drm_gpuvm_exec *vm_exec); /** - * @priv: driver private data for the @fn callback + * @extra.priv: driver private data for the @fn callback */ void *priv; } extra; @@ -589,7 +594,7 @@ void drm_gpuvm_resv_add_fence(struct drm_gpuvm *gpuvm, enum dma_resv_usage extobj_usage); /** - * drm_gpuvm_exec_resv_add_fence() + * drm_gpuvm_exec_resv_add_fence() - add fence to private and all extobj * @vm_exec: the &drm_gpuvm_exec wrapper * @fence: fence to add * @private_usage: private dma-resv usage @@ -608,10 +613,12 @@ drm_gpuvm_exec_resv_add_fence(struct drm_gpuvm_exec *vm_exec, } /** - * drm_gpuvm_exec_validate() + * drm_gpuvm_exec_validate() - validate all BOs marked as evicted * @vm_exec: the &drm_gpuvm_exec wrapper * * See drm_gpuvm_validate(). + * + * Returns: 0 on success, negative error code on failure. */ static inline int drm_gpuvm_exec_validate(struct drm_gpuvm_exec *vm_exec) @@ -664,7 +671,7 @@ struct drm_gpuvm_bo { */ struct { /** - * @gpuva: The list of linked &drm_gpuvas. + * @list.gpuva: The list of linked &drm_gpuvas. * * It is safe to access entries from this list as long as the * GEM's gpuva lock is held. See also struct drm_gem_object. @@ -672,25 +679,25 @@ struct drm_gpuvm_bo { struct list_head gpuva; /** - * @entry: Structure containing all &list_heads serving as + * @list.entry: Structure containing all &list_heads serving as * entry. */ struct { /** - * @gem: List entry to attach to the &drm_gem_objects - * gpuva list. + * @list.entry.gem: List entry to attach to the + * &drm_gem_objects gpuva list. */ struct list_head gem; /** - * @evict: List entry to attach to the &drm_gpuvms - * extobj list. + * @list.entry.evict: List entry to attach to the + * &drm_gpuvms extobj list. */ struct list_head extobj; /** - * @evict: List entry to attach to the &drm_gpuvms evict - * list. + * @list.entry.evict: List entry to attach to the + * &drm_gpuvms evict list. */ struct list_head evict; } entry; @@ -713,6 +720,8 @@ drm_gpuvm_bo_obtain_prealloc(struct drm_gpuvm_bo *vm_bo); * * This function acquires an additional reference to @vm_bo. It is illegal to * call this without already holding a reference. No locks required. 
+ * + * Returns: the &struct vm_bo pointer */ static inline struct drm_gpuvm_bo * drm_gpuvm_bo_get(struct drm_gpuvm_bo *vm_bo) @@ -730,7 +739,8 @@ drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, void drm_gpuvm_bo_evict(struct drm_gpuvm_bo *vm_bo, bool evict); /** - * drm_gpuvm_bo_gem_evict() + * drm_gpuvm_bo_gem_evict() - add/remove all &drm_gpuvm_bo's in the list + * to/from the &drm_gpuvms evicted list * @obj: the &drm_gem_object * @evict: indicates whether @obj is evicted * @@ -817,12 +827,12 @@ struct drm_gpuva_op_map { */ struct { /** - * @addr: the base address of the new mapping + * @va.addr: the base address of the new mapping */ u64 addr; /** - * @range: the range of the new mapping + * @va.range: the range of the new mapping */ u64 range; } va; @@ -832,12 +842,12 @@ struct drm_gpuva_op_map { */ struct { /** - * @offset: the offset within the &drm_gem_object + * @gem.offset: the offset within the &drm_gem_object */ u64 offset; /** - * @obj: the &drm_gem_object to map + * @gem.obj: the &drm_gem_object to map */ struct drm_gem_object *obj; } gem; -- cgit v1.2.3
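For reference, the kernel-doc convention applied in the last patch is that members of named sub-structures are documented with their full @outer.member path; a minimal sketch, not taken from the kernel source:

/**
 * struct demo - minimal example of the nested-member kernel-doc style
 */
struct demo {
	/** @name: top-level members keep their plain name */
	const char *name;

	/** @stats: anonymous sub-structure grouping counters */
	struct {
		/**
		 * @stats.hits: members nested under a named field use the
		 * @outer.member form, which is what avoids the "Excess
		 * struct member" warnings quoted in the commit message.
		 */
		unsigned long hits;
	} stats;
};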