From 86cec7ece3e62517e2bc0fd796a8a8da4193e7e5 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Mon, 29 Mar 2021 16:38:28 +0300 Subject: gpu: host1x: Allow syncpoints without associated client Syncpoints don't need to be associated with any client, so remove the property, and expose host1x_syncpt_alloc. This will allow allocating syncpoints without prior knowledge of the engine that it will be used with. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/host1x/syncpt.c | 37 +++++++++++++++++++++++++------------ drivers/gpu/host1x/syncpt.h | 1 - 2 files changed, 25 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index fce7892d5137..9a113016d482 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -42,13 +42,28 @@ static void host1x_syncpt_base_free(struct host1x_syncpt_base *base) base->requested = false; } -static struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host, - struct host1x_client *client, - unsigned long flags) +/** + * host1x_syncpt_alloc() - allocate a syncpoint + * @host: host1x device data + * @flags: bitfield of HOST1X_SYNCPT_* flags + * @name: name for the syncpoint for use in debug prints + * + * Allocates a hardware syncpoint for the caller's use. The caller then has + * the sole authority to mutate the syncpoint's value until it is freed again. + * + * If no free syncpoints are available, or a NULL name was specified, returns + * NULL. + */ +struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host, + unsigned long flags, + const char *name) { struct host1x_syncpt *sp = host->syncpt; + char *full_name; unsigned int i; - char *name; + + if (!name) + return NULL; mutex_lock(&host->syncpt_mutex); @@ -64,13 +79,11 @@ static struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host, goto unlock; } - name = kasprintf(GFP_KERNEL, "%02u-%s", sp->id, - client ? 
dev_name(client->dev) : NULL); - if (!name) + full_name = kasprintf(GFP_KERNEL, "%u-%s", sp->id, name); + if (!full_name) goto free_base; - sp->client = client; - sp->name = name; + sp->name = full_name; if (flags & HOST1X_SYNCPT_CLIENT_MANAGED) sp->client_managed = true; @@ -87,6 +100,7 @@ unlock: mutex_unlock(&host->syncpt_mutex); return NULL; } +EXPORT_SYMBOL(host1x_syncpt_alloc); /** * host1x_syncpt_id() - retrieve syncpoint ID @@ -401,7 +415,7 @@ int host1x_syncpt_init(struct host1x *host) host1x_hw_syncpt_enable_protection(host); /* Allocate sync point to use for clearing waits for expired fences */ - host->nop_sp = host1x_syncpt_alloc(host, NULL, 0); + host->nop_sp = host1x_syncpt_alloc(host, 0, "reserved-nop"); if (!host->nop_sp) return -ENOMEM; @@ -423,7 +437,7 @@ struct host1x_syncpt *host1x_syncpt_request(struct host1x_client *client, { struct host1x *host = dev_get_drvdata(client->host->parent); - return host1x_syncpt_alloc(host, client, flags); + return host1x_syncpt_alloc(host, flags, dev_name(client->dev)); } EXPORT_SYMBOL(host1x_syncpt_request); @@ -447,7 +461,6 @@ void host1x_syncpt_free(struct host1x_syncpt *sp) host1x_syncpt_base_free(sp->base); kfree(sp->name); sp->base = NULL; - sp->client = NULL; sp->name = NULL; sp->client_managed = false; diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h index 8e1d04dacaa0..3aa6b25b1b9c 100644 --- a/drivers/gpu/host1x/syncpt.h +++ b/drivers/gpu/host1x/syncpt.h @@ -33,7 +33,6 @@ struct host1x_syncpt { const char *name; bool client_managed; struct host1x *host; - struct host1x_client *client; struct host1x_syncpt_base *base; /* interrupt data */ -- cgit v1.2.3 From 49a5fb1679952a76861bd2580f785e33e3de712c Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Mon, 29 Mar 2021 16:38:29 +0300 Subject: gpu: host1x: Show number of pending waiters in debugfs Show the number of pending waiters in the debugfs status file. 
This is useful for testing to verify that waiters do not leak or accumulate incorrectly. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/host1x/debug.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c index 1b4997bda1c7..8a14880c61bb 100644 --- a/drivers/gpu/host1x/debug.c +++ b/drivers/gpu/host1x/debug.c @@ -69,6 +69,7 @@ static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo) static void show_syncpts(struct host1x *m, struct output *o) { + struct list_head *pos; unsigned int i; host1x_debug_output(o, "---- syncpts ----\n"); @@ -76,12 +77,19 @@ static void show_syncpts(struct host1x *m, struct output *o) for (i = 0; i < host1x_syncpt_nb_pts(m); i++) { u32 max = host1x_syncpt_read_max(m->syncpt + i); u32 min = host1x_syncpt_load(m->syncpt + i); + unsigned int waiters = 0; - if (!min && !max) + spin_lock(&m->syncpt[i].intr.lock); + list_for_each(pos, &m->syncpt[i].intr.wait_head) + waiters++; + spin_unlock(&m->syncpt[i].intr.lock); + + if (!min && !max && !waiters) continue; - host1x_debug_output(o, "id %u (%s) min %d max %d\n", - i, m->syncpt[i].name, min, max); + host1x_debug_output(o, + "id %u (%s) min %d max %d (%d waiters)\n", + i, m->syncpt[i].name, min, max, waiters); } for (i = 0; i < host1x_syncpt_nb_bases(m); i++) { -- cgit v1.2.3 From ecfb888ade427e2da437b48cafd8fc824e80c909 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Mon, 29 Mar 2021 16:38:30 +0300 Subject: gpu: host1x: Remove cancelled waiters immediately Before this patch, cancelled waiters would only be cleaned up once their threshold value was reached. Make host1x_intr_put_ref process the cancellation immediately to fix this. 
Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/host1x/intr.c | 23 +++++++++++++++++------ drivers/gpu/host1x/intr.h | 4 +++- drivers/gpu/host1x/syncpt.c | 2 +- 3 files changed, 21 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c index 9245add23b5d..69b0e8e41466 100644 --- a/drivers/gpu/host1x/intr.c +++ b/drivers/gpu/host1x/intr.c @@ -242,18 +242,29 @@ int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt, return 0; } -void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref) +void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref, + bool flush) { struct host1x_waitlist *waiter = ref; struct host1x_syncpt *syncpt; - while (atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED) == - WLS_REMOVED) - schedule(); + atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED); syncpt = host->syncpt + id; - (void)process_wait_list(host, syncpt, - host1x_syncpt_load(host->syncpt + id)); + + spin_lock(&syncpt->intr.lock); + if (atomic_cmpxchg(&waiter->state, WLS_CANCELLED, WLS_HANDLED) == + WLS_CANCELLED) { + list_del(&waiter->list); + kref_put(&waiter->refcount, waiter_release); + } + spin_unlock(&syncpt->intr.lock); + + if (flush) { + /* Wait until any concurrently executing handler has finished. */ + while (atomic_read(&waiter->state) != WLS_HANDLED) + schedule(); + } kref_put(&waiter->refcount, waiter_release); } diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h index aac38194398f..6ea55e615e3a 100644 --- a/drivers/gpu/host1x/intr.h +++ b/drivers/gpu/host1x/intr.h @@ -74,8 +74,10 @@ int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt, * Unreference an action submitted to host1x_intr_add_action(). * You must call this if you passed non-NULL as ref. 
* @ref the ref returned from host1x_intr_add_action() + * @flush wait until any pending handlers have completed before returning. */ -void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref); +void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref, + bool flush); /* Initialize host1x sync point interrupt */ int host1x_intr_init(struct host1x *host, unsigned int irq_sync); diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index 9a113016d482..f061dfd5bbc7 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -308,7 +308,7 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, } } - host1x_intr_put_ref(sp->host, sp->id, ref); + host1x_intr_put_ref(sp->host, sp->id, ref, true); done: return err; -- cgit v1.2.3 From f63b42cbc86e12f7d960d1fdaaf93b4373c06c65 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Mon, 29 Mar 2021 16:38:31 +0300 Subject: gpu: host1x: Use HW-equivalent syncpoint expiration check Make syncpoint expiration checks always use the same logic used by the hardware. This ensures that there are no race conditions that could occur because of the hardware triggering a syncpoint interrupt and then the driver disagreeing. One situation where this could occur is if a job incremented a syncpoint too many times -- then the hardware would trigger an interrupt, but the driver would assume that a syncpoint value greater than the syncpoint's max value is in the future, and not clean up the job. 
Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/host1x/syncpt.c | 51 ++------------------------------------------- 1 file changed, 2 insertions(+), 49 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index f061dfd5bbc7..8da4bbce8b9d 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -321,59 +321,12 @@ EXPORT_SYMBOL(host1x_syncpt_wait); bool host1x_syncpt_is_expired(struct host1x_syncpt *sp, u32 thresh) { u32 current_val; - u32 future_val; smp_rmb(); current_val = (u32)atomic_read(&sp->min_val); - future_val = (u32)atomic_read(&sp->max_val); - - /* Note the use of unsigned arithmetic here (mod 1<<32). - * - * c = current_val = min_val = the current value of the syncpoint. - * t = thresh = the value we are checking - * f = future_val = max_val = the value c will reach when all - * outstanding increments have completed. - * - * Note that c always chases f until it reaches f. - * - * Dtf = (f - t) - * Dtc = (c - t) - * - * Consider all cases: - * - * A) .....c..t..f..... Dtf < Dtc need to wait - * B) .....c.....f..t.. Dtf > Dtc expired - * C) ..t..c.....f..... Dtf > Dtc expired (Dct very large) - * - * Any case where f==c: always expired (for any t). Dtf == Dcf - * Any case where t==c: always expired (for any f). Dtf >= Dtc (because Dtc==0) - * Any case where t==f!=c: always wait. Dtf < Dtc (because Dtf==0, - * Dtc!=0) - * - * Other cases: - * - * A) .....t..f..c..... Dtf < Dtc need to wait - * A) .....f..c..t..... Dtf < Dtc need to wait - * A) .....f..t..c..... Dtf > Dtc expired - * - * So: - * Dtf >= Dtc implies EXPIRED (return true) - * Dtf < Dtc implies WAIT (return false) - * - * Note: If t is expired then we *cannot* wait on it. We would wait - * forever (hang the system). - * - * Note: do NOT get clever and remove the -thresh from both sides. It - * is NOT the same. - * - * If future valueis zero, we have a client managed sync point. 
In that - * case we do a direct comparison. - */ - if (!host1x_syncpt_client_managed(sp)) - return future_val - thresh >= current_val - thresh; - else - return (s32)(current_val - thresh) >= 0; + + return ((current_val - thresh) & 0x80000000U) == 0U; } int host1x_syncpt_init(struct host1x *host) -- cgit v1.2.3 From 2aed4f5ab04af922a7cf1b616701845c9ed2473f Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Mon, 29 Mar 2021 16:38:32 +0300 Subject: gpu: host1x: Cleanup and refcounting for syncpoints Add reference counting for allocated syncpoints to allow keeping them allocated while jobs are referencing them. Additionally, clean up various places using syncpoint IDs to use host1x_syncpt pointers instead. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/dc.c | 4 +- drivers/gpu/drm/tegra/drm.c | 14 +++---- drivers/gpu/drm/tegra/gr2d.c | 4 +- drivers/gpu/drm/tegra/gr3d.c | 4 +- drivers/gpu/drm/tegra/vic.c | 4 +- drivers/gpu/host1x/cdma.c | 11 ++--- drivers/gpu/host1x/dev.h | 7 ++-- drivers/gpu/host1x/hw/cdma_hw.c | 2 +- drivers/gpu/host1x/hw/channel_hw.c | 10 ++--- drivers/gpu/host1x/hw/debug_hw.c | 2 +- drivers/gpu/host1x/job.c | 5 ++- drivers/gpu/host1x/syncpt.c | 75 ++++++++++++++++++++++++++-------- drivers/gpu/host1x/syncpt.h | 3 ++ drivers/staging/media/tegra-video/vi.c | 6 +-- include/linux/host1x.h | 8 ++-- 15 files changed, 99 insertions(+), 60 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index c9385cfd0fc1..cfda71e151cc 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -2141,7 +2141,7 @@ cleanup: drm_plane_cleanup(primary); host1x_client_iommu_detach(client); - host1x_syncpt_free(dc->syncpt); + host1x_syncpt_put(dc->syncpt); return err; } @@ -2166,7 +2166,7 @@ static int tegra_dc_exit(struct host1x_client *client) } host1x_client_iommu_detach(client); - host1x_syncpt_free(dc->syncpt); + host1x_syncpt_put(dc->syncpt); return 0; } diff 
--git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 90709c38c993..ce5bdc58d315 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -174,7 +174,7 @@ int tegra_drm_submit(struct tegra_drm_context *context, struct drm_tegra_syncpt syncpt; struct host1x *host1x = dev_get_drvdata(drm->dev->parent); struct drm_gem_object **refs; - struct host1x_syncpt *sp; + struct host1x_syncpt *sp = NULL; struct host1x_job *job; unsigned int num_refs; int err; @@ -301,8 +301,8 @@ int tegra_drm_submit(struct tegra_drm_context *context, goto fail; } - /* check whether syncpoint ID is valid */ - sp = host1x_syncpt_get(host1x, syncpt.id); + /* Syncpoint ref will be dropped on job release. */ + sp = host1x_syncpt_get_by_id(host1x, syncpt.id); if (!sp) { err = -ENOENT; goto fail; @@ -311,7 +311,7 @@ int tegra_drm_submit(struct tegra_drm_context *context, job->is_addr_reg = context->client->ops->is_addr_reg; job->is_valid_class = context->client->ops->is_valid_class; job->syncpt_incrs = syncpt.incrs; - job->syncpt_id = syncpt.id; + job->syncpt = sp; job->timeout = 10000; if (args->timeout && args->timeout < 10000) @@ -383,7 +383,7 @@ static int tegra_syncpt_read(struct drm_device *drm, void *data, struct drm_tegra_syncpt_read *args = data; struct host1x_syncpt *sp; - sp = host1x_syncpt_get(host, args->id); + sp = host1x_syncpt_get_by_id_noref(host, args->id); if (!sp) return -EINVAL; @@ -398,7 +398,7 @@ static int tegra_syncpt_incr(struct drm_device *drm, void *data, struct drm_tegra_syncpt_incr *args = data; struct host1x_syncpt *sp; - sp = host1x_syncpt_get(host1x, args->id); + sp = host1x_syncpt_get_by_id_noref(host1x, args->id); if (!sp) return -EINVAL; @@ -412,7 +412,7 @@ static int tegra_syncpt_wait(struct drm_device *drm, void *data, struct drm_tegra_syncpt_wait *args = data; struct host1x_syncpt *sp; - sp = host1x_syncpt_get(host1x, args->id); + sp = host1x_syncpt_get_by_id_noref(host1x, args->id); if (!sp) return -EINVAL; diff --git 
a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c index adbe2ddcda19..de288cba3905 100644 --- a/drivers/gpu/drm/tegra/gr2d.c +++ b/drivers/gpu/drm/tegra/gr2d.c @@ -67,7 +67,7 @@ static int gr2d_init(struct host1x_client *client) detach: host1x_client_iommu_detach(client); free: - host1x_syncpt_free(client->syncpts[0]); + host1x_syncpt_put(client->syncpts[0]); put: host1x_channel_put(gr2d->channel); return err; @@ -86,7 +86,7 @@ static int gr2d_exit(struct host1x_client *client) return err; host1x_client_iommu_detach(client); - host1x_syncpt_free(client->syncpts[0]); + host1x_syncpt_put(client->syncpts[0]); host1x_channel_put(gr2d->channel); return 0; diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c index b0b8154e8104..24442ade0da3 100644 --- a/drivers/gpu/drm/tegra/gr3d.c +++ b/drivers/gpu/drm/tegra/gr3d.c @@ -76,7 +76,7 @@ static int gr3d_init(struct host1x_client *client) detach: host1x_client_iommu_detach(client); free: - host1x_syncpt_free(client->syncpts[0]); + host1x_syncpt_put(client->syncpts[0]); put: host1x_channel_put(gr3d->channel); return err; @@ -94,7 +94,7 @@ static int gr3d_exit(struct host1x_client *client) return err; host1x_client_iommu_detach(client); - host1x_syncpt_free(client->syncpts[0]); + host1x_syncpt_put(client->syncpts[0]); host1x_channel_put(gr3d->channel); return 0; diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index 77e128832920..72aea1cc0cfa 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -214,7 +214,7 @@ static int vic_init(struct host1x_client *client) return 0; free_syncpt: - host1x_syncpt_free(client->syncpts[0]); + host1x_syncpt_put(client->syncpts[0]); free_channel: host1x_channel_put(vic->channel); detach: @@ -238,7 +238,7 @@ static int vic_exit(struct host1x_client *client) if (err < 0) return err; - host1x_syncpt_free(client->syncpts[0]); + host1x_syncpt_put(client->syncpts[0]); host1x_channel_put(vic->channel); 
host1x_client_iommu_detach(client); diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index e8d3fda91d8a..6e6ca774f68d 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -273,15 +273,13 @@ static int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x, static void cdma_start_timer_locked(struct host1x_cdma *cdma, struct host1x_job *job) { - struct host1x *host = cdma_to_host1x(cdma); - if (cdma->timeout.client) { /* timer already started */ return; } cdma->timeout.client = job->client; - cdma->timeout.syncpt = host1x_syncpt_get(host, job->syncpt_id); + cdma->timeout.syncpt = job->syncpt; cdma->timeout.syncpt_val = job->syncpt_end; cdma->timeout.start_ktime = ktime_get(); @@ -312,7 +310,6 @@ static void stop_cdma_timer_locked(struct host1x_cdma *cdma) static void update_cdma_locked(struct host1x_cdma *cdma) { bool signal = false; - struct host1x *host1x = cdma_to_host1x(cdma); struct host1x_job *job, *n; /* If CDMA is stopped, queue is cleared and we can return */ @@ -324,8 +321,7 @@ static void update_cdma_locked(struct host1x_cdma *cdma) * to consume as many sync queue entries as possible without blocking */ list_for_each_entry_safe(job, n, &cdma->sync_queue, list) { - struct host1x_syncpt *sp = - host1x_syncpt_get(host1x, job->syncpt_id); + struct host1x_syncpt *sp = job->syncpt; /* Check whether this syncpt has completed, and bail if not */ if (!host1x_syncpt_is_expired(sp, job->syncpt_end)) { @@ -499,8 +495,7 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job) if (!cdma->timeout.initialized) { int err; - err = host1x_hw_cdma_timeout_init(host1x, cdma, - job->syncpt_id); + err = host1x_hw_cdma_timeout_init(host1x, cdma); if (err) { mutex_unlock(&cdma->lock); return err; diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index f781a9b0f39d..63010ae37a97 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -37,7 +37,7 @@ struct host1x_cdma_ops { void (*start)(struct 
host1x_cdma *cdma); void (*stop)(struct host1x_cdma *cdma); void (*flush)(struct host1x_cdma *cdma); - int (*timeout_init)(struct host1x_cdma *cdma, unsigned int syncpt); + int (*timeout_init)(struct host1x_cdma *cdma); void (*timeout_destroy)(struct host1x_cdma *cdma); void (*freeze)(struct host1x_cdma *cdma); void (*resume)(struct host1x_cdma *cdma, u32 getptr); @@ -261,10 +261,9 @@ static inline void host1x_hw_cdma_flush(struct host1x *host, } static inline int host1x_hw_cdma_timeout_init(struct host1x *host, - struct host1x_cdma *cdma, - unsigned int syncpt) + struct host1x_cdma *cdma) { - return host->cdma_op->timeout_init(cdma, syncpt); + return host->cdma_op->timeout_init(cdma); } static inline void host1x_hw_cdma_timeout_destroy(struct host1x *host, diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c index 2f3bf94cf365..e49cd5b8f735 100644 --- a/drivers/gpu/host1x/hw/cdma_hw.c +++ b/drivers/gpu/host1x/hw/cdma_hw.c @@ -295,7 +295,7 @@ static void cdma_timeout_handler(struct work_struct *work) /* * Init timeout resources */ -static int cdma_timeout_init(struct host1x_cdma *cdma, unsigned int syncpt) +static int cdma_timeout_init(struct host1x_cdma *cdma) { INIT_DELAYED_WORK(&cdma->timeout.wq, cdma_timeout_handler); cdma->timeout.initialized = true; diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index 5eaa29d171c9..d4c28faf27d1 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -86,8 +86,7 @@ static void submit_gathers(struct host1x_job *job) static inline void synchronize_syncpt_base(struct host1x_job *job) { - struct host1x *host = dev_get_drvdata(job->channel->dev->parent); - struct host1x_syncpt *sp = host->syncpt + job->syncpt_id; + struct host1x_syncpt *sp = job->syncpt; unsigned int id; u32 value; @@ -118,7 +117,7 @@ static void host1x_channel_set_streamid(struct host1x_channel *channel) static int channel_submit(struct host1x_job *job) { struct 
host1x_channel *ch = job->channel; - struct host1x_syncpt *sp; + struct host1x_syncpt *sp = job->syncpt; u32 user_syncpt_incrs = job->syncpt_incrs; u32 prev_max = 0; u32 syncval; @@ -126,10 +125,9 @@ static int channel_submit(struct host1x_job *job) struct host1x_waitlist *completed_waiter = NULL; struct host1x *host = dev_get_drvdata(ch->dev->parent); - sp = host->syncpt + job->syncpt_id; trace_host1x_channel_submit(dev_name(ch->dev), job->num_gathers, job->num_relocs, - job->syncpt_id, job->syncpt_incrs); + job->syncpt->id, job->syncpt_incrs); /* before error checks, return current max */ prev_max = job->syncpt_end = host1x_syncpt_read_max(sp); @@ -163,7 +161,7 @@ static int channel_submit(struct host1x_job *job) host1x_cdma_push(&ch->cdma, host1x_opcode_setclass(HOST1X_CLASS_HOST1X, host1x_uclass_wait_syncpt_r(), 1), - host1x_class_host_wait_syncpt(job->syncpt_id, + host1x_class_host_wait_syncpt(job->syncpt->id, host1x_syncpt_read_max(sp))); } diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c index f31bcfa1b837..ceb48229d14b 100644 --- a/drivers/gpu/host1x/hw/debug_hw.c +++ b/drivers/gpu/host1x/hw/debug_hw.c @@ -204,7 +204,7 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma) unsigned int i; host1x_debug_output(o, "\n%p: JOB, syncpt_id=%d, syncpt_val=%d, first_get=%08x, timeout=%d num_slots=%d, num_handles=%d\n", - job, job->syncpt_id, job->syncpt_end, + job, job->syncpt->id, job->syncpt_end, job->first_get, job->timeout, job->num_slots, job->num_unpins); diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 82d0a60ba3f7..adbdc225de8d 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -79,6 +79,9 @@ static void job_free(struct kref *ref) { struct host1x_job *job = container_of(ref, struct host1x_job, ref); + if (job->syncpt) + host1x_syncpt_put(job->syncpt); + kfree(job); } @@ -674,7 +677,7 @@ EXPORT_SYMBOL(host1x_job_unpin); */ void host1x_job_dump(struct device 
*dev, struct host1x_job *job) { - dev_dbg(dev, " SYNCPT_ID %d\n", job->syncpt_id); + dev_dbg(dev, " SYNCPT_ID %d\n", job->syncpt->id); dev_dbg(dev, " SYNCPT_VAL %d\n", job->syncpt_end); dev_dbg(dev, " FIRST_GET 0x%x\n", job->first_get); dev_dbg(dev, " TIMEOUT %d\n", job->timeout); diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index 8da4bbce8b9d..7bb5de8c3d63 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -90,6 +90,8 @@ struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host, else sp->client_managed = false; + kref_init(&sp->ref); + mutex_unlock(&host->syncpt_mutex); return sp; @@ -383,7 +385,7 @@ int host1x_syncpt_init(struct host1x *host) * host1x client drivers can use this function to allocate a syncpoint for * subsequent use. A syncpoint returned by this function will be reserved for * use by the client exclusively. When no longer using a syncpoint, a host1x - * client driver needs to release it using host1x_syncpt_free(). + * client driver needs to release it using host1x_syncpt_put(). */ struct host1x_syncpt *host1x_syncpt_request(struct host1x_client *client, unsigned long flags) @@ -394,20 +396,9 @@ struct host1x_syncpt *host1x_syncpt_request(struct host1x_client *client, } EXPORT_SYMBOL(host1x_syncpt_request); -/** - * host1x_syncpt_free() - free a requested syncpoint - * @sp: host1x syncpoint - * - * Release a syncpoint previously allocated using host1x_syncpt_request(). A - * host1x client driver should call this when the syncpoint is no longer in - * use. Note that client drivers must ensure that the syncpoint doesn't remain - * under the control of hardware after calling this function, otherwise two - * clients may end up trying to access the same syncpoint concurrently. 
- */ -void host1x_syncpt_free(struct host1x_syncpt *sp) +static void syncpt_release(struct kref *ref) { - if (!sp) - return; + struct host1x_syncpt *sp = container_of(ref, struct host1x_syncpt, ref); mutex_lock(&sp->host->syncpt_mutex); @@ -419,7 +410,23 @@ void host1x_syncpt_free(struct host1x_syncpt *sp) mutex_unlock(&sp->host->syncpt_mutex); } -EXPORT_SYMBOL(host1x_syncpt_free); + +/** + * host1x_syncpt_put() - free a requested syncpoint + * @sp: host1x syncpoint + * + * Release a syncpoint previously allocated using host1x_syncpt_request(). A + * host1x client driver should call this when the syncpoint is no longer in + * use. + */ +void host1x_syncpt_put(struct host1x_syncpt *sp) +{ + if (!sp) + return; + + kref_put(&sp->ref, syncpt_release); +} +EXPORT_SYMBOL(host1x_syncpt_put); void host1x_syncpt_deinit(struct host1x *host) { @@ -486,16 +493,48 @@ unsigned int host1x_syncpt_nb_mlocks(struct host1x *host) } /** - * host1x_syncpt_get() - obtain a syncpoint by ID + * host1x_syncpt_get_by_id() - obtain a syncpoint by ID + * @host: host1x controller + * @id: syncpoint ID + */ +struct host1x_syncpt *host1x_syncpt_get_by_id(struct host1x *host, + unsigned int id) +{ + if (id >= host->info->nb_pts) + return NULL; + + if (kref_get_unless_zero(&host->syncpt[id].ref)) + return &host->syncpt[id]; + else + return NULL; +} +EXPORT_SYMBOL(host1x_syncpt_get_by_id); + +/** + * host1x_syncpt_get_by_id_noref() - obtain a syncpoint by ID but don't + * increase the refcount. 
* @host: host1x controller * @id: syncpoint ID */ -struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, unsigned int id) +struct host1x_syncpt *host1x_syncpt_get_by_id_noref(struct host1x *host, + unsigned int id) { if (id >= host->info->nb_pts) return NULL; - return host->syncpt + id; + return &host->syncpt[id]; +} +EXPORT_SYMBOL(host1x_syncpt_get_by_id_noref); + +/** + * host1x_syncpt_get() - increment syncpoint refcount + * @sp: syncpoint + */ +struct host1x_syncpt *host1x_syncpt_get(struct host1x_syncpt *sp) +{ + kref_get(&sp->ref); + + return sp; } EXPORT_SYMBOL(host1x_syncpt_get); diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h index 3aa6b25b1b9c..a6766f8d55ee 100644 --- a/drivers/gpu/host1x/syncpt.h +++ b/drivers/gpu/host1x/syncpt.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "intr.h" @@ -26,6 +27,8 @@ struct host1x_syncpt_base { }; struct host1x_syncpt { + struct kref ref; + unsigned int id; atomic_t min_val; atomic_t max_val; diff --git a/drivers/staging/media/tegra-video/vi.c b/drivers/staging/media/tegra-video/vi.c index 7a09061cda57..df5ca3596470 100644 --- a/drivers/staging/media/tegra-video/vi.c +++ b/drivers/staging/media/tegra-video/vi.c @@ -1131,8 +1131,8 @@ static void tegra_channel_host1x_syncpts_free(struct tegra_vi_channel *chan) int i; for (i = 0; i < chan->numgangports; i++) { - host1x_syncpt_free(chan->mw_ack_sp[i]); - host1x_syncpt_free(chan->frame_start_sp[i]); + host1x_syncpt_put(chan->mw_ack_sp[i]); + host1x_syncpt_put(chan->frame_start_sp[i]); } } @@ -1177,7 +1177,7 @@ static int tegra_channel_host1x_syncpt_init(struct tegra_vi_channel *chan) mw_sp = host1x_syncpt_request(&vi->client, flags); if (!mw_sp) { dev_err(vi->dev, "failed to request memory ack syncpoint\n"); - host1x_syncpt_free(fs_sp); + host1x_syncpt_put(fs_sp); ret = -ENOMEM; goto free_syncpts; } diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 7137ce0e35d4..107aea29bccb 100644 --- 
a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -142,7 +142,9 @@ struct host1x_syncpt_base; struct host1x_syncpt; struct host1x; -struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, u32 id); +struct host1x_syncpt *host1x_syncpt_get_by_id(struct host1x *host, u32 id); +struct host1x_syncpt *host1x_syncpt_get_by_id_noref(struct host1x *host, u32 id); +struct host1x_syncpt *host1x_syncpt_get(struct host1x_syncpt *sp); u32 host1x_syncpt_id(struct host1x_syncpt *sp); u32 host1x_syncpt_read_min(struct host1x_syncpt *sp); u32 host1x_syncpt_read_max(struct host1x_syncpt *sp); @@ -153,7 +155,7 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, u32 *value); struct host1x_syncpt *host1x_syncpt_request(struct host1x_client *client, unsigned long flags); -void host1x_syncpt_free(struct host1x_syncpt *sp); +void host1x_syncpt_put(struct host1x_syncpt *sp); struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host, unsigned long flags, const char *name); @@ -221,7 +223,7 @@ struct host1x_job { dma_addr_t *reloc_addr_phys; /* Sync point id, number of increments and end related to the submit */ - u32 syncpt_id; + struct host1x_syncpt *syncpt; u32 syncpt_incrs; u32 syncpt_end; -- cgit v1.2.3 From aded42ada6eacfa11d349b158e993f66e4741aa7 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Mon, 29 Mar 2021 16:38:33 +0300 Subject: gpu: host1x: Reset max value when freeing a syncpoint With job recovery becoming optional, syncpoints may have a mismatch between their value and max value when freed. As such, when freeing, set the max value to the current value of the syncpoint so that it is in a sane state for the next user. 
Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/host1x/syncpt.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index 7bb5de8c3d63..877c5ab40cbd 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -400,6 +400,8 @@ static void syncpt_release(struct kref *ref) { struct host1x_syncpt *sp = container_of(ref, struct host1x_syncpt, ref); + atomic_set(&sp->max_val, host1x_syncpt_read(sp)); + mutex_lock(&sp->host->syncpt_mutex); host1x_syncpt_base_free(sp->base); -- cgit v1.2.3 From f5ba33fb9690566c382624637125827b5512e766 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Mon, 29 Mar 2021 16:38:34 +0300 Subject: gpu: host1x: Reserve VBLANK syncpoints at initialization On T20-T148 chips, the bootloader can set up a boot splash screen with DC configured to increment syncpoint 26/27 at VBLANK. Because of this we shouldn't allow these syncpoints to be allocated until DC has been reset and will no longer increment them in the background. As such, on these chips, reserve those two syncpoints at initialization, and only mark them free once the DC driver has indicated it's safe to do so. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/dc.c | 6 ++++++ drivers/gpu/host1x/dev.c | 6 ++++++ drivers/gpu/host1x/dev.h | 6 ++++++ drivers/gpu/host1x/syncpt.c | 35 ++++++++++++++++++++++++++++++++++- include/linux/host1x.h | 3 +++ 5 files changed, 55 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index cfda71e151cc..40bf8f33a2ae 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -2045,6 +2045,12 @@ static int tegra_dc_init(struct host1x_client *client) struct drm_plane *cursor = NULL; int err; + /* + * DC has been reset by now, so VBLANK syncpoint can be released + * for general use. 
+ */ + host1x_syncpt_release_vblank_reservation(client, 26 + dc->pipe); + /* * XXX do not register DCs with no window groups because we cannot * assign a primary plane to them, which in turn will cause KMS to diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index d0ebb70e2fdd..fbb6447b8659 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -77,6 +77,7 @@ static const struct host1x_info host1x01_info = { .has_hypervisor = false, .num_sid_entries = 0, .sid_table = NULL, + .reserve_vblank_syncpts = true, }; static const struct host1x_info host1x02_info = { @@ -91,6 +92,7 @@ static const struct host1x_info host1x02_info = { .has_hypervisor = false, .num_sid_entries = 0, .sid_table = NULL, + .reserve_vblank_syncpts = true, }; static const struct host1x_info host1x04_info = { @@ -105,6 +107,7 @@ static const struct host1x_info host1x04_info = { .has_hypervisor = false, .num_sid_entries = 0, .sid_table = NULL, + .reserve_vblank_syncpts = false, }; static const struct host1x_info host1x05_info = { @@ -119,6 +122,7 @@ static const struct host1x_info host1x05_info = { .has_hypervisor = false, .num_sid_entries = 0, .sid_table = NULL, + .reserve_vblank_syncpts = false, }; static const struct host1x_sid_entry tegra186_sid_table[] = { @@ -142,6 +146,7 @@ static const struct host1x_info host1x06_info = { .has_hypervisor = true, .num_sid_entries = ARRAY_SIZE(tegra186_sid_table), .sid_table = tegra186_sid_table, + .reserve_vblank_syncpts = false, }; static const struct host1x_sid_entry tegra194_sid_table[] = { @@ -165,6 +170,7 @@ static const struct host1x_info host1x07_info = { .has_hypervisor = true, .num_sid_entries = ARRAY_SIZE(tegra194_sid_table), .sid_table = tegra194_sid_table, + .reserve_vblank_syncpts = false, }; static const struct of_device_id host1x_of_match[] = { diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index 63010ae37a97..fa6d4bc46e98 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ 
-101,6 +101,12 @@ struct host1x_info { bool has_hypervisor; /* has hypervisor registers */ unsigned int num_sid_entries; const struct host1x_sid_entry *sid_table; + /* + * On T20-T148, the boot chain may set up DC to increment syncpoints + * 26/27 on VBLANK. As such we cannot use these syncpoints until + * the display driver disables VBLANK increments. + */ + bool reserve_vblank_syncpts; }; struct host1x { diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index 877c5ab40cbd..e648ebbb2027 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -67,7 +67,7 @@ struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host, mutex_lock(&host->syncpt_mutex); - for (i = 0; i < host->info->nb_pts && sp->name; i++, sp++) + for (i = 0; i < host->info->nb_pts && kref_read(&sp->ref); i++, sp++) ; if (i >= host->info->nb_pts) @@ -374,6 +374,11 @@ int host1x_syncpt_init(struct host1x *host) if (!host->nop_sp) return -ENOMEM; + if (host->info->reserve_vblank_syncpts) { + kref_init(&host->syncpt[26].ref); + kref_init(&host->syncpt[27].ref); + } + return 0; } @@ -559,3 +564,31 @@ u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base) return base->id; } EXPORT_SYMBOL(host1x_syncpt_base_id); + +static void do_nothing(struct kref *ref) +{ +} + +/** + * host1x_syncpt_release_vblank_reservation() - Make VBLANK syncpoint + * available for allocation + * + * @client: host1x bus client + * @syncpt_id: syncpoint ID to make available + * + * Makes VBLANK syncpoint available for allocation if it was + * reserved at initialization time. This should be called by the display + * driver after it has ensured that any VBLANK increment programming configured + * by the boot chain has been disabled.
+ */ +void host1x_syncpt_release_vblank_reservation(struct host1x_client *client, + u32 syncpt_id) +{ + struct host1x *host = dev_get_drvdata(client->host->parent); + + if (!host->info->reserve_vblank_syncpts) + return; + + kref_put(&host->syncpt[syncpt_id].ref, do_nothing); +} +EXPORT_SYMBOL(host1x_syncpt_release_vblank_reservation); diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 107aea29bccb..e0a41c2b4c7a 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -163,6 +163,9 @@ struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host, struct host1x_syncpt_base *host1x_syncpt_get_base(struct host1x_syncpt *sp); u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base); +void host1x_syncpt_release_vblank_reservation(struct host1x_client *client, + u32 syncpt_id); + /* * host1x channel */ -- cgit v1.2.3 From 5a8d95d20c406c673258edd4c2bd308c22304657 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Mon, 29 Mar 2021 16:38:35 +0300 Subject: gpu: host1x: Assign intr waiter inside lock Move the assignment of the ref out-pointer in host1x_intr_add_action to happen within the spinlock. With the current arrangement, it is possible for the waiter to complete before the assignment has happened, which breaks horribly if the waiter completion callback tries to use the reference. In practice, there is currently no situation where this issue can manifest -- it was first noticed with the upcoming DMA fence implementation patches. As such this doesn't need to be backported. 
Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/host1x/intr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c index 69b0e8e41466..6d1f3c0fdbe7 100644 --- a/drivers/gpu/host1x/intr.c +++ b/drivers/gpu/host1x/intr.c @@ -235,10 +235,11 @@ int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt, host1x_hw_intr_enable_syncpt_intr(host, syncpt->id); } - spin_unlock(&syncpt->intr.lock); - if (ref) *ref = waiter; + + spin_unlock(&syncpt->intr.lock); + return 0; } -- cgit v1.2.3 From d3555eb7f8c01b9c16d400af9533555757a2c264 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Mon, 29 Mar 2021 16:38:36 +0300 Subject: gpu: host1x: Fix Tegra194 syncpt interrupt threshold Syncpoint interrupts are not working as expected on Tegra194. The problem is that the syncpoint interrupt threshold being used is the global interrupt threshold and not the virtual interrupt threshold. Fix this by using the virtual interrupt threshold which aligns with downstream. 
Signed-off-by: Jon Hunter Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/host1x/hw/hw_host1x07_vm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/host1x/hw/hw_host1x07_vm.h b/drivers/gpu/host1x/hw/hw_host1x07_vm.h index 3058b3c9a91d..b766851d5b83 100644 --- a/drivers/gpu/host1x/hw/hw_host1x07_vm.h +++ b/drivers/gpu/host1x/hw/hw_host1x07_vm.h @@ -29,6 +29,6 @@ #define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(x) (0x652c + 4 * (x)) #define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(x) (0x6590 + 4 * (x)) #define HOST1X_SYNC_SYNCPT(x) (0x8080 + 4 * (x)) -#define HOST1X_SYNC_SYNCPT_INT_THRESH(x) (0x8d00 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_INT_THRESH(x) (0x9980 + 4 * (x)) #define HOST1X_SYNC_SYNCPT_CH_APP(x) (0xa604 + 4 * (x)) #define HOST1X_SYNC_SYNCPT_CH_APP_CH(v) (((v) & 0x3f) << 8) -- cgit v1.2.3 From 933deb8c7b8e3f83e3dbd0b08e3cad51350d44c4 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 26 Mar 2021 15:51:37 +0100 Subject: gpu: host1x: Add early init and late exit callbacks These callbacks can be used by client drivers to run code during early init and during late exit. Early init callbacks are run prior to the regular init callbacks while late exit callbacks run after the regular exit callbacks. 
Signed-off-by: Thierry Reding --- drivers/gpu/host1x/bus.c | 31 +++++++++++++++++++++++++++++++ include/linux/host1x.h | 4 ++++ 2 files changed, 35 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c index 68a766ff0e9d..46f69c532b6b 100644 --- a/drivers/gpu/host1x/bus.c +++ b/drivers/gpu/host1x/bus.c @@ -196,6 +196,17 @@ int host1x_device_init(struct host1x_device *device) mutex_lock(&device->clients_lock); + list_for_each_entry(client, &device->clients, list) { + if (client->ops && client->ops->early_init) { + err = client->ops->early_init(client); + if (err < 0) { + dev_err(&device->dev, "failed to early initialize %s: %d\n", + dev_name(client->dev), err); + goto teardown_late; + } + } + } + list_for_each_entry(client, &device->clients, list) { if (client->ops && client->ops->init) { err = client->ops->init(client); @@ -217,6 +228,14 @@ teardown: if (client->ops->exit) client->ops->exit(client); + /* reset client to end of list for late teardown */ + client = list_entry(&device->clients, struct host1x_client, list); + +teardown_late: + list_for_each_entry_continue_reverse(client, &device->clients, list) + if (client->ops->late_exit) + client->ops->late_exit(client); + mutex_unlock(&device->clients_lock); return err; } @@ -251,6 +270,18 @@ int host1x_device_exit(struct host1x_device *device) } } + list_for_each_entry_reverse(client, &device->clients, list) { + if (client->ops && client->ops->late_exit) { + err = client->ops->late_exit(client); + if (err < 0) { + dev_err(&device->dev, "failed to late cleanup %s: %d\n", + dev_name(client->dev), err); + mutex_unlock(&device->clients_lock); + return err; + } + } + } + mutex_unlock(&device->clients_lock); return 0; diff --git a/include/linux/host1x.h b/include/linux/host1x.h index e0a41c2b4c7a..232e1bd507a7 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -25,14 +25,18 @@ u64 host1x_get_dma_mask(struct host1x *host1x); /** * struct host1x_client_ops - 
host1x client operations + * @early_init: host1x client early initialization code * @init: host1x client initialization code * @exit: host1x client tear down code + * @late_exit: host1x client late tear down code * @suspend: host1x client suspend code * @resume: host1x client resume code */ struct host1x_client_ops { + int (*early_init)(struct host1x_client *client); int (*init)(struct host1x_client *client); int (*exit)(struct host1x_client *client); + int (*late_exit)(struct host1x_client *client); int (*suspend)(struct host1x_client *client); int (*resume)(struct host1x_client *client); }; -- cgit v1.2.3 From 86044e749be77a3544990027244abb20ddb3caf0 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 26 Mar 2021 15:51:32 +0100 Subject: drm/tegra: dc: Inherit DMA mask Inherit the DMA mask from host1x (on Tegra210 and earlier) or the display hub (on Tegra186 and later). This is necessary in order to properly map buffers without SMMU support and use the maximum IOVA space available with SMMU support. 
Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/dc.c | 7 +++++++ drivers/gpu/drm/tegra/hub.c | 7 +++++++ 2 files changed, 14 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 40bf8f33a2ae..935717e7410d 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -2538,9 +2538,16 @@ static int tegra_dc_couple(struct tegra_dc *dc) static int tegra_dc_probe(struct platform_device *pdev) { + u64 dma_mask = dma_get_mask(pdev->dev.parent); struct tegra_dc *dc; int err; + err = dma_coerce_mask_and_coherent(&pdev->dev, dma_mask); + if (err < 0) { + dev_err(&pdev->dev, "failed to set DMA mask: %d\n", err); + return err; + } + dc = devm_kzalloc(&pdev->dev, sizeof(*dc), GFP_KERNEL); if (!dc) return -ENOMEM; diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c index 8e6d329d062b..617240032c37 100644 --- a/drivers/gpu/drm/tegra/hub.c +++ b/drivers/gpu/drm/tegra/hub.c @@ -848,12 +848,19 @@ static const struct host1x_client_ops tegra_display_hub_ops = { static int tegra_display_hub_probe(struct platform_device *pdev) { + u64 dma_mask = dma_get_mask(pdev->dev.parent); struct device_node *child = NULL; struct tegra_display_hub *hub; struct clk *clk; unsigned int i; int err; + err = dma_coerce_mask_and_coherent(&pdev->dev, dma_mask); + if (err < 0) { + dev_err(&pdev->dev, "failed to set DMA mask: %d\n", err); + return err; + } + hub = devm_kzalloc(&pdev->dev, sizeof(*hub), GFP_KERNEL); if (!hub) return -ENOMEM; -- cgit v1.2.3 From 042c0bd76d7b053ea6bc47e4db471d5cfc7f19c5 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 26 Mar 2021 15:51:33 +0100 Subject: drm/tegra: dc: Parameterize maximum resolution Tegra186 and later support a higher maximum resolution than earlier chips, so make sure to reflect that in the mode configuration. 
Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/dc.c | 6 ++++++ drivers/gpu/drm/tegra/drm.c | 13 ++++++++++--- drivers/gpu/drm/tegra/drm.h | 1 + 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 935717e7410d..0541d7b5c841 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -2117,6 +2117,12 @@ static int tegra_dc_init(struct host1x_client *client) if (dc->soc->pitch_align > tegra->pitch_align) tegra->pitch_align = dc->soc->pitch_align; + /* track maximum resolution */ + if (dc->soc->has_nvdisplay) + drm->mode_config.max_width = drm->mode_config.max_height = 16384; + else + drm->mode_config.max_width = drm->mode_config.max_height = 4096; + err = tegra_dc_rgb_init(drm, dc); if (err < 0 && err != -ENODEV) { dev_err(dc->dev, "failed to initialize RGB output: %d\n", err); diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index ce5bdc58d315..0c350b0daab4 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1121,9 +1121,8 @@ static int host1x_drm_probe(struct host1x_device *dev) drm->mode_config.min_width = 0; drm->mode_config.min_height = 0; - - drm->mode_config.max_width = 4096; - drm->mode_config.max_height = 4096; + drm->mode_config.max_width = 0; + drm->mode_config.max_height = 0; drm->mode_config.allow_fb_modifiers = true; @@ -1142,6 +1141,14 @@ static int host1x_drm_probe(struct host1x_device *dev) if (err < 0) goto fbdev; + /* + * Now that all display controllers have been initialized, the maximum + * supported resolution is known and the bitmask for horizontal and + * vertical bitfields can be computed.
+ */ + tegra->hmask = drm->mode_config.max_width - 1; + tegra->vmask = drm->mode_config.max_height - 1; + if (tegra->use_explicit_iommu) { u64 carveout_start, carveout_end, gem_start, gem_end; u64 dma_mask = dma_get_mask(&dev->dev); diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h index f38de08e0c95..1b23bb0e29e3 100644 --- a/drivers/gpu/drm/tegra/drm.h +++ b/drivers/gpu/drm/tegra/drm.h @@ -54,6 +54,7 @@ struct tegra_drm { struct tegra_fbdev *fbdev; #endif + unsigned int hmask, vmask; unsigned int pitch_align; struct tegra_display_hub *hub; -- cgit v1.2.3 From d5ec699db5bb15f981f1632d058ffe857d649765 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 26 Mar 2021 15:51:34 +0100 Subject: drm/tegra: dc: Implement hardware cursor on Tegra186 and later The hardware cursor on Tegra186 differs slightly from the implementation on older SoC generations. In particular the new implementation relies on software for clipping the cursor against the screen. Fortunately, atomic KMS already computes clipped coordinates for (cursor) planes, so this is trivial to implement. The format supported by the hardware cursor is also slightly different. 
v2: use more drm_rect helpers (Dmitry) Signed-off-by: Thierry Reding Reviewed-by: Dmitry Osipenko Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/dc.c | 61 ++++++++++++++++++++++++++++++++++++++++------ drivers/gpu/drm/tegra/dc.h | 5 ++++ 2 files changed, 58 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 0541d7b5c841..bc8756d06b3e 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -832,10 +832,14 @@ static struct drm_plane *tegra_primary_plane_create(struct drm_device *drm, return &plane->base; } -static const u32 tegra_cursor_plane_formats[] = { +static const u32 tegra_legacy_cursor_plane_formats[] = { DRM_FORMAT_RGBA8888, }; +static const u32 tegra_cursor_plane_formats[] = { + DRM_FORMAT_ARGB8888, +}; + static int tegra_cursor_atomic_check(struct drm_plane *plane, struct drm_atomic_state *state) { @@ -875,12 +879,24 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane, plane); struct tegra_plane_state *tegra_plane_state = to_tegra_plane_state(new_state); struct tegra_dc *dc = to_tegra_dc(new_state->crtc); - u32 value = CURSOR_CLIP_DISPLAY; + struct tegra_drm *tegra = plane->dev->dev_private; +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + u64 dma_mask = *dc->dev->dma_mask; +#endif + unsigned int x, y; + u32 value = 0; /* rien ne va plus */ if (!new_state->crtc || !new_state->fb) return; + /* + * Legacy display supports hardware clipping of the cursor, but + * nvdisplay relies on software to clip the cursor to the screen. 
+ */ + if (!dc->soc->has_nvdisplay) + value |= CURSOR_CLIP_DISPLAY; + switch (new_state->crtc_w) { case 32: value |= CURSOR_SIZE_32x32; @@ -908,7 +924,7 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane, tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - value = (tegra_plane_state->iova[0] >> 32) & 0x3; + value = (tegra_plane_state->iova[0] >> 32) & (dma_mask >> 32); tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR_HI); #endif @@ -920,15 +936,39 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane, value = tegra_dc_readl(dc, DC_DISP_BLEND_CURSOR_CONTROL); value &= ~CURSOR_DST_BLEND_MASK; value &= ~CURSOR_SRC_BLEND_MASK; - value |= CURSOR_MODE_NORMAL; + + if (dc->soc->has_nvdisplay) + value &= ~CURSOR_COMPOSITION_MODE_XOR; + else + value |= CURSOR_MODE_NORMAL; + value |= CURSOR_DST_BLEND_NEG_K1_TIMES_SRC; value |= CURSOR_SRC_BLEND_K1_TIMES_SRC; value |= CURSOR_ALPHA; tegra_dc_writel(dc, value, DC_DISP_BLEND_CURSOR_CONTROL); + /* nvdisplay relies on software for clipping */ + if (dc->soc->has_nvdisplay) { + struct drm_rect src; + + x = new_state->dst.x1; + y = new_state->dst.y1; + + drm_rect_fp_to_int(&src, &new_state->src); + + value = (src.y1 & tegra->vmask) << 16 | (src.x1 & tegra->hmask); + tegra_dc_writel(dc, value, DC_DISP_PCALC_HEAD_SET_CROPPED_POINT_IN_CURSOR); + + value = (drm_rect_height(&src) & tegra->vmask) << 16 | + (drm_rect_width(&src) & tegra->hmask); + tegra_dc_writel(dc, value, DC_DISP_PCALC_HEAD_SET_CROPPED_SIZE_IN_CURSOR); + } else { + x = new_state->crtc_x; + y = new_state->crtc_y; + } + /* position the cursor */ - value = (new_state->crtc_y & 0x3fff) << 16 | - (new_state->crtc_x & 0x3fff); + value = ((y & tegra->vmask) << 16) | (x & tegra->hmask); tegra_dc_writel(dc, value, DC_DISP_CURSOR_POSITION); } @@ -982,8 +1022,13 @@ static struct drm_plane *tegra_dc_cursor_plane_create(struct drm_device *drm, plane->index = 6; plane->dc = dc; - num_formats = 
ARRAY_SIZE(tegra_cursor_plane_formats); - formats = tegra_cursor_plane_formats; + if (!dc->soc->has_nvdisplay) { + num_formats = ARRAY_SIZE(tegra_legacy_cursor_plane_formats); + formats = tegra_legacy_cursor_plane_formats; + } else { + num_formats = ARRAY_SIZE(tegra_cursor_plane_formats); + formats = tegra_cursor_plane_formats; + } err = drm_universal_plane_init(drm, &plane->base, possible_crtcs, &tegra_plane_funcs, formats, diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h index 051d03dcb9b0..21074cd2ce5e 100644 --- a/drivers/gpu/drm/tegra/dc.h +++ b/drivers/gpu/drm/tegra/dc.h @@ -511,6 +511,8 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc); #define DC_DISP_CURSOR_START_ADDR_HI 0x4ec #define DC_DISP_BLEND_CURSOR_CONTROL 0x4f1 +#define CURSOR_COMPOSITION_MODE_BLEND (0 << 25) +#define CURSOR_COMPOSITION_MODE_XOR (1 << 25) #define CURSOR_MODE_LEGACY (0 << 24) #define CURSOR_MODE_NORMAL (1 << 24) #define CURSOR_DST_BLEND_ZERO (0 << 16) @@ -705,6 +707,9 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc); #define PROTOCOL_MASK (0xf << 8) #define PROTOCOL_SINGLE_TMDS_A (0x1 << 8) +#define DC_DISP_PCALC_HEAD_SET_CROPPED_POINT_IN_CURSOR 0x442 +#define DC_DISP_PCALC_HEAD_SET_CROPPED_SIZE_IN_CURSOR 0x446 + #define DC_WIN_CORE_WINDOWGROUP_SET_CONTROL 0x702 #define OWNER_MASK (0xf << 0) #define OWNER(x) (((x) & 0xf) << 0) -- cgit v1.2.3 From 4def888d4158ee8977995664bb55fa50894645d2 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 26 Mar 2021 15:51:35 +0100 Subject: drm/tegra: fb: Add diagnostics for framebuffer modifiers Add a debug message to let the user know when a framebuffer modifier is not supported. 
Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/fb.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c index 01939c57fc74..350f33206076 100644 --- a/drivers/gpu/drm/tegra/fb.c +++ b/drivers/gpu/drm/tegra/fb.c @@ -86,6 +86,7 @@ int tegra_fb_get_tiling(struct drm_framebuffer *framebuffer, break; default: + DRM_DEBUG_KMS("unknown format modifier: %llx\n", modifier); return -EINVAL; } -- cgit v1.2.3 From 476e93205ff61a6507bcba28f4f01269b65ebb38 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 26 Mar 2021 15:51:36 +0100 Subject: drm/tegra: gem: Add a clarifying comment Clarify when a fixed IOV address can be used and when a buffer has to be mapped before the IOVA can be used. Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/plane.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index 19e8847a164b..793da5d675d2 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -119,6 +119,14 @@ static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state) dma_addr_t phys_addr, *phys; struct sg_table *sgt; + /* + * If we're not attached to a domain, we already stored the + * physical address when the buffer was allocated. If we're + * part of a group that's shared between all display + * controllers, we've also already mapped the framebuffer + * through the SMMU. In both cases we can short-circuit the + * code below and retrieve the stored IOV address. + */ if (!domain || dc->client.group) phys = &phys_addr; else -- cgit v1.2.3 From 05d1adfe2a8b5c6a794a9927d1991a00c5d68f1d Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 26 Mar 2021 15:51:38 +0100 Subject: drm/tegra: Count number of display controllers at runtime In order to be able to attach planes to all possible display controllers the exact number of CRTCs must be known. 
Keep track of the number of the display controllers that register during initialization. Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/dc.c | 22 ++++++++++++++++++++++ drivers/gpu/drm/tegra/drm.h | 1 + drivers/gpu/drm/tegra/hub.c | 6 ++++-- 3 files changed, 27 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index bc8756d06b3e..aacbe5e202d2 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -2080,6 +2080,16 @@ static bool tegra_dc_has_window_groups(struct tegra_dc *dc) return false; } +static int tegra_dc_early_init(struct host1x_client *client) +{ + struct drm_device *drm = dev_get_drvdata(client->host); + struct tegra_drm *tegra = drm->dev_private; + + tegra->num_crtcs++; + + return 0; +} + static int tegra_dc_init(struct host1x_client *client) { struct drm_device *drm = dev_get_drvdata(client->host); @@ -2228,6 +2238,16 @@ static int tegra_dc_exit(struct host1x_client *client) return 0; } +static int tegra_dc_late_exit(struct host1x_client *client) +{ + struct drm_device *drm = dev_get_drvdata(client->host); + struct tegra_drm *tegra = drm->dev_private; + + tegra->num_crtcs--; + + return 0; +} + static int tegra_dc_runtime_suspend(struct host1x_client *client) { struct tegra_dc *dc = host1x_client_to_dc(client); @@ -2292,8 +2312,10 @@ put_rpm: } static const struct host1x_client_ops dc_client_ops = { + .early_init = tegra_dc_early_init, .init = tegra_dc_init, .exit = tegra_dc_exit, + .late_exit = tegra_dc_late_exit, .suspend = tegra_dc_runtime_suspend, .resume = tegra_dc_runtime_resume, }; diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h index 1b23bb0e29e3..eef933303a3c 100644 --- a/drivers/gpu/drm/tegra/drm.h +++ b/drivers/gpu/drm/tegra/drm.h @@ -56,6 +56,7 @@ struct tegra_drm { unsigned int hmask, vmask; unsigned int pitch_align; + unsigned int num_crtcs; struct tegra_display_hub *hub; }; diff --git a/drivers/gpu/drm/tegra/hub.c 
b/drivers/gpu/drm/tegra/hub.c index 617240032c37..500c9d37e654 100644 --- a/drivers/gpu/drm/tegra/hub.c +++ b/drivers/gpu/drm/tegra/hub.c @@ -562,9 +562,8 @@ struct drm_plane *tegra_shared_plane_create(struct drm_device *drm, enum drm_plane_type type = DRM_PLANE_TYPE_OVERLAY; struct tegra_drm *tegra = drm->dev_private; struct tegra_display_hub *hub = tegra->hub; - /* planes can be assigned to arbitrary CRTCs */ - unsigned int possible_crtcs = 0x7; struct tegra_shared_plane *plane; + unsigned int possible_crtcs; unsigned int num_formats; const u64 *modifiers; struct drm_plane *p; @@ -583,6 +582,9 @@ struct drm_plane *tegra_shared_plane_create(struct drm_device *drm, p = &plane->base.base; + /* planes can be assigned to arbitrary CRTCs */ + possible_crtcs = BIT(tegra->num_crtcs) - 1; + num_formats = ARRAY_SIZE(tegra_shared_plane_formats); formats = tegra_shared_plane_formats; modifiers = tegra_shared_plane_modifiers; -- cgit v1.2.3 From 7b6f846785f41d57917e36851c120cfbe87f0809 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 26 Mar 2021 15:51:39 +0100 Subject: drm/tegra: Support sector layout on Tegra194 Tegra194 has a special physical address bit that enables some memory swizzling logic to support different sector layouts. Support the bit that selects the sector layout which is passed in the framebuffer modifier. 
Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/dc.c | 7 +++++++ drivers/gpu/drm/tegra/dc.h | 1 + drivers/gpu/drm/tegra/drm.h | 3 +++ drivers/gpu/drm/tegra/fb.c | 9 +++++++++ drivers/gpu/drm/tegra/gem.h | 6 ++++++ drivers/gpu/drm/tegra/hub.c | 28 ++++++++++++++++++++++++++++ drivers/gpu/drm/tegra/plane.c | 24 ++++++++++++++++++++++++ 7 files changed, 78 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index aacbe5e202d2..f9120dc24682 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -2325,6 +2325,7 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info = { .supports_interlacing = false, .supports_cursor = false, .supports_block_linear = false, + .supports_sector_layout = false, .has_legacy_blending = true, .pitch_align = 8, .has_powergate = false, @@ -2344,6 +2345,7 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info = { .supports_interlacing = false, .supports_cursor = false, .supports_block_linear = false, + .supports_sector_layout = false, .has_legacy_blending = true, .pitch_align = 8, .has_powergate = false, @@ -2363,6 +2365,7 @@ static const struct tegra_dc_soc_info tegra114_dc_soc_info = { .supports_interlacing = false, .supports_cursor = false, .supports_block_linear = false, + .supports_sector_layout = false, .has_legacy_blending = true, .pitch_align = 64, .has_powergate = true, @@ -2382,6 +2385,7 @@ static const struct tegra_dc_soc_info tegra124_dc_soc_info = { .supports_interlacing = true, .supports_cursor = true, .supports_block_linear = true, + .supports_sector_layout = false, .has_legacy_blending = false, .pitch_align = 64, .has_powergate = true, @@ -2401,6 +2405,7 @@ static const struct tegra_dc_soc_info tegra210_dc_soc_info = { .supports_interlacing = true, .supports_cursor = true, .supports_block_linear = true, + .supports_sector_layout = false, .has_legacy_blending = false, .pitch_align = 64, .has_powergate = true, @@ -2454,6 +2459,7 @@ static 
const struct tegra_dc_soc_info tegra186_dc_soc_info = { .supports_interlacing = true, .supports_cursor = true, .supports_block_linear = true, + .supports_sector_layout = false, .has_legacy_blending = false, .pitch_align = 64, .has_powergate = false, @@ -2502,6 +2508,7 @@ static const struct tegra_dc_soc_info tegra194_dc_soc_info = { .supports_interlacing = true, .supports_cursor = true, .supports_block_linear = true, + .supports_sector_layout = true, .has_legacy_blending = false, .pitch_align = 64, .has_powergate = false, diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h index 21074cd2ce5e..29f19c3c6149 100644 --- a/drivers/gpu/drm/tegra/dc.h +++ b/drivers/gpu/drm/tegra/dc.h @@ -52,6 +52,7 @@ struct tegra_dc_soc_info { bool supports_interlacing; bool supports_cursor; bool supports_block_linear; + bool supports_sector_layout; bool has_legacy_blending; unsigned int pitch_align; bool has_powergate; diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h index eef933303a3c..87df251c1fcf 100644 --- a/drivers/gpu/drm/tegra/drm.h +++ b/drivers/gpu/drm/tegra/drm.h @@ -24,6 +24,9 @@ #include "hub.h" #include "trace.h" +/* XXX move to include/uapi/drm/drm_fourcc.h? 
*/ +#define DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT BIT(22) + struct reset_control; #ifdef CONFIG_DRM_FBDEV_EMULATION diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c index 350f33206076..cae8b8cbe9dd 100644 --- a/drivers/gpu/drm/tegra/fb.c +++ b/drivers/gpu/drm/tegra/fb.c @@ -44,6 +44,15 @@ int tegra_fb_get_tiling(struct drm_framebuffer *framebuffer, { uint64_t modifier = framebuffer->modifier; + if ((modifier >> 56) == DRM_FORMAT_MOD_VENDOR_NVIDIA) { + if ((modifier & DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT) == 0) + tiling->sector_layout = TEGRA_BO_SECTOR_LAYOUT_TEGRA; + else + tiling->sector_layout = TEGRA_BO_SECTOR_LAYOUT_GPU; + + modifier &= ~DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT; + } + switch (modifier) { case DRM_FORMAT_MOD_LINEAR: tiling->mode = TEGRA_BO_TILING_MODE_PITCH; diff --git a/drivers/gpu/drm/tegra/gem.h b/drivers/gpu/drm/tegra/gem.h index fafb5724499b..c15fd99d6cb2 100644 --- a/drivers/gpu/drm/tegra/gem.h +++ b/drivers/gpu/drm/tegra/gem.h @@ -21,9 +21,15 @@ enum tegra_bo_tiling_mode { TEGRA_BO_TILING_MODE_BLOCK, }; +enum tegra_bo_sector_layout { + TEGRA_BO_SECTOR_LAYOUT_TEGRA, + TEGRA_BO_SECTOR_LAYOUT_GPU, +}; + struct tegra_bo_tiling { enum tegra_bo_tiling_mode mode; unsigned long value; + enum tegra_bo_sector_layout sector_layout; }; struct tegra_bo { diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c index 500c9d37e654..79bff8b48271 100644 --- a/drivers/gpu/drm/tegra/hub.c +++ b/drivers/gpu/drm/tegra/hub.c @@ -55,6 +55,18 @@ static const u64 tegra_shared_plane_modifiers[] = { DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3), DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4), DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5), + /* + * The GPU sector layout is only supported on Tegra194, but these will + * be filtered out later on by ->format_mod_supported() on SoCs where + * it isn't supported. 
+ */ + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(0) | DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT, + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(1) | DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT, + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(2) | DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT, + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3) | DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT, + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4) | DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT, + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5) | DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT, + /* sentinel */ DRM_FORMAT_MOD_INVALID }; @@ -366,6 +378,12 @@ static int tegra_shared_plane_atomic_check(struct drm_plane *plane, return -EINVAL; } + if (tiling->sector_layout == TEGRA_BO_SECTOR_LAYOUT_GPU && + !dc->soc->supports_sector_layout) { + DRM_ERROR("hardware doesn't support GPU sector layout\n"); + return -EINVAL; + } + /* * Tegra doesn't support different strides for U and V planes so we * error out if the user tries to display a framebuffer with such a @@ -485,6 +503,16 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane, base = tegra_plane_state->iova[0] + fb->offsets[0]; +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + /* + * Physical address bit 39 in Tegra194 is used as a switch for special + * logic that swizzles the memory using either the legacy Tegra or the + * dGPU sector layout. 
+ */ + if (tegra_plane_state->tiling.sector_layout == TEGRA_BO_SECTOR_LAYOUT_GPU) + base |= BIT(39); +#endif + tegra_plane_writel(p, tegra_plane_state->format, DC_WIN_COLOR_DEPTH); tegra_plane_writel(p, 0, DC_WIN_PRECOMP_WGRP_PARAMS); diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index 793da5d675d2..2e11b4b1f702 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -83,6 +83,22 @@ static void tegra_plane_atomic_destroy_state(struct drm_plane *plane, kfree(state); } +static bool tegra_plane_supports_sector_layout(struct drm_plane *plane) +{ + struct drm_crtc *crtc; + + drm_for_each_crtc(crtc, plane->dev) { + if (plane->possible_crtcs & drm_crtc_mask(crtc)) { + struct tegra_dc *dc = to_tegra_dc(crtc); + + if (!dc->soc->supports_sector_layout) + return false; + } + } + + return true; +} + static bool tegra_plane_format_mod_supported(struct drm_plane *plane, uint32_t format, uint64_t modifier) @@ -92,6 +108,14 @@ static bool tegra_plane_format_mod_supported(struct drm_plane *plane, if (modifier == DRM_FORMAT_MOD_LINEAR) return true; + /* check for the sector layout bit */ + if ((modifier >> 56) == DRM_FORMAT_MOD_VENDOR_NVIDIA) { + if (modifier & DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT) { + if (!tegra_plane_supports_sector_layout(plane)) + return false; + } + } + if (info->num_planes == 1) return true; -- cgit v1.2.3