summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2015-08-27 06:00:28 +0300
committerDave Airlie <airlied@redhat.com>2015-08-27 06:00:28 +0300
commit40b2dffbcc67e92d5df97785dffc68fe88605bfa (patch)
tree91276b6ae4210791ad4494adaf69a56b16c7b0ac
parentdb56176025cee5e242dfeed5f4e304d095d29fa3 (diff)
parentc2b6bd7e91aad8440a2f55bdbde6f5a8ae19fac5 (diff)
downloadlinux-40b2dffbcc67e92d5df97785dffc68fe88605bfa.tar.xz
Merge branch 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux into drm-next
- DP fixes for radeon and amdgpu - IH ring fix for tonga and fiji - Lots of GPU scheduler fixes - Misc additional fixes * 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux: (42 commits) drm/amdgpu: fix wait queue handling in the scheduler drm/amdgpu: remove extra parameters from scheduler callbacks drm/amdgpu: wake up scheduler only when neccessary drm/amdgpu: remove entity idle timeout v2 drm/amdgpu: fix postclose order drm/amdgpu: use IB for copy buffer of eviction drm/amdgpu: adjust the judgement of removing fence callback drm/amdgpu: fix no sync_wait in copy_buffer drm/amdgpu: fix last_vm_update fence is not effetive for sched fence drm/amdgpu: add priv data to sched drm/amdgpu: add owner for sched fence drm/amdgpu: remove entity reference from sched fence drm/amdgpu: fix and cleanup amd_sched_entity_push_job drm/amdgpu: remove amdgpu_bo_list_clone drm/amdgpu: remove the context from amdgpu_job drm/amdgpu: remove unused parameters to amd_sched_create drm/amdgpu: remove sched_lock drm/amdgpu: remove prepare_job callback drm/amdgpu: cleanup a scheduler function name drm/amdgpu: reorder scheduler functions ...
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h54
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c222
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c99
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c53
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c94
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c103
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_test.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c73
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_sdma.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c19
-rw-r--r--drivers/gpu/drm/amd/scheduler/gpu_scheduler.c377
-rw-r--r--drivers/gpu/drm/amd/scheduler/gpu_scheduler.h69
-rw-r--r--drivers/gpu/drm/amd/scheduler/sched_fence.c19
-rw-r--r--drivers/gpu/drm/radeon/radeon_connectors.c5
30 files changed, 671 insertions, 727 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2fc58e658986..aa2dcf578dd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -183,6 +183,7 @@ struct amdgpu_vm;
struct amdgpu_ring;
struct amdgpu_semaphore;
struct amdgpu_cs_parser;
+struct amdgpu_job;
struct amdgpu_irq_src;
struct amdgpu_fpriv;
@@ -246,7 +247,7 @@ struct amdgpu_buffer_funcs {
unsigned copy_num_dw;
/* used for buffer migration */
- void (*emit_copy_buffer)(struct amdgpu_ring *ring,
+ void (*emit_copy_buffer)(struct amdgpu_ib *ib,
/* src addr in bytes */
uint64_t src_offset,
/* dst addr in bytes */
@@ -439,9 +440,12 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
-signed long amdgpu_fence_wait_any(struct amdgpu_device *adev,
- struct amdgpu_fence **fences,
- bool intr, long t);
+signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev,
+ struct fence **array,
+ uint32_t count,
+ bool wait_all,
+ bool intr,
+ signed long t);
struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence);
void amdgpu_fence_unref(struct amdgpu_fence **fence);
@@ -514,7 +518,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
uint64_t dst_offset,
uint32_t byte_count,
struct reservation_object *resv,
- struct amdgpu_fence **fence);
+ struct fence **fence);
int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
struct amdgpu_bo_list_entry {
@@ -650,7 +654,7 @@ struct amdgpu_sa_bo {
struct amdgpu_sa_manager *manager;
unsigned soffset;
unsigned eoffset;
- struct amdgpu_fence *fence;
+ struct fence *fence;
};
/*
@@ -692,7 +696,7 @@ bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring,
struct amdgpu_semaphore *semaphore);
void amdgpu_semaphore_free(struct amdgpu_device *adev,
struct amdgpu_semaphore **semaphore,
- struct amdgpu_fence *fence);
+ struct fence *fence);
/*
* Synchronization
@@ -700,7 +704,8 @@ void amdgpu_semaphore_free(struct amdgpu_device *adev,
struct amdgpu_sync {
struct amdgpu_semaphore *semaphores[AMDGPU_NUM_SYNCS];
struct amdgpu_fence *sync_to[AMDGPU_MAX_RINGS];
- struct amdgpu_fence *last_vm_update;
+ DECLARE_HASHTABLE(fences, 4);
+ struct fence *last_vm_update;
};
void amdgpu_sync_create(struct amdgpu_sync *sync);
@@ -712,8 +717,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
void *owner);
int amdgpu_sync_rings(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
+int amdgpu_sync_wait(struct amdgpu_sync *sync);
void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync,
- struct amdgpu_fence *fence);
+ struct fence *fence);
/*
* GART structures, functions & helpers
@@ -871,7 +877,7 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_ib *ibs,
unsigned num_ibs,
- int (*free_job)(struct amdgpu_cs_parser *),
+ int (*free_job)(struct amdgpu_job *),
void *owner,
struct fence **fence);
@@ -957,7 +963,7 @@ struct amdgpu_vm_id {
unsigned id;
uint64_t pd_gpu_addr;
/* last flushed PD/PT update */
- struct amdgpu_fence *flushed_updates;
+ struct fence *flushed_updates;
/* last use of vmid */
struct amdgpu_fence *last_id_use;
};
@@ -1042,7 +1048,7 @@ struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
- struct fence *fence, uint64_t queued_seq);
+ struct fence *fence);
struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
struct amdgpu_ring *ring, uint64_t seq);
@@ -1078,8 +1084,6 @@ struct amdgpu_bo_list {
};
struct amdgpu_bo_list *
-amdgpu_bo_list_clone(struct amdgpu_bo_list *list);
-struct amdgpu_bo_list *
amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id);
void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
@@ -1255,14 +1259,16 @@ struct amdgpu_cs_parser {
/* user fence */
struct amdgpu_user_fence uf;
+};
- struct amdgpu_ring *ring;
- struct mutex job_lock;
- struct work_struct job_work;
- int (*prepare_job)(struct amdgpu_cs_parser *sched_job);
- int (*run_job)(struct amdgpu_cs_parser *sched_job);
- int (*free_job)(struct amdgpu_cs_parser *sched_job);
- struct amd_sched_fence *s_fence;
+struct amdgpu_job {
+ struct amd_sched_job base;
+ struct amdgpu_device *adev;
+ struct amdgpu_ib *ibs;
+ uint32_t num_ibs;
+ struct mutex job_lock;
+ struct amdgpu_user_fence uf;
+ int (*free_job)(struct amdgpu_job *sched_job);
};
static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx)
@@ -2241,7 +2247,7 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
#define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s))
#define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s))
-#define amdgpu_emit_copy_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((r), (s), (d), (b))
+#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b))
#define amdgpu_emit_fill_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((r), (s), (d), (b))
#define amdgpu_dpm_get_temperature(adev) (adev)->pm.funcs->get_temperature((adev))
#define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev))
@@ -2343,7 +2349,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
struct amdgpu_sync *sync);
void amdgpu_vm_flush(struct amdgpu_ring *ring,
struct amdgpu_vm *vm,
- struct amdgpu_fence *updates);
+ struct fence *updates);
void amdgpu_vm_fence(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct amdgpu_fence *fence);
@@ -2373,7 +2379,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
uint64_t addr);
void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va);
-
+int amdgpu_vm_free_job(struct amdgpu_job *job);
/*
* functions used by amdgpu_encoder.c
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 759482e4300d..98d59ee640ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
{
unsigned long start_jiffies;
unsigned long end_jiffies;
- struct amdgpu_fence *fence = NULL;
+ struct fence *fence = NULL;
int i, r;
start_jiffies = jiffies;
@@ -42,17 +42,17 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence);
if (r)
goto exit_do_move;
- r = fence_wait(&fence->base, false);
+ r = fence_wait(fence, false);
if (r)
goto exit_do_move;
- amdgpu_fence_unref(&fence);
+ fence_put(fence);
}
end_jiffies = jiffies;
r = jiffies_to_msecs(end_jiffies - start_jiffies);
exit_do_move:
if (fence)
- amdgpu_fence_unref(&fence);
+ fence_put(fence);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 7eed523bf28f..f82a2dd83874 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -62,39 +62,6 @@ static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv,
return 0;
}
-struct amdgpu_bo_list *
-amdgpu_bo_list_clone(struct amdgpu_bo_list *list)
-{
- struct amdgpu_bo_list *result;
- unsigned i;
-
- result = kmalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
- if (!result)
- return NULL;
-
- result->array = drm_calloc_large(list->num_entries,
- sizeof(struct amdgpu_bo_list_entry));
- if (!result->array) {
- kfree(result);
- return NULL;
- }
-
- mutex_init(&result->lock);
- result->gds_obj = list->gds_obj;
- result->gws_obj = list->gws_obj;
- result->oa_obj = list->oa_obj;
- result->has_userptr = list->has_userptr;
- result->num_entries = list->num_entries;
-
- memcpy(result->array, list->array, list->num_entries *
- sizeof(struct amdgpu_bo_list_entry));
-
- for (i = 0; i < result->num_entries; ++i)
- amdgpu_bo_ref(result->array[i].robj);
-
- return result;
-}
-
static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
{
struct amdgpu_bo_list *list;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 27df17a0e620..89c3dd62ba21 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -75,6 +75,11 @@ void amdgpu_connector_hotplug(struct drm_connector *connector)
if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
} else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) {
+ /* Don't try to start link training before we
+ * have the dpcd */
+ if (!amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
+ return;
+
/* set it to OFF so that drm_helper_connector_dpms()
* won't return immediately since the current state
* is ON at this point.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index e4424b4db5d3..6a206f15635f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -126,19 +126,6 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
return 0;
}
-static void amdgpu_job_work_func(struct work_struct *work)
-{
- struct amdgpu_cs_parser *sched_job =
- container_of(work, struct amdgpu_cs_parser,
- job_work);
- mutex_lock(&sched_job->job_lock);
- if (sched_job->free_job)
- sched_job->free_job(sched_job);
- mutex_unlock(&sched_job->job_lock);
- /* after processing job, free memory */
- fence_put(&sched_job->s_fence->base);
- kfree(sched_job);
-}
struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
struct drm_file *filp,
struct amdgpu_ctx *ctx,
@@ -157,10 +144,6 @@ struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
parser->ctx = ctx;
parser->ibs = ibs;
parser->num_ibs = num_ibs;
- if (amdgpu_enable_scheduler) {
- mutex_init(&parser->job_lock);
- INIT_WORK(&parser->job_work, amdgpu_job_work_func);
- }
for (i = 0; i < num_ibs; i++)
ibs[i].ctx = ctx;
@@ -173,7 +156,6 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
uint64_t *chunk_array_user;
uint64_t *chunk_array = NULL;
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- struct amdgpu_bo_list *bo_list = NULL;
unsigned size, i;
int r = 0;
@@ -185,20 +167,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
r = -EINVAL;
goto out;
}
- bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
- if (!amdgpu_enable_scheduler)
- p->bo_list = bo_list;
- else {
- if (bo_list && !bo_list->has_userptr) {
- p->bo_list = amdgpu_bo_list_clone(bo_list);
- amdgpu_bo_list_put(bo_list);
- if (!p->bo_list)
- return -ENOMEM;
- } else if (bo_list && bo_list->has_userptr)
- p->bo_list = bo_list;
- else
- p->bo_list = NULL;
- }
+ p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
/* get chunks */
INIT_LIST_HEAD(&p->validated);
@@ -291,7 +260,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
}
- p->ibs = kmalloc_array(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL);
+ p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL);
if (!p->ibs)
r = -ENOMEM;
@@ -498,25 +467,24 @@ static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
unsigned i;
if (parser->ctx)
amdgpu_ctx_put(parser->ctx);
- if (parser->bo_list) {
- if (amdgpu_enable_scheduler && !parser->bo_list->has_userptr)
- amdgpu_bo_list_free(parser->bo_list);
- else
- amdgpu_bo_list_put(parser->bo_list);
- }
+ if (parser->bo_list)
+ amdgpu_bo_list_put(parser->bo_list);
+
drm_free_large(parser->vm_bos);
for (i = 0; i < parser->nchunks; i++)
drm_free_large(parser->chunks[i].kdata);
kfree(parser->chunks);
- if (parser->ibs)
- for (i = 0; i < parser->num_ibs; i++)
- amdgpu_ib_free(parser->adev, &parser->ibs[i]);
- kfree(parser->ibs);
- if (parser->uf.bo)
- drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
-
if (!amdgpu_enable_scheduler)
- kfree(parser);
+ {
+ if (parser->ibs)
+ for (i = 0; i < parser->num_ibs; i++)
+ amdgpu_ib_free(parser->adev, &parser->ibs[i]);
+ kfree(parser->ibs);
+ if (parser->uf.bo)
+ drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
+ }
+
+ kfree(parser);
}
/**
@@ -533,12 +501,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
amdgpu_cs_parser_fini_late(parser);
}
-static int amdgpu_cs_parser_free_job(struct amdgpu_cs_parser *sched_job)
-{
- amdgpu_cs_parser_fini_late(sched_job);
- return 0;
-}
-
static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
struct amdgpu_vm *vm)
{
@@ -810,68 +772,16 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
return 0;
}
-static int amdgpu_cs_parser_prepare_job(struct amdgpu_cs_parser *sched_job)
+static int amdgpu_cs_free_job(struct amdgpu_job *sched_job)
{
- int r, i;
- struct amdgpu_cs_parser *parser = sched_job;
- struct amdgpu_device *adev = sched_job->adev;
- bool reserved_buffers = false;
-
- r = amdgpu_cs_parser_relocs(parser);
- if (r) {
- if (r != -ERESTARTSYS) {
- if (r == -ENOMEM)
- DRM_ERROR("Not enough memory for command submission!\n");
- else
- DRM_ERROR("Failed to process the buffer list %d!\n", r);
- }
- }
-
- if (!r) {
- reserved_buffers = true;
- r = amdgpu_cs_ib_fill(adev, parser);
- }
- if (!r) {
- r = amdgpu_cs_dependencies(adev, parser);
- if (r)
- DRM_ERROR("Failed in the dependencies handling %d!\n", r);
- }
- if (r) {
- amdgpu_cs_parser_fini(parser, r, reserved_buffers);
- return r;
- }
-
- for (i = 0; i < parser->num_ibs; i++)
- trace_amdgpu_cs(parser, i);
-
- r = amdgpu_cs_ib_vm_chunk(adev, parser);
- return r;
-}
-
-static struct amdgpu_ring *amdgpu_cs_parser_get_ring(
- struct amdgpu_device *adev,
- struct amdgpu_cs_parser *parser)
-{
- int i, r;
-
- struct amdgpu_cs_chunk *chunk;
- struct drm_amdgpu_cs_chunk_ib *chunk_ib;
- struct amdgpu_ring *ring;
- for (i = 0; i < parser->nchunks; i++) {
- chunk = &parser->chunks[i];
- chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
-
- if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
- continue;
-
- r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
- chunk_ib->ip_instance, chunk_ib->ring,
- &ring);
- if (r)
- return NULL;
- break;
- }
- return ring;
+ int i;
+ if (sched_job->ibs)
+ for (i = 0; i < sched_job->num_ibs; i++)
+ amdgpu_ib_free(sched_job->adev, &sched_job->ibs[i]);
+ kfree(sched_job->ibs);
+ if (sched_job->uf.bo)
+ drm_gem_object_unreference_unlocked(&sched_job->uf.bo->gem_base);
+ return 0;
}
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
@@ -879,7 +789,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
struct amdgpu_device *adev = dev->dev_private;
union drm_amdgpu_cs *cs = data;
struct amdgpu_cs_parser *parser;
- int r;
+ bool reserved_buffers = false;
+ int i, r;
down_read(&adev->exclusive_lock);
if (!adev->accel_working) {
@@ -899,44 +810,79 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return r;
}
- if (amdgpu_enable_scheduler && parser->num_ibs) {
- struct amdgpu_ring * ring =
- amdgpu_cs_parser_get_ring(adev, parser);
- r = amdgpu_cs_parser_prepare_job(parser);
+ r = amdgpu_cs_parser_relocs(parser);
+ if (r == -ENOMEM)
+ DRM_ERROR("Not enough memory for command submission!\n");
+ else if (r && r != -ERESTARTSYS)
+ DRM_ERROR("Failed to process the buffer list %d!\n", r);
+ else if (!r) {
+ reserved_buffers = true;
+ r = amdgpu_cs_ib_fill(adev, parser);
+ }
+
+ if (!r) {
+ r = amdgpu_cs_dependencies(adev, parser);
if (r)
- goto out;
- parser->ring = ring;
- parser->free_job = amdgpu_cs_parser_free_job;
- mutex_lock(&parser->job_lock);
- r = amd_sched_push_job(ring->scheduler,
- &parser->ctx->rings[ring->idx].entity,
- parser,
- &parser->s_fence);
+ DRM_ERROR("Failed in the dependencies handling %d!\n", r);
+ }
+
+ if (r)
+ goto out;
+
+ for (i = 0; i < parser->num_ibs; i++)
+ trace_amdgpu_cs(parser, i);
+
+ r = amdgpu_cs_ib_vm_chunk(adev, parser);
+ if (r)
+ goto out;
+
+ if (amdgpu_enable_scheduler && parser->num_ibs) {
+ struct amdgpu_job *job;
+ struct amdgpu_ring * ring = parser->ibs->ring;
+ job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
+ if (!job)
+ return -ENOMEM;
+ job->base.sched = ring->scheduler;
+ job->base.s_entity = &parser->ctx->rings[ring->idx].entity;
+ job->adev = parser->adev;
+ job->ibs = parser->ibs;
+ job->num_ibs = parser->num_ibs;
+ job->base.owner = parser->filp;
+ mutex_init(&job->job_lock);
+ if (job->ibs[job->num_ibs - 1].user) {
+ memcpy(&job->uf, &parser->uf,
+ sizeof(struct amdgpu_user_fence));
+ job->ibs[job->num_ibs - 1].user = &job->uf;
+ }
+
+ job->free_job = amdgpu_cs_free_job;
+ mutex_lock(&job->job_lock);
+ r = amd_sched_entity_push_job((struct amd_sched_job *)job);
if (r) {
- mutex_unlock(&parser->job_lock);
+ mutex_unlock(&job->job_lock);
+ amdgpu_cs_free_job(job);
+ kfree(job);
goto out;
}
- parser->ibs[parser->num_ibs - 1].sequence =
+ cs->out.handle =
amdgpu_ctx_add_fence(parser->ctx, ring,
- &parser->s_fence->base,
- parser->s_fence->v_seq);
- cs->out.handle = parser->s_fence->v_seq;
+ &job->base.s_fence->base);
+ parser->ibs[parser->num_ibs - 1].sequence = cs->out.handle;
+
list_sort(NULL, &parser->validated, cmp_size_smaller_first);
ttm_eu_fence_buffer_objects(&parser->ticket,
&parser->validated,
- &parser->s_fence->base);
+ &job->base.s_fence->base);
- mutex_unlock(&parser->job_lock);
+ mutex_unlock(&job->job_lock);
+ amdgpu_cs_parser_fini_late(parser);
up_read(&adev->exclusive_lock);
return 0;
}
- r = amdgpu_cs_parser_prepare_job(parser);
- if (r)
- goto out;
cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence;
out:
- amdgpu_cs_parser_fini(parser, r, true);
+ amdgpu_cs_parser_fini(parser, r, reserved_buffers);
up_read(&adev->exclusive_lock);
r = amdgpu_cs_handle_lockup(adev, r);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 08bc7722ddb8..20cbc4eb5a6f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -229,17 +229,13 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
}
uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
- struct fence *fence, uint64_t queued_seq)
+ struct fence *fence)
{
struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
- uint64_t seq = 0;
+ uint64_t seq = cring->sequence;
unsigned idx = 0;
struct fence *other = NULL;
- if (amdgpu_enable_scheduler)
- seq = queued_seq;
- else
- seq = cring->sequence;
idx = seq % AMDGPU_CTX_MAX_CS_PENDING;
other = cring->fences[idx];
if (other) {
@@ -253,8 +249,7 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
spin_lock(&ctx->ring_lock);
cring->fences[idx] = fence;
- if (!amdgpu_enable_scheduler)
- cring->sequence++;
+ cring->sequence++;
spin_unlock(&ctx->ring_lock);
fence_put(other);
@@ -267,21 +262,16 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
{
struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
struct fence *fence;
- uint64_t queued_seq;
spin_lock(&ctx->ring_lock);
- if (amdgpu_enable_scheduler)
- queued_seq = amd_sched_next_queued_seq(&cring->entity);
- else
- queued_seq = cring->sequence;
- if (seq >= queued_seq) {
+ if (seq >= cring->sequence) {
spin_unlock(&ctx->ring_lock);
return ERR_PTR(-EINVAL);
}
- if (seq + AMDGPU_CTX_MAX_CS_PENDING < queued_seq) {
+ if (seq + AMDGPU_CTX_MAX_CS_PENDING < cring->sequence) {
spin_unlock(&ctx->ring_lock);
return NULL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index e6fa27805207..0fcc0bd1622c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -49,9 +49,10 @@
/*
* KMS wrapper.
* - 3.0.0 - initial driver
+ * - 3.1.0 - allow reading more status registers (GRBM, SRBM, SDMA, CP)
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 0
+#define KMS_DRIVER_MINOR 1
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 98500f1756f7..f446bf2fedc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -626,10 +626,10 @@ void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
ring->fence_drv.ring = ring;
if (amdgpu_enable_scheduler) {
- ring->scheduler = amd_sched_create((void *)ring->adev,
- &amdgpu_sched_ops,
- ring->idx, 5, 0,
- amdgpu_sched_hw_submission);
+ ring->scheduler = amd_sched_create(&amdgpu_sched_ops,
+ ring->idx,
+ amdgpu_sched_hw_submission,
+ (void *)ring->adev);
if (!ring->scheduler)
DRM_ERROR("Failed to create scheduler on ring %d.\n",
ring->idx);
@@ -836,22 +836,37 @@ static inline bool amdgpu_test_signaled(struct amdgpu_fence *fence)
return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
}
-static inline bool amdgpu_test_signaled_any(struct amdgpu_fence **fences)
+static bool amdgpu_test_signaled_any(struct fence **fences, uint32_t count)
{
int idx;
- struct amdgpu_fence *fence;
+ struct fence *fence;
- idx = 0;
- for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) {
+ for (idx = 0; idx < count; ++idx) {
fence = fences[idx];
if (fence) {
- if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags))
+ if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
return true;
}
}
return false;
}
+static bool amdgpu_test_signaled_all(struct fence **fences, uint32_t count)
+{
+ int idx;
+ struct fence *fence;
+
+ for (idx = 0; idx < count; ++idx) {
+ fence = fences[idx];
+ if (fence) {
+ if (!test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+ return false;
+ }
+ }
+
+ return true;
+}
+
struct amdgpu_wait_cb {
struct fence_cb base;
struct task_struct *task;
@@ -867,33 +882,56 @@ static void amdgpu_fence_wait_cb(struct fence *fence, struct fence_cb *cb)
static signed long amdgpu_fence_default_wait(struct fence *f, bool intr,
signed long t)
{
- struct amdgpu_fence *array[AMDGPU_MAX_RINGS];
struct amdgpu_fence *fence = to_amdgpu_fence(f);
struct amdgpu_device *adev = fence->ring->adev;
- memset(&array[0], 0, sizeof(array));
- array[0] = fence;
-
- return amdgpu_fence_wait_any(adev, array, intr, t);
+ return amdgpu_fence_wait_multiple(adev, &f, 1, false, intr, t);
}
-/* wait until any fence in array signaled */
-signed long amdgpu_fence_wait_any(struct amdgpu_device *adev,
- struct amdgpu_fence **array, bool intr, signed long t)
+/**
+ * Wait the fence array with timeout
+ *
+ * @adev: amdgpu device
+ * @array: the fence array with amdgpu fence pointer
+ * @count: the number of the fence array
+ * @wait_all: the flag of wait all(true) or wait any(false)
+ * @intr: when sleep, set the current task interruptable or not
+ * @t: timeout to wait
+ *
+ * If wait_all is true, it will return when all fences are signaled or timeout.
+ * If wait_all is false, it will return when any fence is signaled or timeout.
+ */
+signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev,
+ struct fence **array,
+ uint32_t count,
+ bool wait_all,
+ bool intr,
+ signed long t)
{
long idx = 0;
- struct amdgpu_wait_cb cb[AMDGPU_MAX_RINGS];
- struct amdgpu_fence *fence;
+ struct amdgpu_wait_cb *cb;
+ struct fence *fence;
BUG_ON(!array);
- for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) {
+ cb = kcalloc(count, sizeof(struct amdgpu_wait_cb), GFP_KERNEL);
+ if (cb == NULL) {
+ t = -ENOMEM;
+ goto err_free_cb;
+ }
+
+ for (idx = 0; idx < count; ++idx) {
fence = array[idx];
if (fence) {
cb[idx].task = current;
- if (fence_add_callback(&fence->base,
- &cb[idx].base, amdgpu_fence_wait_cb))
- return t; /* return if fence is already signaled */
+ if (fence_add_callback(fence,
+ &cb[idx].base, amdgpu_fence_wait_cb)) {
+ /* The fence is already signaled */
+ if (wait_all)
+ continue;
+ else
+ goto fence_rm_cb;
+ }
}
}
@@ -907,7 +945,9 @@ signed long amdgpu_fence_wait_any(struct amdgpu_device *adev,
* amdgpu_test_signaled_any must be called after
* set_current_state to prevent a race with wake_up_process
*/
- if (amdgpu_test_signaled_any(array))
+ if (!wait_all && amdgpu_test_signaled_any(array, count))
+ break;
+ if (wait_all && amdgpu_test_signaled_all(array, count))
break;
if (adev->needs_reset) {
@@ -923,13 +963,16 @@ signed long amdgpu_fence_wait_any(struct amdgpu_device *adev,
__set_current_state(TASK_RUNNING);
- idx = 0;
- for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) {
+fence_rm_cb:
+ for (idx = 0; idx < count; ++idx) {
fence = array[idx];
- if (fence)
- fence_remove_callback(&fence->base, &cb[idx].base);
+ if (fence && cb[idx].base.func)
+ fence_remove_callback(fence, &cb[idx].base);
}
+err_free_cb:
+ kfree(cb);
+
return t;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 5104e64e9ad8..c439735ee670 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -73,29 +73,12 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
if (!vm)
ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
- else
- ib->gpu_addr = 0;
-
- } else {
- ib->sa_bo = NULL;
- ib->ptr = NULL;
- ib->gpu_addr = 0;
}
amdgpu_sync_create(&ib->sync);
ib->ring = ring;
- ib->fence = NULL;
- ib->user = NULL;
ib->vm = vm;
- ib->ctx = NULL;
- ib->gds_base = 0;
- ib->gds_size = 0;
- ib->gws_base = 0;
- ib->gws_size = 0;
- ib->oa_base = 0;
- ib->oa_size = 0;
- ib->flags = 0;
return 0;
}
@@ -110,8 +93,8 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
*/
void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
{
- amdgpu_sync_free(adev, &ib->sync, ib->fence);
- amdgpu_sa_bo_free(adev, &ib->sa_bo, ib->fence);
+ amdgpu_sync_free(adev, &ib->sync, &ib->fence->base);
+ amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base);
amdgpu_fence_unref(&ib->fence);
}
@@ -143,7 +126,6 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
struct amdgpu_ring *ring;
struct amdgpu_ctx *ctx, *old_ctx;
struct amdgpu_vm *vm;
- uint64_t sequence;
unsigned i;
int r = 0;
@@ -158,7 +140,11 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
dev_err(adev->dev, "couldn't schedule ib\n");
return -EINVAL;
}
-
+ r = amdgpu_sync_wait(&ibs->sync);
+ if (r) {
+ dev_err(adev->dev, "IB sync failed (%d).\n", r);
+ return r;
+ }
r = amdgpu_ring_lock(ring, (256 + AMDGPU_NUM_SYNCS * 8) * num_ibs);
if (r) {
dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
@@ -216,12 +202,9 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
return r;
}
- sequence = amdgpu_enable_scheduler ? ib->sequence : 0;
-
if (!amdgpu_enable_scheduler && ib->ctx)
ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring,
- &ib->fence->base,
- sequence);
+ &ib->fence->base);
/* wrap the last IB with fence */
if (ib->user) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index 90044b254404..5c8a803acedc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -98,18 +98,12 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
/* add 8 bytes for the rptr/wptr shadows and
* add them to the end of the ring allocation.
*/
- adev->irq.ih.ring = kzalloc(adev->irq.ih.ring_size + 8, GFP_KERNEL);
+ adev->irq.ih.ring = pci_alloc_consistent(adev->pdev,
+ adev->irq.ih.ring_size + 8,
+ &adev->irq.ih.rb_dma_addr);
if (adev->irq.ih.ring == NULL)
return -ENOMEM;
- adev->irq.ih.rb_dma_addr = pci_map_single(adev->pdev,
- (void *)adev->irq.ih.ring,
- adev->irq.ih.ring_size,
- PCI_DMA_BIDIRECTIONAL);
- if (pci_dma_mapping_error(adev->pdev, adev->irq.ih.rb_dma_addr)) {
- dev_err(&adev->pdev->dev, "Failed to DMA MAP the IH RB page\n");
- kfree((void *)adev->irq.ih.ring);
- return -ENOMEM;
- }
+ memset((void *)adev->irq.ih.ring, 0, adev->irq.ih.ring_size + 8);
adev->irq.ih.wptr_offs = (adev->irq.ih.ring_size / 4) + 0;
adev->irq.ih.rptr_offs = (adev->irq.ih.ring_size / 4) + 1;
}
@@ -149,9 +143,9 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev)
/* add 8 bytes for the rptr/wptr shadows and
* add them to the end of the ring allocation.
*/
- pci_unmap_single(adev->pdev, adev->irq.ih.rb_dma_addr,
- adev->irq.ih.ring_size + 8, PCI_DMA_BIDIRECTIONAL);
- kfree((void *)adev->irq.ih.ring);
+ pci_free_consistent(adev->pdev, adev->irq.ih.ring_size + 8,
+ (void *)adev->irq.ih.ring,
+ adev->irq.ih.rb_dma_addr);
adev->irq.ih.ring = NULL;
}
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 87da6b1848fd..22367939ebf1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -560,6 +560,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
if (!fpriv)
return;
+ amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
+
amdgpu_vm_fini(adev, &fpriv->vm);
idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
@@ -568,8 +570,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
idr_destroy(&fpriv->bo_list_handles);
mutex_destroy(&fpriv->bo_list_lock);
- amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
-
kfree(fpriv);
file_priv->driver_priv = NULL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 238465a9ac55..6ea18dcec561 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -193,7 +193,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
unsigned size, unsigned align);
void amdgpu_sa_bo_free(struct amdgpu_device *adev,
struct amdgpu_sa_bo **sa_bo,
- struct amdgpu_fence *fence);
+ struct fence *fence);
#if defined(CONFIG_DEBUG_FS)
void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
struct seq_file *m);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index d6398cf45f24..b92525329d6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -139,6 +139,20 @@ int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
return r;
}
+static uint32_t amdgpu_sa_get_ring_from_fence(struct fence *f)
+{
+ struct amdgpu_fence *a_fence;
+ struct amd_sched_fence *s_fence;
+
+ s_fence = to_amd_sched_fence(f);
+ if (s_fence)
+ return s_fence->scheduler->ring_id;
+ a_fence = to_amdgpu_fence(f);
+ if (a_fence)
+ return a_fence->ring->idx;
+ return 0;
+}
+
static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
{
struct amdgpu_sa_manager *sa_manager = sa_bo->manager;
@@ -147,7 +161,7 @@ static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
}
list_del_init(&sa_bo->olist);
list_del_init(&sa_bo->flist);
- amdgpu_fence_unref(&sa_bo->fence);
+ fence_put(sa_bo->fence);
kfree(sa_bo);
}
@@ -161,7 +175,7 @@ static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager)
sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist);
list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
if (sa_bo->fence == NULL ||
- !fence_is_signaled(&sa_bo->fence->base)) {
+ !fence_is_signaled(sa_bo->fence)) {
return;
}
amdgpu_sa_bo_remove_locked(sa_bo);
@@ -246,7 +260,7 @@ static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
}
static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
- struct amdgpu_fence **fences,
+ struct fence **fences,
unsigned *tries)
{
struct amdgpu_sa_bo *best_bo = NULL;
@@ -275,7 +289,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
sa_bo = list_first_entry(&sa_manager->flist[i],
struct amdgpu_sa_bo, flist);
- if (!fence_is_signaled(&sa_bo->fence->base)) {
+ if (!fence_is_signaled(sa_bo->fence)) {
fences[i] = sa_bo->fence;
continue;
}
@@ -299,7 +313,8 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
}
if (best_bo) {
- ++tries[best_bo->fence->ring->idx];
+ uint32_t idx = amdgpu_sa_get_ring_from_fence(best_bo->fence);
+ ++tries[idx];
sa_manager->hole = best_bo->olist.prev;
/* we knew that this one is signaled,
@@ -315,7 +330,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
struct amdgpu_sa_bo **sa_bo,
unsigned size, unsigned align)
{
- struct amdgpu_fence *fences[AMDGPU_MAX_RINGS];
+ struct fence *fences[AMDGPU_MAX_RINGS];
unsigned tries[AMDGPU_MAX_RINGS];
int i, r;
signed long t;
@@ -352,7 +367,8 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
} while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
spin_unlock(&sa_manager->wq.lock);
- t = amdgpu_fence_wait_any(adev, fences, false, MAX_SCHEDULE_TIMEOUT);
+ t = amdgpu_fence_wait_multiple(adev, fences, AMDGPU_MAX_RINGS, false, false,
+ MAX_SCHEDULE_TIMEOUT);
r = (t > 0) ? 0 : t;
spin_lock(&sa_manager->wq.lock);
/* if we have nothing to wait for block */
@@ -372,7 +388,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
}
void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
- struct amdgpu_fence *fence)
+ struct fence *fence)
{
struct amdgpu_sa_manager *sa_manager;
@@ -382,10 +398,11 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
sa_manager = (*sa_bo)->manager;
spin_lock(&sa_manager->wq.lock);
- if (fence && !fence_is_signaled(&fence->base)) {
- (*sa_bo)->fence = amdgpu_fence_ref(fence);
- list_add_tail(&(*sa_bo)->flist,
- &sa_manager->flist[fence->ring->idx]);
+ if (fence && !fence_is_signaled(fence)) {
+ uint32_t idx;
+ (*sa_bo)->fence = fence_get(fence);
+ idx = amdgpu_sa_get_ring_from_fence(fence);
+ list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
} else {
amdgpu_sa_bo_remove_locked(*sa_bo);
}
@@ -412,8 +429,16 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
soffset, eoffset, eoffset - soffset);
if (i->fence) {
- seq_printf(m, " protected by 0x%016llx on ring %d",
- i->fence->seq, i->fence->ring->idx);
+ struct amdgpu_fence *a_fence = to_amdgpu_fence(i->fence);
+ struct amd_sched_fence *s_fence = to_amd_sched_fence(i->fence);
+ if (a_fence)
+ seq_printf(m, " protected by 0x%016llx on ring %d",
+ a_fence->seq, a_fence->ring->idx);
+ if (s_fence)
+ seq_printf(m, " protected by 0x%016x on ring %d",
+ s_fence->base.seqno,
+ s_fence->scheduler->ring_id);
+
}
seq_printf(m, "\n");
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index a86e38158afa..f93fb3541488 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -27,55 +27,28 @@
#include <drm/drmP.h>
#include "amdgpu.h"
-static int amdgpu_sched_prepare_job(struct amd_gpu_scheduler *sched,
- struct amd_sched_entity *entity,
- struct amd_sched_job *job)
+static struct fence *amdgpu_sched_run_job(struct amd_sched_job *job)
{
- int r = 0;
- struct amdgpu_cs_parser *sched_job;
- if (!job || !job->data) {
- DRM_ERROR("job is null\n");
- return -EINVAL;
- }
-
- sched_job = (struct amdgpu_cs_parser *)job->data;
- if (sched_job->prepare_job) {
- r = sched_job->prepare_job(sched_job);
- if (r) {
- DRM_ERROR("Prepare job error\n");
- schedule_work(&sched_job->job_work);
- }
- }
- return r;
-}
-
-static struct fence *amdgpu_sched_run_job(struct amd_gpu_scheduler *sched,
- struct amd_sched_entity *entity,
- struct amd_sched_job *job)
-{
- int r = 0;
- struct amdgpu_cs_parser *sched_job;
+ struct amdgpu_job *sched_job;
struct amdgpu_fence *fence;
+ int r;
- if (!job || !job->data) {
+ if (!job) {
DRM_ERROR("job is null\n");
return NULL;
}
- sched_job = (struct amdgpu_cs_parser *)job->data;
+ sched_job = (struct amdgpu_job *)job;
mutex_lock(&sched_job->job_lock);
r = amdgpu_ib_schedule(sched_job->adev,
sched_job->num_ibs,
sched_job->ibs,
- sched_job->filp);
+ sched_job->base.owner);
if (r)
goto err;
fence = amdgpu_fence_ref(sched_job->ibs[sched_job->num_ibs - 1].fence);
- if (sched_job->run_job) {
- r = sched_job->run_job(sched_job);
- if (r)
- goto err;
- }
+ if (sched_job->free_job)
+ sched_job->free_job(sched_job);
mutex_unlock(&sched_job->job_lock);
return &fence->base;
@@ -83,25 +56,25 @@ static struct fence *amdgpu_sched_run_job(struct amd_gpu_scheduler *sched,
err:
DRM_ERROR("Run job error\n");
mutex_unlock(&sched_job->job_lock);
- schedule_work(&sched_job->job_work);
+ job->sched->ops->process_job(job);
return NULL;
}
-static void amdgpu_sched_process_job(struct amd_gpu_scheduler *sched,
- struct amd_sched_job *job)
+static void amdgpu_sched_process_job(struct amd_sched_job *job)
{
- struct amdgpu_cs_parser *sched_job;
+ struct amdgpu_job *sched_job;
- if (!job || !job->data) {
+ if (!job) {
DRM_ERROR("job is null\n");
return;
}
- sched_job = (struct amdgpu_cs_parser *)job->data;
- schedule_work(&sched_job->job_work);
+ sched_job = (struct amdgpu_job *)job;
+ /* after processing job, free memory */
+ fence_put(&sched_job->base.s_fence->base);
+ kfree(sched_job);
}
struct amd_sched_backend_ops amdgpu_sched_ops = {
- .prepare_job = amdgpu_sched_prepare_job,
.run_job = amdgpu_sched_run_job,
.process_job = amdgpu_sched_process_job
};
@@ -110,36 +83,39 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_ib *ibs,
unsigned num_ibs,
- int (*free_job)(struct amdgpu_cs_parser *),
+ int (*free_job)(struct amdgpu_job *),
void *owner,
struct fence **f)
{
int r = 0;
if (amdgpu_enable_scheduler) {
- struct amdgpu_cs_parser *sched_job =
- amdgpu_cs_parser_create(adev, owner, &adev->kernel_ctx,
- ibs, num_ibs);
- if(!sched_job) {
+ struct amdgpu_job *job =
+ kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
+ if (!job)
return -ENOMEM;
- }
- sched_job->free_job = free_job;
- mutex_lock(&sched_job->job_lock);
- r = amd_sched_push_job(ring->scheduler,
- &adev->kernel_ctx.rings[ring->idx].entity,
- sched_job, &sched_job->s_fence);
+ job->base.sched = ring->scheduler;
+ job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity;
+ job->adev = adev;
+ job->ibs = ibs;
+ job->num_ibs = num_ibs;
+ job->base.owner = owner;
+ mutex_init(&job->job_lock);
+ job->free_job = free_job;
+ mutex_lock(&job->job_lock);
+ r = amd_sched_entity_push_job((struct amd_sched_job *)job);
if (r) {
- mutex_unlock(&sched_job->job_lock);
- kfree(sched_job);
+ mutex_unlock(&job->job_lock);
+ kfree(job);
return r;
}
- ibs[num_ibs - 1].sequence = sched_job->s_fence->v_seq;
- *f = fence_get(&sched_job->s_fence->base);
- mutex_unlock(&sched_job->job_lock);
+ *f = fence_get(&job->base.s_fence->base);
+ mutex_unlock(&job->job_lock);
} else {
r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
if (r)
return r;
*f = fence_get(&ibs[num_ibs - 1].fence->base);
}
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
index d6d41a42ab65..ff3ca52ec6fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
@@ -87,7 +87,7 @@ bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring,
void amdgpu_semaphore_free(struct amdgpu_device *adev,
struct amdgpu_semaphore **semaphore,
- struct amdgpu_fence *fence)
+ struct fence *fence)
{
if (semaphore == NULL || *semaphore == NULL) {
return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 7cb711fc1ee2..4fffb2539331 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -32,6 +32,11 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"
+struct amdgpu_sync_entry {
+ struct hlist_node node;
+ struct fence *fence;
+};
+
/**
* amdgpu_sync_create - zero init sync object
*
@@ -49,9 +54,33 @@ void amdgpu_sync_create(struct amdgpu_sync *sync)
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
sync->sync_to[i] = NULL;
+ hash_init(sync->fences);
sync->last_vm_update = NULL;
}
+static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f)
+{
+ struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
+ struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
+
+ if (a_fence)
+ return a_fence->ring->adev == adev;
+ if (s_fence)
+ return (struct amdgpu_device *)s_fence->scheduler->priv == adev;
+ return false;
+}
+
+static bool amdgpu_sync_test_owner(struct fence *f, void *owner)
+{
+ struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
+ struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
+ if (s_fence)
+ return s_fence->owner == owner;
+ if (a_fence)
+ return a_fence->owner == owner;
+ return false;
+}
+
/**
* amdgpu_sync_fence - remember to sync to this fence
*
@@ -62,28 +91,54 @@ void amdgpu_sync_create(struct amdgpu_sync *sync)
int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct fence *f)
{
+ struct amdgpu_sync_entry *e;
struct amdgpu_fence *fence;
struct amdgpu_fence *other;
+ struct fence *tmp, *later;
if (!f)
return 0;
+ if (amdgpu_sync_same_dev(adev, f) &&
+ amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM)) {
+ if (sync->last_vm_update) {
+ tmp = sync->last_vm_update;
+ BUG_ON(f->context != tmp->context);
+ later = (f->seqno - tmp->seqno <= INT_MAX) ? f : tmp;
+ sync->last_vm_update = fence_get(later);
+ fence_put(tmp);
+ } else
+ sync->last_vm_update = fence_get(f);
+ }
+
fence = to_amdgpu_fence(f);
- if (!fence || fence->ring->adev != adev)
- return fence_wait(f, true);
+ if (!fence || fence->ring->adev != adev) {
+ hash_for_each_possible(sync->fences, e, node, f->context) {
+ struct fence *new;
+ if (unlikely(e->fence->context != f->context))
+ continue;
+ new = fence_get(fence_later(e->fence, f));
+ if (new) {
+ fence_put(e->fence);
+ e->fence = new;
+ }
+ return 0;
+ }
+
+ e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL);
+ if (!e)
+ return -ENOMEM;
+
+ hash_add(sync->fences, &e->node, f->context);
+ e->fence = fence_get(f);
+ return 0;
+ }
other = sync->sync_to[fence->ring->idx];
sync->sync_to[fence->ring->idx] = amdgpu_fence_ref(
amdgpu_fence_later(fence, other));
amdgpu_fence_unref(&other);
- if (fence->owner == AMDGPU_FENCE_OWNER_VM) {
- other = sync->last_vm_update;
- sync->last_vm_update = amdgpu_fence_ref(
- amdgpu_fence_later(fence, other));
- amdgpu_fence_unref(&other);
- }
-
return 0;
}
@@ -147,6 +202,24 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
return r;
}
+int amdgpu_sync_wait(struct amdgpu_sync *sync)
+{
+ struct amdgpu_sync_entry *e;
+ struct hlist_node *tmp;
+ int i, r;
+
+ hash_for_each_safe(sync->fences, i, tmp, e, node) {
+ r = fence_wait(e->fence, false);
+ if (r)
+ return r;
+
+ hash_del(&e->node);
+ fence_put(e->fence);
+ kfree(e);
+ }
+ return 0;
+}
+
/**
* amdgpu_sync_rings - sync ring to all registered fences
*
@@ -234,15 +307,23 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync,
*/
void amdgpu_sync_free(struct amdgpu_device *adev,
struct amdgpu_sync *sync,
- struct amdgpu_fence *fence)
+ struct fence *fence)
{
+ struct amdgpu_sync_entry *e;
+ struct hlist_node *tmp;
unsigned i;
+ hash_for_each_safe(sync->fences, i, tmp, e, node) {
+ hash_del(&e->node);
+ fence_put(e->fence);
+ kfree(e);
+ }
+
for (i = 0; i < AMDGPU_NUM_SYNCS; ++i)
amdgpu_semaphore_free(adev, &sync->semaphores[i], fence);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
amdgpu_fence_unref(&sync->sync_to[i]);
- amdgpu_fence_unref(&sync->last_vm_update);
+ fence_put(sync->last_vm_update);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index 962dd5552137..f80b1a43be8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -77,7 +77,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
void *gtt_map, *vram_map;
void **gtt_start, **gtt_end;
void **vram_start, **vram_end;
- struct amdgpu_fence *fence = NULL;
+ struct fence *fence = NULL;
r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_GTT, 0, NULL, gtt_obj + i);
@@ -116,13 +116,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
goto out_lclean_unpin;
}
- r = fence_wait(&fence->base, false);
+ r = fence_wait(fence, false);
if (r) {
DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i);
goto out_lclean_unpin;
}
- amdgpu_fence_unref(&fence);
+ fence_put(fence);
r = amdgpu_bo_kmap(vram_obj, &vram_map);
if (r) {
@@ -161,13 +161,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
goto out_lclean_unpin;
}
- r = fence_wait(&fence->base, false);
+ r = fence_wait(fence, false);
if (r) {
DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i);
goto out_lclean_unpin;
}
- amdgpu_fence_unref(&fence);
+ fence_put(fence);
r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
if (r) {
@@ -214,7 +214,7 @@ out_lclean:
amdgpu_bo_unref(&gtt_obj[i]);
}
if (fence)
- amdgpu_fence_unref(&fence);
+ fence_put(fence);
break;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index dd3415d2e45d..399143541d8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -228,7 +228,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
struct amdgpu_device *adev;
struct amdgpu_ring *ring;
uint64_t old_start, new_start;
- struct amdgpu_fence *fence;
+ struct fence *fence;
int r;
adev = amdgpu_get_adev(bo->bdev);
@@ -269,9 +269,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
new_mem->num_pages * PAGE_SIZE, /* bytes */
bo->resv, &fence);
/* FIXME: handle copy error */
- r = ttm_bo_move_accel_cleanup(bo, &fence->base,
+ r = ttm_bo_move_accel_cleanup(bo, fence,
evict, no_wait_gpu, new_mem);
- amdgpu_fence_unref(&fence);
+ fence_put(fence);
return r;
}
@@ -987,46 +987,48 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
uint64_t dst_offset,
uint32_t byte_count,
struct reservation_object *resv,
- struct amdgpu_fence **fence)
+ struct fence **fence)
{
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_sync sync;
uint32_t max_bytes;
unsigned num_loops, num_dw;
+ struct amdgpu_ib *ib;
unsigned i;
int r;
- /* sync other rings */
- amdgpu_sync_create(&sync);
- if (resv) {
- r = amdgpu_sync_resv(adev, &sync, resv, false);
- if (r) {
- DRM_ERROR("sync failed (%d).\n", r);
- amdgpu_sync_free(adev, &sync, NULL);
- return r;
- }
- }
-
max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
num_loops = DIV_ROUND_UP(byte_count, max_bytes);
num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;
- /* for fence and sync */
- num_dw += 64 + AMDGPU_NUM_SYNCS * 8;
+ /* for IB padding */
+ while (num_dw & 0x7)
+ num_dw++;
+
+ ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
+ if (!ib)
+ return -ENOMEM;
- r = amdgpu_ring_lock(ring, num_dw);
+ r = amdgpu_ib_get(ring, NULL, num_dw * 4, ib);
if (r) {
- DRM_ERROR("ring lock failed (%d).\n", r);
- amdgpu_sync_free(adev, &sync, NULL);
+ kfree(ib);
return r;
}
- amdgpu_sync_rings(&sync, ring);
+ ib->length_dw = 0;
+
+ if (resv) {
+ r = amdgpu_sync_resv(adev, &ib->sync, resv,
+ AMDGPU_FENCE_OWNER_UNDEFINED);
+ if (r) {
+ DRM_ERROR("sync failed (%d).\n", r);
+ goto error_free;
+ }
+ }
for (i = 0; i < num_loops; i++) {
uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
- amdgpu_emit_copy_buffer(adev, ring, src_offset, dst_offset,
+ amdgpu_emit_copy_buffer(adev, ib, src_offset, dst_offset,
cur_size_in_bytes);
src_offset += cur_size_in_bytes;
@@ -1034,17 +1036,24 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
byte_count -= cur_size_in_bytes;
}
- r = amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_MOVE, fence);
- if (r) {
- amdgpu_ring_unlock_undo(ring);
- amdgpu_sync_free(adev, &sync, NULL);
- return r;
- }
-
- amdgpu_ring_unlock_commit(ring);
- amdgpu_sync_free(adev, &sync, *fence);
+ amdgpu_vm_pad_ib(adev, ib);
+ WARN_ON(ib->length_dw > num_dw);
+ r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
+ &amdgpu_vm_free_job,
+ AMDGPU_FENCE_OWNER_MOVE,
+ fence);
+ if (r)
+ goto error_free;
+ if (!amdgpu_enable_scheduler) {
+ amdgpu_ib_free(adev, ib);
+ kfree(ib);
+ }
return 0;
+error_free:
+ amdgpu_ib_free(adev, ib);
+ kfree(ib);
+ return r;
}
#if defined(CONFIG_DEBUG_FS)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 68369cf1e318..b87355ccfb1d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -807,7 +807,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
}
static int amdgpu_uvd_free_job(
- struct amdgpu_cs_parser *sched_job)
+ struct amdgpu_job *sched_job)
{
amdgpu_ib_free(sched_job->adev, sched_job->ibs);
kfree(sched_job->ibs);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 33ee6ae28f37..1a984c934b1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -340,7 +340,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
}
static int amdgpu_vce_free_job(
- struct amdgpu_cs_parser *sched_job)
+ struct amdgpu_job *sched_job)
{
amdgpu_ib_free(sched_job->adev, sched_job->ibs);
kfree(sched_job->ibs);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a78a206e176e..83b7ce6f5f72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -200,19 +200,29 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
*/
void amdgpu_vm_flush(struct amdgpu_ring *ring,
struct amdgpu_vm *vm,
- struct amdgpu_fence *updates)
+ struct fence *updates)
{
uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
- struct amdgpu_fence *flushed_updates = vm_id->flushed_updates;
+ struct fence *flushed_updates = vm_id->flushed_updates;
+ bool is_earlier = false;
+
+ if (flushed_updates && updates) {
+ BUG_ON(flushed_updates->context != updates->context);
+ is_earlier = (updates->seqno - flushed_updates->seqno <=
+ INT_MAX) ? true : false;
+ }
if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates ||
- (updates && amdgpu_fence_is_earlier(flushed_updates, updates))) {
+ is_earlier) {
trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id);
- vm_id->flushed_updates = amdgpu_fence_ref(
- amdgpu_fence_later(flushed_updates, updates));
- amdgpu_fence_unref(&flushed_updates);
+ if (is_earlier) {
+ vm_id->flushed_updates = fence_get(updates);
+ fence_put(flushed_updates);
+ }
+ if (!flushed_updates)
+ vm_id->flushed_updates = fence_get(updates);
vm_id->pd_gpu_addr = pd_addr;
amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr);
}
@@ -306,8 +316,7 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
}
}
-static int amdgpu_vm_free_job(
- struct amdgpu_cs_parser *sched_job)
+int amdgpu_vm_free_job(struct amdgpu_job *sched_job)
{
int i;
for (i = 0; i < sched_job->num_ibs; i++)
@@ -1347,7 +1356,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
fence_put(vm->page_directory_fence);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- amdgpu_fence_unref(&vm->ids[i].flushed_updates);
+ fence_put(vm->ids[i].flushed_updates);
amdgpu_fence_unref(&vm->ids[i].last_id_use);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 2b4242b39b0a..3920c1e346f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -630,6 +630,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
+ memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(ring, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
@@ -1338,18 +1339,18 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev)
* Used by the amdgpu ttm implementation to move pages if
* registered as the asic copy callback.
*/
-static void cik_sdma_emit_copy_buffer(struct amdgpu_ring *ring,
+static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count)
{
- amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
- amdgpu_ring_write(ring, byte_count);
- amdgpu_ring_write(ring, 0); /* src/dst endian swap */
- amdgpu_ring_write(ring, lower_32_bits(src_offset));
- amdgpu_ring_write(ring, upper_32_bits(src_offset));
- amdgpu_ring_write(ring, lower_32_bits(dst_offset));
- amdgpu_ring_write(ring, upper_32_bits(dst_offset));
+ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
+ ib->ptr[ib->length_dw++] = byte_count;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 9b0cab413677..fab7b236f37f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2660,6 +2660,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
return r;
}
WREG32(scratch, 0xCAFEDEAD);
+ memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(ring, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 4b68e6306f40..818edb37fa9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -622,6 +622,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
return r;
}
WREG32(scratch, 0xCAFEDEAD);
+ memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(ring, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 9de8104eddeb..715e02d3bfba 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -689,6 +689,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
+ memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(ring, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
@@ -1349,19 +1350,19 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev)
* Used by the amdgpu ttm implementation to move pages if
* registered as the asic copy callback.
*/
-static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ring *ring,
+static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count)
{
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
- SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR));
- amdgpu_ring_write(ring, byte_count);
- amdgpu_ring_write(ring, 0); /* src/dst endian swap */
- amdgpu_ring_write(ring, lower_32_bits(src_offset));
- amdgpu_ring_write(ring, upper_32_bits(src_offset));
- amdgpu_ring_write(ring, lower_32_bits(dst_offset));
- amdgpu_ring_write(ring, upper_32_bits(dst_offset));
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = byte_count;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 029f3455f9f9..67128c8e78b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -810,6 +810,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
+ memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(ring, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
@@ -1473,19 +1474,19 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev)
* Used by the amdgpu ttm implementation to move pages if
* registered as the asic copy callback.
*/
-static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ring *ring,
+static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count)
{
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
- SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR));
- amdgpu_ring_write(ring, byte_count);
- amdgpu_ring_write(ring, 0); /* src/dst endian swap */
- amdgpu_ring_write(ring, lower_32_bits(src_offset));
- amdgpu_ring_write(ring, upper_32_bits(src_offset));
- amdgpu_ring_write(ring, lower_32_bits(dst_offset));
- amdgpu_ring_write(ring, upper_32_bits(dst_offset));
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = byte_count;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
}
/**
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 265d3e2f63cc..d99fe90991dc 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -27,30 +27,32 @@
#include <drm/drmP.h>
#include "gpu_scheduler.h"
+static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
+
/* Initialize a given run queue struct */
static void amd_sched_rq_init(struct amd_sched_rq *rq)
{
+ spin_lock_init(&rq->lock);
INIT_LIST_HEAD(&rq->entities);
- mutex_init(&rq->lock);
rq->current_entity = NULL;
}
static void amd_sched_rq_add_entity(struct amd_sched_rq *rq,
struct amd_sched_entity *entity)
{
- mutex_lock(&rq->lock);
+ spin_lock(&rq->lock);
list_add_tail(&entity->list, &rq->entities);
- mutex_unlock(&rq->lock);
+ spin_unlock(&rq->lock);
}
static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq,
struct amd_sched_entity *entity)
{
- mutex_lock(&rq->lock);
+ spin_lock(&rq->lock);
list_del_init(&entity->list);
if (rq->current_entity == entity)
rq->current_entity = NULL;
- mutex_unlock(&rq->lock);
+ spin_unlock(&rq->lock);
}
/**
@@ -61,12 +63,16 @@ static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq,
static struct amd_sched_entity *
amd_sched_rq_select_entity(struct amd_sched_rq *rq)
{
- struct amd_sched_entity *entity = rq->current_entity;
+ struct amd_sched_entity *entity;
+
+ spin_lock(&rq->lock);
+ entity = rq->current_entity;
if (entity) {
list_for_each_entry_continue(entity, &rq->entities, list) {
if (!kfifo_is_empty(&entity->job_queue)) {
rq->current_entity = entity;
+ spin_unlock(&rq->lock);
return rq->current_entity;
}
}
@@ -76,6 +82,7 @@ amd_sched_rq_select_entity(struct amd_sched_rq *rq)
if (!kfifo_is_empty(&entity->job_queue)) {
rq->current_entity = entity;
+ spin_unlock(&rq->lock);
return rq->current_entity;
}
@@ -83,76 +90,9 @@ amd_sched_rq_select_entity(struct amd_sched_rq *rq)
break;
}
- return NULL;
-}
+ spin_unlock(&rq->lock);
-/**
- * Note: This function should only been called inside scheduler main
- * function for thread safety, there is no other protection here.
- * return ture if scheduler has something ready to run.
- *
- * For active_hw_rq, there is only one producer(scheduler thread) and
- * one consumer(ISR). It should be safe to use this function in scheduler
- * main thread to decide whether to continue emit more IBs.
-*/
-static bool is_scheduler_ready(struct amd_gpu_scheduler *sched)
-{
- unsigned long flags;
- bool full;
-
- spin_lock_irqsave(&sched->queue_lock, flags);
- full = atomic64_read(&sched->hw_rq_count) <
- sched->hw_submission_limit ? true : false;
- spin_unlock_irqrestore(&sched->queue_lock, flags);
-
- return full;
-}
-
-/**
- * Select next entity from the kernel run queue, if not available,
- * return null.
-*/
-static struct amd_sched_entity *
-kernel_rq_select_context(struct amd_gpu_scheduler *sched)
-{
- struct amd_sched_entity *sched_entity;
- struct amd_sched_rq *rq = &sched->kernel_rq;
-
- mutex_lock(&rq->lock);
- sched_entity = amd_sched_rq_select_entity(rq);
- mutex_unlock(&rq->lock);
- return sched_entity;
-}
-
-/**
- * Select next entity containing real IB submissions
-*/
-static struct amd_sched_entity *
-select_context(struct amd_gpu_scheduler *sched)
-{
- struct amd_sched_entity *wake_entity = NULL;
- struct amd_sched_entity *tmp;
- struct amd_sched_rq *rq;
-
- if (!is_scheduler_ready(sched))
- return NULL;
-
- /* Kernel run queue has higher priority than normal run queue*/
- tmp = kernel_rq_select_context(sched);
- if (tmp != NULL)
- goto exit;
-
- rq = &sched->sched_rq;
- mutex_lock(&rq->lock);
- tmp = amd_sched_rq_select_entity(rq);
- mutex_unlock(&rq->lock);
-exit:
- if (sched->current_entity && (sched->current_entity != tmp))
- wake_entity = sched->current_entity;
- sched->current_entity = tmp;
- if (wake_entity && wake_entity->need_wakeup)
- wake_up(&wake_entity->wait_queue);
- return tmp;
+ return NULL;
}
/**
@@ -171,31 +111,20 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
struct amd_sched_rq *rq,
uint32_t jobs)
{
- uint64_t seq_ring = 0;
- char name[20];
-
if (!(sched && entity && rq))
return -EINVAL;
memset(entity, 0, sizeof(struct amd_sched_entity));
- seq_ring = ((uint64_t)sched->ring_id) << 60;
- spin_lock_init(&entity->lock);
entity->belongto_rq = rq;
entity->scheduler = sched;
- init_waitqueue_head(&entity->wait_queue);
- init_waitqueue_head(&entity->wait_emit);
entity->fence_context = fence_context_alloc(1);
- snprintf(name, sizeof(name), "c_entity[%llu]", entity->fence_context);
- memcpy(entity->name, name, 20);
- entity->need_wakeup = false;
if(kfifo_alloc(&entity->job_queue,
jobs * sizeof(void *),
GFP_KERNEL))
return -EINVAL;
spin_lock_init(&entity->queue_lock);
- atomic64_set(&entity->last_queued_v_seq, seq_ring);
- atomic64_set(&entity->last_signaled_v_seq, seq_ring);
+ atomic_set(&entity->fence_seq, 0);
/* Add the entity to the run queue */
amd_sched_rq_add_entity(rq, entity);
@@ -210,23 +139,24 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
*
* return true if entity is initialized, false otherwise
*/
-static bool is_context_entity_initialized(struct amd_gpu_scheduler *sched,
- struct amd_sched_entity *entity)
+static bool amd_sched_entity_is_initialized(struct amd_gpu_scheduler *sched,
+ struct amd_sched_entity *entity)
{
return entity->scheduler == sched &&
entity->belongto_rq != NULL;
}
-static bool is_context_entity_idle(struct amd_gpu_scheduler *sched,
- struct amd_sched_entity *entity)
+/**
+ * Check if entity is idle
+ *
+ * @entity The pointer to a valid scheduler entity
+ *
+ * Return true if entity don't has any unscheduled jobs.
+ */
+static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity)
{
- /**
- * Idle means no pending IBs, and the entity is not
- * currently being used.
- */
- barrier();
- if ((sched->current_entity != entity) &&
- kfifo_is_empty(&entity->job_queue))
+ rmb();
+ if (kfifo_is_empty(&entity->job_queue))
return true;
return false;
@@ -238,84 +168,114 @@ static bool is_context_entity_idle(struct amd_gpu_scheduler *sched,
* @sched Pointer to scheduler instance
* @entity The pointer to a valid scheduler entity
*
- * return 0 if succeed. negative error code on failure
+ * Cleanup and free the allocated resources.
*/
-int amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
- struct amd_sched_entity *entity)
+void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
+ struct amd_sched_entity *entity)
{
- int r = 0;
struct amd_sched_rq *rq = entity->belongto_rq;
- if (!is_context_entity_initialized(sched, entity))
- return 0;
- entity->need_wakeup = true;
+ if (!amd_sched_entity_is_initialized(sched, entity))
+ return;
+
/**
* The client will not queue more IBs during this fini, consume existing
* queued IBs
*/
- r = wait_event_timeout(
- entity->wait_queue,
- is_context_entity_idle(sched, entity),
- msecs_to_jiffies(AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS)
- ) ? 0 : -1;
-
- if (r) {
- if (entity->is_pending)
- DRM_INFO("Entity %p is in waiting state during fini,\
- all pending ibs will be canceled.\n",
- entity);
- }
+ wait_event(sched->job_scheduled, amd_sched_entity_is_idle(entity));
amd_sched_rq_remove_entity(rq, entity);
kfifo_free(&entity->job_queue);
- return r;
}
/**
- * Submit a normal job to the job queue
+ * Helper to submit a job to the job queue
*
- * @sched The pointer to the scheduler
- * @c_entity The pointer to amd_sched_entity
* @job The pointer to job required to submit
- * return 0 if succeed. -1 if failed.
- * -2 indicate queue is full for this client, client should wait untill
- * scheduler consum some queued command.
- * -1 other fail.
-*/
-int amd_sched_push_job(struct amd_gpu_scheduler *sched,
- struct amd_sched_entity *c_entity,
- void *data,
- struct amd_sched_fence **fence)
+ *
+ * Returns true if we could submit the job.
+ */
+static bool amd_sched_entity_in(struct amd_sched_job *job)
+{
+ struct amd_sched_entity *entity = job->s_entity;
+ bool added, first = false;
+
+ spin_lock(&entity->queue_lock);
+ added = kfifo_in(&entity->job_queue, &job, sizeof(job)) == sizeof(job);
+
+ if (added && kfifo_len(&entity->job_queue) == sizeof(job))
+ first = true;
+
+ spin_unlock(&entity->queue_lock);
+
+ /* first job wakes up scheduler */
+ if (first)
+ amd_sched_wakeup(job->sched);
+
+ return added;
+}
+
+/**
+ * Submit a job to the job queue
+ *
+ * @job The pointer to job required to submit
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int amd_sched_entity_push_job(struct amd_sched_job *sched_job)
{
- struct amd_sched_job *job;
+ struct amd_sched_entity *entity = sched_job->s_entity;
+ struct amd_sched_fence *fence = amd_sched_fence_create(
+ entity, sched_job->owner);
+ int r;
if (!fence)
- return -EINVAL;
- job = kzalloc(sizeof(struct amd_sched_job), GFP_KERNEL);
- if (!job)
return -ENOMEM;
- job->sched = sched;
- job->s_entity = c_entity;
- job->data = data;
- *fence = amd_sched_fence_create(c_entity);
- if ((*fence) == NULL) {
- kfree(job);
- return -EINVAL;
- }
- fence_get(&(*fence)->base);
- job->s_fence = *fence;
- while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *),
- &c_entity->queue_lock) != sizeof(void *)) {
- /**
- * Current context used up all its IB slots
- * wait here, or need to check whether GPU is hung
- */
- schedule();
- }
- /* first job wake up scheduler */
- if ((kfifo_len(&c_entity->job_queue) / sizeof(void *)) == 1)
- wake_up_interruptible(&sched->wait_queue);
- return 0;
+
+ fence_get(&fence->base);
+ sched_job->s_fence = fence;
+
+ r = wait_event_interruptible(entity->scheduler->job_scheduled,
+ amd_sched_entity_in(sched_job));
+
+ return r;
+}
+
+/**
+ * Return ture if we can push more jobs to the hw.
+ */
+static bool amd_sched_ready(struct amd_gpu_scheduler *sched)
+{
+ return atomic_read(&sched->hw_rq_count) <
+ sched->hw_submission_limit;
+}
+
+/**
+ * Wake up the scheduler when it is ready
+ */
+static void amd_sched_wakeup(struct amd_gpu_scheduler *sched)
+{
+ if (amd_sched_ready(sched))
+ wake_up_interruptible(&sched->wake_up_worker);
+}
+
+/**
+ * Select next entity containing real IB submissions
+*/
+static struct amd_sched_entity *
+amd_sched_select_context(struct amd_gpu_scheduler *sched)
+{
+ struct amd_sched_entity *tmp;
+
+ if (!amd_sched_ready(sched))
+ return NULL;
+
+ /* Kernel run queue has higher priority than normal run queue*/
+ tmp = amd_sched_rq_select_entity(&sched->kernel_rq);
+ if (tmp == NULL)
+ tmp = amd_sched_rq_select_entity(&sched->sched_rq);
+
+ return tmp;
}
static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
@@ -323,52 +283,41 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
struct amd_sched_job *sched_job =
container_of(cb, struct amd_sched_job, cb);
struct amd_gpu_scheduler *sched;
- unsigned long flags;
sched = sched_job->sched;
- atomic64_set(&sched_job->s_entity->last_signaled_v_seq,
- sched_job->s_fence->v_seq);
amd_sched_fence_signal(sched_job->s_fence);
- spin_lock_irqsave(&sched->queue_lock, flags);
- list_del(&sched_job->list);
- atomic64_dec(&sched->hw_rq_count);
- spin_unlock_irqrestore(&sched->queue_lock, flags);
-
- sched->ops->process_job(sched, sched_job);
+ atomic_dec(&sched->hw_rq_count);
fence_put(&sched_job->s_fence->base);
- kfree(sched_job);
- wake_up_interruptible(&sched->wait_queue);
+ sched->ops->process_job(sched_job);
+ wake_up_interruptible(&sched->wake_up_worker);
}
static int amd_sched_main(void *param)
{
- int r;
- struct amd_sched_job *job;
struct sched_param sparam = {.sched_priority = 1};
- struct amd_sched_entity *c_entity = NULL;
struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param;
+ int r;
sched_setscheduler(current, SCHED_FIFO, &sparam);
while (!kthread_should_stop()) {
+ struct amd_sched_entity *c_entity = NULL;
+ struct amd_sched_job *job;
struct fence *fence;
- wait_event_interruptible(sched->wait_queue,
- is_scheduler_ready(sched) &&
- (c_entity = select_context(sched)));
+ wait_event_interruptible(sched->wake_up_worker,
+ kthread_should_stop() ||
+ (c_entity = amd_sched_select_context(sched)));
+
+ if (!c_entity)
+ continue;
+
r = kfifo_out(&c_entity->job_queue, &job, sizeof(void *));
if (r != sizeof(void *))
continue;
- r = sched->ops->prepare_job(sched, c_entity, job);
- if (!r) {
- unsigned long flags;
- spin_lock_irqsave(&sched->queue_lock, flags);
- list_add_tail(&job->list, &sched->active_hw_rq);
- atomic64_inc(&sched->hw_rq_count);
- spin_unlock_irqrestore(&sched->queue_lock, flags);
- }
- mutex_lock(&sched->sched_lock);
- fence = sched->ops->run_job(sched, c_entity, job);
+ atomic_inc(&sched->hw_rq_count);
+
+ fence = sched->ops->run_job(job);
if (fence) {
r = fence_add_callback(fence, &job->cb,
amd_sched_process_job);
@@ -378,7 +327,8 @@ static int amd_sched_main(void *param)
DRM_ERROR("fence add callback failed (%d)\n", r);
fence_put(fence);
}
- mutex_unlock(&sched->sched_lock);
+
+ wake_up(&sched->job_scheduled);
}
return 0;
}
@@ -386,53 +336,42 @@ static int amd_sched_main(void *param)
/**
* Create a gpu scheduler
*
- * @device The device context for this scheduler
- * @ops The backend operations for this scheduler.
- * @id The scheduler is per ring, here is ring id.
- * @granularity The minumum ms unit the scheduler will scheduled.
- * @preemption Indicate whether this ring support preemption, 0 is no.
+ * @ops The backend operations for this scheduler.
+ * @ring The the ring id for the scheduler.
+ * @hw_submissions Number of hw submissions to do.
*
- * return the pointer to scheduler for success, otherwise return NULL
+ * Return the pointer to scheduler for success, otherwise return NULL
*/
-struct amd_gpu_scheduler *amd_sched_create(void *device,
- struct amd_sched_backend_ops *ops,
- unsigned ring,
- unsigned granularity,
- unsigned preemption,
- unsigned hw_submission)
+struct amd_gpu_scheduler *amd_sched_create(struct amd_sched_backend_ops *ops,
+ unsigned ring, unsigned hw_submission,
+ void *priv)
{
struct amd_gpu_scheduler *sched;
- char name[20];
sched = kzalloc(sizeof(struct amd_gpu_scheduler), GFP_KERNEL);
if (!sched)
return NULL;
- sched->device = device;
sched->ops = ops;
- sched->granularity = granularity;
sched->ring_id = ring;
- sched->preemption = preemption;
sched->hw_submission_limit = hw_submission;
- snprintf(name, sizeof(name), "gpu_sched[%d]", ring);
- mutex_init(&sched->sched_lock);
- spin_lock_init(&sched->queue_lock);
+ sched->priv = priv;
+ snprintf(sched->name, sizeof(sched->name), "amdgpu[%d]", ring);
amd_sched_rq_init(&sched->sched_rq);
amd_sched_rq_init(&sched->kernel_rq);
- init_waitqueue_head(&sched->wait_queue);
- INIT_LIST_HEAD(&sched->active_hw_rq);
- atomic64_set(&sched->hw_rq_count, 0);
+ init_waitqueue_head(&sched->wake_up_worker);
+ init_waitqueue_head(&sched->job_scheduled);
+ atomic_set(&sched->hw_rq_count, 0);
/* Each scheduler will run on a seperate kernel thread */
- sched->thread = kthread_create(amd_sched_main, sched, name);
- if (sched->thread) {
- wake_up_process(sched->thread);
- return sched;
+ sched->thread = kthread_run(amd_sched_main, sched, sched->name);
+ if (IS_ERR(sched->thread)) {
+ DRM_ERROR("Failed to create scheduler for id %d.\n", ring);
+ kfree(sched);
+ return NULL;
}
- DRM_ERROR("Failed to create scheduler for id %d.\n", ring);
- kfree(sched);
- return NULL;
+ return sched;
}
/**
@@ -448,15 +387,3 @@ int amd_sched_destroy(struct amd_gpu_scheduler *sched)
kfree(sched);
return 0;
}
-
-/**
- * Get next queued sequence number
- *
- * @entity The context entity
- *
- * return the next queued sequence number
-*/
-uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity)
-{
- return atomic64_read(&c_entity->last_queued_v_seq) + 1;
-}
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index ceb5918bfbeb..e797796dcad7 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -27,8 +27,6 @@
#include <linux/kfifo.h>
#include <linux/fence.h>
-#define AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
-
struct amd_gpu_scheduler;
struct amd_sched_rq;
@@ -41,20 +39,12 @@ struct amd_sched_rq;
struct amd_sched_entity {
struct list_head list;
struct amd_sched_rq *belongto_rq;
- spinlock_t lock;
- /* the virtual_seq is unique per context per ring */
- atomic64_t last_queued_v_seq;
- atomic64_t last_signaled_v_seq;
+ atomic_t fence_seq;
/* the job_queue maintains the jobs submitted by clients */
struct kfifo job_queue;
spinlock_t queue_lock;
struct amd_gpu_scheduler *scheduler;
- wait_queue_head_t wait_queue;
- wait_queue_head_t wait_emit;
- bool is_pending;
uint64_t fence_context;
- char name[20];
- bool need_wakeup;
};
/**
@@ -63,26 +53,24 @@ struct amd_sched_entity {
* the next entity to emit commands from.
*/
struct amd_sched_rq {
- struct mutex lock;
+ spinlock_t lock;
struct list_head entities;
struct amd_sched_entity *current_entity;
};
struct amd_sched_fence {
struct fence base;
- struct fence_cb cb;
- struct amd_sched_entity *entity;
- uint64_t v_seq;
+ struct amd_gpu_scheduler *scheduler;
spinlock_t lock;
+ void *owner;
};
struct amd_sched_job {
- struct list_head list;
struct fence_cb cb;
struct amd_gpu_scheduler *sched;
struct amd_sched_entity *s_entity;
- void *data;
struct amd_sched_fence *s_fence;
+ void *owner;
};
extern const struct fence_ops amd_sched_fence_ops;
@@ -101,61 +89,42 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f)
* these functions should be implemented in driver side
*/
struct amd_sched_backend_ops {
- int (*prepare_job)(struct amd_gpu_scheduler *sched,
- struct amd_sched_entity *c_entity,
- struct amd_sched_job *job);
- struct fence *(*run_job)(struct amd_gpu_scheduler *sched,
- struct amd_sched_entity *c_entity,
- struct amd_sched_job *job);
- void (*process_job)(struct amd_gpu_scheduler *sched,
- struct amd_sched_job *job);
+ struct fence *(*run_job)(struct amd_sched_job *job);
+ void (*process_job)(struct amd_sched_job *job);
};
/**
* One scheduler is implemented for each hardware ring
*/
struct amd_gpu_scheduler {
- void *device;
struct task_struct *thread;
struct amd_sched_rq sched_rq;
struct amd_sched_rq kernel_rq;
- struct list_head active_hw_rq;
- atomic64_t hw_rq_count;
+ atomic_t hw_rq_count;
struct amd_sched_backend_ops *ops;
uint32_t ring_id;
- uint32_t granularity; /* in ms unit */
- uint32_t preemption;
- wait_queue_head_t wait_queue;
- struct amd_sched_entity *current_entity;
- struct mutex sched_lock;
- spinlock_t queue_lock;
+ wait_queue_head_t wake_up_worker;
+ wait_queue_head_t job_scheduled;
uint32_t hw_submission_limit;
+ char name[20];
+ void *priv;
};
-struct amd_gpu_scheduler *amd_sched_create(void *device,
- struct amd_sched_backend_ops *ops,
- uint32_t ring,
- uint32_t granularity,
- uint32_t preemption,
- uint32_t hw_submission);
+struct amd_gpu_scheduler *
+amd_sched_create(struct amd_sched_backend_ops *ops,
+ uint32_t ring, uint32_t hw_submission, void *priv);
int amd_sched_destroy(struct amd_gpu_scheduler *sched);
-int amd_sched_push_job(struct amd_gpu_scheduler *sched,
- struct amd_sched_entity *c_entity,
- void *data,
- struct amd_sched_fence **fence);
-
int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *entity,
struct amd_sched_rq *rq,
uint32_t jobs);
-int amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
- struct amd_sched_entity *entity);
-
-uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity);
+void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
+ struct amd_sched_entity *entity);
+int amd_sched_entity_push_job(struct amd_sched_job *sched_job);
struct amd_sched_fence *amd_sched_fence_create(
- struct amd_sched_entity *s_entity);
+ struct amd_sched_entity *s_entity, void *owner);
void amd_sched_fence_signal(struct amd_sched_fence *fence);
diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c
index a4751598c0b4..e62c37920e11 100644
--- a/drivers/gpu/drm/amd/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c
@@ -27,19 +27,22 @@
#include <drm/drmP.h>
#include "gpu_scheduler.h"
-struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity)
+struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity, void *owner)
{
struct amd_sched_fence *fence = NULL;
+ unsigned seq;
+
fence = kzalloc(sizeof(struct amd_sched_fence), GFP_KERNEL);
if (fence == NULL)
return NULL;
- fence->v_seq = atomic64_inc_return(&s_entity->last_queued_v_seq);
- fence->entity = s_entity;
+ fence->owner = owner;
+ fence->scheduler = s_entity->scheduler;
spin_lock_init(&fence->lock);
- fence_init(&fence->base, &amd_sched_fence_ops,
- &fence->lock,
- s_entity->fence_context,
- fence->v_seq);
+
+ seq = atomic_inc_return(&s_entity->fence_seq);
+ fence_init(&fence->base, &amd_sched_fence_ops, &fence->lock,
+ s_entity->fence_context, seq);
+
return fence;
}
@@ -60,7 +63,7 @@ static const char *amd_sched_fence_get_driver_name(struct fence *fence)
static const char *amd_sched_fence_get_timeline_name(struct fence *f)
{
struct amd_sched_fence *fence = to_amd_sched_fence(f);
- return (const char *)fence->entity->name;
+ return (const char *)fence->scheduler->name;
}
static bool amd_sched_fence_enable_signaling(struct fence *f)
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 94b21ae70ef7..5a2cafb4f1bc 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -95,6 +95,11 @@ void radeon_connector_hotplug(struct drm_connector *connector)
if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) {
drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
} else if (radeon_dp_needs_link_train(radeon_connector)) {
+ /* Don't try to start link training before we
+ * have the dpcd */
+ if (!radeon_dp_getdpcd(radeon_connector))
+ return;
+
/* set it to OFF so that drm_helper_connector_dpms()
* won't return immediately since the current state
* is ON at this point.