author		Sonny Jiang <sonny.jiang@amd.com>	2022-07-12 23:33:10 +0300
committer	Alex Deucher <alexander.deucher@amd.com>	2022-07-18 23:37:09 +0300
commit		0b15205c7325dc20b7da0068307670d222d66949 (patch)
tree		b899a22105466b9ce24fee83b06d8d384afd20d8 /drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
parent		891ce1c9623f36194587d02774ac3d8c30c3ca72 (diff)
download	linux-0b15205c7325dc20b7da0068307670d222d66949.tar.xz
drm/amdgpu: limiting AV1 to first instance on VCN4 decode
AV1 is only supported on the first instance.

Signed-off-by: Sonny Jiang <sonny.jiang@amd.com>
Reviewed-by: James Zhu <James.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c	131
1 file changed, 131 insertions(+), 0 deletions(-)
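The core of the change is the new vcn_v4_0_dec_msg() helper in the diff below: it walks the decode message buffer referenced by the IB, and if any "create" message asks for a codec other than H.264, HEVC or VP9 (i.e. AV1), the job's scheduler list is restricted to the first VCN instance. The following standalone sketch illustrates that decision in isolation; the codec IDs (0x7, 0x10, 0x11) are taken from the patch, while the function name, buffer layout and offset handling are simplified assumptions made for the example, not kernel API.

#include <stdbool.h>
#include <stdint.h>

#define RDECODE_MESSAGE_CREATE 0x00000001

/*
 * Hypothetical helper (illustration only, not part of the patch): scan the
 * "create" entries of a decode message buffer and report whether the job
 * must stay on the first VCN instance. 'entries' points at the first
 * 4-dword create record, 'buf_base' at the start of the mapped message
 * buffer; this layout is a simplification of what the patch parses.
 */
bool vcn_dec_needs_first_instance(const uint32_t *entries, uint32_t num_buffers,
				  const uint8_t *buf_base)
{
	for (uint32_t i = 0; i < num_buffers; ++i, entries += 4) {
		if (entries[0] != RDECODE_MESSAGE_CREATE)
			continue;

		/* entries[1] is the byte offset of the create message */
		const uint32_t *create = (const uint32_t *)(buf_base + entries[1]);

		/* 0x7 = H.264, 0x10 = HEVC, 0x11 = VP9: any instance will do */
		if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
			continue;

		return true; /* anything else (e.g. AV1) must use instance 0 */
	}
	return false;
}

In the patch itself, the same outcome triggers vcn_v4_0_limit_sched(), which calls drm_sched_entity_modify_sched() so that only the first instance's scheduler remains in the entity's list.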
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 84ac2401895a..a91ffbf902d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -25,6 +25,7 @@
#include "amdgpu.h"
#include "amdgpu_vcn.h"
#include "amdgpu_pm.h"
+#include "amdgpu_cs.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_hw_ip.h"
@@ -44,6 +45,9 @@
#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
#define VCN1_VID_SOC_ADDRESS_3_0 0x48300
+#define RDECODE_MSG_CREATE 0x00000000
+#define RDECODE_MESSAGE_CREATE 0x00000001
+
static int amdgpu_ih_clientid_vcns[] = {
SOC15_IH_CLIENTID_VCN,
SOC15_IH_CLIENTID_VCN1
@@ -1323,6 +1327,132 @@ static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring)
}
}
+static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p)
+{
+ struct drm_gpu_scheduler **scheds;
+
+ /* The create msg must be in the first IB submitted */
+ if (atomic_read(&p->entity->fence_seq))
+ return -EINVAL;
+
+ scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC]
+ [AMDGPU_RING_PRIO_0].sched;
+ drm_sched_entity_modify_sched(p->entity, scheds, 1);
+ return 0;
+}
+
+static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_bo_va_mapping *map;
+ uint32_t *msg, num_buffers;
+ struct amdgpu_bo *bo;
+ uint64_t start, end;
+ unsigned int i;
+ void *ptr;
+ int r;
+
+ addr &= AMDGPU_GMC_HOLE_MASK;
+ r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
+ if (r) {
+ DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
+ return r;
+ }
+
+ start = map->start * AMDGPU_GPU_PAGE_SIZE;
+ end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
+ if (addr & 0x7) {
+ DRM_ERROR("VCN messages must be 8 byte aligned!\n");
+ return -EINVAL;
+ }
+
+ bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (r) {
+ DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
+ return r;
+ }
+
+ r = amdgpu_bo_kmap(bo, &ptr);
+ if (r) {
+ DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
+ return r;
+ }
+
+ msg = ptr + addr - start;
+
+ /* Check length */
+ if (msg[1] > end - addr) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (msg[3] != RDECODE_MSG_CREATE)
+ goto out;
+
+ num_buffers = msg[2];
+ for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {
+ uint32_t offset, size, *create;
+
+ if (msg[0] != RDECODE_MESSAGE_CREATE)
+ continue;
+
+ offset = msg[1];
+ size = msg[2];
+
+ if (offset + size > end) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ create = ptr + addr + offset - start;
+
+ /* H264, HEVC and VP9 can run on any instance */
+ if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
+ continue;
+
+ r = vcn_v4_0_limit_sched(p);
+ if (r)
+ goto out;
+ }
+
+out:
+ amdgpu_bo_kunmap(bo);
+ return r;
+}
+
+#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003)
+
+static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
+{
+ struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
+ struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
+ uint32_t val;
+ int r = 0;
+
+ /* The first instance can decode anything */
+ if (!ring->me)
+ return r;
+
+ /* unified queue ib header has 8 double words. */
+ if (ib->length_dw < 8)
+ return r;
+
+ val = amdgpu_ib_get_value(ib, 6); //RADEON_VCN_ENGINE_TYPE
+
+ if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
+ decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10];
+
+ if (decode_buffer->valid_buf_flag & 0x1)
+ r = vcn_v4_0_dec_msg(p, ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
+ decode_buffer->msg_buffer_address_lo);
+ }
+ return r;
+}
+
static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
@@ -1331,6 +1461,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
.get_rptr = vcn_v4_0_unified_ring_get_rptr,
.get_wptr = vcn_v4_0_unified_ring_get_wptr,
.set_wptr = vcn_v4_0_unified_ring_set_wptr,
+ .patch_cs_in_place = vcn_v4_0_ring_patch_cs_in_place,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +