diff options
Diffstat (limited to 'drivers/staging/media/sunxi/cedrus/cedrus_h264.c')
-rw-r--r-- | drivers/staging/media/sunxi/cedrus/cedrus_h264.c | 147 |
1 files changed, 130 insertions, 17 deletions
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c index d6a782703c9b..bfb4a4820a67 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c @@ -6,6 +6,7 @@ * Copyright (c) 2018 Bootlin */ +#include <linux/delay.h> #include <linux/types.h> #include <media/videobuf2-dma-contig.h> @@ -38,7 +39,7 @@ struct cedrus_h264_sram_ref_pic { #define CEDRUS_H264_FRAME_NUM 18 #define CEDRUS_NEIGHBOR_INFO_BUF_SIZE (16 * SZ_1K) -#define CEDRUS_PIC_INFO_BUF_SIZE (128 * SZ_1K) +#define CEDRUS_MIN_PIC_INFO_BUF_SIZE (130 * SZ_1K) static void cedrus_h264_write_sram(struct cedrus_dev *dev, enum cedrus_h264_sram_off off, @@ -96,7 +97,7 @@ static void cedrus_write_frame_list(struct cedrus_ctx *ctx, const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params; const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params; const struct v4l2_ctrl_h264_sps *sps = run->h264.sps; - struct vb2_queue *cap_q = &ctx->fh.m2m_ctx->cap_q_ctx.q; + struct vb2_queue *cap_q; struct cedrus_buffer *output_buf; struct cedrus_dev *dev = ctx->dev; unsigned long used_dpbs = 0; @@ -104,6 +105,8 @@ static void cedrus_write_frame_list(struct cedrus_ctx *ctx, unsigned int output = 0; unsigned int i; + cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); + memset(pic_list, 0, sizeof(pic_list)); for (i = 0; i < ARRAY_SIZE(decode->dpb); i++) { @@ -167,12 +170,14 @@ static void _cedrus_write_ref_list(struct cedrus_ctx *ctx, enum cedrus_h264_sram_off sram) { const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params; - struct vb2_queue *cap_q = &ctx->fh.m2m_ctx->cap_q_ctx.q; + struct vb2_queue *cap_q; struct cedrus_dev *dev = ctx->dev; u8 sram_array[CEDRUS_MAX_REF_IDX]; unsigned int i; size_t size; + cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); + memset(sram_array, 0, sizeof(sram_array)); for (i = 0; i < num_ref; i++) { @@ -240,8 +245,8 @@ static void cedrus_write_scaling_lists(struct cedrus_ctx *ctx, sizeof(scaling->scaling_list_8x8[0])); cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_1, - scaling->scaling_list_8x8[3], - sizeof(scaling->scaling_list_8x8[3])); + scaling->scaling_list_8x8[1], + sizeof(scaling->scaling_list_8x8[1])); cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_4x4, scaling->scaling_list_4x4, @@ -289,6 +294,28 @@ static void cedrus_write_pred_weight_table(struct cedrus_ctx *ctx, } } +/* + * It turns out that using VE_H264_VLD_OFFSET to skip bits is not reliable. In + * rare cases frame is not decoded correctly. However, setting offset to 0 and + * skipping appropriate amount of bits with flush bits trigger always works. + */ +static void cedrus_skip_bits(struct cedrus_dev *dev, int num) +{ + int count = 0; + + while (count < num) { + int tmp = min(num - count, 32); + + cedrus_write(dev, VE_H264_TRIGGER_TYPE, + VE_H264_TRIGGER_TYPE_FLUSH_BITS | + VE_H264_TRIGGER_TYPE_N_BITS(tmp)); + while (cedrus_read(dev, VE_H264_STATUS) & VE_H264_STATUS_VLD_BUSY) + udelay(1); + + count += tmp; + } +} + static void cedrus_set_params(struct cedrus_ctx *ctx, struct cedrus_run *run) { @@ -299,12 +326,13 @@ static void cedrus_set_params(struct cedrus_ctx *ctx, struct vb2_buffer *src_buf = &run->src->vb2_buf; struct cedrus_dev *dev = ctx->dev; dma_addr_t src_buf_addr; - u32 offset = slice->header_bit_size; - u32 len = (slice->size * 8) - offset; + u32 len = slice->size * 8; + unsigned int pic_width_in_mbs; + bool mbaff_pic; u32 reg; cedrus_write(dev, VE_H264_VLD_LEN, len); - cedrus_write(dev, VE_H264_VLD_OFFSET, offset); + cedrus_write(dev, VE_H264_VLD_OFFSET, 0); src_buf_addr = vb2_dma_contig_plane_dma_addr(src_buf, 0); cedrus_write(dev, VE_H264_VLD_END, @@ -314,6 +342,20 @@ static void cedrus_set_params(struct cedrus_ctx *ctx, VE_H264_VLD_ADDR_FIRST | VE_H264_VLD_ADDR_VALID | VE_H264_VLD_ADDR_LAST); + if (ctx->src_fmt.width > 2048) { + cedrus_write(dev, VE_BUF_CTRL, + VE_BUF_CTRL_INTRAPRED_MIXED_RAM | + VE_BUF_CTRL_DBLK_MIXED_RAM); + cedrus_write(dev, VE_DBLK_DRAM_BUF_ADDR, + ctx->codec.h264.deblk_buf_dma); + cedrus_write(dev, VE_INTRAPRED_DRAM_BUF_ADDR, + ctx->codec.h264.intra_pred_buf_dma); + } else { + cedrus_write(dev, VE_BUF_CTRL, + VE_BUF_CTRL_INTRAPRED_INT_SRAM | + VE_BUF_CTRL_DBLK_INT_SRAM); + } + /* * FIXME: Since the bitstream parsing is done in software, and * in userspace, this shouldn't be needed anymore. But it @@ -323,6 +365,8 @@ static void cedrus_set_params(struct cedrus_ctx *ctx, cedrus_write(dev, VE_H264_TRIGGER_TYPE, VE_H264_TRIGGER_TYPE_INIT_SWDEC); + cedrus_skip_bits(dev, slice->header_bit_size); + if (((pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED) && (slice->slice_type == V4L2_H264_SLICE_TYPE_P || slice->slice_type == V4L2_H264_SLICE_TYPE_SP)) || @@ -370,12 +414,20 @@ static void cedrus_set_params(struct cedrus_ctx *ctx, reg |= VE_H264_SPS_DIRECT_8X8_INFERENCE; cedrus_write(dev, VE_H264_SPS, reg); + mbaff_pic = !(slice->flags & V4L2_H264_SLICE_FLAG_FIELD_PIC) && + (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD); + pic_width_in_mbs = sps->pic_width_in_mbs_minus1 + 1; + // slice parameters reg = 0; + reg |= ((slice->first_mb_in_slice % pic_width_in_mbs) & 0xff) << 24; + reg |= (((slice->first_mb_in_slice / pic_width_in_mbs) * + (mbaff_pic + 1)) & 0xff) << 16; reg |= decode->nal_ref_idc ? BIT(12) : 0; reg |= (slice->slice_type & 0xf) << 8; reg |= slice->cabac_init_idc & 0x3; - reg |= VE_H264_SHS_FIRST_SLICE_IN_PIC; + if (ctx->fh.m2m_ctx->new_frame) + reg |= VE_H264_SHS_FIRST_SLICE_IN_PIC; if (slice->flags & V4L2_H264_SLICE_FLAG_FIELD_PIC) reg |= VE_H264_SHS_FIELD_PIC; if (slice->flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD) @@ -447,7 +499,7 @@ static void cedrus_h264_setup(struct cedrus_ctx *ctx, { struct cedrus_dev *dev = ctx->dev; - cedrus_engine_enable(dev, CEDRUS_CODEC_H264); + cedrus_engine_enable(ctx, CEDRUS_CODEC_H264); cedrus_write(dev, VE_H264_SDROT_CTRL, 0); cedrus_write(dev, VE_H264_EXTRA_BUFFER1, @@ -464,18 +516,30 @@ static void cedrus_h264_setup(struct cedrus_ctx *ctx, static int cedrus_h264_start(struct cedrus_ctx *ctx) { struct cedrus_dev *dev = ctx->dev; + unsigned int pic_info_size; unsigned int field_size; unsigned int mv_col_size; int ret; + /* Formula for picture buffer size is taken from CedarX source. */ + + if (ctx->src_fmt.width > 2048) + pic_info_size = CEDRUS_H264_FRAME_NUM * 0x4000; + else + pic_info_size = CEDRUS_H264_FRAME_NUM * 0x1000; + /* - * FIXME: It seems that the H6 cedarX code is using a formula - * here based on the size of the frame, while all the older - * code is using a fixed size, so that might need to be - * changed at some point. + * FIXME: If V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY is set, + * there is no need to multiply by 2. */ + pic_info_size += ctx->src_fmt.height * 2 * 64; + + if (pic_info_size < CEDRUS_MIN_PIC_INFO_BUF_SIZE) + pic_info_size = CEDRUS_MIN_PIC_INFO_BUF_SIZE; + + ctx->codec.h264.pic_info_buf_size = pic_info_size; ctx->codec.h264.pic_info_buf = - dma_alloc_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE, + dma_alloc_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size, &ctx->codec.h264.pic_info_buf_dma, GFP_KERNEL); if (!ctx->codec.h264.pic_info_buf) @@ -528,15 +592,56 @@ static int cedrus_h264_start(struct cedrus_ctx *ctx) goto err_neighbor_buf; } + if (ctx->src_fmt.width > 2048) { + /* + * Formulas for deblock and intra prediction buffer sizes + * are taken from CedarX source. + */ + + ctx->codec.h264.deblk_buf_size = + ALIGN(ctx->src_fmt.width, 32) * 12; + ctx->codec.h264.deblk_buf = + dma_alloc_coherent(dev->dev, + ctx->codec.h264.deblk_buf_size, + &ctx->codec.h264.deblk_buf_dma, + GFP_KERNEL); + if (!ctx->codec.h264.deblk_buf) { + ret = -ENOMEM; + goto err_mv_col_buf; + } + + ctx->codec.h264.intra_pred_buf_size = + ALIGN(ctx->src_fmt.width, 64) * 5; + ctx->codec.h264.intra_pred_buf = + dma_alloc_coherent(dev->dev, + ctx->codec.h264.intra_pred_buf_size, + &ctx->codec.h264.intra_pred_buf_dma, + GFP_KERNEL); + if (!ctx->codec.h264.intra_pred_buf) { + ret = -ENOMEM; + goto err_deblk_buf; + } + } + return 0; +err_deblk_buf: + dma_free_coherent(dev->dev, ctx->codec.h264.deblk_buf_size, + ctx->codec.h264.deblk_buf, + ctx->codec.h264.deblk_buf_dma); + +err_mv_col_buf: + dma_free_coherent(dev->dev, ctx->codec.h264.mv_col_buf_size, + ctx->codec.h264.mv_col_buf, + ctx->codec.h264.mv_col_buf_dma); + err_neighbor_buf: dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, ctx->codec.h264.neighbor_info_buf, ctx->codec.h264.neighbor_info_buf_dma); err_pic_buf: - dma_free_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE, + dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size, ctx->codec.h264.pic_info_buf, ctx->codec.h264.pic_info_buf_dma); return ret; @@ -552,9 +657,17 @@ static void cedrus_h264_stop(struct cedrus_ctx *ctx) dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, ctx->codec.h264.neighbor_info_buf, ctx->codec.h264.neighbor_info_buf_dma); - dma_free_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE, + dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size, ctx->codec.h264.pic_info_buf, ctx->codec.h264.pic_info_buf_dma); + if (ctx->codec.h264.deblk_buf_size) + dma_free_coherent(dev->dev, ctx->codec.h264.deblk_buf_size, + ctx->codec.h264.deblk_buf, + ctx->codec.h264.deblk_buf_dma); + if (ctx->codec.h264.intra_pred_buf_size) + dma_free_coherent(dev->dev, ctx->codec.h264.intra_pred_buf_size, + ctx->codec.h264.intra_pred_buf, + ctx->codec.h264.intra_pred_buf_dma); } static void cedrus_h264_trigger(struct cedrus_ctx *ctx) |