Message ID | 20200116133437.2443-6-narmstrong@baylibre.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | media: meson: vdec: Add VP9 decoding support | expand |
Hi Neil, Le jeudi 16 janvier 2020 à 14:34 +0100, Neil Armstrong a écrit : > From: Maxime Jourdan <mjourdan@baylibre.com> > > This adds VP9 decoding for the Amlogic GXL, G12A & SM1 SoCs, using > the common "HEVC" HW decoder. > > For G12A & SM1, it uses the IOMMU support from the firmware. > > For 10bit decoding, the firmware can only decode in the proprietary > Amlogic Framebuffer Compression format, but can output in 8bit NV12 > buffer while writing the decoded frame. > > Signed-off-by: Maxime Jourdan <mjourdan@baylibre.com> > Signed-off-by: Neil Armstrong <narmstrong@baylibre.com> > --- > drivers/staging/media/meson/vdec/Makefile | 2 +- > drivers/staging/media/meson/vdec/codec_vp9.c | 2139 +++++++++++++++++ > drivers/staging/media/meson/vdec/codec_vp9.h | 13 + > drivers/staging/media/meson/vdec/hevc_regs.h | 7 + > drivers/staging/media/meson/vdec/vdec.c | 5 + > .../staging/media/meson/vdec/vdec_helpers.c | 4 + > .../staging/media/meson/vdec/vdec_platform.c | 38 + > 7 files changed, 2207 insertions(+), 1 deletion(-) > create mode 100644 drivers/staging/media/meson/vdec/codec_vp9.c > create mode 100644 drivers/staging/media/meson/vdec/codec_vp9.h > > diff --git a/drivers/staging/media/meson/vdec/Makefile b/drivers/staging/media/meson/vdec/Makefile > index f55b6e625034..6e726af84ac9 100644 > --- a/drivers/staging/media/meson/vdec/Makefile > +++ b/drivers/staging/media/meson/vdec/Makefile > @@ -3,6 +3,6 @@ > > meson-vdec-objs = esparser.o vdec.o vdec_helpers.o vdec_platform.o > meson-vdec-objs += vdec_1.o vdec_hevc.o > -meson-vdec-objs += codec_mpeg12.o codec_h264.o codec_hevc_common.o > +meson-vdec-objs += codec_mpeg12.o codec_h264.o codec_hevc_common.o codec_vp9.o > > obj-$(CONFIG_VIDEO_MESON_VDEC) += meson-vdec.o > diff --git a/drivers/staging/media/meson/vdec/codec_vp9.c b/drivers/staging/media/meson/vdec/codec_vp9.c > new file mode 100644 > index 000000000000..9e23ccbb7cc0 > --- /dev/null > +++ b/drivers/staging/media/meson/vdec/codec_vp9.c > @@ -0,0 +1,2139 @@ 
> +// SPDX-License-Identifier: GPL-2.0+ > +/* > + * Copyright (C) 2018 Maxime Jourdan <mjourdan@baylibre.com> > + * Copyright (C) 2015 Amlogic, Inc. All rights reserved. > + */ > + > +#include <media/v4l2-mem2mem.h> > +#include <media/videobuf2-dma-contig.h> > + > +#include "dos_regs.h" > +#include "hevc_regs.h" > +#include "vdec_helpers.h" > +#include "codec_hevc_common.h" > + > +/* HEVC reg mapping */ > +#define VP9_DEC_STATUS_REG HEVC_ASSIST_SCRATCH_0 > + #define VP9_10B_DECODE_SLICE 5 > + #define VP9_HEAD_PARSER_DONE 0xf0 > +#define VP9_RPM_BUFFER HEVC_ASSIST_SCRATCH_1 > +#define VP9_SHORT_TERM_RPS HEVC_ASSIST_SCRATCH_2 > +#define VP9_ADAPT_PROB_REG HEVC_ASSIST_SCRATCH_3 > +#define VP9_MMU_MAP_BUFFER HEVC_ASSIST_SCRATCH_4 > +#define VP9_PPS_BUFFER HEVC_ASSIST_SCRATCH_5 > +#define VP9_SAO_UP HEVC_ASSIST_SCRATCH_6 > +#define VP9_STREAM_SWAP_BUFFER HEVC_ASSIST_SCRATCH_7 > +#define VP9_STREAM_SWAP_BUFFER2 HEVC_ASSIST_SCRATCH_8 > +#define VP9_PROB_SWAP_BUFFER HEVC_ASSIST_SCRATCH_9 > +#define VP9_COUNT_SWAP_BUFFER HEVC_ASSIST_SCRATCH_A > +#define VP9_SEG_MAP_BUFFER HEVC_ASSIST_SCRATCH_B > +#define VP9_SCALELUT HEVC_ASSIST_SCRATCH_D > +#define VP9_WAIT_FLAG HEVC_ASSIST_SCRATCH_E > +#define LMEM_DUMP_ADR HEVC_ASSIST_SCRATCH_F > +#define NAL_SEARCH_CTL HEVC_ASSIST_SCRATCH_I > +#define VP9_DECODE_MODE HEVC_ASSIST_SCRATCH_J > + #define DECODE_MODE_SINGLE 0 > +#define DECODE_STOP_POS HEVC_ASSIST_SCRATCH_K > +#define HEVC_DECODE_COUNT HEVC_ASSIST_SCRATCH_M > +#define HEVC_DECODE_SIZE HEVC_ASSIST_SCRATCH_N > + > +/* VP9 Constants */ > +#define LCU_SIZE 64 > +#define MAX_REF_PIC_NUM 24 > +#define REFS_PER_FRAME 3 > +#define REF_FRAMES 8 > +#define MV_MEM_UNIT 0x240 > +#define ADAPT_PROB_SIZE 0xf80 > + > +enum FRAME_TYPE { > + KEY_FRAME = 0, > + INTER_FRAME = 1, > + FRAME_TYPES, > +}; > + > +/* VP9 Workspace layout */ > +#define MPRED_MV_BUF_SIZE 0x120000 > + > +#define IPP_SIZE 0x4000 > +#define SAO_ABV_SIZE 0x30000 > +#define SAO_VB_SIZE 0x30000 > +#define SH_TM_RPS_SIZE 
0x800 > +#define VPS_SIZE 0x800 > +#define SPS_SIZE 0x800 > +#define PPS_SIZE 0x2000 > +#define SAO_UP_SIZE 0x2800 > +#define SWAP_BUF_SIZE 0x800 > +#define SWAP_BUF2_SIZE 0x800 > +#define SCALELUT_SIZE 0x8000 > +#define DBLK_PARA_SIZE 0x80000 > +#define DBLK_DATA_SIZE 0x80000 > +#define SEG_MAP_SIZE 0xd800 > +#define PROB_SIZE 0x5000 > +#define COUNT_SIZE 0x3000 > +#define MMU_VBH_SIZE 0x5000 > +#define MPRED_ABV_SIZE 0x10000 > +#define MPRED_MV_SIZE (MPRED_MV_BUF_SIZE * MAX_REF_PIC_NUM) > +#define RPM_BUF_SIZE 0x100 > +#define LMEM_SIZE 0x800 > + > +#define IPP_OFFSET 0x00 > +#define SAO_ABV_OFFSET (IPP_OFFSET + IPP_SIZE) > +#define SAO_VB_OFFSET (SAO_ABV_OFFSET + SAO_ABV_SIZE) > +#define SH_TM_RPS_OFFSET (SAO_VB_OFFSET + SAO_VB_SIZE) > +#define VPS_OFFSET (SH_TM_RPS_OFFSET + SH_TM_RPS_SIZE) > +#define SPS_OFFSET (VPS_OFFSET + VPS_SIZE) > +#define PPS_OFFSET (SPS_OFFSET + SPS_SIZE) > +#define SAO_UP_OFFSET (PPS_OFFSET + PPS_SIZE) > +#define SWAP_BUF_OFFSET (SAO_UP_OFFSET + SAO_UP_SIZE) > +#define SWAP_BUF2_OFFSET (SWAP_BUF_OFFSET + SWAP_BUF_SIZE) > +#define SCALELUT_OFFSET (SWAP_BUF2_OFFSET + SWAP_BUF2_SIZE) > +#define DBLK_PARA_OFFSET (SCALELUT_OFFSET + SCALELUT_SIZE) > +#define DBLK_DATA_OFFSET (DBLK_PARA_OFFSET + DBLK_PARA_SIZE) > +#define SEG_MAP_OFFSET (DBLK_DATA_OFFSET + DBLK_DATA_SIZE) > +#define PROB_OFFSET (SEG_MAP_OFFSET + SEG_MAP_SIZE) > +#define COUNT_OFFSET (PROB_OFFSET + PROB_SIZE) > +#define MMU_VBH_OFFSET (COUNT_OFFSET + COUNT_SIZE) > +#define MPRED_ABV_OFFSET (MMU_VBH_OFFSET + MMU_VBH_SIZE) > +#define MPRED_MV_OFFSET (MPRED_ABV_OFFSET + MPRED_ABV_SIZE) > +#define RPM_OFFSET (MPRED_MV_OFFSET + MPRED_MV_SIZE) > +#define LMEM_OFFSET (RPM_OFFSET + RPM_BUF_SIZE) > + > +#define SIZE_WORKSPACE ALIGN(LMEM_OFFSET + LMEM_SIZE, 64 * SZ_1K) > + > +#define NONE -1 > +#define INTRA_FRAME 0 > +#define LAST_FRAME 1 > +#define GOLDEN_FRAME 2 > +#define ALTREF_FRAME 3 > +#define MAX_REF_FRAMES 4 > + > +/* > + * Defines, declarations, sub-functions for vp9 de-block 
loop > + filter Thr/Lvl table update > + * - struct segmentation is for loop filter only (removed something) > + * - function "vp9_loop_filter_init" and "vp9_loop_filter_frame_init" will > + be instantiated in C_Entry > + * - vp9_loop_filter_init run once before decoding start > + * - vp9_loop_filter_frame_init run before every frame decoding start > + * - set video format to VP9 is in vp9_loop_filter_init > + */ > +#define MAX_LOOP_FILTER 63 > +#define MAX_REF_LF_DELTAS 4 > +#define MAX_MODE_LF_DELTAS 2 > +#define SEGMENT_DELTADATA 0 > +#define SEGMENT_ABSDATA 1 > +#define MAX_SEGMENTS 8 > + > +/* VP9 PROB processing defines */ > +#define VP9_PARTITION_START 0 > +#define VP9_PARTITION_SIZE_STEP (3 * 4) > +#define VP9_PARTITION_ONE_SIZE (4 * VP9_PARTITION_SIZE_STEP) > +#define VP9_PARTITION_KEY_START 0 > +#define VP9_PARTITION_P_START VP9_PARTITION_ONE_SIZE > +#define VP9_PARTITION_SIZE (2 * VP9_PARTITION_ONE_SIZE) > +#define VP9_SKIP_START (VP9_PARTITION_START + VP9_PARTITION_SIZE) > +#define VP9_SKIP_SIZE 4 /* only use 3*/ > +#define VP9_TX_MODE_START (VP9_SKIP_START + VP9_SKIP_SIZE) > +#define VP9_TX_MODE_8_0_OFFSET 0 > +#define VP9_TX_MODE_8_1_OFFSET 1 > +#define VP9_TX_MODE_16_0_OFFSET 2 > +#define VP9_TX_MODE_16_1_OFFSET 4 > +#define VP9_TX_MODE_32_0_OFFSET 6 > +#define VP9_TX_MODE_32_1_OFFSET 9 > +#define VP9_TX_MODE_SIZE 12 > +#define VP9_COEF_START (VP9_TX_MODE_START + VP9_TX_MODE_SIZE) > +#define VP9_COEF_BAND_0_OFFSET 0 > +#define VP9_COEF_BAND_1_OFFSET (VP9_COEF_BAND_0_OFFSET + 3 * 3 + 1) > +#define VP9_COEF_BAND_2_OFFSET (VP9_COEF_BAND_1_OFFSET + 6 * 3) > +#define VP9_COEF_BAND_3_OFFSET (VP9_COEF_BAND_2_OFFSET + 6 * 3) > +#define VP9_COEF_BAND_4_OFFSET (VP9_COEF_BAND_3_OFFSET + 6 * 3) > +#define VP9_COEF_BAND_5_OFFSET (VP9_COEF_BAND_4_OFFSET + 6 * 3) > +#define VP9_COEF_SIZE_ONE_SET 100 /* ((3 + 5 * 6) * 3 + 1 padding)*/ > +#define VP9_COEF_4X4_START (VP9_COEF_START + 0 * VP9_COEF_SIZE_ONE_SET) > +#define VP9_COEF_8X8_START (VP9_COEF_START + 4 * 
VP9_COEF_SIZE_ONE_SET) > +#define VP9_COEF_16X16_START (VP9_COEF_START + 8 * VP9_COEF_SIZE_ONE_SET) > +#define VP9_COEF_32X32_START (VP9_COEF_START + 12 * VP9_COEF_SIZE_ONE_SET) > +#define VP9_COEF_SIZE_PLANE (2 * VP9_COEF_SIZE_ONE_SET) > +#define VP9_COEF_SIZE (4 * 2 * 2 * VP9_COEF_SIZE_ONE_SET) > +#define VP9_INTER_MODE_START (VP9_COEF_START + VP9_COEF_SIZE) > +#define VP9_INTER_MODE_SIZE 24 /* only use 21 (# * 7)*/ > +#define VP9_INTERP_START (VP9_INTER_MODE_START + VP9_INTER_MODE_SIZE) > +#define VP9_INTERP_SIZE 8 > +#define VP9_INTRA_INTER_START (VP9_INTERP_START + VP9_INTERP_SIZE) > +#define VP9_INTRA_INTER_SIZE 4 > +#define VP9_INTERP_INTRA_INTER_START VP9_INTERP_START > +#define VP9_INTERP_INTRA_INTER_SIZE (VP9_INTERP_SIZE + VP9_INTRA_INTER_SIZE) > +#define VP9_COMP_INTER_START \ > + (VP9_INTERP_INTRA_INTER_START + VP9_INTERP_INTRA_INTER_SIZE) > +#define VP9_COMP_INTER_SIZE 5 > +#define VP9_COMP_REF_START (VP9_COMP_INTER_START + VP9_COMP_INTER_SIZE) > +#define VP9_COMP_REF_SIZE 5 > +#define VP9_SINGLE_REF_START (VP9_COMP_REF_START + VP9_COMP_REF_SIZE) > +#define VP9_SINGLE_REF_SIZE 10 > +#define VP9_REF_MODE_START VP9_COMP_INTER_START > +#define VP9_REF_MODE_SIZE \ > + (VP9_COMP_INTER_SIZE + VP9_COMP_REF_SIZE + VP9_SINGLE_REF_SIZE) > +#define VP9_IF_Y_MODE_START (VP9_REF_MODE_START + VP9_REF_MODE_SIZE) > +#define VP9_IF_Y_MODE_SIZE 36 > +#define VP9_IF_UV_MODE_START (VP9_IF_Y_MODE_START + VP9_IF_Y_MODE_SIZE) > +#define VP9_IF_UV_MODE_SIZE 92 /* only use 90*/ > +#define VP9_MV_JOINTS_START (VP9_IF_UV_MODE_START + VP9_IF_UV_MODE_SIZE) > +#define VP9_MV_JOINTS_SIZE 3 > +#define VP9_MV_SIGN_0_START (VP9_MV_JOINTS_START + VP9_MV_JOINTS_SIZE) > +#define VP9_MV_SIGN_0_SIZE 1 > +#define VP9_MV_CLASSES_0_START (VP9_MV_SIGN_0_START + VP9_MV_SIGN_0_SIZE) > +#define VP9_MV_CLASSES_0_SIZE 10 > +#define VP9_MV_CLASS0_0_START \ > + (VP9_MV_CLASSES_0_START + VP9_MV_CLASSES_0_SIZE) > +#define VP9_MV_CLASS0_0_SIZE 1 > +#define VP9_MV_BITS_0_START (VP9_MV_CLASS0_0_START + 
VP9_MV_CLASS0_0_SIZE) > +#define VP9_MV_BITS_0_SIZE 10 > +#define VP9_MV_SIGN_1_START (VP9_MV_BITS_0_START + VP9_MV_BITS_0_SIZE) > +#define VP9_MV_SIGN_1_SIZE 1 > +#define VP9_MV_CLASSES_1_START \ > + (VP9_MV_SIGN_1_START + VP9_MV_SIGN_1_SIZE) > +#define VP9_MV_CLASSES_1_SIZE 10 > +#define VP9_MV_CLASS0_1_START \ > + (VP9_MV_CLASSES_1_START + VP9_MV_CLASSES_1_SIZE) > +#define VP9_MV_CLASS0_1_SIZE 1 > +#define VP9_MV_BITS_1_START \ > + (VP9_MV_CLASS0_1_START + VP9_MV_CLASS0_1_SIZE) > +#define VP9_MV_BITS_1_SIZE 10 > +#define VP9_MV_CLASS0_FP_0_START \ > + (VP9_MV_BITS_1_START + VP9_MV_BITS_1_SIZE) > +#define VP9_MV_CLASS0_FP_0_SIZE 9 > +#define VP9_MV_CLASS0_FP_1_START \ > + (VP9_MV_CLASS0_FP_0_START + VP9_MV_CLASS0_FP_0_SIZE) > +#define VP9_MV_CLASS0_FP_1_SIZE 9 > +#define VP9_MV_CLASS0_HP_0_START \ > + (VP9_MV_CLASS0_FP_1_START + VP9_MV_CLASS0_FP_1_SIZE) > +#define VP9_MV_CLASS0_HP_0_SIZE 2 > +#define VP9_MV_CLASS0_HP_1_START \ > + (VP9_MV_CLASS0_HP_0_START + VP9_MV_CLASS0_HP_0_SIZE) > +#define VP9_MV_CLASS0_HP_1_SIZE 2 > +#define VP9_MV_START VP9_MV_JOINTS_START > +#define VP9_MV_SIZE 72 /*only use 69*/ > + > +#define VP9_TOTAL_SIZE (VP9_MV_START + VP9_MV_SIZE) > + > +/* VP9 COUNT mem processing defines */ > +#define VP9_COEF_COUNT_START 0 > +#define VP9_COEF_COUNT_BAND_0_OFFSET 0 > +#define VP9_COEF_COUNT_BAND_1_OFFSET \ > + (VP9_COEF_COUNT_BAND_0_OFFSET + 3 * 5) > +#define VP9_COEF_COUNT_BAND_2_OFFSET \ > + (VP9_COEF_COUNT_BAND_1_OFFSET + 6 * 5) > +#define VP9_COEF_COUNT_BAND_3_OFFSET \ > + (VP9_COEF_COUNT_BAND_2_OFFSET + 6 * 5) > +#define VP9_COEF_COUNT_BAND_4_OFFSET \ > + (VP9_COEF_COUNT_BAND_3_OFFSET + 6 * 5) > +#define VP9_COEF_COUNT_BAND_5_OFFSET \ > + (VP9_COEF_COUNT_BAND_4_OFFSET + 6 * 5) > +#define VP9_COEF_COUNT_SIZE_ONE_SET 165 /* ((3 + 5 * 6) * 5 */ > +#define VP9_COEF_COUNT_4X4_START \ > + (VP9_COEF_COUNT_START + 0 * VP9_COEF_COUNT_SIZE_ONE_SET) > +#define VP9_COEF_COUNT_8X8_START \ > + (VP9_COEF_COUNT_START + 4 * VP9_COEF_COUNT_SIZE_ONE_SET) > 
+#define VP9_COEF_COUNT_16X16_START \ > + (VP9_COEF_COUNT_START + 8 * VP9_COEF_COUNT_SIZE_ONE_SET) > +#define VP9_COEF_COUNT_32X32_START \ > + (VP9_COEF_COUNT_START + 12 * VP9_COEF_COUNT_SIZE_ONE_SET) > +#define VP9_COEF_COUNT_SIZE_PLANE (2 * VP9_COEF_COUNT_SIZE_ONE_SET) > +#define VP9_COEF_COUNT_SIZE (4 * 2 * 2 * VP9_COEF_COUNT_SIZE_ONE_SET) > + > +#define VP9_INTRA_INTER_COUNT_START \ > + (VP9_COEF_COUNT_START + VP9_COEF_COUNT_SIZE) > +#define VP9_INTRA_INTER_COUNT_SIZE (4 * 2) > +#define VP9_COMP_INTER_COUNT_START \ > + (VP9_INTRA_INTER_COUNT_START + VP9_INTRA_INTER_COUNT_SIZE) > +#define VP9_COMP_INTER_COUNT_SIZE (5 * 2) > +#define VP9_COMP_REF_COUNT_START \ > + (VP9_COMP_INTER_COUNT_START + VP9_COMP_INTER_COUNT_SIZE) > +#define VP9_COMP_REF_COUNT_SIZE (5 * 2) > +#define VP9_SINGLE_REF_COUNT_START \ > + (VP9_COMP_REF_COUNT_START + VP9_COMP_REF_COUNT_SIZE) > +#define VP9_SINGLE_REF_COUNT_SIZE (10 * 2) > +#define VP9_TX_MODE_COUNT_START \ > + (VP9_SINGLE_REF_COUNT_START + VP9_SINGLE_REF_COUNT_SIZE) > +#define VP9_TX_MODE_COUNT_SIZE (12 * 2) > +#define VP9_SKIP_COUNT_START \ > + (VP9_TX_MODE_COUNT_START + VP9_TX_MODE_COUNT_SIZE) > +#define VP9_SKIP_COUNT_SIZE (3 * 2) > +#define VP9_MV_SIGN_0_COUNT_START \ > + (VP9_SKIP_COUNT_START + VP9_SKIP_COUNT_SIZE) > +#define VP9_MV_SIGN_0_COUNT_SIZE (1 * 2) > +#define VP9_MV_SIGN_1_COUNT_START \ > + (VP9_MV_SIGN_0_COUNT_START + VP9_MV_SIGN_0_COUNT_SIZE) > +#define VP9_MV_SIGN_1_COUNT_SIZE (1 * 2) > +#define VP9_MV_BITS_0_COUNT_START \ > + (VP9_MV_SIGN_1_COUNT_START + VP9_MV_SIGN_1_COUNT_SIZE) > +#define VP9_MV_BITS_0_COUNT_SIZE (10 * 2) > +#define VP9_MV_BITS_1_COUNT_START \ > + (VP9_MV_BITS_0_COUNT_START + VP9_MV_BITS_0_COUNT_SIZE) > +#define VP9_MV_BITS_1_COUNT_SIZE (10 * 2) > +#define VP9_MV_CLASS0_HP_0_COUNT_START \ > + (VP9_MV_BITS_1_COUNT_START + VP9_MV_BITS_1_COUNT_SIZE) > +#define VP9_MV_CLASS0_HP_0_COUNT_SIZE (2 * 2) > +#define VP9_MV_CLASS0_HP_1_COUNT_START \ > + (VP9_MV_CLASS0_HP_0_COUNT_START + 
VP9_MV_CLASS0_HP_0_COUNT_SIZE) > +#define VP9_MV_CLASS0_HP_1_COUNT_SIZE (2 * 2) > + > +/* Start merge_tree */ > +#define VP9_INTER_MODE_COUNT_START \ > + (VP9_MV_CLASS0_HP_1_COUNT_START + VP9_MV_CLASS0_HP_1_COUNT_SIZE) > +#define VP9_INTER_MODE_COUNT_SIZE (7 * 4) > +#define VP9_IF_Y_MODE_COUNT_START \ > + (VP9_INTER_MODE_COUNT_START + VP9_INTER_MODE_COUNT_SIZE) > +#define VP9_IF_Y_MODE_COUNT_SIZE (10 * 4) > +#define VP9_IF_UV_MODE_COUNT_START \ > + (VP9_IF_Y_MODE_COUNT_START + VP9_IF_Y_MODE_COUNT_SIZE) > +#define VP9_IF_UV_MODE_COUNT_SIZE (10 * 10) > +#define VP9_PARTITION_P_COUNT_START \ > + (VP9_IF_UV_MODE_COUNT_START + VP9_IF_UV_MODE_COUNT_SIZE) > +#define VP9_PARTITION_P_COUNT_SIZE (4 * 4 * 4) > +#define VP9_INTERP_COUNT_START \ > + (VP9_PARTITION_P_COUNT_START + VP9_PARTITION_P_COUNT_SIZE) > +#define VP9_INTERP_COUNT_SIZE (4 * 3) > +#define VP9_MV_JOINTS_COUNT_START \ > + (VP9_INTERP_COUNT_START + VP9_INTERP_COUNT_SIZE) > +#define VP9_MV_JOINTS_COUNT_SIZE (1 * 4) > +#define VP9_MV_CLASSES_0_COUNT_START \ > + (VP9_MV_JOINTS_COUNT_START + VP9_MV_JOINTS_COUNT_SIZE) > +#define VP9_MV_CLASSES_0_COUNT_SIZE (1 * 11) > +#define VP9_MV_CLASS0_0_COUNT_START \ > + (VP9_MV_CLASSES_0_COUNT_START + VP9_MV_CLASSES_0_COUNT_SIZE) > +#define VP9_MV_CLASS0_0_COUNT_SIZE (1 * 2) > +#define VP9_MV_CLASSES_1_COUNT_START \ > + (VP9_MV_CLASS0_0_COUNT_START + VP9_MV_CLASS0_0_COUNT_SIZE) > +#define VP9_MV_CLASSES_1_COUNT_SIZE (1 * 11) > +#define VP9_MV_CLASS0_1_COUNT_START \ > + (VP9_MV_CLASSES_1_COUNT_START + VP9_MV_CLASSES_1_COUNT_SIZE) > +#define VP9_MV_CLASS0_1_COUNT_SIZE (1 * 2) > +#define VP9_MV_CLASS0_FP_0_COUNT_START \ > + (VP9_MV_CLASS0_1_COUNT_START + VP9_MV_CLASS0_1_COUNT_SIZE) > +#define VP9_MV_CLASS0_FP_0_COUNT_SIZE (3 * 4) > +#define VP9_MV_CLASS0_FP_1_COUNT_START \ > + (VP9_MV_CLASS0_FP_0_COUNT_START + VP9_MV_CLASS0_FP_0_COUNT_SIZE) > +#define VP9_MV_CLASS0_FP_1_COUNT_SIZE (3 * 4) > + > +#define DC_PRED 0 /* Average of above and left pixels */ > +#define V_PRED 1 /* 
Vertical */ > +#define H_PRED 2 /* Horizontal */ > +#define D45_PRED 3 /* Directional 45 deg = round(arctan(1/1) * 180/pi) */ > +#define D135_PRED 4 /* Directional 135 deg = 180 - 45 */ > +#define D117_PRED 5 /* Directional 117 deg = 180 - 63 */ > +#define D153_PRED 6 /* Directional 153 deg = 180 - 27 */ > +#define D207_PRED 7 /* Directional 207 deg = 180 + 27 */ > +#define D63_PRED 8 /* Directional 63 deg = round(arctan(2/1) * 180/pi) */ > +#define TM_PRED 9 /* True-motion */ > + > +#define ROUND_POWER_OF_TWO(value, num) (((value) + (1 << ((num) - 1))) >> (num)) > + > +#define MODE_MV_COUNT_SAT 20 > +static const int count_to_update_factor[MODE_MV_COUNT_SAT + 1] = { > + 0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64, > + 70, 76, 83, 89, 96, 102, 108, 115, 121, 128 > +}; > + > +union rpm_param { > + struct { > + u16 data[RPM_BUF_SIZE]; > + } l; > + struct { > + u16 profile; > + u16 show_existing_frame; > + u16 frame_to_show_idx; > + u16 frame_type; /*1 bit*/ > + u16 show_frame; /*1 bit*/ > + u16 error_resilient_mode; /*1 bit*/ > + u16 intra_only; /*1 bit*/ > + u16 display_size_present; /*1 bit*/ > + u16 reset_frame_context; > + u16 refresh_frame_flags; > + u16 width; > + u16 height; > + u16 display_width; > + u16 display_height; > + u16 ref_info; > + u16 same_frame_size; > + u16 mode_ref_delta_enabled; > + u16 ref_deltas[4]; > + u16 mode_deltas[2]; > + u16 filter_level; > + u16 sharpness_level; > + u16 bit_depth; > + u16 seg_quant_info[8]; > + u16 seg_enabled; > + u16 seg_abs_delta; > + /* bit 15: feature enabled; bit 8, sign; bit[5:0], data */ > + u16 seg_lf_info[8]; > + } p; > +}; > + > +enum SEG_LVL_FEATURES { > + SEG_LVL_ALT_Q = 0, /* Use alternate Quantizer */ > + SEG_LVL_ALT_LF = 1, /* Use alternate loop filter value */ > + SEG_LVL_REF_FRAME = 2, /* Optional Segment reference frame */ > + SEG_LVL_SKIP = 3, /* Optional Segment (0,0) + skip mode */ > + SEG_LVL_MAX = 4 /* Number of features supported */ > +}; > + > +struct segmentation { > + u8 enabled; > + u8 
update_map; > + u8 update_data; > + u8 abs_delta; > + u8 temporal_update; > + s16 feature_data[MAX_SEGMENTS][SEG_LVL_MAX]; > + unsigned int feature_mask[MAX_SEGMENTS]; > +}; > + > +struct loop_filter_thresh { > + u8 mblim; > + u8 lim; > + u8 hev_thr; > +}; > + > +struct loop_filter_info_n { > + struct loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1]; > + u8 lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS]; > +}; > + > +struct loopfilter { > + int filter_level; > + > + int sharpness_level; > + int last_sharpness_level; > + > + u8 mode_ref_delta_enabled; > + u8 mode_ref_delta_update; > + > + /*0 = Intra, Last, GF, ARF*/ > + signed char ref_deltas[MAX_REF_LF_DELTAS]; > + signed char last_ref_deltas[MAX_REF_LF_DELTAS]; > + > + /*0 = ZERO_MV, MV*/ > + signed char mode_deltas[MAX_MODE_LF_DELTAS]; > + signed char last_mode_deltas[MAX_MODE_LF_DELTAS]; > +}; > + > +struct vp9_frame { > + struct list_head list; > + struct vb2_v4l2_buffer *vbuf; > + int index; > + int intra_only; > + int show; > + int type; > + int done; > + unsigned int width; > + unsigned int height; > +}; > + > +struct codec_vp9 { > + /* VP9 context lock */ > + struct mutex lock; > + > + /* Common part with the HEVC decoder */ > + struct codec_hevc_common common; > + > + /* Buffer for the VP9 Workspace */ > + void *workspace_vaddr; > + dma_addr_t workspace_paddr; > + > + /* Contains many information parsed from the bitstream */ > + union rpm_param rpm_param; > + > + /* Whether we detected the bitstream as 10-bit */ > + int is_10bit; > + > + /* Coded resolution reported by the hardware */ > + u32 width, height; > + > + /* All ref frames used by the HW at a given time */ > + struct list_head ref_frames_list; > + u32 frames_num; > + > + /* In case of downsampling (decoding with FBC but outputting in NV12M), > + * we need to allocate additional buffers for FBC. 
> + */ > + void *fbc_buffer_vaddr[MAX_REF_PIC_NUM]; > + dma_addr_t fbc_buffer_paddr[MAX_REF_PIC_NUM]; > + > + int ref_frame_map[REF_FRAMES]; > + int next_ref_frame_map[REF_FRAMES]; > + struct vp9_frame *frame_refs[REFS_PER_FRAME]; > + > + u32 lcu_total; > + > + /* loop filter */ > + int default_filt_lvl; > + struct loop_filter_info_n lfi; > + struct loopfilter lf; > + struct segmentation seg_4lf; > + > + struct vp9_frame *cur_frame; > + struct vp9_frame *prev_frame; > +}; > + > +static int div_r32(s64 m, int n) > +{ > + s64 qu = div_s64(m, n); > + > + return (int)qu; > +} > + > +static int clip_prob(int p) > +{ > + return clamp_val(p, 1, 255); > +} > + > +static int segfeature_active(struct segmentation *seg, int segment_id, > + enum SEG_LVL_FEATURES feature_id) > +{ > + return seg->enabled && > + (seg->feature_mask[segment_id] & (1 << feature_id)); > +} > + > +static int get_segdata(struct segmentation *seg, int segment_id, > + enum SEG_LVL_FEATURES feature_id) > +{ > + return seg->feature_data[segment_id][feature_id]; > +} > + > +static void vp9_update_sharpness(struct loop_filter_info_n *lfi, > + int sharpness_lvl) > +{ > + int lvl; > + > + /* For each possible value for the loop filter fill out limits*/ > + for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) { > + /* Set loop filter parameters that control sharpness.*/ > + int block_inside_limit = lvl >> ((sharpness_lvl > 0) + > + (sharpness_lvl > 4)); > + > + if (sharpness_lvl > 0) { > + if (block_inside_limit > (9 - sharpness_lvl)) > + block_inside_limit = (9 - sharpness_lvl); > + } > + > + if (block_inside_limit < 1) > + block_inside_limit = 1; > + > + lfi->lfthr[lvl].lim = (u8)block_inside_limit; > + lfi->lfthr[lvl].mblim = (u8)(2 * (lvl + 2) + > + block_inside_limit); > + } > +} > + > +/* Instantiate this function once when decode is started */ > +static void > +vp9_loop_filter_init(struct amvdec_core *core, struct codec_vp9 *vp9) > +{ > + struct loop_filter_info_n *lfi = &vp9->lfi; > + struct loopfilter *lf = 
&vp9->lf; > + struct segmentation *seg_4lf = &vp9->seg_4lf; > + int i; > + > + memset(lfi, 0, sizeof(struct loop_filter_info_n)); > + memset(lf, 0, sizeof(struct loopfilter)); > + memset(seg_4lf, 0, sizeof(struct segmentation)); > + lf->sharpness_level = 0; > + vp9_update_sharpness(lfi, lf->sharpness_level); > + lf->last_sharpness_level = lf->sharpness_level; > + > + for (i = 0; i < 32; i++) { > + unsigned int thr; > + > + thr = ((lfi->lfthr[i * 2 + 1].lim & 0x3f) << 8) | > + (lfi->lfthr[i * 2 + 1].mblim & 0xff); > + thr = (thr << 16) | ((lfi->lfthr[i * 2].lim & 0x3f) << 8) | > + (lfi->lfthr[i * 2].mblim & 0xff); > + > + amvdec_write_dos(core, HEVC_DBLK_CFG9, thr); > + } > + > + if (core->platform->revision >= VDEC_REVISION_G12A) > + /* VP9 video format */ > + amvdec_write_dos(core, HEVC_DBLK_CFGB, (0x54 << 8) | BIT(0)); > + else if (core->platform->revision >= VDEC_REVISION_SM1) > + amvdec_write_dos(core, HEVC_DBLK_CFGB, > + (0x3 << 14) | /* dw fifo thres r and b */ > + (0x3 << 12) | /* dw fifo thres r or b */ > + (0x3 << 10) | /* dw fifo thres not r/b */ > + BIT(0)); /* VP9 video format */ > + else > + amvdec_write_dos(core, HEVC_DBLK_CFGB, 0x40400001); > +} > + > +static void > +vp9_loop_filter_frame_init(struct amvdec_core *core, struct segmentation *seg, > + struct loop_filter_info_n *lfi, > + struct loopfilter *lf, int default_filt_lvl) > +{ > + int i; > + int seg_id; > + > + /* > + * n_shift is the multiplier for lf_deltas > + * the multiplier is: > + * - 1 for when filter_lvl is between 0 and 31 > + * - 2 when filter_lvl is between 32 and 63 > + */ > + const int scale = 1 << (default_filt_lvl >> 5); > + > + /* update limits if sharpness has changed */ > + if (lf->last_sharpness_level != lf->sharpness_level) { > + vp9_update_sharpness(lfi, lf->sharpness_level); > + lf->last_sharpness_level = lf->sharpness_level; > + > + /* Write to register */ > + for (i = 0; i < 32; i++) { > + unsigned int thr; > + > + thr = ((lfi->lfthr[i * 2 + 1].lim & 0x3f) << 8) | > + 
(lfi->lfthr[i * 2 + 1].mblim & 0xff); > + thr = (thr << 16) | > + ((lfi->lfthr[i * 2].lim & 0x3f) << 8) | > + (lfi->lfthr[i * 2].mblim & 0xff); > + > + amvdec_write_dos(core, HEVC_DBLK_CFG9, thr); > + } > + } > + > + for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) { > + int lvl_seg = default_filt_lvl; > + > + if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) { > + const int data = get_segdata(seg, seg_id, > + SEG_LVL_ALT_LF); > + lvl_seg = clamp_t(int, > + seg->abs_delta == SEGMENT_ABSDATA ? > + data : default_filt_lvl + data, > + 0, MAX_LOOP_FILTER); > + } > + > + if (!lf->mode_ref_delta_enabled) { > + /* > + * We could get rid of this if we assume that deltas > + * are set to zero when not in use. > + * encoder always uses deltas > + */ > + memset(lfi->lvl[seg_id], lvl_seg, > + sizeof(lfi->lvl[seg_id])); > + } else { > + int ref, mode; > + const int intra_lvl = > + lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale; > + lfi->lvl[seg_id][INTRA_FRAME][0] = > + clamp_val(intra_lvl, 0, MAX_LOOP_FILTER); > + > + for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) { > + for (mode = 0; mode < MAX_MODE_LF_DELTAS; > + ++mode) { > + const int inter_lvl = > + lvl_seg + > + lf->ref_deltas[ref] * scale + > + lf->mode_deltas[mode] * scale; > + lfi->lvl[seg_id][ref][mode] = > + clamp_val(inter_lvl, 0, > + MAX_LOOP_FILTER); > + } > + } > + } > + } > + > + for (i = 0; i < 16; i++) { > + unsigned int level; > + > + level = ((lfi->lvl[i >> 1][3][i & 1] & 0x3f) << 24) | > + ((lfi->lvl[i >> 1][2][i & 1] & 0x3f) << 16) | > + ((lfi->lvl[i >> 1][1][i & 1] & 0x3f) << 8) | > + (lfi->lvl[i >> 1][0][i & 1] & 0x3f); > + if (!default_filt_lvl) > + level = 0; > + > + amvdec_write_dos(core, HEVC_DBLK_CFGA, level); > + } > +} > + > +static void codec_vp9_flush_output(struct amvdec_session *sess) > +{ > + struct codec_vp9 *vp9 = sess->priv; > + struct vp9_frame *tmp, *n; > + > + mutex_lock(&vp9->lock); > + list_for_each_entry_safe(tmp, n, &vp9->ref_frames_list, list) { > + if (!tmp->done) { > + if 
(tmp->show) > + amvdec_dst_buf_done(sess, tmp->vbuf, > + V4L2_FIELD_NONE); > + else > + v4l2_m2m_buf_queue(sess->m2m_ctx, tmp->vbuf); > + > + vp9->frames_num--; > + } > + > + list_del(&tmp->list); > + kfree(tmp); > + } > + mutex_unlock(&vp9->lock); > +} > + > +static u32 codec_vp9_num_pending_bufs(struct amvdec_session *sess) > +{ > + struct codec_vp9 *vp9 = sess->priv; > + > + if (!vp9) > + return 0; > + > + return vp9->frames_num; > +} > + > +static int codec_vp9_alloc_workspace(struct amvdec_core *core, > + struct codec_vp9 *vp9) > +{ > + /* Allocate some memory for the VP9 decoder's state */ > + vp9->workspace_vaddr = dma_alloc_coherent(core->dev, SIZE_WORKSPACE, > + &vp9->workspace_paddr, > + GFP_KERNEL); > + if (!vp9->workspace_vaddr) { > + dev_err(core->dev, "Failed to allocate VP9 Workspace\n"); > + return -ENOMEM; > + } > + > + memset(vp9->workspace_vaddr, 0, SIZE_WORKSPACE); > + > + return 0; > +} > + > +static void codec_vp9_setup_workspace(struct amvdec_session *sess, > + struct codec_vp9 *vp9) > +{ > + struct amvdec_core *core = sess->core; > + u32 revision = core->platform->revision; > + dma_addr_t wkaddr = vp9->workspace_paddr; > + > + amvdec_write_dos(core, HEVCD_IPP_LINEBUFF_BASE, wkaddr + IPP_OFFSET); > + amvdec_write_dos(core, VP9_RPM_BUFFER, wkaddr + RPM_OFFSET); > + amvdec_write_dos(core, VP9_SHORT_TERM_RPS, wkaddr + SH_TM_RPS_OFFSET); > + amvdec_write_dos(core, VP9_PPS_BUFFER, wkaddr + PPS_OFFSET); > + amvdec_write_dos(core, VP9_SAO_UP, wkaddr + SAO_UP_OFFSET); > + > + amvdec_write_dos(core, VP9_STREAM_SWAP_BUFFER, > + wkaddr + SWAP_BUF_OFFSET); > + amvdec_write_dos(core, VP9_STREAM_SWAP_BUFFER2, > + wkaddr + SWAP_BUF2_OFFSET); > + amvdec_write_dos(core, VP9_SCALELUT, wkaddr + SCALELUT_OFFSET); > + > + if (core->platform->revision >= VDEC_REVISION_G12A) > + amvdec_write_dos(core, HEVC_DBLK_CFGE, > + wkaddr + DBLK_PARA_OFFSET); > + > + amvdec_write_dos(core, HEVC_DBLK_CFG4, wkaddr + DBLK_PARA_OFFSET); > + amvdec_write_dos(core, HEVC_DBLK_CFG5, 
wkaddr + DBLK_DATA_OFFSET); > + amvdec_write_dos(core, VP9_SEG_MAP_BUFFER, wkaddr + SEG_MAP_OFFSET); > + amvdec_write_dos(core, VP9_PROB_SWAP_BUFFER, wkaddr + PROB_OFFSET); > + amvdec_write_dos(core, VP9_COUNT_SWAP_BUFFER, wkaddr + COUNT_OFFSET); > + amvdec_write_dos(core, LMEM_DUMP_ADR, wkaddr + LMEM_OFFSET); > + > + if (codec_hevc_use_mmu(revision, sess->pixfmt_cap, vp9->is_10bit)) { > + amvdec_write_dos(core, HEVC_SAO_MMU_VH0_ADDR, > + wkaddr + MMU_VBH_OFFSET); > + amvdec_write_dos(core, HEVC_SAO_MMU_VH1_ADDR, > + wkaddr + MMU_VBH_OFFSET + (MMU_VBH_SIZE / 2)); > + > + if (revision >= VDEC_REVISION_G12A) > + amvdec_write_dos(core, HEVC_ASSIST_MMU_MAP_ADDR, > + vp9->common.mmu_map_paddr); > + else > + amvdec_write_dos(core, VP9_MMU_MAP_BUFFER, > + vp9->common.mmu_map_paddr); > + } > +} > + > +static int codec_vp9_start(struct amvdec_session *sess) > +{ > + struct amvdec_core *core = sess->core; > + struct codec_vp9 *vp9; > + u32 val; > + int i; > + int ret; > + > + vp9 = kzalloc(sizeof(*vp9), GFP_KERNEL); > + if (!vp9) > + return -ENOMEM; > + > + ret = codec_vp9_alloc_workspace(core, vp9); > + if (ret) > + goto free_vp9; > + > + codec_vp9_setup_workspace(sess, vp9); > + amvdec_write_dos_bits(core, HEVC_STREAM_CONTROL, BIT(0)); > + /* stream_fifo_hole */ > + if (core->platform->revision >= VDEC_REVISION_G12A) > + amvdec_write_dos_bits(core, HEVC_STREAM_FIFO_CTL, BIT(29)); > + > + val = amvdec_read_dos(core, HEVC_PARSER_INT_CONTROL) & 0x7fffffff; > + val |= (3 << 29) | BIT(24) | BIT(22) | BIT(7) | BIT(4) | BIT(0); > + amvdec_write_dos(core, HEVC_PARSER_INT_CONTROL, val); > + amvdec_write_dos_bits(core, HEVC_SHIFT_STATUS, BIT(0)); > + amvdec_write_dos(core, HEVC_SHIFT_CONTROL, BIT(10) | BIT(9) | > + (3 << 6) | BIT(5) | BIT(2) | BIT(1) | BIT(0)); > + amvdec_write_dos(core, HEVC_CABAC_CONTROL, BIT(0)); > + amvdec_write_dos(core, HEVC_PARSER_CORE_CONTROL, BIT(0)); > + amvdec_write_dos(core, HEVC_SHIFT_STARTCODE, 0x00000001); > + > + amvdec_write_dos(core, 
VP9_DEC_STATUS_REG, 0); > + > + amvdec_write_dos(core, HEVC_PARSER_CMD_WRITE, BIT(16)); > + for (i = 0; i < ARRAY_SIZE(vdec_hevc_parser_cmd); ++i) > + amvdec_write_dos(core, HEVC_PARSER_CMD_WRITE, > + vdec_hevc_parser_cmd[i]); > + > + amvdec_write_dos(core, HEVC_PARSER_CMD_SKIP_0, PARSER_CMD_SKIP_CFG_0); > + amvdec_write_dos(core, HEVC_PARSER_CMD_SKIP_1, PARSER_CMD_SKIP_CFG_1); > + amvdec_write_dos(core, HEVC_PARSER_CMD_SKIP_2, PARSER_CMD_SKIP_CFG_2); > + amvdec_write_dos(core, HEVC_PARSER_IF_CONTROL, > + BIT(5) | BIT(2) | BIT(0)); > + > + amvdec_write_dos(core, HEVCD_IPP_TOP_CNTL, BIT(0)); > + amvdec_write_dos(core, HEVCD_IPP_TOP_CNTL, BIT(1)); > + > + amvdec_write_dos(core, VP9_WAIT_FLAG, 1); > + > + /* clear mailbox interrupt */ > + amvdec_write_dos(core, HEVC_ASSIST_MBOX1_CLR_REG, 1); > + /* enable mailbox interrupt */ > + amvdec_write_dos(core, HEVC_ASSIST_MBOX1_MASK, 1); > + /* disable PSCALE for hardware sharing */ > + amvdec_write_dos(core, HEVC_PSCALE_CTRL, 0); > + /* Let the uCode do all the parsing */ > + amvdec_write_dos(core, NAL_SEARCH_CTL, 0x8); > + > + amvdec_write_dos(core, DECODE_STOP_POS, 0); > + amvdec_write_dos(core, VP9_DECODE_MODE, DECODE_MODE_SINGLE); > + > + pr_debug("decode_count: %u; decode_size: %u\n", > + amvdec_read_dos(core, HEVC_DECODE_COUNT), > + amvdec_read_dos(core, HEVC_DECODE_SIZE)); > + > + vp9_loop_filter_init(core, vp9); > + > + INIT_LIST_HEAD(&vp9->ref_frames_list); > + mutex_init(&vp9->lock); > + memset(&vp9->ref_frame_map, -1, sizeof(vp9->ref_frame_map)); > + memset(&vp9->next_ref_frame_map, -1, sizeof(vp9->next_ref_frame_map)); > + for (i = 0; i < REFS_PER_FRAME; ++i) > + vp9->frame_refs[i] = NULL; > + sess->priv = vp9; > + > + return 0; > + > +free_vp9: > + kfree(vp9); > + return ret; > +} > + > +static int codec_vp9_stop(struct amvdec_session *sess) > +{ > + struct amvdec_core *core = sess->core; > + struct codec_vp9 *vp9 = sess->priv; > + > + mutex_lock(&vp9->lock); > + if (vp9->workspace_vaddr) > + 
dma_free_coherent(core->dev, SIZE_WORKSPACE, > + vp9->workspace_vaddr, > + vp9->workspace_paddr); > + > + codec_hevc_free_fbc_buffers(sess, &vp9->common); > + mutex_unlock(&vp9->lock); > + > + return 0; > +} > + > +static void codec_vp9_set_sao(struct amvdec_session *sess, > + struct vb2_buffer *vb) > +{ > + struct amvdec_core *core = sess->core; > + struct codec_vp9 *vp9 = sess->priv; > + > + dma_addr_t buf_y_paddr; > + dma_addr_t buf_u_v_paddr; > + u32 val; > + > + if (codec_hevc_use_downsample(sess->pixfmt_cap, vp9->is_10bit)) > + buf_y_paddr = > + vp9->common.fbc_buffer_paddr[vb->index]; > + else > + buf_y_paddr = > + vb2_dma_contig_plane_dma_addr(vb, 0); > + > + if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) { > + val = amvdec_read_dos(core, HEVC_SAO_CTRL5) & ~0xff0200; > + amvdec_write_dos(core, HEVC_SAO_CTRL5, val); > + amvdec_write_dos(core, HEVC_CM_BODY_START_ADDR, buf_y_paddr); > + } > + > + if (sess->pixfmt_cap == V4L2_PIX_FMT_NV12M) { > + buf_y_paddr = > + vb2_dma_contig_plane_dma_addr(vb, 0); > + buf_u_v_paddr = > + vb2_dma_contig_plane_dma_addr(vb, 1); > + amvdec_write_dos(core, HEVC_SAO_Y_START_ADDR, buf_y_paddr); > + amvdec_write_dos(core, HEVC_SAO_C_START_ADDR, buf_u_v_paddr); > + amvdec_write_dos(core, HEVC_SAO_Y_WPTR, buf_y_paddr); > + amvdec_write_dos(core, HEVC_SAO_C_WPTR, buf_u_v_paddr); > + } > + > + if (codec_hevc_use_mmu(core->platform->revision, sess->pixfmt_cap, > + vp9->is_10bit)) { > + amvdec_write_dos(core, HEVC_CM_HEADER_START_ADDR, > + vp9->common.mmu_header_paddr[vb->index]); > + /* use HEVC_CM_HEADER_START_ADDR */ > + amvdec_write_dos_bits(core, HEVC_SAO_CTRL5, BIT(10)); > + } > + > + amvdec_write_dos(core, HEVC_SAO_Y_LENGTH, > + amvdec_get_output_size(sess)); > + amvdec_write_dos(core, HEVC_SAO_C_LENGTH, > + (amvdec_get_output_size(sess) / 2)); > + > + if (core->platform->revision >= VDEC_REVISION_G12A) { > + amvdec_clear_dos_bits(core, HEVC_DBLK_CFGB, > + BIT(4) | BIT(5) | BIT(8) | BIT(9)); > + /* enable first, 
compressed write */ > + if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) > + amvdec_write_dos_bits(core, HEVC_DBLK_CFGB, BIT(8)); > + > + /* enable second, uncompressed write */ > + if (sess->pixfmt_cap == V4L2_PIX_FMT_NV12M) > + amvdec_write_dos_bits(core, HEVC_DBLK_CFGB, BIT(9)); > + > + /* dblk pipeline mode=1 for performance */ > + if (sess->width >= 1280) > + amvdec_write_dos_bits(core, HEVC_DBLK_CFGB, BIT(4)); > + > + pr_debug("HEVC_DBLK_CFGB: %08X\n", > + amvdec_read_dos(core, HEVC_DBLK_CFGB)); > + } > + > + val = amvdec_read_dos(core, HEVC_SAO_CTRL1) & ~0x3ff0; > + val |= 0xff0; /* Set endianness for 2-bytes swaps (nv12) */ > + if (core->platform->revision < VDEC_REVISION_G12A) { > + val &= ~0x3; > + if (!codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) > + val |= BIT(0); /* disable cm compression */ > + /* TOFIX: Handle Amlogic Framebuffer compression */ > + } > + > + amvdec_write_dos(core, HEVC_SAO_CTRL1, val); > + pr_debug("HEVC_SAO_CTRL1: %08X\n", val); > + > + /* no downscale for NV12 */ > + val = amvdec_read_dos(core, HEVC_SAO_CTRL5) & ~0xff0000; > + amvdec_write_dos(core, HEVC_SAO_CTRL5, val); > + > + val = amvdec_read_dos(core, HEVCD_IPP_AXIIF_CONFIG) & ~0x30; > + val |= 0xf; > + val &= ~BIT(12); /* NV12 */ > + amvdec_write_dos(core, HEVCD_IPP_AXIIF_CONFIG, val); > +} > + > +static dma_addr_t codec_vp9_get_frame_mv_paddr(struct codec_vp9 *vp9, > + struct vp9_frame *frame) > +{ > + return vp9->workspace_paddr + MPRED_MV_OFFSET + > + (frame->index * MPRED_MV_BUF_SIZE); > +} > + > +static void codec_vp9_set_mpred_mv(struct amvdec_core *core, > + struct codec_vp9 *vp9) > +{ > + int mpred_mv_rd_end_addr; > + int use_prev_frame_mvs = vp9->prev_frame->width == > + vp9->cur_frame->width && > + vp9->prev_frame->height == > + vp9->cur_frame->height && > + !vp9->prev_frame->intra_only && > + vp9->prev_frame->show && > + vp9->prev_frame->type != KEY_FRAME; > + > + amvdec_write_dos(core, HEVC_MPRED_CTRL3, 0x24122412); > + amvdec_write_dos(core, 
HEVC_MPRED_ABV_START_ADDR, > + vp9->workspace_paddr + MPRED_ABV_OFFSET); > + > + amvdec_clear_dos_bits(core, HEVC_MPRED_CTRL4, BIT(6)); > + if (use_prev_frame_mvs) > + amvdec_write_dos_bits(core, HEVC_MPRED_CTRL4, BIT(6)); > + > + amvdec_write_dos(core, HEVC_MPRED_MV_WR_START_ADDR, > + codec_vp9_get_frame_mv_paddr(vp9, vp9->cur_frame)); > + amvdec_write_dos(core, HEVC_MPRED_MV_WPTR, > + codec_vp9_get_frame_mv_paddr(vp9, vp9->cur_frame)); > + > + amvdec_write_dos(core, HEVC_MPRED_MV_RD_START_ADDR, > + codec_vp9_get_frame_mv_paddr(vp9, vp9->prev_frame)); > + amvdec_write_dos(core, HEVC_MPRED_MV_RPTR, > + codec_vp9_get_frame_mv_paddr(vp9, vp9->prev_frame)); > + > + mpred_mv_rd_end_addr = > + codec_vp9_get_frame_mv_paddr(vp9, vp9->prev_frame) + > + (vp9->lcu_total * MV_MEM_UNIT); > + amvdec_write_dos(core, HEVC_MPRED_MV_RD_END_ADDR, mpred_mv_rd_end_addr); > +} > + > +static void codec_vp9_update_next_ref(struct codec_vp9 *vp9) > +{ > + union rpm_param *param = &vp9->rpm_param; > + u32 buf_idx = vp9->cur_frame->index; > + int ref_index = 0; > + int refresh_frame_flags; > + int mask; > + > + refresh_frame_flags = vp9->cur_frame->type == KEY_FRAME ? 
> + 0xff : param->p.refresh_frame_flags; > + > + for (mask = refresh_frame_flags; mask; mask >>= 1) { > + pr_debug("mask=%08X; ref_index=%d\n", mask, ref_index); > + if (mask & 1) > + vp9->next_ref_frame_map[ref_index] = buf_idx; > + else > + vp9->next_ref_frame_map[ref_index] = > + vp9->ref_frame_map[ref_index]; > + > + ++ref_index; > + } > + > + for (; ref_index < REF_FRAMES; ++ref_index) > + vp9->next_ref_frame_map[ref_index] = > + vp9->ref_frame_map[ref_index]; > +} > + > +static void codec_vp9_save_refs(struct codec_vp9 *vp9) > +{ > + union rpm_param *param = &vp9->rpm_param; > + int i; > + > + for (i = 0; i < REFS_PER_FRAME; ++i) { > + const int ref = (param->p.ref_info >> > + (((REFS_PER_FRAME - i - 1) * 4) + 1)) & 0x7; > + > + if (vp9->ref_frame_map[ref] < 0) > + continue; > + > + pr_warn("%s: FIXME, would need to save ref %d\n", > + __func__, vp9->ref_frame_map[ref]); > + } > +} > + > +static void codec_vp9_update_ref(struct codec_vp9 *vp9) > +{ > + union rpm_param *param = &vp9->rpm_param; > + int ref_index = 0; > + int mask; > + int refresh_frame_flags; > + > + if (!vp9->cur_frame) > + return; > + > + refresh_frame_flags = vp9->cur_frame->type == KEY_FRAME ? 
> + 0xff : param->p.refresh_frame_flags; > + > + for (mask = refresh_frame_flags; mask; mask >>= 1) { > + vp9->ref_frame_map[ref_index] = > + vp9->next_ref_frame_map[ref_index]; > + ++ref_index; > + } > + > + if (param->p.show_existing_frame) > + return; > + > + for (; ref_index < REF_FRAMES; ++ref_index) > + vp9->ref_frame_map[ref_index] = > + vp9->next_ref_frame_map[ref_index]; > +} > + > +static struct vp9_frame *codec_vp9_get_frame_by_idx(struct codec_vp9 *vp9, > + int idx) > +{ > + struct vp9_frame *frame; > + > + list_for_each_entry(frame, &vp9->ref_frames_list, list) { > + if (frame->index == idx) > + return frame; > + } > + > + return NULL; > +} > + > +static void codec_vp9_sync_ref(struct codec_vp9 *vp9) > +{ > + union rpm_param *param = &vp9->rpm_param; > + int i; > + > + for (i = 0; i < REFS_PER_FRAME; ++i) { > + const int ref = (param->p.ref_info >> > + (((REFS_PER_FRAME - i - 1) * 4) + 1)) & 0x7; > + const int idx = vp9->ref_frame_map[ref]; > + > + vp9->frame_refs[i] = codec_vp9_get_frame_by_idx(vp9, idx); > + if (!vp9->frame_refs[i]) > + pr_warn("%s: couldn't find VP9 ref %d\n", __func__, > + idx); > + } > +} > + > +static void codec_vp9_set_refs(struct amvdec_session *sess, > + struct codec_vp9 *vp9) > +{ > + struct amvdec_core *core = sess->core; > + int i; > + > + for (i = 0; i < REFS_PER_FRAME; ++i) { > + struct vp9_frame *frame = vp9->frame_refs[i]; > + int id_y; > + int id_u_v; > + > + if (!frame) > + continue; > + > + if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) { > + id_y = frame->index; > + id_u_v = id_y; > + } else { > + id_y = frame->index * 2; > + id_u_v = id_y + 1; > + } > + > + amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_DATA_ADDR, > + (id_u_v << 16) | (id_u_v << 8) | id_y); > + } > +} > + > +static void codec_vp9_set_mc(struct amvdec_session *sess, > + struct codec_vp9 *vp9) > +{ > + struct amvdec_core *core = sess->core; > + u32 scale = 0; > + u32 sz; > + int i; > + > + amvdec_write_dos(core, 
HEVCD_MPP_ANC_CANVAS_ACCCONFIG_ADDR, 1); > + codec_vp9_set_refs(sess, vp9); > + amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_ACCCONFIG_ADDR, > + (16 << 8) | 1); > + codec_vp9_set_refs(sess, vp9); > + > + amvdec_write_dos(core, VP9D_MPP_REFINFO_TBL_ACCCONFIG, BIT(2)); > + for (i = 0; i < REFS_PER_FRAME; ++i) { > + if (!vp9->frame_refs[i]) > + continue; > + > + if (vp9->frame_refs[i]->width != vp9->width || > + vp9->frame_refs[i]->height != vp9->height) > + scale = 1; > + > + sz = amvdec_am21c_body_size(vp9->frame_refs[i]->width, > + vp9->frame_refs[i]->height); > + > + amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, > + vp9->frame_refs[i]->width); > + amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, > + vp9->frame_refs[i]->height); > + amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, > + (vp9->frame_refs[i]->width << 14) / > + vp9->width); > + amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, > + (vp9->frame_refs[i]->height << 14) / > + vp9->height); > + amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, sz >> 5); > + } > + > + amvdec_write_dos(core, VP9D_MPP_REF_SCALE_ENBL, scale); > +} > + > +static struct vp9_frame *codec_vp9_get_new_frame(struct amvdec_session *sess) > +{ > + struct codec_vp9 *vp9 = sess->priv; > + union rpm_param *param = &vp9->rpm_param; > + struct vb2_v4l2_buffer *vbuf; > + struct vp9_frame *new_frame; > + > + new_frame = kzalloc(sizeof(*new_frame), GFP_KERNEL); > + if (!new_frame) > + return NULL; > + > + vbuf = v4l2_m2m_dst_buf_remove(sess->m2m_ctx); > + if (!vbuf) { > + dev_err(sess->core->dev, "No dst buffer available\n"); > + kfree(new_frame); > + return NULL; > + } > + > + while (codec_vp9_get_frame_by_idx(vp9, vbuf->vb2_buf.index)) { > + struct vb2_v4l2_buffer *old_vbuf = vbuf; > + > + vbuf = v4l2_m2m_dst_buf_remove(sess->m2m_ctx); > + v4l2_m2m_buf_queue(sess->m2m_ctx, old_vbuf); > + if (!vbuf) { > + dev_err(sess->core->dev, "No dst buffer available\n"); > + kfree(new_frame); > + return NULL; > + } > + } > + > + new_frame->vbuf = vbuf; > + 
new_frame->index = vbuf->vb2_buf.index; > + new_frame->intra_only = param->p.intra_only; > + new_frame->show = param->p.show_frame; > + new_frame->type = param->p.frame_type; > + new_frame->width = vp9->width; > + new_frame->height = vp9->height; > + list_add_tail(&new_frame->list, &vp9->ref_frames_list); > + vp9->frames_num++; > + > + return new_frame; > +} > + > +static void codec_vp9_show_existing_frame(struct codec_vp9 *vp9) > +{ > + union rpm_param *param = &vp9->rpm_param; > + > + if (!param->p.show_existing_frame) > + return; > + > + pr_debug("showing frame %u\n", param->p.frame_to_show_idx); > +} > + > +static void codec_vp9_rm_noshow_frame(struct amvdec_session *sess) > +{ > + struct codec_vp9 *vp9 = sess->priv; > + struct vp9_frame *tmp; > + > + list_for_each_entry(tmp, &vp9->ref_frames_list, list) { > + if (tmp->show) > + continue; > + > + pr_debug("rm noshow: %u\n", tmp->index); > + v4l2_m2m_buf_queue(sess->m2m_ctx, tmp->vbuf); > + list_del(&tmp->list); > + kfree(tmp); > + vp9->frames_num--; > + return; > + } > +} > + > +static void codec_vp9_process_frame(struct amvdec_session *sess) > +{ > + struct amvdec_core *core = sess->core; > + struct codec_vp9 *vp9 = sess->priv; > + union rpm_param *param = &vp9->rpm_param; > + int intra_only; > + > + if (!param->p.show_frame) > + codec_vp9_rm_noshow_frame(sess); > + > + vp9->cur_frame = codec_vp9_get_new_frame(sess); > + if (!vp9->cur_frame) > + return; > + > + pr_debug("frame %d: type: %08X; show_exist: %u; show: %u, intra_only: %u\n", > + vp9->cur_frame->index, > + param->p.frame_type, param->p.show_existing_frame, > + param->p.show_frame, param->p.intra_only); > + > + if (param->p.frame_type != KEY_FRAME) > + codec_vp9_sync_ref(vp9); > + codec_vp9_update_next_ref(vp9); > + codec_vp9_show_existing_frame(vp9); > + > + if (codec_hevc_use_mmu(core->platform->revision, sess->pixfmt_cap, > + vp9->is_10bit)) > + codec_hevc_fill_mmu_map(sess, &vp9->common, > + &vp9->cur_frame->vbuf->vb2_buf); > + > + intra_only = 
param->p.show_frame ? 0 : param->p.intra_only; > + > + /* clear mpred (for keyframe only) */ > + if (param->p.frame_type != KEY_FRAME && !intra_only) { > + codec_vp9_set_mc(sess, vp9); > + codec_vp9_set_mpred_mv(core, vp9); > + } else { > + amvdec_clear_dos_bits(core, HEVC_MPRED_CTRL4, BIT(6)); > + } > + > + amvdec_write_dos(core, HEVC_PARSER_PICTURE_SIZE, > + (vp9->height << 16) | vp9->width); > + codec_vp9_set_sao(sess, &vp9->cur_frame->vbuf->vb2_buf); > + > + vp9_loop_filter_frame_init(core, &vp9->seg_4lf, > + &vp9->lfi, &vp9->lf, > + vp9->default_filt_lvl); > + > + /* ask uCode to start decoding */ > + amvdec_write_dos(core, VP9_DEC_STATUS_REG, VP9_10B_DECODE_SLICE); > +} > + > +static void codec_vp9_process_lf(struct codec_vp9 *vp9) > +{ > + union rpm_param *param = &vp9->rpm_param; > + int i; > + > + vp9->lf.mode_ref_delta_enabled = param->p.mode_ref_delta_enabled; > + vp9->lf.sharpness_level = param->p.sharpness_level; > + vp9->default_filt_lvl = param->p.filter_level; > + vp9->seg_4lf.enabled = param->p.seg_enabled; > + vp9->seg_4lf.abs_delta = param->p.seg_abs_delta; > + > + for (i = 0; i < 4; i++) > + vp9->lf.ref_deltas[i] = param->p.ref_deltas[i]; > + > + for (i = 0; i < 2; i++) > + vp9->lf.mode_deltas[i] = param->p.mode_deltas[i]; > + > + for (i = 0; i < MAX_SEGMENTS; i++) > + vp9->seg_4lf.feature_mask[i] = > + (param->p.seg_lf_info[i] & 0x8000) ? > + (1 << SEG_LVL_ALT_LF) : 0; > + > + for (i = 0; i < MAX_SEGMENTS; i++) > + vp9->seg_4lf.feature_data[i][SEG_LVL_ALT_LF] = > + (param->p.seg_lf_info[i] & 0x100) ? 
> + -(param->p.seg_lf_info[i] & 0x3f) > + : (param->p.seg_lf_info[i] & 0x3f); > +} > + > +static void codec_vp9_resume(struct amvdec_session *sess) > +{ > + struct codec_vp9 *vp9 = sess->priv; > + > + mutex_lock(&vp9->lock); > + > + if (codec_hevc_setup_buffers(sess, &vp9->common, vp9->is_10bit)) { > + mutex_unlock(&vp9->lock); > + amvdec_abort(sess); > + return; > + } > + > + codec_vp9_setup_workspace(sess, vp9); > + codec_hevc_setup_decode_head(sess, vp9->is_10bit); > + codec_vp9_process_lf(vp9); > + codec_vp9_process_frame(sess); > + > + mutex_unlock(&vp9->lock); > +} > + > +/** > + * The RPM section within the workspace contains > + * a lot of information regarding the parsed bitstream > + */ > +static void codec_vp9_fetch_rpm(struct amvdec_session *sess) > +{ > + struct codec_vp9 *vp9 = sess->priv; > + u16 *rpm_vaddr = vp9->workspace_vaddr + RPM_OFFSET; > + int i, j; > + > + for (i = 0; i < RPM_BUF_SIZE; i += 4) > + for (j = 0; j < 4; j++) > + vp9->rpm_param.l.data[i + j] = rpm_vaddr[i + 3 - j]; > +} > + > +static int codec_vp9_process_rpm(struct codec_vp9 *vp9) > +{ > + union rpm_param *param = &vp9->rpm_param; > + int src_changed = 0; > + int is_10bit = 0; > + int pic_width_64 = ALIGN(param->p.width, 64); > + int pic_height_32 = ALIGN(param->p.height, 32); > + int pic_width_lcu = (pic_width_64 % LCU_SIZE) ? 
> + pic_height_32 / LCU_SIZE + 1 > + : pic_height_32 / LCU_SIZE; > + vp9->lcu_total = pic_width_lcu * pic_height_lcu; > + > + if (param->p.bit_depth == 10) > + is_10bit = 1; > + > + if (vp9->width != param->p.width || vp9->height != param->p.height || > + vp9->is_10bit != is_10bit) > + src_changed = 1; > + > + vp9->width = param->p.width; > + vp9->height = param->p.height; > + vp9->is_10bit = is_10bit; > + > + pr_debug("width: %u; height: %u; is_10bit: %d; src_changed: %d\n", > + vp9->width, vp9->height, is_10bit, src_changed); > + > + return src_changed; > +} > + > +static bool codec_vp9_is_ref(struct codec_vp9 *vp9, struct vp9_frame *frame) > +{ > + int i; > + > + for (i = 0; i < REF_FRAMES; ++i) > + if (vp9->ref_frame_map[i] == frame->index) > + return true; > + > + return false; > +} > + > +static void codec_vp9_show_frame(struct amvdec_session *sess) > +{ > + struct codec_vp9 *vp9 = sess->priv; > + struct vp9_frame *tmp, *n; > + > + list_for_each_entry_safe(tmp, n, &vp9->ref_frames_list, list) { > + if (!tmp->show || tmp == vp9->cur_frame) > + continue; > + > + if (!tmp->done) { > + pr_debug("Doning %u\n", tmp->index); > + amvdec_dst_buf_done(sess, tmp->vbuf, V4L2_FIELD_NONE); > + tmp->done = 1; > + vp9->frames_num--; > + } > + > + if (codec_vp9_is_ref(vp9, tmp) || tmp == vp9->prev_frame) > + continue; > + > + pr_debug("deleting %d\n", tmp->index); > + list_del(&tmp->list); > + kfree(tmp); > + } > +} > + > +static void vp9_tree_merge_probs(unsigned int *prev_prob, > + unsigned int *cur_prob, > + int coef_node_start, int tree_left, > + int tree_right, > + int tree_i, int node) > +{ > + int prob_32, prob_res, prob_shift; > + int pre_prob, new_prob; > + int den, m_count, get_prob, factor; > + > + prob_32 = prev_prob[coef_node_start / 4 * 2]; > + prob_res = coef_node_start & 3; > + prob_shift = prob_res * 8; > + pre_prob = (prob_32 >> prob_shift) & 0xff; > + > + den = tree_left + tree_right; > + > + if (den == 0) { > + new_prob = pre_prob; > + } else { > + m_count 
= den < MODE_MV_COUNT_SAT ? den : MODE_MV_COUNT_SAT; > + get_prob = > + clip_prob(div_r32(((int64_t)tree_left * 256 + > + (den >> 1)), > + den)); > + > + /* weighted_prob */ > + factor = count_to_update_factor[m_count]; > + new_prob = ROUND_POWER_OF_TWO(pre_prob * (256 - factor) + > + get_prob * factor, 8); > + } > + > + cur_prob[coef_node_start / 4 * 2] = > + (cur_prob[coef_node_start / 4 * 2] & (~(0xff << prob_shift))) | > + (new_prob << prob_shift); > +} > + > +static void adapt_coef_probs_cxt(unsigned int *prev_prob, > + unsigned int *cur_prob, > + unsigned int *count, > + int update_factor, > + int cxt_num, > + int coef_cxt_start, > + int coef_count_cxt_start) > +{ > + int prob_32, prob_res, prob_shift; > + int pre_prob, new_prob; > + int num, den, m_count, get_prob, factor; > + int node, coef_node_start; > + int count_sat = 24; > + int cxt; > + > + for (cxt = 0; cxt < cxt_num; cxt++) { > + const int n0 = count[coef_count_cxt_start]; > + const int n1 = count[coef_count_cxt_start + 1]; > + const int n2 = count[coef_count_cxt_start + 2]; > + const int neob = count[coef_count_cxt_start + 3]; > + const int nneob = count[coef_count_cxt_start + 4]; > + const unsigned int branch_ct[3][2] = { > + { neob, nneob }, > + { n0, n1 + n2 }, > + { n1, n2 } > + }; > + > + coef_node_start = coef_cxt_start; > + for (node = 0 ; node < 3 ; node++) { > + prob_32 = prev_prob[coef_node_start / 4 * 2]; > + prob_res = coef_node_start & 3; > + prob_shift = prob_res * 8; > + pre_prob = (prob_32 >> prob_shift) & 0xff; > + > + /* get binary prob */ > + num = branch_ct[node][0]; > + den = branch_ct[node][0] + branch_ct[node][1]; > + m_count = den < count_sat ? den : count_sat; > + > + get_prob = (den == 0) ? 
> + 128u : > + clip_prob(div_r32(((int64_t)num * 256 + > + (den >> 1)), den)); > + > + factor = update_factor * m_count / count_sat; > + new_prob = > + ROUND_POWER_OF_TWO(pre_prob * (256 - factor) + > + get_prob * factor, 8); > + > + cur_prob[coef_node_start / 4 * 2] = > + (cur_prob[coef_node_start / 4 * 2] & > + (~(0xff << prob_shift))) | > + (new_prob << prob_shift); > + > + coef_node_start += 1; > + } > + > + coef_cxt_start = coef_cxt_start + 3; > + coef_count_cxt_start = coef_count_cxt_start + 5; > + } > +} > + > +static void adapt_coef_probs(int prev_kf, int cur_kf, int pre_fc, > + unsigned int *prev_prob, unsigned int *cur_prob, > + unsigned int *count) > +{ > + int tx_size, coef_tx_size_start, coef_count_tx_size_start; > + int plane, coef_plane_start, coef_count_plane_start; > + int type, coef_type_start, coef_count_type_start; > + int band, coef_band_start, coef_count_band_start; > + int cxt_num; > + int coef_cxt_start, coef_count_cxt_start; > + int node, coef_node_start, coef_count_node_start; > + > + int tree_i, tree_left, tree_right; > + int mvd_i; > + > + int update_factor = cur_kf ? 112 : (prev_kf ? 
128 : 112); > + > + int prob_32; > + int prob_res; > + int prob_shift; > + int pre_prob; > + > + int den; > + int get_prob; > + int m_count; > + int factor; > + > + int new_prob; > + > + for (tx_size = 0 ; tx_size < 4 ; tx_size++) { > + coef_tx_size_start = VP9_COEF_START + > + tx_size * 4 * VP9_COEF_SIZE_ONE_SET; > + coef_count_tx_size_start = VP9_COEF_COUNT_START + > + tx_size * 4 * VP9_COEF_COUNT_SIZE_ONE_SET; > + coef_plane_start = coef_tx_size_start; > + coef_count_plane_start = coef_count_tx_size_start; > + > + for (plane = 0 ; plane < 2 ; plane++) { > + coef_type_start = coef_plane_start; > + coef_count_type_start = coef_count_plane_start; > + > + for (type = 0 ; type < 2 ; type++) { > + coef_band_start = coef_type_start; > + coef_count_band_start = coef_count_type_start; > + > + for (band = 0 ; band < 6 ; band++) { > + if (band == 0) > + cxt_num = 3; > + else > + cxt_num = 6; > + coef_cxt_start = coef_band_start; > + coef_count_cxt_start = > + coef_count_band_start; > + > + adapt_coef_probs_cxt(prev_prob, > + cur_prob, > + count, > + update_factor, > + cxt_num, > + coef_cxt_start, > + coef_count_cxt_start); > + > + if (band == 0) { > + coef_band_start += 10; > + coef_count_band_start += 15; > + } else { > + coef_band_start += 18; > + coef_count_band_start += 30; > + } > + } > + coef_type_start += VP9_COEF_SIZE_ONE_SET; > + coef_count_type_start += > + VP9_COEF_COUNT_SIZE_ONE_SET; > + } > + > + coef_plane_start += 2 * VP9_COEF_SIZE_ONE_SET; > + coef_count_plane_start += > + 2 * VP9_COEF_COUNT_SIZE_ONE_SET; > + } > + } > + > + if (cur_kf == 0) { > + /* mode_mv_merge_probs - merge_intra_inter_prob */ > + for (coef_count_node_start = VP9_INTRA_INTER_COUNT_START; > + coef_count_node_start < (VP9_MV_CLASS0_HP_1_COUNT_START + > + VP9_MV_CLASS0_HP_1_COUNT_SIZE); > + coef_count_node_start += 2) { > + if (coef_count_node_start == > + VP9_INTRA_INTER_COUNT_START) > + coef_node_start = VP9_INTRA_INTER_START; > + else if (coef_count_node_start == > + 
VP9_COMP_INTER_COUNT_START) > + coef_node_start = VP9_COMP_INTER_START; > + else if (coef_count_node_start == > + VP9_TX_MODE_COUNT_START) > + coef_node_start = VP9_TX_MODE_START; > + else if (coef_count_node_start == > + VP9_SKIP_COUNT_START) > + coef_node_start = VP9_SKIP_START; > + else if (coef_count_node_start == > + VP9_MV_SIGN_0_COUNT_START) > + coef_node_start = VP9_MV_SIGN_0_START; > + else if (coef_count_node_start == > + VP9_MV_SIGN_1_COUNT_START) > + coef_node_start = VP9_MV_SIGN_1_START; > + else if (coef_count_node_start == > + VP9_MV_BITS_0_COUNT_START) > + coef_node_start = VP9_MV_BITS_0_START; > + else if (coef_count_node_start == > + VP9_MV_BITS_1_COUNT_START) > + coef_node_start = VP9_MV_BITS_1_START; > + else if (coef_count_node_start == > + VP9_MV_CLASS0_HP_0_COUNT_START) > + coef_node_start = VP9_MV_CLASS0_HP_0_START; > + > + den = count[coef_count_node_start] + > + count[coef_count_node_start + 1]; > + > + prob_32 = prev_prob[coef_node_start / 4 * 2]; > + prob_res = coef_node_start & 3; > + prob_shift = prob_res * 8; > + pre_prob = (prob_32 >> prob_shift) & 0xff; > + > + if (den == 0) { > + new_prob = pre_prob; > + } else { > + m_count = den < MODE_MV_COUNT_SAT ? 
> + den : MODE_MV_COUNT_SAT; > + get_prob = > + clip_prob(div_r32(((int64_t) > + count[coef_count_node_start] * 256 + > + (den >> 1)), > + den)); > + > + /* weighted prob */ > + factor = count_to_update_factor[m_count]; > + new_prob = > + ROUND_POWER_OF_TWO(pre_prob * > + (256 - factor) + > + get_prob * factor, > + 8); > + } > + > + cur_prob[coef_node_start / 4 * 2] = > + (cur_prob[coef_node_start / 4 * 2] & > + (~(0xff << prob_shift))) | > + (new_prob << prob_shift); > + > + coef_node_start = coef_node_start + 1; > + } > + > + coef_node_start = VP9_INTER_MODE_START; > + coef_count_node_start = VP9_INTER_MODE_COUNT_START; > + for (tree_i = 0 ; tree_i < 7 ; tree_i++) { > + for (node = 0 ; node < 3 ; node++) { > + unsigned int start = coef_count_node_start; > + > + switch (node) { > + case 2: > + tree_left = count[start + 1]; > + tree_right = count[start + 3]; > + break; > + case 1: > + tree_left = count[start + 0]; > + tree_right = count[start + 1] + > + count[start + 3]; > + break; > + default: > + tree_left = count[start + 2]; > + tree_right = count[start + 0] + > + count[start + 1] + > + count[start + 3]; > + break; > + } > + > + vp9_tree_merge_probs(prev_prob, cur_prob, > + coef_node_start, > + tree_left, tree_right, > + tree_i, node); > + > + coef_node_start = coef_node_start + 1; > + } > + > + coef_count_node_start = coef_count_node_start + 4; > + } > + > + coef_node_start = VP9_IF_Y_MODE_START; > + coef_count_node_start = VP9_IF_Y_MODE_COUNT_START; > + for (tree_i = 0 ; tree_i < 14 ; tree_i++) { > + for (node = 0 ; node < 9 ; node++) { > + unsigned int start = coef_count_node_start; > + > + switch (node) { > + case 8: > + tree_left = > + count[start + D153_PRED]; > + tree_right = > + count[start + D207_PRED]; > + break; > + case 7: > + tree_left = > + count[start + D63_PRED]; > + tree_right = > + count[start + D207_PRED] + > + count[start + D153_PRED]; > + break; > + case 6: > + tree_left = > + count[start + D45_PRED]; > + tree_right = > + count[start + 
D207_PRED] + > + count[start + D153_PRED] + > + count[start + D63_PRED]; > + break; > + case 5: > + tree_left = > + count[start + D135_PRED]; > + tree_right = > + count[start + D117_PRED]; > + break; > + case 4: > + tree_left = > + count[start + H_PRED]; > + tree_right = > + count[start + D117_PRED] + > + count[start + D135_PRED]; > + break; > + case 3: > + tree_left = > + count[start + H_PRED] + > + count[start + D117_PRED] + > + count[start + D135_PRED]; > + tree_right = > + count[start + D45_PRED] + > + count[start + D207_PRED] + > + count[start + D153_PRED] + > + count[start + D63_PRED]; > + break; > + case 2: > + tree_left = > + count[start + V_PRED]; > + tree_right = > + count[start + H_PRED] + > + count[start + D117_PRED] + > + count[start + D135_PRED] + > + count[start + D45_PRED] + > + count[start + D207_PRED] + > + count[start + D153_PRED] + > + count[start + D63_PRED]; > + break; > + case 1: > + tree_left = > + count[start + TM_PRED]; > + tree_right = > + count[start + V_PRED] + > + count[start + H_PRED] + > + count[start + D117_PRED] + > + count[start + D135_PRED] + > + count[start + D45_PRED] + > + count[start + D207_PRED] + > + count[start + D153_PRED] + > + count[start + D63_PRED]; > + break; > + default: > + tree_left = > + count[start + DC_PRED]; > + tree_right = > + count[start + TM_PRED] + > + count[start + V_PRED] + > + count[start + H_PRED] + > + count[start + D117_PRED] + > + count[start + D135_PRED] + > + count[start + D45_PRED] + > + count[start + D207_PRED] + > + count[start + D153_PRED] + > + count[start + D63_PRED]; > + break; > + } > + > + vp9_tree_merge_probs(prev_prob, cur_prob, > + coef_node_start, > + tree_left, tree_right, > + tree_i, node); > + > + coef_node_start = coef_node_start + 1; > + } > + coef_count_node_start = coef_count_node_start + 10; > + } > + > + coef_node_start = VP9_PARTITION_P_START; > + coef_count_node_start = VP9_PARTITION_P_COUNT_START; > + for (tree_i = 0 ; tree_i < 16 ; tree_i++) { > + for (node = 0 ; node < 
3 ; node++) { > + unsigned int start = coef_count_node_start; > + > + switch (node) { > + case 2: > + tree_left = count[start + 2]; > + tree_right = count[start + 3]; > + break; > + case 1: > + tree_left = count[start + 1]; > + tree_right = count[start + 2] + > + count[start + 3]; > + break; > + default: > + tree_left = count[start + 0]; > + tree_right = count[start + 1] + > + count[start + 2] + > + count[start + 3]; > + break; > + } > + > + vp9_tree_merge_probs(prev_prob, cur_prob, > + coef_node_start, > + tree_left, tree_right, > + tree_i, node); > + > + coef_node_start = coef_node_start + 1; > + } > + > + coef_count_node_start = coef_count_node_start + 4; > + } > + > + coef_node_start = VP9_INTERP_START; > + coef_count_node_start = VP9_INTERP_COUNT_START; > + for (tree_i = 0 ; tree_i < 4 ; tree_i++) { > + for (node = 0 ; node < 2 ; node++) { > + unsigned int start = coef_count_node_start; > + > + switch (node) { > + case 1: > + tree_left = count[start + 1]; > + tree_right = count[start + 2]; > + break; > + default: > + tree_left = count[start + 0]; > + tree_right = count[start + 1] + > + count[start + 2]; > + break; > + } > + > + vp9_tree_merge_probs(prev_prob, cur_prob, > + coef_node_start, > + tree_left, tree_right, > + tree_i, node); > + > + coef_node_start = coef_node_start + 1; > + } > + coef_count_node_start = coef_count_node_start + 3; > + } > + > + coef_node_start = VP9_MV_JOINTS_START; > + coef_count_node_start = VP9_MV_JOINTS_COUNT_START; > + for (tree_i = 0 ; tree_i < 1 ; tree_i++) { > + for (node = 0 ; node < 3 ; node++) { > + unsigned int start = coef_count_node_start; > + > + switch (node) { > + case 2: > + tree_left = count[start + 2]; > + tree_right = count[start + 3]; > + break; > + case 1: > + tree_left = count[start + 1]; > + tree_right = count[start + 2] + > + count[start + 3]; > + break; > + default: > + tree_left = count[start + 0]; > + tree_right = count[start + 1] + > + count[start + 2] + > + count[start + 3]; > + break; > + } > + > + 
vp9_tree_merge_probs(prev_prob, cur_prob, > + coef_node_start, > + tree_left, tree_right, > + tree_i, node); > + > + coef_node_start = coef_node_start + 1; > + } > + coef_count_node_start = coef_count_node_start + 4; > + } > + > + for (mvd_i = 0 ; mvd_i < 2 ; mvd_i++) { > + coef_node_start = mvd_i ? VP9_MV_CLASSES_1_START : > + VP9_MV_CLASSES_0_START; > + coef_count_node_start = mvd_i ? > + VP9_MV_CLASSES_1_COUNT_START : > + VP9_MV_CLASSES_0_COUNT_START; > + tree_i = 0; > + for (node = 0; node < 10; node++) { > + unsigned int start = coef_count_node_start; > + > + switch (node) { > + case 9: > + tree_left = count[start + 9]; > + tree_right = count[start + 10]; > + break; > + case 8: > + tree_left = count[start + 7]; > + tree_right = count[start + 8]; > + break; > + case 7: > + tree_left = count[start + 7] + > + count[start + 8]; > + tree_right = count[start + 9] + > + count[start + 10]; > + break; > + case 6: > + tree_left = count[start + 6]; > + tree_right = count[start + 7] + > + count[start + 8] + > + count[start + 9] + > + count[start + 10]; > + break; > + case 5: > + tree_left = count[start + 4]; > + tree_right = count[start + 5]; > + break; > + case 4: > + tree_left = count[start + 4] + > + count[start + 5]; > + tree_right = count[start + 6] + > + count[start + 7] + > + count[start + 8] + > + count[start + 9] + > + count[start + 10]; > + break; > + case 3: > + tree_left = count[start + 2]; > + tree_right = count[start + 3]; > + break; > + case 2: > + tree_left = count[start + 2] + > + count[start + 3]; > + tree_right = count[start + 4] + > + count[start + 5] + > + count[start + 6] + > + count[start + 7] + > + count[start + 8] + > + count[start + 9] + > + count[start + 10]; > + break; > + case 1: > + tree_left = count[start + 1]; > + tree_right = count[start + 2] + > + count[start + 3] + > + count[start + 4] + > + count[start + 5] + > + count[start + 6] + > + count[start + 7] + > + count[start + 8] + > + count[start + 9] + > + count[start + 10]; > + break; > + 
default: > + tree_left = count[start + 0]; > + tree_right = count[start + 1] + > + count[start + 2] + > + count[start + 3] + > + count[start + 4] + > + count[start + 5] + > + count[start + 6] + > + count[start + 7] + > + count[start + 8] + > + count[start + 9] + > + count[start + 10]; > + break; > + } > + > + vp9_tree_merge_probs(prev_prob, cur_prob, > + coef_node_start, > + tree_left, tree_right, > + tree_i, node); > + > + coef_node_start = coef_node_start + 1; > + } > + > + coef_node_start = mvd_i ? VP9_MV_CLASS0_1_START : > + VP9_MV_CLASS0_0_START; > + coef_count_node_start = mvd_i ? > + VP9_MV_CLASS0_1_COUNT_START : > + VP9_MV_CLASS0_0_COUNT_START; > + tree_i = 0; > + node = 0; > + tree_left = count[coef_count_node_start + 0]; > + tree_right = count[coef_count_node_start + 1]; > + > + vp9_tree_merge_probs(prev_prob, cur_prob, > + coef_node_start, > + tree_left, tree_right, > + tree_i, node); > + coef_node_start = mvd_i ? VP9_MV_CLASS0_FP_1_START : > + VP9_MV_CLASS0_FP_0_START; > + coef_count_node_start = mvd_i ? 
> + VP9_MV_CLASS0_FP_1_COUNT_START : > + VP9_MV_CLASS0_FP_0_COUNT_START; > + > + for (tree_i = 0; tree_i < 3; tree_i++) { > + for (node = 0; node < 3; node++) { > + unsigned int start = > + coef_count_node_start; > + switch (node) { > + case 2: > + tree_left = count[start + 2]; > + tree_right = count[start + 3]; > + break; > + case 1: > + tree_left = count[start + 1]; > + tree_right = count[start + 2] + > + count[start + 3]; > + break; > + default: > + tree_left = count[start + 0]; > + tree_right = count[start + 1] + > + count[start + 2] + > + count[start + 3]; > + break; > + } > + > + vp9_tree_merge_probs(prev_prob, > + cur_prob, > + coef_node_start, > + tree_left, > + tree_right, > + tree_i, node); > + > + coef_node_start = coef_node_start + 1; > + } > + coef_count_node_start = > + coef_count_node_start + 4; > + } > + } > + } > +} > + > +static irqreturn_t codec_vp9_threaded_isr(struct amvdec_session *sess) > +{ > + struct amvdec_core *core = sess->core; > + struct codec_vp9 *vp9 = sess->priv; > + u32 dec_status = amvdec_read_dos(core, VP9_DEC_STATUS_REG); > + u32 prob_status = amvdec_read_dos(core, VP9_ADAPT_PROB_REG); > + int i; > + > + if (!vp9) > + return IRQ_HANDLED; > + > + mutex_lock(&vp9->lock); > + if (dec_status != VP9_HEAD_PARSER_DONE) { > + dev_err(core->dev_dec, "Unrecognized dec_status: %08X\n", > + dec_status); > + amvdec_abort(sess); > + goto unlock; > + } > + > + pr_debug("ISR: %08X;%08X\n", dec_status, prob_status); > + sess->keyframe_found = 1; > + > + if ((prob_status & 0xff) == 0xfd && vp9->cur_frame) { > + /* VP9_REQ_ADAPT_PROB */ > + u8 *prev_prob_b = ((u8 *)vp9->workspace_vaddr + > + PROB_OFFSET) + > + ((prob_status >> 8) * 0x1000); > + u8 *cur_prob_b = ((u8 *)vp9->workspace_vaddr + > + PROB_OFFSET) + 0x4000; > + u8 *count_b = (u8 *)vp9->workspace_vaddr + > + COUNT_OFFSET; > + int last_frame_type = vp9->prev_frame ? 
> + vp9->prev_frame->type : > + KEY_FRAME; > + > + adapt_coef_probs(last_frame_type == KEY_FRAME, > + vp9->cur_frame->type == KEY_FRAME ? 1 : 0, > + prob_status >> 8, > + (unsigned int *)prev_prob_b, > + (unsigned int *)cur_prob_b, > + (unsigned int *)count_b); > + > + memcpy(prev_prob_b, cur_prob_b, ADAPT_PROB_SIZE); > + amvdec_write_dos(core, VP9_ADAPT_PROB_REG, 0); > + } > + > + /* Invalidate first 3 refs */ > + for (i = 0; i < REFS_PER_FRAME ; ++i) > + vp9->frame_refs[i] = NULL; > + > + vp9->prev_frame = vp9->cur_frame; > + codec_vp9_update_ref(vp9); > + > + codec_vp9_fetch_rpm(sess); > + if (codec_vp9_process_rpm(vp9)) { > + amvdec_src_change(sess, vp9->width, vp9->height, 16); > + > + /* No frame is actually processed */ > + vp9->cur_frame = NULL; > + > + /* Show the remaining frame */ > + codec_vp9_show_frame(sess); > + > + /* FIXME: Save refs for resized frame */ > + if (vp9->frames_num) > + codec_vp9_save_refs(vp9); > + > + goto unlock; > + } > + > + codec_vp9_process_lf(vp9); > + codec_vp9_process_frame(sess); > + codec_vp9_show_frame(sess); > + > +unlock: > + mutex_unlock(&vp9->lock); > + return IRQ_HANDLED; > +} > + > +static irqreturn_t codec_vp9_isr(struct amvdec_session *sess) > +{ > + return IRQ_WAKE_THREAD; > +} > + > +struct amvdec_codec_ops codec_vp9_ops = { > + .start = codec_vp9_start, > + .stop = codec_vp9_stop, > + .isr = codec_vp9_isr, > + .threaded_isr = codec_vp9_threaded_isr, > + .num_pending_bufs = codec_vp9_num_pending_bufs, > + .drain = codec_vp9_flush_output, > + .resume = codec_vp9_resume, > +}; > diff --git a/drivers/staging/media/meson/vdec/codec_vp9.h b/drivers/staging/media/meson/vdec/codec_vp9.h > new file mode 100644 > index 000000000000..62db65a2b939 > --- /dev/null > +++ b/drivers/staging/media/meson/vdec/codec_vp9.h > @@ -0,0 +1,13 @@ > +/* SPDX-License-Identifier: GPL-2.0+ */ > +/* > + * Copyright (C) 2018 Maxime Jourdan <maxi.jourdan@wanadoo.fr> > + */ > + > +#ifndef __MESON_VDEC_CODEC_VP9_H_ > +#define 
__MESON_VDEC_CODEC_VP9_H_ > + > +#include "vdec.h" > + > +extern struct amvdec_codec_ops codec_vp9_ops; > + > +#endif > diff --git a/drivers/staging/media/meson/vdec/hevc_regs.h b/drivers/staging/media/meson/vdec/hevc_regs.h > index 55c1a80b955a..0392f41a1eed 100644 > --- a/drivers/staging/media/meson/vdec/hevc_regs.h > +++ b/drivers/staging/media/meson/vdec/hevc_regs.h > @@ -122,6 +122,8 @@ > #define HEVC_MPRED_L0_REF00_POC 0xc880 > #define HEVC_MPRED_L1_REF00_POC 0xc8c0 > > +#define HEVC_MPRED_CTRL4 0xc930 > + > #define HEVC_MPRED_CUR_POC 0xc980 > #define HEVC_MPRED_COL_POC 0xc984 > #define HEVC_MPRED_MV_RD_END_ADDR 0xc988 > @@ -140,6 +142,10 @@ > #define HEVCD_IPP_LINEBUFF_BASE 0xd024 > #define HEVCD_IPP_AXIIF_CONFIG 0xd02c > > +#define VP9D_MPP_REF_SCALE_ENBL 0xd104 > +#define VP9D_MPP_REFINFO_TBL_ACCCONFIG 0xd108 > +#define VP9D_MPP_REFINFO_DATA 0xd10c > + > #define HEVCD_MPP_ANC2AXI_TBL_CONF_ADDR 0xd180 > #define HEVCD_MPP_ANC2AXI_TBL_CMD_ADDR 0xd184 > #define HEVCD_MPP_ANC2AXI_TBL_DATA 0xd190 > @@ -164,6 +170,7 @@ > #define HEVC_DBLK_CFG9 0xd424 > #define HEVC_DBLK_CFGA 0xd428 > #define HEVC_DBLK_STS0 0xd42c > +#define HEVC_DBLK_CFGB 0xd42c > #define HEVC_DBLK_STS1 0xd430 > #define HEVC_DBLK_CFGE 0xd438 > > diff --git a/drivers/staging/media/meson/vdec/vdec.c b/drivers/staging/media/meson/vdec/vdec.c > index 5514d2d259a4..1e531a335c50 100644 > --- a/drivers/staging/media/meson/vdec/vdec.c > +++ b/drivers/staging/media/meson/vdec/vdec.c > @@ -395,6 +395,7 @@ static void vdec_reset_bufs_recycle(struct amvdec_session *sess) > static void vdec_stop_streaming(struct vb2_queue *q) > { > struct amvdec_session *sess = vb2_get_drv_priv(q); > + struct amvdec_codec_ops *codec_ops = sess->fmt_out->codec_ops; > struct amvdec_core *core = sess->core; > struct vb2_v4l2_buffer *buf; > > @@ -423,6 +424,10 @@ static void vdec_stop_streaming(struct vb2_queue *q) > > sess->streamon_out = 0; > } else { > + /* Drain remaining refs if was still running */ > + if (sess->status >= 
STATUS_RUNNING && codec_ops->drain) > + codec_ops->drain(sess); > + > while ((buf = v4l2_m2m_dst_buf_remove(sess->m2m_ctx))) > v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR); > > diff --git a/drivers/staging/media/meson/vdec/vdec_helpers.c b/drivers/staging/media/meson/vdec/vdec_helpers.c > index 818064b6b4d0..a4f62b892188 100644 > --- a/drivers/staging/media/meson/vdec/vdec_helpers.c > +++ b/drivers/staging/media/meson/vdec/vdec_helpers.c > @@ -299,6 +299,10 @@ static void dst_buf_done(struct amvdec_session *sess, > sess->sequence_cap - 1); > v4l2_event_queue_fh(&sess->fh, &ev); > vbuf->flags |= V4L2_BUF_FLAG_LAST; > + } else if (sess->status == STATUS_NEEDS_RESUME) { > + /* Mark LAST for drained show frames during a source change */ > + vbuf->flags |= V4L2_BUF_FLAG_LAST; > + sess->sequence_cap = 0; > } else if (sess->should_stop) > dev_dbg(dev, "should_stop, %u bufs remain\n", > atomic_read(&sess->esparser_queued_bufs)); > diff --git a/drivers/staging/media/meson/vdec/vdec_platform.c b/drivers/staging/media/meson/vdec/vdec_platform.c > index e9356a46828f..72a833b1cebd 100644 > --- a/drivers/staging/media/meson/vdec/vdec_platform.c > +++ b/drivers/staging/media/meson/vdec/vdec_platform.c > @@ -8,8 +8,10 @@ > #include "vdec.h" > > #include "vdec_1.h" > +#include "vdec_hevc.h" > #include "codec_mpeg12.h" > #include "codec_h264.h" > +#include "codec_vp9.h" > > static const struct amvdec_format vdec_formats_gxbb[] = { > { > @@ -51,6 +53,18 @@ static const struct amvdec_format vdec_formats_gxbb[] = { > > static const struct amvdec_format vdec_formats_gxl[] = { > { > + .pixfmt = V4L2_PIX_FMT_VP9, > + .min_buffers = 16, > + .max_buffers = 24, > + .max_width = 3840, > + .max_height = 2160, > + .vdec_ops = &vdec_hevc_ops, > + .codec_ops = &codec_vp9_ops, > + .firmware_path = "meson/vdec/gxl_vp9.bin", Is there a pull request pending for this firmware ? I could not test as this firmware was missing. 
Note that it could be nice to remove the format from the enumeration in that case, as it's very confusing initially. > + .pixfmts_cap = { V4L2_PIX_FMT_NV12M, 0 }, > + .flags = V4L2_FMT_FLAG_COMPRESSED | > + V4L2_FMT_FLAG_DYN_RESOLUTION, > + }, { > .pixfmt = V4L2_PIX_FMT_H264, > .min_buffers = 2, > .max_buffers = 24, > @@ -127,6 +141,18 @@ static const struct amvdec_format vdec_formats_gxm[] = { > > static const struct amvdec_format vdec_formats_g12a[] = { > { > + .pixfmt = V4L2_PIX_FMT_VP9, > + .min_buffers = 16, > + .max_buffers = 24, > + .max_width = 3840, > + .max_height = 2160, > + .vdec_ops = &vdec_hevc_ops, > + .codec_ops = &codec_vp9_ops, > + .firmware_path = "meson/vdec/g12a_vp9.bin", > + .pixfmts_cap = { V4L2_PIX_FMT_NV12M, 0 }, > + .flags = V4L2_FMT_FLAG_COMPRESSED | > + V4L2_FMT_FLAG_DYN_RESOLUTION, > + }, { > .pixfmt = V4L2_PIX_FMT_H264, > .min_buffers = 2, > .max_buffers = 24, > @@ -165,6 +191,18 @@ static const struct amvdec_format vdec_formats_g12a[] = { > > static const struct amvdec_format vdec_formats_sm1[] = { > { > + .pixfmt = V4L2_PIX_FMT_VP9, > + .min_buffers = 16, > + .max_buffers = 24, > + .max_width = 3840, > + .max_height = 2160, > + .vdec_ops = &vdec_hevc_ops, > + .codec_ops = &codec_vp9_ops, > + .firmware_path = "meson/vdec/g12a_vp9.bin", > + .pixfmts_cap = { V4L2_PIX_FMT_NV12M, 0 }, > + .flags = V4L2_FMT_FLAG_COMPRESSED | > + V4L2_FMT_FLAG_DYN_RESOLUTION, > + }, { > .pixfmt = V4L2_PIX_FMT_H264, > .min_buffers = 2, > .max_buffers = 24,
On 03/02/2020 04:11, Nicolas Dufresne wrote: > Hi Neil, > > Le jeudi 16 janvier 2020 à 14:34 +0100, Neil Armstrong a écrit : >> From: Maxime Jourdan <mjourdan@baylibre.com> >> >> This adds VP9 decoding for the Amlogic GXL, G12A & SM1 SoCs, using >> the commong "HEVC" HW decoder. >> >> For G12A & SM1, it uses the IOMMU support from the firmware. >> >> For 10bit decoding, the firmware can only decode in the proprietary >> Amlogic Framebuffer Compression format, but can output in 8bit NV12 >> buffer while writing the decoded frame. >> >> Signed-off-by: Maxime Jourdan <mjourdan@baylibre.com> >> Signed-off-by: Neil Armstrong <narmstrong@baylibre.com> >> --- >> drivers/staging/media/meson/vdec/Makefile | 2 +- >> drivers/staging/media/meson/vdec/codec_vp9.c | 2139 +++++++++++++++++ >> drivers/staging/media/meson/vdec/codec_vp9.h | 13 + >> drivers/staging/media/meson/vdec/hevc_regs.h | 7 + >> drivers/staging/media/meson/vdec/vdec.c | 5 + >> .../staging/media/meson/vdec/vdec_helpers.c | 4 + >> .../staging/media/meson/vdec/vdec_platform.c | 38 + >> 7 files changed, 2207 insertions(+), 1 deletion(-) >> create mode 100644 drivers/staging/media/meson/vdec/codec_vp9.c >> create mode 100644 drivers/staging/media/meson/vdec/codec_vp9.h >> [...] 
>> diff --git a/drivers/staging/media/meson/vdec/vdec_platform.c b/drivers/staging/media/meson/vdec/vdec_platform.c >> index e9356a46828f..72a833b1cebd 100644 >> --- a/drivers/staging/media/meson/vdec/vdec_platform.c >> +++ b/drivers/staging/media/meson/vdec/vdec_platform.c >> @@ -8,8 +8,10 @@ >> #include "vdec.h" >> >> #include "vdec_1.h" >> +#include "vdec_hevc.h" >> #include "codec_mpeg12.h" >> #include "codec_h264.h" >> +#include "codec_vp9.h" >> >> static const struct amvdec_format vdec_formats_gxbb[] = { >> { >> @@ -51,6 +53,18 @@ static const struct amvdec_format vdec_formats_gxbb[] = { >> >> static const struct amvdec_format vdec_formats_gxl[] = { >> { >> + .pixfmt = V4L2_PIX_FMT_VP9, >> + .min_buffers = 16, >> + .max_buffers = 24, >> + .max_width = 3840, >> + .max_height = 2160, >> + .vdec_ops = &vdec_hevc_ops, >> + .codec_ops = &codec_vp9_ops, >> + .firmware_path = "meson/vdec/gxl_vp9.bin", > > Is there a pull request pending for this firmware ? I could not test as > this firmware was missing. Note that it could be nice to remove the > format from the enumeration in that case, as it's very confusing > initially. It was merged yesterday, sorry for the delay. With Maxime's patch, we tested it using ffmpeg master, mpv master with drm-prime rendering. We have a buildroot repo with the changes needed: https://gitlab.com/baylibre/amlogic/atv/buildroot-yukawa I will respin a v4 with the small fix from Maxime. 
Neil > >> + .pixfmts_cap = { V4L2_PIX_FMT_NV12M, 0 }, >> + .flags = V4L2_FMT_FLAG_COMPRESSED | >> + V4L2_FMT_FLAG_DYN_RESOLUTION, >> + }, { >> .pixfmt = V4L2_PIX_FMT_H264, >> .min_buffers = 2, >> .max_buffers = 24, >> @@ -127,6 +141,18 @@ static const struct amvdec_format vdec_formats_gxm[] = { >> >> static const struct amvdec_format vdec_formats_g12a[] = { >> { >> + .pixfmt = V4L2_PIX_FMT_VP9, >> + .min_buffers = 16, >> + .max_buffers = 24, >> + .max_width = 3840, >> + .max_height = 2160, >> + .vdec_ops = &vdec_hevc_ops, >> + .codec_ops = &codec_vp9_ops, >> + .firmware_path = "meson/vdec/g12a_vp9.bin", >> + .pixfmts_cap = { V4L2_PIX_FMT_NV12M, 0 }, >> + .flags = V4L2_FMT_FLAG_COMPRESSED | >> + V4L2_FMT_FLAG_DYN_RESOLUTION, >> + }, { >> .pixfmt = V4L2_PIX_FMT_H264, >> .min_buffers = 2, >> .max_buffers = 24, >> @@ -165,6 +191,18 @@ static const struct amvdec_format vdec_formats_g12a[] = { >> >> static const struct amvdec_format vdec_formats_sm1[] = { >> { >> + .pixfmt = V4L2_PIX_FMT_VP9, >> + .min_buffers = 16, >> + .max_buffers = 24, >> + .max_width = 3840, >> + .max_height = 2160, >> + .vdec_ops = &vdec_hevc_ops, >> + .codec_ops = &codec_vp9_ops, >> + .firmware_path = "meson/vdec/g12a_vp9.bin", >> + .pixfmts_cap = { V4L2_PIX_FMT_NV12M, 0 }, >> + .flags = V4L2_FMT_FLAG_COMPRESSED | >> + V4L2_FMT_FLAG_DYN_RESOLUTION, >> + }, { >> .pixfmt = V4L2_PIX_FMT_H264, >> .min_buffers = 2, >> .max_buffers = 24, >
Le jeudi 06 février 2020 à 09:08 +0100, Neil Armstrong a écrit : > On 03/02/2020 04:11, Nicolas Dufresne wrote: > > Hi Neil, > > > > Le jeudi 16 janvier 2020 à 14:34 +0100, Neil Armstrong a écrit : > > > From: Maxime Jourdan <mjourdan@baylibre.com> > > > > > > This adds VP9 decoding for the Amlogic GXL, G12A & SM1 SoCs, using > > > the commong "HEVC" HW decoder. > > > > > > For G12A & SM1, it uses the IOMMU support from the firmware. > > > > > > For 10bit decoding, the firmware can only decode in the proprietary > > > Amlogic Framebuffer Compression format, but can output in 8bit NV12 > > > buffer while writing the decoded frame. > > > > > > Signed-off-by: Maxime Jourdan <mjourdan@baylibre.com> > > > Signed-off-by: Neil Armstrong <narmstrong@baylibre.com> > > > --- > > > drivers/staging/media/meson/vdec/Makefile | 2 +- > > > drivers/staging/media/meson/vdec/codec_vp9.c | 2139 +++++++++++++++++ > > > drivers/staging/media/meson/vdec/codec_vp9.h | 13 + > > > drivers/staging/media/meson/vdec/hevc_regs.h | 7 + > > > drivers/staging/media/meson/vdec/vdec.c | 5 + > > > .../staging/media/meson/vdec/vdec_helpers.c | 4 + > > > .../staging/media/meson/vdec/vdec_platform.c | 38 + > > > 7 files changed, 2207 insertions(+), 1 deletion(-) > > > create mode 100644 drivers/staging/media/meson/vdec/codec_vp9.c > > > create mode 100644 drivers/staging/media/meson/vdec/codec_vp9.h > > > > > [...] 
> > > > diff --git a/drivers/staging/media/meson/vdec/vdec_platform.c b/drivers/staging/media/meson/vdec/vdec_platform.c > > > index e9356a46828f..72a833b1cebd 100644 > > > --- a/drivers/staging/media/meson/vdec/vdec_platform.c > > > +++ b/drivers/staging/media/meson/vdec/vdec_platform.c > > > @@ -8,8 +8,10 @@ > > > #include "vdec.h" > > > > > > #include "vdec_1.h" > > > +#include "vdec_hevc.h" > > > #include "codec_mpeg12.h" > > > #include "codec_h264.h" > > > +#include "codec_vp9.h" > > > > > > static const struct amvdec_format vdec_formats_gxbb[] = { > > > { > > > @@ -51,6 +53,18 @@ static const struct amvdec_format vdec_formats_gxbb[] = { > > > > > > static const struct amvdec_format vdec_formats_gxl[] = { > > > { > > > + .pixfmt = V4L2_PIX_FMT_VP9, > > > + .min_buffers = 16, > > > + .max_buffers = 24, > > > + .max_width = 3840, > > > + .max_height = 2160, > > > + .vdec_ops = &vdec_hevc_ops, > > > + .codec_ops = &codec_vp9_ops, > > > + .firmware_path = "meson/vdec/gxl_vp9.bin", > > > > Is there a pull request pending for this firmware ? I could not test as > > this firmware was missing. Note that it could be nice to remove the > > format from the enumeration in that case, as it's very confusing > > initially. > > It has been merged yesterday, sorry for the delay. > > With maxime's patch, we tested it using ffmpeg master, mpv master with drm-prime > rendering. We have a buildroot repo with the changed needed : > > https://gitlab.com/baylibre/amlogic/atv/buildroot-yukawa > > I will respin a v4 with the small fix from maxime. Ok, I see it now. I've given it a try, the output was corrupted with this file from Android CTS + GStreamer 1.16.1 (Fedora 31 build). Profile_0_8bit/buf/crowd_run_1080X512_fr30_bd8_8buf_l3.webm Looking forward to Maxime's patches. 
Just like the H264 case, seeking with kmssink leads to stalls, but this time, the kernel gives only one message: [498230.951737] meson-vdec c8820000.video-codec: No dst buffer available And now I remember a comment, I guess it was from Maxime, about increasing the number of buffers from 2 to 3. So I'll try and find time to get V4 and test again. Quite some improvement over my last series of tests with GStreamer. It has this particularity of running input/output and capture QBUF on potentially 3 different threads. > > Neil > > > > > + .pixfmts_cap = { V4L2_PIX_FMT_NV12M, 0 }, > > > + .flags = V4L2_FMT_FLAG_COMPRESSED | > > > + V4L2_FMT_FLAG_DYN_RESOLUTION, > > > + }, { > > > .pixfmt = V4L2_PIX_FMT_H264, > > > .min_buffers = 2, > > > .max_buffers = 24, > > > @@ -127,6 +141,18 @@ static const struct amvdec_format vdec_formats_gxm[] = { > > > > > > static const struct amvdec_format vdec_formats_g12a[] = { > > > { > > > + .pixfmt = V4L2_PIX_FMT_VP9, > > > + .min_buffers = 16, > > > + .max_buffers = 24, > > > + .max_width = 3840, > > > + .max_height = 2160, > > > + .vdec_ops = &vdec_hevc_ops, > > > + .codec_ops = &codec_vp9_ops, > > > + .firmware_path = "meson/vdec/g12a_vp9.bin", > > > + .pixfmts_cap = { V4L2_PIX_FMT_NV12M, 0 }, > > > + .flags = V4L2_FMT_FLAG_COMPRESSED | > > > + V4L2_FMT_FLAG_DYN_RESOLUTION, > > > + }, { > > > .pixfmt = V4L2_PIX_FMT_H264, > > > .min_buffers = 2, > > > .max_buffers = 24, > > > @@ -165,6 +191,18 @@ static const struct amvdec_format vdec_formats_g12a[] = { > > > > > > static const struct amvdec_format vdec_formats_sm1[] = { > > > { > > > + .pixfmt = V4L2_PIX_FMT_VP9, > > > + .min_buffers = 16, > > > + .max_buffers = 24, > > > + .max_width = 3840, > > > + .max_height = 2160, > > > + .vdec_ops = &vdec_hevc_ops, > > > + .codec_ops = &codec_vp9_ops, > > > + .firmware_path = "meson/vdec/g12a_vp9.bin", > > > + .pixfmts_cap = { V4L2_PIX_FMT_NV12M, 0 }, > > > + .flags = V4L2_FMT_FLAG_COMPRESSED | > > > + V4L2_FMT_FLAG_DYN_RESOLUTION, > > > + }, { > > 
> .pixfmt = V4L2_PIX_FMT_H264, > > > .min_buffers = 2, > > > .max_buffers = 24,
diff --git a/drivers/staging/media/meson/vdec/Makefile b/drivers/staging/media/meson/vdec/Makefile index f55b6e625034..6e726af84ac9 100644 --- a/drivers/staging/media/meson/vdec/Makefile +++ b/drivers/staging/media/meson/vdec/Makefile @@ -3,6 +3,6 @@ meson-vdec-objs = esparser.o vdec.o vdec_helpers.o vdec_platform.o meson-vdec-objs += vdec_1.o vdec_hevc.o -meson-vdec-objs += codec_mpeg12.o codec_h264.o codec_hevc_common.o +meson-vdec-objs += codec_mpeg12.o codec_h264.o codec_hevc_common.o codec_vp9.o obj-$(CONFIG_VIDEO_MESON_VDEC) += meson-vdec.o diff --git a/drivers/staging/media/meson/vdec/codec_vp9.c b/drivers/staging/media/meson/vdec/codec_vp9.c new file mode 100644 index 000000000000..9e23ccbb7cc0 --- /dev/null +++ b/drivers/staging/media/meson/vdec/codec_vp9.c @@ -0,0 +1,2139 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2018 Maxime Jourdan <mjourdan@baylibre.com> + * Copyright (C) 2015 Amlogic, Inc. All rights reserved. + */ + +#include <media/v4l2-mem2mem.h> +#include <media/videobuf2-dma-contig.h> + +#include "dos_regs.h" +#include "hevc_regs.h" +#include "vdec_helpers.h" +#include "codec_hevc_common.h" + +/* HEVC reg mapping */ +#define VP9_DEC_STATUS_REG HEVC_ASSIST_SCRATCH_0 + #define VP9_10B_DECODE_SLICE 5 + #define VP9_HEAD_PARSER_DONE 0xf0 +#define VP9_RPM_BUFFER HEVC_ASSIST_SCRATCH_1 +#define VP9_SHORT_TERM_RPS HEVC_ASSIST_SCRATCH_2 +#define VP9_ADAPT_PROB_REG HEVC_ASSIST_SCRATCH_3 +#define VP9_MMU_MAP_BUFFER HEVC_ASSIST_SCRATCH_4 +#define VP9_PPS_BUFFER HEVC_ASSIST_SCRATCH_5 +#define VP9_SAO_UP HEVC_ASSIST_SCRATCH_6 +#define VP9_STREAM_SWAP_BUFFER HEVC_ASSIST_SCRATCH_7 +#define VP9_STREAM_SWAP_BUFFER2 HEVC_ASSIST_SCRATCH_8 +#define VP9_PROB_SWAP_BUFFER HEVC_ASSIST_SCRATCH_9 +#define VP9_COUNT_SWAP_BUFFER HEVC_ASSIST_SCRATCH_A +#define VP9_SEG_MAP_BUFFER HEVC_ASSIST_SCRATCH_B +#define VP9_SCALELUT HEVC_ASSIST_SCRATCH_D +#define VP9_WAIT_FLAG HEVC_ASSIST_SCRATCH_E +#define LMEM_DUMP_ADR HEVC_ASSIST_SCRATCH_F +#define NAL_SEARCH_CTL 
HEVC_ASSIST_SCRATCH_I +#define VP9_DECODE_MODE HEVC_ASSIST_SCRATCH_J + #define DECODE_MODE_SINGLE 0 +#define DECODE_STOP_POS HEVC_ASSIST_SCRATCH_K +#define HEVC_DECODE_COUNT HEVC_ASSIST_SCRATCH_M +#define HEVC_DECODE_SIZE HEVC_ASSIST_SCRATCH_N + +/* VP9 Constants */ +#define LCU_SIZE 64 +#define MAX_REF_PIC_NUM 24 +#define REFS_PER_FRAME 3 +#define REF_FRAMES 8 +#define MV_MEM_UNIT 0x240 +#define ADAPT_PROB_SIZE 0xf80 + +enum FRAME_TYPE { + KEY_FRAME = 0, + INTER_FRAME = 1, + FRAME_TYPES, +}; + +/* VP9 Workspace layout */ +#define MPRED_MV_BUF_SIZE 0x120000 + +#define IPP_SIZE 0x4000 +#define SAO_ABV_SIZE 0x30000 +#define SAO_VB_SIZE 0x30000 +#define SH_TM_RPS_SIZE 0x800 +#define VPS_SIZE 0x800 +#define SPS_SIZE 0x800 +#define PPS_SIZE 0x2000 +#define SAO_UP_SIZE 0x2800 +#define SWAP_BUF_SIZE 0x800 +#define SWAP_BUF2_SIZE 0x800 +#define SCALELUT_SIZE 0x8000 +#define DBLK_PARA_SIZE 0x80000 +#define DBLK_DATA_SIZE 0x80000 +#define SEG_MAP_SIZE 0xd800 +#define PROB_SIZE 0x5000 +#define COUNT_SIZE 0x3000 +#define MMU_VBH_SIZE 0x5000 +#define MPRED_ABV_SIZE 0x10000 +#define MPRED_MV_SIZE (MPRED_MV_BUF_SIZE * MAX_REF_PIC_NUM) +#define RPM_BUF_SIZE 0x100 +#define LMEM_SIZE 0x800 + +#define IPP_OFFSET 0x00 +#define SAO_ABV_OFFSET (IPP_OFFSET + IPP_SIZE) +#define SAO_VB_OFFSET (SAO_ABV_OFFSET + SAO_ABV_SIZE) +#define SH_TM_RPS_OFFSET (SAO_VB_OFFSET + SAO_VB_SIZE) +#define VPS_OFFSET (SH_TM_RPS_OFFSET + SH_TM_RPS_SIZE) +#define SPS_OFFSET (VPS_OFFSET + VPS_SIZE) +#define PPS_OFFSET (SPS_OFFSET + SPS_SIZE) +#define SAO_UP_OFFSET (PPS_OFFSET + PPS_SIZE) +#define SWAP_BUF_OFFSET (SAO_UP_OFFSET + SAO_UP_SIZE) +#define SWAP_BUF2_OFFSET (SWAP_BUF_OFFSET + SWAP_BUF_SIZE) +#define SCALELUT_OFFSET (SWAP_BUF2_OFFSET + SWAP_BUF2_SIZE) +#define DBLK_PARA_OFFSET (SCALELUT_OFFSET + SCALELUT_SIZE) +#define DBLK_DATA_OFFSET (DBLK_PARA_OFFSET + DBLK_PARA_SIZE) +#define SEG_MAP_OFFSET (DBLK_DATA_OFFSET + DBLK_DATA_SIZE) +#define PROB_OFFSET (SEG_MAP_OFFSET + SEG_MAP_SIZE) +#define 
COUNT_OFFSET (PROB_OFFSET + PROB_SIZE) +#define MMU_VBH_OFFSET (COUNT_OFFSET + COUNT_SIZE) +#define MPRED_ABV_OFFSET (MMU_VBH_OFFSET + MMU_VBH_SIZE) +#define MPRED_MV_OFFSET (MPRED_ABV_OFFSET + MPRED_ABV_SIZE) +#define RPM_OFFSET (MPRED_MV_OFFSET + MPRED_MV_SIZE) +#define LMEM_OFFSET (RPM_OFFSET + RPM_BUF_SIZE) + +#define SIZE_WORKSPACE ALIGN(LMEM_OFFSET + LMEM_SIZE, 64 * SZ_1K) + +#define NONE -1 +#define INTRA_FRAME 0 +#define LAST_FRAME 1 +#define GOLDEN_FRAME 2 +#define ALTREF_FRAME 3 +#define MAX_REF_FRAMES 4 + +/* + * Defines, declarations, sub-functions for vp9 de-block loop + filter Thr/Lvl table update + * - struct segmentation is for loop filter only (removed something) + * - function "vp9_loop_filter_init" and "vp9_loop_filter_frame_init" will + be instantiated in C_Entry + * - vp9_loop_filter_init run once before decoding start + * - vp9_loop_filter_frame_init run before every frame decoding start + * - set video format to VP9 is in vp9_loop_filter_init + */ +#define MAX_LOOP_FILTER 63 +#define MAX_REF_LF_DELTAS 4 +#define MAX_MODE_LF_DELTAS 2 +#define SEGMENT_DELTADATA 0 +#define SEGMENT_ABSDATA 1 +#define MAX_SEGMENTS 8 + +/* VP9 PROB processing defines */ +#define VP9_PARTITION_START 0 +#define VP9_PARTITION_SIZE_STEP (3 * 4) +#define VP9_PARTITION_ONE_SIZE (4 * VP9_PARTITION_SIZE_STEP) +#define VP9_PARTITION_KEY_START 0 +#define VP9_PARTITION_P_START VP9_PARTITION_ONE_SIZE +#define VP9_PARTITION_SIZE (2 * VP9_PARTITION_ONE_SIZE) +#define VP9_SKIP_START (VP9_PARTITION_START + VP9_PARTITION_SIZE) +#define VP9_SKIP_SIZE 4 /* only use 3*/ +#define VP9_TX_MODE_START (VP9_SKIP_START + VP9_SKIP_SIZE) +#define VP9_TX_MODE_8_0_OFFSET 0 +#define VP9_TX_MODE_8_1_OFFSET 1 +#define VP9_TX_MODE_16_0_OFFSET 2 +#define VP9_TX_MODE_16_1_OFFSET 4 +#define VP9_TX_MODE_32_0_OFFSET 6 +#define VP9_TX_MODE_32_1_OFFSET 9 +#define VP9_TX_MODE_SIZE 12 +#define VP9_COEF_START (VP9_TX_MODE_START + VP9_TX_MODE_SIZE) +#define VP9_COEF_BAND_0_OFFSET 0 +#define 
VP9_COEF_BAND_1_OFFSET (VP9_COEF_BAND_0_OFFSET + 3 * 3 + 1) +#define VP9_COEF_BAND_2_OFFSET (VP9_COEF_BAND_1_OFFSET + 6 * 3) +#define VP9_COEF_BAND_3_OFFSET (VP9_COEF_BAND_2_OFFSET + 6 * 3) +#define VP9_COEF_BAND_4_OFFSET (VP9_COEF_BAND_3_OFFSET + 6 * 3) +#define VP9_COEF_BAND_5_OFFSET (VP9_COEF_BAND_4_OFFSET + 6 * 3) +#define VP9_COEF_SIZE_ONE_SET 100 /* ((3 + 5 * 6) * 3 + 1 padding)*/ +#define VP9_COEF_4X4_START (VP9_COEF_START + 0 * VP9_COEF_SIZE_ONE_SET) +#define VP9_COEF_8X8_START (VP9_COEF_START + 4 * VP9_COEF_SIZE_ONE_SET) +#define VP9_COEF_16X16_START (VP9_COEF_START + 8 * VP9_COEF_SIZE_ONE_SET) +#define VP9_COEF_32X32_START (VP9_COEF_START + 12 * VP9_COEF_SIZE_ONE_SET) +#define VP9_COEF_SIZE_PLANE (2 * VP9_COEF_SIZE_ONE_SET) +#define VP9_COEF_SIZE (4 * 2 * 2 * VP9_COEF_SIZE_ONE_SET) +#define VP9_INTER_MODE_START (VP9_COEF_START + VP9_COEF_SIZE) +#define VP9_INTER_MODE_SIZE 24 /* only use 21 (# * 7)*/ +#define VP9_INTERP_START (VP9_INTER_MODE_START + VP9_INTER_MODE_SIZE) +#define VP9_INTERP_SIZE 8 +#define VP9_INTRA_INTER_START (VP9_INTERP_START + VP9_INTERP_SIZE) +#define VP9_INTRA_INTER_SIZE 4 +#define VP9_INTERP_INTRA_INTER_START VP9_INTERP_START +#define VP9_INTERP_INTRA_INTER_SIZE (VP9_INTERP_SIZE + VP9_INTRA_INTER_SIZE) +#define VP9_COMP_INTER_START \ + (VP9_INTERP_INTRA_INTER_START + VP9_INTERP_INTRA_INTER_SIZE) +#define VP9_COMP_INTER_SIZE 5 +#define VP9_COMP_REF_START (VP9_COMP_INTER_START + VP9_COMP_INTER_SIZE) +#define VP9_COMP_REF_SIZE 5 +#define VP9_SINGLE_REF_START (VP9_COMP_REF_START + VP9_COMP_REF_SIZE) +#define VP9_SINGLE_REF_SIZE 10 +#define VP9_REF_MODE_START VP9_COMP_INTER_START +#define VP9_REF_MODE_SIZE \ + (VP9_COMP_INTER_SIZE + VP9_COMP_REF_SIZE + VP9_SINGLE_REF_SIZE) +#define VP9_IF_Y_MODE_START (VP9_REF_MODE_START + VP9_REF_MODE_SIZE) +#define VP9_IF_Y_MODE_SIZE 36 +#define VP9_IF_UV_MODE_START (VP9_IF_Y_MODE_START + VP9_IF_Y_MODE_SIZE) +#define VP9_IF_UV_MODE_SIZE 92 /* only use 90*/ +#define VP9_MV_JOINTS_START 
(VP9_IF_UV_MODE_START + VP9_IF_UV_MODE_SIZE) +#define VP9_MV_JOINTS_SIZE 3 +#define VP9_MV_SIGN_0_START (VP9_MV_JOINTS_START + VP9_MV_JOINTS_SIZE) +#define VP9_MV_SIGN_0_SIZE 1 +#define VP9_MV_CLASSES_0_START (VP9_MV_SIGN_0_START + VP9_MV_SIGN_0_SIZE) +#define VP9_MV_CLASSES_0_SIZE 10 +#define VP9_MV_CLASS0_0_START \ + (VP9_MV_CLASSES_0_START + VP9_MV_CLASSES_0_SIZE) +#define VP9_MV_CLASS0_0_SIZE 1 +#define VP9_MV_BITS_0_START (VP9_MV_CLASS0_0_START + VP9_MV_CLASS0_0_SIZE) +#define VP9_MV_BITS_0_SIZE 10 +#define VP9_MV_SIGN_1_START (VP9_MV_BITS_0_START + VP9_MV_BITS_0_SIZE) +#define VP9_MV_SIGN_1_SIZE 1 +#define VP9_MV_CLASSES_1_START \ + (VP9_MV_SIGN_1_START + VP9_MV_SIGN_1_SIZE) +#define VP9_MV_CLASSES_1_SIZE 10 +#define VP9_MV_CLASS0_1_START \ + (VP9_MV_CLASSES_1_START + VP9_MV_CLASSES_1_SIZE) +#define VP9_MV_CLASS0_1_SIZE 1 +#define VP9_MV_BITS_1_START \ + (VP9_MV_CLASS0_1_START + VP9_MV_CLASS0_1_SIZE) +#define VP9_MV_BITS_1_SIZE 10 +#define VP9_MV_CLASS0_FP_0_START \ + (VP9_MV_BITS_1_START + VP9_MV_BITS_1_SIZE) +#define VP9_MV_CLASS0_FP_0_SIZE 9 +#define VP9_MV_CLASS0_FP_1_START \ + (VP9_MV_CLASS0_FP_0_START + VP9_MV_CLASS0_FP_0_SIZE) +#define VP9_MV_CLASS0_FP_1_SIZE 9 +#define VP9_MV_CLASS0_HP_0_START \ + (VP9_MV_CLASS0_FP_1_START + VP9_MV_CLASS0_FP_1_SIZE) +#define VP9_MV_CLASS0_HP_0_SIZE 2 +#define VP9_MV_CLASS0_HP_1_START \ + (VP9_MV_CLASS0_HP_0_START + VP9_MV_CLASS0_HP_0_SIZE) +#define VP9_MV_CLASS0_HP_1_SIZE 2 +#define VP9_MV_START VP9_MV_JOINTS_START +#define VP9_MV_SIZE 72 /*only use 69*/ + +#define VP9_TOTAL_SIZE (VP9_MV_START + VP9_MV_SIZE) + +/* VP9 COUNT mem processing defines */ +#define VP9_COEF_COUNT_START 0 +#define VP9_COEF_COUNT_BAND_0_OFFSET 0 +#define VP9_COEF_COUNT_BAND_1_OFFSET \ + (VP9_COEF_COUNT_BAND_0_OFFSET + 3 * 5) +#define VP9_COEF_COUNT_BAND_2_OFFSET \ + (VP9_COEF_COUNT_BAND_1_OFFSET + 6 * 5) +#define VP9_COEF_COUNT_BAND_3_OFFSET \ + (VP9_COEF_COUNT_BAND_2_OFFSET + 6 * 5) +#define VP9_COEF_COUNT_BAND_4_OFFSET \ + 
(VP9_COEF_COUNT_BAND_3_OFFSET + 6 * 5) +#define VP9_COEF_COUNT_BAND_5_OFFSET \ + (VP9_COEF_COUNT_BAND_4_OFFSET + 6 * 5) +#define VP9_COEF_COUNT_SIZE_ONE_SET 165 /* ((3 + 5 * 6) * 5 */ +#define VP9_COEF_COUNT_4X4_START \ + (VP9_COEF_COUNT_START + 0 * VP9_COEF_COUNT_SIZE_ONE_SET) +#define VP9_COEF_COUNT_8X8_START \ + (VP9_COEF_COUNT_START + 4 * VP9_COEF_COUNT_SIZE_ONE_SET) +#define VP9_COEF_COUNT_16X16_START \ + (VP9_COEF_COUNT_START + 8 * VP9_COEF_COUNT_SIZE_ONE_SET) +#define VP9_COEF_COUNT_32X32_START \ + (VP9_COEF_COUNT_START + 12 * VP9_COEF_COUNT_SIZE_ONE_SET) +#define VP9_COEF_COUNT_SIZE_PLANE (2 * VP9_COEF_COUNT_SIZE_ONE_SET) +#define VP9_COEF_COUNT_SIZE (4 * 2 * 2 * VP9_COEF_COUNT_SIZE_ONE_SET) + +#define VP9_INTRA_INTER_COUNT_START \ + (VP9_COEF_COUNT_START + VP9_COEF_COUNT_SIZE) +#define VP9_INTRA_INTER_COUNT_SIZE (4 * 2) +#define VP9_COMP_INTER_COUNT_START \ + (VP9_INTRA_INTER_COUNT_START + VP9_INTRA_INTER_COUNT_SIZE) +#define VP9_COMP_INTER_COUNT_SIZE (5 * 2) +#define VP9_COMP_REF_COUNT_START \ + (VP9_COMP_INTER_COUNT_START + VP9_COMP_INTER_COUNT_SIZE) +#define VP9_COMP_REF_COUNT_SIZE (5 * 2) +#define VP9_SINGLE_REF_COUNT_START \ + (VP9_COMP_REF_COUNT_START + VP9_COMP_REF_COUNT_SIZE) +#define VP9_SINGLE_REF_COUNT_SIZE (10 * 2) +#define VP9_TX_MODE_COUNT_START \ + (VP9_SINGLE_REF_COUNT_START + VP9_SINGLE_REF_COUNT_SIZE) +#define VP9_TX_MODE_COUNT_SIZE (12 * 2) +#define VP9_SKIP_COUNT_START \ + (VP9_TX_MODE_COUNT_START + VP9_TX_MODE_COUNT_SIZE) +#define VP9_SKIP_COUNT_SIZE (3 * 2) +#define VP9_MV_SIGN_0_COUNT_START \ + (VP9_SKIP_COUNT_START + VP9_SKIP_COUNT_SIZE) +#define VP9_MV_SIGN_0_COUNT_SIZE (1 * 2) +#define VP9_MV_SIGN_1_COUNT_START \ + (VP9_MV_SIGN_0_COUNT_START + VP9_MV_SIGN_0_COUNT_SIZE) +#define VP9_MV_SIGN_1_COUNT_SIZE (1 * 2) +#define VP9_MV_BITS_0_COUNT_START \ + (VP9_MV_SIGN_1_COUNT_START + VP9_MV_SIGN_1_COUNT_SIZE) +#define VP9_MV_BITS_0_COUNT_SIZE (10 * 2) +#define VP9_MV_BITS_1_COUNT_START \ + (VP9_MV_BITS_0_COUNT_START + 
VP9_MV_BITS_0_COUNT_SIZE) +#define VP9_MV_BITS_1_COUNT_SIZE (10 * 2) +#define VP9_MV_CLASS0_HP_0_COUNT_START \ + (VP9_MV_BITS_1_COUNT_START + VP9_MV_BITS_1_COUNT_SIZE) +#define VP9_MV_CLASS0_HP_0_COUNT_SIZE (2 * 2) +#define VP9_MV_CLASS0_HP_1_COUNT_START \ + (VP9_MV_CLASS0_HP_0_COUNT_START + VP9_MV_CLASS0_HP_0_COUNT_SIZE) +#define VP9_MV_CLASS0_HP_1_COUNT_SIZE (2 * 2) + +/* Start merge_tree */ +#define VP9_INTER_MODE_COUNT_START \ + (VP9_MV_CLASS0_HP_1_COUNT_START + VP9_MV_CLASS0_HP_1_COUNT_SIZE) +#define VP9_INTER_MODE_COUNT_SIZE (7 * 4) +#define VP9_IF_Y_MODE_COUNT_START \ + (VP9_INTER_MODE_COUNT_START + VP9_INTER_MODE_COUNT_SIZE) +#define VP9_IF_Y_MODE_COUNT_SIZE (10 * 4) +#define VP9_IF_UV_MODE_COUNT_START \ + (VP9_IF_Y_MODE_COUNT_START + VP9_IF_Y_MODE_COUNT_SIZE) +#define VP9_IF_UV_MODE_COUNT_SIZE (10 * 10) +#define VP9_PARTITION_P_COUNT_START \ + (VP9_IF_UV_MODE_COUNT_START + VP9_IF_UV_MODE_COUNT_SIZE) +#define VP9_PARTITION_P_COUNT_SIZE (4 * 4 * 4) +#define VP9_INTERP_COUNT_START \ + (VP9_PARTITION_P_COUNT_START + VP9_PARTITION_P_COUNT_SIZE) +#define VP9_INTERP_COUNT_SIZE (4 * 3) +#define VP9_MV_JOINTS_COUNT_START \ + (VP9_INTERP_COUNT_START + VP9_INTERP_COUNT_SIZE) +#define VP9_MV_JOINTS_COUNT_SIZE (1 * 4) +#define VP9_MV_CLASSES_0_COUNT_START \ + (VP9_MV_JOINTS_COUNT_START + VP9_MV_JOINTS_COUNT_SIZE) +#define VP9_MV_CLASSES_0_COUNT_SIZE (1 * 11) +#define VP9_MV_CLASS0_0_COUNT_START \ + (VP9_MV_CLASSES_0_COUNT_START + VP9_MV_CLASSES_0_COUNT_SIZE) +#define VP9_MV_CLASS0_0_COUNT_SIZE (1 * 2) +#define VP9_MV_CLASSES_1_COUNT_START \ + (VP9_MV_CLASS0_0_COUNT_START + VP9_MV_CLASS0_0_COUNT_SIZE) +#define VP9_MV_CLASSES_1_COUNT_SIZE (1 * 11) +#define VP9_MV_CLASS0_1_COUNT_START \ + (VP9_MV_CLASSES_1_COUNT_START + VP9_MV_CLASSES_1_COUNT_SIZE) +#define VP9_MV_CLASS0_1_COUNT_SIZE (1 * 2) +#define VP9_MV_CLASS0_FP_0_COUNT_START \ + (VP9_MV_CLASS0_1_COUNT_START + VP9_MV_CLASS0_1_COUNT_SIZE) +#define VP9_MV_CLASS0_FP_0_COUNT_SIZE (3 * 4) +#define 
VP9_MV_CLASS0_FP_1_COUNT_START \ + (VP9_MV_CLASS0_FP_0_COUNT_START + VP9_MV_CLASS0_FP_0_COUNT_SIZE) +#define VP9_MV_CLASS0_FP_1_COUNT_SIZE (3 * 4) + +#define DC_PRED 0 /* Average of above and left pixels */ +#define V_PRED 1 /* Vertical */ +#define H_PRED 2 /* Horizontal */ +#define D45_PRED 3 /* Directional 45 deg = round(arctan(1/1) * 180/pi) */ +#define D135_PRED 4 /* Directional 135 deg = 180 - 45 */ +#define D117_PRED 5 /* Directional 117 deg = 180 - 63 */ +#define D153_PRED 6 /* Directional 153 deg = 180 - 27 */ +#define D207_PRED 7 /* Directional 207 deg = 180 + 27 */ +#define D63_PRED 8 /* Directional 63 deg = round(arctan(2/1) * 180/pi) */ +#define TM_PRED 9 /* True-motion */ + +#define ROUND_POWER_OF_TWO(value, num) (((value) + (1 << ((num) - 1))) >> (num)) + +#define MODE_MV_COUNT_SAT 20 +static const int count_to_update_factor[MODE_MV_COUNT_SAT + 1] = { + 0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64, + 70, 76, 83, 89, 96, 102, 108, 115, 121, 128 +}; + +union rpm_param { + struct { + u16 data[RPM_BUF_SIZE]; + } l; + struct { + u16 profile; + u16 show_existing_frame; + u16 frame_to_show_idx; + u16 frame_type; /*1 bit*/ + u16 show_frame; /*1 bit*/ + u16 error_resilient_mode; /*1 bit*/ + u16 intra_only; /*1 bit*/ + u16 display_size_present; /*1 bit*/ + u16 reset_frame_context; + u16 refresh_frame_flags; + u16 width; + u16 height; + u16 display_width; + u16 display_height; + u16 ref_info; + u16 same_frame_size; + u16 mode_ref_delta_enabled; + u16 ref_deltas[4]; + u16 mode_deltas[2]; + u16 filter_level; + u16 sharpness_level; + u16 bit_depth; + u16 seg_quant_info[8]; + u16 seg_enabled; + u16 seg_abs_delta; + /* bit 15: feature enabled; bit 8, sign; bit[5:0], data */ + u16 seg_lf_info[8]; + } p; +}; + +enum SEG_LVL_FEATURES { + SEG_LVL_ALT_Q = 0, /* Use alternate Quantizer */ + SEG_LVL_ALT_LF = 1, /* Use alternate loop filter value */ + SEG_LVL_REF_FRAME = 2, /* Optional Segment reference frame */ + SEG_LVL_SKIP = 3, /* Optional Segment (0,0) + skip mode */ + 
SEG_LVL_MAX = 4 /* Number of features supported */ +}; + /* Segmentation state used when deriving loop filter levels: bit (1 << feature_id) of feature_mask[seg] tells whether feature_data[seg][feature_id] is active */ +struct segmentation { + u8 enabled; + u8 update_map; + u8 update_data; + u8 abs_delta; + u8 temporal_update; + s16 feature_data[MAX_SEGMENTS][SEG_LVL_MAX]; + unsigned int feature_mask[MAX_SEGMENTS]; +}; + /* Thresholds for one filter level; in the code visible here only lim and mblim are computed (vp9_update_sharpness), hev_thr stays zero */ +struct loop_filter_thresh { + u8 mblim; + u8 lim; + u8 hev_thr; +}; + /* Threshold table for every filter level plus the resolved level for each segment / reference frame / mode delta combination */ +struct loop_filter_info_n { + struct loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1]; + u8 lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS]; +}; + /* Frame-level loop filter parameters; last_sharpness_level remembers the sharpness the threshold table was last computed for */ +struct loopfilter { + int filter_level; + + int sharpness_level; + int last_sharpness_level; + + u8 mode_ref_delta_enabled; + u8 mode_ref_delta_update; + + /*0 = Intra, Last, GF, ARF*/ + signed char ref_deltas[MAX_REF_LF_DELTAS]; + signed char last_ref_deltas[MAX_REF_LF_DELTAS]; + + /*0 = ZERO_MV, MV*/ + signed char mode_deltas[MAX_MODE_LF_DELTAS]; + signed char last_mode_deltas[MAX_MODE_LF_DELTAS]; +}; + /* One picture tracked in codec_vp9.ref_frames_list; index is the vb2 buffer index of vbuf and done is set once the buffer has been passed to amvdec_dst_buf_done() */ +struct vp9_frame { + struct list_head list; + struct vb2_v4l2_buffer *vbuf; + int index; + int intra_only; + int show; + int type; + int done; + unsigned int width; + unsigned int height; +}; + /* Per-session VP9 decoder context, stored in sess->priv */ +struct codec_vp9 { + /* VP9 context lock */ + struct mutex lock; + + /* Common part with the HEVC decoder */ + struct codec_hevc_common common; + + /* Buffer for the VP9 Workspace */ + void *workspace_vaddr; + dma_addr_t workspace_paddr; + + /* Holds the parameters parsed from the bitstream */ + union rpm_param rpm_param; + + /* Whether we detected the bitstream as 10-bit */ + int is_10bit; + + /* Coded resolution reported by the hardware */ + u32 width, height; + + /* All ref frames used by the HW at a given time */ + struct list_head ref_frames_list; + u32 frames_num; + + /* In case of downsampling (decoding with FBC but outputting in NV12M), + * we need to allocate additional buffers for FBC.
+ */ + void *fbc_buffer_vaddr[MAX_REF_PIC_NUM]; + dma_addr_t fbc_buffer_paddr[MAX_REF_PIC_NUM]; + /* Buffer index held by each reference slot, -1 when the slot is empty; next_ref_frame_map is the mapping that becomes current after the frame being decoded */ + int ref_frame_map[REF_FRAMES]; + int next_ref_frame_map[REF_FRAMES]; + struct vp9_frame *frame_refs[REFS_PER_FRAME]; + + u32 lcu_total; + + /* loop filter */ + int default_filt_lvl; + struct loop_filter_info_n lfi; + struct loopfilter lf; + struct segmentation seg_4lf; + + struct vp9_frame *cur_frame; + struct vp9_frame *prev_frame; +}; + /* Signed 64-bit by 32-bit division (div_s64) with the quotient truncated to int */ +static int div_r32(s64 m, int n) +{ + s64 qu = div_s64(m, n); + + return (int)qu; +} + /* Clamp a probability to the range [1, 255] */ +static int clip_prob(int p) +{ + return clamp_val(p, 1, 255); +} + /* Non-zero when segmentation is enabled and feature_id's bit is set in the feature mask of segment_id */ +static int segfeature_active(struct segmentation *seg, int segment_id, + enum SEG_LVL_FEATURES feature_id) +{ + return seg->enabled && + (seg->feature_mask[segment_id] & (1 << feature_id)); +} + /* Return the data value stored for feature_id in segment_id (no check that the feature is active) */ +static int get_segdata(struct segmentation *seg, int segment_id, + enum SEG_LVL_FEATURES feature_id) +{ + return seg->feature_data[segment_id][feature_id]; +} + /* Compute lim and mblim for every filter level 0..MAX_LOOP_FILTER from the given sharpness level */ +static void vp9_update_sharpness(struct loop_filter_info_n *lfi, + int sharpness_lvl) +{ + int lvl; + + /* For each possible value for the loop filter fill out limits*/ + for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) { + /* Set loop filter parameters that control sharpness.*/ + int block_inside_limit = lvl >> ((sharpness_lvl > 0) + + (sharpness_lvl > 4)); + + if (sharpness_lvl > 0) { + if (block_inside_limit > (9 - sharpness_lvl)) + block_inside_limit = (9 - sharpness_lvl); + } + + if (block_inside_limit < 1) + block_inside_limit = 1; + + lfi->lfthr[lvl].lim = (u8)block_inside_limit; + lfi->lfthr[lvl].mblim = (u8)(2 * (lvl + 2) + + block_inside_limit); + } +} + +/* Instantiate this function once when decode is started: it clears the loop filter state, writes the threshold table for sharpness 0 to HEVC_DBLK_CFG9 and selects the HEVC_DBLK_CFGB value for the platform revision */ +static void +vp9_loop_filter_init(struct amvdec_core *core, struct codec_vp9 *vp9) +{ + struct loop_filter_info_n *lfi = &vp9->lfi; + struct loopfilter *lf = &vp9->lf; + struct segmentation *seg_4lf = &vp9->seg_4lf; + int i; + + memset(lfi, 0, sizeof(struct loop_filter_info_n)); + memset(lf, 0, sizeof(struct 
loopfilter)); + memset(seg_4lf, 0, sizeof(struct segmentation)); + lf->sharpness_level = 0; + vp9_update_sharpness(lfi, lf->sharpness_level); + lf->last_sharpness_level = lf->sharpness_level; + /* 32 writes, each packing the lim/mblim of two consecutive levels (odd level in the upper 16 bits) */ + for (i = 0; i < 32; i++) { + unsigned int thr; + + thr = ((lfi->lfthr[i * 2 + 1].lim & 0x3f) << 8) | + (lfi->lfthr[i * 2 + 1].mblim & 0xff); + thr = (thr << 16) | ((lfi->lfthr[i * 2].lim & 0x3f) << 8) | + (lfi->lfthr[i * 2].mblim & 0xff); + + amvdec_write_dos(core, HEVC_DBLK_CFG9, thr); + } + /* Test the SM1 revision before G12A: with the G12A test first, any revision >= SM1 (assumed to be the later one) already matched it and the SM1 configuration could never be selected */ + if (core->platform->revision >= VDEC_REVISION_SM1) + amvdec_write_dos(core, HEVC_DBLK_CFGB, + (0x3 << 14) | /* dw fifo thres r and b */ + (0x3 << 12) | /* dw fifo thres r or b */ + (0x3 << 10) | /* dw fifo thres not r/b */ + BIT(0)); /* VP9 video format */ + else if (core->platform->revision >= VDEC_REVISION_G12A) + /* VP9 video format */ + amvdec_write_dos(core, HEVC_DBLK_CFGB, (0x54 << 8) | BIT(0)); + else + amvdec_write_dos(core, HEVC_DBLK_CFGB, 0x40400001); +} + /* Per-frame loop filter setup: refresh the threshold table if the sharpness changed, resolve the filter level of every segment / reference / mode and write the levels to HEVC_DBLK_CFGA */ +static void +vp9_loop_filter_frame_init(struct amvdec_core *core, struct segmentation *seg, + struct loop_filter_info_n *lfi, + struct loopfilter *lf, int default_filt_lvl) +{ + int i; + int seg_id; + + /* + * scale is the multiplier for lf_deltas + * the multiplier is: + * - 1 for when filter_lvl is between 0 and 31 + * - 2 when filter_lvl is between 32 and 63 + */ + const int scale = 1 << (default_filt_lvl >> 5); + + /* update limits if sharpness has changed */ + if (lf->last_sharpness_level != lf->sharpness_level) { + vp9_update_sharpness(lfi, lf->sharpness_level); + lf->last_sharpness_level = lf->sharpness_level; + + /* Write to register */ + for (i = 0; i < 32; i++) { + unsigned int thr; + + thr = ((lfi->lfthr[i * 2 + 1].lim & 0x3f) << 8) | + (lfi->lfthr[i * 2 + 1].mblim & 0xff); + thr = (thr << 16) | + ((lfi->lfthr[i * 2].lim & 0x3f) << 8) | + (lfi->lfthr[i * 2].mblim & 0xff); + + amvdec_write_dos(core, HEVC_DBLK_CFG9, thr); + } + } + + for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) { + int lvl_seg = 
default_filt_lvl; + /* An active SEG_LVL_ALT_LF feature either replaces the frame level (absolute mode) or is added to it, clamped to 0..MAX_LOOP_FILTER */ + if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) { + const int data = get_segdata(seg, seg_id, + SEG_LVL_ALT_LF); + lvl_seg = clamp_t(int, + seg->abs_delta == SEGMENT_ABSDATA ? + data : default_filt_lvl + data, + 0, MAX_LOOP_FILTER); + } + + if (!lf->mode_ref_delta_enabled) { + /* + * We could get rid of this if we assume that deltas + * are set to zero when not in use. + * encoder always uses deltas + */ + memset(lfi->lvl[seg_id], lvl_seg, + sizeof(lfi->lvl[seg_id])); + } else { + int ref, mode; + const int intra_lvl = + lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale; + lfi->lvl[seg_id][INTRA_FRAME][0] = + clamp_val(intra_lvl, 0, MAX_LOOP_FILTER); + + for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) { + for (mode = 0; mode < MAX_MODE_LF_DELTAS; + ++mode) { + const int inter_lvl = + lvl_seg + + lf->ref_deltas[ref] * scale + + lf->mode_deltas[mode] * scale; + lfi->lvl[seg_id][ref][mode] = + clamp_val(inter_lvl, 0, + MAX_LOOP_FILTER); + } + } + } + } + /* 16 writes: i >> 1 selects the segment and i & 1 the mode; each word packs the 6-bit levels of reference indexes 0..3, or 0 when the frame filter level is 0 */ + for (i = 0; i < 16; i++) { + unsigned int level; + + level = ((lfi->lvl[i >> 1][3][i & 1] & 0x3f) << 24) | + ((lfi->lvl[i >> 1][2][i & 1] & 0x3f) << 16) | + ((lfi->lvl[i >> 1][1][i & 1] & 0x3f) << 8) | + (lfi->lvl[i >> 1][0][i & 1] & 0x3f); + if (!default_filt_lvl) + level = 0; + + amvdec_write_dos(core, HEVC_DBLK_CFGA, level); + } +} + /* Empty ref_frames_list under the context lock: a frame not yet marked done is passed to amvdec_dst_buf_done() if it is a shown frame, or put back on the m2m queue otherwise; every entry is then unlinked and freed */ +static void codec_vp9_flush_output(struct amvdec_session *sess) +{ + struct codec_vp9 *vp9 = sess->priv; + struct vp9_frame *tmp, *n; + + mutex_lock(&vp9->lock); + list_for_each_entry_safe(tmp, n, &vp9->ref_frames_list, list) { + if (!tmp->done) { + if (tmp->show) + amvdec_dst_buf_done(sess, tmp->vbuf, + V4L2_FIELD_NONE); + else + v4l2_m2m_buf_queue(sess->m2m_ctx, tmp->vbuf); + + vp9->frames_num--; + } + + list_del(&tmp->list); + kfree(tmp); + } + mutex_unlock(&vp9->lock); +} + /* Return vp9->frames_num (frames taken from the destination queue and not yet done), or 0 when sess->priv is NULL */ +static u32 codec_vp9_num_pending_bufs(struct amvdec_session *sess) +{ + struct codec_vp9 *vp9 = sess->priv; + + if (!vp9) + return 0; + + return vp9->frames_num; +} + /* Allocate and zero the DMA-coherent VP9 workspace; returns 0 or -ENOMEM */ +static int 
codec_vp9_alloc_workspace(struct amvdec_core *core, + struct codec_vp9 *vp9) +{ + /* Allocate some memory for the VP9 decoder's state */ + vp9->workspace_vaddr = dma_alloc_coherent(core->dev, SIZE_WORKSPACE, + &vp9->workspace_paddr, + GFP_KERNEL); + if (!vp9->workspace_vaddr) { + dev_err(core->dev, "Failed to allocate VP9 Workspace\n"); + return -ENOMEM; + } + + memset(vp9->workspace_vaddr, 0, SIZE_WORKSPACE); + + return 0; +} + /* Program the decoder with the bus address of each workspace sub-buffer (workspace base plus a fixed per-buffer offset) and, when the MMU is used, with the MMU map address */ +static void codec_vp9_setup_workspace(struct amvdec_session *sess, + struct codec_vp9 *vp9) +{ + struct amvdec_core *core = sess->core; + u32 revision = core->platform->revision; + dma_addr_t wkaddr = vp9->workspace_paddr; + + amvdec_write_dos(core, HEVCD_IPP_LINEBUFF_BASE, wkaddr + IPP_OFFSET); + amvdec_write_dos(core, VP9_RPM_BUFFER, wkaddr + RPM_OFFSET); + amvdec_write_dos(core, VP9_SHORT_TERM_RPS, wkaddr + SH_TM_RPS_OFFSET); + amvdec_write_dos(core, VP9_PPS_BUFFER, wkaddr + PPS_OFFSET); + amvdec_write_dos(core, VP9_SAO_UP, wkaddr + SAO_UP_OFFSET); + + amvdec_write_dos(core, VP9_STREAM_SWAP_BUFFER, + wkaddr + SWAP_BUF_OFFSET); + amvdec_write_dos(core, VP9_STREAM_SWAP_BUFFER2, + wkaddr + SWAP_BUF2_OFFSET); + amvdec_write_dos(core, VP9_SCALELUT, wkaddr + SCALELUT_OFFSET); + + if (core->platform->revision >= VDEC_REVISION_G12A) + amvdec_write_dos(core, HEVC_DBLK_CFGE, + wkaddr + DBLK_PARA_OFFSET); + + amvdec_write_dos(core, HEVC_DBLK_CFG4, wkaddr + DBLK_PARA_OFFSET); + amvdec_write_dos(core, HEVC_DBLK_CFG5, wkaddr + DBLK_DATA_OFFSET); + amvdec_write_dos(core, VP9_SEG_MAP_BUFFER, wkaddr + SEG_MAP_OFFSET); + amvdec_write_dos(core, VP9_PROB_SWAP_BUFFER, wkaddr + PROB_OFFSET); + amvdec_write_dos(core, VP9_COUNT_SWAP_BUFFER, wkaddr + COUNT_OFFSET); + amvdec_write_dos(core, LMEM_DUMP_ADR, wkaddr + LMEM_OFFSET); + + if (codec_hevc_use_mmu(revision, sess->pixfmt_cap, vp9->is_10bit)) { + /* the two VH addresses split the MMU_VBH area into halves */ + amvdec_write_dos(core, HEVC_SAO_MMU_VH0_ADDR, + wkaddr + MMU_VBH_OFFSET); + amvdec_write_dos(core, HEVC_SAO_MMU_VH1_ADDR, + wkaddr + MMU_VBH_OFFSET + (MMU_VBH_SIZE 
/ 2)); + /* the MMU map address register differs between G12A-and-later and older revisions */ + if (revision >= VDEC_REVISION_G12A) + amvdec_write_dos(core, HEVC_ASSIST_MMU_MAP_ADDR, + vp9->common.mmu_map_paddr); + else + amvdec_write_dos(core, VP9_MMU_MAP_BUFFER, + vp9->common.mmu_map_paddr); + } +} + /* Allocate the codec context and its workspace, program the initial decoder registers and the loop filter, then publish the context in sess->priv. Returns 0 on success or a negative errno (the context is freed on failure) */ +static int codec_vp9_start(struct amvdec_session *sess) +{ + struct amvdec_core *core = sess->core; + struct codec_vp9 *vp9; + u32 val; + int i; + int ret; + + vp9 = kzalloc(sizeof(*vp9), GFP_KERNEL); + if (!vp9) + return -ENOMEM; + + ret = codec_vp9_alloc_workspace(core, vp9); + if (ret) + goto free_vp9; + + codec_vp9_setup_workspace(sess, vp9); + amvdec_write_dos_bits(core, HEVC_STREAM_CONTROL, BIT(0)); + /* stream_fifo_hole */ + if (core->platform->revision >= VDEC_REVISION_G12A) + amvdec_write_dos_bits(core, HEVC_STREAM_FIFO_CTL, BIT(29)); + + val = amvdec_read_dos(core, HEVC_PARSER_INT_CONTROL) & 0x7fffffff; + val |= (3 << 29) | BIT(24) | BIT(22) | BIT(7) | BIT(4) | BIT(0); + amvdec_write_dos(core, HEVC_PARSER_INT_CONTROL, val); + amvdec_write_dos_bits(core, HEVC_SHIFT_STATUS, BIT(0)); + amvdec_write_dos(core, HEVC_SHIFT_CONTROL, BIT(10) | BIT(9) | + (3 << 6) | BIT(5) | BIT(2) | BIT(1) | BIT(0)); + amvdec_write_dos(core, HEVC_CABAC_CONTROL, BIT(0)); + amvdec_write_dos(core, HEVC_PARSER_CORE_CONTROL, BIT(0)); + amvdec_write_dos(core, HEVC_SHIFT_STARTCODE, 0x00000001); + + amvdec_write_dos(core, VP9_DEC_STATUS_REG, 0); + /* load the parser command table: BIT(16) is written first, then every entry of vdec_hevc_parser_cmd[] to the same register */ + amvdec_write_dos(core, HEVC_PARSER_CMD_WRITE, BIT(16)); + for (i = 0; i < ARRAY_SIZE(vdec_hevc_parser_cmd); ++i) + amvdec_write_dos(core, HEVC_PARSER_CMD_WRITE, + vdec_hevc_parser_cmd[i]); + + amvdec_write_dos(core, HEVC_PARSER_CMD_SKIP_0, PARSER_CMD_SKIP_CFG_0); + amvdec_write_dos(core, HEVC_PARSER_CMD_SKIP_1, PARSER_CMD_SKIP_CFG_1); + amvdec_write_dos(core, HEVC_PARSER_CMD_SKIP_2, PARSER_CMD_SKIP_CFG_2); + amvdec_write_dos(core, HEVC_PARSER_IF_CONTROL, + BIT(5) | BIT(2) | BIT(0)); + + amvdec_write_dos(core, HEVCD_IPP_TOP_CNTL, BIT(0)); + amvdec_write_dos(core, HEVCD_IPP_TOP_CNTL, BIT(1)); + + amvdec_write_dos(core, 
VP9_WAIT_FLAG, 1); + + /* clear mailbox interrupt */ + amvdec_write_dos(core, HEVC_ASSIST_MBOX1_CLR_REG, 1); + /* enable mailbox interrupt */ + amvdec_write_dos(core, HEVC_ASSIST_MBOX1_MASK, 1); + /* disable PSCALE for hardware sharing */ + amvdec_write_dos(core, HEVC_PSCALE_CTRL, 0); + /* Let the uCode do all the parsing */ + amvdec_write_dos(core, NAL_SEARCH_CTL, 0x8); + + amvdec_write_dos(core, DECODE_STOP_POS, 0); + amvdec_write_dos(core, VP9_DECODE_MODE, DECODE_MODE_SINGLE); + + pr_debug("decode_count: %u; decode_size: %u\n", + amvdec_read_dos(core, HEVC_DECODE_COUNT), + amvdec_read_dos(core, HEVC_DECODE_SIZE)); + + vp9_loop_filter_init(core, vp9); + + INIT_LIST_HEAD(&vp9->ref_frames_list); + mutex_init(&vp9->lock); + /* memset with -1 sets every byte to 0xff, i.e. every int slot to -1 (empty) */ + memset(&vp9->ref_frame_map, -1, sizeof(vp9->ref_frame_map)); + memset(&vp9->next_ref_frame_map, -1, sizeof(vp9->next_ref_frame_map)); + for (i = 0; i < REFS_PER_FRAME; ++i) + vp9->frame_refs[i] = NULL; + sess->priv = vp9; + + return 0; + +free_vp9: + kfree(vp9); + return ret; +} + /* Free the workspace and the FBC buffers under the context lock; always returns 0. NOTE(review): the codec_vp9 context itself is not freed here - presumably the caller releases sess->priv; confirm */ +static int codec_vp9_stop(struct amvdec_session *sess) +{ + struct amvdec_core *core = sess->core; + struct codec_vp9 *vp9 = sess->priv; + + mutex_lock(&vp9->lock); + if (vp9->workspace_vaddr) + dma_free_coherent(core->dev, SIZE_WORKSPACE, + vp9->workspace_vaddr, + vp9->workspace_paddr); + + codec_hevc_free_fbc_buffers(sess, &vp9->common); + mutex_unlock(&vp9->lock); + + return 0; +} + /* Program the output stage for destination buffer vb: output addresses and lengths, compressed / uncompressed write enables, endianness and AXI interface settings */ +static void codec_vp9_set_sao(struct amvdec_session *sess, + struct vb2_buffer *vb) +{ + struct amvdec_core *core = sess->core; + struct codec_vp9 *vp9 = sess->priv; + + dma_addr_t buf_y_paddr; + dma_addr_t buf_u_v_paddr; + u32 val; + /* when downsampling, the compressed body goes to the driver's own FBC buffer instead of plane 0 of vb */ + if (codec_hevc_use_downsample(sess->pixfmt_cap, vp9->is_10bit)) + buf_y_paddr = + vp9->common.fbc_buffer_paddr[vb->index]; + else + buf_y_paddr = + vb2_dma_contig_plane_dma_addr(vb, 0); + + if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) { + val = amvdec_read_dos(core, HEVC_SAO_CTRL5) & ~0xff0200; + amvdec_write_dos(core, HEVC_SAO_CTRL5, val); + 
amvdec_write_dos(core, HEVC_CM_BODY_START_ADDR, buf_y_paddr); + } + /* NV12M capture: the uncompressed picture is written to planes 0 (Y) and 1 (UV) of vb */ + if (sess->pixfmt_cap == V4L2_PIX_FMT_NV12M) { + buf_y_paddr = + vb2_dma_contig_plane_dma_addr(vb, 0); + buf_u_v_paddr = + vb2_dma_contig_plane_dma_addr(vb, 1); + amvdec_write_dos(core, HEVC_SAO_Y_START_ADDR, buf_y_paddr); + amvdec_write_dos(core, HEVC_SAO_C_START_ADDR, buf_u_v_paddr); + amvdec_write_dos(core, HEVC_SAO_Y_WPTR, buf_y_paddr); + amvdec_write_dos(core, HEVC_SAO_C_WPTR, buf_u_v_paddr); + } + + if (codec_hevc_use_mmu(core->platform->revision, sess->pixfmt_cap, + vp9->is_10bit)) { + amvdec_write_dos(core, HEVC_CM_HEADER_START_ADDR, + vp9->common.mmu_header_paddr[vb->index]); + /* use HEVC_CM_HEADER_START_ADDR */ + amvdec_write_dos_bits(core, HEVC_SAO_CTRL5, BIT(10)); + } + /* the chroma length is programmed as half the luma length */ + amvdec_write_dos(core, HEVC_SAO_Y_LENGTH, + amvdec_get_output_size(sess)); + amvdec_write_dos(core, HEVC_SAO_C_LENGTH, + (amvdec_get_output_size(sess) / 2)); + + if (core->platform->revision >= VDEC_REVISION_G12A) { + /* start from all four mode bits cleared, then set the ones that apply */ + amvdec_clear_dos_bits(core, HEVC_DBLK_CFGB, + BIT(4) | BIT(5) | BIT(8) | BIT(9)); + /* enable first, compressed write */ + if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) + amvdec_write_dos_bits(core, HEVC_DBLK_CFGB, BIT(8)); + + /* enable second, uncompressed write */ + if (sess->pixfmt_cap == V4L2_PIX_FMT_NV12M) + amvdec_write_dos_bits(core, HEVC_DBLK_CFGB, BIT(9)); + + /* dblk pipeline mode=1 for performance */ + if (sess->width >= 1280) + amvdec_write_dos_bits(core, HEVC_DBLK_CFGB, BIT(4)); + + pr_debug("HEVC_DBLK_CFGB: %08X\n", + amvdec_read_dos(core, HEVC_DBLK_CFGB)); + } + + val = amvdec_read_dos(core, HEVC_SAO_CTRL1) & ~0x3ff0; + val |= 0xff0; /* Set endianness for 2-bytes swaps (nv12) */ + if (core->platform->revision < VDEC_REVISION_G12A) { + val &= ~0x3; + if (!codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) + val |= BIT(0); /* disable cm compression */ + /* TOFIX: Handle Amlogic Framebuffer compression */ + } + + amvdec_write_dos(core, HEVC_SAO_CTRL1, val); + 
pr_debug("HEVC_SAO_CTRL1: %08X\n", val); + + /* no downscale for NV12 */ + val = amvdec_read_dos(core, HEVC_SAO_CTRL5) & ~0xff0000; + amvdec_write_dos(core, HEVC_SAO_CTRL5, val); + + val = amvdec_read_dos(core, HEVCD_IPP_AXIIF_CONFIG) & ~0x30; + val |= 0xf; + val &= ~BIT(12); /* NV12 */ + amvdec_write_dos(core, HEVCD_IPP_AXIIF_CONFIG, val); +} + /* Bus address of the motion vector buffer of a frame: each frame index owns a MPRED_MV_BUF_SIZE slice of the workspace starting at MPRED_MV_OFFSET */ +static dma_addr_t codec_vp9_get_frame_mv_paddr(struct codec_vp9 *vp9, + struct vp9_frame *frame) +{ + return vp9->workspace_paddr + MPRED_MV_OFFSET + + (frame->index * MPRED_MV_BUF_SIZE); +} + /* Configure motion vector prediction: MVs are written to the current frame's buffer and read from the previous frame's. BIT(6) of HEVC_MPRED_CTRL4 is set only when the previous frame has the same size as the current one, was shown, was not intra-only and was not a key frame. NOTE(review): vp9->prev_frame and vp9->cur_frame are dereferenced without a NULL check - confirm the caller guarantees both */ +static void codec_vp9_set_mpred_mv(struct amvdec_core *core, + struct codec_vp9 *vp9) +{ + int mpred_mv_rd_end_addr; + int use_prev_frame_mvs = vp9->prev_frame->width == + vp9->cur_frame->width && + vp9->prev_frame->height == + vp9->cur_frame->height && + !vp9->prev_frame->intra_only && + vp9->prev_frame->show && + vp9->prev_frame->type != KEY_FRAME; + + amvdec_write_dos(core, HEVC_MPRED_CTRL3, 0x24122412); + amvdec_write_dos(core, HEVC_MPRED_ABV_START_ADDR, + vp9->workspace_paddr + MPRED_ABV_OFFSET); + + amvdec_clear_dos_bits(core, HEVC_MPRED_CTRL4, BIT(6)); + if (use_prev_frame_mvs) + amvdec_write_dos_bits(core, HEVC_MPRED_CTRL4, BIT(6)); + + amvdec_write_dos(core, HEVC_MPRED_MV_WR_START_ADDR, + codec_vp9_get_frame_mv_paddr(vp9, vp9->cur_frame)); + amvdec_write_dos(core, HEVC_MPRED_MV_WPTR, + codec_vp9_get_frame_mv_paddr(vp9, vp9->cur_frame)); + + amvdec_write_dos(core, HEVC_MPRED_MV_RD_START_ADDR, + codec_vp9_get_frame_mv_paddr(vp9, vp9->prev_frame)); + amvdec_write_dos(core, HEVC_MPRED_MV_RPTR, + codec_vp9_get_frame_mv_paddr(vp9, vp9->prev_frame)); + /* the read window ends lcu_total * MV_MEM_UNIT bytes after the start of the previous frame's buffer */ + mpred_mv_rd_end_addr = + codec_vp9_get_frame_mv_paddr(vp9, vp9->prev_frame) + + (vp9->lcu_total * MV_MEM_UNIT); + amvdec_write_dos(core, HEVC_MPRED_MV_RD_END_ADDR, mpred_mv_rd_end_addr); +} + /* Build next_ref_frame_map for the frame being decoded: slots selected by the refresh mask (0xff for a key frame) take the current buffer index, all other slots keep their current mapping */ +static void codec_vp9_update_next_ref(struct codec_vp9 *vp9) +{ + union rpm_param *param = &vp9->rpm_param; + u32 buf_idx = vp9->cur_frame->index; + int ref_index = 0; + int 
refresh_frame_flags; + int mask; + + refresh_frame_flags = vp9->cur_frame->type == KEY_FRAME ? + 0xff : param->p.refresh_frame_flags; + /* walk the mask from bit 0 up to its highest set bit */ + for (mask = refresh_frame_flags; mask; mask >>= 1) { + pr_debug("mask=%08X; ref_index=%d\n", mask, ref_index); + if (mask & 1) + vp9->next_ref_frame_map[ref_index] = buf_idx; + else + vp9->next_ref_frame_map[ref_index] = + vp9->ref_frame_map[ref_index]; + + ++ref_index; + } + /* slots above the highest set bit are carried over unchanged */ + for (; ref_index < REF_FRAMES; ++ref_index) + vp9->next_ref_frame_map[ref_index] = + vp9->ref_frame_map[ref_index]; +} + /* Placeholder: for each reference of the current frame whose slot holds a buffer, only a warning is printed; nothing is saved */ +static void codec_vp9_save_refs(struct codec_vp9 *vp9) +{ + union rpm_param *param = &vp9->rpm_param; + int i; + + for (i = 0; i < REFS_PER_FRAME; ++i) { + /* each reference uses a 4-bit field of ref_info (first reference in the highest field); the slot number is bits [3:1] of that field */ + const int ref = (param->p.ref_info >> + (((REFS_PER_FRAME - i - 1) * 4) + 1)) & 0x7; + + if (vp9->ref_frame_map[ref] < 0) + continue; + + pr_warn("%s: FIXME, would need to save ref %d\n", + __func__, vp9->ref_frame_map[ref]); + } +} + /* Commit next_ref_frame_map into ref_frame_map: slots up to the highest set bit of the refresh mask are always copied, the remaining slots are copied unless show_existing_frame is set. Does nothing when there is no current frame */ +static void codec_vp9_update_ref(struct codec_vp9 *vp9) +{ + union rpm_param *param = &vp9->rpm_param; + int ref_index = 0; + int mask; + int refresh_frame_flags; + + if (!vp9->cur_frame) + return; + + refresh_frame_flags = vp9->cur_frame->type == KEY_FRAME ? 
+ 0xff : param->p.refresh_frame_flags; + + for (mask = refresh_frame_flags; mask; mask >>= 1) { + vp9->ref_frame_map[ref_index] = + vp9->next_ref_frame_map[ref_index]; + ++ref_index; + } + + if (param->p.show_existing_frame) + return; + + for (; ref_index < REF_FRAMES; ++ref_index) + vp9->ref_frame_map[ref_index] = + vp9->next_ref_frame_map[ref_index]; +} + /* Find the tracked frame with the given buffer index; returns NULL when there is none */ +static struct vp9_frame *codec_vp9_get_frame_by_idx(struct codec_vp9 *vp9, + int idx) +{ + struct vp9_frame *frame; + + list_for_each_entry(frame, &vp9->ref_frames_list, list) { + if (frame->index == idx) + return frame; + } + + return NULL; +} + /* Resolve frame_refs[] for the current frame: the slot number of each reference is extracted from ref_info, mapped to a buffer index through ref_frame_map and looked up in the frame list; a reference that cannot be found is left NULL with a warning */ +static void codec_vp9_sync_ref(struct codec_vp9 *vp9) +{ + union rpm_param *param = &vp9->rpm_param; + int i; + + for (i = 0; i < REFS_PER_FRAME; ++i) { + const int ref = (param->p.ref_info >> + (((REFS_PER_FRAME - i - 1) * 4) + 1)) & 0x7; + const int idx = vp9->ref_frame_map[ref]; + + vp9->frame_refs[i] = codec_vp9_get_frame_by_idx(vp9, idx); + if (!vp9->frame_refs[i]) + pr_warn("%s: couldn't find VP9 ref %d\n", __func__, + idx); + } +} + /* Write one canvas entry per available reference; missing references are skipped */ +static void codec_vp9_set_refs(struct amvdec_session *sess, + struct codec_vp9 *vp9) +{ + struct amvdec_core *core = sess->core; + int i; + + for (i = 0; i < REFS_PER_FRAME; ++i) { + struct vp9_frame *frame = vp9->frame_refs[i]; + int id_y; + int id_u_v; + + if (!frame) + continue; + /* with FBC both ids are the frame index; otherwise Y uses index * 2 and UV index * 2 + 1 */ + if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) { + id_y = frame->index; + id_u_v = id_y; + } else { + id_y = frame->index * 2; + id_u_v = id_y + 1; + } + + amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_DATA_ADDR, + (id_u_v << 16) | (id_u_v << 8) | id_y); + } +} + /* Program motion compensation: the reference canvases are written twice (after two different ACCCONFIG values), then each reference's width, height, size ratios relative to the current frame (shifted left by 14) and body size are written; scaling is enabled if any reference size differs from the current size */ +static void codec_vp9_set_mc(struct amvdec_session *sess, + struct codec_vp9 *vp9) +{ + struct amvdec_core *core = sess->core; + u32 scale = 0; + u32 sz; + int i; + + amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_ACCCONFIG_ADDR, 1); + codec_vp9_set_refs(sess, vp9); + amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_ACCCONFIG_ADDR, + (16 << 8) | 1); + codec_vp9_set_refs(sess, vp9); + + 
amvdec_write_dos(core, VP9D_MPP_REFINFO_TBL_ACCCONFIG, BIT(2)); + for (i = 0; i < REFS_PER_FRAME; ++i) { + if (!vp9->frame_refs[i]) + continue; + + if (vp9->frame_refs[i]->width != vp9->width || + vp9->frame_refs[i]->height != vp9->height) + scale = 1; + + sz = amvdec_am21c_body_size(vp9->frame_refs[i]->width, + vp9->frame_refs[i]->height); + /* five consecutive writes per reference: width, height, horizontal ratio, vertical ratio, body size / 32 */ + amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, + vp9->frame_refs[i]->width); + amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, + vp9->frame_refs[i]->height); + amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, + (vp9->frame_refs[i]->width << 14) / + vp9->width); + amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, + (vp9->frame_refs[i]->height << 14) / + vp9->height); + amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, sz >> 5); + } + + amvdec_write_dos(core, VP9D_MPP_REF_SCALE_ENBL, scale); +} + /* Take a destination buffer whose index is not already tracked, wrap it in a new vp9_frame filled from the current RPM parameters and append it to ref_frames_list. Returns NULL if the allocation fails or no destination buffer is available */ +static struct vp9_frame *codec_vp9_get_new_frame(struct amvdec_session *sess) +{ + struct codec_vp9 *vp9 = sess->priv; + union rpm_param *param = &vp9->rpm_param; + struct vb2_v4l2_buffer *vbuf; + struct vp9_frame *new_frame; + + new_frame = kzalloc(sizeof(*new_frame), GFP_KERNEL); + if (!new_frame) + return NULL; + + vbuf = v4l2_m2m_dst_buf_remove(sess->m2m_ctx); + if (!vbuf) { + dev_err(sess->core->dev, "No dst buffer available\n"); + kfree(new_frame); + return NULL; + } + /* a buffer whose index is still used by a tracked frame is put back on the queue and the next one is tried */ + while (codec_vp9_get_frame_by_idx(vp9, vbuf->vb2_buf.index)) { + struct vb2_v4l2_buffer *old_vbuf = vbuf; + + vbuf = v4l2_m2m_dst_buf_remove(sess->m2m_ctx); + v4l2_m2m_buf_queue(sess->m2m_ctx, old_vbuf); + if (!vbuf) { + dev_err(sess->core->dev, "No dst buffer available\n"); + kfree(new_frame); + return NULL; + } + } + + new_frame->vbuf = vbuf; + new_frame->index = vbuf->vb2_buf.index; + new_frame->intra_only = param->p.intra_only; + new_frame->show = param->p.show_frame; + new_frame->type = param->p.frame_type; + new_frame->width = vp9->width; + new_frame->height = vp9->height; + list_add_tail(&new_frame->list, &vp9->ref_frames_list); + vp9->frames_num++; + + return 
new_frame; +} + /* Only logs the index of the frame to show when show_existing_frame is set; no frame is output here */ +static void codec_vp9_show_existing_frame(struct codec_vp9 *vp9) +{ + union rpm_param *param = &vp9->rpm_param; + + if (!param->p.show_existing_frame) + return; + + pr_debug("showing frame %u\n", param->p.frame_to_show_idx); +} + /* Put the buffer of the first tracked non-shown frame back on the m2m queue and free its entry; at most one frame is removed per call */ +static void codec_vp9_rm_noshow_frame(struct amvdec_session *sess) +{ + struct codec_vp9 *vp9 = sess->priv; + struct vp9_frame *tmp; + + list_for_each_entry(tmp, &vp9->ref_frames_list, list) { + if (tmp->show) + continue; + + pr_debug("rm noshow: %u\n", tmp->index); + v4l2_m2m_buf_queue(sess->m2m_ctx, tmp->vbuf); + list_del(&tmp->list); + kfree(tmp); + vp9->frames_num--; + /* returning right after the removal is what makes the non-_safe iterator acceptable here */ + return; + } +} + /* Set up the hardware for the frame described by the current RPM parameters (references, MMU map, motion compensation, output, loop filter) and ask the firmware to decode it; returns early when no new frame can be obtained */ +static void codec_vp9_process_frame(struct amvdec_session *sess) +{ + struct amvdec_core *core = sess->core; + struct codec_vp9 *vp9 = sess->priv; + union rpm_param *param = &vp9->rpm_param; + int intra_only; + + if (!param->p.show_frame) + codec_vp9_rm_noshow_frame(sess); + + vp9->cur_frame = codec_vp9_get_new_frame(sess); + if (!vp9->cur_frame) + return; + + pr_debug("frame %d: type: %08X; show_exist: %u; show: %u, intra_only: %u\n", + vp9->cur_frame->index, + param->p.frame_type, param->p.show_existing_frame, + param->p.show_frame, param->p.intra_only); + + if (param->p.frame_type != KEY_FRAME) + codec_vp9_sync_ref(vp9); + codec_vp9_update_next_ref(vp9); + codec_vp9_show_existing_frame(vp9); + + if (codec_hevc_use_mmu(core->platform->revision, sess->pixfmt_cap, + vp9->is_10bit)) + codec_hevc_fill_mmu_map(sess, &vp9->common, + &vp9->cur_frame->vbuf->vb2_buf); + /* intra_only is only honoured for frames that are not shown */ + intra_only = param->p.show_frame ? 
0 : param->p.intra_only; + + /* clear mpred (for keyframe only) */ + if (param->p.frame_type != KEY_FRAME && !intra_only) { + codec_vp9_set_mc(sess, vp9); + codec_vp9_set_mpred_mv(core, vp9); + } else { + amvdec_clear_dos_bits(core, HEVC_MPRED_CTRL4, BIT(6)); + } + /* picture size register: height in the upper 16 bits, width in the lower 16 */ + amvdec_write_dos(core, HEVC_PARSER_PICTURE_SIZE, + (vp9->height << 16) | vp9->width); + codec_vp9_set_sao(sess, &vp9->cur_frame->vbuf->vb2_buf); + + vp9_loop_filter_frame_init(core, &vp9->seg_4lf, + &vp9->lfi, &vp9->lf, + vp9->default_filt_lvl); + + /* ask uCode to start decoding */ + amvdec_write_dos(core, VP9_DEC_STATUS_REG, VP9_10B_DECODE_SLICE); +} + /* Copy the loop filter and segmentation parameters of the current frame from the RPM block into the driver's loop filter state */ +static void codec_vp9_process_lf(struct codec_vp9 *vp9) +{ + union rpm_param *param = &vp9->rpm_param; + int i; + + vp9->lf.mode_ref_delta_enabled = param->p.mode_ref_delta_enabled; + vp9->lf.sharpness_level = param->p.sharpness_level; + vp9->default_filt_lvl = param->p.filter_level; + vp9->seg_4lf.enabled = param->p.seg_enabled; + vp9->seg_4lf.abs_delta = param->p.seg_abs_delta; + /* the u16 RPM words are stored into signed char fields here */ + for (i = 0; i < 4; i++) + vp9->lf.ref_deltas[i] = param->p.ref_deltas[i]; + + for (i = 0; i < 2; i++) + vp9->lf.mode_deltas[i] = param->p.mode_deltas[i]; + /* bit 15 of seg_lf_info marks the alternate loop filter feature as enabled for the segment */ + for (i = 0; i < MAX_SEGMENTS; i++) + vp9->seg_4lf.feature_mask[i] = + (param->p.seg_lf_info[i] & 0x8000) ? + (1 << SEG_LVL_ALT_LF) : 0; + /* bit 8 is the sign and bits [5:0] the magnitude of the feature value */ + for (i = 0; i < MAX_SEGMENTS; i++) + vp9->seg_4lf.feature_data[i][SEG_LVL_ALT_LF] = + (param->p.seg_lf_info[i] & 0x100) ? 
+ -(param->p.seg_lf_info[i] & 0x3f) + : (param->p.seg_lf_info[i] & 0x3f); +} + /* Under the context lock: set up the buffers, reprogram the workspace and decode head, then process the loop filter parameters and the frame. The session is aborted if the buffer setup fails */ +static void codec_vp9_resume(struct amvdec_session *sess) +{ + struct codec_vp9 *vp9 = sess->priv; + + mutex_lock(&vp9->lock); + + if (codec_hevc_setup_buffers(sess, &vp9->common, vp9->is_10bit)) { + mutex_unlock(&vp9->lock); + amvdec_abort(sess); + return; + } + + codec_vp9_setup_workspace(sess, vp9); + codec_hevc_setup_decode_head(sess, vp9->is_10bit); + codec_vp9_process_lf(vp9); + codec_vp9_process_frame(sess); + + mutex_unlock(&vp9->lock); +} + +/* + * The RPM section within the workspace contains + * a lot of information about the parsed bitstream. + * It is copied into vp9->rpm_param with the order of the + * u16 words reversed inside each group of four. + */ +static void codec_vp9_fetch_rpm(struct amvdec_session *sess) +{ + struct codec_vp9 *vp9 = sess->priv; + u16 *rpm_vaddr = vp9->workspace_vaddr + RPM_OFFSET; + int i, j; + + for (i = 0; i < RPM_BUF_SIZE; i += 4) + for (j = 0; j < 4; j++) + vp9->rpm_param.l.data[i + j] = rpm_vaddr[i + 3 - j]; +} + /* Update width, height, bit depth and lcu_total from the RPM block. Returns 1 when the resolution or the bit depth differs from the previously stored values, 0 otherwise */ +static int codec_vp9_process_rpm(struct codec_vp9 *vp9) +{ + union rpm_param *param = &vp9->rpm_param; + int src_changed = 0; + int is_10bit = 0; + int pic_width_64 = ALIGN(param->p.width, 64); + int pic_height_32 = ALIGN(param->p.height, 32); + /* number of LCUs per row / column, rounded up */ + int pic_width_lcu = (pic_width_64 % LCU_SIZE) ? + pic_width_64 / LCU_SIZE + 1 + : pic_width_64 / LCU_SIZE; + int pic_height_lcu = (pic_height_32 % LCU_SIZE) ? 
+ pic_height_32 / LCU_SIZE + 1 + : pic_height_32 / LCU_SIZE; + vp9->lcu_total = pic_width_lcu * pic_height_lcu; + /* any bit depth other than 10 is treated as not 10-bit */ + if (param->p.bit_depth == 10) + is_10bit = 1; + + if (vp9->width != param->p.width || vp9->height != param->p.height || + vp9->is_10bit != is_10bit) + src_changed = 1; + + vp9->width = param->p.width; + vp9->height = param->p.height; + vp9->is_10bit = is_10bit; + + pr_debug("width: %u; height: %u; is_10bit: %d; src_changed: %d\n", + vp9->width, vp9->height, is_10bit, src_changed); + + return src_changed; +} + /* True when the buffer index of the frame is present in ref_frame_map */ +static bool codec_vp9_is_ref(struct codec_vp9 *vp9, struct vp9_frame *frame) +{ + int i; + + for (i = 0; i < REF_FRAMES; ++i) + if (vp9->ref_frame_map[i] == frame->index) + return true; + + return false; +} + /* For every tracked frame that is shown and is not the current frame: pass it to amvdec_dst_buf_done() once, then free its entry unless it is still referenced by ref_frame_map or is prev_frame */ +static void codec_vp9_show_frame(struct amvdec_session *sess) +{ + struct codec_vp9 *vp9 = sess->priv; + struct vp9_frame *tmp, *n; + + list_for_each_entry_safe(tmp, n, &vp9->ref_frames_list, list) { + if (!tmp->show || tmp == vp9->cur_frame) + continue; + + if (!tmp->done) { + pr_debug("Doning %u\n", tmp->index); + amvdec_dst_buf_done(sess, tmp->vbuf, V4L2_FIELD_NONE); + tmp->done = 1; + vp9->frames_num--; + } + + if (codec_vp9_is_ref(vp9, tmp) || tmp == vp9->prev_frame) + continue; + + pr_debug("deleting %d\n", tmp->index); + list_del(&tmp->list); + kfree(tmp); + } +} + /* Adapt one 8-bit probability from the left / right branch counts of a tree node. Probability number n is stored in word (n / 4) * 2 of the buffers, byte lane n & 3; the new value is written to cur_prob. tree_i and node are not used by the body */ +static void vp9_tree_merge_probs(unsigned int *prev_prob, + unsigned int *cur_prob, + int coef_node_start, int tree_left, + int tree_right, + int tree_i, int node) +{ + int prob_32, prob_res, prob_shift; + int pre_prob, new_prob; + int den, m_count, get_prob, factor; + + prob_32 = prev_prob[coef_node_start / 4 * 2]; + prob_res = coef_node_start & 3; + prob_shift = prob_res * 8; + pre_prob = (prob_32 >> prob_shift) & 0xff; + + den = tree_left + tree_right; + /* with no observations the previous probability is kept */ + if (den == 0) { + new_prob = pre_prob; + } else { + m_count = den < MODE_MV_COUNT_SAT ? 
den : MODE_MV_COUNT_SAT; + /* measured probability of the left branch, scaled to 0..256 with rounding and clipped to 1..255 */ + get_prob = + clip_prob(div_r32(((int64_t)tree_left * 256 + + (den >> 1)), + den)); + + /* weighted_prob */ + factor = count_to_update_factor[m_count]; + new_prob = ROUND_POWER_OF_TWO(pre_prob * (256 - factor) + + get_prob * factor, 8); + } + /* replace only this probability's byte lane in the destination word */ + cur_prob[coef_node_start / 4 * 2] = + (cur_prob[coef_node_start / 4 * 2] & (~(0xff << prob_shift))) | + (new_prob << prob_shift); +} + /* Adapt the coefficient probabilities of cxt_num consecutive contexts: each context has 3 probability nodes (starting at coef_cxt_start) derived from 5 counters (starting at coef_count_cxt_start); the blend weight is update_factor scaled by the observation count saturated at 24 */ +static void adapt_coef_probs_cxt(unsigned int *prev_prob, + unsigned int *cur_prob, + unsigned int *count, + int update_factor, + int cxt_num, + int coef_cxt_start, + int coef_count_cxt_start) +{ + int prob_32, prob_res, prob_shift; + int pre_prob, new_prob; + int num, den, m_count, get_prob, factor; + int node, coef_node_start; + int count_sat = 24; + int cxt; + + for (cxt = 0; cxt < cxt_num; cxt++) { + const int n0 = count[coef_count_cxt_start]; + const int n1 = count[coef_count_cxt_start + 1]; + const int n2 = count[coef_count_cxt_start + 2]; + const int neob = count[coef_count_cxt_start + 3]; + const int nneob = count[coef_count_cxt_start + 4]; + /* { left, right } branch counts of the three nodes */ + const unsigned int branch_ct[3][2] = { + { neob, nneob }, + { n0, n1 + n2 }, + { n1, n2 } + }; + + coef_node_start = coef_cxt_start; + for (node = 0 ; node < 3 ; node++) { + prob_32 = prev_prob[coef_node_start / 4 * 2]; + prob_res = coef_node_start & 3; + prob_shift = prob_res * 8; + pre_prob = (prob_32 >> prob_shift) & 0xff; + + /* get binary prob */ + num = branch_ct[node][0]; + den = branch_ct[node][0] + branch_ct[node][1]; + m_count = den < count_sat ? den : count_sat; + /* 128 is used when there are no observations (the factor is then 0 as well) */ + get_prob = (den == 0) ? 
+ 128u : + clip_prob(div_r32(((int64_t)num * 256 + + (den >> 1)), den)); + + factor = update_factor * m_count / count_sat; + new_prob = + ROUND_POWER_OF_TWO(pre_prob * (256 - factor) + + get_prob * factor, 8); + /* replace only this probability's byte lane in the destination word */ + cur_prob[coef_node_start / 4 * 2] = + (cur_prob[coef_node_start / 4 * 2] & + (~(0xff << prob_shift))) | + (new_prob << prob_shift); + + coef_node_start += 1; + } + /* advance to the next context: 3 probabilities and 5 counters per context */ + coef_cxt_start = coef_cxt_start + 3; + coef_count_cxt_start = coef_count_cxt_start + 5; + } +} + +static void adapt_coef_probs(int prev_kf, int cur_kf, int pre_fc, + unsigned int *prev_prob, unsigned int *cur_prob, + unsigned int *count) +{ + int tx_size, coef_tx_size_start, coef_count_tx_size_start; + int plane, coef_plane_start, coef_count_plane_start; + int type, coef_type_start, coef_count_type_start; + int band, coef_band_start, coef_count_band_start; + int cxt_num; + int coef_cxt_start, coef_count_cxt_start; + int node, coef_node_start, coef_count_node_start; + + int tree_i, tree_left, tree_right; + int mvd_i; + + int update_factor = cur_kf ? 112 : (prev_kf ? 
128 : 112); + + int prob_32; + int prob_res; + int prob_shift; + int pre_prob; + + int den; + int get_prob; + int m_count; + int factor; + + int new_prob; + + for (tx_size = 0 ; tx_size < 4 ; tx_size++) { + coef_tx_size_start = VP9_COEF_START + + tx_size * 4 * VP9_COEF_SIZE_ONE_SET; + coef_count_tx_size_start = VP9_COEF_COUNT_START + + tx_size * 4 * VP9_COEF_COUNT_SIZE_ONE_SET; + coef_plane_start = coef_tx_size_start; + coef_count_plane_start = coef_count_tx_size_start; + + for (plane = 0 ; plane < 2 ; plane++) { + coef_type_start = coef_plane_start; + coef_count_type_start = coef_count_plane_start; + + for (type = 0 ; type < 2 ; type++) { + coef_band_start = coef_type_start; + coef_count_band_start = coef_count_type_start; + + for (band = 0 ; band < 6 ; band++) { + if (band == 0) + cxt_num = 3; + else + cxt_num = 6; + coef_cxt_start = coef_band_start; + coef_count_cxt_start = + coef_count_band_start; + + adapt_coef_probs_cxt(prev_prob, + cur_prob, + count, + update_factor, + cxt_num, + coef_cxt_start, + coef_count_cxt_start); + + if (band == 0) { + coef_band_start += 10; + coef_count_band_start += 15; + } else { + coef_band_start += 18; + coef_count_band_start += 30; + } + } + coef_type_start += VP9_COEF_SIZE_ONE_SET; + coef_count_type_start += + VP9_COEF_COUNT_SIZE_ONE_SET; + } + + coef_plane_start += 2 * VP9_COEF_SIZE_ONE_SET; + coef_count_plane_start += + 2 * VP9_COEF_COUNT_SIZE_ONE_SET; + } + } + + if (cur_kf == 0) { + /* mode_mv_merge_probs - merge_intra_inter_prob */ + for (coef_count_node_start = VP9_INTRA_INTER_COUNT_START; + coef_count_node_start < (VP9_MV_CLASS0_HP_1_COUNT_START + + VP9_MV_CLASS0_HP_1_COUNT_SIZE); + coef_count_node_start += 2) { + if (coef_count_node_start == + VP9_INTRA_INTER_COUNT_START) + coef_node_start = VP9_INTRA_INTER_START; + else if (coef_count_node_start == + VP9_COMP_INTER_COUNT_START) + coef_node_start = VP9_COMP_INTER_START; + else if (coef_count_node_start == + VP9_TX_MODE_COUNT_START) + coef_node_start = VP9_TX_MODE_START; 
+ else if (coef_count_node_start == + VP9_SKIP_COUNT_START) + coef_node_start = VP9_SKIP_START; + else if (coef_count_node_start == + VP9_MV_SIGN_0_COUNT_START) + coef_node_start = VP9_MV_SIGN_0_START; + else if (coef_count_node_start == + VP9_MV_SIGN_1_COUNT_START) + coef_node_start = VP9_MV_SIGN_1_START; + else if (coef_count_node_start == + VP9_MV_BITS_0_COUNT_START) + coef_node_start = VP9_MV_BITS_0_START; + else if (coef_count_node_start == + VP9_MV_BITS_1_COUNT_START) + coef_node_start = VP9_MV_BITS_1_START; + else if (coef_count_node_start == + VP9_MV_CLASS0_HP_0_COUNT_START) + coef_node_start = VP9_MV_CLASS0_HP_0_START; + + den = count[coef_count_node_start] + + count[coef_count_node_start + 1]; + + prob_32 = prev_prob[coef_node_start / 4 * 2]; + prob_res = coef_node_start & 3; + prob_shift = prob_res * 8; + pre_prob = (prob_32 >> prob_shift) & 0xff; + + if (den == 0) { + new_prob = pre_prob; + } else { + m_count = den < MODE_MV_COUNT_SAT ? + den : MODE_MV_COUNT_SAT; + get_prob = + clip_prob(div_r32(((int64_t) + count[coef_count_node_start] * 256 + + (den >> 1)), + den)); + + /* weighted prob */ + factor = count_to_update_factor[m_count]; + new_prob = + ROUND_POWER_OF_TWO(pre_prob * + (256 - factor) + + get_prob * factor, + 8); + } + + cur_prob[coef_node_start / 4 * 2] = + (cur_prob[coef_node_start / 4 * 2] & + (~(0xff << prob_shift))) | + (new_prob << prob_shift); + + coef_node_start = coef_node_start + 1; + } + + coef_node_start = VP9_INTER_MODE_START; + coef_count_node_start = VP9_INTER_MODE_COUNT_START; + for (tree_i = 0 ; tree_i < 7 ; tree_i++) { + for (node = 0 ; node < 3 ; node++) { + unsigned int start = coef_count_node_start; + + switch (node) { + case 2: + tree_left = count[start + 1]; + tree_right = count[start + 3]; + break; + case 1: + tree_left = count[start + 0]; + tree_right = count[start + 1] + + count[start + 3]; + break; + default: + tree_left = count[start + 2]; + tree_right = count[start + 0] + + count[start + 1] + + count[start + 3]; + 
break; + } + + vp9_tree_merge_probs(prev_prob, cur_prob, + coef_node_start, + tree_left, tree_right, + tree_i, node); + + coef_node_start = coef_node_start + 1; + } + + coef_count_node_start = coef_count_node_start + 4; + } + + coef_node_start = VP9_IF_Y_MODE_START; + coef_count_node_start = VP9_IF_Y_MODE_COUNT_START; + for (tree_i = 0 ; tree_i < 14 ; tree_i++) { + for (node = 0 ; node < 9 ; node++) { + unsigned int start = coef_count_node_start; + + switch (node) { + case 8: + tree_left = + count[start + D153_PRED]; + tree_right = + count[start + D207_PRED]; + break; + case 7: + tree_left = + count[start + D63_PRED]; + tree_right = + count[start + D207_PRED] + + count[start + D153_PRED]; + break; + case 6: + tree_left = + count[start + D45_PRED]; + tree_right = + count[start + D207_PRED] + + count[start + D153_PRED] + + count[start + D63_PRED]; + break; + case 5: + tree_left = + count[start + D135_PRED]; + tree_right = + count[start + D117_PRED]; + break; + case 4: + tree_left = + count[start + H_PRED]; + tree_right = + count[start + D117_PRED] + + count[start + D135_PRED]; + break; + case 3: + tree_left = + count[start + H_PRED] + + count[start + D117_PRED] + + count[start + D135_PRED]; + tree_right = + count[start + D45_PRED] + + count[start + D207_PRED] + + count[start + D153_PRED] + + count[start + D63_PRED]; + break; + case 2: + tree_left = + count[start + V_PRED]; + tree_right = + count[start + H_PRED] + + count[start + D117_PRED] + + count[start + D135_PRED] + + count[start + D45_PRED] + + count[start + D207_PRED] + + count[start + D153_PRED] + + count[start + D63_PRED]; + break; + case 1: + tree_left = + count[start + TM_PRED]; + tree_right = + count[start + V_PRED] + + count[start + H_PRED] + + count[start + D117_PRED] + + count[start + D135_PRED] + + count[start + D45_PRED] + + count[start + D207_PRED] + + count[start + D153_PRED] + + count[start + D63_PRED]; + break; + default: + tree_left = + count[start + DC_PRED]; + tree_right = + count[start + 
TM_PRED] + + count[start + V_PRED] + + count[start + H_PRED] + + count[start + D117_PRED] + + count[start + D135_PRED] + + count[start + D45_PRED] + + count[start + D207_PRED] + + count[start + D153_PRED] + + count[start + D63_PRED]; + break; + } + + vp9_tree_merge_probs(prev_prob, cur_prob, + coef_node_start, + tree_left, tree_right, + tree_i, node); + + coef_node_start = coef_node_start + 1; + } + coef_count_node_start = coef_count_node_start + 10; + } + + coef_node_start = VP9_PARTITION_P_START; + coef_count_node_start = VP9_PARTITION_P_COUNT_START; + for (tree_i = 0 ; tree_i < 16 ; tree_i++) { + for (node = 0 ; node < 3 ; node++) { + unsigned int start = coef_count_node_start; + + switch (node) { + case 2: + tree_left = count[start + 2]; + tree_right = count[start + 3]; + break; + case 1: + tree_left = count[start + 1]; + tree_right = count[start + 2] + + count[start + 3]; + break; + default: + tree_left = count[start + 0]; + tree_right = count[start + 1] + + count[start + 2] + + count[start + 3]; + break; + } + + vp9_tree_merge_probs(prev_prob, cur_prob, + coef_node_start, + tree_left, tree_right, + tree_i, node); + + coef_node_start = coef_node_start + 1; + } + + coef_count_node_start = coef_count_node_start + 4; + } + + coef_node_start = VP9_INTERP_START; + coef_count_node_start = VP9_INTERP_COUNT_START; + for (tree_i = 0 ; tree_i < 4 ; tree_i++) { + for (node = 0 ; node < 2 ; node++) { + unsigned int start = coef_count_node_start; + + switch (node) { + case 1: + tree_left = count[start + 1]; + tree_right = count[start + 2]; + break; + default: + tree_left = count[start + 0]; + tree_right = count[start + 1] + + count[start + 2]; + break; + } + + vp9_tree_merge_probs(prev_prob, cur_prob, + coef_node_start, + tree_left, tree_right, + tree_i, node); + + coef_node_start = coef_node_start + 1; + } + coef_count_node_start = coef_count_node_start + 3; + } + + coef_node_start = VP9_MV_JOINTS_START; + coef_count_node_start = VP9_MV_JOINTS_COUNT_START; + for (tree_i = 
0 ; tree_i < 1 ; tree_i++) { + for (node = 0 ; node < 3 ; node++) { + unsigned int start = coef_count_node_start; + + switch (node) { + case 2: + tree_left = count[start + 2]; + tree_right = count[start + 3]; + break; + case 1: + tree_left = count[start + 1]; + tree_right = count[start + 2] + + count[start + 3]; + break; + default: + tree_left = count[start + 0]; + tree_right = count[start + 1] + + count[start + 2] + + count[start + 3]; + break; + } + + vp9_tree_merge_probs(prev_prob, cur_prob, + coef_node_start, + tree_left, tree_right, + tree_i, node); + + coef_node_start = coef_node_start + 1; + } + coef_count_node_start = coef_count_node_start + 4; + } + + for (mvd_i = 0 ; mvd_i < 2 ; mvd_i++) { + coef_node_start = mvd_i ? VP9_MV_CLASSES_1_START : + VP9_MV_CLASSES_0_START; + coef_count_node_start = mvd_i ? + VP9_MV_CLASSES_1_COUNT_START : + VP9_MV_CLASSES_0_COUNT_START; + tree_i = 0; + for (node = 0; node < 10; node++) { + unsigned int start = coef_count_node_start; + + switch (node) { + case 9: + tree_left = count[start + 9]; + tree_right = count[start + 10]; + break; + case 8: + tree_left = count[start + 7]; + tree_right = count[start + 8]; + break; + case 7: + tree_left = count[start + 7] + + count[start + 8]; + tree_right = count[start + 9] + + count[start + 10]; + break; + case 6: + tree_left = count[start + 6]; + tree_right = count[start + 7] + + count[start + 8] + + count[start + 9] + + count[start + 10]; + break; + case 5: + tree_left = count[start + 4]; + tree_right = count[start + 5]; + break; + case 4: + tree_left = count[start + 4] + + count[start + 5]; + tree_right = count[start + 6] + + count[start + 7] + + count[start + 8] + + count[start + 9] + + count[start + 10]; + break; + case 3: + tree_left = count[start + 2]; + tree_right = count[start + 3]; + break; + case 2: + tree_left = count[start + 2] + + count[start + 3]; + tree_right = count[start + 4] + + count[start + 5] + + count[start + 6] + + count[start + 7] + + count[start + 8] + + 
count[start + 9] + + count[start + 10]; + break; + case 1: + tree_left = count[start + 1]; + tree_right = count[start + 2] + + count[start + 3] + + count[start + 4] + + count[start + 5] + + count[start + 6] + + count[start + 7] + + count[start + 8] + + count[start + 9] + + count[start + 10]; + break; + default: + tree_left = count[start + 0]; + tree_right = count[start + 1] + + count[start + 2] + + count[start + 3] + + count[start + 4] + + count[start + 5] + + count[start + 6] + + count[start + 7] + + count[start + 8] + + count[start + 9] + + count[start + 10]; + break; + } + + vp9_tree_merge_probs(prev_prob, cur_prob, + coef_node_start, + tree_left, tree_right, + tree_i, node); + + coef_node_start = coef_node_start + 1; + } + + coef_node_start = mvd_i ? VP9_MV_CLASS0_1_START : + VP9_MV_CLASS0_0_START; + coef_count_node_start = mvd_i ? + VP9_MV_CLASS0_1_COUNT_START : + VP9_MV_CLASS0_0_COUNT_START; + tree_i = 0; + node = 0; + tree_left = count[coef_count_node_start + 0]; + tree_right = count[coef_count_node_start + 1]; + + vp9_tree_merge_probs(prev_prob, cur_prob, + coef_node_start, + tree_left, tree_right, + tree_i, node); + coef_node_start = mvd_i ? VP9_MV_CLASS0_FP_1_START : + VP9_MV_CLASS0_FP_0_START; + coef_count_node_start = mvd_i ? 
+ VP9_MV_CLASS0_FP_1_COUNT_START : + VP9_MV_CLASS0_FP_0_COUNT_START; + + for (tree_i = 0; tree_i < 3; tree_i++) { + for (node = 0; node < 3; node++) { + unsigned int start = + coef_count_node_start; + switch (node) { + case 2: + tree_left = count[start + 2]; + tree_right = count[start + 3]; + break; + case 1: + tree_left = count[start + 1]; + tree_right = count[start + 2] + + count[start + 3]; + break; + default: + tree_left = count[start + 0]; + tree_right = count[start + 1] + + count[start + 2] + + count[start + 3]; + break; + } + + vp9_tree_merge_probs(prev_prob, + cur_prob, + coef_node_start, + tree_left, + tree_right, + tree_i, node); + + coef_node_start = coef_node_start + 1; + } + coef_count_node_start = + coef_count_node_start + 4; + } + } + } +} + /* Threaded interrupt handler. Reads VP9_DEC_STATUS_REG and VP9_ADAPT_PROB_REG, aborts the session when the status is not VP9_HEAD_PARSER_DONE, runs adapt_coef_probs() when adaptation is requested, then clears the frame references, fetches and processes the RPM, and processes and shows the frame. Returns IRQ_HANDLED on every path. */ +static irqreturn_t codec_vp9_threaded_isr(struct amvdec_session *sess) +{ + struct amvdec_core *core = sess->core; + struct codec_vp9 *vp9 = sess->priv; + u32 dec_status = amvdec_read_dos(core, VP9_DEC_STATUS_REG); + u32 prob_status = amvdec_read_dos(core, VP9_ADAPT_PROB_REG); + int i; + + if (!vp9) + return IRQ_HANDLED; /* sess->priv is NULL: nothing to work on */ + + mutex_lock(&vp9->lock); /* released at the unlock label */ + if (dec_status != VP9_HEAD_PARSER_DONE) { /* any other status is logged and aborts the session */ + dev_err(core->dev_dec, "Unrecognized dec_status: %08X\n", + dec_status); + amvdec_abort(sess); + goto unlock; + } + + pr_debug("ISR: %08X;%08X\n", dec_status, prob_status); + sess->keyframe_found = 1; + + if ((prob_status & 0xff) == 0xfd && vp9->cur_frame) { + /* VP9_REQ_ADAPT_PROB: low byte of the register is 0xfd; prob_status >> 8 selects the previous-probability buffer in 0x1000-byte steps, and the current probabilities sit at PROB_OFFSET + 0x4000 */ + u8 *prev_prob_b = ((u8 *)vp9->workspace_vaddr + + PROB_OFFSET) + + ((prob_status >> 8) * 0x1000); + u8 *cur_prob_b = ((u8 *)vp9->workspace_vaddr + + PROB_OFFSET) + 0x4000; + u8 *count_b = (u8 *)vp9->workspace_vaddr + + COUNT_OFFSET; + int last_frame_type = vp9->prev_frame ? + vp9->prev_frame->type : + KEY_FRAME; /* no previous frame: treated as a key frame */ + + adapt_coef_probs(last_frame_type == KEY_FRAME, + vp9->cur_frame->type == KEY_FRAME ? 
1 : 0, + prob_status >> 8, + (unsigned int *)prev_prob_b, + (unsigned int *)cur_prob_b, + (unsigned int *)count_b); + /* the adapted set overwrites the previous set, then the register is written back to 0 (presumably acknowledging the request; confirm against the firmware interface) */ + memcpy(prev_prob_b, cur_prob_b, ADAPT_PROB_SIZE); + amvdec_write_dos(core, VP9_ADAPT_PROB_REG, 0); + } + + /* Invalidate first 3 refs */ + for (i = 0; i < REFS_PER_FRAME ; ++i) + vp9->frame_refs[i] = NULL; + + vp9->prev_frame = vp9->cur_frame; + codec_vp9_update_ref(vp9); + + codec_vp9_fetch_rpm(sess); + if (codec_vp9_process_rpm(vp9)) { /* non-zero return takes the source-change path below */ + amvdec_src_change(sess, vp9->width, vp9->height, 16); /* NOTE(review): 16 is presumably the DPB size; confirm against amvdec_src_change() */ + + /* No frame is actually processed */ + vp9->cur_frame = NULL; + + /* Show the remaining frame */ + codec_vp9_show_frame(sess); + + /* FIXME: Save refs for resized frame */ + if (vp9->frames_num) + codec_vp9_save_refs(vp9); + + goto unlock; + } + + codec_vp9_process_lf(vp9); + codec_vp9_process_frame(sess); + codec_vp9_show_frame(sess); + +unlock: + mutex_unlock(&vp9->lock); + return IRQ_HANDLED; +} + /* Hard interrupt handler: does no work itself and only returns IRQ_WAKE_THREAD. */ +static irqreturn_t codec_vp9_isr(struct amvdec_session *sess) +{ + return IRQ_WAKE_THREAD; +} + /* VP9 implementation of struct amvdec_codec_ops; declared extern in codec_vp9.h. */ +struct amvdec_codec_ops codec_vp9_ops = { + .start = codec_vp9_start, + .stop = codec_vp9_stop, + .isr = codec_vp9_isr, + .threaded_isr = codec_vp9_threaded_isr, + .num_pending_bufs = codec_vp9_num_pending_bufs, + .drain = codec_vp9_flush_output, + .resume = codec_vp9_resume, +}; diff --git a/drivers/staging/media/meson/vdec/codec_vp9.h b/drivers/staging/media/meson/vdec/codec_vp9.h new file mode 100644 index 000000000000..62db65a2b939 --- /dev/null +++ b/drivers/staging/media/meson/vdec/codec_vp9.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (C) 2018 Maxime Jourdan <maxi.jourdan@wanadoo.fr> + */ + +#ifndef __MESON_VDEC_CODEC_VP9_H_ +#define __MESON_VDEC_CODEC_VP9_H_ + +#include "vdec.h" + +extern struct amvdec_codec_ops codec_vp9_ops; + +#endif diff --git a/drivers/staging/media/meson/vdec/hevc_regs.h b/drivers/staging/media/meson/vdec/hevc_regs.h index 55c1a80b955a..0392f41a1eed 100644 --- 
a/drivers/staging/media/meson/vdec/hevc_regs.h +++ b/drivers/staging/media/meson/vdec/hevc_regs.h @@ -122,6 +122,8 @@ #define HEVC_MPRED_L0_REF00_POC 0xc880 #define HEVC_MPRED_L1_REF00_POC 0xc8c0 +#define HEVC_MPRED_CTRL4 0xc930 + #define HEVC_MPRED_CUR_POC 0xc980 #define HEVC_MPRED_COL_POC 0xc984 #define HEVC_MPRED_MV_RD_END_ADDR 0xc988 @@ -140,6 +142,10 @@ #define HEVCD_IPP_LINEBUFF_BASE 0xd024 #define HEVCD_IPP_AXIIF_CONFIG 0xd02c +#define VP9D_MPP_REF_SCALE_ENBL 0xd104 +#define VP9D_MPP_REFINFO_TBL_ACCCONFIG 0xd108 +#define VP9D_MPP_REFINFO_DATA 0xd10c + #define HEVCD_MPP_ANC2AXI_TBL_CONF_ADDR 0xd180 #define HEVCD_MPP_ANC2AXI_TBL_CMD_ADDR 0xd184 #define HEVCD_MPP_ANC2AXI_TBL_DATA 0xd190 @@ -164,6 +170,7 @@ #define HEVC_DBLK_CFG9 0xd424 #define HEVC_DBLK_CFGA 0xd428 #define HEVC_DBLK_STS0 0xd42c +#define HEVC_DBLK_CFGB 0xd42c #define HEVC_DBLK_STS1 0xd430 #define HEVC_DBLK_CFGE 0xd438 diff --git a/drivers/staging/media/meson/vdec/vdec.c b/drivers/staging/media/meson/vdec/vdec.c index 5514d2d259a4..1e531a335c50 100644 --- a/drivers/staging/media/meson/vdec/vdec.c +++ b/drivers/staging/media/meson/vdec/vdec.c @@ -395,6 +395,7 @@ static void vdec_reset_bufs_recycle(struct amvdec_session *sess) static void vdec_stop_streaming(struct vb2_queue *q) { struct amvdec_session *sess = vb2_get_drv_priv(q); + struct amvdec_codec_ops *codec_ops = sess->fmt_out->codec_ops; struct amvdec_core *core = sess->core; struct vb2_v4l2_buffer *buf; @@ -423,6 +424,10 @@ static void vdec_stop_streaming(struct vb2_queue *q) sess->streamon_out = 0; } else { + /* Drain remaining refs if was still running */ + if (sess->status >= STATUS_RUNNING && codec_ops->drain) + codec_ops->drain(sess); + while ((buf = v4l2_m2m_dst_buf_remove(sess->m2m_ctx))) v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR); diff --git a/drivers/staging/media/meson/vdec/vdec_helpers.c b/drivers/staging/media/meson/vdec/vdec_helpers.c index 818064b6b4d0..a4f62b892188 100644 --- a/drivers/staging/media/meson/vdec/vdec_helpers.c 
+++ b/drivers/staging/media/meson/vdec/vdec_helpers.c @@ -299,6 +299,10 @@ static void dst_buf_done(struct amvdec_session *sess, sess->sequence_cap - 1); v4l2_event_queue_fh(&sess->fh, &ev); vbuf->flags |= V4L2_BUF_FLAG_LAST; + } else if (sess->status == STATUS_NEEDS_RESUME) { + /* Mark LAST for drained show frames during a source change */ + vbuf->flags |= V4L2_BUF_FLAG_LAST; + sess->sequence_cap = 0; } else if (sess->should_stop) dev_dbg(dev, "should_stop, %u bufs remain\n", atomic_read(&sess->esparser_queued_bufs)); diff --git a/drivers/staging/media/meson/vdec/vdec_platform.c b/drivers/staging/media/meson/vdec/vdec_platform.c index e9356a46828f..72a833b1cebd 100644 --- a/drivers/staging/media/meson/vdec/vdec_platform.c +++ b/drivers/staging/media/meson/vdec/vdec_platform.c @@ -8,8 +8,10 @@ #include "vdec.h" #include "vdec_1.h" +#include "vdec_hevc.h" #include "codec_mpeg12.h" #include "codec_h264.h" +#include "codec_vp9.h" static const struct amvdec_format vdec_formats_gxbb[] = { { @@ -51,6 +53,18 @@ static const struct amvdec_format vdec_formats_gxbb[] = { static const struct amvdec_format vdec_formats_gxl[] = { { + .pixfmt = V4L2_PIX_FMT_VP9, + .min_buffers = 16, + .max_buffers = 24, + .max_width = 3840, + .max_height = 2160, + .vdec_ops = &vdec_hevc_ops, + .codec_ops = &codec_vp9_ops, + .firmware_path = "meson/vdec/gxl_vp9.bin", + .pixfmts_cap = { V4L2_PIX_FMT_NV12M, 0 }, + .flags = V4L2_FMT_FLAG_COMPRESSED | + V4L2_FMT_FLAG_DYN_RESOLUTION, + }, { .pixfmt = V4L2_PIX_FMT_H264, .min_buffers = 2, .max_buffers = 24, @@ -127,6 +141,18 @@ static const struct amvdec_format vdec_formats_gxm[] = { static const struct amvdec_format vdec_formats_g12a[] = { { + .pixfmt = V4L2_PIX_FMT_VP9, + .min_buffers = 16, + .max_buffers = 24, + .max_width = 3840, + .max_height = 2160, + .vdec_ops = &vdec_hevc_ops, + .codec_ops = &codec_vp9_ops, + .firmware_path = "meson/vdec/g12a_vp9.bin", + .pixfmts_cap = { V4L2_PIX_FMT_NV12M, 0 }, + .flags = V4L2_FMT_FLAG_COMPRESSED | + 
V4L2_FMT_FLAG_DYN_RESOLUTION, + }, { .pixfmt = V4L2_PIX_FMT_H264, .min_buffers = 2, .max_buffers = 24, @@ -165,6 +191,18 @@ static const struct amvdec_format vdec_formats_g12a[] = { static const struct amvdec_format vdec_formats_sm1[] = { { + .pixfmt = V4L2_PIX_FMT_VP9, + .min_buffers = 16, + .max_buffers = 24, + .max_width = 3840, + .max_height = 2160, + .vdec_ops = &vdec_hevc_ops, + .codec_ops = &codec_vp9_ops, + .firmware_path = "meson/vdec/g12a_vp9.bin", + .pixfmts_cap = { V4L2_PIX_FMT_NV12M, 0 }, + .flags = V4L2_FMT_FLAG_COMPRESSED | + V4L2_FMT_FLAG_DYN_RESOLUTION, + }, { .pixfmt = V4L2_PIX_FMT_H264, .min_buffers = 2, .max_buffers = 24,