diff mbox series

[v1,7/9] media: verisilicon: Add Rockchip AV1 decoder

Message ID 20221219155616.848690-8-benjamin.gaignard@collabora.com (mailing list archive)
State New, archived
Headers show
Series AV1 stateless decoder for RK3588 | expand

Commit Message

Benjamin Gaignard Dec. 19, 2022, 3:56 p.m. UTC
Implement an AV1 stateless decoder for the Rockchip VPU981.
It decodes 8-bit and 10-bit AV1 bitstreams.
The AV1 scaling feature is handled by the postprocessor.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
---
 drivers/media/platform/verisilicon/Makefile   |    1 +
 .../media/platform/verisilicon/hantro_hw.h    |   64 +-
 .../verisilicon/rockchip_vpu981_hw_av1_dec.c  | 2067 +++++++++++++++++
 .../verisilicon/rockchip_vpu981_regs.h        |  477 ++++
 4 files changed, 2607 insertions(+), 2 deletions(-)
 create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
 create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_regs.h

Comments

kernel test robot Dec. 19, 2022, 7:16 p.m. UTC | #1
Hi Benjamin,

I love your patch! Yet something to improve:

[auto build test ERROR on media-tree/master]
[also build test ERROR on rockchip/for-next linus/master v6.1 next-20221219]
[cannot apply to pza/reset/next pza/imx-drm/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Benjamin-Gaignard/AV1-stateless-decoder-for-RK3588/20221220-000013
base:   git://linuxtv.org/media_tree.git master
patch link:    https://lore.kernel.org/r/20221219155616.848690-8-benjamin.gaignard%40collabora.com
patch subject: [PATCH v1 7/9] media: verisilicon: Add Rockchip AV1 decoder
config: sh-allmodconfig
compiler: sh4-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/218f790ae8939b4333c68c1d7b0051c7f1033c9c
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Benjamin-Gaignard/AV1-stateless-decoder-for-RK3588/20221220-000013
        git checkout 218f790ae8939b4333c68c1d7b0051c7f1033c9c
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=sh olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=sh SHELL=/bin/bash drivers/media/platform/verisilicon/

If you fix the issue, kindly add the following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All error/warnings (new ones prefixed by >>):

   In file included from drivers/media/platform/verisilicon/hantro.h:28,
                    from drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:9:
   drivers/media/platform/verisilicon/hantro_hw.h:273:34: error: field 'frame_type' has incomplete type
     273 |         enum v4l2_av1_frame_type frame_type;
         |                                  ^~~~~~~~~~
   drivers/media/platform/verisilicon/hantro_hw.h:276:25: error: 'V4L2_AV1_NUM_REF_FRAMES' undeclared here (not in a function)
     276 |         u32 order_hints[V4L2_AV1_NUM_REF_FRAMES];
         |                         ^~~~~~~~~~~~~~~~~~~~~~~
   drivers/media/platform/verisilicon/hantro_hw.h:41:34: error: 'V4L2_AV1_TOTAL_REFS_PER_FRAME' undeclared here (not in a function)
      41 | #define AV1_MAX_FRAME_BUF_COUNT (V4L2_AV1_TOTAL_REFS_PER_FRAME + 1)
         |                                  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/media/platform/verisilicon/hantro_hw.h:319:48: note: in expansion of macro 'AV1_MAX_FRAME_BUF_COUNT'
     319 |         struct hantro_av1_frame_ref frame_refs[AV1_MAX_FRAME_BUF_COUNT];
         |                                                ^~~~~~~~~~~~~~~~~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c: In function 'rockchip_vpu981_get_frame_index':
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:118:27: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     118 |         int i, idx = frame->ref_frame_idx[ref];
         |                           ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:123:26: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     123 |         timestamp = frame->reference_frame_ts[idx];
         |                          ^~
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:112:72: warning: parameter 'ref' set but not used [-Wunused-but-set-parameter]
     112 | static int rockchip_vpu981_get_frame_index(struct hantro_ctx *ctx, int ref)
         |                                                                    ~~~~^~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c: In function 'rockchip_vpu981_av1_dec_frame_ref':
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:158:34: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     158 |                             frame->frame_width_minus_1 + 1;
         |                                  ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:160:34: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     160 |                             frame->frame_height_minus_1 + 1;
         |                                  ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:162:66: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     162 |                         av1_dec->frame_refs[i].frame_type = frame->frame_type;
         |                                                                  ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:163:66: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     163 |                         av1_dec->frame_refs[i].order_hint = frame->order_hint;
         |                                                                  ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:165:38: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     165 |                                 frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME + i];
         |                                      ^~
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:165:59: error: 'V4L2_AV1_REF_LAST_FRAME' undeclared (first use in this function)
     165 |                                 frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME + i];
         |                                                           ^~~~~~~~~~~~~~~~~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:165:59: note: each undeclared identifier is reported only once for each function it appears in
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:170:78: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     170 |                                 av1_dec->frame_refs[i].order_hints[j] = frame->order_hints[j];
         |                                                                              ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c: In function 'rockchip_vpu981_av1_dec_clean_refs':
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:209:41: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     209 |                         if (ctrls->frame->reference_frame_ts[ref] == timestamp)
         |                                         ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c: At top level:
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:230:6: warning: no previous prototype for 'rockchip_vpu981_av1_dec_tiles_free' [-Wmissing-prototypes]
     230 | void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
         |      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c: In function 'rockchip_vpu981_av1_dec_tiles_reallocate':
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:268:66: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_tile_group_entry'
     268 |         unsigned int num_tile_cols = 1 << ctrls->tile_group_entry->tile_col;
         |                                                                  ^~
   In file included from include/vdso/const.h:5,
                    from include/linux/const.h:4,
                    from include/uapi/linux/kernel.h:6,
                    from include/linux/cache.h:5,
                    from include/linux/time.h:5,
                    from include/linux/videodev2.h:59,
                    from include/media/videobuf2-v4l2.h:15,
                    from include/media/v4l2-mem2mem.h:16,
                    from drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:8:
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:269:49: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     269 |         unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
         |                                                 ^~
   include/uapi/linux/const.h:32:44: note: in definition of macro '__ALIGN_KERNEL_MASK'
      32 | #define __ALIGN_KERNEL_MASK(x, mask)    (((x) + (mask)) & ~(mask))
         |                                            ^
   include/linux/align.h:8:33: note: in expansion of macro '__ALIGN_KERNEL'
       8 | #define ALIGN(x, a)             __ALIGN_KERNEL((x), (a))
         |                                 ^~~~~~~~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:269:31: note: in expansion of macro 'ALIGN'
     269 |         unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
         |                               ^~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:269:49: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     269 |         unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
         |                                                 ^~
   include/uapi/linux/const.h:32:50: note: in definition of macro '__ALIGN_KERNEL_MASK'
      32 | #define __ALIGN_KERNEL_MASK(x, mask)    (((x) + (mask)) & ~(mask))
         |                                                  ^~~~
   include/linux/align.h:8:33: note: in expansion of macro '__ALIGN_KERNEL'
       8 | #define ALIGN(x, a)             __ALIGN_KERNEL((x), (a))
         |                                 ^~~~~~~~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:269:31: note: in expansion of macro 'ALIGN'
     269 |         unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
         |                               ^~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:269:49: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     269 |         unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
         |                                                 ^~
   include/uapi/linux/const.h:32:61: note: in definition of macro '__ALIGN_KERNEL_MASK'
      32 | #define __ALIGN_KERNEL_MASK(x, mask)    (((x) + (mask)) & ~(mask))
         |                                                             ^~~~
   include/linux/align.h:8:33: note: in expansion of macro '__ALIGN_KERNEL'
       8 | #define ALIGN(x, a)             __ALIGN_KERNEL((x), (a))
         |                                 ^~~~~~~~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:269:31: note: in expansion of macro 'ALIGN'
     269 |         unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
         |                               ^~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c: At top level:
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:327:6: warning: no previous prototype for 'rockchip_vpu981_av1_dec_exit' [-Wmissing-prototypes]
     327 | void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
         |      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:363:5: warning: no previous prototype for 'rockchip_vpu981_av1_dec_init' [-Wmissing-prototypes]
     363 | int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
         |     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c: In function 'rockchip_vpu981_av1_dec_prepare_run':
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:420:48: error: 'V4L2_CID_STATELESS_AV1_SEQUENCE' undeclared (first use in this function); did you mean 'V4L2_CID_STATELESS_MPEG2_SEQUENCE'?
     420 |         ctrls->sequence = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_SEQUENCE);
         |                                                ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         |                                                V4L2_CID_STATELESS_MPEG2_SEQUENCE
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:425:34: error: 'V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY' undeclared (first use in this function)
     425 |             hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
         |                                  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:429:45: error: 'V4L2_CID_STATELESS_AV1_FRAME' undeclared (first use in this function); did you mean 'V4L2_CID_STATELESS_VP9_FRAME'?
     429 |         ctrls->frame = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FRAME);
         |                                             ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
         |                                             V4L2_CID_STATELESS_VP9_FRAME
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:434:34: error: 'V4L2_CID_STATELESS_AV1_FILM_GRAIN' undeclared (first use in this function); did you mean 'V4L2_CID_STATELESS_VP9_FRAME'?
     434 |             hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FILM_GRAIN);
         |                                  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         |                                  V4L2_CID_STATELESS_VP9_FRAME
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c: In function 'rockchip_vpu981_av1_dec_set_global_model':
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:507:57: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     507 |         const struct v4l2_av1_global_motion *gm = &frame->global_motion;
         |                                                         ^~
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:513:41: error: 'V4L2_AV1_REFS_PER_FRAME' undeclared (first use in this function)
     513 |         for (ref_frame = 0; ref_frame < V4L2_AV1_REFS_PER_FRAME; ++ref_frame) {
         |                                         ^~~~~~~~~~~~~~~~~~~~~~~
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:519:43: error: invalid use of undefined type 'const struct v4l2_av1_global_motion'
     519 |                                         gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][3];
         |                                           ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:519:52: error: 'V4L2_AV1_REF_LAST_FRAME' undeclared (first use in this function)
     519 |                                         gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][3];
         |                                                    ^~~~~~~~~~~~~~~~~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:522:43: error: invalid use of undefined type 'const struct v4l2_av1_global_motion'
     522 |                                         gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][2];
         |                                           ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:525:43: error: invalid use of undefined type 'const struct v4l2_av1_global_motion'
     525 |                                         gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][i];
         |                                           ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:529:23: error: invalid use of undefined type 'const struct v4l2_av1_global_motion'
     529 |                 if (gm->type[V4L2_AV1_REF_LAST_FRAME + ref_frame] <= V4L2_AV1_WARP_MODEL_AFFINE)
         |                       ^~
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:529:70: error: 'V4L2_AV1_WARP_MODEL_AFFINE' undeclared (first use in this function)
     529 |                 if (gm->type[V4L2_AV1_REF_LAST_FRAME + ref_frame] <= V4L2_AV1_WARP_MODEL_AFFINE)
         |                                                                      ^~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:531:44: error: invalid use of undefined type 'const struct v4l2_av1_global_motion'
     531 |                                         &gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][0],
         |                                            ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c: In function 'rockchip_vpu981_av1_dec_set_tile_info':
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:551:16: error: variable 'tile_info' has initializer but incomplete type
     551 |         struct v4l2_av1_tile_info tile_info = ctrls->frame->tile_info;
         |                ^~~~~~~~~~~~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:551:59: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     551 |         struct v4l2_av1_tile_info tile_info = ctrls->frame->tile_info;
         |                                                           ^~
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:551:35: error: storage size of 'tile_info' isn't known
     551 |         struct v4l2_av1_tile_info tile_info = ctrls->frame->tile_info;
         |                                   ^~~~~~~~~
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:585:44: error: invalid use of undefined type 'struct v4l2_ctrl_av1_tile_group_entry'
     585 |                         start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
         |                                            ^
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:585:53: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_tile_group_entry'
     585 |                         start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
         |                                                     ^
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:585:79: error: invalid use of undefined type 'struct v4l2_ctrl_av1_tile_group_entry'
     585 |                         start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
         |                                                                               ^
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:585:82: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_tile_group_entry'
     585 |                         start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
         |                                                                                  ^
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:592:50: error: invalid use of undefined type 'struct v4l2_ctrl_av1_tile_group_entry'
     592 |                         end = start + group_entry[tile_id].tile_size;
         |                                                  ^
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:592:59: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_tile_group_entry'
     592 |                         end = start + group_entry[tile_id].tile_size;
         |                                                           ^
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:551:35: warning: unused variable 'tile_info' [-Wunused-variable]
     551 |         struct v4l2_av1_tile_info tile_info = ctrls->frame->tile_info;
         |                                   ^~~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c: In function 'rockchip_vpu981_av1_dec_get_relative_dist':
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:623:35: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_sequence'
     623 |         int bits = ctrls->sequence->order_hint_bits - 1;
         |                                   ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:626:29: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_sequence'
     626 |         if (!ctrls->sequence->order_hint_bits)
         |                             ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c: In function 'rockchip_vpu981_av1_dec_set_frame_sign_bias':
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:644:22: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_sequence'
     644 |         if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
         |                      ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:644:57: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     644 |         if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
         |                                                         ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:46:26: note: in definition of macro 'IS_INTRA'
      46 | #define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
         |                          ^~~~
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:46:34: error: 'V4L2_AV1_KEY_FRAME' undeclared (first use in this function)
      46 | #define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
         |                                  ^~~~~~~~~~~~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:644:43: note: in expansion of macro 'IS_INTRA'
     644 |         if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
         |                                           ^~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:644:57: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     644 |         if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
         |                                                         ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:46:58: note: in definition of macro 'IS_INTRA'
      46 | #define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
         |                                                          ^~~~
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:46:66: error: 'V4L2_AV1_INTRA_ONLY_FRAME' undeclared (first use in this function)
      46 | #define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
         |                                                                  ^~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:644:43: note: in expansion of macro 'IS_INTRA'
     644 |         if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
         |                                           ^~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:657:76: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     657 |                                                                       frame->order_hint);
         |                                                                            ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c: In function 'rockchip_vpu981_av1_dec_set_ref':
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:675:30: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     675 |         int cur_width = frame->frame_width_minus_1 + 1;
         |                              ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:676:31: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     676 |         int cur_height = frame->frame_height_minus_1 + 1;
         |                               ^~
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:664:70: warning: parameter 'idx' set but not used [-Wunused-but-set-parameter]
     664 | rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
         |                                                                  ~~~~^~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c: In function 'rockchip_vpu981_av1_dec_set_segmentation':
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:780:57: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     780 |         const struct v4l2_av1_segmentation *seg = &frame->segmentation;
         |                                                         ^~
>> drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:781:25: error: 'V4L2_AV1_MAX_SEGMENTS' undeclared (first use in this function)
     781 |         uint32_t segval[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX] = { 0 };
         |                         ^~~~~~~~~~~~~~~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:781:48: error: 'V4L2_AV1_SEG_LVL_MAX' undeclared (first use in this function); did you mean 'V4L2_VP9_SEG_LVL_MAX'?
     781 |         uint32_t segval[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX] = { 0 };
         |                                                ^~~~~~~~~~~~~~~~~~~~
         |                                                V4L2_VP9_SEG_LVL_MAX
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:785:19: error: invalid use of undefined type 'const struct v4l2_av1_segmentation'
     785 |         if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED)
         |                   ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:785:29: error: 'V4L2_AV1_SEGMENTATION_FLAG_ENABLED' undeclared (first use in this function); did you mean 'V4L2_VP9_SEGMENTATION_FLAG_ENABLED'?
     785 |         if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED)
         |                             ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         |                             V4L2_VP9_SEGMENTATION_FLAG_ENABLED
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:786:22: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     786 |             && (frame->primary_ref_frame < V4L2_AV1_REFS_PER_FRAME)) {
         |                      ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:786:44: error: 'V4L2_AV1_REFS_PER_FRAME' undeclared (first use in this function)
     786 |             && (frame->primary_ref_frame < V4L2_AV1_REFS_PER_FRAME)) {
         |                                            ^~~~~~~~~~~~~~~~~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:787:69: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     787 |                 int idx = rockchip_vpu981_get_frame_index(ctx, frame->primary_ref_frame);
         |                                                                     ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:790:47: warning: variable 'chroma_addr' set but not used [-Wunused-but-set-variable]
     790 |                         dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
         |                                               ^~~~~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:806:32: error: invalid use of undefined type 'const struct v4l2_av1_segmentation'
     806 |                          !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE));
         |                                ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:806:42: error: 'V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE' undeclared (first use in this function); did you mean 'V4L2_VP9_SEGMENTATION_FLAG_TEMPORAL_UPDATE'?
     806 |                          !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE));
         |                                          ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         |                                          V4L2_VP9_SEGMENTATION_FLAG_TEMPORAL_UPDATE
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:808:32: error: invalid use of undefined type 'const struct v4l2_av1_segmentation'
     808 |                          !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP));
         |                                ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:808:42: error: 'V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP' undeclared (first use in this function); did you mean 'V4L2_VP9_SEGMENTATION_FLAG_UPDATE_MAP'?
     808 |                          !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP));
         |                                          ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         |                                          V4L2_VP9_SEGMENTATION_FLAG_UPDATE_MAP
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:810:32: error: invalid use of undefined type 'const struct v4l2_av1_segmentation'
     810 |                          !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED));
         |                                ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:813:34: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     813 |                          !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE));
         |                                  ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:813:44: error: 'V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE' undeclared (first use in this function); did you mean 'V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT'?
     813 |                          !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE));
         |                                            ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         |                                            V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:815:27: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     815 |         if (IS_INTRA(frame->frame_type)
         |                           ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:46:26: note: in definition of macro 'IS_INTRA'
      46 | #define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
         |                          ^~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:46:34: error: 'V4L2_AV1_KEY_FRAME' undeclared (first use in this function)
      46 | #define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
         |                                  ^~~~~~~~~~~~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:815:13: note: in expansion of macro 'IS_INTRA'
     815 |         if (IS_INTRA(frame->frame_type)
         |             ^~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:815:27: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     815 |         if (IS_INTRA(frame->frame_type)
         |                           ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:46:58: note: in definition of macro 'IS_INTRA'
      46 | #define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
         |                                                          ^~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:46:66: error: 'V4L2_AV1_INTRA_ONLY_FRAME' undeclared (first use in this function)
      46 | #define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
         |                                                                  ^~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:815:13: note: in expansion of macro 'IS_INTRA'
     815 |         if (IS_INTRA(frame->frame_type)
         |             ^~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:816:24: error: invalid use of undefined type 'const struct v4l2_ctrl_av1_frame'
     816 |             || !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE)) {
         |                        ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:820:19: error: invalid use of undefined type 'const struct v4l2_av1_segmentation'
     820 |         if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED)) {
         |                   ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:824:32: error: invalid use of undefined type 'const struct v4l2_av1_segmentation'
     824 |                         if (seg->feature_enabled[s] &
         |                                ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:825:29: error: implicit declaration of function 'V4L2_AV1_SEGMENT_FEATURE_ENABLED'; did you mean 'V4L2_VP9_SEGMENT_FEATURE_ENABLED'? [-Werror=implicit-function-declaration]
     825 |                             V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
         |                             ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         |                             V4L2_VP9_SEGMENT_FEATURE_ENABLED
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:825:62: error: 'V4L2_AV1_SEG_LVL_ALT_Q' undeclared (first use in this function); did you mean 'V4L2_VP9_SEG_LVL_ALT_Q'?
     825 |                             V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
         |                                                              ^~~~~~~~~~~~~~~~~~~~~~
         |                                                              V4L2_VP9_SEG_LVL_ALT_Q
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:828:48: error: invalid use of undefined type 'const struct v4l2_av1_segmentation'
     828 |                                         abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q]));
         |                                                ^~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:33:31: note: in definition of macro 'CLIP3'
      33 | #define CLIP3(l, h, v)      ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
         |                               ^
   include/linux/math.h:143:17: note: in expansion of macro '__abs_choose_expr'
     143 | #define abs(x)  __abs_choose_expr(x, long long,                         \
         |                 ^~~~~~~~~~~~~~~~~
   drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c:828:41: note: in expansion of macro 'abs'


vim +118 drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c

    45	
  > 46	#define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
    47	
    48	#define LST_BUF_IDX (V4L2_AV1_REF_LAST_FRAME - V4L2_AV1_REF_LAST_FRAME)
  > 49	#define LST2_BUF_IDX (V4L2_AV1_REF_LAST2_FRAME - V4L2_AV1_REF_LAST_FRAME)
    50	#define LST3_BUF_IDX (V4L2_AV1_REF_LAST3_FRAME - V4L2_AV1_REF_LAST_FRAME)
  > 51	#define GLD_BUF_IDX (V4L2_AV1_REF_GOLDEN_FRAME - V4L2_AV1_REF_LAST_FRAME)
  > 52	#define BWD_BUF_IDX (V4L2_AV1_REF_BWDREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
  > 53	#define ALT2_BUF_IDX (V4L2_AV1_REF_ALTREF2_FRAME - V4L2_AV1_REF_LAST_FRAME)
  > 54	#define ALT_BUF_IDX (V4L2_AV1_REF_ALTREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
    55	
    56	#define DIV_LUT_PREC_BITS 14
    57	#define DIV_LUT_BITS 8
    58	#define DIV_LUT_NUM BIT(DIV_LUT_BITS)
    59	#define WARP_PARAM_REDUCE_BITS 6
    60	#define WARPEDMODEL_PREC_BITS 16
    61	
    62	#define AV1_DIV_ROUND_UP_POW2(value, n)			\
    63	({							\
    64		typeof(n) _n  = n;				\
    65		typeof(value) _value = value;			\
    66		(_value + (BIT(_n) >> 1)) >> _n;		\
    67	})
    68	
    69	#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n)				\
    70	({									\
    71		typeof(n) _n_  = n;						\
    72		typeof(value) _value_ = value;					\
    73		(((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_))	\
    74			: AV1_DIV_ROUND_UP_POW2((_value_), (_n_)));		\
    75	})
    76	
    77	struct rockchip_av1_film_grain {
    78		uint8_t scaling_lut_y[256];
    79		uint8_t scaling_lut_cb[256];
    80		uint8_t scaling_lut_cr[256];
    81		int16_t cropped_luma_grain_block[4096];
    82		int16_t cropped_chroma_grain_block[1024 * 2];
    83	};
    84	
    85	static const short div_lut[DIV_LUT_NUM + 1] = {
    86		16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
    87		15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
    88		15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
    89		14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
    90		13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
    91		13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
    92		13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
    93		12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
    94		12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
    95		11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
    96		11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
    97		11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
    98		10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
    99		10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
   100		10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
   101		9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
   102		9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
   103		9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
   104		9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
   105		9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
   106		8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
   107		8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
   108		8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
   109		8240,  8224,  8208,  8192,
   110	};
   111	
 > 112	static int rockchip_vpu981_get_frame_index(struct hantro_ctx *ctx, int ref)
   113	{
   114		struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
   115		struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
   116		const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
   117		u64 timestamp;
 > 118		int i, idx = frame->ref_frame_idx[ref];
   119	
   120		if (idx >= AV1_MAX_FRAME_BUF_COUNT || idx < 0)
   121			return AV1_INVALID_IDX;
   122	
   123		timestamp = frame->reference_frame_ts[idx];
   124		for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
   125			if (av1_dec->frame_refs[i].ref_count == 0)
   126				continue;
   127			if (av1_dec->frame_refs[i].timestamp == timestamp)
   128				return i;
   129		}
   130	
   131		return AV1_INVALID_IDX;
   132	}
   133	
   134	static int rockchip_vpu981_get_order_hint(struct hantro_ctx *ctx, int ref)
   135	{
   136		struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
   137		int idx = rockchip_vpu981_get_frame_index(ctx, ref);
   138	
   139		if (idx != AV1_INVALID_IDX)
   140			return av1_dec->frame_refs[idx].order_hint;
   141	
   142		return 0;
   143	}
   144	
   145	static int rockchip_vpu981_av1_dec_frame_ref(struct hantro_ctx *ctx,
   146						     u64 timestamp)
   147	{
   148		struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
   149		struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
   150		const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
   151		int i;
   152	
   153		for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
   154			if (av1_dec->frame_refs[i].ref_count == 0) {
   155				int j;
   156	
   157				av1_dec->frame_refs[i].width =
   158				    frame->frame_width_minus_1 + 1;
   159				av1_dec->frame_refs[i].height =
   160				    frame->frame_height_minus_1 + 1;
   161				av1_dec->frame_refs[i].timestamp = timestamp;
   162				av1_dec->frame_refs[i].frame_type = frame->frame_type;
   163				av1_dec->frame_refs[i].order_hint = frame->order_hint;
   164				av1_dec->frame_refs[i].gm_mode =
 > 165					frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME + i];
   166				if (!av1_dec->frame_refs[i].vb2_ref)
   167					av1_dec->frame_refs[i].vb2_ref = hantro_get_dst_buf(ctx);
   168	
   169				for (j = 0; j < V4L2_AV1_NUM_REF_FRAMES; j++)
   170					av1_dec->frame_refs[i].order_hints[j] = frame->order_hints[j];
   171	
   172				av1_dec->frame_refs[i].ref_count++;
   173				av1_dec->current_frame_index = i;
   174				return i;
   175			}
   176		}
   177	
   178		return AV1_INVALID_IDX;
   179	}
   180	
   181	static void rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx *ctx, int idx)
   182	{
   183		struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
   184	
   185		if (idx < 0)
   186			return;
   187	
   188		av1_dec->frame_refs[idx].ref_count--;
   189	
   190		if (av1_dec->frame_refs[idx].ref_count < 0)
   191			pr_warn("AV1 reference frames refcounting error (idx %d)\n", idx);
   192	}
   193	
   194	static void rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx *ctx)
   195	{
   196		struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
   197		struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
   198	
   199		int ref, idx;
   200	
   201		for (idx = 0; idx < AV1_MAX_FRAME_BUF_COUNT; idx++) {
   202			u64 timestamp = av1_dec->frame_refs[idx].timestamp;
   203			bool used = false;
   204	
   205			if (av1_dec->frame_refs[idx].ref_count == 0)
   206				continue;
   207	
   208			for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++) {
   209				if (ctrls->frame->reference_frame_ts[ref] == timestamp)
   210					used = true;
   211			}
   212	
   213			if (!used)
   214				rockchip_vpu981_av1_dec_frame_unref(ctx, idx);
   215		}
   216	}
   217	
   218	static size_t rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx *ctx)
   219	{
   220		return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8;
   221	}
   222	
   223	static size_t rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx *ctx)
   224	{
   225		size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
   226	
   227		return ALIGN((cr_offset * 3) / 2, 64);
   228	}
   229	
 > 230	void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
   231	{
   232		struct hantro_dev *vpu = ctx->dev;
   233		struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
   234	
   235		if (av1_dec->db_data_col.cpu)
   236			dma_free_coherent(vpu->dev, av1_dec->db_data_col.size,
   237					  av1_dec->db_data_col.cpu,
   238					  av1_dec->db_data_col.dma);
   239		av1_dec->db_data_col.cpu = NULL;
   240	
   241		if (av1_dec->db_ctrl_col.cpu)
   242			dma_free_coherent(vpu->dev, av1_dec->db_ctrl_col.size,
   243					  av1_dec->db_ctrl_col.cpu,
   244					  av1_dec->db_ctrl_col.dma);
   245		av1_dec->db_ctrl_col.cpu = NULL;
   246	
   247		if (av1_dec->cdef_col.cpu)
   248			dma_free_coherent(vpu->dev, av1_dec->cdef_col.size,
   249					  av1_dec->cdef_col.cpu, av1_dec->cdef_col.dma);
   250		av1_dec->cdef_col.cpu = NULL;
   251	
   252		if (av1_dec->sr_col.cpu)
   253			dma_free_coherent(vpu->dev, av1_dec->sr_col.size,
   254					  av1_dec->sr_col.cpu, av1_dec->sr_col.dma);
   255		av1_dec->sr_col.cpu = NULL;
   256	
   257		if (av1_dec->lr_col.cpu)
   258			dma_free_coherent(vpu->dev, av1_dec->lr_col.size,
   259					  av1_dec->lr_col.cpu, av1_dec->lr_col.dma);
   260		av1_dec->lr_col.cpu = NULL;
   261	}
   262	
   263	static int rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx *ctx)
   264	{
   265		struct hantro_dev *vpu = ctx->dev;
   266		struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
   267		struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
 > 268		unsigned int num_tile_cols = 1 << ctrls->tile_group_entry->tile_col;
   269		unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
   270		unsigned int height_in_sb = height / 64;
   271		unsigned int stripe_num = ((height + 8) + 63) / 64;
   272		size_t size;
   273	
   274		if (num_tile_cols <= av1_dec->num_tile_cols_allocated)
   275			return 0;
   276	
   277		rockchip_vpu981_av1_dec_tiles_free(ctx);
   278	
   279		size = ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols;
   280		av1_dec->db_data_col.cpu = dma_alloc_coherent(vpu->dev, size,
   281							      &av1_dec->db_data_col.dma,
   282							      GFP_KERNEL);
   283		if (!av1_dec->db_data_col.cpu)
   284			goto buffer_allocation_error;
   285		av1_dec->db_data_col.size = size;
   286	
   287		size = ALIGN(height * 2 * 16 / 4, 128) * num_tile_cols;
   288		av1_dec->db_ctrl_col.cpu = dma_alloc_coherent(vpu->dev, size,
   289							      &av1_dec->db_ctrl_col.dma,
   290							      GFP_KERNEL);
   291		if (!av1_dec->db_ctrl_col.cpu)
   292			goto buffer_allocation_error;
   293		av1_dec->db_ctrl_col.size = size;
   294	
   295		size = ALIGN(height_in_sb * 44 * ctx->bit_depth * 16 / 8, 128) * num_tile_cols;
   296		av1_dec->cdef_col.cpu = dma_alloc_coherent(vpu->dev, size,
   297							   &av1_dec->cdef_col.dma,
   298							   GFP_KERNEL);
   299		if (!av1_dec->cdef_col.cpu)
   300			goto buffer_allocation_error;
   301		av1_dec->cdef_col.size = size;
   302	
   303		size = ALIGN(height_in_sb * (3040 + 1280), 128) * num_tile_cols;
   304		av1_dec->sr_col.cpu = dma_alloc_coherent(vpu->dev, size,
   305							 &av1_dec->sr_col.dma,
   306							 GFP_KERNEL);
   307		if (!av1_dec->sr_col.cpu)
   308			goto buffer_allocation_error;
   309		av1_dec->sr_col.size = size;
   310	
   311		size = ALIGN(stripe_num * 1536 * ctx->bit_depth / 8, 128) * num_tile_cols;
   312		av1_dec->lr_col.cpu = dma_alloc_coherent(vpu->dev, size,
   313							 &av1_dec->lr_col.dma,
   314							 GFP_KERNEL);
   315		if (!av1_dec->lr_col.cpu)
   316			goto buffer_allocation_error;
   317		av1_dec->lr_col.size = size;
   318	
   319		av1_dec->num_tile_cols_allocated = num_tile_cols;
   320		return 0;
   321	
   322	buffer_allocation_error:
   323		rockchip_vpu981_av1_dec_tiles_free(ctx);
   324		return -ENOMEM;
   325	}
   326
Nicolas Dufresne Dec. 19, 2022, 9:56 p.m. UTC | #2
Le lundi 19 décembre 2022 à 16:56 +0100, Benjamin Gaignard a écrit :
> Implement AV1 stateless decoder for rockchip VPU981.
> It decode 8 and 10 bits AV1 bitstreams.
> AV1 scaling feature is done by the postprocessor.

Can you clarify? I knew the film grain was done by the postprocessor, but I
didn't expect spatial scaling to need it. On this aspect, I don't see any new
code to let the Hantro driver know that the postproc is needed. The frame header
value should be taken into account in hantro_needs_postproc() (a per-codec op
would be nice).

> 
> Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
> ---
>  drivers/media/platform/verisilicon/Makefile   |    1 +
>  .../media/platform/verisilicon/hantro_hw.h    |   64 +-
>  .../verisilicon/rockchip_vpu981_hw_av1_dec.c  | 2067 +++++++++++++++++
>  .../verisilicon/rockchip_vpu981_regs.h        |  477 ++++
>  4 files changed, 2607 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
>  create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
> 
> diff --git a/drivers/media/platform/verisilicon/Makefile b/drivers/media/platform/verisilicon/Makefile
> index d2b2679c00eb..c9a9806ab8c5 100644
> --- a/drivers/media/platform/verisilicon/Makefile
> +++ b/drivers/media/platform/verisilicon/Makefile
> @@ -18,6 +18,7 @@ hantro-vpu-y += \
>  		rockchip_vpu2_hw_h264_dec.o \
>  		rockchip_vpu2_hw_mpeg2_dec.o \
>  		rockchip_vpu2_hw_vp8_dec.o \
> +		rockchip_vpu981_hw_av1_dec.o \
>  		rockchip_av1_entropymode.o \
>  		hantro_jpeg.o \
>  		hantro_h264.o \
> diff --git a/drivers/media/platform/verisilicon/hantro_hw.h b/drivers/media/platform/verisilicon/hantro_hw.h
> index e395aeeae2f4..3c0a995998a5 100644
> --- a/drivers/media/platform/verisilicon/hantro_hw.h
> +++ b/drivers/media/platform/verisilicon/hantro_hw.h
> @@ -37,6 +37,9 @@
>  
>  #define NUM_REF_PICTURES	(V4L2_HEVC_DPB_ENTRIES_NUM_MAX + 1)
>  
> +#define AV1_REF_LIST_SIZE	8

This is the same as V4L2_AV1_TOTAL_REFS_PER_FRAME. AV1_TOTAL_REFS_PER_FRAME is
what this is called in the spec.

> +#define AV1_MAX_FRAME_BUF_COUNT	(V4L2_AV1_TOTAL_REFS_PER_FRAME + 1)
> +
>  struct hantro_dev;
>  struct hantro_ctx;
>  struct hantro_buf;
> @@ -250,23 +253,80 @@ struct hantro_vp9_dec_hw_ctx {
>  };
>  
>  /**
> - * hantro_av1_dec_hw_ctx
> + * struct hantro_av1_dec_ctrls
> + * @sequence:		AV1 Sequence
> + * @tile_group_entry:	AV1 Tile Group entry
> + * @frame:		AV1 Frame Header OBU
> + * @film_grain:		AV1 Film Grain
> + */
> +struct hantro_av1_dec_ctrls {
> +	const struct v4l2_ctrl_av1_sequence *sequence;
> +	const struct v4l2_ctrl_av1_tile_group_entry *tile_group_entry;
> +	const struct v4l2_ctrl_av1_frame *frame;
> +	const struct v4l2_ctrl_av1_film_grain *film_grain;
> +};
> +
> +struct hantro_av1_frame_ref {
> +	int width;
> +	int height;
> +	u64 timestamp;
> +	enum v4l2_av1_frame_type frame_type;
> +	int ref_count;
> +	u32 order_hint;
> +	u32 order_hints[V4L2_AV1_NUM_REF_FRAMES];
> +	int gm_mode;
> +	struct vb2_v4l2_buffer *vb2_ref;
> +};
> +
> +/**
> + * struct hantro_av1_dec_hw_ctx
> + * @db_data_col:	db tile col data buffer
> + * @db_ctrl_col:	db tile col ctrl buffer
> + * @cdef_col:		cdef tile col buffer
> + * @sr_col:		sr tile col buffer
> + * @lr_col:		lr tile col buffer
> + * @global_model:	global model buffer
> + * @tile_info:		tile info buffer
> + * @segment:		segmentation info buffer
> + * @prob_tbl:		probability table
> + * @prob_tbl_out:	probability table output
> + * @tile_buf:		tile buffer
> + * @ctrls:		V4L2 controls attached to a run
> + * @frame_refs:		reference frames info slots
> + * @ref_frame_sign_bias: array of sign bias
> + * @num_tile_cols_allocated: number of allocated tiles
>   * @cdfs:		current probabilities structure
>   * @cdfs_ndvc:		current mv probabilities structure
>   * @default_cdfs:	default probabilities structure
>   * @default_cdfs_ndvc:	default mv probabilties structure
>   * @cdfs_last:		stored probabilities structures
>   * @cdfs_last_ndvc:	stored mv probabilities structures
> + * @current_frame_index: index of the current in frame_refs array
>   */
>  struct hantro_av1_dec_hw_ctx {
> +	struct hantro_aux_buf db_data_col;
> +	struct hantro_aux_buf db_ctrl_col;
> +	struct hantro_aux_buf cdef_col;
> +	struct hantro_aux_buf sr_col;
> +	struct hantro_aux_buf lr_col;
> +	struct hantro_aux_buf global_model;
> +	struct hantro_aux_buf tile_info;
> +	struct hantro_aux_buf segment;
> +	struct hantro_aux_buf prob_tbl;
> +	struct hantro_aux_buf prob_tbl_out;
> +	struct hantro_aux_buf tile_buf;
> +	struct hantro_av1_dec_ctrls ctrls;
> +	struct hantro_av1_frame_ref frame_refs[AV1_MAX_FRAME_BUF_COUNT];
> +	uint32_t ref_frame_sign_bias[AV1_REF_LIST_SIZE];
> +	unsigned int num_tile_cols_allocated;
>  	struct av1cdfs *cdfs;
>  	struct mvcdfs  *cdfs_ndvc;
>  	struct av1cdfs default_cdfs;
>  	struct mvcdfs  default_cdfs_ndvc;
>  	struct av1cdfs cdfs_last[NUM_REF_FRAMES];
>  	struct mvcdfs  cdfs_last_ndvc[NUM_REF_FRAMES];
> +	int current_frame_index;
>  };
> -
>  /**
>   * struct hantro_postproc_ctx
>   *
> diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
> new file mode 100644
> index 000000000000..a183e4f35e00
> --- /dev/null
> +++ b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
> @@ -0,0 +1,2067 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2021, Collabora
> + *
> + * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
> + */
> +
> +#include <media/v4l2-mem2mem.h>
> +#include "hantro.h"
> +#include "hantro_v4l2.h"
> +#include "rockchip_vpu981_regs.h"
> +
> +#define AV1_DEC_MODE		17
> +#define GM_GLOBAL_MODELS_PER_FRAME	7
> +#define GLOBAL_MODEL_TOTAL_SIZE	(6 * 4 + 4 * 2)
> +#define GLOBAL_MODEL_SIZE	ALIGN(GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE, 2048)
> +#define AV1_MAX_TILES		128
> +#define AV1_TILE_INFO_SIZE	(AV1_MAX_TILES * 16)
> +#define AV1DEC_MAX_PIC_BUFFERS	24
> +#define AV1_REF_SCALE_SHIFT	14
> +#define AV1_INVALID_IDX		-1
> +#define MAX_FRAME_DISTANCE	31
> +#define AV1_PRIMARY_REF_NONE	7
> +#define AV1_TILE_SIZE		ALIGN(32 * 128, 4096)
> +/*
> + * These 3 values aren't defined enum v4l2_av1_segment_feature because
> + * they are not part of the specification
> + */
> +#define V4L2_AV1_SEG_LVL_ALT_LF_Y_H	2
> +#define V4L2_AV1_SEG_LVL_ALT_LF_U	3
> +#define V4L2_AV1_SEG_LVL_ALT_LF_V	4
> +
> +#define CLIP3(l, h, v)      ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))

The kernel already provides clamp() for this (note that its argument order
differs from CLIP3):

include/linux/minmax.h:#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)

> +
> +#define SUPERRES_SCALE_BITS 3
> +#define SCALE_NUMERATOR 8
> +#define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)
> +
> +#define RS_SUBPEL_BITS 6
> +#define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
> +#define RS_SCALE_SUBPEL_BITS 14
> +#define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
> +#define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
> +#define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))
> +
> +#define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
> +
> +#define LST_BUF_IDX (V4L2_AV1_REF_LAST_FRAME - V4L2_AV1_REF_LAST_FRAME)
> +#define LST2_BUF_IDX (V4L2_AV1_REF_LAST2_FRAME - V4L2_AV1_REF_LAST_FRAME)
> +#define LST3_BUF_IDX (V4L2_AV1_REF_LAST3_FRAME - V4L2_AV1_REF_LAST_FRAME)
> +#define GLD_BUF_IDX (V4L2_AV1_REF_GOLDEN_FRAME - V4L2_AV1_REF_LAST_FRAME)
> +#define BWD_BUF_IDX (V4L2_AV1_REF_BWDREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
> +#define ALT2_BUF_IDX (V4L2_AV1_REF_ALTREF2_FRAME - V4L2_AV1_REF_LAST_FRAME)
> +#define ALT_BUF_IDX (V4L2_AV1_REF_ALTREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
> +
> +#define DIV_LUT_PREC_BITS 14
> +#define DIV_LUT_BITS 8
> +#define DIV_LUT_NUM BIT(DIV_LUT_BITS)
> +#define WARP_PARAM_REDUCE_BITS 6
> +#define WARPEDMODEL_PREC_BITS 16
> +
> +#define AV1_DIV_ROUND_UP_POW2(value, n)			\
> +({							\
> +	typeof(n) _n  = n;				\
> +	typeof(value) _value = value;			\
> +	(_value + (BIT(_n) >> 1)) >> _n;		\
> +})
> +
> +#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n)				\
> +({									\
> +	typeof(n) _n_  = n;						\
> +	typeof(value) _value_ = value;					\
> +	(((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_))	\
> +		: AV1_DIV_ROUND_UP_POW2((_value_), (_n_)));		\
> +})
> +
> +struct rockchip_av1_film_grain {
> +	uint8_t scaling_lut_y[256];
> +	uint8_t scaling_lut_cb[256];
> +	uint8_t scaling_lut_cr[256];
> +	int16_t cropped_luma_grain_block[4096];
> +	int16_t cropped_chroma_grain_block[1024 * 2];
> +};
> +
> +static const short div_lut[DIV_LUT_NUM + 1] = {
> +	16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
> +	15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
> +	15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
> +	14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
> +	13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
> +	13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
> +	13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
> +	12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
> +	12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
> +	11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
> +	11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
> +	11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
> +	10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
> +	10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
> +	10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
> +	9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
> +	9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
> +	9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
> +	9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
> +	9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
> +	8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
> +	8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
> +	8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
> +	8240,  8224,  8208,  8192,
> +};
> +
> +static int rockchip_vpu981_get_frame_index(struct hantro_ctx *ctx, int ref)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> +	u64 timestamp;
> +	int i, idx = frame->ref_frame_idx[ref];
> +
> +	if (idx >= AV1_MAX_FRAME_BUF_COUNT || idx < 0)
> +		return AV1_INVALID_IDX;
> +
> +	timestamp = frame->reference_frame_ts[idx];
> +	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
> +		if (av1_dec->frame_refs[i].ref_count == 0)
> +			continue;
> +		if (av1_dec->frame_refs[i].timestamp == timestamp)
> +			return i;
> +	}

nit: the MediaTek driver optimizes this with a map.

> +
> +	return AV1_INVALID_IDX;
> +}
> +
> +static int rockchip_vpu981_get_order_hint(struct hantro_ctx *ctx, int ref)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	int idx = rockchip_vpu981_get_frame_index(ctx, ref);
> +
> +	if (idx != AV1_INVALID_IDX)
> +		return av1_dec->frame_refs[idx].order_hint;
> +
> +	return 0;
> +}
> +
> +static int rockchip_vpu981_av1_dec_frame_ref(struct hantro_ctx *ctx,
> +					     u64 timestamp)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> +	int i;
> +
> +	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
> +		if (av1_dec->frame_refs[i].ref_count == 0) {
> +			int j;
> +
> +			av1_dec->frame_refs[i].width =
> +			    frame->frame_width_minus_1 + 1;
> +			av1_dec->frame_refs[i].height =
> +			    frame->frame_height_minus_1 + 1;
> +			av1_dec->frame_refs[i].timestamp = timestamp;
> +			av1_dec->frame_refs[i].frame_type = frame->frame_type;
> +			av1_dec->frame_refs[i].order_hint = frame->order_hint;
> +			av1_dec->frame_refs[i].gm_mode =
> +				frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME + i];
> +			if (!av1_dec->frame_refs[i].vb2_ref)
> +				av1_dec->frame_refs[i].vb2_ref = hantro_get_dst_buf(ctx);
> +
> +			for (j = 0; j < V4L2_AV1_NUM_REF_FRAMES; j++)
> +				av1_dec->frame_refs[i].order_hints[j] = frame->order_hints[j];
> +
> +			av1_dec->frame_refs[i].ref_count++;

This ref_count is only ever 0 or 1. It would be nicer if it were a boolean, like
"valid" or "active".

> +			av1_dec->current_frame_index = i;
> +			return i;
> +		}
> +	}
> +
> +	return AV1_INVALID_IDX;
> +}
> +
> +static void rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx *ctx, int idx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +
> +	if (idx < 0)
> +		return;
> +
> +	av1_dec->frame_refs[idx].ref_count--;
> +
> +	if (av1_dec->frame_refs[idx].ref_count < 0)
> +		pr_warn("AV1 reference frames refcounting error (idx %d)\n", idx);

This looks like a BUG_ON() case; only a driver programming error could cause this.


> +}
> +
> +static void rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +
> +	int ref, idx;
> +
> +	for (idx = 0; idx < AV1_MAX_FRAME_BUF_COUNT; idx++) {
> +		u64 timestamp = av1_dec->frame_refs[idx].timestamp;
> +		bool used = false;
> +
> +		if (av1_dec->frame_refs[idx].ref_count == 0)
> +			continue;
> +
> +		for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++) {
> +			if (ctrls->frame->reference_frame_ts[ref] == timestamp)
> +				used = true;
> +		}
> +
> +		if (!used)
> +			rockchip_vpu981_av1_dec_frame_unref(ctx, idx);
> +	}
> +}
> +
> +static size_t rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx *ctx)
> +{
> +	return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8;
> +}
> +
> +static size_t rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx *ctx)
> +{
> +	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
> +
> +	return ALIGN((cr_offset * 3) / 2, 64);
> +}
> +
> +void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
> +{
> +	struct hantro_dev *vpu = ctx->dev;
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +
> +	if (av1_dec->db_data_col.cpu)
> +		dma_free_coherent(vpu->dev, av1_dec->db_data_col.size,
> +				  av1_dec->db_data_col.cpu,
> +				  av1_dec->db_data_col.dma);
> +	av1_dec->db_data_col.cpu = NULL;
> +
> +	if (av1_dec->db_ctrl_col.cpu)
> +		dma_free_coherent(vpu->dev, av1_dec->db_ctrl_col.size,
> +				  av1_dec->db_ctrl_col.cpu,
> +				  av1_dec->db_ctrl_col.dma);
> +	av1_dec->db_ctrl_col.cpu = NULL;
> +
> +	if (av1_dec->cdef_col.cpu)
> +		dma_free_coherent(vpu->dev, av1_dec->cdef_col.size,
> +				  av1_dec->cdef_col.cpu, av1_dec->cdef_col.dma);
> +	av1_dec->cdef_col.cpu = NULL;
> +
> +	if (av1_dec->sr_col.cpu)
> +		dma_free_coherent(vpu->dev, av1_dec->sr_col.size,
> +				  av1_dec->sr_col.cpu, av1_dec->sr_col.dma);
> +	av1_dec->sr_col.cpu = NULL;
> +
> +	if (av1_dec->lr_col.cpu)
> +		dma_free_coherent(vpu->dev, av1_dec->lr_col.size,
> +				  av1_dec->lr_col.cpu, av1_dec->lr_col.dma);
> +	av1_dec->lr_col.cpu = NULL;
> +}
> +
> +static int rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx *ctx)
> +{
> +	struct hantro_dev *vpu = ctx->dev;
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	unsigned int num_tile_cols = 1 << ctrls->tile_group_entry->tile_col;
> +	unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
> +	unsigned int height_in_sb = height / 64;
> +	unsigned int stripe_num = ((height + 8) + 63) / 64;
> +	size_t size;
> +
> +	if (num_tile_cols <= av1_dec->num_tile_cols_allocated)
> +		return 0;
> +
> +	rockchip_vpu981_av1_dec_tiles_free(ctx);
> +
> +	size = ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols;
> +	av1_dec->db_data_col.cpu = dma_alloc_coherent(vpu->dev, size,
> +						      &av1_dec->db_data_col.dma,
> +						      GFP_KERNEL);
> +	if (!av1_dec->db_data_col.cpu)
> +		goto buffer_allocation_error;
> +	av1_dec->db_data_col.size = size;
> +
> +	size = ALIGN(height * 2 * 16 / 4, 128) * num_tile_cols;

No bit_depth scaling for this buffer?

> +	av1_dec->db_ctrl_col.cpu = dma_alloc_coherent(vpu->dev, size,
> +						      &av1_dec->db_ctrl_col.dma,
> +						      GFP_KERNEL);
> +	if (!av1_dec->db_ctrl_col.cpu)
> +		goto buffer_allocation_error;
> +	av1_dec->db_ctrl_col.size = size;
> +
> +	size = ALIGN(height_in_sb * 44 * ctx->bit_depth * 16 / 8, 128) * num_tile_cols;
> +	av1_dec->cdef_col.cpu = dma_alloc_coherent(vpu->dev, size,
> +						   &av1_dec->cdef_col.dma,
> +						   GFP_KERNEL);
> +	if (!av1_dec->cdef_col.cpu)
> +		goto buffer_allocation_error;
> +	av1_dec->cdef_col.size = size;
> +
> +	size = ALIGN(height_in_sb * (3040 + 1280), 128) * num_tile_cols;
> +	av1_dec->sr_col.cpu = dma_alloc_coherent(vpu->dev, size,
> +						 &av1_dec->sr_col.dma,
> +						 GFP_KERNEL);
> +	if (!av1_dec->sr_col.cpu)
> +		goto buffer_allocation_error;
> +	av1_dec->sr_col.size = size;
> +
> +	size = ALIGN(stripe_num * 1536 * ctx->bit_depth / 8, 128) * num_tile_cols;
> +	av1_dec->lr_col.cpu = dma_alloc_coherent(vpu->dev, size,
> +						 &av1_dec->lr_col.dma,
> +						 GFP_KERNEL);
> +	if (!av1_dec->lr_col.cpu)
> +		goto buffer_allocation_error;
> +	av1_dec->lr_col.size = size;
> +
> +	av1_dec->num_tile_cols_allocated = num_tile_cols;
> +	return 0;
> +
> +buffer_allocation_error:
> +	rockchip_vpu981_av1_dec_tiles_free(ctx);
> +	return -ENOMEM;
> +}
> +
> +void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
> +{
> +	struct hantro_dev *vpu = ctx->dev;
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +
> +	if (av1_dec->global_model.cpu)
> +		dma_free_coherent(vpu->dev, av1_dec->global_model.size,
> +				  av1_dec->global_model.cpu,
> +				  av1_dec->global_model.dma);
> +	av1_dec->global_model.cpu = NULL;
> +
> +	if (av1_dec->tile_info.cpu)
> +		dma_free_coherent(vpu->dev, av1_dec->tile_info.size,
> +				  av1_dec->tile_info.cpu,
> +				  av1_dec->tile_info.dma);
> +	av1_dec->tile_info.cpu = NULL;
> +
> +	if (av1_dec->prob_tbl.cpu)
> +		dma_free_coherent(vpu->dev, av1_dec->prob_tbl.size,
> +				  av1_dec->prob_tbl.cpu, av1_dec->prob_tbl.dma);
> +	av1_dec->prob_tbl.cpu = NULL;
> +
> +	if (av1_dec->prob_tbl_out.cpu)
> +		dma_free_coherent(vpu->dev, av1_dec->prob_tbl_out.size,
> +				  av1_dec->prob_tbl_out.cpu,
> +				  av1_dec->prob_tbl_out.dma);
> +	av1_dec->prob_tbl_out.cpu = NULL;
> +
> +	if (av1_dec->tile_buf.cpu)
> +		dma_free_coherent(vpu->dev, av1_dec->tile_buf.size,
> +				  av1_dec->tile_buf.cpu, av1_dec->tile_buf.dma);
> +	av1_dec->tile_buf.cpu = NULL;
> +
> +	rockchip_vpu981_av1_dec_tiles_free(ctx);
> +}
> +
> +int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
> +{
> +	struct hantro_dev *vpu = ctx->dev;
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +
> +	memset(av1_dec, 0, sizeof(*av1_dec));
> +
> +	av1_dec->global_model.cpu = dma_alloc_coherent(vpu->dev, GLOBAL_MODEL_SIZE,
> +						       &av1_dec->global_model.dma,
> +						       GFP_KERNEL);
> +	if (!av1_dec->global_model.cpu)
> +		return -ENOMEM;
> +	av1_dec->global_model.size = GLOBAL_MODEL_SIZE;
> +
> +	av1_dec->tile_info.cpu = dma_alloc_coherent(vpu->dev, AV1_MAX_TILES,
> +						    &av1_dec->tile_info.dma,
> +						    GFP_KERNEL);
> +	if (!av1_dec->tile_info.cpu)
> +		return -ENOMEM;
> +	av1_dec->tile_info.size = AV1_MAX_TILES;
> +
> +	av1_dec->prob_tbl.cpu = dma_alloc_coherent(vpu->dev,
> +						   ALIGN(sizeof(struct av1cdfs), 2048),
> +						   &av1_dec->prob_tbl.dma,
> +						   GFP_KERNEL);
> +	if (!av1_dec->prob_tbl.cpu)
> +		return -ENOMEM;
> +	av1_dec->prob_tbl.size = ALIGN(sizeof(struct av1cdfs), 2048);
> +
> +	av1_dec->prob_tbl_out.cpu = dma_alloc_coherent(vpu->dev,
> +						       ALIGN(sizeof(struct av1cdfs), 2048),
> +						       &av1_dec->prob_tbl_out.dma,
> +						       GFP_KERNEL);
> +	if (!av1_dec->prob_tbl_out.cpu)
> +		return -ENOMEM;
> +	av1_dec->prob_tbl_out.size = ALIGN(sizeof(struct av1cdfs), 2048);
> +	av1_dec->cdfs = &av1_dec->default_cdfs;
> +	av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
> +
> +	rockchip_av1_set_default_cdfs(av1_dec->cdfs, av1_dec->cdfs_ndvc);
> +
> +	av1_dec->tile_buf.cpu = dma_alloc_coherent(vpu->dev,
> +						   AV1_TILE_SIZE,
> +						   &av1_dec->tile_buf.dma,
> +						   GFP_KERNEL);
> +	if (!av1_dec->tile_buf.cpu)
> +		return -ENOMEM;
> +	av1_dec->tile_buf.size = AV1_TILE_SIZE;
> +
> +	return 0;
> +}
> +
> +static int rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +
> +	ctrls->sequence = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_SEQUENCE);
> +	if (WARN_ON(!ctrls->sequence))
> +		return -EINVAL;
> +
> +	ctrls->tile_group_entry =
> +	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
> +	if (WARN_ON(!ctrls->tile_group_entry))
> +		return -EINVAL;
> +
> +	ctrls->frame = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FRAME);
> +	if (WARN_ON(!ctrls->frame))
> +		return -EINVAL;
> +
> +	ctrls->film_grain =
> +	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FILM_GRAIN);
> +
> +	return rockchip_vpu981_av1_dec_tiles_reallocate(ctx);
> +}
> +
> +static inline int rockchip_vpu981_av1_dec_get_msb(u32 n)
> +{
> +	if (n == 0)
> +		return 0;
> +	return 31 ^ __builtin_clz(n);
> +}
> +
> +static short rockchip_vpu981_av1_dec_resolve_divisor_32(u32 d, short *shift)
> +{
> +	int f;
> +	uint64_t e;
> +
> +	*shift = rockchip_vpu981_av1_dec_get_msb(d);
> +	/* e is obtained from D after resetting the most significant 1 bit. */
> +	e = d - ((u32)1 << *shift);
> +	/* Get the most significant DIV_LUT_BITS (8) bits of e into f */
> +	if (*shift > DIV_LUT_BITS)
> +		f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS);
> +	else
> +		f = e << (DIV_LUT_BITS - *shift);
> +	if (f > DIV_LUT_NUM)
> +		return -1;
> +	*shift += DIV_LUT_PREC_BITS;
> +	/* Use f as lookup into the precomputed table of multipliers */
> +	return div_lut[f];
> +}
> +
> +static void rockchip_vpu981_av1_dec_get_shear_params(const uint32_t *params,
> +	int64_t *alpha, int64_t *beta, int64_t *gamma, int64_t *delta)
> +{
> +	const int *mat = params;
> +	short shift;
> +	short y;
> +	long long gv, dv;
> +
> +	if (mat[2] <= 0)
> +		return;
> +
> +	*alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
> +	*beta = clamp_val(mat[3], S16_MIN, S16_MAX);
> +
> +	y = rockchip_vpu981_av1_dec_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
> +
> +	gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
> +
> +	*gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), S16_MIN, S16_MAX);
> +
> +	dv = ((long long)mat[3] * mat[4]) * y;
> +	*delta = clamp_val(
> +		mat[5] -
> +		(int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - (1 << WARPEDMODEL_PREC_BITS),
> +		S16_MIN, S16_MAX);
> +
> +	*alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS)
> +		 * (1 << WARP_PARAM_REDUCE_BITS);
> +	*beta = AV1_DIV_ROUND_UP_POW2_SIGNED(*beta, WARP_PARAM_REDUCE_BITS)
> +		* (1 << WARP_PARAM_REDUCE_BITS);
> +	*gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS)
> +		 * (1 << WARP_PARAM_REDUCE_BITS);
> +	*delta = AV1_DIV_ROUND_UP_POW2_SIGNED(*delta, WARP_PARAM_REDUCE_BITS)
> +		* (1 << WARP_PARAM_REDUCE_BITS);
> +}
> +
> +static void rockchip_vpu981_av1_dec_set_global_model(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> +	const struct v4l2_av1_global_motion *gm = &frame->global_motion;
> +	uint8_t *dst = av1_dec->global_model.cpu;
> +	struct hantro_dev *vpu = ctx->dev;
> +	int ref_frame, i;
> +
> +	memset(dst, 0, GLOBAL_MODEL_SIZE);
> +	for (ref_frame = 0; ref_frame < V4L2_AV1_REFS_PER_FRAME; ++ref_frame) {
> +		int64_t alpha = 0, beta = 0, gamma = 0, delta = 0;
> +
> +		for (i = 0; i < 6; ++i) {
> +			if (i == 2)
> +				*(int32_t *)dst =
> +					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][3];
> +			else if (i == 3)
> +				*(int32_t *)dst =
> +					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][2];
> +			else
> +				*(int32_t *)dst =
> +					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][i];
> +			dst += 4;
> +		}
> +
> +		if (gm->type[V4L2_AV1_REF_LAST_FRAME + ref_frame] <= V4L2_AV1_WARP_MODEL_AFFINE)
> +			rockchip_vpu981_av1_dec_get_shear_params(
> +					&gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][0],
> +					&alpha, &beta, &gamma, &delta);
> +
> +		*(int16_t *)dst = alpha;
> +		dst += 2;
> +		*(int16_t *)dst = beta;
> +		dst += 2;
> +		*(int16_t *)dst = gamma;
> +		dst += 2;
> +		*(int16_t *)dst = delta;
> +		dst += 2;
> +	}
> +
> +	hantro_write_addr(vpu, AV1_GLOBAL_MODEL, av1_dec->global_model.dma);
> +}
> +
> +static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	struct v4l2_av1_tile_info tile_info = ctrls->frame->tile_info;
> +	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
> +	    ctrls->tile_group_entry;
> +	int context_update_y =
> +	    tile_info.context_update_tile_id / tile_info.tile_cols;
> +	int context_update_x =
> +	    tile_info.context_update_tile_id % tile_info.tile_cols;
> +	int context_update_tile_id =
> +	    context_update_x * tile_info.tile_rows + context_update_y;
> +	uint8_t *dst = av1_dec->tile_info.cpu;
> +	struct hantro_dev *vpu = ctx->dev;
> +	int tile0, tile1;
> +
> +	memset(dst, 0, av1_dec->tile_info.size);
> +
> +	for (tile0 = 0; tile0 < tile_info.tile_cols; tile0++) {
> +		for (tile1 = 0; tile1 < tile_info.tile_rows; tile1++) {
> +			int tile_id = tile1 * tile_info.tile_cols + tile0;
> +			uint32_t start, end;
> +			uint32_t y0 =
> +			    tile_info.height_in_sbs_minus_1[tile1] + 1;
> +			uint32_t x0 = tile_info.width_in_sbs_minus_1[tile0] + 1;
> +
> +			// tile size in SB units (width,height)
> +			*dst++ = x0;
> +			*dst++ = 0;
> +			*dst++ = 0;
> +			*dst++ = 0;
> +			*dst++ = y0;
> +			*dst++ = 0;
> +			*dst++ = 0;
> +			*dst++ = 0;
> +
> +			// tile start position
> +			start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
> +			*dst++ = start & 255;
> +			*dst++ = (start >> 8) & 255;
> +			*dst++ = (start >> 16) & 255;
> +			*dst++ = (start >> 24) & 255;
> +
> +			// # of bytes in tile data
> +			end = start + group_entry[tile_id].tile_size;
> +			*dst++ = end & 255;
> +			*dst++ = (end >> 8) & 255;
> +			*dst++ = (end >> 16) & 255;
> +			*dst++ = (end >> 24) & 255;
> +		}
> +	}
> +
> +	hantro_reg_write(vpu, &av1_multicore_expect_context_update,
> +			 !!(context_update_x == 0));
> +	hantro_reg_write(vpu, &av1_tile_enable, !!((tile_info.tile_cols > 1)
> +						   || (tile_info.tile_rows > 1)));
> +	hantro_reg_write(vpu, &av1_num_tile_cols_8k, tile_info.tile_cols);
> +	hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info.tile_rows);
> +	hantro_reg_write(vpu, &av1_context_update_tile_id,
> +			 context_update_tile_id);
> +	hantro_reg_write(vpu, &av1_tile_transpose, 1);
> +	if (context_update_tile_id) {
> +		hantro_reg_write(vpu, &av1_dec_tile_size_mag,
> +				 tile_info.tile_size_bytes);
> +	} else
> +		hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3);
> +
> +	hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma);
> +}
> +
> +static int rockchip_vpu981_av1_dec_get_relative_dist(struct hantro_ctx *ctx,
> +						     int a, int b)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	int bits = ctrls->sequence->order_hint_bits - 1;
> +	int diff, m;
> +
> +	if (!ctrls->sequence->order_hint_bits)
> +		return 0;
> +
> +	diff = a - b;
> +	m = 1 << bits;
> +	diff = (diff & (m - 1)) - (diff & m);
> +
> +	return diff;
> +}

I think I've seen this one in MTK. It would be nice to eventually create a
v4l2-av1.h and share this. That can happen afterward though, to avoid making this too complex.

> +
> +static void rockchip_vpu981_av1_dec_set_frame_sign_bias(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> +	const struct v4l2_ctrl_av1_sequence *sequence = ctrls->sequence;
> +	int i;
> +
> +	if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
> +		for (i = 0; i < AV1_REF_LIST_SIZE; i++)
> +			av1_dec->ref_frame_sign_bias[i] = 0;
> +
> +		return;
> +	}
> +	// Identify the nearest forward and backward references.
> +	for (i = 0; i < AV1_REF_LIST_SIZE - 1; i++) {
> +		if (rockchip_vpu981_get_frame_index(ctx, i) >= 0) {
> +			int rel_off =
> +			    rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +								      rockchip_vpu981_get_order_hint
> +								      (ctx, i),
> +								      frame->order_hint);
> +			av1_dec->ref_frame_sign_bias[i + 1] = (rel_off <= 0) ? 0 : 1;
> +		}
> +	}
> +}
> +
> +static bool
> +rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
> +				int width, int height)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> +	struct hantro_dev *vpu = ctx->dev;
> +	struct hantro_decoded_buffer *dst;
> +	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
> +	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
> +	size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
> +	int cur_width = frame->frame_width_minus_1 + 1;
> +	int cur_height = frame->frame_height_minus_1 + 1;
> +	int scale_width =
> +	    ((width << AV1_REF_SCALE_SHIFT) + cur_width / 2) / cur_width;
> +	int scale_height =
> +	    ((height << AV1_REF_SCALE_SHIFT) + cur_height / 2) / cur_height;
> +
> +	switch (ref) {
> +	case 0:
> +		hantro_reg_write(vpu, &av1_ref0_height, height);
> +		hantro_reg_write(vpu, &av1_ref0_width, width);
> +		hantro_reg_write(vpu, &av1_ref0_ver_scale, scale_width);
> +		hantro_reg_write(vpu, &av1_ref0_hor_scale, scale_height);
> +		break;
> +	case 1:
> +		hantro_reg_write(vpu, &av1_ref1_height, height);
> +		hantro_reg_write(vpu, &av1_ref1_width, width);
> +		hantro_reg_write(vpu, &av1_ref1_ver_scale, scale_width);
> +		hantro_reg_write(vpu, &av1_ref1_hor_scale, scale_height);
> +		break;
> +	case 2:
> +		hantro_reg_write(vpu, &av1_ref2_height, height);
> +		hantro_reg_write(vpu, &av1_ref2_width, width);
> +		hantro_reg_write(vpu, &av1_ref2_ver_scale, scale_width);
> +		hantro_reg_write(vpu, &av1_ref2_hor_scale, scale_height);
> +		break;
> +	case 3:
> +		hantro_reg_write(vpu, &av1_ref3_height, height);
> +		hantro_reg_write(vpu, &av1_ref3_width, width);
> +		hantro_reg_write(vpu, &av1_ref3_ver_scale, scale_width);
> +		hantro_reg_write(vpu, &av1_ref3_hor_scale, scale_height);
> +		break;
> +	case 4:
> +		hantro_reg_write(vpu, &av1_ref4_height, height);
> +		hantro_reg_write(vpu, &av1_ref4_width, width);
> +		hantro_reg_write(vpu, &av1_ref4_ver_scale, scale_width);
> +		hantro_reg_write(vpu, &av1_ref4_hor_scale, scale_height);
> +		break;
> +	case 5:
> +		hantro_reg_write(vpu, &av1_ref5_height, height);
> +		hantro_reg_write(vpu, &av1_ref5_width, width);
> +		hantro_reg_write(vpu, &av1_ref5_ver_scale, scale_width);
> +		hantro_reg_write(vpu, &av1_ref5_hor_scale, scale_height);
> +		break;
> +	case 6:
> +		hantro_reg_write(vpu, &av1_ref6_height, height);
> +		hantro_reg_write(vpu, &av1_ref6_width, width);
> +		hantro_reg_write(vpu, &av1_ref6_ver_scale, scale_width);
> +		hantro_reg_write(vpu, &av1_ref6_hor_scale, scale_height);
> +		break;
> +	default:
> +		pr_warn("AV1 invalid reference frame index\n");
> +	}
> +
> +	dst = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
> +	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
> +	chroma_addr = luma_addr + cr_offset;
> +	mv_addr = luma_addr + mv_offset;
> +
> +	hantro_write_addr(vpu, AV1_REFERENCE_Y(ref), luma_addr);
> +	hantro_write_addr(vpu, AV1_REFERENCE_CB(ref), chroma_addr);
> +	hantro_write_addr(vpu, AV1_REFERENCE_MV(ref), mv_addr);
> +
> +	return (scale_width != (1 << AV1_REF_SCALE_SHIFT))
> +		|| (scale_height != (1 << AV1_REF_SCALE_SHIFT));
> +}
> +
> +static void rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx *ctx,
> +						  int ref, int val)
> +{
> +	struct hantro_dev *vpu = ctx->dev;
> +
> +	switch (ref) {
> +	case 0:
> +		hantro_reg_write(vpu, &av1_ref0_sign_bias, val);
> +		break;
> +	case 1:
> +		hantro_reg_write(vpu, &av1_ref1_sign_bias, val);
> +		break;
> +	case 2:
> +		hantro_reg_write(vpu, &av1_ref2_sign_bias, val);
> +		break;
> +	case 3:
> +		hantro_reg_write(vpu, &av1_ref3_sign_bias, val);
> +		break;
> +	case 4:
> +		hantro_reg_write(vpu, &av1_ref4_sign_bias, val);
> +		break;
> +	case 5:
> +		hantro_reg_write(vpu, &av1_ref5_sign_bias, val);
> +		break;
> +	case 6:
> +		hantro_reg_write(vpu, &av1_ref6_sign_bias, val);
> +		break;
> +	default:
> +		pr_warn("AV1 invalid sign bias index\n");
> +		break;
> +	}
> +}
> +
> +static void rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> +	const struct v4l2_av1_segmentation *seg = &frame->segmentation;
> +	uint32_t segval[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX] = { 0 };
> +	struct hantro_dev *vpu = ctx->dev;
> +	uint8_t segsign = 0, preskip_segid = 0, last_active_seg = 0, i, j;
> +
> +	if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED)
> +	    && (frame->primary_ref_frame < V4L2_AV1_REFS_PER_FRAME)) {
> +		int idx = rockchip_vpu981_get_frame_index(ctx, frame->primary_ref_frame);
> +
> +		if (idx >= 0) {
> +			dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
> +			size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
> +			size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
> +
> +			luma_addr =
> +				hantro_get_dec_buf_addr(ctx,
> +							&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
> +			chroma_addr = luma_addr + cr_offset;
> +			mv_addr = luma_addr + mv_offset;
> +
> +			hantro_write_addr(vpu, AV1_SEGMENTATION, mv_addr);
> +			hantro_reg_write(vpu, &av1_use_temporal3_mvs, 1);
> +		}
> +	}
> +
> +	hantro_reg_write(vpu, &av1_segment_temp_upd_e,
> +			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE));
> +	hantro_reg_write(vpu, &av1_segment_upd_e,
> +			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP));
> +	hantro_reg_write(vpu, &av1_segment_e,
> +			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED));
> +
> +	hantro_reg_write(vpu, &av1_error_resilient,
> +			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE));
> +
> +	if (IS_INTRA(frame->frame_type)
> +	    || !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE)) {
> +		hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
> +	}
> +
> +	if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED)) {
> +		int s;
> +
> +		for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
> +			if (seg->feature_enabled[s] &
> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
> +				segval[s][V4L2_AV1_SEG_LVL_ALT_Q] =
> +				    CLIP3(0, 255,
> +					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q]));
> +				segsign |=
> +					(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q] < 0) << s;
> +			}
> +
> +			if (seg->feature_enabled[s] &
> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_V))
> +				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V] =
> +					CLIP3(-63, 63,
> +					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]));
> +
> +			if (seg->feature_enabled[s] &
> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_H))
> +				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H] =
> +				    CLIP3(-63, 63,
> +					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]));
> +
> +			if (seg->feature_enabled[s] &
> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_U))
> +				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_U] =
> +				    CLIP3(-63, 63,
> +					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_U]));
> +
> +			if (seg->feature_enabled[s] &
> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_V))
> +				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_V] =
> +				    CLIP3(-63, 63,
> +					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_V]));
> +
> +			if (frame->frame_type && seg->feature_enabled[s] &
> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_FRAME))
> +				segval[s][V4L2_AV1_SEG_LVL_REF_FRAME]++;
> +
> +			if (seg->feature_enabled[s] &
> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_SKIP))
> +				segval[s][V4L2_AV1_SEG_LVL_REF_SKIP] = 1;
> +
> +			if (seg->feature_enabled[s] &
> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_GLOBALMV))
> +				segval[s][V4L2_AV1_SEG_LVL_REF_GLOBALMV] = 1;
> +		}
> +	}
> +
> +	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
> +		for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++) {
> +			if (seg->feature_enabled[i]
> +			    & V4L2_AV1_SEGMENT_FEATURE_ENABLED(j)) {
> +				preskip_segid |= (j >= V4L2_AV1_SEG_LVL_REF_FRAME);
> +				last_active_seg = max(i, last_active_seg);
> +			}
> +		}
> +	}
> +
> +	hantro_reg_write(vpu, &av1_last_active_seg, last_active_seg);
> +	hantro_reg_write(vpu, &av1_preskip_segid, preskip_segid);
> +
> +	hantro_reg_write(vpu, &av1_seg_quant_sign, segsign);
> +
> +	/* Write QP, filter level, ref frame and skip for every segment */
> +	hantro_reg_write(vpu, &av1_quant_seg0,
> +			 segval[0][V4L2_AV1_SEG_LVL_ALT_Q]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg0,
> +			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg0,
> +			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg0,
> +			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg0,
> +			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> +	hantro_reg_write(vpu, &av1_refpic_seg0,
> +			 segval[0][V4L2_AV1_SEG_LVL_REF_FRAME]);
> +	hantro_reg_write(vpu, &av1_skip_seg0,
> +			 segval[0][V4L2_AV1_SEG_LVL_REF_SKIP]);
> +	hantro_reg_write(vpu, &av1_global_mv_seg0,
> +			 segval[0][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> +
> +	hantro_reg_write(vpu, &av1_quant_seg1,
> +			 segval[1][V4L2_AV1_SEG_LVL_ALT_Q]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg1,
> +			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg1,
> +			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg1,
> +			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg1,
> +			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> +	hantro_reg_write(vpu, &av1_refpic_seg1,
> +			 segval[1][V4L2_AV1_SEG_LVL_REF_FRAME]);
> +	hantro_reg_write(vpu, &av1_skip_seg1,
> +			 segval[1][V4L2_AV1_SEG_LVL_REF_SKIP]);
> +	hantro_reg_write(vpu, &av1_global_mv_seg1,
> +			 segval[1][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> +
> +	hantro_reg_write(vpu, &av1_quant_seg2,
> +			 segval[2][V4L2_AV1_SEG_LVL_ALT_Q]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg2,
> +			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg2,
> +			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg2,
> +			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg2,
> +			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> +	hantro_reg_write(vpu, &av1_refpic_seg2,
> +			 segval[2][V4L2_AV1_SEG_LVL_REF_FRAME]);
> +	hantro_reg_write(vpu, &av1_skip_seg2,
> +			 segval[2][V4L2_AV1_SEG_LVL_REF_SKIP]);
> +	hantro_reg_write(vpu, &av1_global_mv_seg2,
> +			 segval[2][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> +
> +	hantro_reg_write(vpu, &av1_quant_seg3,
> +			 segval[3][V4L2_AV1_SEG_LVL_ALT_Q]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg3,
> +			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg3,
> +			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg3,
> +			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg3,
> +			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> +	hantro_reg_write(vpu, &av1_refpic_seg3,
> +			 segval[3][V4L2_AV1_SEG_LVL_REF_FRAME]);
> +	hantro_reg_write(vpu, &av1_skip_seg3,
> +			 segval[3][V4L2_AV1_SEG_LVL_REF_SKIP]);
> +	hantro_reg_write(vpu, &av1_global_mv_seg3,
> +			 segval[3][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> +
> +	hantro_reg_write(vpu, &av1_quant_seg4,
> +			 segval[4][V4L2_AV1_SEG_LVL_ALT_Q]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg4,
> +			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg4,
> +			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg4,
> +			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg4,
> +			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> +	hantro_reg_write(vpu, &av1_refpic_seg4,
> +			 segval[4][V4L2_AV1_SEG_LVL_REF_FRAME]);
> +	hantro_reg_write(vpu, &av1_skip_seg4,
> +			 segval[4][V4L2_AV1_SEG_LVL_REF_SKIP]);
> +	hantro_reg_write(vpu, &av1_global_mv_seg4,
> +			 segval[4][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> +
> +	hantro_reg_write(vpu, &av1_quant_seg5,
> +			 segval[5][V4L2_AV1_SEG_LVL_ALT_Q]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg5,
> +			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg5,
> +			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg5,
> +			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg5,
> +			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> +	hantro_reg_write(vpu, &av1_refpic_seg5,
> +			 segval[5][V4L2_AV1_SEG_LVL_REF_FRAME]);
> +	hantro_reg_write(vpu, &av1_skip_seg5,
> +			 segval[5][V4L2_AV1_SEG_LVL_REF_SKIP]);
> +	hantro_reg_write(vpu, &av1_global_mv_seg5,
> +			 segval[5][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> +
> +	hantro_reg_write(vpu, &av1_quant_seg6,
> +			 segval[6][V4L2_AV1_SEG_LVL_ALT_Q]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg6,
> +			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg6,
> +			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg6,
> +			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg6,
> +			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> +	hantro_reg_write(vpu, &av1_refpic_seg6,
> +			 segval[6][V4L2_AV1_SEG_LVL_REF_FRAME]);
> +	hantro_reg_write(vpu, &av1_skip_seg6,
> +			 segval[6][V4L2_AV1_SEG_LVL_REF_SKIP]);
> +	hantro_reg_write(vpu, &av1_global_mv_seg6,
> +			 segval[6][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> +
> +	hantro_reg_write(vpu, &av1_quant_seg7,
> +			 segval[7][V4L2_AV1_SEG_LVL_ALT_Q]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg7,
> +			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg7,
> +			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg7,
> +			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg7,
> +			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> +	hantro_reg_write(vpu, &av1_refpic_seg7,
> +			 segval[7][V4L2_AV1_SEG_LVL_REF_FRAME]);
> +	hantro_reg_write(vpu, &av1_skip_seg7,
> +			 segval[7][V4L2_AV1_SEG_LVL_REF_SKIP]);
> +	hantro_reg_write(vpu, &av1_global_mv_seg7,
> +			 segval[7][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> +}
> +
> +static bool rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> +	const struct v4l2_av1_segmentation *segmentation = &frame->segmentation;
> +	const struct v4l2_av1_quantization *quantization = &frame->quantization;
> +	int i;
> +
> +	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
> +		int qindex = quantization->base_q_idx;
> +
> +		if (segmentation->feature_enabled[i] &
> +		    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
> +			qindex += segmentation->feature_data[i][V4L2_AV1_SEG_LVL_ALT_Q];
> +		}
> +		qindex = CLIP3(0, 255, qindex);
> +
> +		if (qindex
> +		    || quantization->delta_q_y_dc
> +		    || quantization->delta_q_u_dc
> +		    || quantization->delta_q_u_ac
> +		    || quantization->delta_q_v_dc || quantization->delta_q_v_ac)
> +			return false;
> +	}
> +	return true;
> +}
> +
> +static void rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> +	const struct v4l2_av1_loop_filter *loop_filter = &frame->loop_filter;
> +	bool filtering_dis = (loop_filter->level[0] == 0)
> +			     && (loop_filter->level[1] == 0);
> +	struct hantro_dev *vpu = ctx->dev;
> +
> +	hantro_reg_write(vpu, &av1_filtering_dis, filtering_dis);
> +	hantro_reg_write(vpu, &av1_filt_level_base_gt32, loop_filter->level[0] > 32);
> +	hantro_reg_write(vpu, &av1_filt_sharpness, loop_filter->sharpness);
> +
> +	hantro_reg_write(vpu, &av1_filt_level0, loop_filter->level[0]);
> +	hantro_reg_write(vpu, &av1_filt_level1, loop_filter->level[1]);
> +	hantro_reg_write(vpu, &av1_filt_level2, loop_filter->level[2]);
> +	hantro_reg_write(vpu, &av1_filt_level3, loop_filter->level[3]);
> +
> +	if (loop_filter->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED
> +	    && !rockchip_vpu981_av1_dec_is_lossless(ctx)
> +	    && !(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC)) {
> +		hantro_reg_write(vpu, &av1_filt_ref_adj_0,
> +				 loop_filter->ref_deltas[0]);
> +		hantro_reg_write(vpu, &av1_filt_ref_adj_1,
> +				 loop_filter->ref_deltas[1]);
> +		hantro_reg_write(vpu, &av1_filt_ref_adj_2,
> +				 loop_filter->ref_deltas[2]);
> +		hantro_reg_write(vpu, &av1_filt_ref_adj_3,
> +				 loop_filter->ref_deltas[3]);
> +		hantro_reg_write(vpu, &av1_filt_ref_adj_4,
> +				 loop_filter->ref_deltas[4]);
> +		hantro_reg_write(vpu, &av1_filt_ref_adj_5,
> +				 loop_filter->ref_deltas[5]);
> +		hantro_reg_write(vpu, &av1_filt_ref_adj_6,
> +				 loop_filter->ref_deltas[6]);
> +		hantro_reg_write(vpu, &av1_filt_ref_adj_7,
> +				 loop_filter->ref_deltas[7]);
> +		hantro_reg_write(vpu, &av1_filt_mb_adj_0,
> +				 loop_filter->mode_deltas[0]);
> +		hantro_reg_write(vpu, &av1_filt_mb_adj_1,
> +				 loop_filter->mode_deltas[1]);
> +	} else {
> +		hantro_reg_write(vpu, &av1_filt_ref_adj_0, 0);
> +		hantro_reg_write(vpu, &av1_filt_ref_adj_1, 0);
> +		hantro_reg_write(vpu, &av1_filt_ref_adj_2, 0);
> +		hantro_reg_write(vpu, &av1_filt_ref_adj_3, 0);
> +		hantro_reg_write(vpu, &av1_filt_ref_adj_4, 0);
> +		hantro_reg_write(vpu, &av1_filt_ref_adj_5, 0);
> +		hantro_reg_write(vpu, &av1_filt_ref_adj_6, 0);
> +		hantro_reg_write(vpu, &av1_filt_ref_adj_7, 0);
> +		hantro_reg_write(vpu, &av1_filt_mb_adj_0, 0);
> +		hantro_reg_write(vpu, &av1_filt_mb_adj_1, 0);
> +	}
> +
> +	hantro_write_addr(vpu, AV1_DB_DATA_COL, av1_dec->db_data_col.dma);
> +	hantro_write_addr(vpu, AV1_DB_CTRL_COL, av1_dec->db_ctrl_col.dma);
> +}
> +
> +static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> +	bool frame_is_intra = IS_INTRA(frame->frame_type);
> +	struct av1cdfs *out_cdfs = (struct av1cdfs *)av1_dec->prob_tbl_out.cpu;
> +	int i;
> +
> +	if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
> +		return;
> +
> +	for (i = 0; i < NUM_REF_FRAMES; i++) {
> +		if (frame->refresh_frame_flags & (1 << i)) {
> +			struct mvcdfs stored_mv_cdf;
> +
> +			rockchip_av1_get_cdfs(ctx, i);
> +			stored_mv_cdf = av1_dec->cdfs->mv_cdf;
> +			*av1_dec->cdfs = *out_cdfs;
> +			if (frame_is_intra) {
> +				av1_dec->cdfs->mv_cdf = stored_mv_cdf;
> +				*av1_dec->cdfs_ndvc = out_cdfs->mv_cdf;
> +			}
> +			rockchip_av1_store_cdfs(ctx,
> +						frame->refresh_frame_flags);
> +			break;
> +		}
> +	}
> +}
> +
> +void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx)
> +{
> +	rockchip_vpu981_av1_dec_update_prob(ctx);
> +}
> +
> +static void rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> +	const struct v4l2_av1_quantization *quantization = &frame->quantization;
> +	struct hantro_dev *vpu = ctx->dev;
> +	bool error_resilient_mode =
> +	    !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE);
> +	bool frame_is_intra = IS_INTRA(frame->frame_type);
> +
> +	if (error_resilient_mode || frame_is_intra
> +	    || frame->primary_ref_frame == AV1_PRIMARY_REF_NONE) {
> +		av1_dec->cdfs = &av1_dec->default_cdfs;
> +		av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
> +		rockchip_av1_default_coeff_probs(quantization->base_q_idx,
> +						 av1_dec->cdfs);
> +	} else {
> +		rockchip_av1_get_cdfs(ctx, frame->ref_frame_idx[frame->primary_ref_frame]);
> +	}
> +	rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);
> +
> +	memcpy(av1_dec->prob_tbl.cpu, av1_dec->cdfs, sizeof(struct av1cdfs));
> +
> +	if (frame_is_intra) {
> +		int mv_offset = offsetof(struct av1cdfs, mv_cdf);
> +		/* Overwrite MV context area with intrabc MV context */
> +		memcpy(av1_dec->prob_tbl.cpu + mv_offset, av1_dec->cdfs_ndvc,
> +		       sizeof(struct mvcdfs));
> +	}
> +
> +	hantro_write_addr(vpu, AV1_PROP_TABLE_OUT, av1_dec->prob_tbl_out.dma);
> +	hantro_write_addr(vpu, AV1_PROP_TABLE, av1_dec->prob_tbl.dma);
> +}
> +
> +static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> +	const struct v4l2_av1_cdef *cdef = &frame->cdef;
> +	struct hantro_dev *vpu = ctx->dev;
> +	uint32_t luma_pri_strength = 0;
> +	uint16_t luma_sec_strength = 0;
> +	uint32_t chroma_pri_strength = 0;
> +	uint16_t chroma_sec_strength = 0;
> +	int i;
> +
> +	hantro_reg_write(vpu, &av1_cdef_bits, cdef->bits);
> +	hantro_reg_write(vpu, &av1_cdef_damping, cdef->damping_minus_3);
> +
> +	for (i = 0; i < (1 << cdef->bits); i++) {
> +		luma_pri_strength |= cdef->y_pri_strength[i] << (i * 4);
> +		if (cdef->y_sec_strength[i] == 4)
> +			luma_sec_strength |= 3 << (i * 2);
> +		else
> +			luma_sec_strength |= cdef->y_sec_strength[i] << (i * 2);
> +
> +		chroma_pri_strength |= cdef->uv_pri_strength[i] << (i * 4);
> +		if (cdef->uv_sec_strength[i] == 4)
> +			chroma_sec_strength |= 3 << (i * 2);
> +		else
> +			chroma_sec_strength |= cdef->uv_sec_strength[i] << (i * 2);
> +	}
> +
> +	hantro_reg_write(vpu, &av1_cdef_luma_primary_strength,
> +			 luma_pri_strength);
> +	hantro_reg_write(vpu, &av1_cdef_luma_secondary_strength,
> +			 luma_sec_strength);
> +	hantro_reg_write(vpu, &av1_cdef_chroma_primary_strength,
> +			 chroma_pri_strength);
> +	hantro_reg_write(vpu, &av1_cdef_chroma_secondary_strength,
> +			 chroma_sec_strength);
> +
> +	hantro_write_addr(vpu, AV1_CDEF_COL, av1_dec->cdef_col.dma);
> +}
> +
> +static void rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> +	const struct v4l2_av1_loop_restoration *loop_restoration =
> +	    &frame->loop_restoration;
> +	struct hantro_dev *vpu = ctx->dev;
> +	uint16_t lr_type = 0, lr_unit_size = 0;
> +	uint8_t restoration_unit_size[V4L2_AV1_NUM_PLANES_MAX] = { 3, 3, 3 };
> +	int i;
> +
> +	if (loop_restoration->flags & V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR) {
> +		restoration_unit_size[0] = 1 + loop_restoration->lr_unit_shift;
> +		restoration_unit_size[1] =
> +		    1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
> +		restoration_unit_size[2] =
> +		    1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
> +	}
> +
> +	for (i = 0; i < V4L2_AV1_NUM_PLANES_MAX; i++) {
> +		lr_type |=
> +		    loop_restoration->frame_restoration_type[i] << (i * 2);
> +		lr_unit_size |= restoration_unit_size[i] << (i * 2);
> +	}
> +
> +	hantro_reg_write(vpu, &av1_lr_type, lr_type);
> +	hantro_reg_write(vpu, &av1_lr_unit_size, lr_unit_size);
> +	hantro_write_addr(vpu, AV1_LR_COL, av1_dec->lr_col.dma);
> +}
> +
> +static void rockchip_vpu981_av1_dec_set_superres_params(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> +	struct hantro_dev *vpu = ctx->dev;
> +	uint8_t superres_scale_denominator = SCALE_NUMERATOR;
> +	int superres_luma_step = RS_SCALE_SUBPEL_BITS;
> +	int superres_chroma_step = RS_SCALE_SUBPEL_BITS;
> +	int superres_luma_step_invra = RS_SCALE_SUBPEL_BITS;
> +	int superres_chroma_step_invra = RS_SCALE_SUBPEL_BITS;
> +	int superres_init_luma_subpel_x = 0;
> +	int superres_init_chroma_subpel_x = 0;
> +	int superres_is_scaled = 0;
> +	int min_w = min_t(uint32_t, 16, frame->upscaled_width);
> +	int upscaledLumaPlaneW, downscaledLumaPlaneW;
> +	int downscaledChromaPlaneW, upscaledChromaPlaneW;
> +	int stepLumaX, stepChromaX;
> +	int errLuma, errChroma;
> +	int initialLumaSubpelX, initialChromaSubpelX;

You haven't used camel case so far; are you sure this is allowed?

> +	int width = 0;
> +
> +	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
> +		superres_scale_denominator = frame->superres_denom;
> +
> +	if (superres_scale_denominator <= SCALE_NUMERATOR)
> +		goto set_regs;
> +
> +	width = (frame->upscaled_width * SCALE_NUMERATOR +
> +		(superres_scale_denominator / 2)) / superres_scale_denominator;
> +
> +	if (width < min_w)
> +		width = min_w;
> +
> +	if (width == frame->upscaled_width)
> +		goto set_regs;
> +
> +	superres_is_scaled = 1;
> +	upscaledLumaPlaneW = frame->upscaled_width;
> +	downscaledLumaPlaneW = width;
> +	downscaledChromaPlaneW = (downscaledLumaPlaneW + 1) >> 1;
> +	upscaledChromaPlaneW = (upscaledLumaPlaneW + 1) >> 1;
> +	stepLumaX =
> +		((downscaledLumaPlaneW << RS_SCALE_SUBPEL_BITS) +
> +		 (upscaledLumaPlaneW / 2)) / upscaledLumaPlaneW;
> +	stepChromaX =
> +		((downscaledChromaPlaneW << RS_SCALE_SUBPEL_BITS) +
> +		 (upscaledChromaPlaneW / 2)) / upscaledChromaPlaneW;
> +	errLuma =
> +		(upscaledLumaPlaneW * stepLumaX)
> +		- (downscaledLumaPlaneW << RS_SCALE_SUBPEL_BITS);
> +	errChroma =
> +		(upscaledChromaPlaneW * stepChromaX)
> +		- (downscaledChromaPlaneW << RS_SCALE_SUBPEL_BITS);
> +	initialLumaSubpelX =
> +		((-((upscaledLumaPlaneW - downscaledLumaPlaneW) << (RS_SCALE_SUBPEL_BITS - 1))
> +		  + upscaledLumaPlaneW / 2)
> +		 / upscaledLumaPlaneW + (1 << (RS_SCALE_EXTRA_BITS - 1)) - errLuma / 2)
> +		& RS_SCALE_SUBPEL_MASK;
> +	initialChromaSubpelX =
> +		((-((upscaledChromaPlaneW - downscaledChromaPlaneW) << (RS_SCALE_SUBPEL_BITS - 1))
> +		  + upscaledChromaPlaneW / 2)
> +		 / upscaledChromaPlaneW + (1 << (RS_SCALE_EXTRA_BITS - 1)) - errChroma / 2)
> +		& RS_SCALE_SUBPEL_MASK;
> +	superres_luma_step = stepLumaX;
> +	superres_chroma_step = stepChromaX;
> +	superres_luma_step_invra =
> +		((upscaledLumaPlaneW << RS_SCALE_SUBPEL_BITS) + (downscaledLumaPlaneW / 2))
> +		/ downscaledLumaPlaneW;
> +	superres_chroma_step_invra =
> +		((upscaledChromaPlaneW << RS_SCALE_SUBPEL_BITS) + (downscaledChromaPlaneW / 2))
> +		/ downscaledChromaPlaneW;
> +	superres_init_luma_subpel_x = initialLumaSubpelX;
> +	superres_init_chroma_subpel_x = initialChromaSubpelX;
> +
> +set_regs:
> +	hantro_reg_write(vpu, &av1_superres_pic_width, frame->upscaled_width);
> +
> +	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
> +		hantro_reg_write(vpu, &av1_scale_denom_minus9,
> +				 frame->superres_denom - SUPERRES_SCALE_DENOMINATOR_MIN);
> +	else
> +		hantro_reg_write(vpu, &av1_scale_denom_minus9, frame->superres_denom);
> +
> +	hantro_reg_write(vpu, &av1_superres_luma_step, superres_luma_step);
> +	hantro_reg_write(vpu, &av1_superres_chroma_step, superres_chroma_step);
> +	hantro_reg_write(vpu, &av1_superres_luma_step_invra,
> +			 superres_luma_step_invra);
> +	hantro_reg_write(vpu, &av1_superres_chroma_step_invra,
> +			 superres_chroma_step_invra);
> +	hantro_reg_write(vpu, &av1_superres_init_luma_subpel_x,
> +			 superres_init_luma_subpel_x);
> +	hantro_reg_write(vpu, &av1_superres_init_chroma_subpel_x,
> +			 superres_init_chroma_subpel_x);
> +	hantro_reg_write(vpu, &av1_superres_is_scaled, superres_is_scaled);
> +
> +	hantro_write_addr(vpu, AV1_SR_COL, av1_dec->sr_col.dma);
> +}
> +
> +static void rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> +	struct hantro_dev *vpu = ctx->dev;
> +	int pic_width_in_cbs = ALIGN(frame->frame_width_minus_1 + 1, 8) >> 3;
> +	int pic_height_in_cbs = ALIGN(frame->frame_height_minus_1 + 1, 8) >> 3;

Aren't these DIV_ROUND_UP(val + 1, 8)?

> +	int pic_width_pad = ALIGN(frame->frame_width_minus_1 + 1, 8)
> +			    - (frame->frame_width_minus_1 + 1);
> +	int pic_height_pad = ALIGN(frame->frame_height_minus_1 + 1, 8)
> +			     - (frame->frame_height_minus_1 + 1);
> +
> +	hantro_reg_write(vpu, &av1_pic_width_in_cbs, pic_width_in_cbs);
> +	hantro_reg_write(vpu, &av1_pic_height_in_cbs, pic_height_in_cbs);
> +	hantro_reg_write(vpu, &av1_pic_width_pad, pic_width_pad);
> +	hantro_reg_write(vpu, &av1_pic_height_pad, pic_height_pad);
> +
> +	rockchip_vpu981_av1_dec_set_superres_params(ctx);
> +}
> +
> +static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> +	struct hantro_dev *vpu = ctx->dev;
> +	bool use_ref_frame_mvs =
> +	    !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS);
> +	int cur_frame_offset = frame->order_hint;
> +	int alt_frame_offset = 0;
> +	int gld_frame_offset = 0;
> +	int bwd_frame_offset = 0;
> +	int alt2_frame_offset = 0;
> +	int refs_selected[3] = { 0, 0, 0 };
> +	int cur_mi_cols = (frame->frame_width_minus_1 + 8) >> 3;
> +	int cur_mi_rows = (frame->frame_height_minus_1 + 8) >> 3;

This looks like a contraction of (val + 1 + 7) / 8, so in short, another
DIV_ROUND_UP().

> +	int cur_offset[V4L2_AV1_NUM_REF_FRAMES - 1];
> +	int cur_roffset[V4L2_AV1_NUM_REF_FRAMES - 1];

This looks like V4L2_AV1_REFS_PER_FRAME. Daniel, should we remove this
V4L2_AV1_NUM_REF_FRAMES? It's redundant with V4L2_AV1_TOTAL_REFS_PER_FRAME ...

> +	int mf_types[3] = { 0, 0, 0 };
> +	int ref_stamp = 2;
> +	int ref_ind = 0;
> +	int rf, idx;
> +
> +	alt_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT_BUF_IDX);
> +	gld_frame_offset = rockchip_vpu981_get_order_hint(ctx, GLD_BUF_IDX);
> +	bwd_frame_offset = rockchip_vpu981_get_order_hint(ctx, BWD_BUF_IDX);
> +	alt2_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT2_BUF_IDX);
> +
> +	idx = rockchip_vpu981_get_frame_index(ctx, LST_BUF_IDX);
> +	if (idx >= 0) {
> +		int alt_frame_offset_in_lst =
> +			av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME];
> +		bool is_lst_overlay =
> +		    (alt_frame_offset_in_lst == gld_frame_offset);
> +
> +		if (!is_lst_overlay) {
> +			int lst_mi_cols =
> +			    (av1_dec->frame_refs[idx].width + 7) >> 3;

DIV_ROUND_UP()

> +			int lst_mi_rows =
> +			    (av1_dec->frame_refs[idx].height + 7) >> 3;

Again. I'll stop pointing these out; I think you can find them.

> +			bool lst_intra_only =
> +			    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
> +
> +			if (lst_mi_cols == cur_mi_cols
> +			    && lst_mi_rows == cur_mi_rows && !lst_intra_only) {
> +				mf_types[ref_ind] = V4L2_AV1_REF_LAST_FRAME;
> +				refs_selected[ref_ind++] = LST_BUF_IDX;
> +			}
> +		}
> +		ref_stamp--;
> +	}
> +
> +	idx = rockchip_vpu981_get_frame_index(ctx, BWD_BUF_IDX);
> +	if (rockchip_vpu981_av1_dec_get_relative_dist
> +	    (ctx, bwd_frame_offset, cur_frame_offset) > 0) {
> +		int bwd_mi_cols = (av1_dec->frame_refs[idx].width + 7) >> 3;
> +		int bwd_mi_rows = (av1_dec->frame_refs[idx].height + 7) >> 3;
> +		bool bwd_intra_only =
> +		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
> +
> +		if (bwd_mi_cols == cur_mi_cols && bwd_mi_rows == cur_mi_rows &&
> +		    !bwd_intra_only) {
> +			mf_types[ref_ind] = V4L2_AV1_REF_BWDREF_FRAME;
> +			refs_selected[ref_ind++] = BWD_BUF_IDX;
> +			ref_stamp--;
> +		}
> +	}
> +
> +	idx = rockchip_vpu981_get_frame_index(ctx, ALT2_BUF_IDX);
> +	if (rockchip_vpu981_av1_dec_get_relative_dist
> +	    (ctx, alt2_frame_offset, cur_frame_offset) > 0) {
> +		int alt2_mi_cols = (av1_dec->frame_refs[idx].width + 7) >> 3;
> +		int alt2_mi_rows = (av1_dec->frame_refs[idx].height + 7) >> 3;
> +		bool alt2_intra_only =
> +		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
> +
> +		if (alt2_mi_cols == cur_mi_cols && alt2_mi_rows == cur_mi_rows
> +		    && !alt2_intra_only) {
> +			mf_types[ref_ind] = V4L2_AV1_REF_ALTREF2_FRAME;
> +			refs_selected[ref_ind++] = ALT2_BUF_IDX;
> +			ref_stamp--;
> +		}
> +	}
> +
> +	idx = rockchip_vpu981_get_frame_index(ctx, ALT_BUF_IDX);
> +	if (rockchip_vpu981_av1_dec_get_relative_dist
> +	    (ctx, alt_frame_offset, cur_frame_offset) > 0 && ref_stamp >= 0) {
> +		int alt_mi_cols = (av1_dec->frame_refs[idx].width + 7) >> 3;
> +		int alt_mi_rows = (av1_dec->frame_refs[idx].height + 7) >> 3;
> +		bool alt_intra_only =
> +		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
> +
> +		if (alt_mi_cols == cur_mi_cols && alt_mi_rows == cur_mi_rows &&
> +		    !alt_intra_only) {
> +			mf_types[ref_ind] = V4L2_AV1_REF_ALTREF_FRAME;
> +			refs_selected[ref_ind++] = ALT_BUF_IDX;
> +			ref_stamp--;
> +		}
> +	}
> +
> +	idx = rockchip_vpu981_get_frame_index(ctx, LST2_BUF_IDX);
> +	if (idx >= 0 && ref_stamp >= 0) {
> +		int lst2_mi_cols = (av1_dec->frame_refs[idx].width + 7) >> 3;
> +		int lst2_mi_rows = (av1_dec->frame_refs[idx].height + 7) >> 3;

I said I'd stop, but can't this be calculated once and saved in the reference
frame structure?

> +		bool lst2_intra_only =
> +		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
> +
> +		if (lst2_mi_cols == cur_mi_cols && lst2_mi_rows == cur_mi_rows
> +		    && !lst2_intra_only) {
> +			mf_types[ref_ind] = V4L2_AV1_REF_LAST2_FRAME;
> +			refs_selected[ref_ind++] = LST2_BUF_IDX;
> +			ref_stamp--;
> +		}
> +	}
> +
> +	for (rf = 0; rf < V4L2_AV1_NUM_REF_FRAMES - 1; ++rf) {
> +		idx = rockchip_vpu981_get_frame_index(ctx, rf);
> +		if (idx >= 0) {
> +			int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, rf);
> +
> +			cur_offset[rf] =
> +			    rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +								      cur_frame_offset,
> +								      rf_order_hint);
> +			cur_roffset[rf] =
> +			    rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +								      rf_order_hint,
> +								      cur_frame_offset);
> +		} else {
> +			cur_offset[rf] = 0;
> +			cur_roffset[rf] = 0;
> +		}
> +	}
> +
> +	hantro_reg_write(vpu, &av1_use_temporal0_mvs, 0);
> +	hantro_reg_write(vpu, &av1_use_temporal1_mvs, 0);
> +	hantro_reg_write(vpu, &av1_use_temporal2_mvs, 0);
> +	hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
> +
> +	hantro_reg_write(vpu, &av1_mf1_last_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf1_last2_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf1_last3_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf1_golden_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf1_bwdref_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf1_altref2_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf1_altref_offset, 0);
> +
> +	if (use_ref_frame_mvs && ref_ind > 0 &&
> +	    cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE
> +	    && cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
> +		int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, refs_selected[0]);
> +		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[0]);
> +		int val;
> +
> +		hantro_reg_write(vpu, &av1_use_temporal0_mvs, 1);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf1_last_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST2_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf1_last2_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST3_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf1_last3_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_GOLDEN_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf1_golden_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_BWDREF_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf1_bwdref_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF2_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf1_altref2_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf1_altref_offset, val);
> +	}
> +
> +	hantro_reg_write(vpu, &av1_mf2_last_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf2_last2_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf2_last3_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf2_golden_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf2_bwdref_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf2_altref2_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf2_altref_offset, 0);
> +
> +	if (use_ref_frame_mvs && ref_ind > 1 &&
> +	    cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE
> +	    && cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
> +		int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, refs_selected[1]);
> +		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[1]);
> +		int val;
> +
> +		hantro_reg_write(vpu, &av1_use_temporal1_mvs, 1);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf2_last_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST2_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf2_last2_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST3_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf2_last3_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_GOLDEN_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf2_golden_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_BWDREF_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf2_bwdref_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF2_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf2_altref2_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf2_altref_offset, val);
> +	}
> +
> +	hantro_reg_write(vpu, &av1_mf3_last_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf3_last2_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf3_last3_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf3_golden_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf3_bwdref_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf3_altref2_offset, 0);
> +	hantro_reg_write(vpu, &av1_mf3_altref_offset, 0);
> +
> +	if (use_ref_frame_mvs && ref_ind > 2 &&
> +	    cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE
> +	    && cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
> +		int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, refs_selected[2]);
> +		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[2]);
> +		int val;
> +
> +		hantro_reg_write(vpu, &av1_use_temporal2_mvs, 1);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf3_last_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST2_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf3_last2_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST3_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf3_last3_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_GOLDEN_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf3_golden_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_BWDREF_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf3_bwdref_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF2_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf3_altref2_offset, val);
> +
> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> +				rf_order_hint,
> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME]);
> +		hantro_reg_write(vpu, &av1_mf3_altref_offset, val);
> +	}
> +
> +	hantro_reg_write(vpu, &av1_cur_last_offset, cur_offset[0]);
> +	hantro_reg_write(vpu, &av1_cur_last2_offset, cur_offset[1]);
> +	hantro_reg_write(vpu, &av1_cur_last3_offset, cur_offset[2]);
> +	hantro_reg_write(vpu, &av1_cur_golden_offset, cur_offset[3]);
> +	hantro_reg_write(vpu, &av1_cur_bwdref_offset, cur_offset[4]);
> +	hantro_reg_write(vpu, &av1_cur_altref2_offset, cur_offset[5]);
> +	hantro_reg_write(vpu, &av1_cur_altref_offset, cur_offset[6]);
> +
> +	hantro_reg_write(vpu, &av1_cur_last_roffset, cur_roffset[0]);
> +	hantro_reg_write(vpu, &av1_cur_last2_roffset, cur_roffset[1]);
> +	hantro_reg_write(vpu, &av1_cur_last3_roffset, cur_roffset[2]);
> +	hantro_reg_write(vpu, &av1_cur_golden_roffset, cur_roffset[3]);
> +	hantro_reg_write(vpu, &av1_cur_bwdref_roffset, cur_roffset[4]);
> +	hantro_reg_write(vpu, &av1_cur_altref2_roffset, cur_roffset[5]);
> +	hantro_reg_write(vpu, &av1_cur_altref_roffset, cur_roffset[6]);
> +
> +	hantro_reg_write(vpu, &av1_mf1_type, mf_types[0] - V4L2_AV1_REF_LAST_FRAME);
> +	hantro_reg_write(vpu, &av1_mf2_type, mf_types[1] - V4L2_AV1_REF_LAST_FRAME);
> +	hantro_reg_write(vpu, &av1_mf3_type, mf_types[2] - V4L2_AV1_REF_LAST_FRAME);
> +}
> +
> +static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> +	int frame_type = frame->frame_type;
> +	bool allow_intrabc = !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
> +	int ref_count[AV1DEC_MAX_PIC_BUFFERS] = { 0 };
> +	struct hantro_dev *vpu = ctx->dev;
> +	int i, ref_frames = 0;
> +	bool scale_enable = false;
> +
> +	if (IS_INTRA(frame_type) && !allow_intrabc)
> +		return;
> +
> +	if (!allow_intrabc) {
> +		for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
> +			int idx = rockchip_vpu981_get_frame_index(ctx, i);
> +
> +			if (idx >= 0)
> +				ref_count[idx]++;
> +		}
> +
> +		for (i = 0; i < AV1DEC_MAX_PIC_BUFFERS; i++) {
> +			if (ref_count[i])
> +				ref_frames++;
> +		}
> +	} else {
> +		ref_frames = 1;
> +	}
> +	hantro_reg_write(vpu, &av1_ref_frames, ref_frames);
> +
> +	rockchip_vpu981_av1_dec_set_frame_sign_bias(ctx);
> +
> +	for (i = V4L2_AV1_REF_LAST_FRAME; i < V4L2_AV1_NUM_REF_FRAMES; i++) {
> +		uint32_t ref = i - 1;
> +		int idx = 0;
> +		int width, height;
> +
> +		if (allow_intrabc) {
> +			idx = av1_dec->current_frame_index;
> +			width = frame->frame_width_minus_1 + 1;
> +			height = frame->frame_height_minus_1 + 1;
> +		} else {
> +			if (rockchip_vpu981_get_frame_index(ctx, ref) > 0)
> +				idx = rockchip_vpu981_get_frame_index(ctx, ref);
> +			width = av1_dec->frame_refs[idx].width;
> +			height = av1_dec->frame_refs[idx].height;
> +		}
> +
> +		scale_enable |=
> +		    rockchip_vpu981_av1_dec_set_ref(ctx, ref, idx, width,
> +						    height);
> +
> +		rockchip_vpu981_av1_dec_set_sign_bias(ctx, ref,
> +						      av1_dec->ref_frame_sign_bias[i]);
> +	}
> +	hantro_reg_write(vpu, &av1_ref_scaling_enable, scale_enable);
> +
> +	hantro_reg_write(vpu, &av1_ref0_gm_mode,
> +			 frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME]);
> +	hantro_reg_write(vpu, &av1_ref1_gm_mode,
> +			 frame->global_motion.type[V4L2_AV1_REF_LAST2_FRAME]);
> +	hantro_reg_write(vpu, &av1_ref2_gm_mode,
> +			 frame->global_motion.type[V4L2_AV1_REF_LAST3_FRAME]);
> +	hantro_reg_write(vpu, &av1_ref3_gm_mode,
> +			 frame->global_motion.type[V4L2_AV1_REF_GOLDEN_FRAME]);
> +	hantro_reg_write(vpu, &av1_ref4_gm_mode,
> +			 frame->global_motion.type[V4L2_AV1_REF_BWDREF_FRAME]);
> +	hantro_reg_write(vpu, &av1_ref5_gm_mode,
> +			 frame->global_motion.type[V4L2_AV1_REF_ALTREF2_FRAME]);
> +	hantro_reg_write(vpu, &av1_ref6_gm_mode,
> +			 frame->global_motion.type[V4L2_AV1_REF_ALTREF_FRAME]);
> +
> +	rockchip_vpu981_av1_dec_set_other_frames(ctx);
> +}
> +
> +static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
> +{
> +	struct hantro_dev *vpu = ctx->dev;
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +
> +	hantro_reg_write(vpu, &av1_skip_mode,
> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT));
> +	hantro_reg_write(vpu, &av1_tempor_mvp_e,
> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS));
> +	hantro_reg_write(vpu, &av1_delta_lf_res_log,
> +			 ctrls->frame->loop_filter.delta_lf_res);
> +	hantro_reg_write(vpu, &av1_delta_lf_multi,
> +			 !!(ctrls->frame->loop_filter.flags
> +			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI));
> +	hantro_reg_write(vpu, &av1_delta_lf_present,
> +			 !!(ctrls->frame->loop_filter.flags
> +			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT));
> +	hantro_reg_write(vpu, &av1_disable_cdf_update,
> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE));
> +	hantro_reg_write(vpu, &av1_allow_warp,
> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION));
> +	hantro_reg_write(vpu, &av1_show_frame,
> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SHOW_FRAME));
> +	hantro_reg_write(vpu, &av1_switchable_motion_mode,
> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE));
> +	hantro_reg_write(vpu, &av1_enable_cdef,
> +			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF));
> +	hantro_reg_write(vpu, &av1_allow_masked_compound,
> +			 !!(ctrls->sequence->flags
> +			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND));
> +	hantro_reg_write(vpu, &av1_allow_interintra,
> +			 !!(ctrls->sequence->flags
> +			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND));
> +	hantro_reg_write(vpu, &av1_enable_intra_edge_filter,
> +			 !!(ctrls->sequence->flags
> +			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER));
> +	hantro_reg_write(vpu, &av1_allow_filter_intra,
> +			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA));
> +	hantro_reg_write(vpu, &av1_enable_jnt_comp,
> +			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP));
> +	hantro_reg_write(vpu, &av1_enable_dual_filter,
> +			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER));
> +	hantro_reg_write(vpu, &av1_reduced_tx_set_used,
> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET));
> +	hantro_reg_write(vpu, &av1_allow_screen_content_tools,
> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS));
> +	hantro_reg_write(vpu, &av1_allow_intrabc,
> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC));
> +
> +	if (!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS))
> +		hantro_reg_write(vpu, &av1_force_interger_mv, 0);
> +	else
> +		hantro_reg_write(vpu, &av1_force_interger_mv,
> +				 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV));
> +
> +	hantro_reg_write(vpu, &av1_blackwhite_e, 0);
> +	hantro_reg_write(vpu, &av1_delta_q_res_log, ctrls->frame->quantization.delta_q_res);
> +	hantro_reg_write(vpu, &av1_delta_q_present,
> +			 !!(ctrls->frame->quantization.flags
> +			    & V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT));
> +
> +	hantro_reg_write(vpu, &av1_idr_pic_e, !ctrls->frame->frame_type);
> +	hantro_reg_write(vpu, &av1_quant_base_qindex, ctrls->frame->quantization.base_q_idx);
> +	hantro_reg_write(vpu, &av1_bit_depth_y_minus8, ctx->bit_depth - 8);
> +	hantro_reg_write(vpu, &av1_bit_depth_c_minus8, ctx->bit_depth - 8);

I believe these registers are read by the PP when using format 0; perhaps that
is worth a comment, as they will impact the pixels produced by the
post-processor, which can be a surprising side effect. Bits 11:8 also hold
sw_bit_depth_out_minus8, but I don't know if it's used for AV1; in my doc it's
only used for the AVS2 CODEC.

I think it's good to underline that in VC8000/VC9000, some registers are shared
across multiple CODECs.

> +
> +	hantro_reg_write(vpu, &av1_mcomp_filt_type, ctrls->frame->interpolation_filter);
> +	hantro_reg_write(vpu, &av1_high_prec_mv_e,
> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV));
> +	hantro_reg_write(vpu, &av1_comp_pred_mode,
> +			 (ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT) ? 2 : 0);
> +	hantro_reg_write(vpu, &av1_transform_mode, (ctrls->frame->tx_mode == 1) ? 3 : 4);
> +	hantro_reg_write(vpu, &av1_max_cb_size,
> +			 (ctrls->sequence->flags
> +			  & V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK) ? 7 : 6);
> +	hantro_reg_write(vpu, &av1_min_cb_size, 3);
> +
> +	hantro_reg_write(vpu, &av1_comp_pred_fixed_ref, 0);
> +	hantro_reg_write(vpu, &av1_comp_pred_var_ref0_av1, 0);
> +	hantro_reg_write(vpu, &av1_comp_pred_var_ref1_av1, 0);
> +	hantro_reg_write(vpu, &av1_filt_level_seg0, 0);
> +	hantro_reg_write(vpu, &av1_filt_level_seg1, 0);
> +	hantro_reg_write(vpu, &av1_filt_level_seg2, 0);
> +	hantro_reg_write(vpu, &av1_filt_level_seg3, 0);
> +	hantro_reg_write(vpu, &av1_filt_level_seg4, 0);
> +	hantro_reg_write(vpu, &av1_filt_level_seg5, 0);
> +	hantro_reg_write(vpu, &av1_filt_level_seg6, 0);
> +	hantro_reg_write(vpu, &av1_filt_level_seg7, 0);
> +
> +	hantro_reg_write(vpu, &av1_qp_delta_y_dc_av1, ctrls->frame->quantization.delta_q_y_dc);
> +	hantro_reg_write(vpu, &av1_qp_delta_ch_dc_av1, ctrls->frame->quantization.delta_q_u_dc);
> +	hantro_reg_write(vpu, &av1_qp_delta_ch_ac_av1, ctrls->frame->quantization.delta_q_u_ac);
> +	if (ctrls->frame->quantization.flags & V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX) {
> +		hantro_reg_write(vpu, &av1_qmlevel_y, ctrls->frame->quantization.qm_y);
> +		hantro_reg_write(vpu, &av1_qmlevel_u, ctrls->frame->quantization.qm_u);
> +		hantro_reg_write(vpu, &av1_qmlevel_v, ctrls->frame->quantization.qm_v);
> +	} else {
> +		hantro_reg_write(vpu, &av1_qmlevel_y, 0xff);
> +		hantro_reg_write(vpu, &av1_qmlevel_u, 0xff);
> +		hantro_reg_write(vpu, &av1_qmlevel_v, 0xff);
> +	}
> +
> +	hantro_reg_write(vpu, &av1_lossless_e, rockchip_vpu981_av1_dec_is_lossless(ctx));
> +	hantro_reg_write(vpu, &av1_quant_delta_v_dc, ctrls->frame->quantization.delta_q_v_dc);
> +	hantro_reg_write(vpu, &av1_quant_delta_v_ac, ctrls->frame->quantization.delta_q_v_ac);
> +
> +	hantro_reg_write(vpu, &av1_skip_ref0,
> +			 (ctrls->frame->skip_mode_frame[0]) ? ctrls->frame->skip_mode_frame[0] : 1);
> +	hantro_reg_write(vpu, &av1_skip_ref1,
> +			 (ctrls->frame->skip_mode_frame[1]) ? ctrls->frame->skip_mode_frame[1] : 1);
> +
> +	hantro_write_addr(vpu, AV1_MC_SYNC_CURR, av1_dec->tile_buf.dma);
> +	hantro_write_addr(vpu, AV1_MC_SYNC_LEFT, av1_dec->tile_buf.dma);
> +}
> +
> +static void
> +rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx *ctx,
> +					 struct vb2_v4l2_buffer *vb2_src)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> +	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
> +	    ctrls->tile_group_entry;
> +	struct hantro_dev *vpu = ctx->dev;
> +	dma_addr_t src_dma;
> +	u32 src_len, src_buf_len;
> +	int start_bit, offset;
> +
> +	src_dma = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0);
> +	src_len = vb2_get_plane_payload(&vb2_src->vb2_buf, 0);
> +	src_buf_len = vb2_plane_size(&vb2_src->vb2_buf, 0);
> +
> +	start_bit = (group_entry[0].tile_offset & 0xf) * 8;
> +	offset = group_entry[0].tile_offset & ~0xf;
> +
> +	hantro_reg_write(vpu, &av1_strm_buffer_len, src_buf_len);
> +	hantro_reg_write(vpu, &av1_strm_start_bit, start_bit);
> +	hantro_reg_write(vpu, &av1_stream_len, src_len);
> +	hantro_reg_write(vpu, &av1_strm_start_offset, 0);
> +	hantro_write_addr(vpu, AV1_INPUT_STREAM, src_dma + offset);
> +}
> +
> +static void
> +rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx *ctx)
> +{
> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> +	struct hantro_dev *vpu = ctx->dev;
> +	struct hantro_decoded_buffer *dst;
> +	struct vb2_v4l2_buffer *vb2_dst;
> +	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
> +	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
> +	size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
> +
> +	vb2_dst = av1_dec->frame_refs[av1_dec->current_frame_index].vb2_ref;
> +	dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf);
> +	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
> +	chroma_addr = luma_addr + cr_offset;
> +	mv_addr = luma_addr + mv_offset;
> +
> +	hantro_write_addr(vpu, AV1_TILE_OUT_LU, luma_addr);
> +	hantro_write_addr(vpu, AV1_TILE_OUT_CH, chroma_addr);
> +	hantro_write_addr(vpu, AV1_TILE_OUT_MV, mv_addr);
> +}
> +
> +int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
> +{
> +	struct hantro_dev *vpu = ctx->dev;
> +	struct vb2_v4l2_buffer *vb2_src;
> +	int ret;
> +
> +	hantro_start_prepare_run(ctx);
> +
> +	ret = rockchip_vpu981_av1_dec_prepare_run(ctx);
> +	if (ret)
> +		goto prepare_error;
> +
> +	vb2_src = hantro_get_src_buf(ctx);
> +	if (!vb2_src)
> +		goto prepare_error;
> +
> +	rockchip_vpu981_av1_dec_clean_refs(ctx);
> +	rockchip_vpu981_av1_dec_frame_ref(ctx, vb2_src->vb2_buf.timestamp);
> +
> +	rockchip_vpu981_av1_dec_set_parameters(ctx);
> +	rockchip_vpu981_av1_dec_set_global_model(ctx);
> +	rockchip_vpu981_av1_dec_set_tile_info(ctx);
> +	rockchip_vpu981_av1_dec_set_reference_frames(ctx);
> +	rockchip_vpu981_av1_dec_set_segmentation(ctx);
> +	rockchip_vpu981_av1_dec_set_loopfilter(ctx);
> +	rockchip_vpu981_av1_dec_set_picture_dimensions(ctx);
> +	rockchip_vpu981_av1_dec_set_cdef(ctx);
> +	rockchip_vpu981_av1_dec_set_lr(ctx);
> +	rockchip_vpu981_av1_dec_set_prob(ctx);
> +
> +	hantro_reg_write(vpu, &av1_dec_mode, AV1_DEC_MODE);
> +	hantro_reg_write(vpu, &av1_dec_out_ec_byte_word, 0);
> +	hantro_reg_write(vpu, &av1_write_mvs_e, 1);
> +	hantro_reg_write(vpu, &av1_dec_out_ec_bypass, 1);
> +	hantro_reg_write(vpu, &av1_dec_clk_gate_e, 1);
> +
> +	hantro_reg_write(vpu, &av1_dec_abort_e, 0);
> +	hantro_reg_write(vpu, &av1_dec_tile_int_e, 0);
> +
> +	hantro_reg_write(vpu, &av1_dec_alignment, 64);
> +	hantro_reg_write(vpu, &av1_apf_disable, 0);
> +	hantro_reg_write(vpu, &av1_apf_threshold, 8);
> +	hantro_reg_write(vpu, &av1_dec_buswidth, 2);
> +	hantro_reg_write(vpu, &av1_dec_max_burst, 16);
> +	hantro_reg_write(vpu, &av1_error_conceal_e, 0);
> +	hantro_reg_write(vpu, &av1_axi_rd_ostd_threshold, 64);
> +	hantro_reg_write(vpu, &av1_axi_wr_ostd_threshold, 64);
> +
> +	hantro_reg_write(vpu, &av1_ext_timeout_cycles, 0xfffffff);
> +	hantro_reg_write(vpu, &av1_ext_timeout_override_e, 1);
> +	hantro_reg_write(vpu, &av1_timeout_cycles, 0xfffffff);
> +	hantro_reg_write(vpu, &av1_timeout_override_e, 1);
> +
> +	rockchip_vpu981_av1_dec_set_output_buffer(ctx);
> +	rockchip_vpu981_av1_dec_set_input_buffer(ctx, vb2_src);
> +
> +	hantro_end_prepare_run(ctx);
> +
> +	hantro_reg_write(vpu, &av1_dec_e, 1);
> +
> +	return 0;
> +
> +prepare_error:
> +	hantro_end_prepare_run(ctx);
> +	hantro_irq_done(vpu, VB2_BUF_STATE_ERROR);
> +	return ret;
> +}
> +
> +static void rockchip_vpu981_postproc_enable(struct hantro_ctx *ctx)
> +{
> +	struct hantro_dev *vpu = ctx->dev;
> +	int width = ctx->dst_fmt.width;
> +	int height = ctx->dst_fmt.height;
> +	struct vb2_v4l2_buffer *vb2_dst;
> +	size_t chroma_offset;
> +	dma_addr_t dst_dma;
> +
> +	vb2_dst = hantro_get_dst_buf(ctx);
> +
> +	dst_dma = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
> +	chroma_offset = ctx->dst_fmt.plane_fmt[0].bytesperline *
> +	    ctx->dst_fmt.height;
> +
> +	/* enable post processor */
> +	hantro_reg_write(vpu, &av1_pp_out_e, 1);
> +	hantro_reg_write(vpu, &av1_pp_in_format, 0);
> +	hantro_reg_write(vpu, &av1_pp0_dup_hor, 1);
> +	hantro_reg_write(vpu, &av1_pp0_dup_ver, 1);
> +
> +	hantro_reg_write(vpu, &av1_pp_in_height, height / 2);
> +	hantro_reg_write(vpu, &av1_pp_in_width, width / 2);
> +	hantro_reg_write(vpu, &av1_pp_out_height, height);
> +	hantro_reg_write(vpu, &av1_pp_out_width, width);
> +	hantro_reg_write(vpu, &av1_pp_out_y_stride,
> +			 ctx->dst_fmt.plane_fmt[0].bytesperline);
> +	hantro_reg_write(vpu, &av1_pp_out_c_stride,
> +			 ctx->dst_fmt.plane_fmt[0].bytesperline);
> +	switch (ctx->dst_fmt.pixelformat) {
> +	case V4L2_PIX_FMT_P010:
> +		hantro_reg_write(vpu, &av1_pp_out_format, 1);
> +		break;
> +	case V4L2_PIX_FMT_NV12:
> +		hantro_reg_write(vpu, &av1_pp_out_format, 3);
> +		break;
> +	default:
> +		hantro_reg_write(vpu, &av1_pp_out_format, 0);
> +	}
> +
> +	hantro_reg_write(vpu, &av1_ppd_blend_exist, 0);
> +	hantro_reg_write(vpu, &av1_ppd_dith_exist, 0);
> +	hantro_reg_write(vpu, &av1_ablend_crop_e, 0);
> +	hantro_reg_write(vpu, &av1_pp_format_customer1_e, 0);
> +	hantro_reg_write(vpu, &av1_pp_crop_exist, 0);
> +	hantro_reg_write(vpu, &av1_pp_up_level, 0);
> +	hantro_reg_write(vpu, &av1_pp_down_level, 0);
> +	hantro_reg_write(vpu, &av1_pp_exist, 0);
> +
> +	hantro_write_addr(vpu, AV1_PP_OUT_LU, dst_dma);
> +	hantro_write_addr(vpu, AV1_PP_OUT_CH, dst_dma + chroma_offset);
> +}
> +
> +static void rockchip_vpu981_postproc_disable(struct hantro_ctx *ctx)
> +{
> +	struct hantro_dev *vpu = ctx->dev;
> +
> +	/* disable post processor */
> +	hantro_reg_write(vpu, &av1_pp_out_e, 0);
> +}
> +
> +const struct hantro_postproc_ops rockchip_vpu981_postproc_ops = {
> +	.enable = rockchip_vpu981_postproc_enable,
> +	.disable = rockchip_vpu981_postproc_disable,
> +};

As enabling the post-proc is done by the common driver, we need a mechanism to
alter the core's decisions, as we really need to enable the post-proc to produce
film grain. Fortunately, not applying this filter isn't visually bad, but that's
not the intent of the video author, so it should not be like this by default.

> diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h b/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
> new file mode 100644
> index 000000000000..182e6c830ff6
> --- /dev/null
> +++ b/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
> @@ -0,0 +1,477 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (c) 2022, Collabora
> + *
> + * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
> + */
> +
> +#ifndef _ROCKCHIP_VPU981_REGS_H_
> +#define _ROCKCHIP_VPU981_REGS_H_
> +
> +#include "hantro.h"
> +
> +#define AV1_SWREG(nr)	((nr) * 4)
> +
> +#define AV1_DEC_REG(b, s, m) \
> +	((const struct hantro_reg) { \
> +		.base = AV1_SWREG(b), \
> +		.shift = s, \
> +		.mask = m, \
> +	})

Just a note that a lot of this is identical to the VC8000 register sets. But
until someone upstreams VC8000 and until we have more VC9000 support, it's fine
to keep that as if it were RK specific.

> +
> +#define AV1_REG_INTERRUPT		AV1_SWREG(1)
> +#define AV1_REG_INTERRUPT_DEC_RDY_INT	BIT(12)
> +
> +#define AV1_REG_CONFIG			AV1_SWREG(2)
> +#define AV1_REG_CONFIG_DEC_CLK_GATE_E	BIT(10)
> +
> +#define av1_dec_e			AV1_DEC_REG(1, 0, 0x1)
> +#define av1_dec_abort_e			AV1_DEC_REG(1, 5, 0x1)
> +#define av1_dec_tile_int_e		AV1_DEC_REG(1, 7, 0x1)
> +
> +#define av1_dec_clk_gate_e		AV1_DEC_REG(2, 10, 0x1)
> +
> +#define av1_dec_out_ec_bypass		AV1_DEC_REG(3, 8,  0x1)
> +#define av1_write_mvs_e			AV1_DEC_REG(3, 12, 0x1)
> +#define av1_filtering_dis		AV1_DEC_REG(3, 14, 0x1)
> +#define av1_dec_out_dis			AV1_DEC_REG(3, 15, 0x1)
> +#define av1_dec_out_ec_byte_word	AV1_DEC_REG(3, 16, 0x1)
> +#define av1_skip_mode			AV1_DEC_REG(3, 26, 0x1)
> +#define av1_dec_mode			AV1_DEC_REG(3, 27, 0x1f)
> +
> +#define av1_ref_frames			AV1_DEC_REG(4, 0, 0xf)
> +#define av1_pic_height_in_cbs		AV1_DEC_REG(4, 6, 0x1fff)
> +#define av1_pic_width_in_cbs		AV1_DEC_REG(4, 19, 0x1fff)
> +
> +#define av1_ref_scaling_enable		AV1_DEC_REG(5, 0, 0x1)
> +#define av1_filt_level_base_gt32	AV1_DEC_REG(5, 1, 0x1)
> +#define av1_error_resilient		AV1_DEC_REG(5, 2, 0x1)
> +#define av1_force_interger_mv		AV1_DEC_REG(5, 3, 0x1)
> +#define av1_allow_intrabc		AV1_DEC_REG(5, 4, 0x1)
> +#define av1_allow_screen_content_tools	AV1_DEC_REG(5, 5, 0x1)
> +#define av1_reduced_tx_set_used		AV1_DEC_REG(5, 6, 0x1)
> +#define av1_enable_dual_filter		AV1_DEC_REG(5, 7, 0x1)
> +#define av1_enable_jnt_comp		AV1_DEC_REG(5, 8, 0x1)
> +#define av1_allow_filter_intra		AV1_DEC_REG(5, 9, 0x1)
> +#define av1_enable_intra_edge_filter	AV1_DEC_REG(5, 10, 0x1)
> +#define av1_tempor_mvp_e		AV1_DEC_REG(5, 11, 0x1)
> +#define av1_allow_interintra		AV1_DEC_REG(5, 12, 0x1)
> +#define av1_allow_masked_compound	AV1_DEC_REG(5, 13, 0x1)
> +#define av1_enable_cdef			AV1_DEC_REG(5, 14, 0x1)
> +#define av1_switchable_motion_mode	AV1_DEC_REG(5, 15, 0x1)
> +#define av1_show_frame			AV1_DEC_REG(5, 16, 0x1)
> +#define av1_superres_is_scaled		AV1_DEC_REG(5, 17, 0x1)
> +#define av1_allow_warp			AV1_DEC_REG(5, 18, 0x1)
> +#define av1_disable_cdf_update		AV1_DEC_REG(5, 19, 0x1)
> +#define av1_preskip_segid		AV1_DEC_REG(5, 20, 0x1)
> +#define av1_delta_lf_present		AV1_DEC_REG(5, 21, 0x1)
> +#define av1_delta_lf_multi		AV1_DEC_REG(5, 22, 0x1)
> +#define av1_delta_lf_res_log		AV1_DEC_REG(5, 23, 0x3)
> +#define av1_strm_start_bit		AV1_DEC_REG(5, 25, 0x7f)
> +
> +#define	av1_stream_len			AV1_DEC_REG(6, 0, 0xffffffff)
> +
> +#define av1_delta_q_present		AV1_DEC_REG(7, 0, 0x1)
> +#define av1_delta_q_res_log		AV1_DEC_REG(7, 1, 0x3)
> +#define av1_cdef_damping		AV1_DEC_REG(7, 3, 0x3)
> +#define av1_cdef_bits			AV1_DEC_REG(7, 5, 0x3)
> +#define av1_apply_grain			AV1_DEC_REG(7, 7, 0x1)
> +#define av1_num_y_points_b		AV1_DEC_REG(7, 8, 0x1)
> +#define av1_num_cb_points_b		AV1_DEC_REG(7, 9, 0x1)
> +#define av1_num_cr_points_b		AV1_DEC_REG(7, 10, 0x1)
> +#define av1_overlap_flag		AV1_DEC_REG(7, 11, 0x1)
> +#define av1_clip_to_restricted_range	AV1_DEC_REG(7, 12, 0x1)
> +#define av1_chroma_scaling_from_luma	AV1_DEC_REG(7, 13, 0x1)
> +#define av1_random_seed			AV1_DEC_REG(7, 14, 0xffff)
> +#define av1_blackwhite_e		AV1_DEC_REG(7, 30, 0x1)
> +
> +#define av1_scaling_shift		AV1_DEC_REG(8, 0, 0xf)
> +#define av1_bit_depth_c_minus8		AV1_DEC_REG(8, 4, 0x3)
> +#define av1_bit_depth_y_minus8		AV1_DEC_REG(8, 6, 0x3)
> +#define av1_quant_base_qindex		AV1_DEC_REG(8, 8, 0xff)
> +#define av1_idr_pic_e			AV1_DEC_REG(8, 16, 0x1)
> +#define av1_superres_pic_width		AV1_DEC_REG(8, 17, 0x7fff)
> +
> +#define av1_ref4_sign_bias		AV1_DEC_REG(9, 2, 0x1)
> +#define av1_ref5_sign_bias		AV1_DEC_REG(9, 3, 0x1)
> +#define av1_ref6_sign_bias		AV1_DEC_REG(9, 4, 0x1)
> +#define av1_mf1_type			AV1_DEC_REG(9, 5, 0x7)
> +#define av1_mf2_type			AV1_DEC_REG(9, 8, 0x7)
> +#define av1_mf3_type			AV1_DEC_REG(9, 11, 0x7)
> +#define av1_scale_denom_minus9		AV1_DEC_REG(9, 14, 0x7)
> +#define av1_last_active_seg		AV1_DEC_REG(9, 17, 0x7)
> +#define av1_context_update_tile_id	AV1_DEC_REG(9, 20, 0xfff)
> +
> +#define av1_tile_transpose		AV1_DEC_REG(10, 0, 0x1)
> +#define av1_tile_enable			AV1_DEC_REG(10, 1, 0x1)
> +#define av1_multicore_full_width	AV1_DEC_REG(10,	2, 0xff)
> +#define av1_num_tile_rows_8k		AV1_DEC_REG(10, 10, 0x7f)
> +#define av1_num_tile_cols_8k		AV1_DEC_REG(10, 17, 0x7f)
> +#define av1_multicore_tile_start_x	AV1_DEC_REG(10, 24, 0xff)
> +
> +#define av1_use_temporal3_mvs		AV1_DEC_REG(11, 0, 0x1)
> +#define av1_use_temporal2_mvs		AV1_DEC_REG(11, 1, 0x1)
> +#define av1_use_temporal1_mvs		AV1_DEC_REG(11, 2, 0x1)
> +#define av1_use_temporal0_mvs		AV1_DEC_REG(11, 3, 0x1)
> +#define av1_comp_pred_mode		AV1_DEC_REG(11, 4, 0x3)
> +#define av1_high_prec_mv_e		AV1_DEC_REG(11, 7, 0x1)
> +#define av1_mcomp_filt_type		AV1_DEC_REG(11, 8, 0x7)
> +#define av1_multicore_expect_context_update	AV1_DEC_REG(11, 11, 0x1)
> +#define av1_multicore_sbx_offset	AV1_DEC_REG(11, 12, 0x7f)
> +#define av1_ulticore_tile_col		AV1_DEC_REG(11, 19, 0x7f)
> +#define av1_transform_mode		AV1_DEC_REG(11, 27, 0x7)
> +#define av1_dec_tile_size_mag		AV1_DEC_REG(11, 30, 0x3)
> +
> +#define av1_seg_quant_sign		AV1_DEC_REG(12, 2, 0xff)
> +#define av1_max_cb_size			AV1_DEC_REG(12, 10, 0x7)
> +#define av1_min_cb_size			AV1_DEC_REG(12, 13, 0x7)
> +#define av1_comp_pred_fixed_ref		AV1_DEC_REG(12, 16, 0x7)
> +#define av1_multicore_tile_width	AV1_DEC_REG(12, 19, 0x7f)
> +#define av1_pic_height_pad		AV1_DEC_REG(12, 26, 0x7)
> +#define av1_pic_width_pad		AV1_DEC_REG(12, 29, 0x7)
> +
> +#define av1_segment_e			AV1_DEC_REG(13, 0, 0x1)
> +#define av1_segment_upd_e		AV1_DEC_REG(13, 1, 0x1)
> +#define av1_segment_temp_upd_e		AV1_DEC_REG(13, 2, 0x1)
> +#define av1_comp_pred_var_ref0_av1	AV1_DEC_REG(13, 3, 0x7)
> +#define av1_comp_pred_var_ref1_av1	AV1_DEC_REG(13, 6, 0x7)
> +#define av1_lossless_e			AV1_DEC_REG(13, 9, 0x1)
> +#define av1_qp_delta_ch_ac_av1		AV1_DEC_REG(13, 11, 0x7f)
> +#define av1_qp_delta_ch_dc_av1		AV1_DEC_REG(13, 18, 0x7f)
> +#define av1_qp_delta_y_dc_av1		AV1_DEC_REG(13, 25, 0x7f)
> +
> +#define av1_quant_seg0			AV1_DEC_REG(14, 0, 0xff)
> +#define av1_filt_level_seg0		AV1_DEC_REG(14, 8, 0x3f)
> +#define av1_skip_seg0			AV1_DEC_REG(14, 14, 0x1)
> +#define av1_refpic_seg0			AV1_DEC_REG(14, 15, 0xf)
> +#define av1_filt_level_delta0_seg0	AV1_DEC_REG(14, 19, 0x7f)
> +#define av1_filt_level0			AV1_DEC_REG(14, 26, 0x3f)
> +
> +#define av1_quant_seg1			AV1_DEC_REG(15, 0, 0xff)
> +#define av1_filt_level_seg1		AV1_DEC_REG(15, 8, 0x3f)
> +#define av1_skip_seg1			AV1_DEC_REG(15, 14, 0x1)
> +#define av1_refpic_seg1			AV1_DEC_REG(15, 15, 0xf)
> +#define av1_filt_level_delta0_seg1	AV1_DEC_REG(15, 19, 0x7f)
> +#define av1_filt_level1			AV1_DEC_REG(15, 26, 0x3f)
> +
> +#define av1_quant_seg2			AV1_DEC_REG(16, 0, 0xff)
> +#define av1_filt_level_seg2		AV1_DEC_REG(16, 8, 0x3f)
> +#define av1_skip_seg2			AV1_DEC_REG(16, 14, 0x1)
> +#define av1_refpic_seg2			AV1_DEC_REG(16, 15, 0xf)
> +#define av1_filt_level_delta0_seg2	AV1_DEC_REG(16, 19, 0x7f)
> +#define av1_filt_level2			AV1_DEC_REG(16, 26, 0x3f)
> +
> +#define av1_quant_seg3			AV1_DEC_REG(17, 0, 0xff)
> +#define av1_filt_level_seg3		AV1_DEC_REG(17, 8, 0x3f)
> +#define av1_skip_seg3			AV1_DEC_REG(17, 14, 0x1)
> +#define av1_refpic_seg3			AV1_DEC_REG(17, 15, 0xf)
> +#define av1_filt_level_delta0_seg3	AV1_DEC_REG(17, 19, 0x7f)
> +#define av1_filt_level3			AV1_DEC_REG(17, 26, 0x3f)
> +
> +#define av1_quant_seg4			AV1_DEC_REG(18, 0, 0xff)
> +#define av1_filt_level_seg4		AV1_DEC_REG(18, 8, 0x3f)
> +#define av1_skip_seg4			AV1_DEC_REG(18, 14, 0x1)
> +#define av1_refpic_seg4			AV1_DEC_REG(18, 15, 0xf)
> +#define av1_filt_level_delta0_seg4	AV1_DEC_REG(18, 19, 0x7f)
> +#define av1_lr_type			AV1_DEC_REG(18, 26, 0x3f)
> +
> +#define av1_quant_seg5			AV1_DEC_REG(19, 0, 0xff)
> +#define av1_filt_level_seg5		AV1_DEC_REG(19, 8, 0x3f)
> +#define av1_skip_seg5			AV1_DEC_REG(19, 14, 0x1)
> +#define av1_refpic_seg5			AV1_DEC_REG(19, 15, 0xf)
> +#define av1_filt_level_delta0_seg5	AV1_DEC_REG(19, 19, 0x7f)
> +#define av1_lr_unit_size		AV1_DEC_REG(19, 26, 0x3f)
> +
> +#define av1_filt_level_delta1_seg0	AV1_DEC_REG(20, 0, 0x7f)
> +#define av1_filt_level_delta2_seg0	AV1_DEC_REG(20, 7, 0x7f)
> +#define av1_filt_level_delta3_seg0	AV1_DEC_REG(20, 14, 0x7f)
> +#define av1_global_mv_seg0		AV1_DEC_REG(20, 21, 0x1)
> +#define av1_mf1_last_offset		AV1_DEC_REG(20, 22, 0x1ff)
> +
> +#define av1_filt_level_delta1_seg1	AV1_DEC_REG(21, 0, 0x7f)
> +#define av1_filt_level_delta2_seg1	AV1_DEC_REG(21, 7, 0x7f)
> +#define av1_filt_level_delta3_seg1	AV1_DEC_REG(21, 14, 0x7f)
> +#define av1_global_mv_seg1		AV1_DEC_REG(21, 21, 0x1)
> +#define av1_mf1_last2_offset		AV1_DEC_REG(21, 22, 0x1ff)
> +
> +#define av1_filt_level_delta1_seg2	AV1_DEC_REG(22, 0, 0x7f)
> +#define av1_filt_level_delta2_seg2	AV1_DEC_REG(22, 7, 0x7f)
> +#define av1_filt_level_delta3_seg2	AV1_DEC_REG(22, 14, 0x7f)
> +#define av1_global_mv_seg2		AV1_DEC_REG(22, 21, 0x1)
> +#define av1_mf1_last3_offset		AV1_DEC_REG(22, 22, 0x1ff)
> +
> +#define av1_filt_level_delta1_seg3	AV1_DEC_REG(23, 0, 0x7f)
> +#define av1_filt_level_delta2_seg3	AV1_DEC_REG(23, 7, 0x7f)
> +#define av1_filt_level_delta3_seg3	AV1_DEC_REG(23, 14, 0x7f)
> +#define av1_global_mv_seg3		AV1_DEC_REG(23, 21, 0x1)
> +#define av1_mf1_golden_offset		AV1_DEC_REG(23, 22, 0x1ff)
> +
> +#define av1_filt_level_delta1_seg4	AV1_DEC_REG(24, 0, 0x7f)
> +#define av1_filt_level_delta2_seg4	AV1_DEC_REG(24, 7, 0x7f)
> +#define av1_filt_level_delta3_seg4	AV1_DEC_REG(24, 14, 0x7f)
> +#define av1_global_mv_seg4		AV1_DEC_REG(24, 21, 0x1)
> +#define av1_mf1_bwdref_offset		AV1_DEC_REG(24, 22, 0x1ff)
> +
> +#define av1_filt_level_delta1_seg5	AV1_DEC_REG(25, 0, 0x7f)
> +#define av1_filt_level_delta2_seg5	AV1_DEC_REG(25, 7, 0x7f)
> +#define av1_filt_level_delta3_seg5	AV1_DEC_REG(25, 14, 0x7f)
> +#define av1_global_mv_seg5		AV1_DEC_REG(25, 21, 0x1)
> +#define av1_mf1_altref2_offset		AV1_DEC_REG(25, 22, 0x1ff)
> +
> +#define av1_filt_level_delta1_seg6	AV1_DEC_REG(26, 0, 0x7f)
> +#define av1_filt_level_delta2_seg6	AV1_DEC_REG(26, 7, 0x7f)
> +#define av1_filt_level_delta3_seg6	AV1_DEC_REG(26, 14, 0x7f)
> +#define av1_global_mv_seg6		AV1_DEC_REG(26, 21, 0x1)
> +#define av1_mf1_altref_offset		AV1_DEC_REG(26, 22, 0x1ff)
> +
> +#define av1_filt_level_delta1_seg7	AV1_DEC_REG(27, 0, 0x7f)
> +#define av1_filt_level_delta2_seg7	AV1_DEC_REG(27, 7, 0x7f)
> +#define av1_filt_level_delta3_seg7	AV1_DEC_REG(27, 14, 0x7f)
> +#define av1_global_mv_seg7		AV1_DEC_REG(27, 21, 0x1)
> +#define av1_mf2_last_offset		AV1_DEC_REG(27, 22, 0x1ff)
> +
> +#define av1_cb_offset			AV1_DEC_REG(28, 0, 0x1ff)
> +#define av1_cb_luma_mult		AV1_DEC_REG(28, 9, 0xff)
> +#define av1_cb_mult			AV1_DEC_REG(28, 17, 0xff)
> +#define	av1_quant_delta_v_dc		AV1_DEC_REG(28, 25, 0x7f)
> +
> +#define av1_cr_offset			AV1_DEC_REG(29, 0, 0x1ff)
> +#define av1_cr_luma_mult		AV1_DEC_REG(29, 9, 0xff)
> +#define av1_cr_mult			AV1_DEC_REG(29, 17, 0xff)
> +#define	av1_quant_delta_v_ac		AV1_DEC_REG(29, 25, 0x7f)
> +
> +#define av1_filt_ref_adj_5		AV1_DEC_REG(30, 0, 0x7f)
> +#define av1_filt_ref_adj_4		AV1_DEC_REG(30, 7, 0x7f)
> +#define av1_filt_mb_adj_1		AV1_DEC_REG(30, 14, 0x7f)
> +#define av1_filt_mb_adj_0		AV1_DEC_REG(30, 21, 0x7f)
> +#define av1_filt_sharpness		AV1_DEC_REG(30, 28, 0x7)
> +
> +#define av1_quant_seg6			AV1_DEC_REG(31, 0, 0xff)
> +#define av1_filt_level_seg6		AV1_DEC_REG(31, 8, 0x3f)
> +#define av1_skip_seg6			AV1_DEC_REG(31, 14, 0x1)
> +#define av1_refpic_seg6			AV1_DEC_REG(31, 15, 0xf)
> +#define av1_filt_level_delta0_seg6	AV1_DEC_REG(31, 19, 0x7f)
> +#define av1_skip_ref0			AV1_DEC_REG(31, 26, 0xf)
> +
> +#define av1_quant_seg7			AV1_DEC_REG(32, 0, 0xff)
> +#define av1_filt_level_seg7		AV1_DEC_REG(32, 8, 0x3f)
> +#define av1_skip_seg7			AV1_DEC_REG(32, 14, 0x1)
> +#define av1_refpic_seg7			AV1_DEC_REG(32, 15, 0xf)
> +#define av1_filt_level_delta0_seg7	AV1_DEC_REG(32, 19, 0x7f)
> +#define av1_skip_ref1			AV1_DEC_REG(32, 26, 0xf)
> +
> +#define av1_ref0_height			AV1_DEC_REG(33, 0, 0xffff)
> +#define av1_ref0_width			AV1_DEC_REG(33, 16, 0xffff)
> +
> +#define av1_ref1_height			AV1_DEC_REG(34, 0, 0xffff)
> +#define av1_ref1_width			AV1_DEC_REG(34, 16, 0xffff)
> +
> +#define av1_ref2_height			AV1_DEC_REG(35, 0, 0xffff)
> +#define av1_ref2_width			AV1_DEC_REG(35, 16, 0xffff)
> +
> +#define av1_ref0_ver_scale		AV1_DEC_REG(36, 0, 0xffff)
> +#define av1_ref0_hor_scale		AV1_DEC_REG(36, 16, 0xffff)
> +
> +#define av1_ref1_ver_scale		AV1_DEC_REG(37, 0, 0xffff)
> +#define av1_ref1_hor_scale		AV1_DEC_REG(37, 16, 0xffff)
> +
> +#define av1_ref2_ver_scale		AV1_DEC_REG(38, 0, 0xffff)
> +#define av1_ref2_hor_scale		AV1_DEC_REG(38, 16, 0xffff)
> +
> +#define av1_ref3_ver_scale		AV1_DEC_REG(39, 0, 0xffff)
> +#define av1_ref3_hor_scale		AV1_DEC_REG(39, 16, 0xffff)
> +
> +#define av1_ref4_ver_scale		AV1_DEC_REG(40, 0, 0xffff)
> +#define av1_ref4_hor_scale		AV1_DEC_REG(40, 16, 0xffff)
> +
> +#define av1_ref5_ver_scale		AV1_DEC_REG(41, 0, 0xffff)
> +#define av1_ref5_hor_scale		AV1_DEC_REG(41, 16, 0xffff)
> +
> +#define av1_ref6_ver_scale		AV1_DEC_REG(42, 0, 0xffff)
> +#define av1_ref6_hor_scale		AV1_DEC_REG(42, 16, 0xffff)
> +
> +#define av1_ref3_height			AV1_DEC_REG(43, 0, 0xffff)
> +#define av1_ref3_width			AV1_DEC_REG(43, 16, 0xffff)
> +
> +#define av1_ref4_height			AV1_DEC_REG(44, 0, 0xffff)
> +#define av1_ref4_width			AV1_DEC_REG(44, 16, 0xffff)
> +
> +#define av1_ref5_height			AV1_DEC_REG(45, 0, 0xffff)
> +#define av1_ref5_width			AV1_DEC_REG(45, 16, 0xffff)
> +
> +#define av1_ref6_height			AV1_DEC_REG(46, 0, 0xffff)
> +#define av1_ref6_width			AV1_DEC_REG(46, 16, 0xffff)
> +
> +#define av1_mf2_last2_offset		AV1_DEC_REG(47, 0, 0x1ff)
> +#define av1_mf2_last3_offset		AV1_DEC_REG(47, 9, 0x1ff)
> +#define av1_mf2_golden_offset		AV1_DEC_REG(47, 18, 0x1ff)
> +#define av1_qmlevel_y			AV1_DEC_REG(47, 27, 0xf)
> +
> +#define av1_mf2_bwdref_offset		AV1_DEC_REG(48, 0, 0x1ff)
> +#define av1_mf2_altref2_offset		AV1_DEC_REG(48, 9, 0x1ff)
> +#define av1_mf2_altref_offset		AV1_DEC_REG(48, 18, 0x1ff)
> +#define av1_qmlevel_u			AV1_DEC_REG(48, 27, 0xf)
> +
> +#define av1_filt_ref_adj_6		AV1_DEC_REG(49, 0, 0x7f)
> +#define av1_filt_ref_adj_7		AV1_DEC_REG(49, 7, 0x7f)
> +#define av1_qmlevel_v			AV1_DEC_REG(49, 14, 0xf)
> +
> +#define av1_superres_chroma_step	AV1_DEC_REG(51, 0, 0x3fff)
> +#define av1_superres_luma_step		AV1_DEC_REG(51, 14, 0x3fff)
> +
> +#define av1_superres_init_chroma_subpel_x	AV1_DEC_REG(52, 0, 0x3fff)
> +#define av1_superres_init_luma_subpel_x		AV1_DEC_REG(52, 14, 0x3fff)
> +
> +#define av1_cdef_chroma_secondary_strength	AV1_DEC_REG(53, 0, 0xffff)
> +#define av1_cdef_luma_secondary_strength	AV1_DEC_REG(53, 16, 0xffff)
> +
> +#define av1_apf_threshold		AV1_DEC_REG(55, 0, 0xffff)
> +#define av1_apf_single_pu_mode		AV1_DEC_REG(55, 30, 0x1)
> +#define av1_apf_disable			AV1_DEC_REG(55, 30, 0x1)
> +
> +#define av1_dec_max_burst		AV1_DEC_REG(58, 0, 0xff)
> +#define av1_dec_buswidth		AV1_DEC_REG(58, 8, 0x7)
> +#define av1_dec_multicore_mode		AV1_DEC_REG(58, 11, 0x3)
> +#define av1_dec_axi_wd_id_e		AV1_DEC_REG(58,	13, 0x1)
> +#define av1_dec_axi_rd_id_e		AV1_DEC_REG(58, 14, 0x1)
> +#define av1_dec_mc_polltime		AV1_DEC_REG(58, 17, 0x3ff)
> +#define av1_dec_mc_pollmode		AV1_DEC_REG(58,	27, 0x3)
> +
> +#define av1_filt_ref_adj_3		AV1_DEC_REG(59, 0, 0x3f)
> +#define av1_filt_ref_adj_2		AV1_DEC_REG(59, 7, 0x3f)
> +#define av1_filt_ref_adj_1		AV1_DEC_REG(59, 14, 0x3f)
> +#define av1_filt_ref_adj_0		AV1_DEC_REG(59, 21, 0x3f)
> +#define av1_ref0_sign_bias		AV1_DEC_REG(59, 28, 0x1)
> +#define av1_ref1_sign_bias		AV1_DEC_REG(59, 29, 0x1)
> +#define av1_ref2_sign_bias		AV1_DEC_REG(59, 30, 0x1)
> +#define av1_ref3_sign_bias		AV1_DEC_REG(59, 31, 0x1)
> +
> +#define av1_cur_last_roffset		AV1_DEC_REG(184, 0, 0x1ff)
> +#define av1_cur_last_offset		AV1_DEC_REG(184, 9, 0x1ff)
> +#define av1_mf3_last_offset		AV1_DEC_REG(184, 18, 0x1ff)
> +#define av1_ref0_gm_mode		AV1_DEC_REG(184, 27, 0x3)
> +
> +#define av1_cur_last2_roffset		AV1_DEC_REG(185, 0, 0x1ff)
> +#define av1_cur_last2_offset		AV1_DEC_REG(185, 9, 0x1ff)
> +#define av1_mf3_last2_offset		AV1_DEC_REG(185, 18, 0x1ff)
> +#define av1_ref1_gm_mode		AV1_DEC_REG(185, 27, 0x3)
> +
> +#define av1_cur_last3_roffset		AV1_DEC_REG(186, 0, 0x1ff)
> +#define av1_cur_last3_offset		AV1_DEC_REG(186, 9, 0x1ff)
> +#define av1_mf3_last3_offset		AV1_DEC_REG(186, 18, 0x1ff)
> +#define av1_ref2_gm_mode		AV1_DEC_REG(186, 27, 0x3)
> +
> +#define av1_cur_golden_roffset		AV1_DEC_REG(187, 0, 0x1ff)
> +#define av1_cur_golden_offset		AV1_DEC_REG(187, 9, 0x1ff)
> +#define av1_mf3_golden_offset		AV1_DEC_REG(187, 18, 0x1ff)
> +#define av1_ref3_gm_mode		AV1_DEC_REG(187, 27, 0x3)
> +
> +#define av1_cur_bwdref_roffset		AV1_DEC_REG(188, 0, 0x1ff)
> +#define av1_cur_bwdref_offset		AV1_DEC_REG(188, 9, 0x1ff)
> +#define av1_mf3_bwdref_offset		AV1_DEC_REG(188, 18, 0x1ff)
> +#define av1_ref4_gm_mode		AV1_DEC_REG(188, 27, 0x3)
> +
> +#define av1_cur_altref2_roffset		AV1_DEC_REG(257, 0, 0x1ff)
> +#define av1_cur_altref2_offset		AV1_DEC_REG(257, 9, 0x1ff)
> +#define av1_mf3_altref2_offset		AV1_DEC_REG(257, 18, 0x1ff)
> +#define av1_ref5_gm_mode		AV1_DEC_REG(257, 27, 0x3)
> +
> +#define av1_strm_buffer_len		AV1_DEC_REG(258, 0, 0xffffffff)
> +
> +#define av1_strm_start_offset		AV1_DEC_REG(259, 0, 0xffffffff)
> +
> +#define av1_ppd_blend_exist		AV1_DEC_REG(260, 21, 0x1)
> +#define av1_ppd_dith_exist		AV1_DEC_REG(260, 23, 0x1)
> +#define av1_ablend_crop_e		AV1_DEC_REG(260, 24, 0x1)
> +#define av1_pp_format_p010_e		AV1_DEC_REG(260, 25, 0x1)
> +#define av1_pp_format_customer1_e	AV1_DEC_REG(260, 26, 0x1)
> +#define av1_pp_crop_exist		AV1_DEC_REG(260, 27, 0x1)
> +#define av1_pp_up_level			AV1_DEC_REG(260, 28, 0x1)
> +#define av1_pp_down_level		AV1_DEC_REG(260, 29, 0x3)
> +#define av1_pp_exist			AV1_DEC_REG(260, 31, 0x1)
> +
> +#define av1_cur_altref_roffset		AV1_DEC_REG(262, 0, 0x1ff)
> +#define av1_cur_altref_offset		AV1_DEC_REG(262, 9, 0x1ff)
> +#define av1_mf3_altref_offset		AV1_DEC_REG(262, 18, 0x1ff)
> +#define av1_ref6_gm_mode		AV1_DEC_REG(262, 27, 0x3)
> +
> +#define av1_cdef_luma_primary_strength	AV1_DEC_REG(263, 0, 0xffffffff)
> +
> +#define av1_cdef_chroma_primary_strength AV1_DEC_REG(264, 0, 0xffffffff)
> +
> +#define av1_axi_arqos			AV1_DEC_REG(265, 0, 0xf)
> +#define av1_axi_awqos			AV1_DEC_REG(265, 4, 0xf)
> +#define av1_axi_wr_ostd_threshold	AV1_DEC_REG(265, 8, 0x3ff)
> +#define av1_axi_rd_ostd_threshold	AV1_DEC_REG(265, 18, 0x3ff)
> +#define av1_axi_wr_4k_dis		AV1_DEC_REG(265, 31, 0x1)
> +
> +#define av1_128bit_mode			AV1_DEC_REG(266, 5, 0x1)
> +#define av1_wr_shaper_bypass		AV1_DEC_REG(266, 10, 0x1)
> +#define av1_error_conceal_e		AV1_DEC_REG(266, 30, 0x1)
> +
> +#define av1_superres_chroma_step_invra	AV1_DEC_REG(298, 0, 0xffff)
> +#define av1_superres_luma_step_invra	AV1_DEC_REG(298, 16, 0xffff)
> +
> +#define av1_dec_alignment		AV1_DEC_REG(314, 0, 0xffff)
> +
> +#define av1_ext_timeout_cycles		AV1_DEC_REG(318, 0, 0x7fffffff)
> +#define av1_ext_timeout_override_e	AV1_DEC_REG(318, 31, 0x1)
> +
> +#define av1_timeout_cycles		AV1_DEC_REG(319, 0, 0x7fffffff)
> +#define av1_timeout_override_e		AV1_DEC_REG(319, 31, 0x1)
> +
> +#define av1_pp_out_e			AV1_DEC_REG(320, 0, 0x1)
> +#define av1_pp_cr_first			AV1_DEC_REG(320, 1, 0x1)
> +#define av1_pp_out_mode			AV1_DEC_REG(320, 2, 0x1)
> +#define av1_pp_out_tile_e		AV1_DEC_REG(320, 3, 0x1)
> +#define av1_pp_status			AV1_DEC_REG(320, 4, 0xf)
> +#define av1_pp_in_blk_size		AV1_DEC_REG(320, 8, 0x7)
> +#define av1_pp_out_p010_fmt		AV1_DEC_REG(320, 11, 0x3)
> +#define av1_pp_out_rgb_fmt		AV1_DEC_REG(320, 13, 0x1f)
> +#define av1_rgb_range_max		AV1_DEC_REG(320, 18, 0xfff)
> +#define av1_pp_rgb_planar		AV1_DEC_REG(320, 30, 0x1)
> +
> +#define av1_scale_hratio		AV1_DEC_REG(322, 0, 0x3ffff)
> +#define av1_pp_out_format		AV1_DEC_REG(322, 18, 0x1f)
> +#define av1_ver_scale_mode		AV1_DEC_REG(322, 23, 0x3)
> +#define av1_hor_scale_mode		AV1_DEC_REG(322, 25, 0x3)
> +#define av1_pp_in_format		AV1_DEC_REG(322, 27, 0x1f)
> +
> +#define av1_pp_out_c_stride		AV1_DEC_REG(329, 0, 0xffff)
> +#define av1_pp_out_y_stride		AV1_DEC_REG(329, 16, 0xffff)
> +
> +#define av1_pp_in_height		AV1_DEC_REG(331, 0, 0xffff)
> +#define av1_pp_in_width			AV1_DEC_REG(331, 16, 0xffff)
> +
> +#define av1_pp_out_height		AV1_DEC_REG(332, 0, 0xffff)
> +#define av1_pp_out_width		AV1_DEC_REG(332, 16, 0xffff)
> +
> +#define av1_pp1_dup_ver			AV1_DEC_REG(394, 0, 0xff)
> +#define av1_pp1_dup_hor			AV1_DEC_REG(394, 8, 0xff)
> +#define av1_pp0_dup_ver			AV1_DEC_REG(394, 16, 0xff)
> +#define av1_pp0_dup_hor			AV1_DEC_REG(394, 24, 0xff)
> +
> +#define AV1_TILE_OUT_LU			(AV1_SWREG(65))
> +#define AV1_REFERENCE_Y(i)		(AV1_SWREG(67) + ((i) * 0x8))
> +#define AV1_SEGMENTATION		(AV1_SWREG(81))
> +#define AV1_GLOBAL_MODEL		(AV1_SWREG(83))
> +#define AV1_CDEF_COL			(AV1_SWREG(85))
> +#define AV1_SR_COL			(AV1_SWREG(89))
> +#define AV1_LR_COL			(AV1_SWREG(91))
> +#define AV1_FILM_GRAIN			(AV1_SWREG(95))
> +#define AV1_TILE_OUT_CH			(AV1_SWREG(99))
> +#define AV1_REFERENCE_CB(i)		(AV1_SWREG(101) + ((i) * 0x8))
> +#define AV1_TILE_OUT_MV			(AV1_SWREG(133))
> +#define AV1_REFERENCE_MV(i)		(AV1_SWREG(135) + ((i) * 0x8))
> +#define AV1_TILE_BASE			(AV1_SWREG(167))
> +#define AV1_INPUT_STREAM		(AV1_SWREG(169))
> +#define AV1_PROP_TABLE_OUT		(AV1_SWREG(171))
> +#define AV1_PROP_TABLE			(AV1_SWREG(173))
> +#define AV1_MC_SYNC_CURR		(AV1_SWREG(175))
> +#define AV1_MC_SYNC_LEFT		(AV1_SWREG(177))
> +#define AV1_DB_DATA_COL			(AV1_SWREG(179))
> +#define AV1_DB_CTRL_COL			(AV1_SWREG(183))
> +#define AV1_PP_OUT_LU			(AV1_SWREG(326))
> +#define AV1_PP_OUT_CH			(AV1_SWREG(328))
> +
> +#endif /* _ROCKCHIP_VPU981_REGS_H_ */
Benjamin Gaignard Dec. 20, 2022, 2:49 p.m. UTC | #3
Le 19/12/2022 à 22:56, Nicolas Dufresne a écrit :
> Le lundi 19 décembre 2022 à 16:56 +0100, Benjamin Gaignard a écrit :
>> Implement AV1 stateless decoder for rockchip VPU981.
>> It decode 8 and 10 bits AV1 bitstreams.
>> AV1 scaling feature is done by the postprocessor.
> Can you clarify ? I knew the filmgrain was, but didn't expect spatial scaling to
> need it. On this aspect, I don't see any new code to let Hantro driver know that
> the postproc is needed. The frame header value should be taken into account in
> hantro_needs_postproc() (a per codec ops would be nice).

Two tests (av1-1-b8-03-sizedown and av1-1-b8-03-sizeup) only work if the postprocessor
is used. Given their names, I assume it is related to scaling.
Forcing postprocessing is not an easy task in the Hantro driver, and this series is already big enough.

What I have prototyped to solve this problem is to add a "need_postproc" field in the context structure,
but I still have issues around pixel format negotiation to solve before releasing it.

>
>> Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
>> ---
>>   drivers/media/platform/verisilicon/Makefile   |    1 +
>>   .../media/platform/verisilicon/hantro_hw.h    |   64 +-
>>   .../verisilicon/rockchip_vpu981_hw_av1_dec.c  | 2067 +++++++++++++++++
>>   .../verisilicon/rockchip_vpu981_regs.h        |  477 ++++
>>   4 files changed, 2607 insertions(+), 2 deletions(-)
>>   create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
>>   create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
>>
>> diff --git a/drivers/media/platform/verisilicon/Makefile b/drivers/media/platform/verisilicon/Makefile
>> index d2b2679c00eb..c9a9806ab8c5 100644
>> --- a/drivers/media/platform/verisilicon/Makefile
>> +++ b/drivers/media/platform/verisilicon/Makefile
>> @@ -18,6 +18,7 @@ hantro-vpu-y += \
>>   		rockchip_vpu2_hw_h264_dec.o \
>>   		rockchip_vpu2_hw_mpeg2_dec.o \
>>   		rockchip_vpu2_hw_vp8_dec.o \
>> +		rockchip_vpu981_hw_av1_dec.o \
>>   		rockchip_av1_entropymode.o \
>>   		hantro_jpeg.o \
>>   		hantro_h264.o \
>> diff --git a/drivers/media/platform/verisilicon/hantro_hw.h b/drivers/media/platform/verisilicon/hantro_hw.h
>> index e395aeeae2f4..3c0a995998a5 100644
>> --- a/drivers/media/platform/verisilicon/hantro_hw.h
>> +++ b/drivers/media/platform/verisilicon/hantro_hw.h
>> @@ -37,6 +37,9 @@
>>   
>>   #define NUM_REF_PICTURES	(V4L2_HEVC_DPB_ENTRIES_NUM_MAX + 1)
>>   
>> +#define AV1_REF_LIST_SIZE	8
> This is the same as V4L2_AV1_TOTAL_REFS_PER_FRAME. AV1_TOTAL_REFS_PER_FRAME is
> how this is called in the spec.

ok

>
>> +#define AV1_MAX_FRAME_BUF_COUNT	(V4L2_AV1_TOTAL_REFS_PER_FRAME + 1)
>> +
>>   struct hantro_dev;
>>   struct hantro_ctx;
>>   struct hantro_buf;
>> @@ -250,23 +253,80 @@ struct hantro_vp9_dec_hw_ctx {
>>   };
>>   
>>   /**
>> - * hantro_av1_dec_hw_ctx
>> + * struct hantro_av1_dec_ctrls
>> + * @sequence:		AV1 Sequence
>> + * @tile_group_entry:	AV1 Tile Group entry
>> + * @frame:		AV1 Frame Header OBU
>> + * @film_grain:		AV1 Film Grain
>> + */
>> +struct hantro_av1_dec_ctrls {
>> +	const struct v4l2_ctrl_av1_sequence *sequence;
>> +	const struct v4l2_ctrl_av1_tile_group_entry *tile_group_entry;
>> +	const struct v4l2_ctrl_av1_frame *frame;
>> +	const struct v4l2_ctrl_av1_film_grain *film_grain;
>> +};
>> +
>> +struct hantro_av1_frame_ref {
>> +	int width;
>> +	int height;
>> +	u64 timestamp;
>> +	enum v4l2_av1_frame_type frame_type;
>> +	int ref_count;
>> +	u32 order_hint;
>> +	u32 order_hints[V4L2_AV1_NUM_REF_FRAMES];
>> +	int gm_mode;
>> +	struct vb2_v4l2_buffer *vb2_ref;
>> +};
>> +
>> +/**
>> + * struct hantro_av1_dec_hw_ctx
>> + * @db_data_col:	db tile col data buffer
>> + * @db_ctrl_col:	db tile col ctrl buffer
>> + * @cdef_col:		cdef tile col buffer
>> + * @sr_col:		sr tile col buffer
>> + * @lr_col:		lr tile col buffer
>> + * @global_model:	global model buffer
>> + * @tile_info:		tile info buffer
>> + * @segment:		segmentation info buffer
>> + * @prob_tbl:		probability table
>> + * @prob_tbl_out:	probability table output
>> + * @tile_buf:		tile buffer
>> + * @ctrls:		V4L2 controls attached to a run
>> + * @frame_refs:		reference frames info slots
>> + * @ref_frame_sign_bias: array of sign bias
>> + * @num_tile_cols_allocated: number of allocated tiles
>>    * @cdfs:		current probabilities structure
>>    * @cdfs_ndvc:		current mv probabilities structure
>>    * @default_cdfs:	default probabilities structure
>>    * @default_cdfs_ndvc:	default mv probabilties structure
>>    * @cdfs_last:		stored probabilities structures
>>    * @cdfs_last_ndvc:	stored mv probabilities structures
>> + * @current_frame_index: index of the current in frame_refs array
>>    */
>>   struct hantro_av1_dec_hw_ctx {
>> +	struct hantro_aux_buf db_data_col;
>> +	struct hantro_aux_buf db_ctrl_col;
>> +	struct hantro_aux_buf cdef_col;
>> +	struct hantro_aux_buf sr_col;
>> +	struct hantro_aux_buf lr_col;
>> +	struct hantro_aux_buf global_model;
>> +	struct hantro_aux_buf tile_info;
>> +	struct hantro_aux_buf segment;
>> +	struct hantro_aux_buf prob_tbl;
>> +	struct hantro_aux_buf prob_tbl_out;
>> +	struct hantro_aux_buf tile_buf;
>> +	struct hantro_av1_dec_ctrls ctrls;
>> +	struct hantro_av1_frame_ref frame_refs[AV1_MAX_FRAME_BUF_COUNT];
>> +	uint32_t ref_frame_sign_bias[AV1_REF_LIST_SIZE];
>> +	unsigned int num_tile_cols_allocated;
>>   	struct av1cdfs *cdfs;
>>   	struct mvcdfs  *cdfs_ndvc;
>>   	struct av1cdfs default_cdfs;
>>   	struct mvcdfs  default_cdfs_ndvc;
>>   	struct av1cdfs cdfs_last[NUM_REF_FRAMES];
>>   	struct mvcdfs  cdfs_last_ndvc[NUM_REF_FRAMES];
>> +	int current_frame_index;
>>   };
>> -
>>   /**
>>    * struct hantro_postproc_ctx
>>    *
>> diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
>> new file mode 100644
>> index 000000000000..a183e4f35e00
>> --- /dev/null
>> +++ b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
>> @@ -0,0 +1,2067 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * Copyright (c) 2021, Collabora
>> + *
>> + * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
>> + */
>> +
>> +#include <media/v4l2-mem2mem.h>
>> +#include "hantro.h"
>> +#include "hantro_v4l2.h"
>> +#include "rockchip_vpu981_regs.h"
>> +
>> +#define AV1_DEC_MODE		17
>> +#define GM_GLOBAL_MODELS_PER_FRAME	7
>> +#define GLOBAL_MODEL_TOTAL_SIZE	(6 * 4 + 4 * 2)
>> +#define GLOBAL_MODEL_SIZE	ALIGN(GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE, 2048)
>> +#define AV1_MAX_TILES		128
>> +#define AV1_TILE_INFO_SIZE	(AV1_MAX_TILES * 16)
>> +#define AV1DEC_MAX_PIC_BUFFERS	24
>> +#define AV1_REF_SCALE_SHIFT	14
>> +#define AV1_INVALID_IDX		-1
>> +#define MAX_FRAME_DISTANCE	31
>> +#define AV1_PRIMARY_REF_NONE	7
>> +#define AV1_TILE_SIZE		ALIGN(32 * 128, 4096)
>> +/*
>> + * These 3 values aren't defined enum v4l2_av1_segment_feature because
>> + * they are not part of the specification
>> + */
>> +#define V4L2_AV1_SEG_LVL_ALT_LF_Y_H	2
>> +#define V4L2_AV1_SEG_LVL_ALT_LF_U	3
>> +#define V4L2_AV1_SEG_LVL_ALT_LF_V	4
>> +
>> +#define CLIP3(l, h, v)      ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
> include/linux/minmax.h:#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)

ok I will remove it

>
>> +
>> +#define SUPERRES_SCALE_BITS 3
>> +#define SCALE_NUMERATOR 8
>> +#define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)
>> +
>> +#define RS_SUBPEL_BITS 6
>> +#define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
>> +#define RS_SCALE_SUBPEL_BITS 14
>> +#define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
>> +#define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
>> +#define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))
>> +
>> +#define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
>> +
>> +#define LST_BUF_IDX (V4L2_AV1_REF_LAST_FRAME - V4L2_AV1_REF_LAST_FRAME)
>> +#define LST2_BUF_IDX (V4L2_AV1_REF_LAST2_FRAME - V4L2_AV1_REF_LAST_FRAME)
>> +#define LST3_BUF_IDX (V4L2_AV1_REF_LAST3_FRAME - V4L2_AV1_REF_LAST_FRAME)
>> +#define GLD_BUF_IDX (V4L2_AV1_REF_GOLDEN_FRAME - V4L2_AV1_REF_LAST_FRAME)
>> +#define BWD_BUF_IDX (V4L2_AV1_REF_BWDREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
>> +#define ALT2_BUF_IDX (V4L2_AV1_REF_ALTREF2_FRAME - V4L2_AV1_REF_LAST_FRAME)
>> +#define ALT_BUF_IDX (V4L2_AV1_REF_ALTREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
>> +
>> +#define DIV_LUT_PREC_BITS 14
>> +#define DIV_LUT_BITS 8
>> +#define DIV_LUT_NUM BIT(DIV_LUT_BITS)
>> +#define WARP_PARAM_REDUCE_BITS 6
>> +#define WARPEDMODEL_PREC_BITS 16
>> +
>> +#define AV1_DIV_ROUND_UP_POW2(value, n)			\
>> +({							\
>> +	typeof(n) _n  = n;				\
>> +	typeof(value) _value = value;			\
>> +	(_value + (BIT(_n) >> 1)) >> _n;		\
>> +})
>> +
>> +#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n)				\
>> +({									\
>> +	typeof(n) _n_  = n;						\
>> +	typeof(value) _value_ = value;					\
>> +	(((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_))	\
>> +		: AV1_DIV_ROUND_UP_POW2((_value_), (_n_)));		\
>> +})
>> +
>> +struct rockchip_av1_film_grain {
>> +	uint8_t scaling_lut_y[256];
>> +	uint8_t scaling_lut_cb[256];
>> +	uint8_t scaling_lut_cr[256];
>> +	int16_t cropped_luma_grain_block[4096];
>> +	int16_t cropped_chroma_grain_block[1024 * 2];
>> +};
>> +
>> +static const short div_lut[DIV_LUT_NUM + 1] = {
>> +	16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
>> +	15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
>> +	15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
>> +	14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
>> +	13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
>> +	13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
>> +	13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
>> +	12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
>> +	12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
>> +	11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
>> +	11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
>> +	11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
>> +	10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
>> +	10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
>> +	10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
>> +	9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
>> +	9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
>> +	9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
>> +	9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
>> +	9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
>> +	8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
>> +	8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
>> +	8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
>> +	8240,  8224,  8208,  8192,
>> +};
>> +
>> +static int rockchip_vpu981_get_frame_index(struct hantro_ctx *ctx, int ref)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>> +	u64 timestamp;
>> +	int i, idx = frame->ref_frame_idx[ref];
>> +
>> +	if (idx >= AV1_MAX_FRAME_BUF_COUNT || idx < 0)
>> +		return AV1_INVALID_IDX;
>> +
>> +	timestamp = frame->reference_frame_ts[idx];
>> +	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
>> +		if (av1_dec->frame_refs[i].ref_count == 0)
>> +			continue;
>> +		if (av1_dec->frame_refs[i].timestamp == timestamp)
>> +			return i;
>> +	}
> nit: Mediatek optimize this with a map
>
>> +
>> +	return AV1_INVALID_IDX;
>> +}
>> +
>> +static int rockchip_vpu981_get_order_hint(struct hantro_ctx *ctx, int ref)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	int idx = rockchip_vpu981_get_frame_index(ctx, ref);
>> +
>> +	if (idx != AV1_INVALID_IDX)
>> +		return av1_dec->frame_refs[idx].order_hint;
>> +
>> +	return 0;
>> +}
>> +
>> +static int rockchip_vpu981_av1_dec_frame_ref(struct hantro_ctx *ctx,
>> +					     u64 timestamp)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>> +	int i;
>> +
>> +	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
>> +		if (av1_dec->frame_refs[i].ref_count == 0) {
>> +			int j;
>> +
>> +			av1_dec->frame_refs[i].width =
>> +			    frame->frame_width_minus_1 + 1;
>> +			av1_dec->frame_refs[i].height =
>> +			    frame->frame_height_minus_1 + 1;
>> +			av1_dec->frame_refs[i].timestamp = timestamp;
>> +			av1_dec->frame_refs[i].frame_type = frame->frame_type;
>> +			av1_dec->frame_refs[i].order_hint = frame->order_hint;
>> +			av1_dec->frame_refs[i].gm_mode =
>> +				frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME + i];
>> +			if (!av1_dec->frame_refs[i].vb2_ref)
>> +				av1_dec->frame_refs[i].vb2_ref = hantro_get_dst_buf(ctx);
>> +
>> +			for (j = 0; j < V4L2_AV1_NUM_REF_FRAMES; j++)
>> +				av1_dec->frame_refs[i].order_hints[j] = frame->order_hints[j];
>> +
>> +			av1_dec->frame_refs[i].ref_count++;
> This ref_count is only ever 0 and 1. Would be nicer if it was some boolean, like
> "valid", or "active".

I will change it to "bool used;"

>
>> +			av1_dec->current_frame_index = i;
>> +			return i;
>> +		}
>> +	}
>> +
>> +	return AV1_INVALID_IDX;
>> +}
>> +
>> +static void rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx *ctx, int idx)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +
>> +	if (idx < 0)
>> +		return;
>> +
>> +	av1_dec->frame_refs[idx].ref_count--;
>> +
>> +	if (av1_dec->frame_refs[idx].ref_count < 0)
>> +		pr_warn("AV1 reference frames refcounting error (idx %d)\n", idx);
> Looks like a bug_on case, only driver programming issue could cause this.

I have never hit it, I will remove it.

>
>
>> +}
>> +
>> +static void rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>> +
>> +	int ref, idx;
>> +
>> +	for (idx = 0; idx < AV1_MAX_FRAME_BUF_COUNT; idx++) {
>> +		u64 timestamp = av1_dec->frame_refs[idx].timestamp;
>> +		bool used = false;
>> +
>> +		if (av1_dec->frame_refs[idx].ref_count == 0)
>> +			continue;
>> +
>> +		for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++) {
>> +			if (ctrls->frame->reference_frame_ts[ref] == timestamp)
>> +				used = true;
>> +		}
>> +
>> +		if (!used)
>> +			rockchip_vpu981_av1_dec_frame_unref(ctx, idx);
>> +	}
>> +}
>> +
>> +static size_t rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx *ctx)
>> +{
>> +	return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8;
>> +}
>> +
>> +static size_t rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx *ctx)
>> +{
>> +	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
>> +
>> +	return ALIGN((cr_offset * 3) / 2, 64);
>> +}
>> +
>> +void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +
>> +	if (av1_dec->db_data_col.cpu)
>> +		dma_free_coherent(vpu->dev, av1_dec->db_data_col.size,
>> +				  av1_dec->db_data_col.cpu,
>> +				  av1_dec->db_data_col.dma);
>> +	av1_dec->db_data_col.cpu = NULL;
>> +
>> +	if (av1_dec->db_ctrl_col.cpu)
>> +		dma_free_coherent(vpu->dev, av1_dec->db_ctrl_col.size,
>> +				  av1_dec->db_ctrl_col.cpu,
>> +				  av1_dec->db_ctrl_col.dma);
>> +	av1_dec->db_ctrl_col.cpu = NULL;
>> +
>> +	if (av1_dec->cdef_col.cpu)
>> +		dma_free_coherent(vpu->dev, av1_dec->cdef_col.size,
>> +				  av1_dec->cdef_col.cpu, av1_dec->cdef_col.dma);
>> +	av1_dec->cdef_col.cpu = NULL;
>> +
>> +	if (av1_dec->sr_col.cpu)
>> +		dma_free_coherent(vpu->dev, av1_dec->sr_col.size,
>> +				  av1_dec->sr_col.cpu, av1_dec->sr_col.dma);
>> +	av1_dec->sr_col.cpu = NULL;
>> +
>> +	if (av1_dec->lr_col.cpu)
>> +		dma_free_coherent(vpu->dev, av1_dec->lr_col.size,
>> +				  av1_dec->lr_col.cpu, av1_dec->lr_col.dma);
>> +	av1_dec->lr_col.cpu = NULL;
>> +}
>> +
>> +static int rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>> +	unsigned int num_tile_cols = 1 << ctrls->tile_group_entry->tile_col;
>> +	unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
>> +	unsigned int height_in_sb = height / 64;
>> +	unsigned int stripe_num = ((height + 8) + 63) / 64;
>> +	size_t size;
>> +
>> +	if (num_tile_cols <= av1_dec->num_tile_cols_allocated)
>> +		return 0;
>> +
>> +	rockchip_vpu981_av1_dec_tiles_free(ctx);
>> +
>> +	size = ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols;
>> +	av1_dec->db_data_col.cpu = dma_alloc_coherent(vpu->dev, size,
>> +						      &av1_dec->db_data_col.dma,
>> +						      GFP_KERNEL);
>> +	if (!av1_dec->db_data_col.cpu)
>> +		goto buffer_allocation_error;
>> +	av1_dec->db_data_col.size = size;
>> +
>> +	size = ALIGN(height * 2 * 16 / 4, 128) * num_tile_cols;
> No bit_depth scaling ?

No, the MPP code doesn't scale this one depending on bit_depth.

>
>> +	av1_dec->db_ctrl_col.cpu = dma_alloc_coherent(vpu->dev, size,
>> +						      &av1_dec->db_ctrl_col.dma,
>> +						      GFP_KERNEL);
>> +	if (!av1_dec->db_ctrl_col.cpu)
>> +		goto buffer_allocation_error;
>> +	av1_dec->db_ctrl_col.size = size;
>> +
>> +	size = ALIGN(height_in_sb * 44 * ctx->bit_depth * 16 / 8, 128) * num_tile_cols;
>> +	av1_dec->cdef_col.cpu = dma_alloc_coherent(vpu->dev, size,
>> +						   &av1_dec->cdef_col.dma,
>> +						   GFP_KERNEL);
>> +	if (!av1_dec->cdef_col.cpu)
>> +		goto buffer_allocation_error;
>> +	av1_dec->cdef_col.size = size;
>> +
>> +	size = ALIGN(height_in_sb * (3040 + 1280), 128) * num_tile_cols;
>> +	av1_dec->sr_col.cpu = dma_alloc_coherent(vpu->dev, size,
>> +						 &av1_dec->sr_col.dma,
>> +						 GFP_KERNEL);
>> +	if (!av1_dec->sr_col.cpu)
>> +		goto buffer_allocation_error;
>> +	av1_dec->sr_col.size = size;
>> +
>> +	size = ALIGN(stripe_num * 1536 * ctx->bit_depth / 8, 128) * num_tile_cols;
>> +	av1_dec->lr_col.cpu = dma_alloc_coherent(vpu->dev, size,
>> +						 &av1_dec->lr_col.dma,
>> +						 GFP_KERNEL);
>> +	if (!av1_dec->lr_col.cpu)
>> +		goto buffer_allocation_error;
>> +	av1_dec->lr_col.size = size;
>> +
>> +	av1_dec->num_tile_cols_allocated = num_tile_cols;
>> +	return 0;
>> +
>> +buffer_allocation_error:
>> +	rockchip_vpu981_av1_dec_tiles_free(ctx);
>> +	return -ENOMEM;
>> +}
>> +
>> +void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +
>> +	if (av1_dec->global_model.cpu)
>> +		dma_free_coherent(vpu->dev, av1_dec->global_model.size,
>> +				  av1_dec->global_model.cpu,
>> +				  av1_dec->global_model.dma);
>> +	av1_dec->global_model.cpu = NULL;
>> +
>> +	if (av1_dec->tile_info.cpu)
>> +		dma_free_coherent(vpu->dev, av1_dec->tile_info.size,
>> +				  av1_dec->tile_info.cpu,
>> +				  av1_dec->tile_info.dma);
>> +	av1_dec->tile_info.cpu = NULL;
>> +
>> +	if (av1_dec->prob_tbl.cpu)
>> +		dma_free_coherent(vpu->dev, av1_dec->prob_tbl.size,
>> +				  av1_dec->prob_tbl.cpu, av1_dec->prob_tbl.dma);
>> +	av1_dec->prob_tbl.cpu = NULL;
>> +
>> +	if (av1_dec->prob_tbl_out.cpu)
>> +		dma_free_coherent(vpu->dev, av1_dec->prob_tbl_out.size,
>> +				  av1_dec->prob_tbl_out.cpu,
>> +				  av1_dec->prob_tbl_out.dma);
>> +	av1_dec->prob_tbl_out.cpu = NULL;
>> +
>> +	if (av1_dec->tile_buf.cpu)
>> +		dma_free_coherent(vpu->dev, av1_dec->tile_buf.size,
>> +				  av1_dec->tile_buf.cpu, av1_dec->tile_buf.dma);
>> +	av1_dec->tile_buf.cpu = NULL;
>> +
>> +	rockchip_vpu981_av1_dec_tiles_free(ctx);
>> +}
>> +
>> +int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +
>> +	memset(av1_dec, 0, sizeof(*av1_dec));
>> +
>> +	av1_dec->global_model.cpu = dma_alloc_coherent(vpu->dev, GLOBAL_MODEL_SIZE,
>> +						       &av1_dec->global_model.dma,
>> +						       GFP_KERNEL);
>> +	if (!av1_dec->global_model.cpu)
>> +		return -ENOMEM;
>> +	av1_dec->global_model.size = GLOBAL_MODEL_SIZE;
>> +
>> +	av1_dec->tile_info.cpu = dma_alloc_coherent(vpu->dev, AV1_MAX_TILES,
>> +						    &av1_dec->tile_info.dma,
>> +						    GFP_KERNEL);
>> +	if (!av1_dec->tile_info.cpu)
>> +		return -ENOMEM;
>> +	av1_dec->tile_info.size = AV1_MAX_TILES;
>> +
>> +	av1_dec->prob_tbl.cpu = dma_alloc_coherent(vpu->dev,
>> +						   ALIGN(sizeof(struct av1cdfs), 2048),
>> +						   &av1_dec->prob_tbl.dma,
>> +						   GFP_KERNEL);
>> +	if (!av1_dec->prob_tbl.cpu)
>> +		return -ENOMEM;
>> +	av1_dec->prob_tbl.size = ALIGN(sizeof(struct av1cdfs), 2048);
>> +
>> +	av1_dec->prob_tbl_out.cpu = dma_alloc_coherent(vpu->dev,
>> +						       ALIGN(sizeof(struct av1cdfs), 2048),
>> +						       &av1_dec->prob_tbl_out.dma,
>> +						       GFP_KERNEL);
>> +	if (!av1_dec->prob_tbl_out.cpu)
>> +		return -ENOMEM;
>> +	av1_dec->prob_tbl_out.size = ALIGN(sizeof(struct av1cdfs), 2048);
>> +	av1_dec->cdfs = &av1_dec->default_cdfs;
>> +	av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
>> +
>> +	rockchip_av1_set_default_cdfs(av1_dec->cdfs, av1_dec->cdfs_ndvc);
>> +
>> +	av1_dec->tile_buf.cpu = dma_alloc_coherent(vpu->dev,
>> +						   AV1_TILE_SIZE,
>> +						   &av1_dec->tile_buf.dma,
>> +						   GFP_KERNEL);
>> +	if (!av1_dec->tile_buf.cpu)
>> +		return -ENOMEM;
>> +	av1_dec->tile_buf.size = AV1_TILE_SIZE;
>> +
>> +	return 0;
>> +}
>> +
>> +static int rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>> +
>> +	ctrls->sequence = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_SEQUENCE);
>> +	if (WARN_ON(!ctrls->sequence))
>> +		return -EINVAL;
>> +
>> +	ctrls->tile_group_entry =
>> +	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
>> +	if (WARN_ON(!ctrls->tile_group_entry))
>> +		return -EINVAL;
>> +
>> +	ctrls->frame = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FRAME);
>> +	if (WARN_ON(!ctrls->frame))
>> +		return -EINVAL;
>> +
>> +	ctrls->film_grain =
>> +	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FILM_GRAIN);
>> +
>> +	return rockchip_vpu981_av1_dec_tiles_reallocate(ctx);
>> +}
>> +
>> +static inline int rockchip_vpu981_av1_dec_get_msb(u32 n)
>> +{
>> +	if (n == 0)
>> +		return 0;
>> +	return 31 ^ __builtin_clz(n);
>> +}
>> +
>> +static short rockchip_vpu981_av1_dec_resolve_divisor_32(u32 d, short *shift)
>> +{
>> +	int f;
>> +	uint64_t e;
>> +
>> +	*shift = rockchip_vpu981_av1_dec_get_msb(d);
>> +	/* e is obtained from D after resetting the most significant 1 bit. */
>> +	e = d - ((u32)1 << *shift);
>> +	/* Get the most significant DIV_LUT_BITS (8) bits of e into f */
>> +	if (*shift > DIV_LUT_BITS)
>> +		f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS);
>> +	else
>> +		f = e << (DIV_LUT_BITS - *shift);
>> +	if (f > DIV_LUT_NUM)
>> +		return -1;
>> +	*shift += DIV_LUT_PREC_BITS;
>> +	/* Use f as lookup into the precomputed table of multipliers */
>> +	return div_lut[f];
>> +}
>> +
>> +static void rockchip_vpu981_av1_dec_get_shear_params(const uint32_t *params,
>> +	int64_t *alpha, int64_t *beta, int64_t *gamma, int64_t *delta)
>> +{
>> +	const int *mat = params;
>> +	short shift;
>> +	short y;
>> +	long long gv, dv;
>> +
>> +	if (mat[2] <= 0)
>> +		return;
>> +
>> +	*alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
>> +	*beta = clamp_val(mat[3], S16_MIN, S16_MAX);
>> +
>> +	y = rockchip_vpu981_av1_dec_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
>> +
>> +	gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
>> +
>> +	*gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), S16_MIN, S16_MAX);
>> +
>> +	dv = ((long long)mat[3] * mat[4]) * y;
>> +	*delta = clamp_val(
>> +		mat[5] -
>> +		(int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - (1 << WARPEDMODEL_PREC_BITS),
>> +		S16_MIN, S16_MAX);
>> +
>> +	*alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS)
>> +		 * (1 << WARP_PARAM_REDUCE_BITS);
>> +	*beta = AV1_DIV_ROUND_UP_POW2_SIGNED(*beta, WARP_PARAM_REDUCE_BITS)
>> +		* (1 << WARP_PARAM_REDUCE_BITS);
>> +	*gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS)
>> +		 * (1 << WARP_PARAM_REDUCE_BITS);
>> +	*delta = AV1_DIV_ROUND_UP_POW2_SIGNED(*delta, WARP_PARAM_REDUCE_BITS)
>> +		* (1 << WARP_PARAM_REDUCE_BITS);
>> +}
>> +
>> +static void rockchip_vpu981_av1_dec_set_global_model(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>> +	const struct v4l2_av1_global_motion *gm = &frame->global_motion;
>> +	uint8_t *dst = av1_dec->global_model.cpu;
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	int ref_frame, i;
>> +
>> +	memset(dst, 0, GLOBAL_MODEL_SIZE);
>> +	for (ref_frame = 0; ref_frame < V4L2_AV1_REFS_PER_FRAME; ++ref_frame) {
>> +		int64_t alpha = 0, beta = 0, gamma = 0, delta = 0;
>> +
>> +		for (i = 0; i < 6; ++i) {
>> +			if (i == 2)
>> +				*(int32_t *)dst =
>> +					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][3];
>> +			else if (i == 3)
>> +				*(int32_t *)dst =
>> +					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][2];
>> +			else
>> +				*(int32_t *)dst =
>> +					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][i];
>> +			dst += 4;
>> +		}
>> +
>> +		if (gm->type[V4L2_AV1_REF_LAST_FRAME + ref_frame] <= V4L2_AV1_WARP_MODEL_AFFINE)
>> +			rockchip_vpu981_av1_dec_get_shear_params(
>> +					&gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][0],
>> +					&alpha, &beta, &gamma, &delta);
>> +
>> +		*(int16_t *)dst = alpha;
>> +		dst += 2;
>> +		*(int16_t *)dst = beta;
>> +		dst += 2;
>> +		*(int16_t *)dst = gamma;
>> +		dst += 2;
>> +		*(int16_t *)dst = delta;
>> +		dst += 2;
>> +	}
>> +
>> +	hantro_write_addr(vpu, AV1_GLOBAL_MODEL, av1_dec->global_model.dma);
>> +}
>> +
>> +static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>> +	struct v4l2_av1_tile_info tile_info = ctrls->frame->tile_info;
>> +	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
>> +	    ctrls->tile_group_entry;
>> +	int context_update_y =
>> +	    tile_info.context_update_tile_id / tile_info.tile_cols;
>> +	int context_update_x =
>> +	    tile_info.context_update_tile_id % tile_info.tile_cols;
>> +	int context_update_tile_id =
>> +	    context_update_x * tile_info.tile_rows + context_update_y;
>> +	uint8_t *dst = av1_dec->tile_info.cpu;
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	int tile0, tile1;
>> +
>> +	memset(dst, 0, av1_dec->tile_info.size);
>> +
>> +	for (tile0 = 0; tile0 < tile_info.tile_cols; tile0++) {
>> +		for (tile1 = 0; tile1 < tile_info.tile_rows; tile1++) {
>> +			int tile_id = tile1 * tile_info.tile_cols + tile0;
>> +			uint32_t start, end;
>> +			uint32_t y0 =
>> +			    tile_info.height_in_sbs_minus_1[tile1] + 1;
>> +			uint32_t x0 = tile_info.width_in_sbs_minus_1[tile0] + 1;
>> +
>> +			// tile size in SB units (width,height)
>> +			*dst++ = x0;
>> +			*dst++ = 0;
>> +			*dst++ = 0;
>> +			*dst++ = 0;
>> +			*dst++ = y0;
>> +			*dst++ = 0;
>> +			*dst++ = 0;
>> +			*dst++ = 0;
>> +
>> +			// tile start position
>> +			start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
>> +			*dst++ = start & 255;
>> +			*dst++ = (start >> 8) & 255;
>> +			*dst++ = (start >> 16) & 255;
>> +			*dst++ = (start >> 24) & 255;
>> +
>> +			// # of bytes in tile data
>> +			end = start + group_entry[tile_id].tile_size;
>> +			*dst++ = end & 255;
>> +			*dst++ = (end >> 8) & 255;
>> +			*dst++ = (end >> 16) & 255;
>> +			*dst++ = (end >> 24) & 255;
>> +		}
>> +	}
>> +
>> +	hantro_reg_write(vpu, &av1_multicore_expect_context_update,
>> +			 !!(context_update_x == 0));
>> +	hantro_reg_write(vpu, &av1_tile_enable, !!((tile_info.tile_cols > 1)
>> +						   || (tile_info.tile_rows > 1)));
>> +	hantro_reg_write(vpu, &av1_num_tile_cols_8k, tile_info.tile_cols);
>> +	hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info.tile_rows);
>> +	hantro_reg_write(vpu, &av1_context_update_tile_id,
>> +			 context_update_tile_id);
>> +	hantro_reg_write(vpu, &av1_tile_transpose, 1);
>> +	if (context_update_tile_id) {
>> +		hantro_reg_write(vpu, &av1_dec_tile_size_mag,
>> +				 tile_info.tile_size_bytes);
>> +	} else
>> +		hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3);
>> +
>> +	hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma);
>> +}
>> +
>> +static int rockchip_vpu981_av1_dec_get_relative_dist(struct hantro_ctx *ctx,
>> +						     int a, int b)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>> +	int bits = ctrls->sequence->order_hint_bits - 1;
>> +	int diff, m;
>> +
>> +	if (!ctrls->sequence->order_hint_bits)
>> +		return 0;
>> +
>> +	diff = a - b;
>> +	m = 1 << bits;
>> +	diff = (diff & (m - 1)) - (diff & m);
>> +
>> +	return diff;
>> +}
> I think I've seen this one in MTK. Will be nice to make a v4l2-av1.h eventually
> and share this. Can happen afterward though, to avoid make this too complex.

I will keep that in mind.

>
>> +
>> +static void rockchip_vpu981_av1_dec_set_frame_sign_bias(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>> +	const struct v4l2_ctrl_av1_sequence *sequence = ctrls->sequence;
>> +	int i;
>> +
>> +	if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
>> +		for (i = 0; i < AV1_REF_LIST_SIZE; i++)
>> +			av1_dec->ref_frame_sign_bias[i] = 0;
>> +
>> +		return;
>> +	}
>> +	// Identify the nearest forward and backward references.
>> +	for (i = 0; i < AV1_REF_LIST_SIZE - 1; i++) {
>> +		if (rockchip_vpu981_get_frame_index(ctx, i) >= 0) {
>> +			int rel_off =
>> +			    rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +								      rockchip_vpu981_get_order_hint
>> +								      (ctx, i),
>> +								      frame->order_hint);
>> +			av1_dec->ref_frame_sign_bias[i + 1] = (rel_off <= 0) ? 0 : 1;
>> +		}
>> +	}
>> +}
>> +
>> +static bool
>> +rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
>> +				int width, int height)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	struct hantro_decoded_buffer *dst;
>> +	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
>> +	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
>> +	size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
>> +	int cur_width = frame->frame_width_minus_1 + 1;
>> +	int cur_height = frame->frame_height_minus_1 + 1;
>> +	int scale_width =
>> +	    ((width << AV1_REF_SCALE_SHIFT) + cur_width / 2) / cur_width;
>> +	int scale_height =
>> +	    ((height << AV1_REF_SCALE_SHIFT) + cur_height / 2) / cur_height;
>> +
>> +	switch (ref) {
>> +	case 0:
>> +		hantro_reg_write(vpu, &av1_ref0_height, height);
>> +		hantro_reg_write(vpu, &av1_ref0_width, width);
>> +		hantro_reg_write(vpu, &av1_ref0_ver_scale, scale_width);
>> +		hantro_reg_write(vpu, &av1_ref0_hor_scale, scale_height);
>> +		break;
>> +	case 1:
>> +		hantro_reg_write(vpu, &av1_ref1_height, height);
>> +		hantro_reg_write(vpu, &av1_ref1_width, width);
>> +		hantro_reg_write(vpu, &av1_ref1_ver_scale, scale_width);
>> +		hantro_reg_write(vpu, &av1_ref1_hor_scale, scale_height);
>> +		break;
>> +	case 2:
>> +		hantro_reg_write(vpu, &av1_ref2_height, height);
>> +		hantro_reg_write(vpu, &av1_ref2_width, width);
>> +		hantro_reg_write(vpu, &av1_ref2_ver_scale, scale_width);
>> +		hantro_reg_write(vpu, &av1_ref2_hor_scale, scale_height);
>> +		break;
>> +	case 3:
>> +		hantro_reg_write(vpu, &av1_ref3_height, height);
>> +		hantro_reg_write(vpu, &av1_ref3_width, width);
>> +		hantro_reg_write(vpu, &av1_ref3_ver_scale, scale_width);
>> +		hantro_reg_write(vpu, &av1_ref3_hor_scale, scale_height);
>> +		break;
>> +	case 4:
>> +		hantro_reg_write(vpu, &av1_ref4_height, height);
>> +		hantro_reg_write(vpu, &av1_ref4_width, width);
>> +		hantro_reg_write(vpu, &av1_ref4_ver_scale, scale_width);
>> +		hantro_reg_write(vpu, &av1_ref4_hor_scale, scale_height);
>> +		break;
>> +	case 5:
>> +		hantro_reg_write(vpu, &av1_ref5_height, height);
>> +		hantro_reg_write(vpu, &av1_ref5_width, width);
>> +		hantro_reg_write(vpu, &av1_ref5_ver_scale, scale_width);
>> +		hantro_reg_write(vpu, &av1_ref5_hor_scale, scale_height);
>> +		break;
>> +	case 6:
>> +		hantro_reg_write(vpu, &av1_ref6_height, height);
>> +		hantro_reg_write(vpu, &av1_ref6_width, width);
>> +		hantro_reg_write(vpu, &av1_ref6_ver_scale, scale_width);
>> +		hantro_reg_write(vpu, &av1_ref6_hor_scale, scale_height);
>> +		break;
>> +	default:
>> +		pr_warn("AV1 invalid reference frame index\n");
>> +	}
>> +
>> +	dst = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
>> +	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
>> +	chroma_addr = luma_addr + cr_offset;
>> +	mv_addr = luma_addr + mv_offset;
>> +
>> +	hantro_write_addr(vpu, AV1_REFERENCE_Y(ref), luma_addr);
>> +	hantro_write_addr(vpu, AV1_REFERENCE_CB(ref), chroma_addr);
>> +	hantro_write_addr(vpu, AV1_REFERENCE_MV(ref), mv_addr);
>> +
>> +	return (scale_width != (1 << AV1_REF_SCALE_SHIFT))
>> +		|| (scale_height != (1 << AV1_REF_SCALE_SHIFT));
>> +}
>> +
>> +/*
>> + * Write @val to the sign bias register field of reference slot @ref.
>> + *
>> + * Slots 0 to 6 each have their own av1_refN_sign_bias field. Any other
>> + * value of @ref only produces a warning and no register is written.
>> + */
>> +static void rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx *ctx,
>> +						  int ref, int val)
>> +{
>> +	struct hantro_dev *vpu = ctx->dev;
>> +
>> +	/* Reject out-of-range slots first so the switch only sees valid ones */
>> +	if (ref < 0 || ref > 6) {
>> +		pr_warn("AV1 invalid sign bias index\n");
>> +		return;
>> +	}
>> +
>> +	switch (ref) {
>> +	case 0:
>> +		hantro_reg_write(vpu, &av1_ref0_sign_bias, val);
>> +		break;
>> +	case 1:
>> +		hantro_reg_write(vpu, &av1_ref1_sign_bias, val);
>> +		break;
>> +	case 2:
>> +		hantro_reg_write(vpu, &av1_ref2_sign_bias, val);
>> +		break;
>> +	case 3:
>> +		hantro_reg_write(vpu, &av1_ref3_sign_bias, val);
>> +		break;
>> +	case 4:
>> +		hantro_reg_write(vpu, &av1_ref4_sign_bias, val);
>> +		break;
>> +	case 5:
>> +		hantro_reg_write(vpu, &av1_ref5_sign_bias, val);
>> +		break;
>> +	case 6:
>> +		hantro_reg_write(vpu, &av1_ref6_sign_bias, val);
>> +		break;
>> +	default:
>> +		break;
>> +	}
>> +}
>> +
>> +static void rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>> +	const struct v4l2_av1_segmentation *seg = &frame->segmentation;
>> +	uint32_t segval[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX] = { 0 };
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	uint8_t segsign = 0, preskip_segid = 0, last_active_seg = 0, i, j;
>> +
>> +	if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED)
>> +	    && (frame->primary_ref_frame < V4L2_AV1_REFS_PER_FRAME)) {
>> +		int idx = rockchip_vpu981_get_frame_index(ctx, frame->primary_ref_frame);
>> +
>> +		if (idx >= 0) {
>> +			dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
>> +			size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
>> +			size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
>> +
>> +			luma_addr =
>> +				hantro_get_dec_buf_addr(ctx,
>> +							&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
>> +			chroma_addr = luma_addr + cr_offset;
>> +			mv_addr = luma_addr + mv_offset;
>> +
>> +			hantro_write_addr(vpu, AV1_SEGMENTATION, mv_addr);
>> +			hantro_reg_write(vpu, &av1_use_temporal3_mvs, 1);
>> +		}
>> +	}
>> +
>> +	hantro_reg_write(vpu, &av1_segment_temp_upd_e,
>> +			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE));
>> +	hantro_reg_write(vpu, &av1_segment_upd_e,
>> +			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP));
>> +	hantro_reg_write(vpu, &av1_segment_e,
>> +			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED));
>> +
>> +	hantro_reg_write(vpu, &av1_error_resilient,
>> +			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE));
>> +
>> +	if (IS_INTRA(frame->frame_type)
>> +	    || !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE)) {
>> +		hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
>> +	}
>> +
>> +	if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED)) {
>> +		int s;
>> +
>> +		for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
>> +			if (seg->feature_enabled[s] &
>> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
>> +				segval[s][V4L2_AV1_SEG_LVL_ALT_Q] =
>> +				    CLIP3(0, 255,
>> +					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q]));
>> +				segsign |=
>> +					(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q] < 0) << s;
>> +			}
>> +
>> +			if (seg->feature_enabled[s] &
>> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_V))
>> +				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V] =
>> +					CLIP3(-63, 63,
>> +					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]));
>> +
>> +			if (seg->feature_enabled[s] &
>> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_H))
>> +				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H] =
>> +				    CLIP3(-63, 63,
>> +					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]));
>> +
>> +			if (seg->feature_enabled[s] &
>> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_U))
>> +				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_U] =
>> +				    CLIP3(-63, 63,
>> +					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_U]));
>> +
>> +			if (seg->feature_enabled[s] &
>> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_V))
>> +				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_V] =
>> +				    CLIP3(-63, 63,
>> +					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_V]));
>> +
>> +			if (frame->frame_type && seg->feature_enabled[s] &
>> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_FRAME))
>> +				segval[s][V4L2_AV1_SEG_LVL_REF_FRAME]++;
>> +
>> +			if (seg->feature_enabled[s] &
>> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_SKIP))
>> +				segval[s][V4L2_AV1_SEG_LVL_REF_SKIP] = 1;
>> +
>> +			if (seg->feature_enabled[s] &
>> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_GLOBALMV))
>> +				segval[s][V4L2_AV1_SEG_LVL_REF_GLOBALMV] = 1;
>> +		}
>> +	}
>> +
>> +	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
>> +		for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++) {
>> +			if (seg->feature_enabled[i]
>> +			    & V4L2_AV1_SEGMENT_FEATURE_ENABLED(j)) {
>> +				preskip_segid |= (j >= V4L2_AV1_SEG_LVL_REF_FRAME);
>> +				last_active_seg = max(i, last_active_seg);
>> +			}
>> +		}
>> +	}
>> +
>> +	hantro_reg_write(vpu, &av1_last_active_seg, last_active_seg);
>> +	hantro_reg_write(vpu, &av1_preskip_segid, preskip_segid);
>> +
>> +	hantro_reg_write(vpu, &av1_seg_quant_sign, segsign);
>> +
>> +	/* Write QP, filter level, ref frame and skip for every segment */
>> +	hantro_reg_write(vpu, &av1_quant_seg0,
>> +			 segval[0][V4L2_AV1_SEG_LVL_ALT_Q]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg0,
>> +			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg0,
>> +			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg0,
>> +			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_U]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg0,
>> +			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_V]);
>> +	hantro_reg_write(vpu, &av1_refpic_seg0,
>> +			 segval[0][V4L2_AV1_SEG_LVL_REF_FRAME]);
>> +	hantro_reg_write(vpu, &av1_skip_seg0,
>> +			 segval[0][V4L2_AV1_SEG_LVL_REF_SKIP]);
>> +	hantro_reg_write(vpu, &av1_global_mv_seg0,
>> +			 segval[0][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
>> +
>> +	hantro_reg_write(vpu, &av1_quant_seg1,
>> +			 segval[1][V4L2_AV1_SEG_LVL_ALT_Q]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg1,
>> +			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg1,
>> +			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg1,
>> +			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_U]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg1,
>> +			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_V]);
>> +	hantro_reg_write(vpu, &av1_refpic_seg1,
>> +			 segval[1][V4L2_AV1_SEG_LVL_REF_FRAME]);
>> +	hantro_reg_write(vpu, &av1_skip_seg1,
>> +			 segval[1][V4L2_AV1_SEG_LVL_REF_SKIP]);
>> +	hantro_reg_write(vpu, &av1_global_mv_seg1,
>> +			 segval[1][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
>> +
>> +	hantro_reg_write(vpu, &av1_quant_seg2,
>> +			 segval[2][V4L2_AV1_SEG_LVL_ALT_Q]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg2,
>> +			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg2,
>> +			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg2,
>> +			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_U]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg2,
>> +			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_V]);
>> +	hantro_reg_write(vpu, &av1_refpic_seg2,
>> +			 segval[2][V4L2_AV1_SEG_LVL_REF_FRAME]);
>> +	hantro_reg_write(vpu, &av1_skip_seg2,
>> +			 segval[2][V4L2_AV1_SEG_LVL_REF_SKIP]);
>> +	hantro_reg_write(vpu, &av1_global_mv_seg2,
>> +			 segval[2][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
>> +
>> +	hantro_reg_write(vpu, &av1_quant_seg3,
>> +			 segval[3][V4L2_AV1_SEG_LVL_ALT_Q]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg3,
>> +			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg3,
>> +			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg3,
>> +			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_U]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg3,
>> +			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_V]);
>> +	hantro_reg_write(vpu, &av1_refpic_seg3,
>> +			 segval[3][V4L2_AV1_SEG_LVL_REF_FRAME]);
>> +	hantro_reg_write(vpu, &av1_skip_seg3,
>> +			 segval[3][V4L2_AV1_SEG_LVL_REF_SKIP]);
>> +	hantro_reg_write(vpu, &av1_global_mv_seg3,
>> +			 segval[3][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
>> +
>> +	hantro_reg_write(vpu, &av1_quant_seg4,
>> +			 segval[4][V4L2_AV1_SEG_LVL_ALT_Q]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg4,
>> +			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg4,
>> +			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg4,
>> +			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_U]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg4,
>> +			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_V]);
>> +	hantro_reg_write(vpu, &av1_refpic_seg4,
>> +			 segval[4][V4L2_AV1_SEG_LVL_REF_FRAME]);
>> +	hantro_reg_write(vpu, &av1_skip_seg4,
>> +			 segval[4][V4L2_AV1_SEG_LVL_REF_SKIP]);
>> +	hantro_reg_write(vpu, &av1_global_mv_seg4,
>> +			 segval[4][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
>> +
>> +	hantro_reg_write(vpu, &av1_quant_seg5,
>> +			 segval[5][V4L2_AV1_SEG_LVL_ALT_Q]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg5,
>> +			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg5,
>> +			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg5,
>> +			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_U]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg5,
>> +			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_V]);
>> +	hantro_reg_write(vpu, &av1_refpic_seg5,
>> +			 segval[5][V4L2_AV1_SEG_LVL_REF_FRAME]);
>> +	hantro_reg_write(vpu, &av1_skip_seg5,
>> +			 segval[5][V4L2_AV1_SEG_LVL_REF_SKIP]);
>> +	hantro_reg_write(vpu, &av1_global_mv_seg5,
>> +			 segval[5][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
>> +
>> +	hantro_reg_write(vpu, &av1_quant_seg6,
>> +			 segval[6][V4L2_AV1_SEG_LVL_ALT_Q]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg6,
>> +			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg6,
>> +			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg6,
>> +			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_U]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg6,
>> +			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_V]);
>> +	hantro_reg_write(vpu, &av1_refpic_seg6,
>> +			 segval[6][V4L2_AV1_SEG_LVL_REF_FRAME]);
>> +	hantro_reg_write(vpu, &av1_skip_seg6,
>> +			 segval[6][V4L2_AV1_SEG_LVL_REF_SKIP]);
>> +	hantro_reg_write(vpu, &av1_global_mv_seg6,
>> +			 segval[6][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
>> +
>> +	hantro_reg_write(vpu, &av1_quant_seg7,
>> +			 segval[7][V4L2_AV1_SEG_LVL_ALT_Q]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg7,
>> +			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg7,
>> +			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg7,
>> +			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_U]);
>> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg7,
>> +			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_V]);
>> +	hantro_reg_write(vpu, &av1_refpic_seg7,
>> +			 segval[7][V4L2_AV1_SEG_LVL_REF_FRAME]);
>> +	hantro_reg_write(vpu, &av1_skip_seg7,
>> +			 segval[7][V4L2_AV1_SEG_LVL_REF_SKIP]);
>> +	hantro_reg_write(vpu, &av1_global_mv_seg7,
>> +			 segval[7][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
>> +}
>> +
>> +/*
>> + * Tell whether the current frame control describes lossless coding.
>> + *
>> + * Returns true only when every delta_q_* value tested below is zero and,
>> + * for every segment, the quantizer index (base_q_idx plus the segment's
>> + * ALT_Q feature data when that feature is enabled, clipped to 0..255) is
>> + * zero as well.
>> + */
>> +static bool rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx *ctx)
>> +{
>> +	const struct v4l2_ctrl_av1_frame *frame = ctx->av1_dec.ctrls.frame;
>> +	const struct v4l2_av1_segmentation *seg = &frame->segmentation;
>> +	const struct v4l2_av1_quantization *quant = &frame->quantization;
>> +	int s;
>> +
>> +	/* The delta values are the same for all segments: test them once */
>> +	if (quant->delta_q_y_dc || quant->delta_q_u_dc ||
>> +	    quant->delta_q_u_ac || quant->delta_q_v_dc ||
>> +	    quant->delta_q_v_ac)
>> +		return false;
>> +
>> +	for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
>> +		int qindex = quant->base_q_idx;
>> +
>> +		if (seg->feature_enabled[s] &
>> +		    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q))
>> +			qindex += seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q];
>> +
>> +		qindex = CLIP3(0, 255, qindex);
>> +		if (qindex)
>> +			return false;
>> +	}
>> +
>> +	return true;
>> +}
>> +
>> +/*
>> + * Program the loop filter registers from the loop_filter part of the
>> + * current frame control, then the two deblocking column buffer addresses.
>> + *
>> + * The reference and mode delta fields receive the values from the control
>> + * only when the delta flag is set, the frame is not lossless and intra
>> + * block copy is not allowed; otherwise they are all written as 0.
>> + */
>> +static void rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
>> +	const struct v4l2_av1_loop_filter *lf = &frame->loop_filter;
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	bool use_deltas;
>> +
>> +	/* Filtering is disabled when both level[0] and level[1] are zero */
>> +	hantro_reg_write(vpu, &av1_filtering_dis,
>> +			 !lf->level[0] && !lf->level[1]);
>> +	hantro_reg_write(vpu, &av1_filt_level_base_gt32, lf->level[0] > 32);
>> +	hantro_reg_write(vpu, &av1_filt_sharpness, lf->sharpness);
>> +
>> +	hantro_reg_write(vpu, &av1_filt_level0, lf->level[0]);
>> +	hantro_reg_write(vpu, &av1_filt_level1, lf->level[1]);
>> +	hantro_reg_write(vpu, &av1_filt_level2, lf->level[2]);
>> +	hantro_reg_write(vpu, &av1_filt_level3, lf->level[3]);
>> +
>> +	use_deltas = (lf->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED) &&
>> +		     !rockchip_vpu981_av1_dec_is_lossless(ctx) &&
>> +		     !(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
>> +
>> +	hantro_reg_write(vpu, &av1_filt_ref_adj_0,
>> +			 use_deltas ? lf->ref_deltas[0] : 0);
>> +	hantro_reg_write(vpu, &av1_filt_ref_adj_1,
>> +			 use_deltas ? lf->ref_deltas[1] : 0);
>> +	hantro_reg_write(vpu, &av1_filt_ref_adj_2,
>> +			 use_deltas ? lf->ref_deltas[2] : 0);
>> +	hantro_reg_write(vpu, &av1_filt_ref_adj_3,
>> +			 use_deltas ? lf->ref_deltas[3] : 0);
>> +	hantro_reg_write(vpu, &av1_filt_ref_adj_4,
>> +			 use_deltas ? lf->ref_deltas[4] : 0);
>> +	hantro_reg_write(vpu, &av1_filt_ref_adj_5,
>> +			 use_deltas ? lf->ref_deltas[5] : 0);
>> +	hantro_reg_write(vpu, &av1_filt_ref_adj_6,
>> +			 use_deltas ? lf->ref_deltas[6] : 0);
>> +	hantro_reg_write(vpu, &av1_filt_ref_adj_7,
>> +			 use_deltas ? lf->ref_deltas[7] : 0);
>> +	hantro_reg_write(vpu, &av1_filt_mb_adj_0,
>> +			 use_deltas ? lf->mode_deltas[0] : 0);
>> +	hantro_reg_write(vpu, &av1_filt_mb_adj_1,
>> +			 use_deltas ? lf->mode_deltas[1] : 0);
>> +
>> +	hantro_write_addr(vpu, AV1_DB_DATA_COL, av1_dec->db_data_col.dma);
>> +	hantro_write_addr(vpu, AV1_DB_CTRL_COL, av1_dec->db_ctrl_col.dma);
>> +}
>> +
>> +/*
>> + * Copy the CDFs found in the prob_tbl_out buffer back into the driver's
>> + * CDF storage.
>> + *
>> + * Nothing is done when the frame disables the end-of-frame CDF update or
>> + * when no bit is set in refresh_frame_flags. Otherwise the CDFs selected
>> + * for the lowest refreshed slot are overwritten with the output table and
>> + * stored for all slots named in refresh_frame_flags. For intra frames the
>> + * previous mv_cdf is kept and the output mv_cdf goes to cdfs_ndvc instead.
>> + */
>> +static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
>> +	struct av1cdfs *hw_cdfs = (struct av1cdfs *)av1_dec->prob_tbl_out.cpu;
>> +	struct mvcdfs saved_mv_cdf;
>> +	int slot;
>> +
>> +	if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
>> +		return;
>> +
>> +	/* Look for the lowest slot refreshed by this frame */
>> +	for (slot = 0; slot < NUM_REF_FRAMES; slot++) {
>> +		if (frame->refresh_frame_flags & (1 << slot))
>> +			break;
>> +	}
>> +	if (slot == NUM_REF_FRAMES)
>> +		return;
>> +
>> +	rockchip_av1_get_cdfs(ctx, slot);
>> +	saved_mv_cdf = av1_dec->cdfs->mv_cdf;
>> +	*av1_dec->cdfs = *hw_cdfs;
>> +	if (IS_INTRA(frame->frame_type)) {
>> +		av1_dec->cdfs->mv_cdf = saved_mv_cdf;
>> +		*av1_dec->cdfs_ndvc = hw_cdfs->mv_cdf;
>> +	}
>> +	rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);
>> +}
>> +
>> +/*
>> + * Non-static entry point that runs the CDF update for @ctx.
>> + * NOTE(review): presumably called once the hardware has finished decoding
>> + * a frame - confirm against the caller.
>> + */
>> +void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx)
>> +{
>> +	rockchip_vpu981_av1_dec_update_prob(ctx);
>> +}
>> +
>> +/*
>> + * Select the CDFs for the current frame, copy them into the prob_tbl
>> + * buffer and program the input and output table addresses.
>> + *
>> + * The default CDFs are used in error resilient mode, for intra frames and
>> + * when primary_ref_frame is AV1_PRIMARY_REF_NONE; otherwise the CDFs of
>> + * the primary reference frame are fetched.
>> + */
>> +static void rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	bool intra = IS_INTRA(frame->frame_type);
>> +	bool use_defaults =
>> +		(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE) ||
>> +		intra || frame->primary_ref_frame == AV1_PRIMARY_REF_NONE;
>> +
>> +	if (!use_defaults) {
>> +		rockchip_av1_get_cdfs(ctx, frame->ref_frame_idx[frame->primary_ref_frame]);
>> +	} else {
>> +		av1_dec->cdfs = &av1_dec->default_cdfs;
>> +		av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
>> +		rockchip_av1_default_coeff_probs(frame->quantization.base_q_idx,
>> +						 av1_dec->cdfs);
>> +	}
>> +	rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);
>> +
>> +	memcpy(av1_dec->prob_tbl.cpu, av1_dec->cdfs, sizeof(struct av1cdfs));
>> +
>> +	/* Overwrite MV context area with intrabc MV context */
>> +	if (intra)
>> +		memcpy(av1_dec->prob_tbl.cpu + offsetof(struct av1cdfs, mv_cdf),
>> +		       av1_dec->cdfs_ndvc, sizeof(struct mvcdfs));
>> +
>> +	hantro_write_addr(vpu, AV1_PROP_TABLE_OUT, av1_dec->prob_tbl_out.dma);
>> +	hantro_write_addr(vpu, AV1_PROP_TABLE, av1_dec->prob_tbl.dma);
>> +}
>> +
>> +/*
>> + * Pack the CDEF strengths of the current frame control into the hardware
>> + * register fields and program the CDEF column buffer address.
>> + *
>> + * Each of the (1 << cdef->bits) entries takes 4 bits in the primary
>> + * strength words and 2 bits in the secondary strength words. A secondary
>> + * strength of 4 is written as 3.
>> + */
>> +static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>> +	const struct v4l2_av1_cdef *cdef = &frame->cdef;
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	uint32_t luma_pri_strength = 0;
>> +	uint16_t luma_sec_strength = 0;
>> +	uint32_t chroma_pri_strength = 0;
>> +	uint16_t chroma_sec_strength = 0;
>> +	int i;
>> +
>> +	hantro_reg_write(vpu, &av1_cdef_bits, cdef->bits);
>> +	hantro_reg_write(vpu, &av1_cdef_damping, cdef->damping_minus_3);
>> +
>> +	for (i = 0; i < (1 << cdef->bits); i++) {
>> +		/*
>> +		 * Shift as unsigned: with i == 7 the value lands in bits
>> +		 * 28..31, which a shift on a promoted signed int must not
>> +		 * reach.
>> +		 */
>> +		luma_pri_strength |=
>> +			(uint32_t)cdef->y_pri_strength[i] << (i * 4);
>> +		if (cdef->y_sec_strength[i] == 4)
>> +			luma_sec_strength |= 3 << (i * 2);
>> +		else
>> +			luma_sec_strength |= cdef->y_sec_strength[i] << (i * 2);
>> +
>> +		chroma_pri_strength |=
>> +			(uint32_t)cdef->uv_pri_strength[i] << (i * 4);
>> +		if (cdef->uv_sec_strength[i] == 4)
>> +			chroma_sec_strength |= 3 << (i * 2);
>> +		else
>> +			chroma_sec_strength |= cdef->uv_sec_strength[i] << (i * 2);
>> +	}
>> +
>> +	hantro_reg_write(vpu, &av1_cdef_luma_primary_strength,
>> +			 luma_pri_strength);
>> +	hantro_reg_write(vpu, &av1_cdef_luma_secondary_strength,
>> +			 luma_sec_strength);
>> +	hantro_reg_write(vpu, &av1_cdef_chroma_primary_strength,
>> +			 chroma_pri_strength);
>> +	hantro_reg_write(vpu, &av1_cdef_chroma_secondary_strength,
>> +			 chroma_sec_strength);
>> +
>> +	hantro_write_addr(vpu, AV1_CDEF_COL, av1_dec->cdef_col.dma);
>> +}
>> +
>> +/*
>> + * Program the loop restoration type and unit size fields (2 bits per
>> + * plane each) and the loop restoration column buffer address.
>> + *
>> + * The unit size of every plane defaults to 3. When the control says loop
>> + * restoration is used, it is 1 + lr_unit_shift for plane 0 and the same
>> + * value minus lr_uv_shift for planes 1 and 2.
>> + */
>> +static void rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	const struct v4l2_av1_loop_restoration *lr =
>> +		&av1_dec->ctrls.frame->loop_restoration;
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	uint8_t unit_size[V4L2_AV1_NUM_PLANES_MAX] = { 3, 3, 3 };
>> +	uint16_t lr_type = 0, lr_unit_size = 0;
>> +	int plane;
>> +
>> +	if (lr->flags & V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR) {
>> +		int luma_size = 1 + lr->lr_unit_shift;
>> +
>> +		unit_size[0] = luma_size;
>> +		unit_size[1] = luma_size - lr->lr_uv_shift;
>> +		unit_size[2] = luma_size - lr->lr_uv_shift;
>> +	}
>> +
>> +	for (plane = 0; plane < V4L2_AV1_NUM_PLANES_MAX; plane++) {
>> +		int shift = plane * 2;
>> +
>> +		lr_type |= lr->frame_restoration_type[plane] << shift;
>> +		lr_unit_size |= unit_size[plane] << shift;
>> +	}
>> +
>> +	hantro_reg_write(vpu, &av1_lr_type, lr_type);
>> +	hantro_reg_write(vpu, &av1_lr_unit_size, lr_unit_size);
>> +	hantro_write_addr(vpu, AV1_LR_COL, av1_dec->lr_col.dma);
>> +}
>> +
>> +/*
>> + * Compute the super-resolution parameters for the current frame and write
>> + * them, together with the SR column buffer address, to the hardware.
>> + *
>> + * The step and initial sub-pixel values are only computed when the frame
>> + * uses superres with a denominator above SCALE_NUMERATOR and the resulting
>> + * downscaled width differs from upscaled_width. In all other cases the
>> + * defaults from the declarations below are written and
>> + * av1_superres_is_scaled is 0.
>> + */
>> +static void rockchip_vpu981_av1_dec_set_superres_params(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	uint8_t superres_scale_denominator = SCALE_NUMERATOR;
>> +	int superres_luma_step = RS_SCALE_SUBPEL_BITS;
>> +	int superres_chroma_step = RS_SCALE_SUBPEL_BITS;
>> +	int superres_luma_step_invra = RS_SCALE_SUBPEL_BITS;
>> +	int superres_chroma_step_invra = RS_SCALE_SUBPEL_BITS;
>> +	int superres_init_luma_subpel_x = 0;
>> +	int superres_init_chroma_subpel_x = 0;
>> +	int superres_is_scaled = 0;
>> +	int min_w = min_t(uint32_t, 16, frame->upscaled_width);
>> +	int upscaledLumaPlaneW, downscaledLumaPlaneW;
>> +	int downscaledChromaPlaneW, upscaledChromaPlaneW;
>> +	int stepLumaX, stepChromaX;
>> +	int errLuma, errChroma;
>> +	int initialLumaSubpelX, initialChromaSubpelX;
> You haven't used camel case so far, are you sure this is allowed ?

I will change them.

>
>> +	int width = 0;
>> +
>> +	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
>> +		superres_scale_denominator = frame->superres_denom;
>> +
>> +	/* Denominator not above SCALE_NUMERATOR: keep the defaults */
>> +	if (superres_scale_denominator <= SCALE_NUMERATOR)
>> +		goto set_regs;
>> +
>> +	/* Downscaled width, rounded to nearest and never below min_w */
>> +	width = (frame->upscaled_width * SCALE_NUMERATOR +
>> +		(superres_scale_denominator / 2)) / superres_scale_denominator;
>> +
>> +	if (width < min_w)
>> +		width = min_w;
>> +
>> +	/* Same width before and after scaling: keep the defaults */
>> +	if (width == frame->upscaled_width)
>> +		goto set_regs;
>> +
>> +	superres_is_scaled = 1;
>> +	upscaledLumaPlaneW = frame->upscaled_width;
>> +	downscaledLumaPlaneW = width;
>> +	/* Chroma plane widths are half the luma widths, rounded up */
>> +	downscaledChromaPlaneW = (downscaledLumaPlaneW + 1) >> 1;
>> +	upscaledChromaPlaneW = (upscaledLumaPlaneW + 1) >> 1;
>> +	/* Steps: downscaled/upscaled ratio with RS_SCALE_SUBPEL_BITS fraction bits */
>> +	stepLumaX =
>> +		((downscaledLumaPlaneW << RS_SCALE_SUBPEL_BITS) +
>> +		 (upscaledLumaPlaneW / 2)) / upscaledLumaPlaneW;
>> +	stepChromaX =
>> +		((downscaledChromaPlaneW << RS_SCALE_SUBPEL_BITS) +
>> +		 (upscaledChromaPlaneW / 2)) / upscaledChromaPlaneW;
>> +	/* Difference between the width covered with the rounded step and the exact width */
>> +	errLuma =
>> +		(upscaledLumaPlaneW * stepLumaX)
>> +		- (downscaledLumaPlaneW << RS_SCALE_SUBPEL_BITS);
>> +	errChroma =
>> +		(upscaledChromaPlaneW * stepChromaX)
>> +		- (downscaledChromaPlaneW << RS_SCALE_SUBPEL_BITS);
>> +	initialLumaSubpelX =
>> +		((-((upscaledLumaPlaneW - downscaledLumaPlaneW) << (RS_SCALE_SUBPEL_BITS - 1))
>> +		  + upscaledLumaPlaneW / 2)
>> +		 / upscaledLumaPlaneW + (1 << (RS_SCALE_EXTRA_BITS - 1)) - errLuma / 2)
>> +		& RS_SCALE_SUBPEL_MASK;
>> +	initialChromaSubpelX =
>> +		((-((upscaledChromaPlaneW - downscaledChromaPlaneW) << (RS_SCALE_SUBPEL_BITS - 1))
>> +		  + upscaledChromaPlaneW / 2)
>> +		 / upscaledChromaPlaneW + (1 << (RS_SCALE_EXTRA_BITS - 1)) - errChroma / 2)
>> +		& RS_SCALE_SUBPEL_MASK;
>> +	superres_luma_step = stepLumaX;
>> +	superres_chroma_step = stepChromaX;
>> +	/* Inverse steps: upscaled/downscaled ratio, rounded to nearest */
>> +	superres_luma_step_invra =
>> +		((upscaledLumaPlaneW << RS_SCALE_SUBPEL_BITS) + (downscaledLumaPlaneW / 2))
>> +		/ downscaledLumaPlaneW;
>> +	superres_chroma_step_invra =
>> +		((upscaledChromaPlaneW << RS_SCALE_SUBPEL_BITS) + (downscaledChromaPlaneW / 2))
>> +		/ downscaledChromaPlaneW;
>> +	superres_init_luma_subpel_x = initialLumaSubpelX;
>> +	superres_init_chroma_subpel_x = initialChromaSubpelX;
>> +
>> +set_regs:
>> +	hantro_reg_write(vpu, &av1_superres_pic_width, frame->upscaled_width);
>> +
>> +	/*
>> +	 * With superres in use the field receives the denominator minus
>> +	 * SUPERRES_SCALE_DENOMINATOR_MIN, otherwise superres_denom as is.
>> +	 */
>> +	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
>> +		hantro_reg_write(vpu, &av1_scale_denom_minus9,
>> +				 frame->superres_denom - SUPERRES_SCALE_DENOMINATOR_MIN);
>> +	else
>> +		hantro_reg_write(vpu, &av1_scale_denom_minus9, frame->superres_denom);
>> +
>> +	hantro_reg_write(vpu, &av1_superres_luma_step, superres_luma_step);
>> +	hantro_reg_write(vpu, &av1_superres_chroma_step, superres_chroma_step);
>> +	hantro_reg_write(vpu, &av1_superres_luma_step_invra,
>> +			 superres_luma_step_invra);
>> +	hantro_reg_write(vpu, &av1_superres_chroma_step_invra,
>> +			 superres_chroma_step_invra);
>> +	hantro_reg_write(vpu, &av1_superres_init_luma_subpel_x,
>> +			 superres_init_luma_subpel_x);
>> +	hantro_reg_write(vpu, &av1_superres_init_chroma_subpel_x,
>> +			 superres_init_chroma_subpel_x);
>> +	hantro_reg_write(vpu, &av1_superres_is_scaled, superres_is_scaled);
>> +
>> +	hantro_write_addr(vpu, AV1_SR_COL, av1_dec->sr_col.dma);
>> +}
>> +
>> +/*
>> + * Program the picture size registers for the current frame, then the
>> + * super-resolution parameters.
>> + *
>> + * The *_in_cbs values are the frame width/height aligned up to a multiple
>> + * of 8 and divided by 8; the *_pad values are the number of pixels added
>> + * by that alignment.
>> + */
>> +static void rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	int pic_width_in_cbs = ALIGN(frame->frame_width_minus_1 + 1, 8) >> 3;
>> +	int pic_height_in_cbs = ALIGN(frame->frame_height_minus_1 + 1, 8) >> 3;
> Aren't these DIV_ROUND_UP(val + 1, 8) ?
>
>> +	int pic_width_pad = ALIGN(frame->frame_width_minus_1 + 1, 8)
>> +			    - (frame->frame_width_minus_1 + 1);
>> +	int pic_height_pad = ALIGN(frame->frame_height_minus_1 + 1, 8)
>> +			     - (frame->frame_height_minus_1 + 1);
>> +
>> +	hantro_reg_write(vpu, &av1_pic_width_in_cbs, pic_width_in_cbs);
>> +	hantro_reg_write(vpu, &av1_pic_height_in_cbs, pic_height_in_cbs);
>> +	hantro_reg_write(vpu, &av1_pic_width_pad, pic_width_pad);
>> +	hantro_reg_write(vpu, &av1_pic_height_pad, pic_height_pad);
>> +
>> +	rockchip_vpu981_av1_dec_set_superres_params(ctx);
>> +}
>> +
>> +static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	bool use_ref_frame_mvs =
>> +	    !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS);
>> +	int cur_frame_offset = frame->order_hint;
>> +	int alt_frame_offset = 0;
>> +	int gld_frame_offset = 0;
>> +	int bwd_frame_offset = 0;
>> +	int alt2_frame_offset = 0;
>> +	int refs_selected[3] = { 0, 0, 0 };
>> +	int cur_mi_cols = (frame->frame_width_minus_1 + 8) >> 3;
>> +	int cur_mi_rows = (frame->frame_height_minus_1 + 8) >> 3;
> This looks like a contraction of (val + 1 + 7) / 8, so in short, another
> DIV_ROUND_UP().
>
>> +	int cur_offset[V4L2_AV1_NUM_REF_FRAMES - 1];
>> +	int cur_roffset[V4L2_AV1_NUM_REF_FRAMES - 1];
> This looks like V4L2_AV1_REFS_PER_FRAME. Daniel, should we remove this
> V4L2_AV1_NUM_REF_FRAMES? It's redundant with V4L2_AV1_TOTAL_REFS_PER_FRAME ...
>
>> +	int mf_types[3] = { 0, 0, 0 };
>> +	int ref_stamp = 2;
>> +	int ref_ind = 0;
>> +	int rf, idx;
>> +
>> +	alt_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT_BUF_IDX);
>> +	gld_frame_offset = rockchip_vpu981_get_order_hint(ctx, GLD_BUF_IDX);
>> +	bwd_frame_offset = rockchip_vpu981_get_order_hint(ctx, BWD_BUF_IDX);
>> +	alt2_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT2_BUF_IDX);
>> +
>> +	idx = rockchip_vpu981_get_frame_index(ctx, LST_BUF_IDX);
>> +	if (idx >= 0) {
>> +		int alt_frame_offset_in_lst =
>> +			av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME];
>> +		bool is_lst_overlay =
>> +		    (alt_frame_offset_in_lst == gld_frame_offset);
>> +
>> +		if (!is_lst_overlay) {
>> +			int lst_mi_cols =
>> +			    (av1_dec->frame_refs[idx].width + 7) >> 3;
> DIV_ROUND_UP()
>
>> +			int lst_mi_rows =
>> +			    (av1_dec->frame_refs[idx].height + 7) >> 3;
> Again. I'll stop pointing these out; I think you can find them.
>
>> +			bool lst_intra_only =
>> +			    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
>> +
>> +			if (lst_mi_cols == cur_mi_cols
>> +			    && lst_mi_rows == cur_mi_rows && !lst_intra_only) {
>> +				mf_types[ref_ind] = V4L2_AV1_REF_LAST_FRAME;
>> +				refs_selected[ref_ind++] = LST_BUF_IDX;
>> +			}
>> +		}
>> +		ref_stamp--;
>> +	}
>> +
>> +	idx = rockchip_vpu981_get_frame_index(ctx, BWD_BUF_IDX);
>> +	if (rockchip_vpu981_av1_dec_get_relative_dist
>> +	    (ctx, bwd_frame_offset, cur_frame_offset) > 0) {
>> +		int bwd_mi_cols = (av1_dec->frame_refs[idx].width + 7) >> 3;
>> +		int bwd_mi_rows = (av1_dec->frame_refs[idx].height + 7) >> 3;
>> +		bool bwd_intra_only =
>> +		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
>> +
>> +		if (bwd_mi_cols == cur_mi_cols && bwd_mi_rows == cur_mi_rows &&
>> +		    !bwd_intra_only) {
>> +			mf_types[ref_ind] = V4L2_AV1_REF_BWDREF_FRAME;
>> +			refs_selected[ref_ind++] = BWD_BUF_IDX;
>> +			ref_stamp--;
>> +		}
>> +	}
>> +
>> +	idx = rockchip_vpu981_get_frame_index(ctx, ALT2_BUF_IDX);
>> +	if (rockchip_vpu981_av1_dec_get_relative_dist
>> +	    (ctx, alt2_frame_offset, cur_frame_offset) > 0) {
>> +		int alt2_mi_cols = (av1_dec->frame_refs[idx].width + 7) >> 3;
>> +		int alt2_mi_rows = (av1_dec->frame_refs[idx].height + 7) >> 3;
>> +		bool alt2_intra_only =
>> +		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
>> +
>> +		if (alt2_mi_cols == cur_mi_cols && alt2_mi_rows == cur_mi_rows
>> +		    && !alt2_intra_only) {
>> +			mf_types[ref_ind] = V4L2_AV1_REF_ALTREF2_FRAME;
>> +			refs_selected[ref_ind++] = ALT2_BUF_IDX;
>> +			ref_stamp--;
>> +		}
>> +	}
>> +
>> +	idx = rockchip_vpu981_get_frame_index(ctx, ALT_BUF_IDX);
>> +	if (rockchip_vpu981_av1_dec_get_relative_dist
>> +	    (ctx, alt_frame_offset, cur_frame_offset) > 0 && ref_stamp >= 0) {
>> +		int alt_mi_cols = (av1_dec->frame_refs[idx].width + 7) >> 3;
>> +		int alt_mi_rows = (av1_dec->frame_refs[idx].height + 7) >> 3;
>> +		bool alt_intra_only =
>> +		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
>> +
>> +		if (alt_mi_cols == cur_mi_cols && alt_mi_rows == cur_mi_rows &&
>> +		    !alt_intra_only) {
>> +			mf_types[ref_ind] = V4L2_AV1_REF_ALTREF_FRAME;
>> +			refs_selected[ref_ind++] = ALT_BUF_IDX;
>> +			ref_stamp--;
>> +		}
>> +	}
>> +
>> +	idx = rockchip_vpu981_get_frame_index(ctx, LST2_BUF_IDX);
>> +	if (idx >= 0 && ref_stamp >= 0) {
>> +		int lst2_mi_cols = (av1_dec->frame_refs[idx].width + 7) >> 3;
>> +		int lst2_mi_rows = (av1_dec->frame_refs[idx].height + 7) >> 3;
> I said I'd stop, but can't this be calculated once and saved in the reference
> frame structure?

Indeed that is a simplification, I will implement it.

>
>> +		bool lst2_intra_only =
>> +		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
>> +
>> +		if (lst2_mi_cols == cur_mi_cols && lst2_mi_rows == cur_mi_rows
>> +		    && !lst2_intra_only) {
>> +			mf_types[ref_ind] = V4L2_AV1_REF_LAST2_FRAME;
>> +			refs_selected[ref_ind++] = LST2_BUF_IDX;
>> +			ref_stamp--;
>> +		}
>> +	}
>> +
>> +	for (rf = 0; rf < V4L2_AV1_NUM_REF_FRAMES - 1; ++rf) {
>> +		idx = rockchip_vpu981_get_frame_index(ctx, rf);
>> +		if (idx >= 0) {
>> +			int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, rf);
>> +
>> +			cur_offset[rf] =
>> +			    rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +								      cur_frame_offset,
>> +								      rf_order_hint);
>> +			cur_roffset[rf] =
>> +			    rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +								      rf_order_hint,
>> +								      cur_frame_offset);
>> +		} else {
>> +			cur_offset[rf] = 0;
>> +			cur_roffset[rf] = 0;
>> +		}
>> +	}
>> +
>> +	hantro_reg_write(vpu, &av1_use_temporal0_mvs, 0);
>> +	hantro_reg_write(vpu, &av1_use_temporal1_mvs, 0);
>> +	hantro_reg_write(vpu, &av1_use_temporal2_mvs, 0);
>> +	hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
>> +
>> +	hantro_reg_write(vpu, &av1_mf1_last_offset, 0);
>> +	hantro_reg_write(vpu, &av1_mf1_last2_offset, 0);
>> +	hantro_reg_write(vpu, &av1_mf1_last3_offset, 0);
>> +	hantro_reg_write(vpu, &av1_mf1_golden_offset, 0);
>> +	hantro_reg_write(vpu, &av1_mf1_bwdref_offset, 0);
>> +	hantro_reg_write(vpu, &av1_mf1_altref2_offset, 0);
>> +	hantro_reg_write(vpu, &av1_mf1_altref_offset, 0);
>> +
>> +	if (use_ref_frame_mvs && ref_ind > 0 &&
>> +	    cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE
>> +	    && cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
>> +		int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, refs_selected[0]);
>> +		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[0]);
>> +		int val;
>> +
>> +		hantro_reg_write(vpu, &av1_use_temporal0_mvs, 1);
>> +
>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +				rf_order_hint,
>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST_FRAME]);
>> +		hantro_reg_write(vpu, &av1_mf1_last_offset, val);
>> +
>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +				rf_order_hint,
>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST2_FRAME]);
>> +		hantro_reg_write(vpu, &av1_mf1_last2_offset, val);
>> +
>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +				rf_order_hint,
>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST3_FRAME]);
>> +		hantro_reg_write(vpu, &av1_mf1_last3_offset, val);
>> +
>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +				rf_order_hint,
>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_GOLDEN_FRAME]);
>> +		hantro_reg_write(vpu, &av1_mf1_golden_offset, val);
>> +
>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +				rf_order_hint,
>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_BWDREF_FRAME]);
>> +		hantro_reg_write(vpu, &av1_mf1_bwdref_offset, val);
>> +
>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +				rf_order_hint,
>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF2_FRAME]);
>> +		hantro_reg_write(vpu, &av1_mf1_altref2_offset, val);
>> +
>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +				rf_order_hint,
>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME]);
>> +		hantro_reg_write(vpu, &av1_mf1_altref_offset, val);
>> +	}
>> +
>> +	hantro_reg_write(vpu, &av1_mf2_last_offset, 0);
>> +	hantro_reg_write(vpu, &av1_mf2_last2_offset, 0);
>> +	hantro_reg_write(vpu, &av1_mf2_last3_offset, 0);
>> +	hantro_reg_write(vpu, &av1_mf2_golden_offset, 0);
>> +	hantro_reg_write(vpu, &av1_mf2_bwdref_offset, 0);
>> +	hantro_reg_write(vpu, &av1_mf2_altref2_offset, 0);
>> +	hantro_reg_write(vpu, &av1_mf2_altref_offset, 0);
>> +
>> +	if (use_ref_frame_mvs && ref_ind > 1 &&
>> +	    cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE
>> +	    && cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
>> +		int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, refs_selected[1]);
>> +		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[1]);
>> +		int val;
>> +
>> +		hantro_reg_write(vpu, &av1_use_temporal1_mvs, 1);
>> +
>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +				rf_order_hint,
>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST_FRAME]);
>> +		hantro_reg_write(vpu, &av1_mf2_last_offset, val);
>> +
>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +				rf_order_hint,
>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST2_FRAME]);
>> +		hantro_reg_write(vpu, &av1_mf2_last2_offset, val);
>> +
>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +				rf_order_hint,
>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST3_FRAME]);
>> +		hantro_reg_write(vpu, &av1_mf2_last3_offset, val);
>> +
>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +				rf_order_hint,
>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_GOLDEN_FRAME]);
>> +		hantro_reg_write(vpu, &av1_mf2_golden_offset, val);
>> +
>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +				rf_order_hint,
>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_BWDREF_FRAME]);
>> +		hantro_reg_write(vpu, &av1_mf2_bwdref_offset, val);
>> +
>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +				rf_order_hint,
>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF2_FRAME]);
>> +		hantro_reg_write(vpu, &av1_mf2_altref2_offset, val);
>> +
>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +				rf_order_hint,
>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME]);
>> +		hantro_reg_write(vpu, &av1_mf2_altref_offset, val);
>> +	}
>> +
>> +	hantro_reg_write(vpu, &av1_mf3_last_offset, 0);
>> +	hantro_reg_write(vpu, &av1_mf3_last2_offset, 0);
>> +	hantro_reg_write(vpu, &av1_mf3_last3_offset, 0);
>> +	hantro_reg_write(vpu, &av1_mf3_golden_offset, 0);
>> +	hantro_reg_write(vpu, &av1_mf3_bwdref_offset, 0);
>> +	hantro_reg_write(vpu, &av1_mf3_altref2_offset, 0);
>> +	hantro_reg_write(vpu, &av1_mf3_altref_offset, 0);
>> +
>> +	if (use_ref_frame_mvs && ref_ind > 2 &&
>> +	    cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE
>> +	    && cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
>> +		int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, refs_selected[2]);
>> +		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[2]);
>> +		int val;
>> +
>> +		hantro_reg_write(vpu, &av1_use_temporal2_mvs, 1);
>> +
>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +				rf_order_hint,
>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST_FRAME]);
>> +		hantro_reg_write(vpu, &av1_mf3_last_offset, val);
>> +
>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +				rf_order_hint,
>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST2_FRAME]);
>> +		hantro_reg_write(vpu, &av1_mf3_last2_offset, val);
>> +
>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +				rf_order_hint,
>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST3_FRAME]);
>> +		hantro_reg_write(vpu, &av1_mf3_last3_offset, val);
>> +
>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +				rf_order_hint,
>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_GOLDEN_FRAME]);
>> +		hantro_reg_write(vpu, &av1_mf3_golden_offset, val);
>> +
>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +				rf_order_hint,
>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_BWDREF_FRAME]);
>> +		hantro_reg_write(vpu, &av1_mf3_bwdref_offset, val);
>> +
>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +				rf_order_hint,
>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF2_FRAME]);
>> +		hantro_reg_write(vpu, &av1_mf3_altref2_offset, val);
>> +
>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>> +				rf_order_hint,
>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME]);
>> +		hantro_reg_write(vpu, &av1_mf3_altref_offset, val);
>> +	}
>> +
>> +	hantro_reg_write(vpu, &av1_cur_last_offset, cur_offset[0]);
>> +	hantro_reg_write(vpu, &av1_cur_last2_offset, cur_offset[1]);
>> +	hantro_reg_write(vpu, &av1_cur_last3_offset, cur_offset[2]);
>> +	hantro_reg_write(vpu, &av1_cur_golden_offset, cur_offset[3]);
>> +	hantro_reg_write(vpu, &av1_cur_bwdref_offset, cur_offset[4]);
>> +	hantro_reg_write(vpu, &av1_cur_altref2_offset, cur_offset[5]);
>> +	hantro_reg_write(vpu, &av1_cur_altref_offset, cur_offset[6]);
>> +
>> +	hantro_reg_write(vpu, &av1_cur_last_roffset, cur_roffset[0]);
>> +	hantro_reg_write(vpu, &av1_cur_last2_roffset, cur_roffset[1]);
>> +	hantro_reg_write(vpu, &av1_cur_last3_roffset, cur_roffset[2]);
>> +	hantro_reg_write(vpu, &av1_cur_golden_roffset, cur_roffset[3]);
>> +	hantro_reg_write(vpu, &av1_cur_bwdref_roffset, cur_roffset[4]);
>> +	hantro_reg_write(vpu, &av1_cur_altref2_roffset, cur_roffset[5]);
>> +	hantro_reg_write(vpu, &av1_cur_altref_roffset, cur_roffset[6]);
>> +
>> +	hantro_reg_write(vpu, &av1_mf1_type, mf_types[0] - V4L2_AV1_REF_LAST_FRAME);
>> +	hantro_reg_write(vpu, &av1_mf2_type, mf_types[1] - V4L2_AV1_REF_LAST_FRAME);
>> +	hantro_reg_write(vpu, &av1_mf3_type, mf_types[2] - V4L2_AV1_REF_LAST_FRAME);
>> +}
>> +
>> +static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>> +	int frame_type = frame->frame_type;
>> +	bool allow_intrabc = !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
>> +	int ref_count[AV1DEC_MAX_PIC_BUFFERS] = { 0 };
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	int i, ref_frames = 0;
>> +	bool scale_enable = false;
>> +
>> +	if (IS_INTRA(frame_type) && !allow_intrabc)
>> +		return;
>> +
>> +	if (!allow_intrabc) {
>> +		for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
>> +			int idx = rockchip_vpu981_get_frame_index(ctx, i);
>> +
>> +			if (idx >= 0)
>> +				ref_count[idx]++;
>> +		}
>> +
>> +		for (i = 0; i < AV1DEC_MAX_PIC_BUFFERS; i++) {
>> +			if (ref_count[i])
>> +				ref_frames++;
>> +		}
>> +	} else {
>> +		ref_frames = 1;
>> +	}
>> +	hantro_reg_write(vpu, &av1_ref_frames, ref_frames);
>> +
>> +	rockchip_vpu981_av1_dec_set_frame_sign_bias(ctx);
>> +
>> +	for (i = V4L2_AV1_REF_LAST_FRAME; i < V4L2_AV1_NUM_REF_FRAMES; i++) {
>> +		uint32_t ref = i - 1;
>> +		int idx = 0;
>> +		int width, height;
>> +
>> +		if (allow_intrabc) {
>> +			idx = av1_dec->current_frame_index;
>> +			width = frame->frame_width_minus_1 + 1;
>> +			height = frame->frame_height_minus_1 + 1;
>> +		} else {
>> +			if (rockchip_vpu981_get_frame_index(ctx, ref) > 0)
>> +				idx = rockchip_vpu981_get_frame_index(ctx, ref);
>> +			width = av1_dec->frame_refs[idx].width;
>> +			height = av1_dec->frame_refs[idx].height;
>> +		}
>> +
>> +		scale_enable |=
>> +		    rockchip_vpu981_av1_dec_set_ref(ctx, ref, idx, width,
>> +						    height);
>> +
>> +		rockchip_vpu981_av1_dec_set_sign_bias(ctx, ref,
>> +						      av1_dec->ref_frame_sign_bias[i]);
>> +	}
>> +	hantro_reg_write(vpu, &av1_ref_scaling_enable, scale_enable);
>> +
>> +	hantro_reg_write(vpu, &av1_ref0_gm_mode,
>> +			 frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME]);
>> +	hantro_reg_write(vpu, &av1_ref1_gm_mode,
>> +			 frame->global_motion.type[V4L2_AV1_REF_LAST2_FRAME]);
>> +	hantro_reg_write(vpu, &av1_ref2_gm_mode,
>> +			 frame->global_motion.type[V4L2_AV1_REF_LAST3_FRAME]);
>> +	hantro_reg_write(vpu, &av1_ref3_gm_mode,
>> +			 frame->global_motion.type[V4L2_AV1_REF_GOLDEN_FRAME]);
>> +	hantro_reg_write(vpu, &av1_ref4_gm_mode,
>> +			 frame->global_motion.type[V4L2_AV1_REF_BWDREF_FRAME]);
>> +	hantro_reg_write(vpu, &av1_ref5_gm_mode,
>> +			 frame->global_motion.type[V4L2_AV1_REF_ALTREF2_FRAME]);
>> +	hantro_reg_write(vpu, &av1_ref6_gm_mode,
>> +			 frame->global_motion.type[V4L2_AV1_REF_ALTREF_FRAME]);
>> +
>> +	rockchip_vpu981_av1_dec_set_other_frames(ctx);
>> +}
>> +
>> +static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>> +
>> +	hantro_reg_write(vpu, &av1_skip_mode,
>> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT));
>> +	hantro_reg_write(vpu, &av1_tempor_mvp_e,
>> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS));
>> +	hantro_reg_write(vpu, &av1_delta_lf_res_log,
>> +			 ctrls->frame->loop_filter.delta_lf_res);
>> +	hantro_reg_write(vpu, &av1_delta_lf_multi,
>> +			 !!(ctrls->frame->loop_filter.flags
>> +			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI));
>> +	hantro_reg_write(vpu, &av1_delta_lf_present,
>> +			 !!(ctrls->frame->loop_filter.flags
>> +			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT));
>> +	hantro_reg_write(vpu, &av1_disable_cdf_update,
>> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE));
>> +	hantro_reg_write(vpu, &av1_allow_warp,
>> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION));
>> +	hantro_reg_write(vpu, &av1_show_frame,
>> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SHOW_FRAME));
>> +	hantro_reg_write(vpu, &av1_switchable_motion_mode,
>> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE));
>> +	hantro_reg_write(vpu, &av1_enable_cdef,
>> +			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF));
>> +	hantro_reg_write(vpu, &av1_allow_masked_compound,
>> +			 !!(ctrls->sequence->flags
>> +			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND));
>> +	hantro_reg_write(vpu, &av1_allow_interintra,
>> +			 !!(ctrls->sequence->flags
>> +			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND));
>> +	hantro_reg_write(vpu, &av1_enable_intra_edge_filter,
>> +			 !!(ctrls->sequence->flags
>> +			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER));
>> +	hantro_reg_write(vpu, &av1_allow_filter_intra,
>> +			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA));
>> +	hantro_reg_write(vpu, &av1_enable_jnt_comp,
>> +			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP));
>> +	hantro_reg_write(vpu, &av1_enable_dual_filter,
>> +			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER));
>> +	hantro_reg_write(vpu, &av1_reduced_tx_set_used,
>> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET));
>> +	hantro_reg_write(vpu, &av1_allow_screen_content_tools,
>> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS));
>> +	hantro_reg_write(vpu, &av1_allow_intrabc,
>> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC));
>> +
>> +	if (!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS))
>> +		hantro_reg_write(vpu, &av1_force_interger_mv, 0);
>> +	else
>> +		hantro_reg_write(vpu, &av1_force_interger_mv,
>> +				 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV));
>> +
>> +	hantro_reg_write(vpu, &av1_blackwhite_e, 0);
>> +	hantro_reg_write(vpu, &av1_delta_q_res_log, ctrls->frame->quantization.delta_q_res);
>> +	hantro_reg_write(vpu, &av1_delta_q_present,
>> +			 !!(ctrls->frame->quantization.flags
>> +			    & V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT));
>> +
>> +	hantro_reg_write(vpu, &av1_idr_pic_e, !ctrls->frame->frame_type);
>> +	hantro_reg_write(vpu, &av1_quant_base_qindex, ctrls->frame->quantization.base_q_idx);
>> +	hantro_reg_write(vpu, &av1_bit_depth_y_minus8, ctx->bit_depth - 8);
>> +	hantro_reg_write(vpu, &av1_bit_depth_c_minus8, ctx->bit_depth - 8);
> I believe these registers are read by the PP when using format 0; perhaps
> something worth commenting on, as they will impact the pixels produced by the
> post-processor, which can be a surprising side effect. Bits 11:8 also hold
> sw_bit_depth_out_minus8, but I don't know if it's used for AV1; in my doc it's
> only used for the AVS2 CODEC.

I see no indication of that in the MPP code.
The post-processor output pixel format is selected by writing to the av1_pp_out_format field.

>
> I think it's good to underline that in VC8000/VC9000, some registers are shared
> across multiple CODECs.
>
>> +
>> +	hantro_reg_write(vpu, &av1_mcomp_filt_type, ctrls->frame->interpolation_filter);
>> +	hantro_reg_write(vpu, &av1_high_prec_mv_e,
>> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV));
>> +	hantro_reg_write(vpu, &av1_comp_pred_mode,
>> +			 (ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT) ? 2 : 0);
>> +	hantro_reg_write(vpu, &av1_transform_mode, (ctrls->frame->tx_mode == 1) ? 3 : 4);
>> +	hantro_reg_write(vpu, &av1_max_cb_size,
>> +			 (ctrls->sequence->flags
>> +			  & V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK) ? 7 : 6);
>> +	hantro_reg_write(vpu, &av1_min_cb_size, 3);
>> +
>> +	hantro_reg_write(vpu, &av1_comp_pred_fixed_ref, 0);
>> +	hantro_reg_write(vpu, &av1_comp_pred_var_ref0_av1, 0);
>> +	hantro_reg_write(vpu, &av1_comp_pred_var_ref1_av1, 0);
>> +	hantro_reg_write(vpu, &av1_filt_level_seg0, 0);
>> +	hantro_reg_write(vpu, &av1_filt_level_seg1, 0);
>> +	hantro_reg_write(vpu, &av1_filt_level_seg2, 0);
>> +	hantro_reg_write(vpu, &av1_filt_level_seg3, 0);
>> +	hantro_reg_write(vpu, &av1_filt_level_seg4, 0);
>> +	hantro_reg_write(vpu, &av1_filt_level_seg5, 0);
>> +	hantro_reg_write(vpu, &av1_filt_level_seg6, 0);
>> +	hantro_reg_write(vpu, &av1_filt_level_seg7, 0);
>> +
>> +	hantro_reg_write(vpu, &av1_qp_delta_y_dc_av1, ctrls->frame->quantization.delta_q_y_dc);
>> +	hantro_reg_write(vpu, &av1_qp_delta_ch_dc_av1, ctrls->frame->quantization.delta_q_u_dc);
>> +	hantro_reg_write(vpu, &av1_qp_delta_ch_ac_av1, ctrls->frame->quantization.delta_q_u_ac);
>> +	if (ctrls->frame->quantization.flags & V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX) {
>> +		hantro_reg_write(vpu, &av1_qmlevel_y, ctrls->frame->quantization.qm_y);
>> +		hantro_reg_write(vpu, &av1_qmlevel_u, ctrls->frame->quantization.qm_u);
>> +		hantro_reg_write(vpu, &av1_qmlevel_v, ctrls->frame->quantization.qm_v);
>> +	} else {
>> +		hantro_reg_write(vpu, &av1_qmlevel_y, 0xff);
>> +		hantro_reg_write(vpu, &av1_qmlevel_u, 0xff);
>> +		hantro_reg_write(vpu, &av1_qmlevel_v, 0xff);
>> +	}
>> +
>> +	hantro_reg_write(vpu, &av1_lossless_e, rockchip_vpu981_av1_dec_is_lossless(ctx));
>> +	hantro_reg_write(vpu, &av1_quant_delta_v_dc, ctrls->frame->quantization.delta_q_v_dc);
>> +	hantro_reg_write(vpu, &av1_quant_delta_v_ac, ctrls->frame->quantization.delta_q_v_ac);
>> +
>> +	hantro_reg_write(vpu, &av1_skip_ref0,
>> +			 (ctrls->frame->skip_mode_frame[0]) ? ctrls->frame->skip_mode_frame[0] : 1);
>> +	hantro_reg_write(vpu, &av1_skip_ref1,
>> +			 (ctrls->frame->skip_mode_frame[1]) ? ctrls->frame->skip_mode_frame[1] : 1);
>> +
>> +	hantro_write_addr(vpu, AV1_MC_SYNC_CURR, av1_dec->tile_buf.dma);
>> +	hantro_write_addr(vpu, AV1_MC_SYNC_LEFT, av1_dec->tile_buf.dma);
>> +}
>> +
>> +static void
>> +rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx *ctx,
>> +					 struct vb2_v4l2_buffer *vb2_src)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>> +	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
>> +	    ctrls->tile_group_entry;
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	dma_addr_t src_dma;
>> +	u32 src_len, src_buf_len;
>> +	int start_bit, offset;
>> +
>> +	src_dma = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0);
>> +	src_len = vb2_get_plane_payload(&vb2_src->vb2_buf, 0);
>> +	src_buf_len = vb2_plane_size(&vb2_src->vb2_buf, 0);
>> +
>> +	start_bit = (group_entry[0].tile_offset & 0xf) * 8;
>> +	offset = group_entry[0].tile_offset & ~0xf;
>> +
>> +	hantro_reg_write(vpu, &av1_strm_buffer_len, src_buf_len);
>> +	hantro_reg_write(vpu, &av1_strm_start_bit, start_bit);
>> +	hantro_reg_write(vpu, &av1_stream_len, src_len);
>> +	hantro_reg_write(vpu, &av1_strm_start_offset, 0);
>> +	hantro_write_addr(vpu, AV1_INPUT_STREAM, src_dma + offset);
>> +}
>> +
>> +static void
>> +rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	struct hantro_decoded_buffer *dst;
>> +	struct vb2_v4l2_buffer *vb2_dst;
>> +	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
>> +	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
>> +	size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
>> +
>> +	vb2_dst = av1_dec->frame_refs[av1_dec->current_frame_index].vb2_ref;
>> +	dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf);
>> +	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
>> +	chroma_addr = luma_addr + cr_offset;
>> +	mv_addr = luma_addr + mv_offset;
>> +
>> +	hantro_write_addr(vpu, AV1_TILE_OUT_LU, luma_addr);
>> +	hantro_write_addr(vpu, AV1_TILE_OUT_CH, chroma_addr);
>> +	hantro_write_addr(vpu, AV1_TILE_OUT_MV, mv_addr);
>> +}
>> +
>> +int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	struct vb2_v4l2_buffer *vb2_src;
>> +	int ret;
>> +
>> +	hantro_start_prepare_run(ctx);
>> +
>> +	ret = rockchip_vpu981_av1_dec_prepare_run(ctx);
>> +	if (ret)
>> +		goto prepare_error;
>> +
>> +	vb2_src = hantro_get_src_buf(ctx);
>> +	if (!vb2_src)
>> +		goto prepare_error;
>> +
>> +	rockchip_vpu981_av1_dec_clean_refs(ctx);
>> +	rockchip_vpu981_av1_dec_frame_ref(ctx, vb2_src->vb2_buf.timestamp);
>> +
>> +	rockchip_vpu981_av1_dec_set_parameters(ctx);
>> +	rockchip_vpu981_av1_dec_set_global_model(ctx);
>> +	rockchip_vpu981_av1_dec_set_tile_info(ctx);
>> +	rockchip_vpu981_av1_dec_set_reference_frames(ctx);
>> +	rockchip_vpu981_av1_dec_set_segmentation(ctx);
>> +	rockchip_vpu981_av1_dec_set_loopfilter(ctx);
>> +	rockchip_vpu981_av1_dec_set_picture_dimensions(ctx);
>> +	rockchip_vpu981_av1_dec_set_cdef(ctx);
>> +	rockchip_vpu981_av1_dec_set_lr(ctx);
>> +	rockchip_vpu981_av1_dec_set_prob(ctx);
>> +
>> +	hantro_reg_write(vpu, &av1_dec_mode, AV1_DEC_MODE);
>> +	hantro_reg_write(vpu, &av1_dec_out_ec_byte_word, 0);
>> +	hantro_reg_write(vpu, &av1_write_mvs_e, 1);
>> +	hantro_reg_write(vpu, &av1_dec_out_ec_bypass, 1);
>> +	hantro_reg_write(vpu, &av1_dec_clk_gate_e, 1);
>> +
>> +	hantro_reg_write(vpu, &av1_dec_abort_e, 0);
>> +	hantro_reg_write(vpu, &av1_dec_tile_int_e, 0);
>> +
>> +	hantro_reg_write(vpu, &av1_dec_alignment, 64);
>> +	hantro_reg_write(vpu, &av1_apf_disable, 0);
>> +	hantro_reg_write(vpu, &av1_apf_threshold, 8);
>> +	hantro_reg_write(vpu, &av1_dec_buswidth, 2);
>> +	hantro_reg_write(vpu, &av1_dec_max_burst, 16);
>> +	hantro_reg_write(vpu, &av1_error_conceal_e, 0);
>> +	hantro_reg_write(vpu, &av1_axi_rd_ostd_threshold, 64);
>> +	hantro_reg_write(vpu, &av1_axi_wr_ostd_threshold, 64);
>> +
>> +	hantro_reg_write(vpu, &av1_ext_timeout_cycles, 0xfffffff);
>> +	hantro_reg_write(vpu, &av1_ext_timeout_override_e, 1);
>> +	hantro_reg_write(vpu, &av1_timeout_cycles, 0xfffffff);
>> +	hantro_reg_write(vpu, &av1_timeout_override_e, 1);
>> +
>> +	rockchip_vpu981_av1_dec_set_output_buffer(ctx);
>> +	rockchip_vpu981_av1_dec_set_input_buffer(ctx, vb2_src);
>> +
>> +	hantro_end_prepare_run(ctx);
>> +
>> +	hantro_reg_write(vpu, &av1_dec_e, 1);
>> +
>> +	return 0;
>> +
>> +prepare_error:
>> +	hantro_end_prepare_run(ctx);
>> +	hantro_irq_done(vpu, VB2_BUF_STATE_ERROR);
>> +	return ret;
>> +}
>> +
>> +static void rockchip_vpu981_postproc_enable(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	int width = ctx->dst_fmt.width;
>> +	int height = ctx->dst_fmt.height;
>> +	struct vb2_v4l2_buffer *vb2_dst;
>> +	size_t chroma_offset;
>> +	dma_addr_t dst_dma;
>> +
>> +	vb2_dst = hantro_get_dst_buf(ctx);
>> +
>> +	dst_dma = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
>> +	chroma_offset = ctx->dst_fmt.plane_fmt[0].bytesperline *
>> +	    ctx->dst_fmt.height;
>> +
>> +	/* enable post processor */
>> +	hantro_reg_write(vpu, &av1_pp_out_e, 1);
>> +	hantro_reg_write(vpu, &av1_pp_in_format, 0);
>> +	hantro_reg_write(vpu, &av1_pp0_dup_hor, 1);
>> +	hantro_reg_write(vpu, &av1_pp0_dup_ver, 1);
>> +
>> +	hantro_reg_write(vpu, &av1_pp_in_height, height / 2);
>> +	hantro_reg_write(vpu, &av1_pp_in_width, width / 2);
>> +	hantro_reg_write(vpu, &av1_pp_out_height, height);
>> +	hantro_reg_write(vpu, &av1_pp_out_width, width);
>> +	hantro_reg_write(vpu, &av1_pp_out_y_stride,
>> +			 ctx->dst_fmt.plane_fmt[0].bytesperline);
>> +	hantro_reg_write(vpu, &av1_pp_out_c_stride,
>> +			 ctx->dst_fmt.plane_fmt[0].bytesperline);
>> +	switch (ctx->dst_fmt.pixelformat) {
>> +	case V4L2_PIX_FMT_P010:
>> +		hantro_reg_write(vpu, &av1_pp_out_format, 1);
>> +		break;
>> +	case V4L2_PIX_FMT_NV12:
>> +		hantro_reg_write(vpu, &av1_pp_out_format, 3);
>> +		break;
>> +	default:
>> +		hantro_reg_write(vpu, &av1_pp_out_format, 0);
>> +	}
>> +
>> +	hantro_reg_write(vpu, &av1_ppd_blend_exist, 0);
>> +	hantro_reg_write(vpu, &av1_ppd_dith_exist, 0);
>> +	hantro_reg_write(vpu, &av1_ablend_crop_e, 0);
>> +	hantro_reg_write(vpu, &av1_pp_format_customer1_e, 0);
>> +	hantro_reg_write(vpu, &av1_pp_crop_exist, 0);
>> +	hantro_reg_write(vpu, &av1_pp_up_level, 0);
>> +	hantro_reg_write(vpu, &av1_pp_down_level, 0);
>> +	hantro_reg_write(vpu, &av1_pp_exist, 0);
>> +
>> +	hantro_write_addr(vpu, AV1_PP_OUT_LU, dst_dma);
>> +	hantro_write_addr(vpu, AV1_PP_OUT_CH, dst_dma + chroma_offset);
>> +}
>> +
>> +static void rockchip_vpu981_postproc_disable(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_dev *vpu = ctx->dev;
>> +
>> +	/* disable post processor */
>> +	hantro_reg_write(vpu, &av1_pp_out_e, 0);
>> +}
>> +
>> +const struct hantro_postproc_ops rockchip_vpu981_postproc_ops = {
>> +	.enable = rockchip_vpu981_postproc_enable,
>> +	.disable = rockchip_vpu981_postproc_disable,
>> +};
> As enabling the post-proc is done by the common driver, we need a mechanism to
> alter the core decisions, as we really need to enable post-proc to produce
> film grain. Fortunately, not applying this filter isn't visually bad, but that's
> not the intent of the video author, so it should not be like this by default.

I do agree but I think it is out of the scope of this patch.

>
>> diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h b/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
>> new file mode 100644
>> index 000000000000..182e6c830ff6
>> --- /dev/null
>> +++ b/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
>> @@ -0,0 +1,477 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +/*
>> + * Copyright (c) 2022, Collabora
>> + *
>> + * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
>> + */
>> +
>> +#ifndef _ROCKCHIP_VPU981_REGS_H_
>> +#define _ROCKCHIP_VPU981_REGS_H_
>> +
>> +#include "hantro.h"
>> +
>> +#define AV1_SWREG(nr)	((nr) * 4)
>> +
>> +#define AV1_DEC_REG(b, s, m) \
>> +	((const struct hantro_reg) { \
>> +		.base = AV1_SWREG(b), \
>> +		.shift = s, \
>> +		.mask = m, \
>> +	})
> Just a note that a lot of this is identical to the VC8000 register sets. But until
> someone upstreams VC8000 and until we have more VC9000 support, it's fine to keep
> that as if it were RK-specific.
>
>> +
>> +#define AV1_REG_INTERRUPT		AV1_SWREG(1)
>> +#define AV1_REG_INTERRUPT_DEC_RDY_INT	BIT(12)
>> +
>> +#define AV1_REG_CONFIG			AV1_SWREG(2)
>> +#define AV1_REG_CONFIG_DEC_CLK_GATE_E	BIT(10)
>> +
>> +#define av1_dec_e			AV1_DEC_REG(1, 0, 0x1)
>> +#define av1_dec_abort_e			AV1_DEC_REG(1, 5, 0x1)
>> +#define av1_dec_tile_int_e		AV1_DEC_REG(1, 7, 0x1)
>> +
>> +#define av1_dec_clk_gate_e		AV1_DEC_REG(2, 10, 0x1)
>> +
>> +#define av1_dec_out_ec_bypass		AV1_DEC_REG(3, 8,  0x1)
>> +#define av1_write_mvs_e			AV1_DEC_REG(3, 12, 0x1)
>> +#define av1_filtering_dis		AV1_DEC_REG(3, 14, 0x1)
>> +#define av1_dec_out_dis			AV1_DEC_REG(3, 15, 0x1)
>> +#define av1_dec_out_ec_byte_word	AV1_DEC_REG(3, 16, 0x1)
>> +#define av1_skip_mode			AV1_DEC_REG(3, 26, 0x1)
>> +#define av1_dec_mode			AV1_DEC_REG(3, 27, 0x1f)
>> +
>> +#define av1_ref_frames			AV1_DEC_REG(4, 0, 0xf)
>> +#define av1_pic_height_in_cbs		AV1_DEC_REG(4, 6, 0x1fff)
>> +#define av1_pic_width_in_cbs		AV1_DEC_REG(4, 19, 0x1fff)
>> +
>> +#define av1_ref_scaling_enable		AV1_DEC_REG(5, 0, 0x1)
>> +#define av1_filt_level_base_gt32	AV1_DEC_REG(5, 1, 0x1)
>> +#define av1_error_resilient		AV1_DEC_REG(5, 2, 0x1)
>> +#define av1_force_interger_mv		AV1_DEC_REG(5, 3, 0x1)
>> +#define av1_allow_intrabc		AV1_DEC_REG(5, 4, 0x1)
>> +#define av1_allow_screen_content_tools	AV1_DEC_REG(5, 5, 0x1)
>> +#define av1_reduced_tx_set_used		AV1_DEC_REG(5, 6, 0x1)
>> +#define av1_enable_dual_filter		AV1_DEC_REG(5, 7, 0x1)
>> +#define av1_enable_jnt_comp		AV1_DEC_REG(5, 8, 0x1)
>> +#define av1_allow_filter_intra		AV1_DEC_REG(5, 9, 0x1)
>> +#define av1_enable_intra_edge_filter	AV1_DEC_REG(5, 10, 0x1)
>> +#define av1_tempor_mvp_e		AV1_DEC_REG(5, 11, 0x1)
>> +#define av1_allow_interintra		AV1_DEC_REG(5, 12, 0x1)
>> +#define av1_allow_masked_compound	AV1_DEC_REG(5, 13, 0x1)
>> +#define av1_enable_cdef			AV1_DEC_REG(5, 14, 0x1)
>> +#define av1_switchable_motion_mode	AV1_DEC_REG(5, 15, 0x1)
>> +#define av1_show_frame			AV1_DEC_REG(5, 16, 0x1)
>> +#define av1_superres_is_scaled		AV1_DEC_REG(5, 17, 0x1)
>> +#define av1_allow_warp			AV1_DEC_REG(5, 18, 0x1)
>> +#define av1_disable_cdf_update		AV1_DEC_REG(5, 19, 0x1)
>> +#define av1_preskip_segid		AV1_DEC_REG(5, 20, 0x1)
>> +#define av1_delta_lf_present		AV1_DEC_REG(5, 21, 0x1)
>> +#define av1_delta_lf_multi		AV1_DEC_REG(5, 22, 0x1)
>> +#define av1_delta_lf_res_log		AV1_DEC_REG(5, 23, 0x3)
>> +#define av1_strm_start_bit		AV1_DEC_REG(5, 25, 0x7f)
>> +
>> +#define	av1_stream_len			AV1_DEC_REG(6, 0, 0xffffffff)
>> +
>> +#define av1_delta_q_present		AV1_DEC_REG(7, 0, 0x1)
>> +#define av1_delta_q_res_log		AV1_DEC_REG(7, 1, 0x3)
>> +#define av1_cdef_damping		AV1_DEC_REG(7, 3, 0x3)
>> +#define av1_cdef_bits			AV1_DEC_REG(7, 5, 0x3)
>> +#define av1_apply_grain			AV1_DEC_REG(7, 7, 0x1)
>> +#define av1_num_y_points_b		AV1_DEC_REG(7, 8, 0x1)
>> +#define av1_num_cb_points_b		AV1_DEC_REG(7, 9, 0x1)
>> +#define av1_num_cr_points_b		AV1_DEC_REG(7, 10, 0x1)
>> +#define av1_overlap_flag		AV1_DEC_REG(7, 11, 0x1)
>> +#define av1_clip_to_restricted_range	AV1_DEC_REG(7, 12, 0x1)
>> +#define av1_chroma_scaling_from_luma	AV1_DEC_REG(7, 13, 0x1)
>> +#define av1_random_seed			AV1_DEC_REG(7, 14, 0xffff)
>> +#define av1_blackwhite_e		AV1_DEC_REG(7, 30, 0x1)
>> +
>> +#define av1_scaling_shift		AV1_DEC_REG(8, 0, 0xf)
>> +#define av1_bit_depth_c_minus8		AV1_DEC_REG(8, 4, 0x3)
>> +#define av1_bit_depth_y_minus8		AV1_DEC_REG(8, 6, 0x3)
>> +#define av1_quant_base_qindex		AV1_DEC_REG(8, 8, 0xff)
>> +#define av1_idr_pic_e			AV1_DEC_REG(8, 16, 0x1)
>> +#define av1_superres_pic_width		AV1_DEC_REG(8, 17, 0x7fff)
>> +
>> +#define av1_ref4_sign_bias		AV1_DEC_REG(9, 2, 0x1)
>> +#define av1_ref5_sign_bias		AV1_DEC_REG(9, 3, 0x1)
>> +#define av1_ref6_sign_bias		AV1_DEC_REG(9, 4, 0x1)
>> +#define av1_mf1_type			AV1_DEC_REG(9, 5, 0x7)
>> +#define av1_mf2_type			AV1_DEC_REG(9, 8, 0x7)
>> +#define av1_mf3_type			AV1_DEC_REG(9, 11, 0x7)
>> +#define av1_scale_denom_minus9		AV1_DEC_REG(9, 14, 0x7)
>> +#define av1_last_active_seg		AV1_DEC_REG(9, 17, 0x7)
>> +#define av1_context_update_tile_id	AV1_DEC_REG(9, 20, 0xfff)
>> +
>> +#define av1_tile_transpose		AV1_DEC_REG(10, 0, 0x1)
>> +#define av1_tile_enable			AV1_DEC_REG(10, 1, 0x1)
>> +#define av1_multicore_full_width	AV1_DEC_REG(10,	2, 0xff)
>> +#define av1_num_tile_rows_8k		AV1_DEC_REG(10, 10, 0x7f)
>> +#define av1_num_tile_cols_8k		AV1_DEC_REG(10, 17, 0x7f)
>> +#define av1_multicore_tile_start_x	AV1_DEC_REG(10, 24, 0xff)
>> +
>> +#define av1_use_temporal3_mvs		AV1_DEC_REG(11, 0, 0x1)
>> +#define av1_use_temporal2_mvs		AV1_DEC_REG(11, 1, 0x1)
>> +#define av1_use_temporal1_mvs		AV1_DEC_REG(11, 2, 0x1)
>> +#define av1_use_temporal0_mvs		AV1_DEC_REG(11, 3, 0x1)
>> +#define av1_comp_pred_mode		AV1_DEC_REG(11, 4, 0x3)
>> +#define av1_high_prec_mv_e		AV1_DEC_REG(11, 7, 0x1)
>> +#define av1_mcomp_filt_type		AV1_DEC_REG(11, 8, 0x7)
>> +#define av1_multicore_expect_context_update	AV1_DEC_REG(11, 11, 0x1)
>> +#define av1_multicore_sbx_offset	AV1_DEC_REG(11, 12, 0x7f)
>> +#define av1_ulticore_tile_col		AV1_DEC_REG(11, 19, 0x7f)
>> +#define av1_transform_mode		AV1_DEC_REG(11, 27, 0x7)
>> +#define av1_dec_tile_size_mag		AV1_DEC_REG(11, 30, 0x3)
>> +
>> +#define av1_seg_quant_sign		AV1_DEC_REG(12, 2, 0xff)
>> +#define av1_max_cb_size			AV1_DEC_REG(12, 10, 0x7)
>> +#define av1_min_cb_size			AV1_DEC_REG(12, 13, 0x7)
>> +#define av1_comp_pred_fixed_ref		AV1_DEC_REG(12, 16, 0x7)
>> +#define av1_multicore_tile_width	AV1_DEC_REG(12, 19, 0x7f)
>> +#define av1_pic_height_pad		AV1_DEC_REG(12, 26, 0x7)
>> +#define av1_pic_width_pad		AV1_DEC_REG(12, 29, 0x7)
>> +
>> +#define av1_segment_e			AV1_DEC_REG(13, 0, 0x1)
>> +#define av1_segment_upd_e		AV1_DEC_REG(13, 1, 0x1)
>> +#define av1_segment_temp_upd_e		AV1_DEC_REG(13, 2, 0x1)
>> +#define av1_comp_pred_var_ref0_av1	AV1_DEC_REG(13, 3, 0x7)
>> +#define av1_comp_pred_var_ref1_av1	AV1_DEC_REG(13, 6, 0x7)
>> +#define av1_lossless_e			AV1_DEC_REG(13, 9, 0x1)
>> +#define av1_qp_delta_ch_ac_av1		AV1_DEC_REG(13, 11, 0x7f)
>> +#define av1_qp_delta_ch_dc_av1		AV1_DEC_REG(13, 18, 0x7f)
>> +#define av1_qp_delta_y_dc_av1		AV1_DEC_REG(13, 25, 0x7f)
>> +
>> +#define av1_quant_seg0			AV1_DEC_REG(14, 0, 0xff)
>> +#define av1_filt_level_seg0		AV1_DEC_REG(14, 8, 0x3f)
>> +#define av1_skip_seg0			AV1_DEC_REG(14, 14, 0x1)
>> +#define av1_refpic_seg0			AV1_DEC_REG(14, 15, 0xf)
>> +#define av1_filt_level_delta0_seg0	AV1_DEC_REG(14, 19, 0x7f)
>> +#define av1_filt_level0			AV1_DEC_REG(14, 26, 0x3f)
>> +
>> +#define av1_quant_seg1			AV1_DEC_REG(15, 0, 0xff)
>> +#define av1_filt_level_seg1		AV1_DEC_REG(15, 8, 0x3f)
>> +#define av1_skip_seg1			AV1_DEC_REG(15, 14, 0x1)
>> +#define av1_refpic_seg1			AV1_DEC_REG(15, 15, 0xf)
>> +#define av1_filt_level_delta0_seg1	AV1_DEC_REG(15, 19, 0x7f)
>> +#define av1_filt_level1			AV1_DEC_REG(15, 26, 0x3f)
>> +
>> +#define av1_quant_seg2			AV1_DEC_REG(16, 0, 0xff)
>> +#define av1_filt_level_seg2		AV1_DEC_REG(16, 8, 0x3f)
>> +#define av1_skip_seg2			AV1_DEC_REG(16, 14, 0x1)
>> +#define av1_refpic_seg2			AV1_DEC_REG(16, 15, 0xf)
>> +#define av1_filt_level_delta0_seg2	AV1_DEC_REG(16, 19, 0x7f)
>> +#define av1_filt_level2			AV1_DEC_REG(16, 26, 0x3f)
>> +
>> +#define av1_quant_seg3			AV1_DEC_REG(17, 0, 0xff)
>> +#define av1_filt_level_seg3		AV1_DEC_REG(17, 8, 0x3f)
>> +#define av1_skip_seg3			AV1_DEC_REG(17, 14, 0x1)
>> +#define av1_refpic_seg3			AV1_DEC_REG(17, 15, 0xf)
>> +#define av1_filt_level_delta0_seg3	AV1_DEC_REG(17, 19, 0x7f)
>> +#define av1_filt_level3			AV1_DEC_REG(17, 26, 0x3f)
>> +
>> +#define av1_quant_seg4			AV1_DEC_REG(18, 0, 0xff)
>> +#define av1_filt_level_seg4		AV1_DEC_REG(18, 8, 0x3f)
>> +#define av1_skip_seg4			AV1_DEC_REG(18, 14, 0x1)
>> +#define av1_refpic_seg4			AV1_DEC_REG(18, 15, 0xf)
>> +#define av1_filt_level_delta0_seg4	AV1_DEC_REG(18, 19, 0x7f)
>> +#define av1_lr_type			AV1_DEC_REG(18, 26, 0x3f)
>> +
>> +#define av1_quant_seg5			AV1_DEC_REG(19, 0, 0xff)
>> +#define av1_filt_level_seg5		AV1_DEC_REG(19, 8, 0x3f)
>> +#define av1_skip_seg5			AV1_DEC_REG(19, 14, 0x1)
>> +#define av1_refpic_seg5			AV1_DEC_REG(19, 15, 0xf)
>> +#define av1_filt_level_delta0_seg5	AV1_DEC_REG(19, 19, 0x7f)
>> +#define av1_lr_unit_size		AV1_DEC_REG(19, 26, 0x3f)
>> +
>> +#define av1_filt_level_delta1_seg0	AV1_DEC_REG(20, 0, 0x7f)
>> +#define av1_filt_level_delta2_seg0	AV1_DEC_REG(20, 7, 0x7f)
>> +#define av1_filt_level_delta3_seg0	AV1_DEC_REG(20, 14, 0x7f)
>> +#define av1_global_mv_seg0		AV1_DEC_REG(20, 21, 0x1)
>> +#define av1_mf1_last_offset		AV1_DEC_REG(20, 22, 0x1ff)
>> +
>> +#define av1_filt_level_delta1_seg1	AV1_DEC_REG(21, 0, 0x7f)
>> +#define av1_filt_level_delta2_seg1	AV1_DEC_REG(21, 7, 0x7f)
>> +#define av1_filt_level_delta3_seg1	AV1_DEC_REG(21, 14, 0x7f)
>> +#define av1_global_mv_seg1		AV1_DEC_REG(21, 21, 0x1)
>> +#define av1_mf1_last2_offset		AV1_DEC_REG(21, 22, 0x1ff)
>> +
>> +#define av1_filt_level_delta1_seg2	AV1_DEC_REG(22, 0, 0x7f)
>> +#define av1_filt_level_delta2_seg2	AV1_DEC_REG(22, 7, 0x7f)
>> +#define av1_filt_level_delta3_seg2	AV1_DEC_REG(22, 14, 0x7f)
>> +#define av1_global_mv_seg2		AV1_DEC_REG(22, 21, 0x1)
>> +#define av1_mf1_last3_offset		AV1_DEC_REG(22, 22, 0x1ff)
>> +
>> +#define av1_filt_level_delta1_seg3	AV1_DEC_REG(23, 0, 0x7f)
>> +#define av1_filt_level_delta2_seg3	AV1_DEC_REG(23, 7, 0x7f)
>> +#define av1_filt_level_delta3_seg3	AV1_DEC_REG(23, 14, 0x7f)
>> +#define av1_global_mv_seg3		AV1_DEC_REG(23, 21, 0x1)
>> +#define av1_mf1_golden_offset		AV1_DEC_REG(23, 22, 0x1ff)
>> +
>> +#define av1_filt_level_delta1_seg4	AV1_DEC_REG(24, 0, 0x7f)
>> +#define av1_filt_level_delta2_seg4	AV1_DEC_REG(24, 7, 0x7f)
>> +#define av1_filt_level_delta3_seg4	AV1_DEC_REG(24, 14, 0x7f)
>> +#define av1_global_mv_seg4		AV1_DEC_REG(24, 21, 0x1)
>> +#define av1_mf1_bwdref_offset		AV1_DEC_REG(24, 22, 0x1ff)
>> +
>> +#define av1_filt_level_delta1_seg5	AV1_DEC_REG(25, 0, 0x7f)
>> +#define av1_filt_level_delta2_seg5	AV1_DEC_REG(25, 7, 0x7f)
>> +#define av1_filt_level_delta3_seg5	AV1_DEC_REG(25, 14, 0x7f)
>> +#define av1_global_mv_seg5		AV1_DEC_REG(25, 21, 0x1)
>> +#define av1_mf1_altref2_offset		AV1_DEC_REG(25, 22, 0x1ff)
>> +
>> +#define av1_filt_level_delta1_seg6	AV1_DEC_REG(26, 0, 0x7f)
>> +#define av1_filt_level_delta2_seg6	AV1_DEC_REG(26, 7, 0x7f)
>> +#define av1_filt_level_delta3_seg6	AV1_DEC_REG(26, 14, 0x7f)
>> +#define av1_global_mv_seg6		AV1_DEC_REG(26, 21, 0x1)
>> +#define av1_mf1_altref_offset		AV1_DEC_REG(26, 22, 0x1ff)
>> +
>> +#define av1_filt_level_delta1_seg7	AV1_DEC_REG(27, 0, 0x7f)
>> +#define av1_filt_level_delta2_seg7	AV1_DEC_REG(27, 7, 0x7f)
>> +#define av1_filt_level_delta3_seg7	AV1_DEC_REG(27, 14, 0x7f)
>> +#define av1_global_mv_seg7		AV1_DEC_REG(27, 21, 0x1)
>> +#define av1_mf2_last_offset		AV1_DEC_REG(27, 22, 0x1ff)
>> +
>> +#define av1_cb_offset			AV1_DEC_REG(28, 0, 0x1ff)
>> +#define av1_cb_luma_mult		AV1_DEC_REG(28, 9, 0xff)
>> +#define av1_cb_mult			AV1_DEC_REG(28, 17, 0xff)
>> +#define	av1_quant_delta_v_dc		AV1_DEC_REG(28, 25, 0x7f)
>> +
>> +#define av1_cr_offset			AV1_DEC_REG(29, 0, 0x1ff)
>> +#define av1_cr_luma_mult		AV1_DEC_REG(29, 9, 0xff)
>> +#define av1_cr_mult			AV1_DEC_REG(29, 17, 0xff)
>> +#define	av1_quant_delta_v_ac		AV1_DEC_REG(29, 25, 0x7f)
>> +
>> +#define av1_filt_ref_adj_5		AV1_DEC_REG(30, 0, 0x7f)
>> +#define av1_filt_ref_adj_4		AV1_DEC_REG(30, 7, 0x7f)
>> +#define av1_filt_mb_adj_1		AV1_DEC_REG(30, 14, 0x7f)
>> +#define av1_filt_mb_adj_0		AV1_DEC_REG(30, 21, 0x7f)
>> +#define av1_filt_sharpness		AV1_DEC_REG(30, 28, 0x7)
>> +
>> +#define av1_quant_seg6			AV1_DEC_REG(31, 0, 0xff)
>> +#define av1_filt_level_seg6		AV1_DEC_REG(31, 8, 0x3f)
>> +#define av1_skip_seg6			AV1_DEC_REG(31, 14, 0x1)
>> +#define av1_refpic_seg6			AV1_DEC_REG(31, 15, 0xf)
>> +#define av1_filt_level_delta0_seg6	AV1_DEC_REG(31, 19, 0x7f)
>> +#define av1_skip_ref0			AV1_DEC_REG(31, 26, 0xf)
>> +
>> +#define av1_quant_seg7			AV1_DEC_REG(32, 0, 0xff)
>> +#define av1_filt_level_seg7		AV1_DEC_REG(32, 8, 0x3f)
>> +#define av1_skip_seg7			AV1_DEC_REG(32, 14, 0x1)
>> +#define av1_refpic_seg7			AV1_DEC_REG(32, 15, 0xf)
>> +#define av1_filt_level_delta0_seg7	AV1_DEC_REG(32, 19, 0x7f)
>> +#define av1_skip_ref1			AV1_DEC_REG(32, 26, 0xf)
>> +
>> +#define av1_ref0_height			AV1_DEC_REG(33, 0, 0xffff)
>> +#define av1_ref0_width			AV1_DEC_REG(33, 16, 0xffff)
>> +
>> +#define av1_ref1_height			AV1_DEC_REG(34, 0, 0xffff)
>> +#define av1_ref1_width			AV1_DEC_REG(34, 16, 0xffff)
>> +
>> +#define av1_ref2_height			AV1_DEC_REG(35, 0, 0xffff)
>> +#define av1_ref2_width			AV1_DEC_REG(35, 16, 0xffff)
>> +
>> +#define av1_ref0_ver_scale		AV1_DEC_REG(36, 0, 0xffff)
>> +#define av1_ref0_hor_scale		AV1_DEC_REG(36, 16, 0xffff)
>> +
>> +#define av1_ref1_ver_scale		AV1_DEC_REG(37, 0, 0xffff)
>> +#define av1_ref1_hor_scale		AV1_DEC_REG(37, 16, 0xffff)
>> +
>> +#define av1_ref2_ver_scale		AV1_DEC_REG(38, 0, 0xffff)
>> +#define av1_ref2_hor_scale		AV1_DEC_REG(38, 16, 0xffff)
>> +
>> +#define av1_ref3_ver_scale		AV1_DEC_REG(39, 0, 0xffff)
>> +#define av1_ref3_hor_scale		AV1_DEC_REG(39, 16, 0xffff)
>> +
>> +#define av1_ref4_ver_scale		AV1_DEC_REG(40, 0, 0xffff)
>> +#define av1_ref4_hor_scale		AV1_DEC_REG(40, 16, 0xffff)
>> +
>> +#define av1_ref5_ver_scale		AV1_DEC_REG(41, 0, 0xffff)
>> +#define av1_ref5_hor_scale		AV1_DEC_REG(41, 16, 0xffff)
>> +
>> +#define av1_ref6_ver_scale		AV1_DEC_REG(42, 0, 0xffff)
>> +#define av1_ref6_hor_scale		AV1_DEC_REG(42, 16, 0xffff)
>> +
>> +#define av1_ref3_height			AV1_DEC_REG(43, 0, 0xffff)
>> +#define av1_ref3_width			AV1_DEC_REG(43, 16, 0xffff)
>> +
>> +#define av1_ref4_height			AV1_DEC_REG(44, 0, 0xffff)
>> +#define av1_ref4_width			AV1_DEC_REG(44, 16, 0xffff)
>> +
>> +#define av1_ref5_height			AV1_DEC_REG(45, 0, 0xffff)
>> +#define av1_ref5_width			AV1_DEC_REG(45, 16, 0xffff)
>> +
>> +#define av1_ref6_height			AV1_DEC_REG(46, 0, 0xffff)
>> +#define av1_ref6_width			AV1_DEC_REG(46, 16, 0xffff)
>> +
>> +#define av1_mf2_last2_offset		AV1_DEC_REG(47, 0, 0x1ff)
>> +#define av1_mf2_last3_offset		AV1_DEC_REG(47, 9, 0x1ff)
>> +#define av1_mf2_golden_offset		AV1_DEC_REG(47, 18, 0x1ff)
>> +#define av1_qmlevel_y			AV1_DEC_REG(47, 27, 0xf)
>> +
>> +#define av1_mf2_bwdref_offset		AV1_DEC_REG(48, 0, 0x1ff)
>> +#define av1_mf2_altref2_offset		AV1_DEC_REG(48, 9, 0x1ff)
>> +#define av1_mf2_altref_offset		AV1_DEC_REG(48, 18, 0x1ff)
>> +#define av1_qmlevel_u			AV1_DEC_REG(48, 27, 0xf)
>> +
>> +#define av1_filt_ref_adj_6		AV1_DEC_REG(49, 0, 0x7f)
>> +#define av1_filt_ref_adj_7		AV1_DEC_REG(49, 7, 0x7f)
>> +#define av1_qmlevel_v			AV1_DEC_REG(49, 14, 0xf)
>> +
>> +#define av1_superres_chroma_step	AV1_DEC_REG(51, 0, 0x3fff)
>> +#define av1_superres_luma_step		AV1_DEC_REG(51, 14, 0x3fff)
>> +
>> +#define av1_superres_init_chroma_subpel_x	AV1_DEC_REG(52, 0, 0x3fff)
>> +#define av1_superres_init_luma_subpel_x		AV1_DEC_REG(52, 14, 0x3fff)
>> +
>> +#define av1_cdef_chroma_secondary_strength	AV1_DEC_REG(53, 0, 0xffff)
>> +#define av1_cdef_luma_secondary_strength	AV1_DEC_REG(53, 16, 0xffff)
>> +
>> +/*
>> + * NOTE(review): av1_apf_single_pu_mode and av1_apf_disable below are declared
>> + * with the same (55, 30, 0x1) triple, i.e. they describe the same bit of
>> + * swreg55. One of the two shifts is presumably wrong (31?) - TODO confirm
>> + * against the hardware register map.
>> + */
>> +#define av1_apf_threshold		AV1_DEC_REG(55, 0, 0xffff)
>> +#define av1_apf_single_pu_mode		AV1_DEC_REG(55, 30, 0x1)
>> +#define av1_apf_disable			AV1_DEC_REG(55, 30, 0x1)
>> +
>> +#define av1_dec_max_burst		AV1_DEC_REG(58, 0, 0xff)
>> +#define av1_dec_buswidth		AV1_DEC_REG(58, 8, 0x7)
>> +#define av1_dec_multicore_mode		AV1_DEC_REG(58, 11, 0x3)
>> +#define av1_dec_axi_wd_id_e		AV1_DEC_REG(58,	13, 0x1)
>> +#define av1_dec_axi_rd_id_e		AV1_DEC_REG(58, 14, 0x1)
>> +#define av1_dec_mc_polltime		AV1_DEC_REG(58, 17, 0x3ff)
>> +#define av1_dec_mc_pollmode		AV1_DEC_REG(58,	27, 0x3)
>> +
>> +/*
>> + * swreg59: av1_filt_ref_adj_0..3 followed by av1_ref0..3_sign_bias.
>> + * The filt_ref_adj fields are laid out on a 7-bit stride (shifts 0, 7, 14,
>> + * 21), so each needs a 7-bit mask, matching av1_filt_ref_adj_4..7 which use
>> + * 0x7f. A 6-bit mask would drop the top bit of every value written.
>> + */
>> +#define av1_filt_ref_adj_3		AV1_DEC_REG(59, 0, 0x7f)
>> +#define av1_filt_ref_adj_2		AV1_DEC_REG(59, 7, 0x7f)
>> +#define av1_filt_ref_adj_1		AV1_DEC_REG(59, 14, 0x7f)
>> +#define av1_filt_ref_adj_0		AV1_DEC_REG(59, 21, 0x7f)
>> +#define av1_ref0_sign_bias		AV1_DEC_REG(59, 28, 0x1)
>> +#define av1_ref1_sign_bias		AV1_DEC_REG(59, 29, 0x1)
>> +#define av1_ref2_sign_bias		AV1_DEC_REG(59, 30, 0x1)
>> +#define av1_ref3_sign_bias		AV1_DEC_REG(59, 31, 0x1)
>> +
>> +#define av1_cur_last_roffset		AV1_DEC_REG(184, 0, 0x1ff)
>> +#define av1_cur_last_offset		AV1_DEC_REG(184, 9, 0x1ff)
>> +#define av1_mf3_last_offset		AV1_DEC_REG(184, 18, 0x1ff)
>> +#define av1_ref0_gm_mode		AV1_DEC_REG(184, 27, 0x3)
>> +
>> +#define av1_cur_last2_roffset		AV1_DEC_REG(185, 0, 0x1ff)
>> +#define av1_cur_last2_offset		AV1_DEC_REG(185, 9, 0x1ff)
>> +#define av1_mf3_last2_offset		AV1_DEC_REG(185, 18, 0x1ff)
>> +#define av1_ref1_gm_mode		AV1_DEC_REG(185, 27, 0x3)
>> +
>> +#define av1_cur_last3_roffset		AV1_DEC_REG(186, 0, 0x1ff)
>> +#define av1_cur_last3_offset		AV1_DEC_REG(186, 9, 0x1ff)
>> +#define av1_mf3_last3_offset		AV1_DEC_REG(186, 18, 0x1ff)
>> +#define av1_ref2_gm_mode		AV1_DEC_REG(186, 27, 0x3)
>> +
>> +#define av1_cur_golden_roffset		AV1_DEC_REG(187, 0, 0x1ff)
>> +#define av1_cur_golden_offset		AV1_DEC_REG(187, 9, 0x1ff)
>> +#define av1_mf3_golden_offset		AV1_DEC_REG(187, 18, 0x1ff)
>> +#define av1_ref3_gm_mode		AV1_DEC_REG(187, 27, 0x3)
>> +
>> +#define av1_cur_bwdref_roffset		AV1_DEC_REG(188, 0, 0x1ff)
>> +#define av1_cur_bwdref_offset		AV1_DEC_REG(188, 9, 0x1ff)
>> +#define av1_mf3_bwdref_offset		AV1_DEC_REG(188, 18, 0x1ff)
>> +#define av1_ref4_gm_mode		AV1_DEC_REG(188, 27, 0x3)
>> +
>> +#define av1_cur_altref2_roffset		AV1_DEC_REG(257, 0, 0x1ff)
>> +#define av1_cur_altref2_offset		AV1_DEC_REG(257, 9, 0x1ff)
>> +#define av1_mf3_altref2_offset		AV1_DEC_REG(257, 18, 0x1ff)
>> +#define av1_ref5_gm_mode		AV1_DEC_REG(257, 27, 0x3)
>> +
>> +#define av1_strm_buffer_len		AV1_DEC_REG(258, 0, 0xffffffff)
>> +
>> +#define av1_strm_start_offset		AV1_DEC_REG(259, 0, 0xffffffff)
>> +
>> +#define av1_ppd_blend_exist		AV1_DEC_REG(260, 21, 0x1)
>> +#define av1_ppd_dith_exist		AV1_DEC_REG(260, 23, 0x1)
>> +#define av1_ablend_crop_e		AV1_DEC_REG(260, 24, 0x1)
>> +#define av1_pp_format_p010_e		AV1_DEC_REG(260, 25, 0x1)
>> +#define av1_pp_format_customer1_e	AV1_DEC_REG(260, 26, 0x1)
>> +#define av1_pp_crop_exist		AV1_DEC_REG(260, 27, 0x1)
>> +#define av1_pp_up_level			AV1_DEC_REG(260, 28, 0x1)
>> +#define av1_pp_down_level		AV1_DEC_REG(260, 29, 0x3)
>> +#define av1_pp_exist			AV1_DEC_REG(260, 31, 0x1)
>> +
>> +#define av1_cur_altref_roffset		AV1_DEC_REG(262, 0, 0x1ff)
>> +#define av1_cur_altref_offset		AV1_DEC_REG(262, 9, 0x1ff)
>> +#define av1_mf3_altref_offset		AV1_DEC_REG(262, 18, 0x1ff)
>> +#define av1_ref6_gm_mode		AV1_DEC_REG(262, 27, 0x3)
>> +
>> +#define av1_cdef_luma_primary_strength	AV1_DEC_REG(263, 0, 0xffffffff)
>> +
>> +#define av1_cdef_chroma_primary_strength AV1_DEC_REG(264, 0, 0xffffffff)
>> +
>> +#define av1_axi_arqos			AV1_DEC_REG(265, 0, 0xf)
>> +#define av1_axi_awqos			AV1_DEC_REG(265, 4, 0xf)
>> +#define av1_axi_wr_ostd_threshold	AV1_DEC_REG(265, 8, 0x3ff)
>> +#define av1_axi_rd_ostd_threshold	AV1_DEC_REG(265, 18, 0x3ff)
>> +#define av1_axi_wr_4k_dis		AV1_DEC_REG(265, 31, 0x1)
>> +
>> +#define av1_128bit_mode			AV1_DEC_REG(266, 5, 0x1)
>> +#define av1_wr_shaper_bypass		AV1_DEC_REG(266, 10, 0x1)
>> +#define av1_error_conceal_e		AV1_DEC_REG(266, 30, 0x1)
>> +
>> +#define av1_superres_chroma_step_invra	AV1_DEC_REG(298, 0, 0xffff)
>> +#define av1_superres_luma_step_invra	AV1_DEC_REG(298, 16, 0xffff)
>> +
>> +#define av1_dec_alignment		AV1_DEC_REG(314, 0, 0xffff)
>> +
>> +#define av1_ext_timeout_cycles		AV1_DEC_REG(318, 0, 0x7fffffff)
>> +#define av1_ext_timeout_override_e	AV1_DEC_REG(318, 31, 0x1)
>> +
>> +#define av1_timeout_cycles		AV1_DEC_REG(319, 0, 0x7fffffff)
>> +#define av1_timeout_override_e		AV1_DEC_REG(319, 31, 0x1)
>> +
>> +#define av1_pp_out_e			AV1_DEC_REG(320, 0, 0x1)
>> +#define av1_pp_cr_first			AV1_DEC_REG(320, 1, 0x1)
>> +#define av1_pp_out_mode			AV1_DEC_REG(320, 2, 0x1)
>> +#define av1_pp_out_tile_e		AV1_DEC_REG(320, 3, 0x1)
>> +#define av1_pp_status			AV1_DEC_REG(320, 4, 0xf)
>> +#define av1_pp_in_blk_size		AV1_DEC_REG(320, 8, 0x7)
>> +#define av1_pp_out_p010_fmt		AV1_DEC_REG(320, 11, 0x3)
>> +#define av1_pp_out_rgb_fmt		AV1_DEC_REG(320, 13, 0x1f)
>> +#define av1_rgb_range_max		AV1_DEC_REG(320, 18, 0xfff)
>> +#define av1_pp_rgb_planar		AV1_DEC_REG(320, 30, 0x1)
>> +
>> +#define av1_scale_hratio		AV1_DEC_REG(322, 0, 0x3ffff)
>> +#define av1_pp_out_format		AV1_DEC_REG(322, 18, 0x1f)
>> +#define av1_ver_scale_mode		AV1_DEC_REG(322, 23, 0x3)
>> +#define av1_hor_scale_mode		AV1_DEC_REG(322, 25, 0x3)
>> +#define av1_pp_in_format		AV1_DEC_REG(322, 27, 0x1f)
>> +
>> +#define av1_pp_out_c_stride		AV1_DEC_REG(329, 0, 0xffff)
>> +#define av1_pp_out_y_stride		AV1_DEC_REG(329, 16, 0xffff)
>> +
>> +#define av1_pp_in_height		AV1_DEC_REG(331, 0, 0xffff)
>> +#define av1_pp_in_width			AV1_DEC_REG(331, 16, 0xffff)
>> +
>> +#define av1_pp_out_height		AV1_DEC_REG(332, 0, 0xffff)
>> +#define av1_pp_out_width		AV1_DEC_REG(332, 16, 0xffff)
>> +
>> +#define av1_pp1_dup_ver			AV1_DEC_REG(394, 0, 0xff)
>> +#define av1_pp1_dup_hor			AV1_DEC_REG(394, 8, 0xff)
>> +#define av1_pp0_dup_ver			AV1_DEC_REG(394, 16, 0xff)
>> +#define av1_pp0_dup_hor			AV1_DEC_REG(394, 24, 0xff)
>> +
>> +#define AV1_TILE_OUT_LU			(AV1_SWREG(65))
>> +#define AV1_REFERENCE_Y(i)		(AV1_SWREG(67) + ((i) * 0x8))
>> +#define AV1_SEGMENTATION		(AV1_SWREG(81))
>> +#define AV1_GLOBAL_MODEL		(AV1_SWREG(83))
>> +#define AV1_CDEF_COL			(AV1_SWREG(85))
>> +#define AV1_SR_COL			(AV1_SWREG(89))
>> +#define AV1_LR_COL			(AV1_SWREG(91))
>> +#define AV1_FILM_GRAIN			(AV1_SWREG(95))
>> +#define AV1_TILE_OUT_CH			(AV1_SWREG(99))
>> +#define AV1_REFERENCE_CB(i)		(AV1_SWREG(101) + ((i) * 0x8))
>> +#define AV1_TILE_OUT_MV			(AV1_SWREG(133))
>> +#define AV1_REFERENCE_MV(i)		(AV1_SWREG(135) + ((i) * 0x8))
>> +#define AV1_TILE_BASE			(AV1_SWREG(167))
>> +#define AV1_INPUT_STREAM		(AV1_SWREG(169))
>> +#define AV1_PROP_TABLE_OUT		(AV1_SWREG(171))
>> +#define AV1_PROP_TABLE			(AV1_SWREG(173))
>> +#define AV1_MC_SYNC_CURR		(AV1_SWREG(175))
>> +#define AV1_MC_SYNC_LEFT		(AV1_SWREG(177))
>> +#define AV1_DB_DATA_COL			(AV1_SWREG(179))
>> +#define AV1_DB_CTRL_COL			(AV1_SWREG(183))
>> +#define AV1_PP_OUT_LU			(AV1_SWREG(326))
>> +#define AV1_PP_OUT_CH			(AV1_SWREG(328))
>> +
>> +#endif /* _ROCKCHIP_VPU981_REGS_H_ */
Nicolas Dufresne Dec. 21, 2022, 4:30 p.m. UTC | #4
Le mardi 20 décembre 2022 à 15:49 +0100, Benjamin Gaignard a écrit :
> Le 19/12/2022 à 22:56, Nicolas Dufresne a écrit :
> > Le lundi 19 décembre 2022 à 16:56 +0100, Benjamin Gaignard a écrit :
> > > Implement AV1 stateless decoder for rockchip VPU981.
> > > > It decodes 8-bit and 10-bit AV1 bitstreams.
> > > AV1 scaling feature is done by the postprocessor.
> > Can you clarify ? I knew the filmgrain was, but didn't expect spatial scaling to
> > need it. On this aspect, I don't see any new code to let Hantro driver know that
> > the postproc is needed. The frame header value should be taken into account in
> > hantro_needs_postproc() (a per codec ops would be nice).
> 
> 2 tests (av1-1-b8-03-sizedown and av1-1-b8-03-sizeup) are only working if the postprocessor
> is used. Given their names I assume it is related to scaling.

I can take care of investigating what these streams do, and how the postproc
could save them. These are not SVC, so there is no reference scaling. But I
think I need to find out exactly what they do before commenting further.

> Forcing postprocessing is not an easy task in the Hantro driver, and this series is already big enough.
> 
> What I have prototyped to solve this problem is to add a "need_postproc" field in the context structure,
> but I still have issues around pixel format negotiation to solve before releasing it.
> 
> > 
> > > Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
> > > ---
> > >   drivers/media/platform/verisilicon/Makefile   |    1 +
> > >   .../media/platform/verisilicon/hantro_hw.h    |   64 +-
> > >   .../verisilicon/rockchip_vpu981_hw_av1_dec.c  | 2067 +++++++++++++++++
> > >   .../verisilicon/rockchip_vpu981_regs.h        |  477 ++++
> > >   4 files changed, 2607 insertions(+), 2 deletions(-)
> > >   create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
> > >   create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
> > > 
> > > diff --git a/drivers/media/platform/verisilicon/Makefile b/drivers/media/platform/verisilicon/Makefile
> > > index d2b2679c00eb..c9a9806ab8c5 100644
> > > --- a/drivers/media/platform/verisilicon/Makefile
> > > +++ b/drivers/media/platform/verisilicon/Makefile
> > > @@ -18,6 +18,7 @@ hantro-vpu-y += \
> > >   		rockchip_vpu2_hw_h264_dec.o \
> > >   		rockchip_vpu2_hw_mpeg2_dec.o \
> > >   		rockchip_vpu2_hw_vp8_dec.o \
> > > +		rockchip_vpu981_hw_av1_dec.o \
> > >   		rockchip_av1_entropymode.o \
> > >   		hantro_jpeg.o \
> > >   		hantro_h264.o \
> > > diff --git a/drivers/media/platform/verisilicon/hantro_hw.h b/drivers/media/platform/verisilicon/hantro_hw.h
> > > index e395aeeae2f4..3c0a995998a5 100644
> > > --- a/drivers/media/platform/verisilicon/hantro_hw.h
> > > +++ b/drivers/media/platform/verisilicon/hantro_hw.h
> > > @@ -37,6 +37,9 @@
> > >   
> > >   #define NUM_REF_PICTURES	(V4L2_HEVC_DPB_ENTRIES_NUM_MAX + 1)
> > >   
> > > +#define AV1_REF_LIST_SIZE	8
> > This is the same as V4L2_AV1_TOTAL_REFS_PER_FRAME. AV1_TOTAL_REFS_PER_FRAME is
> > how this is called in the spec.
> 
> ok
> 
> > 
> > > +#define AV1_MAX_FRAME_BUF_COUNT	(V4L2_AV1_TOTAL_REFS_PER_FRAME + 1)
> > > +
> > >   struct hantro_dev;
> > >   struct hantro_ctx;
> > >   struct hantro_buf;
> > > @@ -250,23 +253,80 @@ struct hantro_vp9_dec_hw_ctx {
> > >   };
> > >   
> > >   /**
> > > - * hantro_av1_dec_hw_ctx
> > > + * struct hantro_av1_dec_ctrls
> > > + * @sequence:		AV1 Sequence
> > > + * @tile_group_entry:	AV1 Tile Group entry
> > > + * @frame:		AV1 Frame Header OBU
> > > + * @film_grain:		AV1 Film Grain
> > > + */
> > > +struct hantro_av1_dec_ctrls {
> > > +	const struct v4l2_ctrl_av1_sequence *sequence;
> > > +	const struct v4l2_ctrl_av1_tile_group_entry *tile_group_entry;
> > > +	const struct v4l2_ctrl_av1_frame *frame;
> > > +	const struct v4l2_ctrl_av1_film_grain *film_grain;
> > > +};
> > > +
> > > > +/**
> > > > + * struct hantro_av1_frame_ref - one slot of the AV1 decoder frame_refs table
> > > > + * @width:		frame width, stored as frame_width_minus_1 + 1
> > > > + * @height:		frame height, stored as frame_height_minus_1 + 1
> > > > + * @timestamp:		timestamp the slot was filled with; slots are looked
> > > > + *			up by comparing it with reference_frame_ts[] entries
> > > > + * @frame_type:		frame_type copied from the frame header control
> > > > + * @ref_count:		non-zero while the slot is in use, 0 when it is free
> > > > + * @order_hint:		order_hint copied from the frame header control
> > > > + * @order_hints:	order_hints[] copied from the frame header control
> > > > + * @gm_mode:		global motion type recorded when the slot was filled
> > > > + * @vb2_ref:		destination buffer attached to the slot
> > > > + */
> > > > +struct hantro_av1_frame_ref {
> > > > +	int width;
> > > > +	int height;
> > > > +	u64 timestamp;
> > > > +	enum v4l2_av1_frame_type frame_type;
> > > > +	int ref_count;
> > > > +	u32 order_hint;
> > > > +	u32 order_hints[V4L2_AV1_NUM_REF_FRAMES];
> > > > +	int gm_mode;
> > > > +	struct vb2_v4l2_buffer *vb2_ref;
> > > > +};
> > > +
> > > +/**
> > > + * struct hantro_av1_dec_hw_ctx
> > > + * @db_data_col:	db tile col data buffer
> > > + * @db_ctrl_col:	db tile col ctrl buffer
> > > + * @cdef_col:		cdef tile col buffer
> > > + * @sr_col:		sr tile col buffer
> > > + * @lr_col:		lr tile col buffer
> > > + * @global_model:	global model buffer
> > > + * @tile_info:		tile info buffer
> > > + * @segment:		segmentation info buffer
> > > + * @prob_tbl:		probability table
> > > + * @prob_tbl_out:	probability table output
> > > + * @tile_buf:		tile buffer
> > > + * @ctrls:		V4L2 controls attached to a run
> > > + * @frame_refs:		reference frames info slots
> > > + * @ref_frame_sign_bias: array of sign bias
> > > + * @num_tile_cols_allocated: number of allocated tiles
> > >    * @cdfs:		current probabilities structure
> > >    * @cdfs_ndvc:		current mv probabilities structure
> > >    * @default_cdfs:	default probabilities structure
> > >    * @default_cdfs_ndvc:	default mv probabilties structure
> > >    * @cdfs_last:		stored probabilities structures
> > >    * @cdfs_last_ndvc:	stored mv probabilities structures
> > > > + * @current_frame_index: index of the current frame in the frame_refs array
> > >    */
> > >   struct hantro_av1_dec_hw_ctx {
> > > +	struct hantro_aux_buf db_data_col;
> > > +	struct hantro_aux_buf db_ctrl_col;
> > > +	struct hantro_aux_buf cdef_col;
> > > +	struct hantro_aux_buf sr_col;
> > > +	struct hantro_aux_buf lr_col;
> > > +	struct hantro_aux_buf global_model;
> > > +	struct hantro_aux_buf tile_info;
> > > +	struct hantro_aux_buf segment;
> > > +	struct hantro_aux_buf prob_tbl;
> > > +	struct hantro_aux_buf prob_tbl_out;
> > > +	struct hantro_aux_buf tile_buf;
> > > +	struct hantro_av1_dec_ctrls ctrls;
> > > +	struct hantro_av1_frame_ref frame_refs[AV1_MAX_FRAME_BUF_COUNT];
> > > +	uint32_t ref_frame_sign_bias[AV1_REF_LIST_SIZE];
> > > +	unsigned int num_tile_cols_allocated;
> > >   	struct av1cdfs *cdfs;
> > >   	struct mvcdfs  *cdfs_ndvc;
> > >   	struct av1cdfs default_cdfs;
> > >   	struct mvcdfs  default_cdfs_ndvc;
> > >   	struct av1cdfs cdfs_last[NUM_REF_FRAMES];
> > >   	struct mvcdfs  cdfs_last_ndvc[NUM_REF_FRAMES];
> > > +	int current_frame_index;
> > >   };
> > > -
> > >   /**
> > >    * struct hantro_postproc_ctx
> > >    *
> > > diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
> > > new file mode 100644
> > > index 000000000000..a183e4f35e00
> > > --- /dev/null
> > > +++ b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
> > > @@ -0,0 +1,2067 @@
> > > +// SPDX-License-Identifier: GPL-2.0-only
> > > +/*
> > > + * Copyright (c) 2021, Collabora
> > > + *
> > > + * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
> > > + */
> > > +
> > > +#include <media/v4l2-mem2mem.h>
> > > +#include "hantro.h"
> > > +#include "hantro_v4l2.h"
> > > +#include "rockchip_vpu981_regs.h"
> > > +
> > > +#define AV1_DEC_MODE		17
> > > +#define GM_GLOBAL_MODELS_PER_FRAME	7
> > > +#define GLOBAL_MODEL_TOTAL_SIZE	(6 * 4 + 4 * 2)
> > > +#define GLOBAL_MODEL_SIZE	ALIGN(GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE, 2048)
> > > +#define AV1_MAX_TILES		128
> > > +#define AV1_TILE_INFO_SIZE	(AV1_MAX_TILES * 16)
> > > +#define AV1DEC_MAX_PIC_BUFFERS	24
> > > +#define AV1_REF_SCALE_SHIFT	14
> > > +#define AV1_INVALID_IDX		-1
> > > +#define MAX_FRAME_DISTANCE	31
> > > +#define AV1_PRIMARY_REF_NONE	7
> > > +#define AV1_TILE_SIZE		ALIGN(32 * 128, 4096)
> > > +/*
> > > > + * These 3 values aren't defined in enum v4l2_av1_segment_feature because
> > > > + * they are not part of the specification
> > > + */
> > > +#define V4L2_AV1_SEG_LVL_ALT_LF_Y_H	2
> > > +#define V4L2_AV1_SEG_LVL_ALT_LF_U	3
> > > +#define V4L2_AV1_SEG_LVL_ALT_LF_V	4
> > > +
> > > +#define CLIP3(l, h, v)      ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
> > include/linux/minmax.h:#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
> 
> ok I will remove it
> 
> > 
> > > +
> > > +#define SUPERRES_SCALE_BITS 3
> > > +#define SCALE_NUMERATOR 8
> > > +#define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)
> > > +
> > > +#define RS_SUBPEL_BITS 6
> > > +#define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
> > > +#define RS_SCALE_SUBPEL_BITS 14
> > > +#define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
> > > +#define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
> > > +#define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))
> > > +
> > > +#define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
> > > +
> > > +#define LST_BUF_IDX (V4L2_AV1_REF_LAST_FRAME - V4L2_AV1_REF_LAST_FRAME)
> > > +#define LST2_BUF_IDX (V4L2_AV1_REF_LAST2_FRAME - V4L2_AV1_REF_LAST_FRAME)
> > > +#define LST3_BUF_IDX (V4L2_AV1_REF_LAST3_FRAME - V4L2_AV1_REF_LAST_FRAME)
> > > +#define GLD_BUF_IDX (V4L2_AV1_REF_GOLDEN_FRAME - V4L2_AV1_REF_LAST_FRAME)
> > > +#define BWD_BUF_IDX (V4L2_AV1_REF_BWDREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
> > > +#define ALT2_BUF_IDX (V4L2_AV1_REF_ALTREF2_FRAME - V4L2_AV1_REF_LAST_FRAME)
> > > +#define ALT_BUF_IDX (V4L2_AV1_REF_ALTREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
> > > +
> > > +#define DIV_LUT_PREC_BITS 14
> > > +#define DIV_LUT_BITS 8
> > > +#define DIV_LUT_NUM BIT(DIV_LUT_BITS)
> > > +#define WARP_PARAM_REDUCE_BITS 6
> > > +#define WARPEDMODEL_PREC_BITS 16
> > > +
> > > +#define AV1_DIV_ROUND_UP_POW2(value, n)			\
> > > +({							\
> > > +	typeof(n) _n  = n;				\
> > > +	typeof(value) _value = value;			\
> > > +	(_value + (BIT(_n) >> 1)) >> _n;		\
> > > +})
> > > +
> > > +#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n)				\
> > > +({									\
> > > +	typeof(n) _n_  = n;						\
> > > +	typeof(value) _value_ = value;					\
> > > +	(((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_))	\
> > > +		: AV1_DIV_ROUND_UP_POW2((_value_), (_n_)));		\
> > > +})
> > > +
> > > +struct rockchip_av1_film_grain {
> > > +	uint8_t scaling_lut_y[256];
> > > +	uint8_t scaling_lut_cb[256];
> > > +	uint8_t scaling_lut_cr[256];
> > > +	int16_t cropped_luma_grain_block[4096];
> > > +	int16_t cropped_chroma_grain_block[1024 * 2];
> > > +};
> > > +
> > > +static const short div_lut[DIV_LUT_NUM + 1] = {
> > > +	16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
> > > +	15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
> > > +	15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
> > > +	14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
> > > +	13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
> > > +	13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
> > > +	13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
> > > +	12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
> > > +	12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
> > > +	11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
> > > +	11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
> > > +	11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
> > > +	10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
> > > +	10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
> > > +	10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
> > > +	9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
> > > +	9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
> > > +	9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
> > > +	9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
> > > +	9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
> > > +	8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
> > > +	8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
> > > +	8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
> > > +	8240,  8224,  8208,  8192,
> > > +};
> > > +
> > > > +/*
> > > > + * Find the frame_refs slot that holds the picture used as reference @ref.
> > > > + *
> > > > + * @ref indexes frame->ref_frame_idx[]; the value found there selects an entry
> > > > + * of frame->reference_frame_ts[], and the slot is located by matching that
> > > > + * timestamp against the slots currently in use (ref_count != 0).
> > > > + *
> > > > + * Returns the slot index, or AV1_INVALID_IDX when the index read from the
> > > > + * frame header is out of range or no in-use slot carries that timestamp.
> > > > + */
> > > > +static int rockchip_vpu981_get_frame_index(struct hantro_ctx *ctx, int ref)
> > > > +{
> > > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > > +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> > > > +	u64 timestamp;
> > > > +	int i, idx = frame->ref_frame_idx[ref];
> > > > +
> > > > +	/*
> > > > +	 * idx selects an entry of reference_frame_ts[], which holds
> > > > +	 * V4L2_AV1_TOTAL_REFS_PER_FRAME entries. AV1_MAX_FRAME_BUF_COUNT is
> > > > +	 * one larger and would let idx run one entry past the array.
> > > > +	 */
> > > > +	if (idx >= V4L2_AV1_TOTAL_REFS_PER_FRAME || idx < 0)
> > > > +		return AV1_INVALID_IDX;
> > > > +
> > > > +	timestamp = frame->reference_frame_ts[idx];
> > > > +	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
> > > > +		/* Free slots may still carry a stale timestamp: skip them. */
> > > > +		if (av1_dec->frame_refs[i].ref_count == 0)
> > > > +			continue;
> > > > +		if (av1_dec->frame_refs[i].timestamp == timestamp)
> > > > +			return i;
> > > > +	}
> > > nit: Mediatek optimize this with a map
> > > 
> > > > +
> > > > +	return AV1_INVALID_IDX;
> > > > +}
> > > +
> > > > +/*
> > > > + * Order hint stored in the frame_refs slot backing reference @ref,
> > > > + * or 0 when no slot can be found for that reference.
> > > > + */
> > > > +static int rockchip_vpu981_get_order_hint(struct hantro_ctx *ctx, int ref)
> > > > +{
> > > > +	int slot = rockchip_vpu981_get_frame_index(ctx, ref);
> > > > +
> > > > +	if (slot == AV1_INVALID_IDX)
> > > > +		return 0;
> > > > +
> > > > +	return ctx->av1_dec.frame_refs[slot].order_hint;
> > > > +}
> > > +
> > > +static int rockchip_vpu981_av1_dec_frame_ref(struct hantro_ctx *ctx,
> > > +					     u64 timestamp)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> > > +	int i;
> > > +
> > > +	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
> > > +		if (av1_dec->frame_refs[i].ref_count == 0) {
> > > +			int j;
> > > +
> > > +			av1_dec->frame_refs[i].width =
> > > +			    frame->frame_width_minus_1 + 1;
> > > +			av1_dec->frame_refs[i].height =
> > > +			    frame->frame_height_minus_1 + 1;
> > > +			av1_dec->frame_refs[i].timestamp = timestamp;
> > > +			av1_dec->frame_refs[i].frame_type = frame->frame_type;
> > > +			av1_dec->frame_refs[i].order_hint = frame->order_hint;
> > > +			av1_dec->frame_refs[i].gm_mode =
> > > +				frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME + i];
> > > +			if (!av1_dec->frame_refs[i].vb2_ref)
> > > +				av1_dec->frame_refs[i].vb2_ref = hantro_get_dst_buf(ctx);
> > > +
> > > +			for (j = 0; j < V4L2_AV1_NUM_REF_FRAMES; j++)
> > > +				av1_dec->frame_refs[i].order_hints[j] = frame->order_hints[j];
> > > +
> > > +			av1_dec->frame_refs[i].ref_count++;
> > This ref_count is only ever 0 or 1. It would be nicer if it were a boolean,
> > like "valid" or "active".
> 
> I will change it to "bool used;"
> 
> > 
> > > +			av1_dec->current_frame_index = i;
> > > +			return i;
> > > +		}
> > > +	}
> > > +
> > > +	return AV1_INVALID_IDX;
> > > +}
> > > +
> > > +static void rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx *ctx, int idx)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +
> > > +	if (idx < 0)
> > > +		return;
> > > +
> > > +	av1_dec->frame_refs[idx].ref_count--;
> > > +
> > > +	if (av1_dec->frame_refs[idx].ref_count < 0)
> > > +		pr_warn("AV1 reference frames refcounting error (idx %d)\n", idx);
> > Looks like a BUG_ON case; only a driver programming error could cause this.
> 
> I have never hit it, so I will remove it.
> 
> > 
> > 
> > > +}
> > > +
> > > +static void rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +
> > > +	int ref, idx;
> > > +
> > > +	for (idx = 0; idx < AV1_MAX_FRAME_BUF_COUNT; idx++) {
> > > +		u64 timestamp = av1_dec->frame_refs[idx].timestamp;
> > > +		bool used = false;
> > > +
> > > +		if (av1_dec->frame_refs[idx].ref_count == 0)
> > > +			continue;
> > > +
> > > +		for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++) {
> > > +			if (ctrls->frame->reference_frame_ts[ref] == timestamp)
> > > +				used = true;
> > > +		}
> > > +
> > > +		if (!used)
> > > +			rockchip_vpu981_av1_dec_frame_unref(ctx, idx);
> > > +	}
> > > +}
> > > +
> > > +static size_t rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx *ctx)
> > > +{
> > > +	return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8;
> > > +}
> > > +
> > > +static size_t rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx *ctx)
> > > +{
> > > +	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
> > > +
> > > +	return ALIGN((cr_offset * 3) / 2, 64);
> > > +}
> > > +
> > > +void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +
> > > +	if (av1_dec->db_data_col.cpu)
> > > +		dma_free_coherent(vpu->dev, av1_dec->db_data_col.size,
> > > +				  av1_dec->db_data_col.cpu,
> > > +				  av1_dec->db_data_col.dma);
> > > +	av1_dec->db_data_col.cpu = NULL;
> > > +
> > > +	if (av1_dec->db_ctrl_col.cpu)
> > > +		dma_free_coherent(vpu->dev, av1_dec->db_ctrl_col.size,
> > > +				  av1_dec->db_ctrl_col.cpu,
> > > +				  av1_dec->db_ctrl_col.dma);
> > > +	av1_dec->db_ctrl_col.cpu = NULL;
> > > +
> > > +	if (av1_dec->cdef_col.cpu)
> > > +		dma_free_coherent(vpu->dev, av1_dec->cdef_col.size,
> > > +				  av1_dec->cdef_col.cpu, av1_dec->cdef_col.dma);
> > > +	av1_dec->cdef_col.cpu = NULL;
> > > +
> > > +	if (av1_dec->sr_col.cpu)
> > > +		dma_free_coherent(vpu->dev, av1_dec->sr_col.size,
> > > +				  av1_dec->sr_col.cpu, av1_dec->sr_col.dma);
> > > +	av1_dec->sr_col.cpu = NULL;
> > > +
> > > +	if (av1_dec->lr_col.cpu)
> > > +		dma_free_coherent(vpu->dev, av1_dec->lr_col.size,
> > > +				  av1_dec->lr_col.cpu, av1_dec->lr_col.dma);
> > > +	av1_dec->lr_col.cpu = NULL;
> > > +}
> > > +
> > > +static int rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +	unsigned int num_tile_cols = 1 << ctrls->tile_group_entry->tile_col;
> > > +	unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
> > > +	unsigned int height_in_sb = height / 64;
> > > +	unsigned int stripe_num = ((height + 8) + 63) / 64;
> > > +	size_t size;
> > > +
> > > +	if (num_tile_cols <= av1_dec->num_tile_cols_allocated)
> > > +		return 0;
> > > +
> > > +	rockchip_vpu981_av1_dec_tiles_free(ctx);
> > > +
> > > +	size = ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols;
> > > +	av1_dec->db_data_col.cpu = dma_alloc_coherent(vpu->dev, size,
> > > +						      &av1_dec->db_data_col.dma,
> > > +						      GFP_KERNEL);
> > > +	if (!av1_dec->db_data_col.cpu)
> > > +		goto buffer_allocation_error;
> > > +	av1_dec->db_data_col.size = size;
> > > +
> > > +	size = ALIGN(height * 2 * 16 / 4, 128) * num_tile_cols;
> > No bit_depth scaling?
> 
> No, the MPP code doesn't scale this one depending on bit_depth.
> 
> > 
> > > +	av1_dec->db_ctrl_col.cpu = dma_alloc_coherent(vpu->dev, size,
> > > +						      &av1_dec->db_ctrl_col.dma,
> > > +						      GFP_KERNEL);
> > > +	if (!av1_dec->db_ctrl_col.cpu)
> > > +		goto buffer_allocation_error;
> > > +	av1_dec->db_ctrl_col.size = size;
> > > +
> > > +	size = ALIGN(height_in_sb * 44 * ctx->bit_depth * 16 / 8, 128) * num_tile_cols;
> > > +	av1_dec->cdef_col.cpu = dma_alloc_coherent(vpu->dev, size,
> > > +						   &av1_dec->cdef_col.dma,
> > > +						   GFP_KERNEL);
> > > +	if (!av1_dec->cdef_col.cpu)
> > > +		goto buffer_allocation_error;
> > > +	av1_dec->cdef_col.size = size;
> > > +
> > > +	size = ALIGN(height_in_sb * (3040 + 1280), 128) * num_tile_cols;
> > > +	av1_dec->sr_col.cpu = dma_alloc_coherent(vpu->dev, size,
> > > +						 &av1_dec->sr_col.dma,
> > > +						 GFP_KERNEL);
> > > +	if (!av1_dec->sr_col.cpu)
> > > +		goto buffer_allocation_error;
> > > +	av1_dec->sr_col.size = size;
> > > +
> > > +	size = ALIGN(stripe_num * 1536 * ctx->bit_depth / 8, 128) * num_tile_cols;
> > > +	av1_dec->lr_col.cpu = dma_alloc_coherent(vpu->dev, size,
> > > +						 &av1_dec->lr_col.dma,
> > > +						 GFP_KERNEL);
> > > +	if (!av1_dec->lr_col.cpu)
> > > +		goto buffer_allocation_error;
> > > +	av1_dec->lr_col.size = size;
> > > +
> > > +	av1_dec->num_tile_cols_allocated = num_tile_cols;
> > > +	return 0;
> > > +
> > > +buffer_allocation_error:
> > > +	rockchip_vpu981_av1_dec_tiles_free(ctx);
> > > +	return -ENOMEM;
> > > +}
> > > +
> > > +void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +
> > > +	if (av1_dec->global_model.cpu)
> > > +		dma_free_coherent(vpu->dev, av1_dec->global_model.size,
> > > +				  av1_dec->global_model.cpu,
> > > +				  av1_dec->global_model.dma);
> > > +	av1_dec->global_model.cpu = NULL;
> > > +
> > > +	if (av1_dec->tile_info.cpu)
> > > +		dma_free_coherent(vpu->dev, av1_dec->tile_info.size,
> > > +				  av1_dec->tile_info.cpu,
> > > +				  av1_dec->tile_info.dma);
> > > +	av1_dec->tile_info.cpu = NULL;
> > > +
> > > +	if (av1_dec->prob_tbl.cpu)
> > > +		dma_free_coherent(vpu->dev, av1_dec->prob_tbl.size,
> > > +				  av1_dec->prob_tbl.cpu, av1_dec->prob_tbl.dma);
> > > +	av1_dec->prob_tbl.cpu = NULL;
> > > +
> > > +	if (av1_dec->prob_tbl_out.cpu)
> > > +		dma_free_coherent(vpu->dev, av1_dec->prob_tbl_out.size,
> > > +				  av1_dec->prob_tbl_out.cpu,
> > > +				  av1_dec->prob_tbl_out.dma);
> > > +	av1_dec->prob_tbl_out.cpu = NULL;
> > > +
> > > +	if (av1_dec->tile_buf.cpu)
> > > +		dma_free_coherent(vpu->dev, av1_dec->tile_buf.size,
> > > +				  av1_dec->tile_buf.cpu, av1_dec->tile_buf.dma);
> > > +	av1_dec->tile_buf.cpu = NULL;
> > > +
> > > +	rockchip_vpu981_av1_dec_tiles_free(ctx);
> > > +}
> > > +
> > > +int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +
> > > +	memset(av1_dec, 0, sizeof(*av1_dec));
> > > +
> > > +	av1_dec->global_model.cpu = dma_alloc_coherent(vpu->dev, GLOBAL_MODEL_SIZE,
> > > +						       &av1_dec->global_model.dma,
> > > +						       GFP_KERNEL);
> > > +	if (!av1_dec->global_model.cpu)
> > > +		return -ENOMEM;
> > > +	av1_dec->global_model.size = GLOBAL_MODEL_SIZE;
> > > +
> > > +	av1_dec->tile_info.cpu = dma_alloc_coherent(vpu->dev, AV1_MAX_TILES,
> > > +						    &av1_dec->tile_info.dma,
> > > +						    GFP_KERNEL);
> > > +	if (!av1_dec->tile_info.cpu)
> > > +		return -ENOMEM;
> > > +	av1_dec->tile_info.size = AV1_MAX_TILES;
> > > +
> > > +	av1_dec->prob_tbl.cpu = dma_alloc_coherent(vpu->dev,
> > > +						   ALIGN(sizeof(struct av1cdfs), 2048),
> > > +						   &av1_dec->prob_tbl.dma,
> > > +						   GFP_KERNEL);
> > > +	if (!av1_dec->prob_tbl.cpu)
> > > +		return -ENOMEM;
> > > +	av1_dec->prob_tbl.size = ALIGN(sizeof(struct av1cdfs), 2048);
> > > +
> > > +	av1_dec->prob_tbl_out.cpu = dma_alloc_coherent(vpu->dev,
> > > +						       ALIGN(sizeof(struct av1cdfs), 2048),
> > > +						       &av1_dec->prob_tbl_out.dma,
> > > +						       GFP_KERNEL);
> > > +	if (!av1_dec->prob_tbl_out.cpu)
> > > +		return -ENOMEM;
> > > +	av1_dec->prob_tbl_out.size = ALIGN(sizeof(struct av1cdfs), 2048);
> > > +	av1_dec->cdfs = &av1_dec->default_cdfs;
> > > +	av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
> > > +
> > > +	rockchip_av1_set_default_cdfs(av1_dec->cdfs, av1_dec->cdfs_ndvc);
> > > +
> > > +	av1_dec->tile_buf.cpu = dma_alloc_coherent(vpu->dev,
> > > +						   AV1_TILE_SIZE,
> > > +						   &av1_dec->tile_buf.dma,
> > > +						   GFP_KERNEL);
> > > +	if (!av1_dec->tile_buf.cpu)
> > > +		return -ENOMEM;
> > > +	av1_dec->tile_buf.size = AV1_TILE_SIZE;
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +
> > > +	ctrls->sequence = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_SEQUENCE);
> > > +	if (WARN_ON(!ctrls->sequence))
> > > +		return -EINVAL;
> > > +
> > > +	ctrls->tile_group_entry =
> > > +	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
> > > +	if (WARN_ON(!ctrls->tile_group_entry))
> > > +		return -EINVAL;
> > > +
> > > +	ctrls->frame = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FRAME);
> > > +	if (WARN_ON(!ctrls->frame))
> > > +		return -EINVAL;
> > > +
> > > +	ctrls->film_grain =
> > > +	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FILM_GRAIN);
> > > +
> > > +	return rockchip_vpu981_av1_dec_tiles_reallocate(ctx);
> > > +}
> > > +
> > > +static inline int rockchip_vpu981_av1_dec_get_msb(u32 n)
> > > +{
> > > +	if (n == 0)
> > > +		return 0;
> > > +	return 31 ^ __builtin_clz(n);
> > > +}
> > > +
> > > +static short rockchip_vpu981_av1_dec_resolve_divisor_32(u32 d, short *shift)
> > > +{
> > > +	int f;
> > > +	uint64_t e;
> > > +
> > > +	*shift = rockchip_vpu981_av1_dec_get_msb(d);
> > > +	/* e is obtained from D after resetting the most significant 1 bit. */
> > > +	e = d - ((u32)1 << *shift);
> > > +	/* Get the most significant DIV_LUT_BITS (8) bits of e into f */
> > > +	if (*shift > DIV_LUT_BITS)
> > > +		f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS);
> > > +	else
> > > +		f = e << (DIV_LUT_BITS - *shift);
> > > +	if (f > DIV_LUT_NUM)
> > > +		return -1;
> > > +	*shift += DIV_LUT_PREC_BITS;
> > > +	/* Use f as lookup into the precomputed table of multipliers */
> > > +	return div_lut[f];
> > > +}
> > > +
> > > +static void rockchip_vpu981_av1_dec_get_shear_params(const uint32_t *params,
> > > +	int64_t *alpha, int64_t *beta, int64_t *gamma, int64_t *delta)
> > > +{
> > > +	const int *mat = params;
> > > +	short shift;
> > > +	short y;
> > > +	long long gv, dv;
> > > +
> > > +	if (mat[2] <= 0)
> > > +		return;
> > > +
> > > +	*alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
> > > +	*beta = clamp_val(mat[3], S16_MIN, S16_MAX);
> > > +
> > > +	y = rockchip_vpu981_av1_dec_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
> > > +
> > > +	gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
> > > +
> > > +	*gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), S16_MIN, S16_MAX);
> > > +
> > > +	dv = ((long long)mat[3] * mat[4]) * y;
> > > +	*delta = clamp_val(
> > > +		mat[5] -
> > > +		(int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - (1 << WARPEDMODEL_PREC_BITS),
> > > +		S16_MIN, S16_MAX);
> > > +
> > > +	*alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS)
> > > +		 * (1 << WARP_PARAM_REDUCE_BITS);
> > > +	*beta = AV1_DIV_ROUND_UP_POW2_SIGNED(*beta, WARP_PARAM_REDUCE_BITS)
> > > +		* (1 << WARP_PARAM_REDUCE_BITS);
> > > +	*gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS)
> > > +		 * (1 << WARP_PARAM_REDUCE_BITS);
> > > +	*delta = AV1_DIV_ROUND_UP_POW2_SIGNED(*delta, WARP_PARAM_REDUCE_BITS)
> > > +		* (1 << WARP_PARAM_REDUCE_BITS);
> > > +}
> > > +
> > > +static void rockchip_vpu981_av1_dec_set_global_model(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> > > +	const struct v4l2_av1_global_motion *gm = &frame->global_motion;
> > > +	uint8_t *dst = av1_dec->global_model.cpu;
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +	int ref_frame, i;
> > > +
> > > +	memset(dst, 0, GLOBAL_MODEL_SIZE);
> > > +	for (ref_frame = 0; ref_frame < V4L2_AV1_REFS_PER_FRAME; ++ref_frame) {
> > > +		int64_t alpha = 0, beta = 0, gamma = 0, delta = 0;
> > > +
> > > +		for (i = 0; i < 6; ++i) {
> > > +			if (i == 2)
> > > +				*(int32_t *)dst =
> > > +					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][3];
> > > +			else if (i == 3)
> > > +				*(int32_t *)dst =
> > > +					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][2];
> > > +			else
> > > +				*(int32_t *)dst =
> > > +					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][i];
> > > +			dst += 4;
> > > +		}
> > > +
> > > +		if (gm->type[V4L2_AV1_REF_LAST_FRAME + ref_frame] <= V4L2_AV1_WARP_MODEL_AFFINE)
> > > +			rockchip_vpu981_av1_dec_get_shear_params(
> > > +					&gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][0],
> > > +					&alpha, &beta, &gamma, &delta);
> > > +
> > > +		*(int16_t *)dst = alpha;
> > > +		dst += 2;
> > > +		*(int16_t *)dst = beta;
> > > +		dst += 2;
> > > +		*(int16_t *)dst = gamma;
> > > +		dst += 2;
> > > +		*(int16_t *)dst = delta;
> > > +		dst += 2;
> > > +	}
> > > +
> > > +	hantro_write_addr(vpu, AV1_GLOBAL_MODEL, av1_dec->global_model.dma);
> > > +}
> > > +
> > > +static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +	struct v4l2_av1_tile_info tile_info = ctrls->frame->tile_info;
> > > +	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
> > > +	    ctrls->tile_group_entry;
> > > +	int context_update_y =
> > > +	    tile_info.context_update_tile_id / tile_info.tile_cols;
> > > +	int context_update_x =
> > > +	    tile_info.context_update_tile_id % tile_info.tile_cols;
> > > +	int context_update_tile_id =
> > > +	    context_update_x * tile_info.tile_rows + context_update_y;
> > > +	uint8_t *dst = av1_dec->tile_info.cpu;
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +	int tile0, tile1;
> > > +
> > > +	memset(dst, 0, av1_dec->tile_info.size);
> > > +
> > > +	for (tile0 = 0; tile0 < tile_info.tile_cols; tile0++) {
> > > +		for (tile1 = 0; tile1 < tile_info.tile_rows; tile1++) {
> > > +			int tile_id = tile1 * tile_info.tile_cols + tile0;
> > > +			uint32_t start, end;
> > > +			uint32_t y0 =
> > > +			    tile_info.height_in_sbs_minus_1[tile1] + 1;
> > > +			uint32_t x0 = tile_info.width_in_sbs_minus_1[tile0] + 1;
> > > +
> > > +			// tile size in SB units (width,height)
> > > +			*dst++ = x0;
> > > +			*dst++ = 0;
> > > +			*dst++ = 0;
> > > +			*dst++ = 0;
> > > +			*dst++ = y0;
> > > +			*dst++ = 0;
> > > +			*dst++ = 0;
> > > +			*dst++ = 0;
> > > +
> > > +			// tile start position
> > > +			start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
> > > +			*dst++ = start & 255;
> > > +			*dst++ = (start >> 8) & 255;
> > > +			*dst++ = (start >> 16) & 255;
> > > +			*dst++ = (start >> 24) & 255;
> > > +
> > > +			// # of bytes in tile data
> > > +			end = start + group_entry[tile_id].tile_size;
> > > +			*dst++ = end & 255;
> > > +			*dst++ = (end >> 8) & 255;
> > > +			*dst++ = (end >> 16) & 255;
> > > +			*dst++ = (end >> 24) & 255;
> > > +		}
> > > +	}
> > > +
> > > +	hantro_reg_write(vpu, &av1_multicore_expect_context_update,
> > > +			 !!(context_update_x == 0));
> > > +	hantro_reg_write(vpu, &av1_tile_enable, !!((tile_info.tile_cols > 1)
> > > +						   || (tile_info.tile_rows > 1)));
> > > +	hantro_reg_write(vpu, &av1_num_tile_cols_8k, tile_info.tile_cols);
> > > +	hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info.tile_rows);
> > > +	hantro_reg_write(vpu, &av1_context_update_tile_id,
> > > +			 context_update_tile_id);
> > > +	hantro_reg_write(vpu, &av1_tile_transpose, 1);
> > > +	if (context_update_tile_id) {
> > > +		hantro_reg_write(vpu, &av1_dec_tile_size_mag,
> > > +				 tile_info.tile_size_bytes);
> > > +	} else
> > > +		hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3);
> > > +
> > > +	hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma);
> > > +}
> > > +
> > > +static int rockchip_vpu981_av1_dec_get_relative_dist(struct hantro_ctx *ctx,
> > > +						     int a, int b)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +	int bits = ctrls->sequence->order_hint_bits - 1;
> > > +	int diff, m;
> > > +
> > > +	if (!ctrls->sequence->order_hint_bits)
> > > +		return 0;
> > > +
> > > +	diff = a - b;
> > > +	m = 1 << bits;
> > > +	diff = (diff & (m - 1)) - (diff & m);
> > > +
> > > +	return diff;
> > > +}
> > I think I've seen this one in the MTK driver. It would be nice to eventually
> > add a v4l2-av1.h and share this. That can happen afterward though, to avoid
> > making this too complex.
> 
> I will keep that in mind.
> 
> > 
> > > +
> > > +static void rockchip_vpu981_av1_dec_set_frame_sign_bias(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> > > +	const struct v4l2_ctrl_av1_sequence *sequence = ctrls->sequence;
> > > +	int i;
> > > +
> > > +	if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
> > > +		for (i = 0; i < AV1_REF_LIST_SIZE; i++)
> > > +			av1_dec->ref_frame_sign_bias[i] = 0;
> > > +
> > > +		return;
> > > +	}
> > > +	// Identify the nearest forward and backward references.
> > > +	for (i = 0; i < AV1_REF_LIST_SIZE - 1; i++) {
> > > +		if (rockchip_vpu981_get_frame_index(ctx, i) >= 0) {
> > > +			int rel_off =
> > > +			    rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +								      rockchip_vpu981_get_order_hint
> > > +								      (ctx, i),
> > > +								      frame->order_hint);
> > > +			av1_dec->ref_frame_sign_bias[i + 1] = (rel_off <= 0) ? 0 : 1;
> > > +		}
> > > +	}
> > > +}
> > > +
> > > +static bool
> > > +rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
> > > +				int width, int height)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +	struct hantro_decoded_buffer *dst;
> > > +	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
> > > +	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
> > > +	size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
> > > +	int cur_width = frame->frame_width_minus_1 + 1;
> > > +	int cur_height = frame->frame_height_minus_1 + 1;
> > > +	int scale_width =
> > > +	    ((width << AV1_REF_SCALE_SHIFT) + cur_width / 2) / cur_width;
> > > +	int scale_height =
> > > +	    ((height << AV1_REF_SCALE_SHIFT) + cur_height / 2) / cur_height;
> > > +
> > > +	switch (ref) {
> > > +	case 0:
> > > +		hantro_reg_write(vpu, &av1_ref0_height, height);
> > > +		hantro_reg_write(vpu, &av1_ref0_width, width);
> > > +		hantro_reg_write(vpu, &av1_ref0_ver_scale, scale_width);
> > > +		hantro_reg_write(vpu, &av1_ref0_hor_scale, scale_height);
> > > +		break;
> > > +	case 1:
> > > +		hantro_reg_write(vpu, &av1_ref1_height, height);
> > > +		hantro_reg_write(vpu, &av1_ref1_width, width);
> > > +		hantro_reg_write(vpu, &av1_ref1_ver_scale, scale_width);
> > > +		hantro_reg_write(vpu, &av1_ref1_hor_scale, scale_height);
> > > +		break;
> > > +	case 2:
> > > +		hantro_reg_write(vpu, &av1_ref2_height, height);
> > > +		hantro_reg_write(vpu, &av1_ref2_width, width);
> > > +		hantro_reg_write(vpu, &av1_ref2_ver_scale, scale_width);
> > > +		hantro_reg_write(vpu, &av1_ref2_hor_scale, scale_height);
> > > +		break;
> > > +	case 3:
> > > +		hantro_reg_write(vpu, &av1_ref3_height, height);
> > > +		hantro_reg_write(vpu, &av1_ref3_width, width);
> > > +		hantro_reg_write(vpu, &av1_ref3_ver_scale, scale_width);
> > > +		hantro_reg_write(vpu, &av1_ref3_hor_scale, scale_height);
> > > +		break;
> > > +	case 4:
> > > +		hantro_reg_write(vpu, &av1_ref4_height, height);
> > > +		hantro_reg_write(vpu, &av1_ref4_width, width);
> > > +		hantro_reg_write(vpu, &av1_ref4_ver_scale, scale_width);
> > > +		hantro_reg_write(vpu, &av1_ref4_hor_scale, scale_height);
> > > +		break;
> > > +	case 5:
> > > +		hantro_reg_write(vpu, &av1_ref5_height, height);
> > > +		hantro_reg_write(vpu, &av1_ref5_width, width);
> > > +		hantro_reg_write(vpu, &av1_ref5_ver_scale, scale_width);
> > > +		hantro_reg_write(vpu, &av1_ref5_hor_scale, scale_height);
> > > +		break;
> > > +	case 6:
> > > +		hantro_reg_write(vpu, &av1_ref6_height, height);
> > > +		hantro_reg_write(vpu, &av1_ref6_width, width);
> > > +		hantro_reg_write(vpu, &av1_ref6_ver_scale, scale_width);
> > > +		hantro_reg_write(vpu, &av1_ref6_hor_scale, scale_height);
> > > +		break;
> > > +	default:
> > > +		pr_warn("AV1 invalid reference frame index\n");
> > > +	}
> > > +
> > > +	dst = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
> > > +	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
> > > +	chroma_addr = luma_addr + cr_offset;
> > > +	mv_addr = luma_addr + mv_offset;
> > > +
> > > +	hantro_write_addr(vpu, AV1_REFERENCE_Y(ref), luma_addr);
> > > +	hantro_write_addr(vpu, AV1_REFERENCE_CB(ref), chroma_addr);
> > > +	hantro_write_addr(vpu, AV1_REFERENCE_MV(ref), mv_addr);
> > > +
> > > +	return (scale_width != (1 << AV1_REF_SCALE_SHIFT))
> > > +		|| (scale_height != (1 << AV1_REF_SCALE_SHIFT));
> > > +}
> > > +
> > > +static void rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx *ctx,
> > > +						  int ref, int val)
> > > +{
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +
> > > +	switch (ref) {
> > > +	case 0:
> > > +		hantro_reg_write(vpu, &av1_ref0_sign_bias, val);
> > > +		break;
> > > +	case 1:
> > > +		hantro_reg_write(vpu, &av1_ref1_sign_bias, val);
> > > +		break;
> > > +	case 2:
> > > +		hantro_reg_write(vpu, &av1_ref2_sign_bias, val);
> > > +		break;
> > > +	case 3:
> > > +		hantro_reg_write(vpu, &av1_ref3_sign_bias, val);
> > > +		break;
> > > +	case 4:
> > > +		hantro_reg_write(vpu, &av1_ref4_sign_bias, val);
> > > +		break;
> > > +	case 5:
> > > +		hantro_reg_write(vpu, &av1_ref5_sign_bias, val);
> > > +		break;
> > > +	case 6:
> > > +		hantro_reg_write(vpu, &av1_ref6_sign_bias, val);
> > > +		break;
> > > +	default:
> > > +		pr_warn("AV1 invalid sign bias index\n");
> > > +		break;
> > > +	}
> > > +}
> > > +
> > > +static void rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> > > +	const struct v4l2_av1_segmentation *seg = &frame->segmentation;
> > > +	uint32_t segval[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX] = { 0 };
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +	uint8_t segsign = 0, preskip_segid = 0, last_active_seg = 0, i, j;
> > > +
> > > +	if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED)
> > > +	    && (frame->primary_ref_frame < V4L2_AV1_REFS_PER_FRAME)) {
> > > +		int idx = rockchip_vpu981_get_frame_index(ctx, frame->primary_ref_frame);
> > > +
> > > +		if (idx >= 0) {
> > > +			dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
> > > +			size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
> > > +			size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
> > > +
> > > +			luma_addr =
> > > +				hantro_get_dec_buf_addr(ctx,
> > > +							&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
> > > +			chroma_addr = luma_addr + cr_offset;
> > > +			mv_addr = luma_addr + mv_offset;
> > > +
> > > +			hantro_write_addr(vpu, AV1_SEGMENTATION, mv_addr);
> > > +			hantro_reg_write(vpu, &av1_use_temporal3_mvs, 1);
> > > +		}
> > > +	}
> > > +
> > > +	hantro_reg_write(vpu, &av1_segment_temp_upd_e,
> > > +			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE));
> > > +	hantro_reg_write(vpu, &av1_segment_upd_e,
> > > +			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP));
> > > +	hantro_reg_write(vpu, &av1_segment_e,
> > > +			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED));
> > > +
> > > +	hantro_reg_write(vpu, &av1_error_resilient,
> > > +			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE));
> > > +
> > > +	if (IS_INTRA(frame->frame_type)
> > > +	    || !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE)) {
> > > +		hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
> > > +	}
> > > +
> > > +	if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED)) {
> > > +		int s;
> > > +
> > > +		for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
> > > +			if (seg->feature_enabled[s] &
> > > +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
> > > +				segval[s][V4L2_AV1_SEG_LVL_ALT_Q] =
> > > +				    CLIP3(0, 255,
> > > +					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q]));
> > > +				segsign |=
> > > +					(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q] < 0) << s;
> > > +			}
> > > +
> > > +			if (seg->feature_enabled[s] &
> > > +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_V))
> > > +				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V] =
> > > +					CLIP3(-63, 63,
> > > +					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]));
> > > +
> > > +			if (seg->feature_enabled[s] &
> > > +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_H))
> > > +				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H] =
> > > +				    CLIP3(-63, 63,
> > > +					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]));
> > > +
> > > +			if (seg->feature_enabled[s] &
> > > +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_U))
> > > +				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_U] =
> > > +				    CLIP3(-63, 63,
> > > +					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_U]));
> > > +
> > > +			if (seg->feature_enabled[s] &
> > > +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_V))
> > > +				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_V] =
> > > +				    CLIP3(-63, 63,
> > > +					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_V]));
> > > +
> > > +			if (frame->frame_type && seg->feature_enabled[s] &
> > > +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_FRAME))
> > > +				segval[s][V4L2_AV1_SEG_LVL_REF_FRAME]++;
> > > +
> > > +			if (seg->feature_enabled[s] &
> > > +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_SKIP))
> > > +				segval[s][V4L2_AV1_SEG_LVL_REF_SKIP] = 1;
> > > +
> > > +			if (seg->feature_enabled[s] &
> > > +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_GLOBALMV))
> > > +				segval[s][V4L2_AV1_SEG_LVL_REF_GLOBALMV] = 1;
> > > +		}
> > > +	}
> > > +
> > > +	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
> > > +		for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++) {
> > > +			if (seg->feature_enabled[i]
> > > +			    & V4L2_AV1_SEGMENT_FEATURE_ENABLED(j)) {
> > > +				preskip_segid |= (j >= V4L2_AV1_SEG_LVL_REF_FRAME);
> > > +				last_active_seg = max(i, last_active_seg);
> > > +			}
> > > +		}
> > > +	}
> > > +
> > > +	hantro_reg_write(vpu, &av1_last_active_seg, last_active_seg);
> > > +	hantro_reg_write(vpu, &av1_preskip_segid, preskip_segid);
> > > +
> > > +	hantro_reg_write(vpu, &av1_seg_quant_sign, segsign);
> > > +
> > > +	/* Write QP, filter level, ref frame and skip for every segment */
> > > +	hantro_reg_write(vpu, &av1_quant_seg0,
> > > +			 segval[0][V4L2_AV1_SEG_LVL_ALT_Q]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg0,
> > > +			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg0,
> > > +			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg0,
> > > +			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg0,
> > > +			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> > > +	hantro_reg_write(vpu, &av1_refpic_seg0,
> > > +			 segval[0][V4L2_AV1_SEG_LVL_REF_FRAME]);
> > > +	hantro_reg_write(vpu, &av1_skip_seg0,
> > > +			 segval[0][V4L2_AV1_SEG_LVL_REF_SKIP]);
> > > +	hantro_reg_write(vpu, &av1_global_mv_seg0,
> > > +			 segval[0][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> > > +
> > > +	hantro_reg_write(vpu, &av1_quant_seg1,
> > > +			 segval[1][V4L2_AV1_SEG_LVL_ALT_Q]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg1,
> > > +			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg1,
> > > +			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg1,
> > > +			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg1,
> > > +			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> > > +	hantro_reg_write(vpu, &av1_refpic_seg1,
> > > +			 segval[1][V4L2_AV1_SEG_LVL_REF_FRAME]);
> > > +	hantro_reg_write(vpu, &av1_skip_seg1,
> > > +			 segval[1][V4L2_AV1_SEG_LVL_REF_SKIP]);
> > > +	hantro_reg_write(vpu, &av1_global_mv_seg1,
> > > +			 segval[1][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> > > +
> > > +	hantro_reg_write(vpu, &av1_quant_seg2,
> > > +			 segval[2][V4L2_AV1_SEG_LVL_ALT_Q]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg2,
> > > +			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg2,
> > > +			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg2,
> > > +			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg2,
> > > +			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> > > +	hantro_reg_write(vpu, &av1_refpic_seg2,
> > > +			 segval[2][V4L2_AV1_SEG_LVL_REF_FRAME]);
> > > +	hantro_reg_write(vpu, &av1_skip_seg2,
> > > +			 segval[2][V4L2_AV1_SEG_LVL_REF_SKIP]);
> > > +	hantro_reg_write(vpu, &av1_global_mv_seg2,
> > > +			 segval[2][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> > > +
> > > +	hantro_reg_write(vpu, &av1_quant_seg3,
> > > +			 segval[3][V4L2_AV1_SEG_LVL_ALT_Q]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg3,
> > > +			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg3,
> > > +			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg3,
> > > +			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg3,
> > > +			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> > > +	hantro_reg_write(vpu, &av1_refpic_seg3,
> > > +			 segval[3][V4L2_AV1_SEG_LVL_REF_FRAME]);
> > > +	hantro_reg_write(vpu, &av1_skip_seg3,
> > > +			 segval[3][V4L2_AV1_SEG_LVL_REF_SKIP]);
> > > +	hantro_reg_write(vpu, &av1_global_mv_seg3,
> > > +			 segval[3][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> > > +
> > > +	hantro_reg_write(vpu, &av1_quant_seg4,
> > > +			 segval[4][V4L2_AV1_SEG_LVL_ALT_Q]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg4,
> > > +			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg4,
> > > +			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg4,
> > > +			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg4,
> > > +			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> > > +	hantro_reg_write(vpu, &av1_refpic_seg4,
> > > +			 segval[4][V4L2_AV1_SEG_LVL_REF_FRAME]);
> > > +	hantro_reg_write(vpu, &av1_skip_seg4,
> > > +			 segval[4][V4L2_AV1_SEG_LVL_REF_SKIP]);
> > > +	hantro_reg_write(vpu, &av1_global_mv_seg4,
> > > +			 segval[4][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> > > +
> > > +	hantro_reg_write(vpu, &av1_quant_seg5,
> > > +			 segval[5][V4L2_AV1_SEG_LVL_ALT_Q]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg5,
> > > +			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg5,
> > > +			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg5,
> > > +			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg5,
> > > +			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> > > +	hantro_reg_write(vpu, &av1_refpic_seg5,
> > > +			 segval[5][V4L2_AV1_SEG_LVL_REF_FRAME]);
> > > +	hantro_reg_write(vpu, &av1_skip_seg5,
> > > +			 segval[5][V4L2_AV1_SEG_LVL_REF_SKIP]);
> > > +	hantro_reg_write(vpu, &av1_global_mv_seg5,
> > > +			 segval[5][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> > > +
> > > +	hantro_reg_write(vpu, &av1_quant_seg6,
> > > +			 segval[6][V4L2_AV1_SEG_LVL_ALT_Q]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg6,
> > > +			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg6,
> > > +			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg6,
> > > +			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg6,
> > > +			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> > > +	hantro_reg_write(vpu, &av1_refpic_seg6,
> > > +			 segval[6][V4L2_AV1_SEG_LVL_REF_FRAME]);
> > > +	hantro_reg_write(vpu, &av1_skip_seg6,
> > > +			 segval[6][V4L2_AV1_SEG_LVL_REF_SKIP]);
> > > +	hantro_reg_write(vpu, &av1_global_mv_seg6,
> > > +			 segval[6][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> > > +
> > > +	hantro_reg_write(vpu, &av1_quant_seg7,
> > > +			 segval[7][V4L2_AV1_SEG_LVL_ALT_Q]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg7,
> > > +			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg7,
> > > +			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg7,
> > > +			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_U]);
> > > +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg7,
> > > +			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_V]);
> > > +	hantro_reg_write(vpu, &av1_refpic_seg7,
> > > +			 segval[7][V4L2_AV1_SEG_LVL_REF_FRAME]);
> > > +	hantro_reg_write(vpu, &av1_skip_seg7,
> > > +			 segval[7][V4L2_AV1_SEG_LVL_REF_SKIP]);
> > > +	hantro_reg_write(vpu, &av1_global_mv_seg7,
> > > +			 segval[7][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
> > > +}
> > > +
> > > +static bool rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> > > +	const struct v4l2_av1_segmentation *segmentation = &frame->segmentation;
> > > +	const struct v4l2_av1_quantization *quantization = &frame->quantization;
> > > +	int i;
> > > +
> > > +	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
> > > +		int qindex = quantization->base_q_idx;
> > > +
> > > +		if (segmentation->feature_enabled[i] &
> > > +		    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
> > > +			qindex += segmentation->feature_data[i][V4L2_AV1_SEG_LVL_ALT_Q];
> > > +		}
> > > +		qindex = CLIP3(0, 255, qindex);
> > > +
> > > +		if (qindex
> > > +		    || quantization->delta_q_y_dc
> > > +		    || quantization->delta_q_u_dc
> > > +		    || quantization->delta_q_u_ac
> > > +		    || quantization->delta_q_v_dc || quantization->delta_q_v_ac)
> > > +			return false;
> > > +	}
> > > +	return true;
> > > +}
> > > +
> > > +static void rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> > > +	const struct v4l2_av1_loop_filter *loop_filter = &frame->loop_filter;
> > > +	bool filtering_dis = (loop_filter->level[0] == 0)
> > > +			     && (loop_filter->level[1] == 0);
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +
> > > +	hantro_reg_write(vpu, &av1_filtering_dis, filtering_dis);
> > > +	hantro_reg_write(vpu, &av1_filt_level_base_gt32, loop_filter->level[0] > 32);
> > > +	hantro_reg_write(vpu, &av1_filt_sharpness, loop_filter->sharpness);
> > > +
> > > +	hantro_reg_write(vpu, &av1_filt_level0, loop_filter->level[0]);
> > > +	hantro_reg_write(vpu, &av1_filt_level1, loop_filter->level[1]);
> > > +	hantro_reg_write(vpu, &av1_filt_level2, loop_filter->level[2]);
> > > +	hantro_reg_write(vpu, &av1_filt_level3, loop_filter->level[3]);
> > > +
> > > +	if (loop_filter->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED
> > > +	    && !rockchip_vpu981_av1_dec_is_lossless(ctx)
> > > +	    && !(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC)) {
> > > +		hantro_reg_write(vpu, &av1_filt_ref_adj_0,
> > > +				 loop_filter->ref_deltas[0]);
> > > +		hantro_reg_write(vpu, &av1_filt_ref_adj_1,
> > > +				 loop_filter->ref_deltas[1]);
> > > +		hantro_reg_write(vpu, &av1_filt_ref_adj_2,
> > > +				 loop_filter->ref_deltas[2]);
> > > +		hantro_reg_write(vpu, &av1_filt_ref_adj_3,
> > > +				 loop_filter->ref_deltas[3]);
> > > +		hantro_reg_write(vpu, &av1_filt_ref_adj_4,
> > > +				 loop_filter->ref_deltas[4]);
> > > +		hantro_reg_write(vpu, &av1_filt_ref_adj_5,
> > > +				 loop_filter->ref_deltas[5]);
> > > +		hantro_reg_write(vpu, &av1_filt_ref_adj_6,
> > > +				 loop_filter->ref_deltas[6]);
> > > +		hantro_reg_write(vpu, &av1_filt_ref_adj_7,
> > > +				 loop_filter->ref_deltas[7]);
> > > +		hantro_reg_write(vpu, &av1_filt_mb_adj_0,
> > > +				 loop_filter->mode_deltas[0]);
> > > +		hantro_reg_write(vpu, &av1_filt_mb_adj_1,
> > > +				 loop_filter->mode_deltas[1]);
> > > +	} else {
> > > +		hantro_reg_write(vpu, &av1_filt_ref_adj_0, 0);
> > > +		hantro_reg_write(vpu, &av1_filt_ref_adj_1, 0);
> > > +		hantro_reg_write(vpu, &av1_filt_ref_adj_2, 0);
> > > +		hantro_reg_write(vpu, &av1_filt_ref_adj_3, 0);
> > > +		hantro_reg_write(vpu, &av1_filt_ref_adj_4, 0);
> > > +		hantro_reg_write(vpu, &av1_filt_ref_adj_5, 0);
> > > +		hantro_reg_write(vpu, &av1_filt_ref_adj_6, 0);
> > > +		hantro_reg_write(vpu, &av1_filt_ref_adj_7, 0);
> > > +		hantro_reg_write(vpu, &av1_filt_mb_adj_0, 0);
> > > +		hantro_reg_write(vpu, &av1_filt_mb_adj_1, 0);
> > > +	}
> > > +
> > > +	hantro_write_addr(vpu, AV1_DB_DATA_COL, av1_dec->db_data_col.dma);
> > > +	hantro_write_addr(vpu, AV1_DB_CTRL_COL, av1_dec->db_ctrl_col.dma);
> > > +}
> > > +
> > > +static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> > > +	bool frame_is_intra = IS_INTRA(frame->frame_type);
> > > +	struct av1cdfs *out_cdfs = (struct av1cdfs *)av1_dec->prob_tbl_out.cpu;
> > > +	int i;
> > > +
> > > +	if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
> > > +		return;
> > > +
> > > +	for (i = 0; i < NUM_REF_FRAMES; i++) {
> > > +		if (frame->refresh_frame_flags & (1 << i)) {
> > > +			struct mvcdfs stored_mv_cdf;
> > > +
> > > +			rockchip_av1_get_cdfs(ctx, i);
> > > +			stored_mv_cdf = av1_dec->cdfs->mv_cdf;
> > > +			*av1_dec->cdfs = *out_cdfs;
> > > +			if (frame_is_intra) {
> > > +				av1_dec->cdfs->mv_cdf = stored_mv_cdf;
> > > +				*av1_dec->cdfs_ndvc = out_cdfs->mv_cdf;
> > > +			}
> > > +			rockchip_av1_store_cdfs(ctx,
> > > +						frame->refresh_frame_flags);
> > > +			break;
> > > +		}
> > > +	}
> > > +}
> > > +
> > > +void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx)
> > > +{
> > > +	rockchip_vpu981_av1_dec_update_prob(ctx);
> > > +}
> > > +
> > > +static void rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> > > +	const struct v4l2_av1_quantization *quantization = &frame->quantization;
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +	bool error_resilient_mode =
> > > +	    !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE);
> > > +	bool frame_is_intra = IS_INTRA(frame->frame_type);
> > > +
> > > +	if (error_resilient_mode || frame_is_intra
> > > +	    || frame->primary_ref_frame == AV1_PRIMARY_REF_NONE) {
> > > +		av1_dec->cdfs = &av1_dec->default_cdfs;
> > > +		av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
> > > +		rockchip_av1_default_coeff_probs(quantization->base_q_idx,
> > > +						 av1_dec->cdfs);
> > > +	} else {
> > > +		rockchip_av1_get_cdfs(ctx, frame->ref_frame_idx[frame->primary_ref_frame]);
> > > +	}
> > > +	rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);
> > > +
> > > +	memcpy(av1_dec->prob_tbl.cpu, av1_dec->cdfs, sizeof(struct av1cdfs));
> > > +
> > > +	if (frame_is_intra) {
> > > +		int mv_offset = offsetof(struct av1cdfs, mv_cdf);
> > > +		/* Overwrite MV context area with intrabc MV context */
> > > +		memcpy(av1_dec->prob_tbl.cpu + mv_offset, av1_dec->cdfs_ndvc,
> > > +		       sizeof(struct mvcdfs));
> > > +	}
> > > +
> > > +	hantro_write_addr(vpu, AV1_PROP_TABLE_OUT, av1_dec->prob_tbl_out.dma);
> > > +	hantro_write_addr(vpu, AV1_PROP_TABLE, av1_dec->prob_tbl.dma);
> > > +}
> > > +
> > > +static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> > > +	const struct v4l2_av1_cdef *cdef = &frame->cdef;
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +	uint32_t luma_pri_strength = 0;
> > > +	uint16_t luma_sec_strength = 0;
> > > +	uint32_t chroma_pri_strength = 0;
> > > +	uint16_t chroma_sec_strength = 0;
> > > +	int i;
> > > +
> > > +	hantro_reg_write(vpu, &av1_cdef_bits, cdef->bits);
> > > +	hantro_reg_write(vpu, &av1_cdef_damping, cdef->damping_minus_3);
> > > +
> > > +	for (i = 0; i < (1 << cdef->bits); i++) {
> > > +		luma_pri_strength |= cdef->y_pri_strength[i] << (i * 4);
> > > +		if (cdef->y_sec_strength[i] == 4)
> > > +			luma_sec_strength |= 3 << (i * 2);
> > > +		else
> > > +			luma_sec_strength |= cdef->y_sec_strength[i] << (i * 2);
> > > +
> > > +		chroma_pri_strength |= cdef->uv_pri_strength[i] << (i * 4);
> > > +		if (cdef->uv_sec_strength[i] == 4)
> > > +			chroma_sec_strength |= 3 << (i * 2);
> > > +		else
> > > +			chroma_sec_strength |= cdef->uv_sec_strength[i] << (i * 2);
> > > +	}
> > > +
> > > +	hantro_reg_write(vpu, &av1_cdef_luma_primary_strength,
> > > +			 luma_pri_strength);
> > > +	hantro_reg_write(vpu, &av1_cdef_luma_secondary_strength,
> > > +			 luma_sec_strength);
> > > +	hantro_reg_write(vpu, &av1_cdef_chroma_primary_strength,
> > > +			 chroma_pri_strength);
> > > +	hantro_reg_write(vpu, &av1_cdef_chroma_secondary_strength,
> > > +			 chroma_sec_strength);
> > > +
> > > +	hantro_write_addr(vpu, AV1_CDEF_COL, av1_dec->cdef_col.dma);
> > > +}
> > > +
> > > +static void rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> > > +	const struct v4l2_av1_loop_restoration *loop_restoration =
> > > +	    &frame->loop_restoration;
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +	uint16_t lr_type = 0, lr_unit_size = 0;
> > > +	uint8_t restoration_unit_size[V4L2_AV1_NUM_PLANES_MAX] = { 3, 3, 3 };
> > > +	int i;
> > > +
> > > +	if (loop_restoration->flags & V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR) {
> > > +		restoration_unit_size[0] = 1 + loop_restoration->lr_unit_shift;
> > > +		restoration_unit_size[1] =
> > > +		    1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
> > > +		restoration_unit_size[2] =
> > > +		    1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
> > > +	}
> > > +
> > > +	for (i = 0; i < V4L2_AV1_NUM_PLANES_MAX; i++) {
> > > +		lr_type |=
> > > +		    loop_restoration->frame_restoration_type[i] << (i * 2);
> > > +		lr_unit_size |= restoration_unit_size[i] << (i * 2);
> > > +	}
> > > +
> > > +	hantro_reg_write(vpu, &av1_lr_type, lr_type);
> > > +	hantro_reg_write(vpu, &av1_lr_unit_size, lr_unit_size);
> > > +	hantro_write_addr(vpu, AV1_LR_COL, av1_dec->lr_col.dma);
> > > +}
> > > +
> > > +static void rockchip_vpu981_av1_dec_set_superres_params(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +	uint8_t superres_scale_denominator = SCALE_NUMERATOR;
> > > +	int superres_luma_step = RS_SCALE_SUBPEL_BITS;
> > > +	int superres_chroma_step = RS_SCALE_SUBPEL_BITS;
> > > +	int superres_luma_step_invra = RS_SCALE_SUBPEL_BITS;
> > > +	int superres_chroma_step_invra = RS_SCALE_SUBPEL_BITS;
> > > +	int superres_init_luma_subpel_x = 0;
> > > +	int superres_init_chroma_subpel_x = 0;
> > > +	int superres_is_scaled = 0;
> > > +	int min_w = min_t(uint32_t, 16, frame->upscaled_width);
> > > +	int upscaledLumaPlaneW, downscaledLumaPlaneW;
> > > +	int downscaledChromaPlaneW, upscaledChromaPlaneW;
> > > +	int stepLumaX, stepChromaX;
> > > +	int errLuma, errChroma;
> > > +	int initialLumaSubpelX, initialChromaSubpelX;
> > You haven't used camel case so far, are you sure this is allowed ?
> 
> I will change them.
> 
> > 
> > > +	int width = 0;
> > > +
> > > +	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
> > > +		superres_scale_denominator = frame->superres_denom;
> > > +
> > > +	if (superres_scale_denominator <= SCALE_NUMERATOR)
> > > +		goto set_regs;
> > > +
> > > +	width = (frame->upscaled_width * SCALE_NUMERATOR +
> > > +		(superres_scale_denominator / 2)) / superres_scale_denominator;
> > > +
> > > +	if (width < min_w)
> > > +		width = min_w;
> > > +
> > > +	if (width == frame->upscaled_width)
> > > +		goto set_regs;
> > > +
> > > +	superres_is_scaled = 1;
> > > +	upscaledLumaPlaneW = frame->upscaled_width;
> > > +	downscaledLumaPlaneW = width;
> > > +	downscaledChromaPlaneW = (downscaledLumaPlaneW + 1) >> 1;
> > > +	upscaledChromaPlaneW = (upscaledLumaPlaneW + 1) >> 1;
> > > +	stepLumaX =
> > > +		((downscaledLumaPlaneW << RS_SCALE_SUBPEL_BITS) +
> > > +		 (upscaledLumaPlaneW / 2)) / upscaledLumaPlaneW;
> > > +	stepChromaX =
> > > +		((downscaledChromaPlaneW << RS_SCALE_SUBPEL_BITS) +
> > > +		 (upscaledChromaPlaneW / 2)) / upscaledChromaPlaneW;
> > > +	errLuma =
> > > +		(upscaledLumaPlaneW * stepLumaX)
> > > +		- (downscaledLumaPlaneW << RS_SCALE_SUBPEL_BITS);
> > > +	errChroma =
> > > +		(upscaledChromaPlaneW * stepChromaX)
> > > +		- (downscaledChromaPlaneW << RS_SCALE_SUBPEL_BITS);
> > > +	initialLumaSubpelX =
> > > +		((-((upscaledLumaPlaneW - downscaledLumaPlaneW) << (RS_SCALE_SUBPEL_BITS - 1))
> > > +		  + upscaledLumaPlaneW / 2)
> > > +		 / upscaledLumaPlaneW + (1 << (RS_SCALE_EXTRA_BITS - 1)) - errLuma / 2)
> > > +		& RS_SCALE_SUBPEL_MASK;
> > > +	initialChromaSubpelX =
> > > +		((-((upscaledChromaPlaneW - downscaledChromaPlaneW) << (RS_SCALE_SUBPEL_BITS - 1))
> > > +		  + upscaledChromaPlaneW / 2)
> > > +		 / upscaledChromaPlaneW + (1 << (RS_SCALE_EXTRA_BITS - 1)) - errChroma / 2)
> > > +		& RS_SCALE_SUBPEL_MASK;
> > > +	superres_luma_step = stepLumaX;
> > > +	superres_chroma_step = stepChromaX;
> > > +	superres_luma_step_invra =
> > > +		((upscaledLumaPlaneW << RS_SCALE_SUBPEL_BITS) + (downscaledLumaPlaneW / 2))
> > > +		/ downscaledLumaPlaneW;
> > > +	superres_chroma_step_invra =
> > > +		((upscaledChromaPlaneW << RS_SCALE_SUBPEL_BITS) + (downscaledChromaPlaneW / 2))
> > > +		/ downscaledChromaPlaneW;
> > > +	superres_init_luma_subpel_x = initialLumaSubpelX;
> > > +	superres_init_chroma_subpel_x = initialChromaSubpelX;
> > > +
> > > +set_regs:
> > > +	hantro_reg_write(vpu, &av1_superres_pic_width, frame->upscaled_width);
> > > +
> > > +	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
> > > +		hantro_reg_write(vpu, &av1_scale_denom_minus9,
> > > +				 frame->superres_denom - SUPERRES_SCALE_DENOMINATOR_MIN);
> > > +	else
> > > +		hantro_reg_write(vpu, &av1_scale_denom_minus9, frame->superres_denom);
> > > +
> > > +	hantro_reg_write(vpu, &av1_superres_luma_step, superres_luma_step);
> > > +	hantro_reg_write(vpu, &av1_superres_chroma_step, superres_chroma_step);
> > > +	hantro_reg_write(vpu, &av1_superres_luma_step_invra,
> > > +			 superres_luma_step_invra);
> > > +	hantro_reg_write(vpu, &av1_superres_chroma_step_invra,
> > > +			 superres_chroma_step_invra);
> > > +	hantro_reg_write(vpu, &av1_superres_init_luma_subpel_x,
> > > +			 superres_init_luma_subpel_x);
> > > +	hantro_reg_write(vpu, &av1_superres_init_chroma_subpel_x,
> > > +			 superres_init_chroma_subpel_x);
> > > +	hantro_reg_write(vpu, &av1_superres_is_scaled, superres_is_scaled);
> > > +
> > > +	hantro_write_addr(vpu, AV1_SR_COL, av1_dec->sr_col.dma);
> > > +}
> > > +
> > > +static void rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +	int pic_width_in_cbs = ALIGN(frame->frame_width_minus_1 + 1, 8) >> 3;
> > > +	int pic_height_in_cbs = ALIGN(frame->frame_height_minus_1 + 1, 8) >> 3;
> > Aren't these DIV_ROUND_UP(val + 1, 8) ?
> > 
> > > +	int pic_width_pad = ALIGN(frame->frame_width_minus_1 + 1, 8)
> > > +			    - (frame->frame_width_minus_1 + 1);
> > > +	int pic_height_pad = ALIGN(frame->frame_height_minus_1 + 1, 8)
> > > +			     - (frame->frame_height_minus_1 + 1);
> > > +
> > > +	hantro_reg_write(vpu, &av1_pic_width_in_cbs, pic_width_in_cbs);
> > > +	hantro_reg_write(vpu, &av1_pic_height_in_cbs, pic_height_in_cbs);
> > > +	hantro_reg_write(vpu, &av1_pic_width_pad, pic_width_pad);
> > > +	hantro_reg_write(vpu, &av1_pic_height_pad, pic_height_pad);
> > > +
> > > +	rockchip_vpu981_av1_dec_set_superres_params(ctx);
> > > +}
> > > +
> > > +static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +	bool use_ref_frame_mvs =
> > > +	    !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS);
> > > +	int cur_frame_offset = frame->order_hint;
> > > +	int alt_frame_offset = 0;
> > > +	int gld_frame_offset = 0;
> > > +	int bwd_frame_offset = 0;
> > > +	int alt2_frame_offset = 0;
> > > +	int refs_selected[3] = { 0, 0, 0 };
> > > +	int cur_mi_cols = (frame->frame_width_minus_1 + 8) >> 3;
> > > +	int cur_mi_rows = (frame->frame_height_minus_1 + 8) >> 3;
> > This looks like a contraction of (val + 1 + 7) / 8, so in short, another
> > > DIV_ROUND_UP().
> > 
> > > +	int cur_offset[V4L2_AV1_NUM_REF_FRAMES - 1];
> > > +	int cur_roffset[V4L2_AV1_NUM_REF_FRAMES - 1];
> > > This looks like V4L2_AV1_REFS_PER_FRAME. Daniel, should we remove this
> > > V4L2_AV1_NUM_REF_FRAMES? It's redundant with V4L2_AV1_TOTAL_REFS_PER_FRAME ...
> > 
> > > +	int mf_types[3] = { 0, 0, 0 };
> > > +	int ref_stamp = 2;
> > > +	int ref_ind = 0;
> > > +	int rf, idx;
> > > +
> > > +	alt_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT_BUF_IDX);
> > > +	gld_frame_offset = rockchip_vpu981_get_order_hint(ctx, GLD_BUF_IDX);
> > > +	bwd_frame_offset = rockchip_vpu981_get_order_hint(ctx, BWD_BUF_IDX);
> > > +	alt2_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT2_BUF_IDX);
> > > +
> > > +	idx = rockchip_vpu981_get_frame_index(ctx, LST_BUF_IDX);
> > > +	if (idx >= 0) {
> > > +		int alt_frame_offset_in_lst =
> > > +			av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME];
> > > +		bool is_lst_overlay =
> > > +		    (alt_frame_offset_in_lst == gld_frame_offset);
> > > +
> > > +		if (!is_lst_overlay) {
> > > +			int lst_mi_cols =
> > > +			    (av1_dec->frame_refs[idx].width + 7) >> 3;
> > > DIV_ROUND_UP()
> > 
> > > +			int lst_mi_rows =
> > > +			    (av1_dec->frame_refs[idx].height + 7) >> 3;
> > again. I'll stop pointing these out, I think you can find them.
> > 
> > > +			bool lst_intra_only =
> > > +			    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
> > > +
> > > +			if (lst_mi_cols == cur_mi_cols
> > > +			    && lst_mi_rows == cur_mi_rows && !lst_intra_only) {
> > > +				mf_types[ref_ind] = V4L2_AV1_REF_LAST_FRAME;
> > > +				refs_selected[ref_ind++] = LST_BUF_IDX;
> > > +			}
> > > +		}
> > > +		ref_stamp--;
> > > +	}
> > > +
> > > +	idx = rockchip_vpu981_get_frame_index(ctx, BWD_BUF_IDX);
> > > +	if (rockchip_vpu981_av1_dec_get_relative_dist
> > > +	    (ctx, bwd_frame_offset, cur_frame_offset) > 0) {
> > > +		int bwd_mi_cols = (av1_dec->frame_refs[idx].width + 7) >> 3;
> > > +		int bwd_mi_rows = (av1_dec->frame_refs[idx].height + 7) >> 3;
> > > +		bool bwd_intra_only =
> > > +		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
> > > +
> > > +		if (bwd_mi_cols == cur_mi_cols && bwd_mi_rows == cur_mi_rows &&
> > > +		    !bwd_intra_only) {
> > > +			mf_types[ref_ind] = V4L2_AV1_REF_BWDREF_FRAME;
> > > +			refs_selected[ref_ind++] = BWD_BUF_IDX;
> > > +			ref_stamp--;
> > > +		}
> > > +	}
> > > +
> > > +	idx = rockchip_vpu981_get_frame_index(ctx, ALT2_BUF_IDX);
> > > +	if (rockchip_vpu981_av1_dec_get_relative_dist
> > > +	    (ctx, alt2_frame_offset, cur_frame_offset) > 0) {
> > > +		int alt2_mi_cols = (av1_dec->frame_refs[idx].width + 7) >> 3;
> > > +		int alt2_mi_rows = (av1_dec->frame_refs[idx].height + 7) >> 3;
> > > +		bool alt2_intra_only =
> > > +		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
> > > +
> > > +		if (alt2_mi_cols == cur_mi_cols && alt2_mi_rows == cur_mi_rows
> > > +		    && !alt2_intra_only) {
> > > +			mf_types[ref_ind] = V4L2_AV1_REF_ALTREF2_FRAME;
> > > +			refs_selected[ref_ind++] = ALT2_BUF_IDX;
> > > +			ref_stamp--;
> > > +		}
> > > +	}
> > > +
> > > +	idx = rockchip_vpu981_get_frame_index(ctx, ALT_BUF_IDX);
> > > +	if (rockchip_vpu981_av1_dec_get_relative_dist
> > > +	    (ctx, alt_frame_offset, cur_frame_offset) > 0 && ref_stamp >= 0) {
> > > +		int alt_mi_cols = (av1_dec->frame_refs[idx].width + 7) >> 3;
> > > +		int alt_mi_rows = (av1_dec->frame_refs[idx].height + 7) >> 3;
> > > +		bool alt_intra_only =
> > > +		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
> > > +
> > > +		if (alt_mi_cols == cur_mi_cols && alt_mi_rows == cur_mi_rows &&
> > > +		    !alt_intra_only) {
> > > +			mf_types[ref_ind] = V4L2_AV1_REF_ALTREF_FRAME;
> > > +			refs_selected[ref_ind++] = ALT_BUF_IDX;
> > > +			ref_stamp--;
> > > +		}
> > > +	}
> > > +
> > > +	idx = rockchip_vpu981_get_frame_index(ctx, LST2_BUF_IDX);
> > > +	if (idx >= 0 && ref_stamp >= 0) {
> > > +		int lst2_mi_cols = (av1_dec->frame_refs[idx].width + 7) >> 3;
> > > +		int lst2_mi_rows = (av1_dec->frame_refs[idx].height + 7) >> 3;
> > > I said I'd stop, but can't this be calculated once and saved in the reference
> > > frame structure?
> 
> Indeed that is a simplification, I will implement it.
> 
> > 
> > > +		bool lst2_intra_only =
> > > +		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
> > > +
> > > +		if (lst2_mi_cols == cur_mi_cols && lst2_mi_rows == cur_mi_rows
> > > +		    && !lst2_intra_only) {
> > > +			mf_types[ref_ind] = V4L2_AV1_REF_LAST2_FRAME;
> > > +			refs_selected[ref_ind++] = LST2_BUF_IDX;
> > > +			ref_stamp--;
> > > +		}
> > > +	}
> > > +
> > > +	for (rf = 0; rf < V4L2_AV1_NUM_REF_FRAMES - 1; ++rf) {
> > > +		idx = rockchip_vpu981_get_frame_index(ctx, rf);
> > > +		if (idx >= 0) {
> > > +			int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, rf);
> > > +
> > > +			cur_offset[rf] =
> > > +			    rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +								      cur_frame_offset,
> > > +								      rf_order_hint);
> > > +			cur_roffset[rf] =
> > > +			    rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +								      rf_order_hint,
> > > +								      cur_frame_offset);
> > > +		} else {
> > > +			cur_offset[rf] = 0;
> > > +			cur_roffset[rf] = 0;
> > > +		}
> > > +	}
> > > +
> > > +	hantro_reg_write(vpu, &av1_use_temporal0_mvs, 0);
> > > +	hantro_reg_write(vpu, &av1_use_temporal1_mvs, 0);
> > > +	hantro_reg_write(vpu, &av1_use_temporal2_mvs, 0);
> > > +	hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
> > > +
> > > +	hantro_reg_write(vpu, &av1_mf1_last_offset, 0);
> > > +	hantro_reg_write(vpu, &av1_mf1_last2_offset, 0);
> > > +	hantro_reg_write(vpu, &av1_mf1_last3_offset, 0);
> > > +	hantro_reg_write(vpu, &av1_mf1_golden_offset, 0);
> > > +	hantro_reg_write(vpu, &av1_mf1_bwdref_offset, 0);
> > > +	hantro_reg_write(vpu, &av1_mf1_altref2_offset, 0);
> > > +	hantro_reg_write(vpu, &av1_mf1_altref_offset, 0);
> > > +
> > > +	if (use_ref_frame_mvs && ref_ind > 0 &&
> > > +	    cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE
> > > +	    && cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
> > > +		int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, refs_selected[0]);
> > > +		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[0]);
> > > +		int val;
> > > +
> > > +		hantro_reg_write(vpu, &av1_use_temporal0_mvs, 1);
> > > +
> > > +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +				rf_order_hint,
> > > +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST_FRAME]);
> > > +		hantro_reg_write(vpu, &av1_mf1_last_offset, val);
> > > +
> > > +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +				rf_order_hint,
> > > +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST2_FRAME]);
> > > +		hantro_reg_write(vpu, &av1_mf1_last2_offset, val);
> > > +
> > > +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +				rf_order_hint,
> > > +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST3_FRAME]);
> > > +		hantro_reg_write(vpu, &av1_mf1_last3_offset, val);
> > > +
> > > +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +				rf_order_hint,
> > > +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_GOLDEN_FRAME]);
> > > +		hantro_reg_write(vpu, &av1_mf1_golden_offset, val);
> > > +
> > > +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +				rf_order_hint,
> > > +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_BWDREF_FRAME]);
> > > +		hantro_reg_write(vpu, &av1_mf1_bwdref_offset, val);
> > > +
> > > +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +				rf_order_hint,
> > > +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF2_FRAME]);
> > > +		hantro_reg_write(vpu, &av1_mf1_altref2_offset, val);
> > > +
> > > +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +				rf_order_hint,
> > > +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME]);
> > > +		hantro_reg_write(vpu, &av1_mf1_altref_offset, val);
> > > +	}
> > > +
> > > +	hantro_reg_write(vpu, &av1_mf2_last_offset, 0);
> > > +	hantro_reg_write(vpu, &av1_mf2_last2_offset, 0);
> > > +	hantro_reg_write(vpu, &av1_mf2_last3_offset, 0);
> > > +	hantro_reg_write(vpu, &av1_mf2_golden_offset, 0);
> > > +	hantro_reg_write(vpu, &av1_mf2_bwdref_offset, 0);
> > > +	hantro_reg_write(vpu, &av1_mf2_altref2_offset, 0);
> > > +	hantro_reg_write(vpu, &av1_mf2_altref_offset, 0);
> > > +
> > > +	if (use_ref_frame_mvs && ref_ind > 1 &&
> > > +	    cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE
> > > +	    && cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
> > > +		int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, refs_selected[1]);
> > > +		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[1]);
> > > +		int val;
> > > +
> > > +		hantro_reg_write(vpu, &av1_use_temporal1_mvs, 1);
> > > +
> > > +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +				rf_order_hint,
> > > +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST_FRAME]);
> > > +		hantro_reg_write(vpu, &av1_mf2_last_offset, val);
> > > +
> > > +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +				rf_order_hint,
> > > +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST2_FRAME]);
> > > +		hantro_reg_write(vpu, &av1_mf2_last2_offset, val);
> > > +
> > > +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +				rf_order_hint,
> > > +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST3_FRAME]);
> > > +		hantro_reg_write(vpu, &av1_mf2_last3_offset, val);
> > > +
> > > +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +				rf_order_hint,
> > > +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_GOLDEN_FRAME]);
> > > +		hantro_reg_write(vpu, &av1_mf2_golden_offset, val);
> > > +
> > > +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +				rf_order_hint,
> > > +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_BWDREF_FRAME]);
> > > +		hantro_reg_write(vpu, &av1_mf2_bwdref_offset, val);
> > > +
> > > +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +				rf_order_hint,
> > > +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF2_FRAME]);
> > > +		hantro_reg_write(vpu, &av1_mf2_altref2_offset, val);
> > > +
> > > +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +				rf_order_hint,
> > > +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME]);
> > > +		hantro_reg_write(vpu, &av1_mf2_altref_offset, val);
> > > +	}
> > > +
> > > +	hantro_reg_write(vpu, &av1_mf3_last_offset, 0);
> > > +	hantro_reg_write(vpu, &av1_mf3_last2_offset, 0);
> > > +	hantro_reg_write(vpu, &av1_mf3_last3_offset, 0);
> > > +	hantro_reg_write(vpu, &av1_mf3_golden_offset, 0);
> > > +	hantro_reg_write(vpu, &av1_mf3_bwdref_offset, 0);
> > > +	hantro_reg_write(vpu, &av1_mf3_altref2_offset, 0);
> > > +	hantro_reg_write(vpu, &av1_mf3_altref_offset, 0);
> > > +
> > > +	if (use_ref_frame_mvs && ref_ind > 2 &&
> > > +	    cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE
> > > +	    && cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
> > > +		int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, refs_selected[2]);
> > > +		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[2]);
> > > +		int val;
> > > +
> > > +		hantro_reg_write(vpu, &av1_use_temporal2_mvs, 1);
> > > +
> > > +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +				rf_order_hint,
> > > +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST_FRAME]);
> > > +		hantro_reg_write(vpu, &av1_mf3_last_offset, val);
> > > +
> > > +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +				rf_order_hint,
> > > +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST2_FRAME]);
> > > +		hantro_reg_write(vpu, &av1_mf3_last2_offset, val);
> > > +
> > > +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +				rf_order_hint,
> > > +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST3_FRAME]);
> > > +		hantro_reg_write(vpu, &av1_mf3_last3_offset, val);
> > > +
> > > +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +				rf_order_hint,
> > > +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_GOLDEN_FRAME]);
> > > +		hantro_reg_write(vpu, &av1_mf3_golden_offset, val);
> > > +
> > > +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +				rf_order_hint,
> > > +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_BWDREF_FRAME]);
> > > +		hantro_reg_write(vpu, &av1_mf3_bwdref_offset, val);
> > > +
> > > +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +				rf_order_hint,
> > > +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF2_FRAME]);
> > > +		hantro_reg_write(vpu, &av1_mf3_altref2_offset, val);
> > > +
> > > +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
> > > +				rf_order_hint,
> > > +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME]);
> > > +		hantro_reg_write(vpu, &av1_mf3_altref_offset, val);
> > > +	}
> > > +
> > > +	hantro_reg_write(vpu, &av1_cur_last_offset, cur_offset[0]);
> > > +	hantro_reg_write(vpu, &av1_cur_last2_offset, cur_offset[1]);
> > > +	hantro_reg_write(vpu, &av1_cur_last3_offset, cur_offset[2]);
> > > +	hantro_reg_write(vpu, &av1_cur_golden_offset, cur_offset[3]);
> > > +	hantro_reg_write(vpu, &av1_cur_bwdref_offset, cur_offset[4]);
> > > +	hantro_reg_write(vpu, &av1_cur_altref2_offset, cur_offset[5]);
> > > +	hantro_reg_write(vpu, &av1_cur_altref_offset, cur_offset[6]);
> > > +
> > > +	hantro_reg_write(vpu, &av1_cur_last_roffset, cur_roffset[0]);
> > > +	hantro_reg_write(vpu, &av1_cur_last2_roffset, cur_roffset[1]);
> > > +	hantro_reg_write(vpu, &av1_cur_last3_roffset, cur_roffset[2]);
> > > +	hantro_reg_write(vpu, &av1_cur_golden_roffset, cur_roffset[3]);
> > > +	hantro_reg_write(vpu, &av1_cur_bwdref_roffset, cur_roffset[4]);
> > > +	hantro_reg_write(vpu, &av1_cur_altref2_roffset, cur_roffset[5]);
> > > +	hantro_reg_write(vpu, &av1_cur_altref_roffset, cur_roffset[6]);
> > > +
> > > +	hantro_reg_write(vpu, &av1_mf1_type, mf_types[0] - V4L2_AV1_REF_LAST_FRAME);
> > > +	hantro_reg_write(vpu, &av1_mf2_type, mf_types[1] - V4L2_AV1_REF_LAST_FRAME);
> > > +	hantro_reg_write(vpu, &av1_mf3_type, mf_types[2] - V4L2_AV1_REF_LAST_FRAME);
> > > +}
> > > +
> > > +static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
> > > +	int frame_type = frame->frame_type;
> > > +	bool allow_intrabc = !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
> > > +	int ref_count[AV1DEC_MAX_PIC_BUFFERS] = { 0 };
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +	int i, ref_frames = 0;
> > > +	bool scale_enable = false;
> > > +
> > > +	if (IS_INTRA(frame_type) && !allow_intrabc)
> > > +		return;
> > > +
> > > +	if (!allow_intrabc) {
> > > +		for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
> > > +			int idx = rockchip_vpu981_get_frame_index(ctx, i);
> > > +
> > > +			if (idx >= 0)
> > > +				ref_count[idx]++;
> > > +		}
> > > +
> > > +		for (i = 0; i < AV1DEC_MAX_PIC_BUFFERS; i++) {
> > > +			if (ref_count[i])
> > > +				ref_frames++;
> > > +		}
> > > +	} else {
> > > +		ref_frames = 1;
> > > +	}
> > > +	hantro_reg_write(vpu, &av1_ref_frames, ref_frames);
> > > +
> > > +	rockchip_vpu981_av1_dec_set_frame_sign_bias(ctx);
> > > +
> > > +	for (i = V4L2_AV1_REF_LAST_FRAME; i < V4L2_AV1_NUM_REF_FRAMES; i++) {
> > > +		uint32_t ref = i - 1;
> > > +		int idx = 0;
> > > +		int width, height;
> > > +
> > > +		if (allow_intrabc) {
> > > +			idx = av1_dec->current_frame_index;
> > > +			width = frame->frame_width_minus_1 + 1;
> > > +			height = frame->frame_height_minus_1 + 1;
> > > +		} else {
> > > +			if (rockchip_vpu981_get_frame_index(ctx, ref) > 0)
> > > +				idx = rockchip_vpu981_get_frame_index(ctx, ref);
> > > +			width = av1_dec->frame_refs[idx].width;
> > > +			height = av1_dec->frame_refs[idx].height;
> > > +		}
> > > +
> > > +		scale_enable |=
> > > +		    rockchip_vpu981_av1_dec_set_ref(ctx, ref, idx, width,
> > > +						    height);
> > > +
> > > +		rockchip_vpu981_av1_dec_set_sign_bias(ctx, ref,
> > > +						      av1_dec->ref_frame_sign_bias[i]);
> > > +	}
> > > +	hantro_reg_write(vpu, &av1_ref_scaling_enable, scale_enable);
> > > +
> > > +	hantro_reg_write(vpu, &av1_ref0_gm_mode,
> > > +			 frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME]);
> > > +	hantro_reg_write(vpu, &av1_ref1_gm_mode,
> > > +			 frame->global_motion.type[V4L2_AV1_REF_LAST2_FRAME]);
> > > +	hantro_reg_write(vpu, &av1_ref2_gm_mode,
> > > +			 frame->global_motion.type[V4L2_AV1_REF_LAST3_FRAME]);
> > > +	hantro_reg_write(vpu, &av1_ref3_gm_mode,
> > > +			 frame->global_motion.type[V4L2_AV1_REF_GOLDEN_FRAME]);
> > > +	hantro_reg_write(vpu, &av1_ref4_gm_mode,
> > > +			 frame->global_motion.type[V4L2_AV1_REF_BWDREF_FRAME]);
> > > +	hantro_reg_write(vpu, &av1_ref5_gm_mode,
> > > +			 frame->global_motion.type[V4L2_AV1_REF_ALTREF2_FRAME]);
> > > +	hantro_reg_write(vpu, &av1_ref6_gm_mode,
> > > +			 frame->global_motion.type[V4L2_AV1_REF_ALTREF_FRAME]);
> > > +
> > > +	rockchip_vpu981_av1_dec_set_other_frames(ctx);
> > > +}
> > > +
> > > +static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +
> > > +	hantro_reg_write(vpu, &av1_skip_mode,
> > > +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT));
> > > +	hantro_reg_write(vpu, &av1_tempor_mvp_e,
> > > +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS));
> > > +	hantro_reg_write(vpu, &av1_delta_lf_res_log,
> > > +			 ctrls->frame->loop_filter.delta_lf_res);
> > > +	hantro_reg_write(vpu, &av1_delta_lf_multi,
> > > +			 !!(ctrls->frame->loop_filter.flags
> > > +			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI));
> > > +	hantro_reg_write(vpu, &av1_delta_lf_present,
> > > +			 !!(ctrls->frame->loop_filter.flags
> > > +			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT));
> > > +	hantro_reg_write(vpu, &av1_disable_cdf_update,
> > > +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE));
> > > +	hantro_reg_write(vpu, &av1_allow_warp,
> > > +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION));
> > > +	hantro_reg_write(vpu, &av1_show_frame,
> > > +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SHOW_FRAME));
> > > +	hantro_reg_write(vpu, &av1_switchable_motion_mode,
> > > +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE));
> > > +	hantro_reg_write(vpu, &av1_enable_cdef,
> > > +			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF));
> > > +	hantro_reg_write(vpu, &av1_allow_masked_compound,
> > > +			 !!(ctrls->sequence->flags
> > > +			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND));
> > > +	hantro_reg_write(vpu, &av1_allow_interintra,
> > > +			 !!(ctrls->sequence->flags
> > > +			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND));
> > > +	hantro_reg_write(vpu, &av1_enable_intra_edge_filter,
> > > +			 !!(ctrls->sequence->flags
> > > +			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER));
> > > +	hantro_reg_write(vpu, &av1_allow_filter_intra,
> > > +			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA));
> > > +	hantro_reg_write(vpu, &av1_enable_jnt_comp,
> > > +			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP));
> > > +	hantro_reg_write(vpu, &av1_enable_dual_filter,
> > > +			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER));
> > > +	hantro_reg_write(vpu, &av1_reduced_tx_set_used,
> > > +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET));
> > > +	hantro_reg_write(vpu, &av1_allow_screen_content_tools,
> > > +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS));
> > > +	hantro_reg_write(vpu, &av1_allow_intrabc,
> > > +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC));
> > > +
> > > +	if (!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS))
> > > +		hantro_reg_write(vpu, &av1_force_interger_mv, 0);
> > > +	else
> > > +		hantro_reg_write(vpu, &av1_force_interger_mv,
> > > +				 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV));
> > > +
> > > +	hantro_reg_write(vpu, &av1_blackwhite_e, 0);
> > > +	hantro_reg_write(vpu, &av1_delta_q_res_log, ctrls->frame->quantization.delta_q_res);
> > > +	hantro_reg_write(vpu, &av1_delta_q_present,
> > > +			 !!(ctrls->frame->quantization.flags
> > > +			    & V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT));
> > > +
> > > +	hantro_reg_write(vpu, &av1_idr_pic_e, !ctrls->frame->frame_type);
> > > +	hantro_reg_write(vpu, &av1_quant_base_qindex, ctrls->frame->quantization.base_q_idx);
> > > +	hantro_reg_write(vpu, &av1_bit_depth_y_minus8, ctx->bit_depth - 8);
> > > +	hantro_reg_write(vpu, &av1_bit_depth_c_minus8, ctx->bit_depth - 8);
> > I believe these registers are read by the PP when using format 0; perhaps
> > something nice to comment about, as they will impact the pixels produced by
> > the post-processor, which can be a surprising side effect. Bits 11:8 also hold
> > sw_bit_depth_out_minus8, but I don't know if it's used for AV1; in my doc it's
> > only used for the AVS2 codec.
> 
> I see no indication of that in the MPP code.
> The post-processor output pixel format is selected by writing to the av1_pp_out_format field.

You now have access to the same doc as me.


> > 
> > I think it's good to underline that in VC8000/VC9000, some registers are shared
> > across multiple codecs.
> > 
> > > +
> > > +	hantro_reg_write(vpu, &av1_mcomp_filt_type, ctrls->frame->interpolation_filter);
> > > +	hantro_reg_write(vpu, &av1_high_prec_mv_e,
> > > +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV));
> > > +	hantro_reg_write(vpu, &av1_comp_pred_mode,
> > > +			 (ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT) ? 2 : 0);
> > > +	hantro_reg_write(vpu, &av1_transform_mode, (ctrls->frame->tx_mode == 1) ? 3 : 4);
> > > +	hantro_reg_write(vpu, &av1_max_cb_size,
> > > +			 (ctrls->sequence->flags
> > > +			  & V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK) ? 7 : 6);
> > > +	hantro_reg_write(vpu, &av1_min_cb_size, 3);
> > > +
> > > +	hantro_reg_write(vpu, &av1_comp_pred_fixed_ref, 0);
> > > +	hantro_reg_write(vpu, &av1_comp_pred_var_ref0_av1, 0);
> > > +	hantro_reg_write(vpu, &av1_comp_pred_var_ref1_av1, 0);
> > > +	hantro_reg_write(vpu, &av1_filt_level_seg0, 0);
> > > +	hantro_reg_write(vpu, &av1_filt_level_seg1, 0);
> > > +	hantro_reg_write(vpu, &av1_filt_level_seg2, 0);
> > > +	hantro_reg_write(vpu, &av1_filt_level_seg3, 0);
> > > +	hantro_reg_write(vpu, &av1_filt_level_seg4, 0);
> > > +	hantro_reg_write(vpu, &av1_filt_level_seg5, 0);
> > > +	hantro_reg_write(vpu, &av1_filt_level_seg6, 0);
> > > +	hantro_reg_write(vpu, &av1_filt_level_seg7, 0);
> > > +
> > > +	hantro_reg_write(vpu, &av1_qp_delta_y_dc_av1, ctrls->frame->quantization.delta_q_y_dc);
> > > +	hantro_reg_write(vpu, &av1_qp_delta_ch_dc_av1, ctrls->frame->quantization.delta_q_u_dc);
> > > +	hantro_reg_write(vpu, &av1_qp_delta_ch_ac_av1, ctrls->frame->quantization.delta_q_u_ac);
> > > +	if (ctrls->frame->quantization.flags & V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX) {
> > > +		hantro_reg_write(vpu, &av1_qmlevel_y, ctrls->frame->quantization.qm_y);
> > > +		hantro_reg_write(vpu, &av1_qmlevel_u, ctrls->frame->quantization.qm_u);
> > > +		hantro_reg_write(vpu, &av1_qmlevel_v, ctrls->frame->quantization.qm_v);
> > > +	} else {
> > > +		hantro_reg_write(vpu, &av1_qmlevel_y, 0xff);
> > > +		hantro_reg_write(vpu, &av1_qmlevel_u, 0xff);
> > > +		hantro_reg_write(vpu, &av1_qmlevel_v, 0xff);
> > > +	}
> > > +
> > > +	hantro_reg_write(vpu, &av1_lossless_e, rockchip_vpu981_av1_dec_is_lossless(ctx));
> > > +	hantro_reg_write(vpu, &av1_quant_delta_v_dc, ctrls->frame->quantization.delta_q_v_dc);
> > > +	hantro_reg_write(vpu, &av1_quant_delta_v_ac, ctrls->frame->quantization.delta_q_v_ac);
> > > +
> > > +	hantro_reg_write(vpu, &av1_skip_ref0,
> > > +			 (ctrls->frame->skip_mode_frame[0]) ? ctrls->frame->skip_mode_frame[0] : 1);
> > > +	hantro_reg_write(vpu, &av1_skip_ref1,
> > > +			 (ctrls->frame->skip_mode_frame[1]) ? ctrls->frame->skip_mode_frame[1] : 1);
> > > +
> > > +	hantro_write_addr(vpu, AV1_MC_SYNC_CURR, av1_dec->tile_buf.dma);
> > > +	hantro_write_addr(vpu, AV1_MC_SYNC_LEFT, av1_dec->tile_buf.dma);
> > > +}
> > > +
> > > +static void
> > > +rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx *ctx,
> > > +					 struct vb2_v4l2_buffer *vb2_src)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
> > > +	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
> > > +	    ctrls->tile_group_entry;
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +	dma_addr_t src_dma;
> > > +	u32 src_len, src_buf_len;
> > > +	int start_bit, offset;
> > > +
> > > +	src_dma = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0);
> > > +	src_len = vb2_get_plane_payload(&vb2_src->vb2_buf, 0);
> > > +	src_buf_len = vb2_plane_size(&vb2_src->vb2_buf, 0);
> > > +
> > > +	start_bit = (group_entry[0].tile_offset & 0xf) * 8;
> > > +	offset = group_entry[0].tile_offset & ~0xf;
> > > +
> > > +	hantro_reg_write(vpu, &av1_strm_buffer_len, src_buf_len);
> > > +	hantro_reg_write(vpu, &av1_strm_start_bit, start_bit);
> > > +	hantro_reg_write(vpu, &av1_stream_len, src_len);
> > > +	hantro_reg_write(vpu, &av1_strm_start_offset, 0);
> > > +	hantro_write_addr(vpu, AV1_INPUT_STREAM, src_dma + offset);
> > > +}
> > > +
> > > +static void
> > > +rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +	struct hantro_decoded_buffer *dst;
> > > +	struct vb2_v4l2_buffer *vb2_dst;
> > > +	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
> > > +	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
> > > +	size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
> > > +
> > > +	vb2_dst = av1_dec->frame_refs[av1_dec->current_frame_index].vb2_ref;
> > > +	dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf);
> > > +	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
> > > +	chroma_addr = luma_addr + cr_offset;
> > > +	mv_addr = luma_addr + mv_offset;
> > > +
> > > +	hantro_write_addr(vpu, AV1_TILE_OUT_LU, luma_addr);
> > > +	hantro_write_addr(vpu, AV1_TILE_OUT_CH, chroma_addr);
> > > +	hantro_write_addr(vpu, AV1_TILE_OUT_MV, mv_addr);
> > > +}
> > > +
> > > +int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +	struct vb2_v4l2_buffer *vb2_src;
> > > +	int ret;
> > > +
> > > +	hantro_start_prepare_run(ctx);
> > > +
> > > +	ret = rockchip_vpu981_av1_dec_prepare_run(ctx);
> > > +	if (ret)
> > > +		goto prepare_error;
> > > +
> > > +	vb2_src = hantro_get_src_buf(ctx);
> > > +	if (!vb2_src)
> > > +		goto prepare_error;
> > > +
> > > +	rockchip_vpu981_av1_dec_clean_refs(ctx);
> > > +	rockchip_vpu981_av1_dec_frame_ref(ctx, vb2_src->vb2_buf.timestamp);
> > > +
> > > +	rockchip_vpu981_av1_dec_set_parameters(ctx);
> > > +	rockchip_vpu981_av1_dec_set_global_model(ctx);
> > > +	rockchip_vpu981_av1_dec_set_tile_info(ctx);
> > > +	rockchip_vpu981_av1_dec_set_reference_frames(ctx);
> > > +	rockchip_vpu981_av1_dec_set_segmentation(ctx);
> > > +	rockchip_vpu981_av1_dec_set_loopfilter(ctx);
> > > +	rockchip_vpu981_av1_dec_set_picture_dimensions(ctx);
> > > +	rockchip_vpu981_av1_dec_set_cdef(ctx);
> > > +	rockchip_vpu981_av1_dec_set_lr(ctx);
> > > +	rockchip_vpu981_av1_dec_set_prob(ctx);
> > > +
> > > +	hantro_reg_write(vpu, &av1_dec_mode, AV1_DEC_MODE);
> > > +	hantro_reg_write(vpu, &av1_dec_out_ec_byte_word, 0);
> > > +	hantro_reg_write(vpu, &av1_write_mvs_e, 1);
> > > +	hantro_reg_write(vpu, &av1_dec_out_ec_bypass, 1);
> > > +	hantro_reg_write(vpu, &av1_dec_clk_gate_e, 1);
> > > +
> > > +	hantro_reg_write(vpu, &av1_dec_abort_e, 0);
> > > +	hantro_reg_write(vpu, &av1_dec_tile_int_e, 0);
> > > +
> > > +	hantro_reg_write(vpu, &av1_dec_alignment, 64);
> > > +	hantro_reg_write(vpu, &av1_apf_disable, 0);
> > > +	hantro_reg_write(vpu, &av1_apf_threshold, 8);
> > > +	hantro_reg_write(vpu, &av1_dec_buswidth, 2);
> > > +	hantro_reg_write(vpu, &av1_dec_max_burst, 16);
> > > +	hantro_reg_write(vpu, &av1_error_conceal_e, 0);
> > > +	hantro_reg_write(vpu, &av1_axi_rd_ostd_threshold, 64);
> > > +	hantro_reg_write(vpu, &av1_axi_wr_ostd_threshold, 64);
> > > +
> > > +	hantro_reg_write(vpu, &av1_ext_timeout_cycles, 0xfffffff);
> > > +	hantro_reg_write(vpu, &av1_ext_timeout_override_e, 1);
> > > +	hantro_reg_write(vpu, &av1_timeout_cycles, 0xfffffff);
> > > +	hantro_reg_write(vpu, &av1_timeout_override_e, 1);
> > > +
> > > +	rockchip_vpu981_av1_dec_set_output_buffer(ctx);
> > > +	rockchip_vpu981_av1_dec_set_input_buffer(ctx, vb2_src);
> > > +
> > > +	hantro_end_prepare_run(ctx);
> > > +
> > > +	hantro_reg_write(vpu, &av1_dec_e, 1);
> > > +
> > > +	return 0;
> > > +
> > > +prepare_error:
> > > +	hantro_end_prepare_run(ctx);
> > > +	hantro_irq_done(vpu, VB2_BUF_STATE_ERROR);
> > > +	return ret;
> > > +}
> > > +
> > > +static void rockchip_vpu981_postproc_enable(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +	int width = ctx->dst_fmt.width;
> > > +	int height = ctx->dst_fmt.height;
> > > +	struct vb2_v4l2_buffer *vb2_dst;
> > > +	size_t chroma_offset;
> > > +	dma_addr_t dst_dma;
> > > +
> > > +	vb2_dst = hantro_get_dst_buf(ctx);
> > > +
> > > +	dst_dma = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
> > > +	chroma_offset = ctx->dst_fmt.plane_fmt[0].bytesperline *
> > > +	    ctx->dst_fmt.height;
> > > +
> > > +	/* enable post processor */
> > > +	hantro_reg_write(vpu, &av1_pp_out_e, 1);
> > > +	hantro_reg_write(vpu, &av1_pp_in_format, 0);
> > > +	hantro_reg_write(vpu, &av1_pp0_dup_hor, 1);
> > > +	hantro_reg_write(vpu, &av1_pp0_dup_ver, 1);
> > > +
> > > +	hantro_reg_write(vpu, &av1_pp_in_height, height / 2);
> > > +	hantro_reg_write(vpu, &av1_pp_in_width, width / 2);
> > > +	hantro_reg_write(vpu, &av1_pp_out_height, height);
> > > +	hantro_reg_write(vpu, &av1_pp_out_width, width);
> > > +	hantro_reg_write(vpu, &av1_pp_out_y_stride,
> > > +			 ctx->dst_fmt.plane_fmt[0].bytesperline);
> > > +	hantro_reg_write(vpu, &av1_pp_out_c_stride,
> > > +			 ctx->dst_fmt.plane_fmt[0].bytesperline);
> > > +	switch (ctx->dst_fmt.pixelformat) {
> > > +	case V4L2_PIX_FMT_P010:
> > > +		hantro_reg_write(vpu, &av1_pp_out_format, 1);
> > > +		break;
> > > +	case V4L2_PIX_FMT_NV12:
> > > +		hantro_reg_write(vpu, &av1_pp_out_format, 3);
> > > +		break;
> > > +	default:
> > > +		hantro_reg_write(vpu, &av1_pp_out_format, 0);
> > > +	}
> > > +
> > > +	hantro_reg_write(vpu, &av1_ppd_blend_exist, 0);
> > > +	hantro_reg_write(vpu, &av1_ppd_dith_exist, 0);
> > > +	hantro_reg_write(vpu, &av1_ablend_crop_e, 0);
> > > +	hantro_reg_write(vpu, &av1_pp_format_customer1_e, 0);
> > > +	hantro_reg_write(vpu, &av1_pp_crop_exist, 0);
> > > +	hantro_reg_write(vpu, &av1_pp_up_level, 0);
> > > +	hantro_reg_write(vpu, &av1_pp_down_level, 0);
> > > +	hantro_reg_write(vpu, &av1_pp_exist, 0);
> > > +
> > > +	hantro_write_addr(vpu, AV1_PP_OUT_LU, dst_dma);
> > > +	hantro_write_addr(vpu, AV1_PP_OUT_CH, dst_dma + chroma_offset);
> > > +}
> > > +
> > > +static void rockchip_vpu981_postproc_disable(struct hantro_ctx *ctx)
> > > +{
> > > +	struct hantro_dev *vpu = ctx->dev;
> > > +
> > > +	/* disable post processor */
> > > +	hantro_reg_write(vpu, &av1_pp_out_e, 0);
> > > +}
> > > +
> > > +const struct hantro_postproc_ops rockchip_vpu981_postproc_ops = {
> > > +	.enable = rockchip_vpu981_postproc_enable,
> > > +	.disable = rockchip_vpu981_postproc_disable,
> > > +};
> > As enabling the post-proc is done by the common driver, we need a mechanism to
> > alter the core's decisions, as we really need to enable post-proc to produce
> > film grain. Fortunately, not applying this filter isn't visually bad, but that's
> > not the intent of the video author, so it should not be like this by default.
> 
> I do agree but I think it is out of the scope of this patch.

I believe I managed to change your mind. Please do carry these discussions in a
public place though; the linux-media IRC channel is a good one.

> 
> > 
> > > diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h b/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
> > > new file mode 100644
> > > index 000000000000..182e6c830ff6
> > > --- /dev/null
> > > +++ b/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
> > > @@ -0,0 +1,477 @@
> > > +/* SPDX-License-Identifier: GPL-2.0-only */
> > > +/*
> > > + * Copyright (c) 2022, Collabora
> > > + *
> > > + * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
> > > + */
> > > +
> > > +#ifndef _ROCKCHIP_VPU981_REGS_H_
> > > +#define _ROCKCHIP_VPU981_REGS_H_
> > > +
> > > +#include "hantro.h"
> > > +
> > > +#define AV1_SWREG(nr)	((nr) * 4)
> > > +
> > > +#define AV1_DEC_REG(b, s, m) \
> > > +	((const struct hantro_reg) { \
> > > +		.base = AV1_SWREG(b), \
> > > +		.shift = s, \
> > > +		.mask = m, \
> > > +	})
> > Just a note that a lot of this is identical to the VC8000 register sets. But
> > until someone upstreams VC8000 and until we have more VC9000 support, it's fine
> > to keep that as if it were RK-specific.
> > 
> > > +
> > > +#define AV1_REG_INTERRUPT		AV1_SWREG(1)
> > > +#define AV1_REG_INTERRUPT_DEC_RDY_INT	BIT(12)
> > > +
> > > +#define AV1_REG_CONFIG			AV1_SWREG(2)
> > > +#define AV1_REG_CONFIG_DEC_CLK_GATE_E	BIT(10)
> > > +
> > > +#define av1_dec_e			AV1_DEC_REG(1, 0, 0x1)
> > > +#define av1_dec_abort_e			AV1_DEC_REG(1, 5, 0x1)
> > > +#define av1_dec_tile_int_e		AV1_DEC_REG(1, 7, 0x1)
> > > +
> > > +#define av1_dec_clk_gate_e		AV1_DEC_REG(2, 10, 0x1)
> > > +
> > > +#define av1_dec_out_ec_bypass		AV1_DEC_REG(3, 8,  0x1)
> > > +#define av1_write_mvs_e			AV1_DEC_REG(3, 12, 0x1)
> > > +#define av1_filtering_dis		AV1_DEC_REG(3, 14, 0x1)
> > > +#define av1_dec_out_dis			AV1_DEC_REG(3, 15, 0x1)
> > > +#define av1_dec_out_ec_byte_word	AV1_DEC_REG(3, 16, 0x1)
> > > +#define av1_skip_mode			AV1_DEC_REG(3, 26, 0x1)
> > > +#define av1_dec_mode			AV1_DEC_REG(3, 27, 0x1f)
> > > +
> > > +#define av1_ref_frames			AV1_DEC_REG(4, 0, 0xf)
> > > +#define av1_pic_height_in_cbs		AV1_DEC_REG(4, 6, 0x1fff)
> > > +#define av1_pic_width_in_cbs		AV1_DEC_REG(4, 19, 0x1fff)
> > > +
> > > +#define av1_ref_scaling_enable		AV1_DEC_REG(5, 0, 0x1)
> > > +#define av1_filt_level_base_gt32	AV1_DEC_REG(5, 1, 0x1)
> > > +#define av1_error_resilient		AV1_DEC_REG(5, 2, 0x1)
> > > +#define av1_force_interger_mv		AV1_DEC_REG(5, 3, 0x1)
> > > +#define av1_allow_intrabc		AV1_DEC_REG(5, 4, 0x1)
> > > +#define av1_allow_screen_content_tools	AV1_DEC_REG(5, 5, 0x1)
> > > +#define av1_reduced_tx_set_used		AV1_DEC_REG(5, 6, 0x1)
> > > +#define av1_enable_dual_filter		AV1_DEC_REG(5, 7, 0x1)
> > > +#define av1_enable_jnt_comp		AV1_DEC_REG(5, 8, 0x1)
> > > +#define av1_allow_filter_intra		AV1_DEC_REG(5, 9, 0x1)
> > > +#define av1_enable_intra_edge_filter	AV1_DEC_REG(5, 10, 0x1)
> > > +#define av1_tempor_mvp_e		AV1_DEC_REG(5, 11, 0x1)
> > > +#define av1_allow_interintra		AV1_DEC_REG(5, 12, 0x1)
> > > +#define av1_allow_masked_compound	AV1_DEC_REG(5, 13, 0x1)
> > > +#define av1_enable_cdef			AV1_DEC_REG(5, 14, 0x1)
> > > +#define av1_switchable_motion_mode	AV1_DEC_REG(5, 15, 0x1)
> > > +#define av1_show_frame			AV1_DEC_REG(5, 16, 0x1)
> > > +#define av1_superres_is_scaled		AV1_DEC_REG(5, 17, 0x1)
> > > +#define av1_allow_warp			AV1_DEC_REG(5, 18, 0x1)
> > > +#define av1_disable_cdf_update		AV1_DEC_REG(5, 19, 0x1)
> > > +#define av1_preskip_segid		AV1_DEC_REG(5, 20, 0x1)
> > > +#define av1_delta_lf_present		AV1_DEC_REG(5, 21, 0x1)
> > > +#define av1_delta_lf_multi		AV1_DEC_REG(5, 22, 0x1)
> > > +#define av1_delta_lf_res_log		AV1_DEC_REG(5, 23, 0x3)
> > > +#define av1_strm_start_bit		AV1_DEC_REG(5, 25, 0x7f)
> > > +
> > > +#define	av1_stream_len			AV1_DEC_REG(6, 0, 0xffffffff)
> > > +
> > > +#define av1_delta_q_present		AV1_DEC_REG(7, 0, 0x1)
> > > +#define av1_delta_q_res_log		AV1_DEC_REG(7, 1, 0x3)
> > > +#define av1_cdef_damping		AV1_DEC_REG(7, 3, 0x3)
> > > +#define av1_cdef_bits			AV1_DEC_REG(7, 5, 0x3)
> > > +#define av1_apply_grain			AV1_DEC_REG(7, 7, 0x1)
> > > +#define av1_num_y_points_b		AV1_DEC_REG(7, 8, 0x1)
> > > +#define av1_num_cb_points_b		AV1_DEC_REG(7, 9, 0x1)
> > > +#define av1_num_cr_points_b		AV1_DEC_REG(7, 10, 0x1)
> > > +#define av1_overlap_flag		AV1_DEC_REG(7, 11, 0x1)
> > > +#define av1_clip_to_restricted_range	AV1_DEC_REG(7, 12, 0x1)
> > > +#define av1_chroma_scaling_from_luma	AV1_DEC_REG(7, 13, 0x1)
> > > +#define av1_random_seed			AV1_DEC_REG(7, 14, 0xffff)
> > > +#define av1_blackwhite_e		AV1_DEC_REG(7, 30, 0x1)
> > > +
> > > +#define av1_scaling_shift		AV1_DEC_REG(8, 0, 0xf)
> > > +#define av1_bit_depth_c_minus8		AV1_DEC_REG(8, 4, 0x3)
> > > +#define av1_bit_depth_y_minus8		AV1_DEC_REG(8, 6, 0x3)
> > > +#define av1_quant_base_qindex		AV1_DEC_REG(8, 8, 0xff)
> > > +#define av1_idr_pic_e			AV1_DEC_REG(8, 16, 0x1)
> > > +#define av1_superres_pic_width		AV1_DEC_REG(8, 17, 0x7fff)
> > > +
> > > +#define av1_ref4_sign_bias		AV1_DEC_REG(9, 2, 0x1)
> > > +#define av1_ref5_sign_bias		AV1_DEC_REG(9, 3, 0x1)
> > > +#define av1_ref6_sign_bias		AV1_DEC_REG(9, 4, 0x1)
> > > +#define av1_mf1_type			AV1_DEC_REG(9, 5, 0x7)
> > > +#define av1_mf2_type			AV1_DEC_REG(9, 8, 0x7)
> > > +#define av1_mf3_type			AV1_DEC_REG(9, 11, 0x7)
> > > +#define av1_scale_denom_minus9		AV1_DEC_REG(9, 14, 0x7)
> > > +#define av1_last_active_seg		AV1_DEC_REG(9, 17, 0x7)
> > > +#define av1_context_update_tile_id	AV1_DEC_REG(9, 20, 0xfff)
> > > +
> > > +#define av1_tile_transpose		AV1_DEC_REG(10, 0, 0x1)
> > > +#define av1_tile_enable			AV1_DEC_REG(10, 1, 0x1)
> > > +#define av1_multicore_full_width	AV1_DEC_REG(10,	2, 0xff)
> > > +#define av1_num_tile_rows_8k		AV1_DEC_REG(10, 10, 0x7f)
> > > +#define av1_num_tile_cols_8k		AV1_DEC_REG(10, 17, 0x7f)
> > > +#define av1_multicore_tile_start_x	AV1_DEC_REG(10, 24, 0xff)
> > > +
> > > +#define av1_use_temporal3_mvs		AV1_DEC_REG(11, 0, 0x1)
> > > +#define av1_use_temporal2_mvs		AV1_DEC_REG(11, 1, 0x1)
> > > +#define av1_use_temporal1_mvs		AV1_DEC_REG(11, 2, 0x1)
> > > +#define av1_use_temporal0_mvs		AV1_DEC_REG(11, 3, 0x1)
> > > +#define av1_comp_pred_mode		AV1_DEC_REG(11, 4, 0x3)
> > > +#define av1_high_prec_mv_e		AV1_DEC_REG(11, 7, 0x1)
> > > +#define av1_mcomp_filt_type		AV1_DEC_REG(11, 8, 0x7)
> > > +#define av1_multicore_expect_context_update	AV1_DEC_REG(11, 11, 0x1)
> > > +#define av1_multicore_sbx_offset	AV1_DEC_REG(11, 12, 0x7f)
> > > +#define av1_ulticore_tile_col		AV1_DEC_REG(11, 19, 0x7f)
> > > +#define av1_transform_mode		AV1_DEC_REG(11, 27, 0x7)
> > > +#define av1_dec_tile_size_mag		AV1_DEC_REG(11, 30, 0x3)
> > > +
> > > +#define av1_seg_quant_sign		AV1_DEC_REG(12, 2, 0xff)
> > > +#define av1_max_cb_size			AV1_DEC_REG(12, 10, 0x7)
> > > +#define av1_min_cb_size			AV1_DEC_REG(12, 13, 0x7)
> > > +#define av1_comp_pred_fixed_ref		AV1_DEC_REG(12, 16, 0x7)
> > > +#define av1_multicore_tile_width	AV1_DEC_REG(12, 19, 0x7f)
> > > +#define av1_pic_height_pad		AV1_DEC_REG(12, 26, 0x7)
> > > +#define av1_pic_width_pad		AV1_DEC_REG(12, 29, 0x7)
> > > +
> > > +#define av1_segment_e			AV1_DEC_REG(13, 0, 0x1)
> > > +#define av1_segment_upd_e		AV1_DEC_REG(13, 1, 0x1)
> > > +#define av1_segment_temp_upd_e		AV1_DEC_REG(13, 2, 0x1)
> > > +#define av1_comp_pred_var_ref0_av1	AV1_DEC_REG(13, 3, 0x7)
> > > +#define av1_comp_pred_var_ref1_av1	AV1_DEC_REG(13, 6, 0x7)
> > > +#define av1_lossless_e			AV1_DEC_REG(13, 9, 0x1)
> > > +#define av1_qp_delta_ch_ac_av1		AV1_DEC_REG(13, 11, 0x7f)
> > > +#define av1_qp_delta_ch_dc_av1		AV1_DEC_REG(13, 18, 0x7f)
> > > +#define av1_qp_delta_y_dc_av1		AV1_DEC_REG(13, 25, 0x7f)
> > > +
> > > +#define av1_quant_seg0			AV1_DEC_REG(14, 0, 0xff)
> > > +#define av1_filt_level_seg0		AV1_DEC_REG(14, 8, 0x3f)
> > > +#define av1_skip_seg0			AV1_DEC_REG(14, 14, 0x1)
> > > +#define av1_refpic_seg0			AV1_DEC_REG(14, 15, 0xf)
> > > +#define av1_filt_level_delta0_seg0	AV1_DEC_REG(14, 19, 0x7f)
> > > +#define av1_filt_level0			AV1_DEC_REG(14, 26, 0x3f)
> > > +
> > > +#define av1_quant_seg1			AV1_DEC_REG(15, 0, 0xff)
> > > +#define av1_filt_level_seg1		AV1_DEC_REG(15, 8, 0x3f)
> > > +#define av1_skip_seg1			AV1_DEC_REG(15, 14, 0x1)
> > > +#define av1_refpic_seg1			AV1_DEC_REG(15, 15, 0xf)
> > > +#define av1_filt_level_delta0_seg1	AV1_DEC_REG(15, 19, 0x7f)
> > > +#define av1_filt_level1			AV1_DEC_REG(15, 26, 0x3f)
> > > +
> > > +#define av1_quant_seg2			AV1_DEC_REG(16, 0, 0xff)
> > > +#define av1_filt_level_seg2		AV1_DEC_REG(16, 8, 0x3f)
> > > +#define av1_skip_seg2			AV1_DEC_REG(16, 14, 0x1)
> > > +#define av1_refpic_seg2			AV1_DEC_REG(16, 15, 0xf)
> > > +#define av1_filt_level_delta0_seg2	AV1_DEC_REG(16, 19, 0x7f)
> > > +#define av1_filt_level2			AV1_DEC_REG(16, 26, 0x3f)
> > > +
> > > +#define av1_quant_seg3			AV1_DEC_REG(17, 0, 0xff)
> > > +#define av1_filt_level_seg3		AV1_DEC_REG(17, 8, 0x3f)
> > > +#define av1_skip_seg3			AV1_DEC_REG(17, 14, 0x1)
> > > +#define av1_refpic_seg3			AV1_DEC_REG(17, 15, 0xf)
> > > +#define av1_filt_level_delta0_seg3	AV1_DEC_REG(17, 19, 0x7f)
> > > +#define av1_filt_level3			AV1_DEC_REG(17, 26, 0x3f)
> > > +
> > > +#define av1_quant_seg4			AV1_DEC_REG(18, 0, 0xff)
> > > +#define av1_filt_level_seg4		AV1_DEC_REG(18, 8, 0x3f)
> > > +#define av1_skip_seg4			AV1_DEC_REG(18, 14, 0x1)
> > > +#define av1_refpic_seg4			AV1_DEC_REG(18, 15, 0xf)
> > > +#define av1_filt_level_delta0_seg4	AV1_DEC_REG(18, 19, 0x7f)
> > > +#define av1_lr_type			AV1_DEC_REG(18, 26, 0x3f)
> > > +
> > > +#define av1_quant_seg5			AV1_DEC_REG(19, 0, 0xff)
> > > +#define av1_filt_level_seg5		AV1_DEC_REG(19, 8, 0x3f)
> > > +#define av1_skip_seg5			AV1_DEC_REG(19, 14, 0x1)
> > > +#define av1_refpic_seg5			AV1_DEC_REG(19, 15, 0xf)
> > > +#define av1_filt_level_delta0_seg5	AV1_DEC_REG(19, 19, 0x7f)
> > > +#define av1_lr_unit_size		AV1_DEC_REG(19, 26, 0x3f)
> > > +
> > > +#define av1_filt_level_delta1_seg0	AV1_DEC_REG(20, 0, 0x7f)
> > > +#define av1_filt_level_delta2_seg0	AV1_DEC_REG(20, 7, 0x7f)
> > > +#define av1_filt_level_delta3_seg0	AV1_DEC_REG(20, 14, 0x7f)
> > > +#define av1_global_mv_seg0		AV1_DEC_REG(20, 21, 0x1)
> > > +#define av1_mf1_last_offset		AV1_DEC_REG(20, 22, 0x1ff)
> > > +
> > > +#define av1_filt_level_delta1_seg1	AV1_DEC_REG(21, 0, 0x7f)
> > > +#define av1_filt_level_delta2_seg1	AV1_DEC_REG(21, 7, 0x7f)
> > > +#define av1_filt_level_delta3_seg1	AV1_DEC_REG(21, 14, 0x7f)
> > > +#define av1_global_mv_seg1		AV1_DEC_REG(21, 21, 0x1)
> > > +#define av1_mf1_last2_offset		AV1_DEC_REG(21, 22, 0x1ff)
> > > +
> > > +#define av1_filt_level_delta1_seg2	AV1_DEC_REG(22, 0, 0x7f)
> > > +#define av1_filt_level_delta2_seg2	AV1_DEC_REG(22, 7, 0x7f)
> > > +#define av1_filt_level_delta3_seg2	AV1_DEC_REG(22, 14, 0x7f)
> > > +#define av1_global_mv_seg2		AV1_DEC_REG(22, 21, 0x1)
> > > +#define av1_mf1_last3_offset		AV1_DEC_REG(22, 22, 0x1ff)
> > > +
> > > +#define av1_filt_level_delta1_seg3	AV1_DEC_REG(23, 0, 0x7f)
> > > +#define av1_filt_level_delta2_seg3	AV1_DEC_REG(23, 7, 0x7f)
> > > +#define av1_filt_level_delta3_seg3	AV1_DEC_REG(23, 14, 0x7f)
> > > +#define av1_global_mv_seg3		AV1_DEC_REG(23, 21, 0x1)
> > > +#define av1_mf1_golden_offset		AV1_DEC_REG(23, 22, 0x1ff)
> > > +
> > > +#define av1_filt_level_delta1_seg4	AV1_DEC_REG(24, 0, 0x7f)
> > > +#define av1_filt_level_delta2_seg4	AV1_DEC_REG(24, 7, 0x7f)
> > > +#define av1_filt_level_delta3_seg4	AV1_DEC_REG(24, 14, 0x7f)
> > > +#define av1_global_mv_seg4		AV1_DEC_REG(24, 21, 0x1)
> > > +#define av1_mf1_bwdref_offset		AV1_DEC_REG(24, 22, 0x1ff)
> > > +
> > > +#define av1_filt_level_delta1_seg5	AV1_DEC_REG(25, 0, 0x7f)
> > > +#define av1_filt_level_delta2_seg5	AV1_DEC_REG(25, 7, 0x7f)
> > > +#define av1_filt_level_delta3_seg5	AV1_DEC_REG(25, 14, 0x7f)
> > > +#define av1_global_mv_seg5		AV1_DEC_REG(25, 21, 0x1)
> > > +#define av1_mf1_altref2_offset		AV1_DEC_REG(25, 22, 0x1ff)
> > > +
> > > +#define av1_filt_level_delta1_seg6	AV1_DEC_REG(26, 0, 0x7f)
> > > +#define av1_filt_level_delta2_seg6	AV1_DEC_REG(26, 7, 0x7f)
> > > +#define av1_filt_level_delta3_seg6	AV1_DEC_REG(26, 14, 0x7f)
> > > +#define av1_global_mv_seg6		AV1_DEC_REG(26, 21, 0x1)
> > > +#define av1_mf1_altref_offset		AV1_DEC_REG(26, 22, 0x1ff)
> > > +
> > > +#define av1_filt_level_delta1_seg7	AV1_DEC_REG(27, 0, 0x7f)
> > > +#define av1_filt_level_delta2_seg7	AV1_DEC_REG(27, 7, 0x7f)
> > > +#define av1_filt_level_delta3_seg7	AV1_DEC_REG(27, 14, 0x7f)
> > > +#define av1_global_mv_seg7		AV1_DEC_REG(27, 21, 0x1)
> > > +#define av1_mf2_last_offset		AV1_DEC_REG(27, 22, 0x1ff)
> > > +
> > > +#define av1_cb_offset			AV1_DEC_REG(28, 0, 0x1ff)
> > > +#define av1_cb_luma_mult		AV1_DEC_REG(28, 9, 0xff)
> > > +#define av1_cb_mult			AV1_DEC_REG(28, 17, 0xff)
> > > +#define	av1_quant_delta_v_dc		AV1_DEC_REG(28, 25, 0x7f)
> > > +
> > > +#define av1_cr_offset			AV1_DEC_REG(29, 0, 0x1ff)
> > > +#define av1_cr_luma_mult		AV1_DEC_REG(29, 9, 0xff)
> > > +#define av1_cr_mult			AV1_DEC_REG(29, 17, 0xff)
> > > +#define	av1_quant_delta_v_ac		AV1_DEC_REG(29, 25, 0x7f)
> > > +
> > > +#define av1_filt_ref_adj_5		AV1_DEC_REG(30, 0, 0x7f)
> > > +#define av1_filt_ref_adj_4		AV1_DEC_REG(30, 7, 0x7f)
> > > +#define av1_filt_mb_adj_1		AV1_DEC_REG(30, 14, 0x7f)
> > > +#define av1_filt_mb_adj_0		AV1_DEC_REG(30, 21, 0x7f)
> > > +#define av1_filt_sharpness		AV1_DEC_REG(30, 28, 0x7)
> > > +
> > > +#define av1_quant_seg6			AV1_DEC_REG(31, 0, 0xff)
> > > +#define av1_filt_level_seg6		AV1_DEC_REG(31, 8, 0x3f)
> > > +#define av1_skip_seg6			AV1_DEC_REG(31, 14, 0x1)
> > > +#define av1_refpic_seg6			AV1_DEC_REG(31, 15, 0xf)
> > > +#define av1_filt_level_delta0_seg6	AV1_DEC_REG(31, 19, 0x7f)
> > > +#define av1_skip_ref0			AV1_DEC_REG(31, 26, 0xf)
> > > +
> > > +#define av1_quant_seg7			AV1_DEC_REG(32, 0, 0xff)
> > > +#define av1_filt_level_seg7		AV1_DEC_REG(32, 8, 0x3f)
> > > +#define av1_skip_seg7			AV1_DEC_REG(32, 14, 0x1)
> > > +#define av1_refpic_seg7			AV1_DEC_REG(32, 15, 0xf)
> > > +#define av1_filt_level_delta0_seg7	AV1_DEC_REG(32, 19, 0x7f)
> > > +#define av1_skip_ref1			AV1_DEC_REG(32, 26, 0xf)
> > > +
> > > +#define av1_ref0_height			AV1_DEC_REG(33, 0, 0xffff)
> > > +#define av1_ref0_width			AV1_DEC_REG(33, 16, 0xffff)
> > > +
> > > +#define av1_ref1_height			AV1_DEC_REG(34, 0, 0xffff)
> > > +#define av1_ref1_width			AV1_DEC_REG(34, 16, 0xffff)
> > > +
> > > +#define av1_ref2_height			AV1_DEC_REG(35, 0, 0xffff)
> > > +#define av1_ref2_width			AV1_DEC_REG(35, 16, 0xffff)
> > > +
> > > +#define av1_ref0_ver_scale		AV1_DEC_REG(36, 0, 0xffff)
> > > +#define av1_ref0_hor_scale		AV1_DEC_REG(36, 16, 0xffff)
> > > +
> > > +#define av1_ref1_ver_scale		AV1_DEC_REG(37, 0, 0xffff)
> > > +#define av1_ref1_hor_scale		AV1_DEC_REG(37, 16, 0xffff)
> > > +
> > > +#define av1_ref2_ver_scale		AV1_DEC_REG(38, 0, 0xffff)
> > > +#define av1_ref2_hor_scale		AV1_DEC_REG(38, 16, 0xffff)
> > > +
> > > +#define av1_ref3_ver_scale		AV1_DEC_REG(39, 0, 0xffff)
> > > +#define av1_ref3_hor_scale		AV1_DEC_REG(39, 16, 0xffff)
> > > +
> > > +#define av1_ref4_ver_scale		AV1_DEC_REG(40, 0, 0xffff)
> > > +#define av1_ref4_hor_scale		AV1_DEC_REG(40, 16, 0xffff)
> > > +
> > > +#define av1_ref5_ver_scale		AV1_DEC_REG(41, 0, 0xffff)
> > > +#define av1_ref5_hor_scale		AV1_DEC_REG(41, 16, 0xffff)
> > > +
> > > +#define av1_ref6_ver_scale		AV1_DEC_REG(42, 0, 0xffff)
> > > +#define av1_ref6_hor_scale		AV1_DEC_REG(42, 16, 0xffff)
> > > +
> > > +#define av1_ref3_height			AV1_DEC_REG(43, 0, 0xffff)
> > > +#define av1_ref3_width			AV1_DEC_REG(43, 16, 0xffff)
> > > +
> > > +#define av1_ref4_height			AV1_DEC_REG(44, 0, 0xffff)
> > > +#define av1_ref4_width			AV1_DEC_REG(44, 16, 0xffff)
> > > +
> > > +#define av1_ref5_height			AV1_DEC_REG(45, 0, 0xffff)
> > > +#define av1_ref5_width			AV1_DEC_REG(45, 16, 0xffff)
> > > +
> > > +#define av1_ref6_height			AV1_DEC_REG(46, 0, 0xffff)
> > > +#define av1_ref6_width			AV1_DEC_REG(46, 16, 0xffff)
> > > +
> > > +#define av1_mf2_last2_offset		AV1_DEC_REG(47, 0, 0x1ff)
> > > +#define av1_mf2_last3_offset		AV1_DEC_REG(47, 9, 0x1ff)
> > > +#define av1_mf2_golden_offset		AV1_DEC_REG(47, 18, 0x1ff)
> > > +#define av1_qmlevel_y			AV1_DEC_REG(47, 27, 0xf)
> > > +
> > > +#define av1_mf2_bwdref_offset		AV1_DEC_REG(48, 0, 0x1ff)
> > > +#define av1_mf2_altref2_offset		AV1_DEC_REG(48, 9, 0x1ff)
> > > +#define av1_mf2_altref_offset		AV1_DEC_REG(48, 18, 0x1ff)
> > > +#define av1_qmlevel_u			AV1_DEC_REG(48, 27, 0xf)
> > > +
> > > +#define av1_filt_ref_adj_6		AV1_DEC_REG(49, 0, 0x7f)
> > > +#define av1_filt_ref_adj_7		AV1_DEC_REG(49, 7, 0x7f)
> > > +#define av1_qmlevel_v			AV1_DEC_REG(49, 14, 0xf)
> > > +
> > > +#define av1_superres_chroma_step	AV1_DEC_REG(51, 0, 0x3fff)
> > > +#define av1_superres_luma_step		AV1_DEC_REG(51, 14, 0x3fff)
> > > +
> > > +#define av1_superres_init_chroma_subpel_x	AV1_DEC_REG(52, 0, 0x3fff)
> > > +#define av1_superres_init_luma_subpel_x		AV1_DEC_REG(52, 14, 0x3fff)
> > > +
> > > +#define av1_cdef_chroma_secondary_strength	AV1_DEC_REG(53, 0, 0xffff)
> > > +#define av1_cdef_luma_secondary_strength	AV1_DEC_REG(53, 16, 0xffff)
> > > +
> > > +#define av1_apf_threshold		AV1_DEC_REG(55, 0, 0xffff)
> > > +#define av1_apf_single_pu_mode		AV1_DEC_REG(55, 30, 0x1)
> > > +#define av1_apf_disable			AV1_DEC_REG(55, 30, 0x1)
> > > +
> > > +#define av1_dec_max_burst		AV1_DEC_REG(58, 0, 0xff)
> > > +#define av1_dec_buswidth		AV1_DEC_REG(58, 8, 0x7)
> > > +#define av1_dec_multicore_mode		AV1_DEC_REG(58, 11, 0x3)
> > > +#define av1_dec_axi_wd_id_e		AV1_DEC_REG(58,	13, 0x1)
> > > +#define av1_dec_axi_rd_id_e		AV1_DEC_REG(58, 14, 0x1)
> > > +#define av1_dec_mc_polltime		AV1_DEC_REG(58, 17, 0x3ff)
> > > +#define av1_dec_mc_pollmode		AV1_DEC_REG(58,	27, 0x3)
> > > +
> > > +#define av1_filt_ref_adj_3		AV1_DEC_REG(59, 0, 0x3f)
> > > +#define av1_filt_ref_adj_2		AV1_DEC_REG(59, 7, 0x3f)
> > > +#define av1_filt_ref_adj_1		AV1_DEC_REG(59, 14, 0x3f)
> > > +#define av1_filt_ref_adj_0		AV1_DEC_REG(59, 21, 0x3f)
> > > +#define av1_ref0_sign_bias		AV1_DEC_REG(59, 28, 0x1)
> > > +#define av1_ref1_sign_bias		AV1_DEC_REG(59, 29, 0x1)
> > > +#define av1_ref2_sign_bias		AV1_DEC_REG(59, 30, 0x1)
> > > +#define av1_ref3_sign_bias		AV1_DEC_REG(59, 31, 0x1)
> > > +
> > > +#define av1_cur_last_roffset		AV1_DEC_REG(184, 0, 0x1ff)
> > > +#define av1_cur_last_offset		AV1_DEC_REG(184, 9, 0x1ff)
> > > +#define av1_mf3_last_offset		AV1_DEC_REG(184, 18, 0x1ff)
> > > +#define av1_ref0_gm_mode		AV1_DEC_REG(184, 27, 0x3)
> > > +
> > > +#define av1_cur_last2_roffset		AV1_DEC_REG(185, 0, 0x1ff)
> > > +#define av1_cur_last2_offset		AV1_DEC_REG(185, 9, 0x1ff)
> > > +#define av1_mf3_last2_offset		AV1_DEC_REG(185, 18, 0x1ff)
> > > +#define av1_ref1_gm_mode		AV1_DEC_REG(185, 27, 0x3)
> > > +
> > > +#define av1_cur_last3_roffset		AV1_DEC_REG(186, 0, 0x1ff)
> > > +#define av1_cur_last3_offset		AV1_DEC_REG(186, 9, 0x1ff)
> > > +#define av1_mf3_last3_offset		AV1_DEC_REG(186, 18, 0x1ff)
> > > +#define av1_ref2_gm_mode		AV1_DEC_REG(186, 27, 0x3)
> > > +
> > > +#define av1_cur_golden_roffset		AV1_DEC_REG(187, 0, 0x1ff)
> > > +#define av1_cur_golden_offset		AV1_DEC_REG(187, 9, 0x1ff)
> > > +#define av1_mf3_golden_offset		AV1_DEC_REG(187, 18, 0x1ff)
> > > +#define av1_ref3_gm_mode		AV1_DEC_REG(187, 27, 0x3)
> > > +
> > > +#define av1_cur_bwdref_roffset		AV1_DEC_REG(188, 0, 0x1ff)
> > > +#define av1_cur_bwdref_offset		AV1_DEC_REG(188, 9, 0x1ff)
> > > +#define av1_mf3_bwdref_offset		AV1_DEC_REG(188, 18, 0x1ff)
> > > +#define av1_ref4_gm_mode		AV1_DEC_REG(188, 27, 0x3)
> > > +
> > > +#define av1_cur_altref2_roffset		AV1_DEC_REG(257, 0, 0x1ff)
> > > +#define av1_cur_altref2_offset		AV1_DEC_REG(257, 9, 0x1ff)
> > > +#define av1_mf3_altref2_offset		AV1_DEC_REG(257, 18, 0x1ff)
> > > +#define av1_ref5_gm_mode		AV1_DEC_REG(257, 27, 0x3)
> > > +
> > > +#define av1_strm_buffer_len		AV1_DEC_REG(258, 0, 0xffffffff)
> > > +
> > > +#define av1_strm_start_offset		AV1_DEC_REG(259, 0, 0xffffffff)
> > > +
> > > +#define av1_ppd_blend_exist		AV1_DEC_REG(260, 21, 0x1)
> > > +#define av1_ppd_dith_exist		AV1_DEC_REG(260, 23, 0x1)
> > > +#define av1_ablend_crop_e		AV1_DEC_REG(260, 24, 0x1)
> > > +#define av1_pp_format_p010_e		AV1_DEC_REG(260, 25, 0x1)
> > > +#define av1_pp_format_customer1_e	AV1_DEC_REG(260, 26, 0x1)
> > > +#define av1_pp_crop_exist		AV1_DEC_REG(260, 27, 0x1)
> > > +#define av1_pp_up_level			AV1_DEC_REG(260, 28, 0x1)
> > > +#define av1_pp_down_level		AV1_DEC_REG(260, 29, 0x3)
> > > +#define av1_pp_exist			AV1_DEC_REG(260, 31, 0x1)
> > > +
> > > +#define av1_cur_altref_roffset		AV1_DEC_REG(262, 0, 0x1ff)
> > > +#define av1_cur_altref_offset		AV1_DEC_REG(262, 9, 0x1ff)
> > > +#define av1_mf3_altref_offset		AV1_DEC_REG(262, 18, 0x1ff)
> > > +#define av1_ref6_gm_mode		AV1_DEC_REG(262, 27, 0x3)
> > > +
> > > +#define av1_cdef_luma_primary_strength	AV1_DEC_REG(263, 0, 0xffffffff)
> > > +
> > > +#define av1_cdef_chroma_primary_strength AV1_DEC_REG(264, 0, 0xffffffff)
> > > +
> > > +#define av1_axi_arqos			AV1_DEC_REG(265, 0, 0xf)
> > > +#define av1_axi_awqos			AV1_DEC_REG(265, 4, 0xf)
> > > +#define av1_axi_wr_ostd_threshold	AV1_DEC_REG(265, 8, 0x3ff)
> > > +#define av1_axi_rd_ostd_threshold	AV1_DEC_REG(265, 18, 0x3ff)
> > > +#define av1_axi_wr_4k_dis		AV1_DEC_REG(265, 31, 0x1)
> > > +
> > > +#define av1_128bit_mode			AV1_DEC_REG(266, 5, 0x1)
> > > +#define av1_wr_shaper_bypass		AV1_DEC_REG(266, 10, 0x1)
> > > +#define av1_error_conceal_e		AV1_DEC_REG(266, 30, 0x1)
> > > +
> > > +#define av1_superres_chroma_step_invra	AV1_DEC_REG(298, 0, 0xffff)
> > > +#define av1_superres_luma_step_invra	AV1_DEC_REG(298, 16, 0xffff)
> > > +
> > > +#define av1_dec_alignment		AV1_DEC_REG(314, 0, 0xffff)
> > > +
> > > +#define av1_ext_timeout_cycles		AV1_DEC_REG(318, 0, 0x7fffffff)
> > > +#define av1_ext_timeout_override_e	AV1_DEC_REG(318, 31, 0x1)
> > > +
> > > +#define av1_timeout_cycles		AV1_DEC_REG(319, 0, 0x7fffffff)
> > > +#define av1_timeout_override_e		AV1_DEC_REG(319, 31, 0x1)
> > > +
> > > +#define av1_pp_out_e			AV1_DEC_REG(320, 0, 0x1)
> > > +#define av1_pp_cr_first			AV1_DEC_REG(320, 1, 0x1)
> > > +#define av1_pp_out_mode			AV1_DEC_REG(320, 2, 0x1)
> > > +#define av1_pp_out_tile_e		AV1_DEC_REG(320, 3, 0x1)
> > > +#define av1_pp_status			AV1_DEC_REG(320, 4, 0xf)
> > > +#define av1_pp_in_blk_size		AV1_DEC_REG(320, 8, 0x7)
> > > +#define av1_pp_out_p010_fmt		AV1_DEC_REG(320, 11, 0x3)
> > > +#define av1_pp_out_rgb_fmt		AV1_DEC_REG(320, 13, 0x1f)
> > > +#define av1_rgb_range_max		AV1_DEC_REG(320, 18, 0xfff)
> > > +#define av1_pp_rgb_planar		AV1_DEC_REG(320, 30, 0x1)
> > > +
> > > +#define av1_scale_hratio		AV1_DEC_REG(322, 0, 0x3ffff)
> > > +#define av1_pp_out_format		AV1_DEC_REG(322, 18, 0x1f)
> > > +#define av1_ver_scale_mode		AV1_DEC_REG(322, 23, 0x3)
> > > +#define av1_hor_scale_mode		AV1_DEC_REG(322, 25, 0x3)
> > > +#define av1_pp_in_format		AV1_DEC_REG(322, 27, 0x1f)
> > > +
> > > +#define av1_pp_out_c_stride		AV1_DEC_REG(329, 0, 0xffff)
> > > +#define av1_pp_out_y_stride		AV1_DEC_REG(329, 16, 0xffff)
> > > +
> > > +#define av1_pp_in_height		AV1_DEC_REG(331, 0, 0xffff)
> > > +#define av1_pp_in_width			AV1_DEC_REG(331, 16, 0xffff)
> > > +
> > > +#define av1_pp_out_height		AV1_DEC_REG(332, 0, 0xffff)
> > > +#define av1_pp_out_width		AV1_DEC_REG(332, 16, 0xffff)
> > > +
> > > +#define av1_pp1_dup_ver			AV1_DEC_REG(394, 0, 0xff)
> > > +#define av1_pp1_dup_hor			AV1_DEC_REG(394, 8, 0xff)
> > > +#define av1_pp0_dup_ver			AV1_DEC_REG(394, 16, 0xff)
> > > +#define av1_pp0_dup_hor			AV1_DEC_REG(394, 24, 0xff)
> > > +
> > > +#define AV1_TILE_OUT_LU			(AV1_SWREG(65))
> > > +#define AV1_REFERENCE_Y(i)		(AV1_SWREG(67) + ((i) * 0x8))
> > > +#define AV1_SEGMENTATION		(AV1_SWREG(81))
> > > +#define AV1_GLOBAL_MODEL		(AV1_SWREG(83))
> > > +#define AV1_CDEF_COL			(AV1_SWREG(85))
> > > +#define AV1_SR_COL			(AV1_SWREG(89))
> > > +#define AV1_LR_COL			(AV1_SWREG(91))
> > > +#define AV1_FILM_GRAIN			(AV1_SWREG(95))
> > > +#define AV1_TILE_OUT_CH			(AV1_SWREG(99))
> > > +#define AV1_REFERENCE_CB(i)		(AV1_SWREG(101) + ((i) * 0x8))
> > > +#define AV1_TILE_OUT_MV			(AV1_SWREG(133))
> > > +#define AV1_REFERENCE_MV(i)		(AV1_SWREG(135) + ((i) * 0x8))
> > > +#define AV1_TILE_BASE			(AV1_SWREG(167))
> > > +#define AV1_INPUT_STREAM		(AV1_SWREG(169))
> > > +#define AV1_PROP_TABLE_OUT		(AV1_SWREG(171))
> > > +#define AV1_PROP_TABLE			(AV1_SWREG(173))
> > > +#define AV1_MC_SYNC_CURR		(AV1_SWREG(175))
> > > +#define AV1_MC_SYNC_LEFT		(AV1_SWREG(177))
> > > +#define AV1_DB_DATA_COL			(AV1_SWREG(179))
> > > +#define AV1_DB_CTRL_COL			(AV1_SWREG(183))
> > > +#define AV1_PP_OUT_LU			(AV1_SWREG(326))
> > > +#define AV1_PP_OUT_CH			(AV1_SWREG(328))
> > > +
> > > +#endif /* _ROCKCHIP_VPU981_REGS_H_ */
Benjamin Gaignard Dec. 21, 2022, 4:37 p.m. UTC | #5
Le 21/12/2022 à 17:30, Nicolas Dufresne a écrit :
> Le mardi 20 décembre 2022 à 15:49 +0100, Benjamin Gaignard a écrit :
>> Le 19/12/2022 à 22:56, Nicolas Dufresne a écrit :
>>> Le lundi 19 décembre 2022 à 16:56 +0100, Benjamin Gaignard a écrit :
>>>> Implement AV1 stateless decoder for rockchip VPU981.
>>>> It decode 8 and 10 bits AV1 bitstreams.
>>>> AV1 scaling feature is done by the postprocessor.
>>> Can you clarify? I knew film grain was, but I didn't expect spatial scaling to
>>> need it. On this aspect, I don't see any new code to let the Hantro driver know
>>> that the postproc is needed. The frame header value should be taken into account
>>> in hantro_needs_postproc() (a per-codec op would be nice).
>> 2 tests (av1-1-b8-03-sizedown and av1-1-b8-03-sizeup) are only working if the postprocessor
>> is used. Given their names I assume it is related to scaling.
> I can take care of investigating what these streams do, and how the postproc
> could save them. These are not SVC, so there is no reference scaling. But I
> think I need to find out exactly what they do before I can comment further.

In the middle of the bitstream, a new sequence is sent with a new resolution.
That is handled well when the post-processor is used, but not with native formats.

>
>> Forcing postprocessing is not an easy task in the Hantro driver, and this series is already big enough.
>>
>> What I have prototyped to solve this problem is adding a "need_postproc" field to the context structure,
>> but I still have pixel format negotiation issues to solve before releasing it.
>>
>>>> Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
>>>> ---
>>>>    drivers/media/platform/verisilicon/Makefile   |    1 +
>>>>    .../media/platform/verisilicon/hantro_hw.h    |   64 +-
>>>>    .../verisilicon/rockchip_vpu981_hw_av1_dec.c  | 2067 +++++++++++++++++
>>>>    .../verisilicon/rockchip_vpu981_regs.h        |  477 ++++
>>>>    4 files changed, 2607 insertions(+), 2 deletions(-)
>>>>    create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
>>>>    create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
>>>>
>>>> diff --git a/drivers/media/platform/verisilicon/Makefile b/drivers/media/platform/verisilicon/Makefile
>>>> index d2b2679c00eb..c9a9806ab8c5 100644
>>>> --- a/drivers/media/platform/verisilicon/Makefile
>>>> +++ b/drivers/media/platform/verisilicon/Makefile
>>>> @@ -18,6 +18,7 @@ hantro-vpu-y += \
>>>>    		rockchip_vpu2_hw_h264_dec.o \
>>>>    		rockchip_vpu2_hw_mpeg2_dec.o \
>>>>    		rockchip_vpu2_hw_vp8_dec.o \
>>>> +		rockchip_vpu981_hw_av1_dec.o \
>>>>    		rockchip_av1_entropymode.o \
>>>>    		hantro_jpeg.o \
>>>>    		hantro_h264.o \
>>>> diff --git a/drivers/media/platform/verisilicon/hantro_hw.h b/drivers/media/platform/verisilicon/hantro_hw.h
>>>> index e395aeeae2f4..3c0a995998a5 100644
>>>> --- a/drivers/media/platform/verisilicon/hantro_hw.h
>>>> +++ b/drivers/media/platform/verisilicon/hantro_hw.h
>>>> @@ -37,6 +37,9 @@
>>>>    
>>>>    #define NUM_REF_PICTURES	(V4L2_HEVC_DPB_ENTRIES_NUM_MAX + 1)
>>>>    
>>>> +#define AV1_REF_LIST_SIZE	8
>>> This is the same as V4L2_AV1_TOTAL_REFS_PER_FRAME. AV1_TOTAL_REFS_PER_FRAME is
>>> how this is called in the spec.
>> ok
>>
>>>> +#define AV1_MAX_FRAME_BUF_COUNT	(V4L2_AV1_TOTAL_REFS_PER_FRAME + 1)
>>>> +
>>>>    struct hantro_dev;
>>>>    struct hantro_ctx;
>>>>    struct hantro_buf;
>>>> @@ -250,23 +253,80 @@ struct hantro_vp9_dec_hw_ctx {
>>>>    };
>>>>    
>>>>    /**
>>>> - * hantro_av1_dec_hw_ctx
>>>> + * struct hantro_av1_dec_ctrls
>>>> + * @sequence:		AV1 Sequence
>>>> + * @tile_group_entry:	AV1 Tile Group entry
>>>> + * @frame:		AV1 Frame Header OBU
>>>> + * @film_grain:		AV1 Film Grain
>>>> + */
>>>> +struct hantro_av1_dec_ctrls {
>>>> +	const struct v4l2_ctrl_av1_sequence *sequence;
>>>> +	const struct v4l2_ctrl_av1_tile_group_entry *tile_group_entry;
>>>> +	const struct v4l2_ctrl_av1_frame *frame;
>>>> +	const struct v4l2_ctrl_av1_film_grain *film_grain;
>>>> +};
>>>> +
>>>> +struct hantro_av1_frame_ref {
>>>> +	int width;
>>>> +	int height;
>>>> +	u64 timestamp;
>>>> +	enum v4l2_av1_frame_type frame_type;
>>>> +	int ref_count;
>>>> +	u32 order_hint;
>>>> +	u32 order_hints[V4L2_AV1_NUM_REF_FRAMES];
>>>> +	int gm_mode;
>>>> +	struct vb2_v4l2_buffer *vb2_ref;
>>>> +};
>>>> +
>>>> +/**
>>>> + * struct hantro_av1_dec_hw_ctx
>>>> + * @db_data_col:	db tile col data buffer
>>>> + * @db_ctrl_col:	db tile col ctrl buffer
>>>> + * @cdef_col:		cdef tile col buffer
>>>> + * @sr_col:		sr tile col buffer
>>>> + * @lr_col:		lr tile col buffer
>>>> + * @global_model:	global model buffer
>>>> + * @tile_info:		tile info buffer
>>>> + * @segment:		segmentation info buffer
>>>> + * @prob_tbl:		probability table
>>>> + * @prob_tbl_out:	probability table output
>>>> + * @tile_buf:		tile buffer
>>>> + * @ctrls:		V4L2 controls attached to a run
>>>> + * @frame_refs:		reference frames info slots
>>>> + * @ref_frame_sign_bias: array of sign bias
>>>> + * @num_tile_cols_allocated: number of allocated tiles
>>>>     * @cdfs:		current probabilities structure
>>>>     * @cdfs_ndvc:		current mv probabilities structure
>>>>     * @default_cdfs:	default probabilities structure
>>>>     * @default_cdfs_ndvc:	default mv probabilties structure
>>>>     * @cdfs_last:		stored probabilities structures
>>>>     * @cdfs_last_ndvc:	stored mv probabilities structures
>>>> + * @current_frame_index: index of the current in frame_refs array
>>>>     */
>>>>    struct hantro_av1_dec_hw_ctx {
>>>> +	struct hantro_aux_buf db_data_col;
>>>> +	struct hantro_aux_buf db_ctrl_col;
>>>> +	struct hantro_aux_buf cdef_col;
>>>> +	struct hantro_aux_buf sr_col;
>>>> +	struct hantro_aux_buf lr_col;
>>>> +	struct hantro_aux_buf global_model;
>>>> +	struct hantro_aux_buf tile_info;
>>>> +	struct hantro_aux_buf segment;
>>>> +	struct hantro_aux_buf prob_tbl;
>>>> +	struct hantro_aux_buf prob_tbl_out;
>>>> +	struct hantro_aux_buf tile_buf;
>>>> +	struct hantro_av1_dec_ctrls ctrls;
>>>> +	struct hantro_av1_frame_ref frame_refs[AV1_MAX_FRAME_BUF_COUNT];
>>>> +	uint32_t ref_frame_sign_bias[AV1_REF_LIST_SIZE];
>>>> +	unsigned int num_tile_cols_allocated;
>>>>    	struct av1cdfs *cdfs;
>>>>    	struct mvcdfs  *cdfs_ndvc;
>>>>    	struct av1cdfs default_cdfs;
>>>>    	struct mvcdfs  default_cdfs_ndvc;
>>>>    	struct av1cdfs cdfs_last[NUM_REF_FRAMES];
>>>>    	struct mvcdfs  cdfs_last_ndvc[NUM_REF_FRAMES];
>>>> +	int current_frame_index;
>>>>    };
>>>> -
>>>>    /**
>>>>     * struct hantro_postproc_ctx
>>>>     *
>>>> diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
>>>> new file mode 100644
>>>> index 000000000000..a183e4f35e00
>>>> --- /dev/null
>>>> +++ b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
>>>> @@ -0,0 +1,2067 @@
>>>> +// SPDX-License-Identifier: GPL-2.0-only
>>>> +/*
>>>> + * Copyright (c) 2021, Collabora
>>>> + *
>>>> + * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
>>>> + */
>>>> +
>>>> +#include <media/v4l2-mem2mem.h>
>>>> +#include "hantro.h"
>>>> +#include "hantro_v4l2.h"
>>>> +#include "rockchip_vpu981_regs.h"
>>>> +
>>>> +#define AV1_DEC_MODE		17
>>>> +#define GM_GLOBAL_MODELS_PER_FRAME	7
>>>> +#define GLOBAL_MODEL_TOTAL_SIZE	(6 * 4 + 4 * 2)
>>>> +#define GLOBAL_MODEL_SIZE	ALIGN(GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE, 2048)
>>>> +#define AV1_MAX_TILES		128
>>>> +#define AV1_TILE_INFO_SIZE	(AV1_MAX_TILES * 16)
>>>> +#define AV1DEC_MAX_PIC_BUFFERS	24
>>>> +#define AV1_REF_SCALE_SHIFT	14
>>>> +#define AV1_INVALID_IDX		-1
>>>> +#define MAX_FRAME_DISTANCE	31
>>>> +#define AV1_PRIMARY_REF_NONE	7
>>>> +#define AV1_TILE_SIZE		ALIGN(32 * 128, 4096)
>>>> +/*
>>>> + * These 3 values aren't defined enum v4l2_av1_segment_feature because
>>>> + * they are not part of the specification
>>>> + */
>>>> +#define V4L2_AV1_SEG_LVL_ALT_LF_Y_H	2
>>>> +#define V4L2_AV1_SEG_LVL_ALT_LF_U	3
>>>> +#define V4L2_AV1_SEG_LVL_ALT_LF_V	4
>>>> +
>>>> +#define CLIP3(l, h, v)      ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
>>> include/linux/minmax.h:#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
>> ok I will remove it
>>
>>>> +
>>>> +#define SUPERRES_SCALE_BITS 3
>>>> +#define SCALE_NUMERATOR 8
>>>> +#define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)
>>>> +
>>>> +#define RS_SUBPEL_BITS 6
>>>> +#define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
>>>> +#define RS_SCALE_SUBPEL_BITS 14
>>>> +#define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
>>>> +#define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
>>>> +#define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))
>>>> +
>>>> +#define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
>>>> +
>>>> +#define LST_BUF_IDX (V4L2_AV1_REF_LAST_FRAME - V4L2_AV1_REF_LAST_FRAME)
>>>> +#define LST2_BUF_IDX (V4L2_AV1_REF_LAST2_FRAME - V4L2_AV1_REF_LAST_FRAME)
>>>> +#define LST3_BUF_IDX (V4L2_AV1_REF_LAST3_FRAME - V4L2_AV1_REF_LAST_FRAME)
>>>> +#define GLD_BUF_IDX (V4L2_AV1_REF_GOLDEN_FRAME - V4L2_AV1_REF_LAST_FRAME)
>>>> +#define BWD_BUF_IDX (V4L2_AV1_REF_BWDREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
>>>> +#define ALT2_BUF_IDX (V4L2_AV1_REF_ALTREF2_FRAME - V4L2_AV1_REF_LAST_FRAME)
>>>> +#define ALT_BUF_IDX (V4L2_AV1_REF_ALTREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
>>>> +
>>>> +#define DIV_LUT_PREC_BITS 14
>>>> +#define DIV_LUT_BITS 8
>>>> +#define DIV_LUT_NUM BIT(DIV_LUT_BITS)
>>>> +#define WARP_PARAM_REDUCE_BITS 6
>>>> +#define WARPEDMODEL_PREC_BITS 16
>>>> +
>>>> +#define AV1_DIV_ROUND_UP_POW2(value, n)			\
>>>> +({							\
>>>> +	typeof(n) _n  = n;				\
>>>> +	typeof(value) _value = value;			\
>>>> +	(_value + (BIT(_n) >> 1)) >> _n;		\
>>>> +})
>>>> +
>>>> +#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n)				\
>>>> +({									\
>>>> +	typeof(n) _n_  = n;						\
>>>> +	typeof(value) _value_ = value;					\
>>>> +	(((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_))	\
>>>> +		: AV1_DIV_ROUND_UP_POW2((_value_), (_n_)));		\
>>>> +})
>>>> +
>>>> +struct rockchip_av1_film_grain {
>>>> +	uint8_t scaling_lut_y[256];
>>>> +	uint8_t scaling_lut_cb[256];
>>>> +	uint8_t scaling_lut_cr[256];
>>>> +	int16_t cropped_luma_grain_block[4096];
>>>> +	int16_t cropped_chroma_grain_block[1024 * 2];
>>>> +};
>>>> +
>>>> +static const short div_lut[DIV_LUT_NUM + 1] = {
>>>> +	16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
>>>> +	15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
>>>> +	15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
>>>> +	14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
>>>> +	13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
>>>> +	13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
>>>> +	13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
>>>> +	12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
>>>> +	12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
>>>> +	11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
>>>> +	11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
>>>> +	11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
>>>> +	10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
>>>> +	10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
>>>> +	10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
>>>> +	9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
>>>> +	9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
>>>> +	9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
>>>> +	9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
>>>> +	9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
>>>> +	8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
>>>> +	8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
>>>> +	8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
>>>> +	8240,  8224,  8208,  8192,
>>>> +};
>>>> +
>>>> +static int rockchip_vpu981_get_frame_index(struct hantro_ctx *ctx, int ref)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>>>> +	u64 timestamp;
>>>> +	int i, idx = frame->ref_frame_idx[ref];
>>>> +
>>>> +	if (idx >= AV1_MAX_FRAME_BUF_COUNT || idx < 0)
>>>> +		return AV1_INVALID_IDX;
>>>> +
>>>> +	timestamp = frame->reference_frame_ts[idx];
>>>> +	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
>>>> +		if (av1_dec->frame_refs[i].ref_count == 0)
>>>> +			continue;
>>>> +		if (av1_dec->frame_refs[i].timestamp == timestamp)
>>>> +			return i;
>>>> +	}
>>> nit: Mediatek optimizes this with a map
>>>
>>>> +
>>>> +	return AV1_INVALID_IDX;
>>>> +}
>>>> +
>>>> +static int rockchip_vpu981_get_order_hint(struct hantro_ctx *ctx, int ref)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	int idx = rockchip_vpu981_get_frame_index(ctx, ref);
>>>> +
>>>> +	if (idx != AV1_INVALID_IDX)
>>>> +		return av1_dec->frame_refs[idx].order_hint;
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static int rockchip_vpu981_av1_dec_frame_ref(struct hantro_ctx *ctx,
>>>> +					     u64 timestamp)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>>>> +	int i;
>>>> +
>>>> +	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
>>>> +		if (av1_dec->frame_refs[i].ref_count == 0) {
>>>> +			int j;
>>>> +
>>>> +			av1_dec->frame_refs[i].width =
>>>> +			    frame->frame_width_minus_1 + 1;
>>>> +			av1_dec->frame_refs[i].height =
>>>> +			    frame->frame_height_minus_1 + 1;
>>>> +			av1_dec->frame_refs[i].timestamp = timestamp;
>>>> +			av1_dec->frame_refs[i].frame_type = frame->frame_type;
>>>> +			av1_dec->frame_refs[i].order_hint = frame->order_hint;
>>>> +			av1_dec->frame_refs[i].gm_mode =
>>>> +				frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME + i];
>>>> +			if (!av1_dec->frame_refs[i].vb2_ref)
>>>> +				av1_dec->frame_refs[i].vb2_ref = hantro_get_dst_buf(ctx);
>>>> +
>>>> +			for (j = 0; j < V4L2_AV1_NUM_REF_FRAMES; j++)
>>>> +				av1_dec->frame_refs[i].order_hints[j] = frame->order_hints[j];
>>>> +
>>>> +			av1_dec->frame_refs[i].ref_count++;
>>> This ref_count is only ever 0 and 1. Would be nicer if it was some boolean, like
>>> "valid", or "active".
>> I will change it to "bool used;"
>>
>>>> +			av1_dec->current_frame_index = i;
>>>> +			return i;
>>>> +		}
>>>> +	}
>>>> +
>>>> +	return AV1_INVALID_IDX;
>>>> +}
>>>> +
>>>> +static void rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx *ctx, int idx)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +
>>>> +	if (idx < 0)
>>>> +		return;
>>>> +
>>>> +	av1_dec->frame_refs[idx].ref_count--;
>>>> +
>>>> +	if (av1_dec->frame_refs[idx].ref_count < 0)
>>>> +		pr_warn("AV1 reference frames refcounting error (idx %d)\n", idx);
>>> Looks like a bug_on case; only a driver programming issue could cause this.
>> I have never hit it, I will remove it.
>>
>>>
>>>> +}
>>>> +
>>>> +static void rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +
>>>> +	int ref, idx;
>>>> +
>>>> +	for (idx = 0; idx < AV1_MAX_FRAME_BUF_COUNT; idx++) {
>>>> +		u64 timestamp = av1_dec->frame_refs[idx].timestamp;
>>>> +		bool used = false;
>>>> +
>>>> +		if (av1_dec->frame_refs[idx].ref_count == 0)
>>>> +			continue;
>>>> +
>>>> +		for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++) {
>>>> +			if (ctrls->frame->reference_frame_ts[ref] == timestamp)
>>>> +				used = true;
>>>> +		}
>>>> +
>>>> +		if (!used)
>>>> +			rockchip_vpu981_av1_dec_frame_unref(ctx, idx);
>>>> +	}
>>>> +}
>>>> +
>>>> +static size_t rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx *ctx)
>>>> +{
>>>> +	return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8;
>>>> +}
>>>> +
>>>> +static size_t rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx *ctx)
>>>> +{
>>>> +	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
>>>> +
>>>> +	return ALIGN((cr_offset * 3) / 2, 64);
>>>> +}
>>>> +
>>>> +void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +
>>>> +	if (av1_dec->db_data_col.cpu)
>>>> +		dma_free_coherent(vpu->dev, av1_dec->db_data_col.size,
>>>> +				  av1_dec->db_data_col.cpu,
>>>> +				  av1_dec->db_data_col.dma);
>>>> +	av1_dec->db_data_col.cpu = NULL;
>>>> +
>>>> +	if (av1_dec->db_ctrl_col.cpu)
>>>> +		dma_free_coherent(vpu->dev, av1_dec->db_ctrl_col.size,
>>>> +				  av1_dec->db_ctrl_col.cpu,
>>>> +				  av1_dec->db_ctrl_col.dma);
>>>> +	av1_dec->db_ctrl_col.cpu = NULL;
>>>> +
>>>> +	if (av1_dec->cdef_col.cpu)
>>>> +		dma_free_coherent(vpu->dev, av1_dec->cdef_col.size,
>>>> +				  av1_dec->cdef_col.cpu, av1_dec->cdef_col.dma);
>>>> +	av1_dec->cdef_col.cpu = NULL;
>>>> +
>>>> +	if (av1_dec->sr_col.cpu)
>>>> +		dma_free_coherent(vpu->dev, av1_dec->sr_col.size,
>>>> +				  av1_dec->sr_col.cpu, av1_dec->sr_col.dma);
>>>> +	av1_dec->sr_col.cpu = NULL;
>>>> +
>>>> +	if (av1_dec->lr_col.cpu)
>>>> +		dma_free_coherent(vpu->dev, av1_dec->lr_col.size,
>>>> +				  av1_dec->lr_col.cpu, av1_dec->lr_col.dma);
>>>> +	av1_dec->lr_col.cpu = NULL;
>>>> +}
>>>> +
>>>> +static int rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +	unsigned int num_tile_cols = 1 << ctrls->tile_group_entry->tile_col;
>>>> +	unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
>>>> +	unsigned int height_in_sb = height / 64;
>>>> +	unsigned int stripe_num = ((height + 8) + 63) / 64;
>>>> +	size_t size;
>>>> +
>>>> +	if (num_tile_cols <= av1_dec->num_tile_cols_allocated)
>>>> +		return 0;
>>>> +
>>>> +	rockchip_vpu981_av1_dec_tiles_free(ctx);
>>>> +
>>>> +	size = ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols;
>>>> +	av1_dec->db_data_col.cpu = dma_alloc_coherent(vpu->dev, size,
>>>> +						      &av1_dec->db_data_col.dma,
>>>> +						      GFP_KERNEL);
>>>> +	if (!av1_dec->db_data_col.cpu)
>>>> +		goto buffer_allocation_error;
>>>> +	av1_dec->db_data_col.size = size;
>>>> +
>>>> +	size = ALIGN(height * 2 * 16 / 4, 128) * num_tile_cols;
>>> No bit_depth scaling?
>> No, the MPP code doesn't scale this one depending on bit_depth.
>>
>>>> +	av1_dec->db_ctrl_col.cpu = dma_alloc_coherent(vpu->dev, size,
>>>> +						      &av1_dec->db_ctrl_col.dma,
>>>> +						      GFP_KERNEL);
>>>> +	if (!av1_dec->db_ctrl_col.cpu)
>>>> +		goto buffer_allocation_error;
>>>> +	av1_dec->db_ctrl_col.size = size;
>>>> +
>>>> +	size = ALIGN(height_in_sb * 44 * ctx->bit_depth * 16 / 8, 128) * num_tile_cols;
>>>> +	av1_dec->cdef_col.cpu = dma_alloc_coherent(vpu->dev, size,
>>>> +						   &av1_dec->cdef_col.dma,
>>>> +						   GFP_KERNEL);
>>>> +	if (!av1_dec->cdef_col.cpu)
>>>> +		goto buffer_allocation_error;
>>>> +	av1_dec->cdef_col.size = size;
>>>> +
>>>> +	size = ALIGN(height_in_sb * (3040 + 1280), 128) * num_tile_cols;
>>>> +	av1_dec->sr_col.cpu = dma_alloc_coherent(vpu->dev, size,
>>>> +						 &av1_dec->sr_col.dma,
>>>> +						 GFP_KERNEL);
>>>> +	if (!av1_dec->sr_col.cpu)
>>>> +		goto buffer_allocation_error;
>>>> +	av1_dec->sr_col.size = size;
>>>> +
>>>> +	size = ALIGN(stripe_num * 1536 * ctx->bit_depth / 8, 128) * num_tile_cols;
>>>> +	av1_dec->lr_col.cpu = dma_alloc_coherent(vpu->dev, size,
>>>> +						 &av1_dec->lr_col.dma,
>>>> +						 GFP_KERNEL);
>>>> +	if (!av1_dec->lr_col.cpu)
>>>> +		goto buffer_allocation_error;
>>>> +	av1_dec->lr_col.size = size;
>>>> +
>>>> +	av1_dec->num_tile_cols_allocated = num_tile_cols;
>>>> +	return 0;
>>>> +
>>>> +buffer_allocation_error:
>>>> +	rockchip_vpu981_av1_dec_tiles_free(ctx);
>>>> +	return -ENOMEM;
>>>> +}
>>>> +
>>>> +void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +
>>>> +	if (av1_dec->global_model.cpu)
>>>> +		dma_free_coherent(vpu->dev, av1_dec->global_model.size,
>>>> +				  av1_dec->global_model.cpu,
>>>> +				  av1_dec->global_model.dma);
>>>> +	av1_dec->global_model.cpu = NULL;
>>>> +
>>>> +	if (av1_dec->tile_info.cpu)
>>>> +		dma_free_coherent(vpu->dev, av1_dec->tile_info.size,
>>>> +				  av1_dec->tile_info.cpu,
>>>> +				  av1_dec->tile_info.dma);
>>>> +	av1_dec->tile_info.cpu = NULL;
>>>> +
>>>> +	if (av1_dec->prob_tbl.cpu)
>>>> +		dma_free_coherent(vpu->dev, av1_dec->prob_tbl.size,
>>>> +				  av1_dec->prob_tbl.cpu, av1_dec->prob_tbl.dma);
>>>> +	av1_dec->prob_tbl.cpu = NULL;
>>>> +
>>>> +	if (av1_dec->prob_tbl_out.cpu)
>>>> +		dma_free_coherent(vpu->dev, av1_dec->prob_tbl_out.size,
>>>> +				  av1_dec->prob_tbl_out.cpu,
>>>> +				  av1_dec->prob_tbl_out.dma);
>>>> +	av1_dec->prob_tbl_out.cpu = NULL;
>>>> +
>>>> +	if (av1_dec->tile_buf.cpu)
>>>> +		dma_free_coherent(vpu->dev, av1_dec->tile_buf.size,
>>>> +				  av1_dec->tile_buf.cpu, av1_dec->tile_buf.dma);
>>>> +	av1_dec->tile_buf.cpu = NULL;
>>>> +
>>>> +	rockchip_vpu981_av1_dec_tiles_free(ctx);
>>>> +}
>>>> +
>>>> +int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +
>>>> +	memset(av1_dec, 0, sizeof(*av1_dec));
>>>> +
>>>> +	av1_dec->global_model.cpu = dma_alloc_coherent(vpu->dev, GLOBAL_MODEL_SIZE,
>>>> +						       &av1_dec->global_model.dma,
>>>> +						       GFP_KERNEL);
>>>> +	if (!av1_dec->global_model.cpu)
>>>> +		return -ENOMEM;
>>>> +	av1_dec->global_model.size = GLOBAL_MODEL_SIZE;
>>>> +
>>>> +	av1_dec->tile_info.cpu = dma_alloc_coherent(vpu->dev, AV1_MAX_TILES,
>>>> +						    &av1_dec->tile_info.dma,
>>>> +						    GFP_KERNEL);
>>>> +	if (!av1_dec->tile_info.cpu)
>>>> +		return -ENOMEM;
>>>> +	av1_dec->tile_info.size = AV1_MAX_TILES;
>>>> +
>>>> +	av1_dec->prob_tbl.cpu = dma_alloc_coherent(vpu->dev,
>>>> +						   ALIGN(sizeof(struct av1cdfs), 2048),
>>>> +						   &av1_dec->prob_tbl.dma,
>>>> +						   GFP_KERNEL);
>>>> +	if (!av1_dec->prob_tbl.cpu)
>>>> +		return -ENOMEM;
>>>> +	av1_dec->prob_tbl.size = ALIGN(sizeof(struct av1cdfs), 2048);
>>>> +
>>>> +	av1_dec->prob_tbl_out.cpu = dma_alloc_coherent(vpu->dev,
>>>> +						       ALIGN(sizeof(struct av1cdfs), 2048),
>>>> +						       &av1_dec->prob_tbl_out.dma,
>>>> +						       GFP_KERNEL);
>>>> +	if (!av1_dec->prob_tbl_out.cpu)
>>>> +		return -ENOMEM;
>>>> +	av1_dec->prob_tbl_out.size = ALIGN(sizeof(struct av1cdfs), 2048);
>>>> +	av1_dec->cdfs = &av1_dec->default_cdfs;
>>>> +	av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
>>>> +
>>>> +	rockchip_av1_set_default_cdfs(av1_dec->cdfs, av1_dec->cdfs_ndvc);
>>>> +
>>>> +	av1_dec->tile_buf.cpu = dma_alloc_coherent(vpu->dev,
>>>> +						   AV1_TILE_SIZE,
>>>> +						   &av1_dec->tile_buf.dma,
>>>> +						   GFP_KERNEL);
>>>> +	if (!av1_dec->tile_buf.cpu)
>>>> +		return -ENOMEM;
>>>> +	av1_dec->tile_buf.size = AV1_TILE_SIZE;
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static int rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +
>>>> +	ctrls->sequence = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_SEQUENCE);
>>>> +	if (WARN_ON(!ctrls->sequence))
>>>> +		return -EINVAL;
>>>> +
>>>> +	ctrls->tile_group_entry =
>>>> +	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
>>>> +	if (WARN_ON(!ctrls->tile_group_entry))
>>>> +		return -EINVAL;
>>>> +
>>>> +	ctrls->frame = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FRAME);
>>>> +	if (WARN_ON(!ctrls->frame))
>>>> +		return -EINVAL;
>>>> +
>>>> +	ctrls->film_grain =
>>>> +	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FILM_GRAIN);
>>>> +
>>>> +	return rockchip_vpu981_av1_dec_tiles_reallocate(ctx);
>>>> +}
>>>> +
>>>> +static inline int rockchip_vpu981_av1_dec_get_msb(u32 n)
>>>> +{
>>>> +	if (n == 0)
>>>> +		return 0;
>>>> +	return 31 ^ __builtin_clz(n);
>>>> +}
>>>> +
>>>> +static short rockchip_vpu981_av1_dec_resolve_divisor_32(u32 d, short *shift)
>>>> +{
>>>> +	int f;
>>>> +	uint64_t e;
>>>> +
>>>> +	*shift = rockchip_vpu981_av1_dec_get_msb(d);
>>>> +	/* e is obtained from D after resetting the most significant 1 bit. */
>>>> +	e = d - ((u32)1 << *shift);
>>>> +	/* Get the most significant DIV_LUT_BITS (8) bits of e into f */
>>>> +	if (*shift > DIV_LUT_BITS)
>>>> +		f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS);
>>>> +	else
>>>> +		f = e << (DIV_LUT_BITS - *shift);
>>>> +	if (f > DIV_LUT_NUM)
>>>> +		return -1;
>>>> +	*shift += DIV_LUT_PREC_BITS;
>>>> +	/* Use f as lookup into the precomputed table of multipliers */
>>>> +	return div_lut[f];
>>>> +}
>>>> +
>>>> +static void rockchip_vpu981_av1_dec_get_shear_params(const uint32_t *params,
>>>> +	int64_t *alpha, int64_t *beta, int64_t *gamma, int64_t *delta)
>>>> +{
>>>> +	const int *mat = params;
>>>> +	short shift;
>>>> +	short y;
>>>> +	long long gv, dv;
>>>> +
>>>> +	if (mat[2] <= 0)
>>>> +		return;
>>>> +
>>>> +	*alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
>>>> +	*beta = clamp_val(mat[3], S16_MIN, S16_MAX);
>>>> +
>>>> +	y = rockchip_vpu981_av1_dec_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
>>>> +
>>>> +	gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
>>>> +
>>>> +	*gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), S16_MIN, S16_MAX);
>>>> +
>>>> +	dv = ((long long)mat[3] * mat[4]) * y;
>>>> +	*delta = clamp_val(
>>>> +		mat[5] -
>>>> +		(int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - (1 << WARPEDMODEL_PREC_BITS),
>>>> +		S16_MIN, S16_MAX);
>>>> +
>>>> +	*alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS)
>>>> +		 * (1 << WARP_PARAM_REDUCE_BITS);
>>>> +	*beta = AV1_DIV_ROUND_UP_POW2_SIGNED(*beta, WARP_PARAM_REDUCE_BITS)
>>>> +		* (1 << WARP_PARAM_REDUCE_BITS);
>>>> +	*gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS)
>>>> +		 * (1 << WARP_PARAM_REDUCE_BITS);
>>>> +	*delta = AV1_DIV_ROUND_UP_POW2_SIGNED(*delta, WARP_PARAM_REDUCE_BITS)
>>>> +		* (1 << WARP_PARAM_REDUCE_BITS);
>>>> +}
>>>> +
>>>> +static void rockchip_vpu981_av1_dec_set_global_model(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>>>> +	const struct v4l2_av1_global_motion *gm = &frame->global_motion;
>>>> +	uint8_t *dst = av1_dec->global_model.cpu;
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +	int ref_frame, i;
>>>> +
>>>> +	memset(dst, 0, GLOBAL_MODEL_SIZE);
>>>> +	for (ref_frame = 0; ref_frame < V4L2_AV1_REFS_PER_FRAME; ++ref_frame) {
>>>> +		int64_t alpha = 0, beta = 0, gamma = 0, delta = 0;
>>>> +
>>>> +		for (i = 0; i < 6; ++i) {
>>>> +			if (i == 2)
>>>> +				*(int32_t *)dst =
>>>> +					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][3];
>>>> +			else if (i == 3)
>>>> +				*(int32_t *)dst =
>>>> +					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][2];
>>>> +			else
>>>> +				*(int32_t *)dst =
>>>> +					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][i];
>>>> +			dst += 4;
>>>> +		}
>>>> +
>>>> +		if (gm->type[V4L2_AV1_REF_LAST_FRAME + ref_frame] <= V4L2_AV1_WARP_MODEL_AFFINE)
>>>> +			rockchip_vpu981_av1_dec_get_shear_params(
>>>> +					&gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][0],
>>>> +					&alpha, &beta, &gamma, &delta);
>>>> +
>>>> +		*(int16_t *)dst = alpha;
>>>> +		dst += 2;
>>>> +		*(int16_t *)dst = beta;
>>>> +		dst += 2;
>>>> +		*(int16_t *)dst = gamma;
>>>> +		dst += 2;
>>>> +		*(int16_t *)dst = delta;
>>>> +		dst += 2;
>>>> +	}
>>>> +
>>>> +	hantro_write_addr(vpu, AV1_GLOBAL_MODEL, av1_dec->global_model.dma);
>>>> +}
>>>> +
>>>> +static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +	struct v4l2_av1_tile_info tile_info = ctrls->frame->tile_info;
>>>> +	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
>>>> +	    ctrls->tile_group_entry;
>>>> +	int context_update_y =
>>>> +	    tile_info.context_update_tile_id / tile_info.tile_cols;
>>>> +	int context_update_x =
>>>> +	    tile_info.context_update_tile_id % tile_info.tile_cols;
>>>> +	int context_update_tile_id =
>>>> +	    context_update_x * tile_info.tile_rows + context_update_y;
>>>> +	uint8_t *dst = av1_dec->tile_info.cpu;
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +	int tile0, tile1;
>>>> +
>>>> +	memset(dst, 0, av1_dec->tile_info.size);
>>>> +
>>>> +	for (tile0 = 0; tile0 < tile_info.tile_cols; tile0++) {
>>>> +		for (tile1 = 0; tile1 < tile_info.tile_rows; tile1++) {
>>>> +			int tile_id = tile1 * tile_info.tile_cols + tile0;
>>>> +			uint32_t start, end;
>>>> +			uint32_t y0 =
>>>> +			    tile_info.height_in_sbs_minus_1[tile1] + 1;
>>>> +			uint32_t x0 = tile_info.width_in_sbs_minus_1[tile0] + 1;
>>>> +
>>>> +			// tile size in SB units (width,height)
>>>> +			*dst++ = x0;
>>>> +			*dst++ = 0;
>>>> +			*dst++ = 0;
>>>> +			*dst++ = 0;
>>>> +			*dst++ = y0;
>>>> +			*dst++ = 0;
>>>> +			*dst++ = 0;
>>>> +			*dst++ = 0;
>>>> +
>>>> +			// tile start position
>>>> +			start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
>>>> +			*dst++ = start & 255;
>>>> +			*dst++ = (start >> 8) & 255;
>>>> +			*dst++ = (start >> 16) & 255;
>>>> +			*dst++ = (start >> 24) & 255;
>>>> +
>>>> +			// # of bytes in tile data
>>>> +			end = start + group_entry[tile_id].tile_size;
>>>> +			*dst++ = end & 255;
>>>> +			*dst++ = (end >> 8) & 255;
>>>> +			*dst++ = (end >> 16) & 255;
>>>> +			*dst++ = (end >> 24) & 255;
>>>> +		}
>>>> +	}
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_multicore_expect_context_update,
>>>> +			 !!(context_update_x == 0));
>>>> +	hantro_reg_write(vpu, &av1_tile_enable, !!((tile_info.tile_cols > 1)
>>>> +						   || (tile_info.tile_rows > 1)));
>>>> +	hantro_reg_write(vpu, &av1_num_tile_cols_8k, tile_info.tile_cols);
>>>> +	hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info.tile_rows);
>>>> +	hantro_reg_write(vpu, &av1_context_update_tile_id,
>>>> +			 context_update_tile_id);
>>>> +	hantro_reg_write(vpu, &av1_tile_transpose, 1);
>>>> +	if (context_update_tile_id) {
>>>> +		hantro_reg_write(vpu, &av1_dec_tile_size_mag,
>>>> +				 tile_info.tile_size_bytes);
>>>> +	} else
>>>> +		hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3);
>>>> +
>>>> +	hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma);
>>>> +}
>>>> +
>>>> +static int rockchip_vpu981_av1_dec_get_relative_dist(struct hantro_ctx *ctx,
>>>> +						     int a, int b)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +	int bits = ctrls->sequence->order_hint_bits - 1;
>>>> +	int diff, m;
>>>> +
>>>> +	if (!ctrls->sequence->order_hint_bits)
>>>> +		return 0;
>>>> +
>>>> +	diff = a - b;
>>>> +	m = 1 << bits;
>>>> +	diff = (diff & (m - 1)) - (diff & m);
>>>> +
>>>> +	return diff;
>>>> +}
>>> I think I've seen this one in MTK. It would be nice to make a v4l2-av1.h eventually
>>> and share this. That can happen afterward though, to avoid making this too complex.
>> I will keep that in mind.
>>
>>>> +
>>>> +static void rockchip_vpu981_av1_dec_set_frame_sign_bias(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>>>> +	const struct v4l2_ctrl_av1_sequence *sequence = ctrls->sequence;
>>>> +	int i;
>>>> +
>>>> +	if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
>>>> +		for (i = 0; i < AV1_REF_LIST_SIZE; i++)
>>>> +			av1_dec->ref_frame_sign_bias[i] = 0;
>>>> +
>>>> +		return;
>>>> +	}
>>>> +	// Identify the nearest forward and backward references.
>>>> +	for (i = 0; i < AV1_REF_LIST_SIZE - 1; i++) {
>>>> +		if (rockchip_vpu981_get_frame_index(ctx, i) >= 0) {
>>>> +			int rel_off =
>>>> +			    rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +								      rockchip_vpu981_get_order_hint
>>>> +								      (ctx, i),
>>>> +								      frame->order_hint);
>>>> +			av1_dec->ref_frame_sign_bias[i + 1] = (rel_off <= 0) ? 0 : 1;
>>>> +		}
>>>> +	}
>>>> +}
>>>> +
>>>> +static bool
>>>> +rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
>>>> +				int width, int height)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +	struct hantro_decoded_buffer *dst;
>>>> +	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
>>>> +	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
>>>> +	size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
>>>> +	int cur_width = frame->frame_width_minus_1 + 1;
>>>> +	int cur_height = frame->frame_height_minus_1 + 1;
>>>> +	int scale_width =
>>>> +	    ((width << AV1_REF_SCALE_SHIFT) + cur_width / 2) / cur_width;
>>>> +	int scale_height =
>>>> +	    ((height << AV1_REF_SCALE_SHIFT) + cur_height / 2) / cur_height;
>>>> +
>>>> +	switch (ref) {
>>>> +	case 0:
>>>> +		hantro_reg_write(vpu, &av1_ref0_height, height);
>>>> +		hantro_reg_write(vpu, &av1_ref0_width, width);
>>>> +		hantro_reg_write(vpu, &av1_ref0_ver_scale, scale_width);
>>>> +		hantro_reg_write(vpu, &av1_ref0_hor_scale, scale_height);
>>>> +		break;
>>>> +	case 1:
>>>> +		hantro_reg_write(vpu, &av1_ref1_height, height);
>>>> +		hantro_reg_write(vpu, &av1_ref1_width, width);
>>>> +		hantro_reg_write(vpu, &av1_ref1_ver_scale, scale_width);
>>>> +		hantro_reg_write(vpu, &av1_ref1_hor_scale, scale_height);
>>>> +		break;
>>>> +	case 2:
>>>> +		hantro_reg_write(vpu, &av1_ref2_height, height);
>>>> +		hantro_reg_write(vpu, &av1_ref2_width, width);
>>>> +		hantro_reg_write(vpu, &av1_ref2_ver_scale, scale_width);
>>>> +		hantro_reg_write(vpu, &av1_ref2_hor_scale, scale_height);
>>>> +		break;
>>>> +	case 3:
>>>> +		hantro_reg_write(vpu, &av1_ref3_height, height);
>>>> +		hantro_reg_write(vpu, &av1_ref3_width, width);
>>>> +		hantro_reg_write(vpu, &av1_ref3_ver_scale, scale_width);
>>>> +		hantro_reg_write(vpu, &av1_ref3_hor_scale, scale_height);
>>>> +		break;
>>>> +	case 4:
>>>> +		hantro_reg_write(vpu, &av1_ref4_height, height);
>>>> +		hantro_reg_write(vpu, &av1_ref4_width, width);
>>>> +		hantro_reg_write(vpu, &av1_ref4_ver_scale, scale_width);
>>>> +		hantro_reg_write(vpu, &av1_ref4_hor_scale, scale_height);
>>>> +		break;
>>>> +	case 5:
>>>> +		hantro_reg_write(vpu, &av1_ref5_height, height);
>>>> +		hantro_reg_write(vpu, &av1_ref5_width, width);
>>>> +		hantro_reg_write(vpu, &av1_ref5_ver_scale, scale_width);
>>>> +		hantro_reg_write(vpu, &av1_ref5_hor_scale, scale_height);
>>>> +		break;
>>>> +	case 6:
>>>> +		hantro_reg_write(vpu, &av1_ref6_height, height);
>>>> +		hantro_reg_write(vpu, &av1_ref6_width, width);
>>>> +		hantro_reg_write(vpu, &av1_ref6_ver_scale, scale_width);
>>>> +		hantro_reg_write(vpu, &av1_ref6_hor_scale, scale_height);
>>>> +		break;
>>>> +	default:
>>>> +		pr_warn("AV1 invalid reference frame index\n");
>>>> +	}
>>>> +
>>>> +	dst = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
>>>> +	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
>>>> +	chroma_addr = luma_addr + cr_offset;
>>>> +	mv_addr = luma_addr + mv_offset;
>>>> +
>>>> +	hantro_write_addr(vpu, AV1_REFERENCE_Y(ref), luma_addr);
>>>> +	hantro_write_addr(vpu, AV1_REFERENCE_CB(ref), chroma_addr);
>>>> +	hantro_write_addr(vpu, AV1_REFERENCE_MV(ref), mv_addr);
>>>> +
>>>> +	return (scale_width != (1 << AV1_REF_SCALE_SHIFT))
>>>> +		|| (scale_height != (1 << AV1_REF_SCALE_SHIFT));
>>>> +}
>>>> +
>>>> +static void rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx *ctx,
>>>> +						  int ref, int val)
>>>> +{
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +
>>>> +	switch (ref) {
>>>> +	case 0:
>>>> +		hantro_reg_write(vpu, &av1_ref0_sign_bias, val);
>>>> +		break;
>>>> +	case 1:
>>>> +		hantro_reg_write(vpu, &av1_ref1_sign_bias, val);
>>>> +		break;
>>>> +	case 2:
>>>> +		hantro_reg_write(vpu, &av1_ref2_sign_bias, val);
>>>> +		break;
>>>> +	case 3:
>>>> +		hantro_reg_write(vpu, &av1_ref3_sign_bias, val);
>>>> +		break;
>>>> +	case 4:
>>>> +		hantro_reg_write(vpu, &av1_ref4_sign_bias, val);
>>>> +		break;
>>>> +	case 5:
>>>> +		hantro_reg_write(vpu, &av1_ref5_sign_bias, val);
>>>> +		break;
>>>> +	case 6:
>>>> +		hantro_reg_write(vpu, &av1_ref6_sign_bias, val);
>>>> +		break;
>>>> +	default:
>>>> +		pr_warn("AV1 invalid sign bias index\n");
>>>> +		break;
>>>> +	}
>>>> +}
>>>> +
>>>> +static void rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>>>> +	const struct v4l2_av1_segmentation *seg = &frame->segmentation;
>>>> +	uint32_t segval[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX] = { 0 };
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +	uint8_t segsign = 0, preskip_segid = 0, last_active_seg = 0, i, j;
>>>> +
>>>> +	if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED)
>>>> +	    && (frame->primary_ref_frame < V4L2_AV1_REFS_PER_FRAME)) {
>>>> +		int idx = rockchip_vpu981_get_frame_index(ctx, frame->primary_ref_frame);
>>>> +
>>>> +		if (idx >= 0) {
>>>> +			dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
>>>> +			size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
>>>> +			size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
>>>> +
>>>> +			luma_addr =
>>>> +				hantro_get_dec_buf_addr(ctx,
>>>> +							&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
>>>> +			chroma_addr = luma_addr + cr_offset;
>>>> +			mv_addr = luma_addr + mv_offset;
>>>> +
>>>> +			hantro_write_addr(vpu, AV1_SEGMENTATION, mv_addr);
>>>> +			hantro_reg_write(vpu, &av1_use_temporal3_mvs, 1);
>>>> +		}
>>>> +	}
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_segment_temp_upd_e,
>>>> +			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE));
>>>> +	hantro_reg_write(vpu, &av1_segment_upd_e,
>>>> +			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP));
>>>> +	hantro_reg_write(vpu, &av1_segment_e,
>>>> +			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED));
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_error_resilient,
>>>> +			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE));
>>>> +
>>>> +	if (IS_INTRA(frame->frame_type)
>>>> +	    || !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE)) {
>>>> +		hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
>>>> +	}
>>>> +
>>>> +	if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED)) {
>>>> +		int s;
>>>> +
>>>> +		for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
>>>> +			if (seg->feature_enabled[s] &
>>>> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
>>>> +				segval[s][V4L2_AV1_SEG_LVL_ALT_Q] =
>>>> +				    CLIP3(0, 255,
>>>> +					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q]));
>>>> +				segsign |=
>>>> +					(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q] < 0) << s;
>>>> +			}
>>>> +
>>>> +			if (seg->feature_enabled[s] &
>>>> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_V))
>>>> +				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V] =
>>>> +					CLIP3(-63, 63,
>>>> +					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]));
>>>> +
>>>> +			if (seg->feature_enabled[s] &
>>>> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_H))
>>>> +				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H] =
>>>> +				    CLIP3(-63, 63,
>>>> +					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]));
>>>> +
>>>> +			if (seg->feature_enabled[s] &
>>>> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_U))
>>>> +				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_U] =
>>>> +				    CLIP3(-63, 63,
>>>> +					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_U]));
>>>> +
>>>> +			if (seg->feature_enabled[s] &
>>>> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_V))
>>>> +				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_V] =
>>>> +				    CLIP3(-63, 63,
>>>> +					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_V]));
>>>> +
>>>> +			if (frame->frame_type && seg->feature_enabled[s] &
>>>> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_FRAME))
>>>> +				segval[s][V4L2_AV1_SEG_LVL_REF_FRAME]++;
>>>> +
>>>> +			if (seg->feature_enabled[s] &
>>>> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_SKIP))
>>>> +				segval[s][V4L2_AV1_SEG_LVL_REF_SKIP] = 1;
>>>> +
>>>> +			if (seg->feature_enabled[s] &
>>>> +			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_GLOBALMV))
>>>> +				segval[s][V4L2_AV1_SEG_LVL_REF_GLOBALMV] = 1;
>>>> +		}
>>>> +	}
>>>> +
>>>> +	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
>>>> +		for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++) {
>>>> +			if (seg->feature_enabled[i]
>>>> +			    & V4L2_AV1_SEGMENT_FEATURE_ENABLED(j)) {
>>>> +				preskip_segid |= (j >= V4L2_AV1_SEG_LVL_REF_FRAME);
>>>> +				last_active_seg = max(i, last_active_seg);
>>>> +			}
>>>> +		}
>>>> +	}
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_last_active_seg, last_active_seg);
>>>> +	hantro_reg_write(vpu, &av1_preskip_segid, preskip_segid);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_seg_quant_sign, segsign);
>>>> +
>>>> +	/* Write QP, filter level, ref frame and skip for every segment */
>>>> +	hantro_reg_write(vpu, &av1_quant_seg0,
>>>> +			 segval[0][V4L2_AV1_SEG_LVL_ALT_Q]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg0,
>>>> +			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg0,
>>>> +			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg0,
>>>> +			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_U]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg0,
>>>> +			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_V]);
>>>> +	hantro_reg_write(vpu, &av1_refpic_seg0,
>>>> +			 segval[0][V4L2_AV1_SEG_LVL_REF_FRAME]);
>>>> +	hantro_reg_write(vpu, &av1_skip_seg0,
>>>> +			 segval[0][V4L2_AV1_SEG_LVL_REF_SKIP]);
>>>> +	hantro_reg_write(vpu, &av1_global_mv_seg0,
>>>> +			 segval[0][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_quant_seg1,
>>>> +			 segval[1][V4L2_AV1_SEG_LVL_ALT_Q]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg1,
>>>> +			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg1,
>>>> +			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg1,
>>>> +			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_U]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg1,
>>>> +			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_V]);
>>>> +	hantro_reg_write(vpu, &av1_refpic_seg1,
>>>> +			 segval[1][V4L2_AV1_SEG_LVL_REF_FRAME]);
>>>> +	hantro_reg_write(vpu, &av1_skip_seg1,
>>>> +			 segval[1][V4L2_AV1_SEG_LVL_REF_SKIP]);
>>>> +	hantro_reg_write(vpu, &av1_global_mv_seg1,
>>>> +			 segval[1][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_quant_seg2,
>>>> +			 segval[2][V4L2_AV1_SEG_LVL_ALT_Q]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg2,
>>>> +			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg2,
>>>> +			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg2,
>>>> +			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_U]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg2,
>>>> +			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_V]);
>>>> +	hantro_reg_write(vpu, &av1_refpic_seg2,
>>>> +			 segval[2][V4L2_AV1_SEG_LVL_REF_FRAME]);
>>>> +	hantro_reg_write(vpu, &av1_skip_seg2,
>>>> +			 segval[2][V4L2_AV1_SEG_LVL_REF_SKIP]);
>>>> +	hantro_reg_write(vpu, &av1_global_mv_seg2,
>>>> +			 segval[2][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_quant_seg3,
>>>> +			 segval[3][V4L2_AV1_SEG_LVL_ALT_Q]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg3,
>>>> +			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg3,
>>>> +			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg3,
>>>> +			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_U]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg3,
>>>> +			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_V]);
>>>> +	hantro_reg_write(vpu, &av1_refpic_seg3,
>>>> +			 segval[3][V4L2_AV1_SEG_LVL_REF_FRAME]);
>>>> +	hantro_reg_write(vpu, &av1_skip_seg3,
>>>> +			 segval[3][V4L2_AV1_SEG_LVL_REF_SKIP]);
>>>> +	hantro_reg_write(vpu, &av1_global_mv_seg3,
>>>> +			 segval[3][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_quant_seg4,
>>>> +			 segval[4][V4L2_AV1_SEG_LVL_ALT_Q]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg4,
>>>> +			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg4,
>>>> +			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg4,
>>>> +			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_U]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg4,
>>>> +			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_V]);
>>>> +	hantro_reg_write(vpu, &av1_refpic_seg4,
>>>> +			 segval[4][V4L2_AV1_SEG_LVL_REF_FRAME]);
>>>> +	hantro_reg_write(vpu, &av1_skip_seg4,
>>>> +			 segval[4][V4L2_AV1_SEG_LVL_REF_SKIP]);
>>>> +	hantro_reg_write(vpu, &av1_global_mv_seg4,
>>>> +			 segval[4][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_quant_seg5,
>>>> +			 segval[5][V4L2_AV1_SEG_LVL_ALT_Q]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg5,
>>>> +			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg5,
>>>> +			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg5,
>>>> +			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_U]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg5,
>>>> +			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_V]);
>>>> +	hantro_reg_write(vpu, &av1_refpic_seg5,
>>>> +			 segval[5][V4L2_AV1_SEG_LVL_REF_FRAME]);
>>>> +	hantro_reg_write(vpu, &av1_skip_seg5,
>>>> +			 segval[5][V4L2_AV1_SEG_LVL_REF_SKIP]);
>>>> +	hantro_reg_write(vpu, &av1_global_mv_seg5,
>>>> +			 segval[5][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_quant_seg6,
>>>> +			 segval[6][V4L2_AV1_SEG_LVL_ALT_Q]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg6,
>>>> +			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg6,
>>>> +			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg6,
>>>> +			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_U]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg6,
>>>> +			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_V]);
>>>> +	hantro_reg_write(vpu, &av1_refpic_seg6,
>>>> +			 segval[6][V4L2_AV1_SEG_LVL_REF_FRAME]);
>>>> +	hantro_reg_write(vpu, &av1_skip_seg6,
>>>> +			 segval[6][V4L2_AV1_SEG_LVL_REF_SKIP]);
>>>> +	hantro_reg_write(vpu, &av1_global_mv_seg6,
>>>> +			 segval[6][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_quant_seg7,
>>>> +			 segval[7][V4L2_AV1_SEG_LVL_ALT_Q]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta0_seg7,
>>>> +			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta1_seg7,
>>>> +			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta2_seg7,
>>>> +			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_U]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_delta3_seg7,
>>>> +			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_V]);
>>>> +	hantro_reg_write(vpu, &av1_refpic_seg7,
>>>> +			 segval[7][V4L2_AV1_SEG_LVL_REF_FRAME]);
>>>> +	hantro_reg_write(vpu, &av1_skip_seg7,
>>>> +			 segval[7][V4L2_AV1_SEG_LVL_REF_SKIP]);
>>>> +	hantro_reg_write(vpu, &av1_global_mv_seg7,
>>>> +			 segval[7][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
>>>> +}
>>>> +
>>>> +static bool rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>>>> +	const struct v4l2_av1_segmentation *segmentation = &frame->segmentation;
>>>> +	const struct v4l2_av1_quantization *quantization = &frame->quantization;
>>>> +	int i;
>>>> +
>>>> +	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
>>>> +		int qindex = quantization->base_q_idx;
>>>> +
>>>> +		if (segmentation->feature_enabled[i] &
>>>> +		    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
>>>> +			qindex += segmentation->feature_data[i][V4L2_AV1_SEG_LVL_ALT_Q];
>>>> +		}
>>>> +		qindex = CLIP3(0, 255, qindex);
>>>> +
>>>> +		if (qindex
>>>> +		    || quantization->delta_q_y_dc
>>>> +		    || quantization->delta_q_u_dc
>>>> +		    || quantization->delta_q_u_ac
>>>> +		    || quantization->delta_q_v_dc || quantization->delta_q_v_ac)
>>>> +			return false;
>>>> +	}
>>>> +	return true;
>>>> +}
>>>> +
>>>> +static void rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>>>> +	const struct v4l2_av1_loop_filter *loop_filter = &frame->loop_filter;
>>>> +	bool filtering_dis = (loop_filter->level[0] == 0)
>>>> +			     && (loop_filter->level[1] == 0);
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_filtering_dis, filtering_dis);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_base_gt32, loop_filter->level[0] > 32);
>>>> +	hantro_reg_write(vpu, &av1_filt_sharpness, loop_filter->sharpness);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_filt_level0, loop_filter->level[0]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level1, loop_filter->level[1]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level2, loop_filter->level[2]);
>>>> +	hantro_reg_write(vpu, &av1_filt_level3, loop_filter->level[3]);
>>>> +
>>>> +	if (loop_filter->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED
>>>> +	    && !rockchip_vpu981_av1_dec_is_lossless(ctx)
>>>> +	    && !(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC)) {
>>>> +		hantro_reg_write(vpu, &av1_filt_ref_adj_0,
>>>> +				 loop_filter->ref_deltas[0]);
>>>> +		hantro_reg_write(vpu, &av1_filt_ref_adj_1,
>>>> +				 loop_filter->ref_deltas[1]);
>>>> +		hantro_reg_write(vpu, &av1_filt_ref_adj_2,
>>>> +				 loop_filter->ref_deltas[2]);
>>>> +		hantro_reg_write(vpu, &av1_filt_ref_adj_3,
>>>> +				 loop_filter->ref_deltas[3]);
>>>> +		hantro_reg_write(vpu, &av1_filt_ref_adj_4,
>>>> +				 loop_filter->ref_deltas[4]);
>>>> +		hantro_reg_write(vpu, &av1_filt_ref_adj_5,
>>>> +				 loop_filter->ref_deltas[5]);
>>>> +		hantro_reg_write(vpu, &av1_filt_ref_adj_6,
>>>> +				 loop_filter->ref_deltas[6]);
>>>> +		hantro_reg_write(vpu, &av1_filt_ref_adj_7,
>>>> +				 loop_filter->ref_deltas[7]);
>>>> +		hantro_reg_write(vpu, &av1_filt_mb_adj_0,
>>>> +				 loop_filter->mode_deltas[0]);
>>>> +		hantro_reg_write(vpu, &av1_filt_mb_adj_1,
>>>> +				 loop_filter->mode_deltas[1]);
>>>> +	} else {
>>>> +		hantro_reg_write(vpu, &av1_filt_ref_adj_0, 0);
>>>> +		hantro_reg_write(vpu, &av1_filt_ref_adj_1, 0);
>>>> +		hantro_reg_write(vpu, &av1_filt_ref_adj_2, 0);
>>>> +		hantro_reg_write(vpu, &av1_filt_ref_adj_3, 0);
>>>> +		hantro_reg_write(vpu, &av1_filt_ref_adj_4, 0);
>>>> +		hantro_reg_write(vpu, &av1_filt_ref_adj_5, 0);
>>>> +		hantro_reg_write(vpu, &av1_filt_ref_adj_6, 0);
>>>> +		hantro_reg_write(vpu, &av1_filt_ref_adj_7, 0);
>>>> +		hantro_reg_write(vpu, &av1_filt_mb_adj_0, 0);
>>>> +		hantro_reg_write(vpu, &av1_filt_mb_adj_1, 0);
>>>> +	}
>>>> +
>>>> +	hantro_write_addr(vpu, AV1_DB_DATA_COL, av1_dec->db_data_col.dma);
>>>> +	hantro_write_addr(vpu, AV1_DB_CTRL_COL, av1_dec->db_ctrl_col.dma);
>>>> +}
>>>> +
>>>> +static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>>>> +	bool frame_is_intra = IS_INTRA(frame->frame_type);
>>>> +	struct av1cdfs *out_cdfs = (struct av1cdfs *)av1_dec->prob_tbl_out.cpu;
>>>> +	int i;
>>>> +
>>>> +	if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
>>>> +		return;
>>>> +
>>>> +	for (i = 0; i < NUM_REF_FRAMES; i++) {
>>>> +		if (frame->refresh_frame_flags & (1 << i)) {
>>>> +			struct mvcdfs stored_mv_cdf;
>>>> +
>>>> +			rockchip_av1_get_cdfs(ctx, i);
>>>> +			stored_mv_cdf = av1_dec->cdfs->mv_cdf;
>>>> +			*av1_dec->cdfs = *out_cdfs;
>>>> +			if (frame_is_intra) {
>>>> +				av1_dec->cdfs->mv_cdf = stored_mv_cdf;
>>>> +				*av1_dec->cdfs_ndvc = out_cdfs->mv_cdf;
>>>> +			}
>>>> +			rockchip_av1_store_cdfs(ctx,
>>>> +						frame->refresh_frame_flags);
>>>> +			break;
>>>> +		}
>>>> +	}
>>>> +}
>>>> +
>>>> +void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx)
>>>> +{
>>>> +	rockchip_vpu981_av1_dec_update_prob(ctx);
>>>> +}
>>>> +
>>>> +static void rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>>>> +	const struct v4l2_av1_quantization *quantization = &frame->quantization;
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +	bool error_resilient_mode =
>>>> +	    !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE);
>>>> +	bool frame_is_intra = IS_INTRA(frame->frame_type);
>>>> +
>>>> +	if (error_resilient_mode || frame_is_intra
>>>> +	    || frame->primary_ref_frame == AV1_PRIMARY_REF_NONE) {
>>>> +		av1_dec->cdfs = &av1_dec->default_cdfs;
>>>> +		av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
>>>> +		rockchip_av1_default_coeff_probs(quantization->base_q_idx,
>>>> +						 av1_dec->cdfs);
>>>> +	} else {
>>>> +		rockchip_av1_get_cdfs(ctx, frame->ref_frame_idx[frame->primary_ref_frame]);
>>>> +	}
>>>> +	rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);
>>>> +
>>>> +	memcpy(av1_dec->prob_tbl.cpu, av1_dec->cdfs, sizeof(struct av1cdfs));
>>>> +
>>>> +	if (frame_is_intra) {
>>>> +		int mv_offset = offsetof(struct av1cdfs, mv_cdf);
>>>> +		/* Overwrite MV context area with intrabc MV context */
>>>> +		memcpy(av1_dec->prob_tbl.cpu + mv_offset, av1_dec->cdfs_ndvc,
>>>> +		       sizeof(struct mvcdfs));
>>>> +	}
>>>> +
>>>> +	hantro_write_addr(vpu, AV1_PROP_TABLE_OUT, av1_dec->prob_tbl_out.dma);
>>>> +	hantro_write_addr(vpu, AV1_PROP_TABLE, av1_dec->prob_tbl.dma);
>>>> +}
>>>> +
>>>> +static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>>>> +	const struct v4l2_av1_cdef *cdef = &frame->cdef;
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +	uint32_t luma_pri_strength = 0;
>>>> +	uint16_t luma_sec_strength = 0;
>>>> +	uint32_t chroma_pri_strength = 0;
>>>> +	uint16_t chroma_sec_strength = 0;
>>>> +	int i;
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_cdef_bits, cdef->bits);
>>>> +	hantro_reg_write(vpu, &av1_cdef_damping, cdef->damping_minus_3);
>>>> +
>>>> +	for (i = 0; i < (1 << cdef->bits); i++) {
>>>> +		luma_pri_strength |= cdef->y_pri_strength[i] << (i * 4);
>>>> +		if (cdef->y_sec_strength[i] == 4)
>>>> +			luma_sec_strength |= 3 << (i * 2);
>>>> +		else
>>>> +			luma_sec_strength |= cdef->y_sec_strength[i] << (i * 2);
>>>> +
>>>> +		chroma_pri_strength |= cdef->uv_pri_strength[i] << (i * 4);
>>>> +		if (cdef->uv_sec_strength[i] == 4)
>>>> +			chroma_sec_strength |= 3 << (i * 2);
>>>> +		else
>>>> +			chroma_sec_strength |= cdef->uv_sec_strength[i] << (i * 2);
>>>> +	}
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_cdef_luma_primary_strength,
>>>> +			 luma_pri_strength);
>>>> +	hantro_reg_write(vpu, &av1_cdef_luma_secondary_strength,
>>>> +			 luma_sec_strength);
>>>> +	hantro_reg_write(vpu, &av1_cdef_chroma_primary_strength,
>>>> +			 chroma_pri_strength);
>>>> +	hantro_reg_write(vpu, &av1_cdef_chroma_secondary_strength,
>>>> +			 chroma_sec_strength);
>>>> +
>>>> +	hantro_write_addr(vpu, AV1_CDEF_COL, av1_dec->cdef_col.dma);
>>>> +}
>>>> +
>>>> +static void rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>>>> +	const struct v4l2_av1_loop_restoration *loop_restoration =
>>>> +	    &frame->loop_restoration;
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +	uint16_t lr_type = 0, lr_unit_size = 0;
>>>> +	uint8_t restoration_unit_size[V4L2_AV1_NUM_PLANES_MAX] = { 3, 3, 3 };
>>>> +	int i;
>>>> +
>>>> +	if (loop_restoration->flags & V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR) {
>>>> +		restoration_unit_size[0] = 1 + loop_restoration->lr_unit_shift;
>>>> +		restoration_unit_size[1] =
>>>> +		    1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
>>>> +		restoration_unit_size[2] =
>>>> +		    1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
>>>> +	}
>>>> +
>>>> +	for (i = 0; i < V4L2_AV1_NUM_PLANES_MAX; i++) {
>>>> +		lr_type |=
>>>> +		    loop_restoration->frame_restoration_type[i] << (i * 2);
>>>> +		lr_unit_size |= restoration_unit_size[i] << (i * 2);
>>>> +	}
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_lr_type, lr_type);
>>>> +	hantro_reg_write(vpu, &av1_lr_unit_size, lr_unit_size);
>>>> +	hantro_write_addr(vpu, AV1_LR_COL, av1_dec->lr_col.dma);
>>>> +}
>>>> +
>>>> +static void rockchip_vpu981_av1_dec_set_superres_params(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +	uint8_t superres_scale_denominator = SCALE_NUMERATOR;
>>>> +	int superres_luma_step = RS_SCALE_SUBPEL_BITS;
>>>> +	int superres_chroma_step = RS_SCALE_SUBPEL_BITS;
>>>> +	int superres_luma_step_invra = RS_SCALE_SUBPEL_BITS;
>>>> +	int superres_chroma_step_invra = RS_SCALE_SUBPEL_BITS;
>>>> +	int superres_init_luma_subpel_x = 0;
>>>> +	int superres_init_chroma_subpel_x = 0;
>>>> +	int superres_is_scaled = 0;
>>>> +	int min_w = min_t(uint32_t, 16, frame->upscaled_width);
>>>> +	int upscaledLumaPlaneW, downscaledLumaPlaneW;
>>>> +	int downscaledChromaPlaneW, upscaledChromaPlaneW;
>>>> +	int stepLumaX, stepChromaX;
>>>> +	int errLuma, errChroma;
>>>> +	int initialLumaSubpelX, initialChromaSubpelX;
>>> You haven't used camel case so far, are you sure this is allowed ?
>> I will change them.
>>
>>>> +	int width = 0;
>>>> +
>>>> +	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
>>>> +		superres_scale_denominator = frame->superres_denom;
>>>> +
>>>> +	if (superres_scale_denominator <= SCALE_NUMERATOR)
>>>> +		goto set_regs;
>>>> +
>>>> +	width = (frame->upscaled_width * SCALE_NUMERATOR +
>>>> +		(superres_scale_denominator / 2)) / superres_scale_denominator;
>>>> +
>>>> +	if (width < min_w)
>>>> +		width = min_w;
>>>> +
>>>> +	if (width == frame->upscaled_width)
>>>> +		goto set_regs;
>>>> +
>>>> +	superres_is_scaled = 1;
>>>> +	upscaledLumaPlaneW = frame->upscaled_width;
>>>> +	downscaledLumaPlaneW = width;
>>>> +	downscaledChromaPlaneW = (downscaledLumaPlaneW + 1) >> 1;
>>>> +	upscaledChromaPlaneW = (upscaledLumaPlaneW + 1) >> 1;
>>>> +	stepLumaX =
>>>> +		((downscaledLumaPlaneW << RS_SCALE_SUBPEL_BITS) +
>>>> +		 (upscaledLumaPlaneW / 2)) / upscaledLumaPlaneW;
>>>> +	stepChromaX =
>>>> +		((downscaledChromaPlaneW << RS_SCALE_SUBPEL_BITS) +
>>>> +		 (upscaledChromaPlaneW / 2)) / upscaledChromaPlaneW;
>>>> +	errLuma =
>>>> +		(upscaledLumaPlaneW * stepLumaX)
>>>> +		- (downscaledLumaPlaneW << RS_SCALE_SUBPEL_BITS);
>>>> +	errChroma =
>>>> +		(upscaledChromaPlaneW * stepChromaX)
>>>> +		- (downscaledChromaPlaneW << RS_SCALE_SUBPEL_BITS);
>>>> +	initialLumaSubpelX =
>>>> +		((-((upscaledLumaPlaneW - downscaledLumaPlaneW) << (RS_SCALE_SUBPEL_BITS - 1))
>>>> +		  + upscaledLumaPlaneW / 2)
>>>> +		 / upscaledLumaPlaneW + (1 << (RS_SCALE_EXTRA_BITS - 1)) - errLuma / 2)
>>>> +		& RS_SCALE_SUBPEL_MASK;
>>>> +	initialChromaSubpelX =
>>>> +		((-((upscaledChromaPlaneW - downscaledChromaPlaneW) << (RS_SCALE_SUBPEL_BITS - 1))
>>>> +		  + upscaledChromaPlaneW / 2)
>>>> +		 / upscaledChromaPlaneW + (1 << (RS_SCALE_EXTRA_BITS - 1)) - errChroma / 2)
>>>> +		& RS_SCALE_SUBPEL_MASK;
>>>> +	superres_luma_step = stepLumaX;
>>>> +	superres_chroma_step = stepChromaX;
>>>> +	superres_luma_step_invra =
>>>> +		((upscaledLumaPlaneW << RS_SCALE_SUBPEL_BITS) + (downscaledLumaPlaneW / 2))
>>>> +		/ downscaledLumaPlaneW;
>>>> +	superres_chroma_step_invra =
>>>> +		((upscaledChromaPlaneW << RS_SCALE_SUBPEL_BITS) + (downscaledChromaPlaneW / 2))
>>>> +		/ downscaledChromaPlaneW;
>>>> +	superres_init_luma_subpel_x = initialLumaSubpelX;
>>>> +	superres_init_chroma_subpel_x = initialChromaSubpelX;
>>>> +
>>>> +set_regs:
>>>> +	hantro_reg_write(vpu, &av1_superres_pic_width, frame->upscaled_width);
>>>> +
>>>> +	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
>>>> +		hantro_reg_write(vpu, &av1_scale_denom_minus9,
>>>> +				 frame->superres_denom - SUPERRES_SCALE_DENOMINATOR_MIN);
>>>> +	else
>>>> +		hantro_reg_write(vpu, &av1_scale_denom_minus9, frame->superres_denom);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_superres_luma_step, superres_luma_step);
>>>> +	hantro_reg_write(vpu, &av1_superres_chroma_step, superres_chroma_step);
>>>> +	hantro_reg_write(vpu, &av1_superres_luma_step_invra,
>>>> +			 superres_luma_step_invra);
>>>> +	hantro_reg_write(vpu, &av1_superres_chroma_step_invra,
>>>> +			 superres_chroma_step_invra);
>>>> +	hantro_reg_write(vpu, &av1_superres_init_luma_subpel_x,
>>>> +			 superres_init_luma_subpel_x);
>>>> +	hantro_reg_write(vpu, &av1_superres_init_chroma_subpel_x,
>>>> +			 superres_init_chroma_subpel_x);
>>>> +	hantro_reg_write(vpu, &av1_superres_is_scaled, superres_is_scaled);
>>>> +
>>>> +	hantro_write_addr(vpu, AV1_SR_COL, av1_dec->sr_col.dma);
>>>> +}
>>>> +
>>>> +static void rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +	int pic_width_in_cbs = ALIGN(frame->frame_width_minus_1 + 1, 8) >> 3;
>>>> +	int pic_height_in_cbs = ALIGN(frame->frame_height_minus_1 + 1, 8) >> 3;
>>> Aren't these DIV_ROUND_UP(val + 1, 8) ?
>>>
>>>> +	int pic_width_pad = ALIGN(frame->frame_width_minus_1 + 1, 8)
>>>> +			    - (frame->frame_width_minus_1 + 1);
>>>> +	int pic_height_pad = ALIGN(frame->frame_height_minus_1 + 1, 8)
>>>> +			     - (frame->frame_height_minus_1 + 1);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_pic_width_in_cbs, pic_width_in_cbs);
>>>> +	hantro_reg_write(vpu, &av1_pic_height_in_cbs, pic_height_in_cbs);
>>>> +	hantro_reg_write(vpu, &av1_pic_width_pad, pic_width_pad);
>>>> +	hantro_reg_write(vpu, &av1_pic_height_pad, pic_height_pad);
>>>> +
>>>> +	rockchip_vpu981_av1_dec_set_superres_params(ctx);
>>>> +}
>>>> +
>>>> +static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +	bool use_ref_frame_mvs =
>>>> +	    !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS);
>>>> +	int cur_frame_offset = frame->order_hint;
>>>> +	int alt_frame_offset = 0;
>>>> +	int gld_frame_offset = 0;
>>>> +	int bwd_frame_offset = 0;
>>>> +	int alt2_frame_offset = 0;
>>>> +	int refs_selected[3] = { 0, 0, 0 };
>>>> +	int cur_mi_cols = (frame->frame_width_minus_1 + 8) >> 3;
>>>> +	int cur_mi_rows = (frame->frame_height_minus_1 + 8) >> 3;
>>> This looks like a contraction of (val + 1 + 7) / 8, so in short, another
>>> DIV_ROUND_UP().
>>>
>>>> +	int cur_offset[V4L2_AV1_NUM_REF_FRAMES - 1];
>>>> +	int cur_roffset[V4L2_AV1_NUM_REF_FRAMES - 1];
>>> This looks like V4L2_AV1_REFS_PER_FRAME. Daniel, should we remove this
>>> V4L2_AV1_NUM_REF_FRAMES? It's redundant with V4L2_AV1_TOTAL_REFS_PER_FRAME ...
>>>
>>>> +	int mf_types[3] = { 0, 0, 0 };
>>>> +	int ref_stamp = 2;
>>>> +	int ref_ind = 0;
>>>> +	int rf, idx;
>>>> +
>>>> +	alt_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT_BUF_IDX);
>>>> +	gld_frame_offset = rockchip_vpu981_get_order_hint(ctx, GLD_BUF_IDX);
>>>> +	bwd_frame_offset = rockchip_vpu981_get_order_hint(ctx, BWD_BUF_IDX);
>>>> +	alt2_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT2_BUF_IDX);
>>>> +
>>>> +	idx = rockchip_vpu981_get_frame_index(ctx, LST_BUF_IDX);
>>>> +	if (idx >= 0) {
>>>> +		int alt_frame_offset_in_lst =
>>>> +			av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME];
>>>> +		bool is_lst_overlay =
>>>> +		    (alt_frame_offset_in_lst == gld_frame_offset);
>>>> +
>>>> +		if (!is_lst_overlay) {
>>>> +			int lst_mi_cols =
>>>> +			    (av1_dec->frame_refs[idx].width + 7) >> 3;
>>> DIV_ROUND_UP()
>>>
>>>> +			int lst_mi_rows =
>>>> +			    (av1_dec->frame_refs[idx].height + 7) >> 3;
>>> Again. I'll stop pointing these out; I think you can find them.
>>>
>>>> +			bool lst_intra_only =
>>>> +			    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
>>>> +
>>>> +			if (lst_mi_cols == cur_mi_cols
>>>> +			    && lst_mi_rows == cur_mi_rows && !lst_intra_only) {
>>>> +				mf_types[ref_ind] = V4L2_AV1_REF_LAST_FRAME;
>>>> +				refs_selected[ref_ind++] = LST_BUF_IDX;
>>>> +			}
>>>> +		}
>>>> +		ref_stamp--;
>>>> +	}
>>>> +
>>>> +	idx = rockchip_vpu981_get_frame_index(ctx, BWD_BUF_IDX);
>>>> +	if (rockchip_vpu981_av1_dec_get_relative_dist
>>>> +	    (ctx, bwd_frame_offset, cur_frame_offset) > 0) {
>>>> +		int bwd_mi_cols = (av1_dec->frame_refs[idx].width + 7) >> 3;
>>>> +		int bwd_mi_rows = (av1_dec->frame_refs[idx].height + 7) >> 3;
>>>> +		bool bwd_intra_only =
>>>> +		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
>>>> +
>>>> +		if (bwd_mi_cols == cur_mi_cols && bwd_mi_rows == cur_mi_rows &&
>>>> +		    !bwd_intra_only) {
>>>> +			mf_types[ref_ind] = V4L2_AV1_REF_BWDREF_FRAME;
>>>> +			refs_selected[ref_ind++] = BWD_BUF_IDX;
>>>> +			ref_stamp--;
>>>> +		}
>>>> +	}
>>>> +
>>>> +	idx = rockchip_vpu981_get_frame_index(ctx, ALT2_BUF_IDX);
>>>> +	if (rockchip_vpu981_av1_dec_get_relative_dist
>>>> +	    (ctx, alt2_frame_offset, cur_frame_offset) > 0) {
>>>> +		int alt2_mi_cols = (av1_dec->frame_refs[idx].width + 7) >> 3;
>>>> +		int alt2_mi_rows = (av1_dec->frame_refs[idx].height + 7) >> 3;
>>>> +		bool alt2_intra_only =
>>>> +		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
>>>> +
>>>> +		if (alt2_mi_cols == cur_mi_cols && alt2_mi_rows == cur_mi_rows
>>>> +		    && !alt2_intra_only) {
>>>> +			mf_types[ref_ind] = V4L2_AV1_REF_ALTREF2_FRAME;
>>>> +			refs_selected[ref_ind++] = ALT2_BUF_IDX;
>>>> +			ref_stamp--;
>>>> +		}
>>>> +	}
>>>> +
>>>> +	idx = rockchip_vpu981_get_frame_index(ctx, ALT_BUF_IDX);
>>>> +	if (rockchip_vpu981_av1_dec_get_relative_dist
>>>> +	    (ctx, alt_frame_offset, cur_frame_offset) > 0 && ref_stamp >= 0) {
>>>> +		int alt_mi_cols = (av1_dec->frame_refs[idx].width + 7) >> 3;
>>>> +		int alt_mi_rows = (av1_dec->frame_refs[idx].height + 7) >> 3;
>>>> +		bool alt_intra_only =
>>>> +		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
>>>> +
>>>> +		if (alt_mi_cols == cur_mi_cols && alt_mi_rows == cur_mi_rows &&
>>>> +		    !alt_intra_only) {
>>>> +			mf_types[ref_ind] = V4L2_AV1_REF_ALTREF_FRAME;
>>>> +			refs_selected[ref_ind++] = ALT_BUF_IDX;
>>>> +			ref_stamp--;
>>>> +		}
>>>> +	}
>>>> +
>>>> +	idx = rockchip_vpu981_get_frame_index(ctx, LST2_BUF_IDX);
>>>> +	if (idx >= 0 && ref_stamp >= 0) {
>>>> +		int lst2_mi_cols = (av1_dec->frame_refs[idx].width + 7) >> 3;
>>>> +		int lst2_mi_rows = (av1_dec->frame_refs[idx].height + 7) >> 3;
>>> I said I'd stop, but can't this be calculated once and saved in the reference
>>> frame structure?
>> Indeed that is a simplification, I will implement it.
>>
>>>> +		bool lst2_intra_only =
>>>> +		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
>>>> +
>>>> +		if (lst2_mi_cols == cur_mi_cols && lst2_mi_rows == cur_mi_rows
>>>> +		    && !lst2_intra_only) {
>>>> +			mf_types[ref_ind] = V4L2_AV1_REF_LAST2_FRAME;
>>>> +			refs_selected[ref_ind++] = LST2_BUF_IDX;
>>>> +			ref_stamp--;
>>>> +		}
>>>> +	}
>>>> +
>>>> +	for (rf = 0; rf < V4L2_AV1_NUM_REF_FRAMES - 1; ++rf) {
>>>> +		idx = rockchip_vpu981_get_frame_index(ctx, rf);
>>>> +		if (idx >= 0) {
>>>> +			int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, rf);
>>>> +
>>>> +			cur_offset[rf] =
>>>> +			    rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +								      cur_frame_offset,
>>>> +								      rf_order_hint);
>>>> +			cur_roffset[rf] =
>>>> +			    rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +								      rf_order_hint,
>>>> +								      cur_frame_offset);
>>>> +		} else {
>>>> +			cur_offset[rf] = 0;
>>>> +			cur_roffset[rf] = 0;
>>>> +		}
>>>> +	}
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_use_temporal0_mvs, 0);
>>>> +	hantro_reg_write(vpu, &av1_use_temporal1_mvs, 0);
>>>> +	hantro_reg_write(vpu, &av1_use_temporal2_mvs, 0);
>>>> +	hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_mf1_last_offset, 0);
>>>> +	hantro_reg_write(vpu, &av1_mf1_last2_offset, 0);
>>>> +	hantro_reg_write(vpu, &av1_mf1_last3_offset, 0);
>>>> +	hantro_reg_write(vpu, &av1_mf1_golden_offset, 0);
>>>> +	hantro_reg_write(vpu, &av1_mf1_bwdref_offset, 0);
>>>> +	hantro_reg_write(vpu, &av1_mf1_altref2_offset, 0);
>>>> +	hantro_reg_write(vpu, &av1_mf1_altref_offset, 0);
>>>> +
>>>> +	if (use_ref_frame_mvs && ref_ind > 0 &&
>>>> +	    cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE
>>>> +	    && cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
>>>> +		int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, refs_selected[0]);
>>>> +		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[0]);
>>>> +		int val;
>>>> +
>>>> +		hantro_reg_write(vpu, &av1_use_temporal0_mvs, 1);
>>>> +
>>>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +				rf_order_hint,
>>>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST_FRAME]);
>>>> +		hantro_reg_write(vpu, &av1_mf1_last_offset, val);
>>>> +
>>>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +				rf_order_hint,
>>>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST2_FRAME]);
>>>> +		hantro_reg_write(vpu, &av1_mf1_last2_offset, val);
>>>> +
>>>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +				rf_order_hint,
>>>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST3_FRAME]);
>>>> +		hantro_reg_write(vpu, &av1_mf1_last3_offset, val);
>>>> +
>>>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +				rf_order_hint,
>>>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_GOLDEN_FRAME]);
>>>> +		hantro_reg_write(vpu, &av1_mf1_golden_offset, val);
>>>> +
>>>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +				rf_order_hint,
>>>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_BWDREF_FRAME]);
>>>> +		hantro_reg_write(vpu, &av1_mf1_bwdref_offset, val);
>>>> +
>>>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +				rf_order_hint,
>>>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF2_FRAME]);
>>>> +		hantro_reg_write(vpu, &av1_mf1_altref2_offset, val);
>>>> +
>>>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +				rf_order_hint,
>>>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME]);
>>>> +		hantro_reg_write(vpu, &av1_mf1_altref_offset, val);
>>>> +	}
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_mf2_last_offset, 0);
>>>> +	hantro_reg_write(vpu, &av1_mf2_last2_offset, 0);
>>>> +	hantro_reg_write(vpu, &av1_mf2_last3_offset, 0);
>>>> +	hantro_reg_write(vpu, &av1_mf2_golden_offset, 0);
>>>> +	hantro_reg_write(vpu, &av1_mf2_bwdref_offset, 0);
>>>> +	hantro_reg_write(vpu, &av1_mf2_altref2_offset, 0);
>>>> +	hantro_reg_write(vpu, &av1_mf2_altref_offset, 0);
>>>> +
>>>> +	if (use_ref_frame_mvs && ref_ind > 1 &&
>>>> +	    cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE
>>>> +	    && cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
>>>> +		int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, refs_selected[1]);
>>>> +		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[1]);
>>>> +		int val;
>>>> +
>>>> +		hantro_reg_write(vpu, &av1_use_temporal1_mvs, 1);
>>>> +
>>>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +				rf_order_hint,
>>>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST_FRAME]);
>>>> +		hantro_reg_write(vpu, &av1_mf2_last_offset, val);
>>>> +
>>>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +				rf_order_hint,
>>>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST2_FRAME]);
>>>> +		hantro_reg_write(vpu, &av1_mf2_last2_offset, val);
>>>> +
>>>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +				rf_order_hint,
>>>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST3_FRAME]);
>>>> +		hantro_reg_write(vpu, &av1_mf2_last3_offset, val);
>>>> +
>>>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +				rf_order_hint,
>>>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_GOLDEN_FRAME]);
>>>> +		hantro_reg_write(vpu, &av1_mf2_golden_offset, val);
>>>> +
>>>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +				rf_order_hint,
>>>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_BWDREF_FRAME]);
>>>> +		hantro_reg_write(vpu, &av1_mf2_bwdref_offset, val);
>>>> +
>>>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +				rf_order_hint,
>>>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF2_FRAME]);
>>>> +		hantro_reg_write(vpu, &av1_mf2_altref2_offset, val);
>>>> +
>>>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +				rf_order_hint,
>>>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME]);
>>>> +		hantro_reg_write(vpu, &av1_mf2_altref_offset, val);
>>>> +	}
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_mf3_last_offset, 0);
>>>> +	hantro_reg_write(vpu, &av1_mf3_last2_offset, 0);
>>>> +	hantro_reg_write(vpu, &av1_mf3_last3_offset, 0);
>>>> +	hantro_reg_write(vpu, &av1_mf3_golden_offset, 0);
>>>> +	hantro_reg_write(vpu, &av1_mf3_bwdref_offset, 0);
>>>> +	hantro_reg_write(vpu, &av1_mf3_altref2_offset, 0);
>>>> +	hantro_reg_write(vpu, &av1_mf3_altref_offset, 0);
>>>> +
>>>> +	if (use_ref_frame_mvs && ref_ind > 2 &&
>>>> +	    cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE
>>>> +	    && cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
>>>> +		int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, refs_selected[2]);
>>>> +		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[2]);
>>>> +		int val;
>>>> +
>>>> +		hantro_reg_write(vpu, &av1_use_temporal2_mvs, 1);
>>>> +
>>>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +				rf_order_hint,
>>>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST_FRAME]);
>>>> +		hantro_reg_write(vpu, &av1_mf3_last_offset, val);
>>>> +
>>>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +				rf_order_hint,
>>>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST2_FRAME]);
>>>> +		hantro_reg_write(vpu, &av1_mf3_last2_offset, val);
>>>> +
>>>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +				rf_order_hint,
>>>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST3_FRAME]);
>>>> +		hantro_reg_write(vpu, &av1_mf3_last3_offset, val);
>>>> +
>>>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +				rf_order_hint,
>>>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_GOLDEN_FRAME]);
>>>> +		hantro_reg_write(vpu, &av1_mf3_golden_offset, val);
>>>> +
>>>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +				rf_order_hint,
>>>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_BWDREF_FRAME]);
>>>> +		hantro_reg_write(vpu, &av1_mf3_bwdref_offset, val);
>>>> +
>>>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +				rf_order_hint,
>>>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF2_FRAME]);
>>>> +		hantro_reg_write(vpu, &av1_mf3_altref2_offset, val);
>>>> +
>>>> +		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
>>>> +				rf_order_hint,
>>>> +				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME]);
>>>> +		hantro_reg_write(vpu, &av1_mf3_altref_offset, val);
>>>> +	}
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_cur_last_offset, cur_offset[0]);
>>>> +	hantro_reg_write(vpu, &av1_cur_last2_offset, cur_offset[1]);
>>>> +	hantro_reg_write(vpu, &av1_cur_last3_offset, cur_offset[2]);
>>>> +	hantro_reg_write(vpu, &av1_cur_golden_offset, cur_offset[3]);
>>>> +	hantro_reg_write(vpu, &av1_cur_bwdref_offset, cur_offset[4]);
>>>> +	hantro_reg_write(vpu, &av1_cur_altref2_offset, cur_offset[5]);
>>>> +	hantro_reg_write(vpu, &av1_cur_altref_offset, cur_offset[6]);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_cur_last_roffset, cur_roffset[0]);
>>>> +	hantro_reg_write(vpu, &av1_cur_last2_roffset, cur_roffset[1]);
>>>> +	hantro_reg_write(vpu, &av1_cur_last3_roffset, cur_roffset[2]);
>>>> +	hantro_reg_write(vpu, &av1_cur_golden_roffset, cur_roffset[3]);
>>>> +	hantro_reg_write(vpu, &av1_cur_bwdref_roffset, cur_roffset[4]);
>>>> +	hantro_reg_write(vpu, &av1_cur_altref2_roffset, cur_roffset[5]);
>>>> +	hantro_reg_write(vpu, &av1_cur_altref_roffset, cur_roffset[6]);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_mf1_type, mf_types[0] - V4L2_AV1_REF_LAST_FRAME);
>>>> +	hantro_reg_write(vpu, &av1_mf2_type, mf_types[1] - V4L2_AV1_REF_LAST_FRAME);
>>>> +	hantro_reg_write(vpu, &av1_mf3_type, mf_types[2] - V4L2_AV1_REF_LAST_FRAME);
>>>> +}
>>>> +
>>>> +static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>>>> +	int frame_type = frame->frame_type;
>>>> +	bool allow_intrabc = !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
>>>> +	int ref_count[AV1DEC_MAX_PIC_BUFFERS] = { 0 };
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +	int i, ref_frames = 0;
>>>> +	bool scale_enable = false;
>>>> +
>>>> +	if (IS_INTRA(frame_type) && !allow_intrabc)
>>>> +		return;
>>>> +
>>>> +	if (!allow_intrabc) {
>>>> +		for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
>>>> +			int idx = rockchip_vpu981_get_frame_index(ctx, i);
>>>> +
>>>> +			if (idx >= 0)
>>>> +				ref_count[idx]++;
>>>> +		}
>>>> +
>>>> +		for (i = 0; i < AV1DEC_MAX_PIC_BUFFERS; i++) {
>>>> +			if (ref_count[i])
>>>> +				ref_frames++;
>>>> +		}
>>>> +	} else {
>>>> +		ref_frames = 1;
>>>> +	}
>>>> +	hantro_reg_write(vpu, &av1_ref_frames, ref_frames);
>>>> +
>>>> +	rockchip_vpu981_av1_dec_set_frame_sign_bias(ctx);
>>>> +
>>>> +	for (i = V4L2_AV1_REF_LAST_FRAME; i < V4L2_AV1_NUM_REF_FRAMES; i++) {
>>>> +		uint32_t ref = i - 1;
>>>> +		int idx = 0;
>>>> +		int width, height;
>>>> +
>>>> +		if (allow_intrabc) {
>>>> +			idx = av1_dec->current_frame_index;
>>>> +			width = frame->frame_width_minus_1 + 1;
>>>> +			height = frame->frame_height_minus_1 + 1;
>>>> +		} else {
>>>> +			if (rockchip_vpu981_get_frame_index(ctx, ref) > 0)
>>>> +				idx = rockchip_vpu981_get_frame_index(ctx, ref);
>>>> +			width = av1_dec->frame_refs[idx].width;
>>>> +			height = av1_dec->frame_refs[idx].height;
>>>> +		}
>>>> +
>>>> +		scale_enable |=
>>>> +		    rockchip_vpu981_av1_dec_set_ref(ctx, ref, idx, width,
>>>> +						    height);
>>>> +
>>>> +		rockchip_vpu981_av1_dec_set_sign_bias(ctx, ref,
>>>> +						      av1_dec->ref_frame_sign_bias[i]);
>>>> +	}
>>>> +	hantro_reg_write(vpu, &av1_ref_scaling_enable, scale_enable);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_ref0_gm_mode,
>>>> +			 frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME]);
>>>> +	hantro_reg_write(vpu, &av1_ref1_gm_mode,
>>>> +			 frame->global_motion.type[V4L2_AV1_REF_LAST2_FRAME]);
>>>> +	hantro_reg_write(vpu, &av1_ref2_gm_mode,
>>>> +			 frame->global_motion.type[V4L2_AV1_REF_LAST3_FRAME]);
>>>> +	hantro_reg_write(vpu, &av1_ref3_gm_mode,
>>>> +			 frame->global_motion.type[V4L2_AV1_REF_GOLDEN_FRAME]);
>>>> +	hantro_reg_write(vpu, &av1_ref4_gm_mode,
>>>> +			 frame->global_motion.type[V4L2_AV1_REF_BWDREF_FRAME]);
>>>> +	hantro_reg_write(vpu, &av1_ref5_gm_mode,
>>>> +			 frame->global_motion.type[V4L2_AV1_REF_ALTREF2_FRAME]);
>>>> +	hantro_reg_write(vpu, &av1_ref6_gm_mode,
>>>> +			 frame->global_motion.type[V4L2_AV1_REF_ALTREF_FRAME]);
>>>> +
>>>> +	rockchip_vpu981_av1_dec_set_other_frames(ctx);
>>>> +}
>>>> +
>>>> +static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_skip_mode,
>>>> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT));
>>>> +	hantro_reg_write(vpu, &av1_tempor_mvp_e,
>>>> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS));
>>>> +	hantro_reg_write(vpu, &av1_delta_lf_res_log,
>>>> +			 ctrls->frame->loop_filter.delta_lf_res);
>>>> +	hantro_reg_write(vpu, &av1_delta_lf_multi,
>>>> +			 !!(ctrls->frame->loop_filter.flags
>>>> +			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI));
>>>> +	hantro_reg_write(vpu, &av1_delta_lf_present,
>>>> +			 !!(ctrls->frame->loop_filter.flags
>>>> +			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT));
>>>> +	hantro_reg_write(vpu, &av1_disable_cdf_update,
>>>> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE));
>>>> +	hantro_reg_write(vpu, &av1_allow_warp,
>>>> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION));
>>>> +	hantro_reg_write(vpu, &av1_show_frame,
>>>> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SHOW_FRAME));
>>>> +	hantro_reg_write(vpu, &av1_switchable_motion_mode,
>>>> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE));
>>>> +	hantro_reg_write(vpu, &av1_enable_cdef,
>>>> +			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF));
>>>> +	hantro_reg_write(vpu, &av1_allow_masked_compound,
>>>> +			 !!(ctrls->sequence->flags
>>>> +			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND));
>>>> +	hantro_reg_write(vpu, &av1_allow_interintra,
>>>> +			 !!(ctrls->sequence->flags
>>>> +			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND));
>>>> +	hantro_reg_write(vpu, &av1_enable_intra_edge_filter,
>>>> +			 !!(ctrls->sequence->flags
>>>> +			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER));
>>>> +	hantro_reg_write(vpu, &av1_allow_filter_intra,
>>>> +			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA));
>>>> +	hantro_reg_write(vpu, &av1_enable_jnt_comp,
>>>> +			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP));
>>>> +	hantro_reg_write(vpu, &av1_enable_dual_filter,
>>>> +			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER));
>>>> +	hantro_reg_write(vpu, &av1_reduced_tx_set_used,
>>>> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET));
>>>> +	hantro_reg_write(vpu, &av1_allow_screen_content_tools,
>>>> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS));
>>>> +	hantro_reg_write(vpu, &av1_allow_intrabc,
>>>> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC));
>>>> +
>>>> +	if (!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS))
>>>> +		hantro_reg_write(vpu, &av1_force_interger_mv, 0);
>>>> +	else
>>>> +		hantro_reg_write(vpu, &av1_force_interger_mv,
>>>> +				 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV));
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_blackwhite_e, 0);
>>>> +	hantro_reg_write(vpu, &av1_delta_q_res_log, ctrls->frame->quantization.delta_q_res);
>>>> +	hantro_reg_write(vpu, &av1_delta_q_present,
>>>> +			 !!(ctrls->frame->quantization.flags
>>>> +			    & V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT));
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_idr_pic_e, !ctrls->frame->frame_type);
>>>> +	hantro_reg_write(vpu, &av1_quant_base_qindex, ctrls->frame->quantization.base_q_idx);
>>>> +	hantro_reg_write(vpu, &av1_bit_depth_y_minus8, ctx->bit_depth - 8);
>>>> +	hantro_reg_write(vpu, &av1_bit_depth_c_minus8, ctx->bit_depth - 8);
>>> I believe these registers are read by the PP when using format 0; perhaps
>>> something nice to comment about, as they will impact the pixels produced by the
>>> post-processor, which can be a surprising side effect. Bits 11:8 also hold
>>> sw_bit_depth_out_minus8, but I don't know if it's used for AV1; in my doc it's
>>> only used for the AVS2 CODEC.
>> I see no indication of that in the MPP code.
>> The post-processor output pixel format is selected by writing to the av1_pp_out_format field.
> You now have access to the same doc as me.
>
>
>>> I think it's good to underline that in VC8000/VC9000, some registers are shared
>>> across multiple CODECs.
>>>
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_mcomp_filt_type, ctrls->frame->interpolation_filter);
>>>> +	hantro_reg_write(vpu, &av1_high_prec_mv_e,
>>>> +			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV));
>>>> +	hantro_reg_write(vpu, &av1_comp_pred_mode,
>>>> +			 (ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT) ? 2 : 0);
>>>> +	hantro_reg_write(vpu, &av1_transform_mode, (ctrls->frame->tx_mode == 1) ? 3 : 4);
>>>> +	hantro_reg_write(vpu, &av1_max_cb_size,
>>>> +			 (ctrls->sequence->flags
>>>> +			  & V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK) ? 7 : 6);
>>>> +	hantro_reg_write(vpu, &av1_min_cb_size, 3);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_comp_pred_fixed_ref, 0);
>>>> +	hantro_reg_write(vpu, &av1_comp_pred_var_ref0_av1, 0);
>>>> +	hantro_reg_write(vpu, &av1_comp_pred_var_ref1_av1, 0);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_seg0, 0);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_seg1, 0);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_seg2, 0);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_seg3, 0);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_seg4, 0);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_seg5, 0);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_seg6, 0);
>>>> +	hantro_reg_write(vpu, &av1_filt_level_seg7, 0);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_qp_delta_y_dc_av1, ctrls->frame->quantization.delta_q_y_dc);
>>>> +	hantro_reg_write(vpu, &av1_qp_delta_ch_dc_av1, ctrls->frame->quantization.delta_q_u_dc);
>>>> +	hantro_reg_write(vpu, &av1_qp_delta_ch_ac_av1, ctrls->frame->quantization.delta_q_u_ac);
>>>> +	if (ctrls->frame->quantization.flags & V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX) {
>>>> +		hantro_reg_write(vpu, &av1_qmlevel_y, ctrls->frame->quantization.qm_y);
>>>> +		hantro_reg_write(vpu, &av1_qmlevel_u, ctrls->frame->quantization.qm_u);
>>>> +		hantro_reg_write(vpu, &av1_qmlevel_v, ctrls->frame->quantization.qm_v);
>>>> +	} else {
>>>> +		hantro_reg_write(vpu, &av1_qmlevel_y, 0xff);
>>>> +		hantro_reg_write(vpu, &av1_qmlevel_u, 0xff);
>>>> +		hantro_reg_write(vpu, &av1_qmlevel_v, 0xff);
>>>> +	}
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_lossless_e, rockchip_vpu981_av1_dec_is_lossless(ctx));
>>>> +	hantro_reg_write(vpu, &av1_quant_delta_v_dc, ctrls->frame->quantization.delta_q_v_dc);
>>>> +	hantro_reg_write(vpu, &av1_quant_delta_v_ac, ctrls->frame->quantization.delta_q_v_ac);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_skip_ref0,
>>>> +			 (ctrls->frame->skip_mode_frame[0]) ? ctrls->frame->skip_mode_frame[0] : 1);
>>>> +	hantro_reg_write(vpu, &av1_skip_ref1,
>>>> +			 (ctrls->frame->skip_mode_frame[1]) ? ctrls->frame->skip_mode_frame[1] : 1);
>>>> +
>>>> +	hantro_write_addr(vpu, AV1_MC_SYNC_CURR, av1_dec->tile_buf.dma);
>>>> +	hantro_write_addr(vpu, AV1_MC_SYNC_LEFT, av1_dec->tile_buf.dma);
>>>> +}
>>>> +
>>>> +static void
>>>> +rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx *ctx,
>>>> +					 struct vb2_v4l2_buffer *vb2_src)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
>>>> +	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
>>>> +	    ctrls->tile_group_entry;
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +	dma_addr_t src_dma;
>>>> +	u32 src_len, src_buf_len;
>>>> +	int start_bit, offset;
>>>> +
>>>> +	src_dma = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0);
>>>> +	src_len = vb2_get_plane_payload(&vb2_src->vb2_buf, 0);
>>>> +	src_buf_len = vb2_plane_size(&vb2_src->vb2_buf, 0);
>>>> +
>>>> +	start_bit = (group_entry[0].tile_offset & 0xf) * 8;
>>>> +	offset = group_entry[0].tile_offset & ~0xf;
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_strm_buffer_len, src_buf_len);
>>>> +	hantro_reg_write(vpu, &av1_strm_start_bit, start_bit);
>>>> +	hantro_reg_write(vpu, &av1_stream_len, src_len);
>>>> +	hantro_reg_write(vpu, &av1_strm_start_offset, 0);
>>>> +	hantro_write_addr(vpu, AV1_INPUT_STREAM, src_dma + offset);
>>>> +}
>>>> +
>>>> +static void
>>>> +rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +	struct hantro_decoded_buffer *dst;
>>>> +	struct vb2_v4l2_buffer *vb2_dst;
>>>> +	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
>>>> +	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
>>>> +	size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
>>>> +
>>>> +	vb2_dst = av1_dec->frame_refs[av1_dec->current_frame_index].vb2_ref;
>>>> +	dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf);
>>>> +	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
>>>> +	chroma_addr = luma_addr + cr_offset;
>>>> +	mv_addr = luma_addr + mv_offset;
>>>> +
>>>> +	hantro_write_addr(vpu, AV1_TILE_OUT_LU, luma_addr);
>>>> +	hantro_write_addr(vpu, AV1_TILE_OUT_CH, chroma_addr);
>>>> +	hantro_write_addr(vpu, AV1_TILE_OUT_MV, mv_addr);
>>>> +}
>>>> +
>>>> +int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +	struct vb2_v4l2_buffer *vb2_src;
>>>> +	int ret;
>>>> +
>>>> +	hantro_start_prepare_run(ctx);
>>>> +
>>>> +	ret = rockchip_vpu981_av1_dec_prepare_run(ctx);
>>>> +	if (ret)
>>>> +		goto prepare_error;
>>>> +
>>>> +	vb2_src = hantro_get_src_buf(ctx);
>>>> +	if (!vb2_src)
>>>> +		goto prepare_error;
>>>> +
>>>> +	rockchip_vpu981_av1_dec_clean_refs(ctx);
>>>> +	rockchip_vpu981_av1_dec_frame_ref(ctx, vb2_src->vb2_buf.timestamp);
>>>> +
>>>> +	rockchip_vpu981_av1_dec_set_parameters(ctx);
>>>> +	rockchip_vpu981_av1_dec_set_global_model(ctx);
>>>> +	rockchip_vpu981_av1_dec_set_tile_info(ctx);
>>>> +	rockchip_vpu981_av1_dec_set_reference_frames(ctx);
>>>> +	rockchip_vpu981_av1_dec_set_segmentation(ctx);
>>>> +	rockchip_vpu981_av1_dec_set_loopfilter(ctx);
>>>> +	rockchip_vpu981_av1_dec_set_picture_dimensions(ctx);
>>>> +	rockchip_vpu981_av1_dec_set_cdef(ctx);
>>>> +	rockchip_vpu981_av1_dec_set_lr(ctx);
>>>> +	rockchip_vpu981_av1_dec_set_prob(ctx);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_dec_mode, AV1_DEC_MODE);
>>>> +	hantro_reg_write(vpu, &av1_dec_out_ec_byte_word, 0);
>>>> +	hantro_reg_write(vpu, &av1_write_mvs_e, 1);
>>>> +	hantro_reg_write(vpu, &av1_dec_out_ec_bypass, 1);
>>>> +	hantro_reg_write(vpu, &av1_dec_clk_gate_e, 1);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_dec_abort_e, 0);
>>>> +	hantro_reg_write(vpu, &av1_dec_tile_int_e, 0);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_dec_alignment, 64);
>>>> +	hantro_reg_write(vpu, &av1_apf_disable, 0);
>>>> +	hantro_reg_write(vpu, &av1_apf_threshold, 8);
>>>> +	hantro_reg_write(vpu, &av1_dec_buswidth, 2);
>>>> +	hantro_reg_write(vpu, &av1_dec_max_burst, 16);
>>>> +	hantro_reg_write(vpu, &av1_error_conceal_e, 0);
>>>> +	hantro_reg_write(vpu, &av1_axi_rd_ostd_threshold, 64);
>>>> +	hantro_reg_write(vpu, &av1_axi_wr_ostd_threshold, 64);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_ext_timeout_cycles, 0xfffffff);
>>>> +	hantro_reg_write(vpu, &av1_ext_timeout_override_e, 1);
>>>> +	hantro_reg_write(vpu, &av1_timeout_cycles, 0xfffffff);
>>>> +	hantro_reg_write(vpu, &av1_timeout_override_e, 1);
>>>> +
>>>> +	rockchip_vpu981_av1_dec_set_output_buffer(ctx);
>>>> +	rockchip_vpu981_av1_dec_set_input_buffer(ctx, vb2_src);
>>>> +
>>>> +	hantro_end_prepare_run(ctx);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_dec_e, 1);
>>>> +
>>>> +	return 0;
>>>> +
>>>> +prepare_error:
>>>> +	hantro_end_prepare_run(ctx);
>>>> +	hantro_irq_done(vpu, VB2_BUF_STATE_ERROR);
>>>> +	return ret;
>>>> +}
>>>> +
>>>> +static void rockchip_vpu981_postproc_enable(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +	int width = ctx->dst_fmt.width;
>>>> +	int height = ctx->dst_fmt.height;
>>>> +	struct vb2_v4l2_buffer *vb2_dst;
>>>> +	size_t chroma_offset;
>>>> +	dma_addr_t dst_dma;
>>>> +
>>>> +	vb2_dst = hantro_get_dst_buf(ctx);
>>>> +
>>>> +	dst_dma = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
>>>> +	chroma_offset = ctx->dst_fmt.plane_fmt[0].bytesperline *
>>>> +	    ctx->dst_fmt.height;
>>>> +
>>>> +	/* enable post processor */
>>>> +	hantro_reg_write(vpu, &av1_pp_out_e, 1);
>>>> +	hantro_reg_write(vpu, &av1_pp_in_format, 0);
>>>> +	hantro_reg_write(vpu, &av1_pp0_dup_hor, 1);
>>>> +	hantro_reg_write(vpu, &av1_pp0_dup_ver, 1);
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_pp_in_height, height / 2);
>>>> +	hantro_reg_write(vpu, &av1_pp_in_width, width / 2);
>>>> +	hantro_reg_write(vpu, &av1_pp_out_height, height);
>>>> +	hantro_reg_write(vpu, &av1_pp_out_width, width);
>>>> +	hantro_reg_write(vpu, &av1_pp_out_y_stride,
>>>> +			 ctx->dst_fmt.plane_fmt[0].bytesperline);
>>>> +	hantro_reg_write(vpu, &av1_pp_out_c_stride,
>>>> +			 ctx->dst_fmt.plane_fmt[0].bytesperline);
>>>> +	switch (ctx->dst_fmt.pixelformat) {
>>>> +	case V4L2_PIX_FMT_P010:
>>>> +		hantro_reg_write(vpu, &av1_pp_out_format, 1);
>>>> +		break;
>>>> +	case V4L2_PIX_FMT_NV12:
>>>> +		hantro_reg_write(vpu, &av1_pp_out_format, 3);
>>>> +		break;
>>>> +	default:
>>>> +		hantro_reg_write(vpu, &av1_pp_out_format, 0);
>>>> +	}
>>>> +
>>>> +	hantro_reg_write(vpu, &av1_ppd_blend_exist, 0);
>>>> +	hantro_reg_write(vpu, &av1_ppd_dith_exist, 0);
>>>> +	hantro_reg_write(vpu, &av1_ablend_crop_e, 0);
>>>> +	hantro_reg_write(vpu, &av1_pp_format_customer1_e, 0);
>>>> +	hantro_reg_write(vpu, &av1_pp_crop_exist, 0);
>>>> +	hantro_reg_write(vpu, &av1_pp_up_level, 0);
>>>> +	hantro_reg_write(vpu, &av1_pp_down_level, 0);
>>>> +	hantro_reg_write(vpu, &av1_pp_exist, 0);
>>>> +
>>>> +	hantro_write_addr(vpu, AV1_PP_OUT_LU, dst_dma);
>>>> +	hantro_write_addr(vpu, AV1_PP_OUT_CH, dst_dma + chroma_offset);
>>>> +}
>>>> +
>>>> +static void rockchip_vpu981_postproc_disable(struct hantro_ctx *ctx)
>>>> +{
>>>> +	struct hantro_dev *vpu = ctx->dev;
>>>> +
>>>> +	/* disable post processor */
>>>> +	hantro_reg_write(vpu, &av1_pp_out_e, 0);
>>>> +}
>>>> +
>>>> +const struct hantro_postproc_ops rockchip_vpu981_postproc_ops = {
>>>> +	.enable = rockchip_vpu981_postproc_enable,
>>>> +	.disable = rockchip_vpu981_postproc_disable,
>>>> +};
>>> As enabling the post-proc is done by the common driver, we need a mechanism to
>>> alter the core's decisions, as we really need to enable post-proc to produce
>>> film grain. Fortunately, not applying this filter isn't visually bad, but that's
>>> not the intent of the video author, so it should not be like this by default.
>> I do agree but I think it is out of the scope of this patch.
> I believe I managed to change your mind. Please do carry out these discussions in
> a public place though; the linux-media IRC channel is a good place.
>
>>>> diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h b/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
>>>> new file mode 100644
>>>> index 000000000000..182e6c830ff6
>>>> --- /dev/null
>>>> +++ b/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
>>>> @@ -0,0 +1,477 @@
>>>> +/* SPDX-License-Identifier: GPL-2.0-only */
>>>> +/*
>>>> + * Copyright (c) 2022, Collabora
>>>> + *
>>>> + * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
>>>> + */
>>>> +
>>>> +#ifndef _ROCKCHIP_VPU981_REGS_H_
>>>> +#define _ROCKCHIP_VPU981_REGS_H_
>>>> +
>>>> +#include "hantro.h"
>>>> +
>>>> +#define AV1_SWREG(nr)	((nr) * 4)
>>>> +
>>>> +#define AV1_DEC_REG(b, s, m) \
>>>> +	((const struct hantro_reg) { \
>>>> +		.base = AV1_SWREG(b), \
>>>> +		.shift = s, \
>>>> +		.mask = m, \
>>>> +	})
>>> Just a note that a lot of this is identical to the VC8000 register sets. But until
>>> someone upstreams VC8000 and until we have more VC9000 support, it's fine to keep
>>> that as if it were RK-specific.
>>>
>>>> +
>>>> +#define AV1_REG_INTERRUPT		AV1_SWREG(1)
>>>> +#define AV1_REG_INTERRUPT_DEC_RDY_INT	BIT(12)
>>>> +
>>>> +#define AV1_REG_CONFIG			AV1_SWREG(2)
>>>> +#define AV1_REG_CONFIG_DEC_CLK_GATE_E	BIT(10)
>>>> +
>>>> +#define av1_dec_e			AV1_DEC_REG(1, 0, 0x1)
>>>> +#define av1_dec_abort_e			AV1_DEC_REG(1, 5, 0x1)
>>>> +#define av1_dec_tile_int_e		AV1_DEC_REG(1, 7, 0x1)
>>>> +
>>>> +#define av1_dec_clk_gate_e		AV1_DEC_REG(2, 10, 0x1)
>>>> +
>>>> +#define av1_dec_out_ec_bypass		AV1_DEC_REG(3, 8,  0x1)
>>>> +#define av1_write_mvs_e			AV1_DEC_REG(3, 12, 0x1)
>>>> +#define av1_filtering_dis		AV1_DEC_REG(3, 14, 0x1)
>>>> +#define av1_dec_out_dis			AV1_DEC_REG(3, 15, 0x1)
>>>> +#define av1_dec_out_ec_byte_word	AV1_DEC_REG(3, 16, 0x1)
>>>> +#define av1_skip_mode			AV1_DEC_REG(3, 26, 0x1)
>>>> +#define av1_dec_mode			AV1_DEC_REG(3, 27, 0x1f)
>>>> +
>>>> +#define av1_ref_frames			AV1_DEC_REG(4, 0, 0xf)
>>>> +#define av1_pic_height_in_cbs		AV1_DEC_REG(4, 6, 0x1fff)
>>>> +#define av1_pic_width_in_cbs		AV1_DEC_REG(4, 19, 0x1fff)
>>>> +
>>>> +#define av1_ref_scaling_enable		AV1_DEC_REG(5, 0, 0x1)
>>>> +#define av1_filt_level_base_gt32	AV1_DEC_REG(5, 1, 0x1)
>>>> +#define av1_error_resilient		AV1_DEC_REG(5, 2, 0x1)
>>>> +#define av1_force_interger_mv		AV1_DEC_REG(5, 3, 0x1)
>>>> +#define av1_allow_intrabc		AV1_DEC_REG(5, 4, 0x1)
>>>> +#define av1_allow_screen_content_tools	AV1_DEC_REG(5, 5, 0x1)
>>>> +#define av1_reduced_tx_set_used		AV1_DEC_REG(5, 6, 0x1)
>>>> +#define av1_enable_dual_filter		AV1_DEC_REG(5, 7, 0x1)
>>>> +#define av1_enable_jnt_comp		AV1_DEC_REG(5, 8, 0x1)
>>>> +#define av1_allow_filter_intra		AV1_DEC_REG(5, 9, 0x1)
>>>> +#define av1_enable_intra_edge_filter	AV1_DEC_REG(5, 10, 0x1)
>>>> +#define av1_tempor_mvp_e		AV1_DEC_REG(5, 11, 0x1)
>>>> +#define av1_allow_interintra		AV1_DEC_REG(5, 12, 0x1)
>>>> +#define av1_allow_masked_compound	AV1_DEC_REG(5, 13, 0x1)
>>>> +#define av1_enable_cdef			AV1_DEC_REG(5, 14, 0x1)
>>>> +#define av1_switchable_motion_mode	AV1_DEC_REG(5, 15, 0x1)
>>>> +#define av1_show_frame			AV1_DEC_REG(5, 16, 0x1)
>>>> +#define av1_superres_is_scaled		AV1_DEC_REG(5, 17, 0x1)
>>>> +#define av1_allow_warp			AV1_DEC_REG(5, 18, 0x1)
>>>> +#define av1_disable_cdf_update		AV1_DEC_REG(5, 19, 0x1)
>>>> +#define av1_preskip_segid		AV1_DEC_REG(5, 20, 0x1)
>>>> +#define av1_delta_lf_present		AV1_DEC_REG(5, 21, 0x1)
>>>> +#define av1_delta_lf_multi		AV1_DEC_REG(5, 22, 0x1)
>>>> +#define av1_delta_lf_res_log		AV1_DEC_REG(5, 23, 0x3)
>>>> +#define av1_strm_start_bit		AV1_DEC_REG(5, 25, 0x7f)
>>>> +
>>>> +#define	av1_stream_len			AV1_DEC_REG(6, 0, 0xffffffff)
>>>> +
>>>> +#define av1_delta_q_present		AV1_DEC_REG(7, 0, 0x1)
>>>> +#define av1_delta_q_res_log		AV1_DEC_REG(7, 1, 0x3)
>>>> +#define av1_cdef_damping		AV1_DEC_REG(7, 3, 0x3)
>>>> +#define av1_cdef_bits			AV1_DEC_REG(7, 5, 0x3)
>>>> +#define av1_apply_grain			AV1_DEC_REG(7, 7, 0x1)
>>>> +#define av1_num_y_points_b		AV1_DEC_REG(7, 8, 0x1)
>>>> +#define av1_num_cb_points_b		AV1_DEC_REG(7, 9, 0x1)
>>>> +#define av1_num_cr_points_b		AV1_DEC_REG(7, 10, 0x1)
>>>> +#define av1_overlap_flag		AV1_DEC_REG(7, 11, 0x1)
>>>> +#define av1_clip_to_restricted_range	AV1_DEC_REG(7, 12, 0x1)
>>>> +#define av1_chroma_scaling_from_luma	AV1_DEC_REG(7, 13, 0x1)
>>>> +#define av1_random_seed			AV1_DEC_REG(7, 14, 0xffff)
>>>> +#define av1_blackwhite_e		AV1_DEC_REG(7, 30, 0x1)
>>>> +
>>>> +#define av1_scaling_shift		AV1_DEC_REG(8, 0, 0xf)
>>>> +#define av1_bit_depth_c_minus8		AV1_DEC_REG(8, 4, 0x3)
>>>> +#define av1_bit_depth_y_minus8		AV1_DEC_REG(8, 6, 0x3)
>>>> +#define av1_quant_base_qindex		AV1_DEC_REG(8, 8, 0xff)
>>>> +#define av1_idr_pic_e			AV1_DEC_REG(8, 16, 0x1)
>>>> +#define av1_superres_pic_width		AV1_DEC_REG(8, 17, 0x7fff)
>>>> +
>>>> +#define av1_ref4_sign_bias		AV1_DEC_REG(9, 2, 0x1)
>>>> +#define av1_ref5_sign_bias		AV1_DEC_REG(9, 3, 0x1)
>>>> +#define av1_ref6_sign_bias		AV1_DEC_REG(9, 4, 0x1)
>>>> +#define av1_mf1_type			AV1_DEC_REG(9, 5, 0x7)
>>>> +#define av1_mf2_type			AV1_DEC_REG(9, 8, 0x7)
>>>> +#define av1_mf3_type			AV1_DEC_REG(9, 11, 0x7)
>>>> +#define av1_scale_denom_minus9		AV1_DEC_REG(9, 14, 0x7)
>>>> +#define av1_last_active_seg		AV1_DEC_REG(9, 17, 0x7)
>>>> +#define av1_context_update_tile_id	AV1_DEC_REG(9, 20, 0xfff)
>>>> +
>>>> +#define av1_tile_transpose		AV1_DEC_REG(10, 0, 0x1)
>>>> +#define av1_tile_enable			AV1_DEC_REG(10, 1, 0x1)
>>>> +#define av1_multicore_full_width	AV1_DEC_REG(10,	2, 0xff)
>>>> +#define av1_num_tile_rows_8k		AV1_DEC_REG(10, 10, 0x7f)
>>>> +#define av1_num_tile_cols_8k		AV1_DEC_REG(10, 17, 0x7f)
>>>> +#define av1_multicore_tile_start_x	AV1_DEC_REG(10, 24, 0xff)
>>>> +
>>>> +#define av1_use_temporal3_mvs		AV1_DEC_REG(11, 0, 0x1)
>>>> +#define av1_use_temporal2_mvs		AV1_DEC_REG(11, 1, 0x1)
>>>> +#define av1_use_temporal1_mvs		AV1_DEC_REG(11, 2, 0x1)
>>>> +#define av1_use_temporal0_mvs		AV1_DEC_REG(11, 3, 0x1)
>>>> +#define av1_comp_pred_mode		AV1_DEC_REG(11, 4, 0x3)
>>>> +#define av1_high_prec_mv_e		AV1_DEC_REG(11, 7, 0x1)
>>>> +#define av1_mcomp_filt_type		AV1_DEC_REG(11, 8, 0x7)
>>>> +#define av1_multicore_expect_context_update	AV1_DEC_REG(11, 11, 0x1)
>>>> +#define av1_multicore_sbx_offset	AV1_DEC_REG(11, 12, 0x7f)
>>>> +#define av1_ulticore_tile_col		AV1_DEC_REG(11, 19, 0x7f)
>>>> +#define av1_transform_mode		AV1_DEC_REG(11, 27, 0x7)
>>>> +#define av1_dec_tile_size_mag		AV1_DEC_REG(11, 30, 0x3)
>>>> +
>>>> +#define av1_seg_quant_sign		AV1_DEC_REG(12, 2, 0xff)
>>>> +#define av1_max_cb_size			AV1_DEC_REG(12, 10, 0x7)
>>>> +#define av1_min_cb_size			AV1_DEC_REG(12, 13, 0x7)
>>>> +#define av1_comp_pred_fixed_ref		AV1_DEC_REG(12, 16, 0x7)
>>>> +#define av1_multicore_tile_width	AV1_DEC_REG(12, 19, 0x7f)
>>>> +#define av1_pic_height_pad		AV1_DEC_REG(12, 26, 0x7)
>>>> +#define av1_pic_width_pad		AV1_DEC_REG(12, 29, 0x7)
>>>> +
>>>> +#define av1_segment_e			AV1_DEC_REG(13, 0, 0x1)
>>>> +#define av1_segment_upd_e		AV1_DEC_REG(13, 1, 0x1)
>>>> +#define av1_segment_temp_upd_e		AV1_DEC_REG(13, 2, 0x1)
>>>> +#define av1_comp_pred_var_ref0_av1	AV1_DEC_REG(13, 3, 0x7)
>>>> +#define av1_comp_pred_var_ref1_av1	AV1_DEC_REG(13, 6, 0x7)
>>>> +#define av1_lossless_e			AV1_DEC_REG(13, 9, 0x1)
>>>> +#define av1_qp_delta_ch_ac_av1		AV1_DEC_REG(13, 11, 0x7f)
>>>> +#define av1_qp_delta_ch_dc_av1		AV1_DEC_REG(13, 18, 0x7f)
>>>> +#define av1_qp_delta_y_dc_av1		AV1_DEC_REG(13, 25, 0x7f)
>>>> +
>>>> +#define av1_quant_seg0			AV1_DEC_REG(14, 0, 0xff)
>>>> +#define av1_filt_level_seg0		AV1_DEC_REG(14, 8, 0x3f)
>>>> +#define av1_skip_seg0			AV1_DEC_REG(14, 14, 0x1)
>>>> +#define av1_refpic_seg0			AV1_DEC_REG(14, 15, 0xf)
>>>> +#define av1_filt_level_delta0_seg0	AV1_DEC_REG(14, 19, 0x7f)
>>>> +#define av1_filt_level0			AV1_DEC_REG(14, 26, 0x3f)
>>>> +
>>>> +#define av1_quant_seg1			AV1_DEC_REG(15, 0, 0xff)
>>>> +#define av1_filt_level_seg1		AV1_DEC_REG(15, 8, 0x3f)
>>>> +#define av1_skip_seg1			AV1_DEC_REG(15, 14, 0x1)
>>>> +#define av1_refpic_seg1			AV1_DEC_REG(15, 15, 0xf)
>>>> +#define av1_filt_level_delta0_seg1	AV1_DEC_REG(15, 19, 0x7f)
>>>> +#define av1_filt_level1			AV1_DEC_REG(15, 26, 0x3f)
>>>> +
>>>> +#define av1_quant_seg2			AV1_DEC_REG(16, 0, 0xff)
>>>> +#define av1_filt_level_seg2		AV1_DEC_REG(16, 8, 0x3f)
>>>> +#define av1_skip_seg2			AV1_DEC_REG(16, 14, 0x1)
>>>> +#define av1_refpic_seg2			AV1_DEC_REG(16, 15, 0xf)
>>>> +#define av1_filt_level_delta0_seg2	AV1_DEC_REG(16, 19, 0x7f)
>>>> +#define av1_filt_level2			AV1_DEC_REG(16, 26, 0x3f)
>>>> +
>>>> +#define av1_quant_seg3			AV1_DEC_REG(17, 0, 0xff)
>>>> +#define av1_filt_level_seg3		AV1_DEC_REG(17, 8, 0x3f)
>>>> +#define av1_skip_seg3			AV1_DEC_REG(17, 14, 0x1)
>>>> +#define av1_refpic_seg3			AV1_DEC_REG(17, 15, 0xf)
>>>> +#define av1_filt_level_delta0_seg3	AV1_DEC_REG(17, 19, 0x7f)
>>>> +#define av1_filt_level3			AV1_DEC_REG(17, 26, 0x3f)
>>>> +
>>>> +#define av1_quant_seg4			AV1_DEC_REG(18, 0, 0xff)
>>>> +#define av1_filt_level_seg4		AV1_DEC_REG(18, 8, 0x3f)
>>>> +#define av1_skip_seg4			AV1_DEC_REG(18, 14, 0x1)
>>>> +#define av1_refpic_seg4			AV1_DEC_REG(18, 15, 0xf)
>>>> +#define av1_filt_level_delta0_seg4	AV1_DEC_REG(18, 19, 0x7f)
>>>> +#define av1_lr_type			AV1_DEC_REG(18, 26, 0x3f)
>>>> +
>>>> +#define av1_quant_seg5			AV1_DEC_REG(19, 0, 0xff)
>>>> +#define av1_filt_level_seg5		AV1_DEC_REG(19, 8, 0x3f)
>>>> +#define av1_skip_seg5			AV1_DEC_REG(19, 14, 0x1)
>>>> +#define av1_refpic_seg5			AV1_DEC_REG(19, 15, 0xf)
>>>> +#define av1_filt_level_delta0_seg5	AV1_DEC_REG(19, 19, 0x7f)
>>>> +#define av1_lr_unit_size		AV1_DEC_REG(19, 26, 0x3f)
>>>> +
>>>> +#define av1_filt_level_delta1_seg0	AV1_DEC_REG(20, 0, 0x7f)
>>>> +#define av1_filt_level_delta2_seg0	AV1_DEC_REG(20, 7, 0x7f)
>>>> +#define av1_filt_level_delta3_seg0	AV1_DEC_REG(20, 14, 0x7f)
>>>> +#define av1_global_mv_seg0		AV1_DEC_REG(20, 21, 0x1)
>>>> +#define av1_mf1_last_offset		AV1_DEC_REG(20, 22, 0x1ff)
>>>> +
>>>> +#define av1_filt_level_delta1_seg1	AV1_DEC_REG(21, 0, 0x7f)
>>>> +#define av1_filt_level_delta2_seg1	AV1_DEC_REG(21, 7, 0x7f)
>>>> +#define av1_filt_level_delta3_seg1	AV1_DEC_REG(21, 14, 0x7f)
>>>> +#define av1_global_mv_seg1		AV1_DEC_REG(21, 21, 0x1)
>>>> +#define av1_mf1_last2_offset		AV1_DEC_REG(21, 22, 0x1ff)
>>>> +
>>>> +#define av1_filt_level_delta1_seg2	AV1_DEC_REG(22, 0, 0x7f)
>>>> +#define av1_filt_level_delta2_seg2	AV1_DEC_REG(22, 7, 0x7f)
>>>> +#define av1_filt_level_delta3_seg2	AV1_DEC_REG(22, 14, 0x7f)
>>>> +#define av1_global_mv_seg2		AV1_DEC_REG(22, 21, 0x1)
>>>> +#define av1_mf1_last3_offset		AV1_DEC_REG(22, 22, 0x1ff)
>>>> +
>>>> +#define av1_filt_level_delta1_seg3	AV1_DEC_REG(23, 0, 0x7f)
>>>> +#define av1_filt_level_delta2_seg3	AV1_DEC_REG(23, 7, 0x7f)
>>>> +#define av1_filt_level_delta3_seg3	AV1_DEC_REG(23, 14, 0x7f)
>>>> +#define av1_global_mv_seg3		AV1_DEC_REG(23, 21, 0x1)
>>>> +#define av1_mf1_golden_offset		AV1_DEC_REG(23, 22, 0x1ff)
>>>> +
>>>> +#define av1_filt_level_delta1_seg4	AV1_DEC_REG(24, 0, 0x7f)
>>>> +#define av1_filt_level_delta2_seg4	AV1_DEC_REG(24, 7, 0x7f)
>>>> +#define av1_filt_level_delta3_seg4	AV1_DEC_REG(24, 14, 0x7f)
>>>> +#define av1_global_mv_seg4		AV1_DEC_REG(24, 21, 0x1)
>>>> +#define av1_mf1_bwdref_offset		AV1_DEC_REG(24, 22, 0x1ff)
>>>> +
>>>> +#define av1_filt_level_delta1_seg5	AV1_DEC_REG(25, 0, 0x7f)
>>>> +#define av1_filt_level_delta2_seg5	AV1_DEC_REG(25, 7, 0x7f)
>>>> +#define av1_filt_level_delta3_seg5	AV1_DEC_REG(25, 14, 0x7f)
>>>> +#define av1_global_mv_seg5		AV1_DEC_REG(25, 21, 0x1)
>>>> +#define av1_mf1_altref2_offset		AV1_DEC_REG(25, 22, 0x1ff)
>>>> +
>>>> +#define av1_filt_level_delta1_seg6	AV1_DEC_REG(26, 0, 0x7f)
>>>> +#define av1_filt_level_delta2_seg6	AV1_DEC_REG(26, 7, 0x7f)
>>>> +#define av1_filt_level_delta3_seg6	AV1_DEC_REG(26, 14, 0x7f)
>>>> +#define av1_global_mv_seg6		AV1_DEC_REG(26, 21, 0x1)
>>>> +#define av1_mf1_altref_offset		AV1_DEC_REG(26, 22, 0x1ff)
>>>> +
>>>> +#define av1_filt_level_delta1_seg7	AV1_DEC_REG(27, 0, 0x7f)
>>>> +#define av1_filt_level_delta2_seg7	AV1_DEC_REG(27, 7, 0x7f)
>>>> +#define av1_filt_level_delta3_seg7	AV1_DEC_REG(27, 14, 0x7f)
>>>> +#define av1_global_mv_seg7		AV1_DEC_REG(27, 21, 0x1)
>>>> +#define av1_mf2_last_offset		AV1_DEC_REG(27, 22, 0x1ff)
>>>> +
>>>> +#define av1_cb_offset			AV1_DEC_REG(28, 0, 0x1ff)
>>>> +#define av1_cb_luma_mult		AV1_DEC_REG(28, 9, 0xff)
>>>> +#define av1_cb_mult			AV1_DEC_REG(28, 17, 0xff)
>>>> +#define	av1_quant_delta_v_dc		AV1_DEC_REG(28, 25, 0x7f)
>>>> +
>>>> +#define av1_cr_offset			AV1_DEC_REG(29, 0, 0x1ff)
>>>> +#define av1_cr_luma_mult		AV1_DEC_REG(29, 9, 0xff)
>>>> +#define av1_cr_mult			AV1_DEC_REG(29, 17, 0xff)
>>>> +#define	av1_quant_delta_v_ac		AV1_DEC_REG(29, 25, 0x7f)
>>>> +
>>>> +#define av1_filt_ref_adj_5		AV1_DEC_REG(30, 0, 0x7f)
>>>> +#define av1_filt_ref_adj_4		AV1_DEC_REG(30, 7, 0x7f)
>>>> +#define av1_filt_mb_adj_1		AV1_DEC_REG(30, 14, 0x7f)
>>>> +#define av1_filt_mb_adj_0		AV1_DEC_REG(30, 21, 0x7f)
>>>> +#define av1_filt_sharpness		AV1_DEC_REG(30, 28, 0x7)
>>>> +
>>>> +#define av1_quant_seg6			AV1_DEC_REG(31, 0, 0xff)
>>>> +#define av1_filt_level_seg6		AV1_DEC_REG(31, 8, 0x3f)
>>>> +#define av1_skip_seg6			AV1_DEC_REG(31, 14, 0x1)
>>>> +#define av1_refpic_seg6			AV1_DEC_REG(31, 15, 0xf)
>>>> +#define av1_filt_level_delta0_seg6	AV1_DEC_REG(31, 19, 0x7f)
>>>> +#define av1_skip_ref0			AV1_DEC_REG(31, 26, 0xf)
>>>> +
>>>> +#define av1_quant_seg7			AV1_DEC_REG(32, 0, 0xff)
>>>> +#define av1_filt_level_seg7		AV1_DEC_REG(32, 8, 0x3f)
>>>> +#define av1_skip_seg7			AV1_DEC_REG(32, 14, 0x1)
>>>> +#define av1_refpic_seg7			AV1_DEC_REG(32, 15, 0xf)
>>>> +#define av1_filt_level_delta0_seg7	AV1_DEC_REG(32, 19, 0x7f)
>>>> +#define av1_skip_ref1			AV1_DEC_REG(32, 26, 0xf)
>>>> +
>>>> +#define av1_ref0_height			AV1_DEC_REG(33, 0, 0xffff)
>>>> +#define av1_ref0_width			AV1_DEC_REG(33, 16, 0xffff)
>>>> +
>>>> +#define av1_ref1_height			AV1_DEC_REG(34, 0, 0xffff)
>>>> +#define av1_ref1_width			AV1_DEC_REG(34, 16, 0xffff)
>>>> +
>>>> +#define av1_ref2_height			AV1_DEC_REG(35, 0, 0xffff)
>>>> +#define av1_ref2_width			AV1_DEC_REG(35, 16, 0xffff)
>>>> +
>>>> +#define av1_ref0_ver_scale		AV1_DEC_REG(36, 0, 0xffff)
>>>> +#define av1_ref0_hor_scale		AV1_DEC_REG(36, 16, 0xffff)
>>>> +
>>>> +#define av1_ref1_ver_scale		AV1_DEC_REG(37, 0, 0xffff)
>>>> +#define av1_ref1_hor_scale		AV1_DEC_REG(37, 16, 0xffff)
>>>> +
>>>> +#define av1_ref2_ver_scale		AV1_DEC_REG(38, 0, 0xffff)
>>>> +#define av1_ref2_hor_scale		AV1_DEC_REG(38, 16, 0xffff)
>>>> +
>>>> +#define av1_ref3_ver_scale		AV1_DEC_REG(39, 0, 0xffff)
>>>> +#define av1_ref3_hor_scale		AV1_DEC_REG(39, 16, 0xffff)
>>>> +
>>>> +#define av1_ref4_ver_scale		AV1_DEC_REG(40, 0, 0xffff)
>>>> +#define av1_ref4_hor_scale		AV1_DEC_REG(40, 16, 0xffff)
>>>> +
>>>> +#define av1_ref5_ver_scale		AV1_DEC_REG(41, 0, 0xffff)
>>>> +#define av1_ref5_hor_scale		AV1_DEC_REG(41, 16, 0xffff)
>>>> +
>>>> +#define av1_ref6_ver_scale		AV1_DEC_REG(42, 0, 0xffff)
>>>> +#define av1_ref6_hor_scale		AV1_DEC_REG(42, 16, 0xffff)
>>>> +
>>>> +#define av1_ref3_height			AV1_DEC_REG(43, 0, 0xffff)
>>>> +#define av1_ref3_width			AV1_DEC_REG(43, 16, 0xffff)
>>>> +
>>>> +#define av1_ref4_height			AV1_DEC_REG(44, 0, 0xffff)
>>>> +#define av1_ref4_width			AV1_DEC_REG(44, 16, 0xffff)
>>>> +
>>>> +#define av1_ref5_height			AV1_DEC_REG(45, 0, 0xffff)
>>>> +#define av1_ref5_width			AV1_DEC_REG(45, 16, 0xffff)
>>>> +
>>>> +#define av1_ref6_height			AV1_DEC_REG(46, 0, 0xffff)
>>>> +#define av1_ref6_width			AV1_DEC_REG(46, 16, 0xffff)
>>>> +
>>>> +#define av1_mf2_last2_offset		AV1_DEC_REG(47, 0, 0x1ff)
>>>> +#define av1_mf2_last3_offset		AV1_DEC_REG(47, 9, 0x1ff)
>>>> +#define av1_mf2_golden_offset		AV1_DEC_REG(47, 18, 0x1ff)
>>>> +#define av1_qmlevel_y			AV1_DEC_REG(47, 27, 0xf)
>>>> +
>>>> +#define av1_mf2_bwdref_offset		AV1_DEC_REG(48, 0, 0x1ff)
>>>> +#define av1_mf2_altref2_offset		AV1_DEC_REG(48, 9, 0x1ff)
>>>> +#define av1_mf2_altref_offset		AV1_DEC_REG(48, 18, 0x1ff)
>>>> +#define av1_qmlevel_u			AV1_DEC_REG(48, 27, 0xf)
>>>> +
>>>> +#define av1_filt_ref_adj_6		AV1_DEC_REG(49, 0, 0x7f)
>>>> +#define av1_filt_ref_adj_7		AV1_DEC_REG(49, 7, 0x7f)
>>>> +#define av1_qmlevel_v			AV1_DEC_REG(49, 14, 0xf)
>>>> +
>>>> +#define av1_superres_chroma_step	AV1_DEC_REG(51, 0, 0x3fff)
>>>> +#define av1_superres_luma_step		AV1_DEC_REG(51, 14, 0x3fff)
>>>> +
>>>> +#define av1_superres_init_chroma_subpel_x	AV1_DEC_REG(52, 0, 0x3fff)
>>>> +#define av1_superres_init_luma_subpel_x		AV1_DEC_REG(52, 14, 0x3fff)
>>>> +
>>>> +#define av1_cdef_chroma_secondary_strength	AV1_DEC_REG(53, 0, 0xffff)
>>>> +#define av1_cdef_luma_secondary_strength	AV1_DEC_REG(53, 16, 0xffff)
>>>> +
>>>> +#define av1_apf_threshold		AV1_DEC_REG(55, 0, 0xffff)
>>>> +#define av1_apf_single_pu_mode		AV1_DEC_REG(55, 30, 0x1)
>>>> +#define av1_apf_disable			AV1_DEC_REG(55, 30, 0x1)
>>>> +
>>>> +#define av1_dec_max_burst		AV1_DEC_REG(58, 0, 0xff)
>>>> +#define av1_dec_buswidth		AV1_DEC_REG(58, 8, 0x7)
>>>> +#define av1_dec_multicore_mode		AV1_DEC_REG(58, 11, 0x3)
>>>> +#define av1_dec_axi_wd_id_e		AV1_DEC_REG(58,	13, 0x1)
>>>> +#define av1_dec_axi_rd_id_e		AV1_DEC_REG(58, 14, 0x1)
>>>> +#define av1_dec_mc_polltime		AV1_DEC_REG(58, 17, 0x3ff)
>>>> +#define av1_dec_mc_pollmode		AV1_DEC_REG(58,	27, 0x3)
>>>> +
>>>> +#define av1_filt_ref_adj_3		AV1_DEC_REG(59, 0, 0x3f)
>>>> +#define av1_filt_ref_adj_2		AV1_DEC_REG(59, 7, 0x3f)
>>>> +#define av1_filt_ref_adj_1		AV1_DEC_REG(59, 14, 0x3f)
>>>> +#define av1_filt_ref_adj_0		AV1_DEC_REG(59, 21, 0x3f)
>>>> +#define av1_ref0_sign_bias		AV1_DEC_REG(59, 28, 0x1)
>>>> +#define av1_ref1_sign_bias		AV1_DEC_REG(59, 29, 0x1)
>>>> +#define av1_ref2_sign_bias		AV1_DEC_REG(59, 30, 0x1)
>>>> +#define av1_ref3_sign_bias		AV1_DEC_REG(59, 31, 0x1)
>>>> +
>>>> +#define av1_cur_last_roffset		AV1_DEC_REG(184, 0, 0x1ff)
>>>> +#define av1_cur_last_offset		AV1_DEC_REG(184, 9, 0x1ff)
>>>> +#define av1_mf3_last_offset		AV1_DEC_REG(184, 18, 0x1ff)
>>>> +#define av1_ref0_gm_mode		AV1_DEC_REG(184, 27, 0x3)
>>>> +
>>>> +#define av1_cur_last2_roffset		AV1_DEC_REG(185, 0, 0x1ff)
>>>> +#define av1_cur_last2_offset		AV1_DEC_REG(185, 9, 0x1ff)
>>>> +#define av1_mf3_last2_offset		AV1_DEC_REG(185, 18, 0x1ff)
>>>> +#define av1_ref1_gm_mode		AV1_DEC_REG(185, 27, 0x3)
>>>> +
>>>> +#define av1_cur_last3_roffset		AV1_DEC_REG(186, 0, 0x1ff)
>>>> +#define av1_cur_last3_offset		AV1_DEC_REG(186, 9, 0x1ff)
>>>> +#define av1_mf3_last3_offset		AV1_DEC_REG(186, 18, 0x1ff)
>>>> +#define av1_ref2_gm_mode		AV1_DEC_REG(186, 27, 0x3)
>>>> +
>>>> +#define av1_cur_golden_roffset		AV1_DEC_REG(187, 0, 0x1ff)
>>>> +#define av1_cur_golden_offset		AV1_DEC_REG(187, 9, 0x1ff)
>>>> +#define av1_mf3_golden_offset		AV1_DEC_REG(187, 18, 0x1ff)
>>>> +#define av1_ref3_gm_mode		AV1_DEC_REG(187, 27, 0x3)
>>>> +
>>>> +#define av1_cur_bwdref_roffset		AV1_DEC_REG(188, 0, 0x1ff)
>>>> +#define av1_cur_bwdref_offset		AV1_DEC_REG(188, 9, 0x1ff)
>>>> +#define av1_mf3_bwdref_offset		AV1_DEC_REG(188, 18, 0x1ff)
>>>> +#define av1_ref4_gm_mode		AV1_DEC_REG(188, 27, 0x3)
>>>> +
>>>> +#define av1_cur_altref2_roffset		AV1_DEC_REG(257, 0, 0x1ff)
>>>> +#define av1_cur_altref2_offset		AV1_DEC_REG(257, 9, 0x1ff)
>>>> +#define av1_mf3_altref2_offset		AV1_DEC_REG(257, 18, 0x1ff)
>>>> +#define av1_ref5_gm_mode		AV1_DEC_REG(257, 27, 0x3)
>>>> +
>>>> +#define av1_strm_buffer_len		AV1_DEC_REG(258, 0, 0xffffffff)
>>>> +
>>>> +#define av1_strm_start_offset		AV1_DEC_REG(259, 0, 0xffffffff)
>>>> +
>>>> +#define av1_ppd_blend_exist		AV1_DEC_REG(260, 21, 0x1)
>>>> +#define av1_ppd_dith_exist		AV1_DEC_REG(260, 23, 0x1)
>>>> +#define av1_ablend_crop_e		AV1_DEC_REG(260, 24, 0x1)
>>>> +#define av1_pp_format_p010_e		AV1_DEC_REG(260, 25, 0x1)
>>>> +#define av1_pp_format_customer1_e	AV1_DEC_REG(260, 26, 0x1)
>>>> +#define av1_pp_crop_exist		AV1_DEC_REG(260, 27, 0x1)
>>>> +#define av1_pp_up_level			AV1_DEC_REG(260, 28, 0x1)
>>>> +#define av1_pp_down_level		AV1_DEC_REG(260, 29, 0x3)
>>>> +#define av1_pp_exist			AV1_DEC_REG(260, 31, 0x1)
>>>> +
>>>> +#define av1_cur_altref_roffset		AV1_DEC_REG(262, 0, 0x1ff)
>>>> +#define av1_cur_altref_offset		AV1_DEC_REG(262, 9, 0x1ff)
>>>> +#define av1_mf3_altref_offset		AV1_DEC_REG(262, 18, 0x1ff)
>>>> +#define av1_ref6_gm_mode		AV1_DEC_REG(262, 27, 0x3)
>>>> +
>>>> +#define av1_cdef_luma_primary_strength	AV1_DEC_REG(263, 0, 0xffffffff)
>>>> +
>>>> +#define av1_cdef_chroma_primary_strength AV1_DEC_REG(264, 0, 0xffffffff)
>>>> +
>>>> +#define av1_axi_arqos			AV1_DEC_REG(265, 0, 0xf)
>>>> +#define av1_axi_awqos			AV1_DEC_REG(265, 4, 0xf)
>>>> +#define av1_axi_wr_ostd_threshold	AV1_DEC_REG(265, 8, 0x3ff)
>>>> +#define av1_axi_rd_ostd_threshold	AV1_DEC_REG(265, 18, 0x3ff)
>>>> +#define av1_axi_wr_4k_dis		AV1_DEC_REG(265, 31, 0x1)
>>>> +
>>>> +#define av1_128bit_mode			AV1_DEC_REG(266, 5, 0x1)
>>>> +#define av1_wr_shaper_bypass		AV1_DEC_REG(266, 10, 0x1)
>>>> +#define av1_error_conceal_e		AV1_DEC_REG(266, 30, 0x1)
>>>> +
>>>> +#define av1_superres_chroma_step_invra	AV1_DEC_REG(298, 0, 0xffff)
>>>> +#define av1_superres_luma_step_invra	AV1_DEC_REG(298, 16, 0xffff)
>>>> +
>>>> +#define av1_dec_alignment		AV1_DEC_REG(314, 0, 0xffff)
>>>> +
>>>> +#define av1_ext_timeout_cycles		AV1_DEC_REG(318, 0, 0x7fffffff)
>>>> +#define av1_ext_timeout_override_e	AV1_DEC_REG(318, 31, 0x1)
>>>> +
>>>> +#define av1_timeout_cycles		AV1_DEC_REG(319, 0, 0x7fffffff)
>>>> +#define av1_timeout_override_e		AV1_DEC_REG(319, 31, 0x1)
>>>> +
>>>> +#define av1_pp_out_e			AV1_DEC_REG(320, 0, 0x1)
>>>> +#define av1_pp_cr_first			AV1_DEC_REG(320, 1, 0x1)
>>>> +#define av1_pp_out_mode			AV1_DEC_REG(320, 2, 0x1)
>>>> +#define av1_pp_out_tile_e		AV1_DEC_REG(320, 3, 0x1)
>>>> +#define av1_pp_status			AV1_DEC_REG(320, 4, 0xf)
>>>> +#define av1_pp_in_blk_size		AV1_DEC_REG(320, 8, 0x7)
>>>> +#define av1_pp_out_p010_fmt		AV1_DEC_REG(320, 11, 0x3)
>>>> +#define av1_pp_out_rgb_fmt		AV1_DEC_REG(320, 13, 0x1f)
>>>> +#define av1_rgb_range_max		AV1_DEC_REG(320, 18, 0xfff)
>>>> +#define av1_pp_rgb_planar		AV1_DEC_REG(320, 30, 0x1)
>>>> +
>>>> +#define av1_scale_hratio		AV1_DEC_REG(322, 0, 0x3ffff)
>>>> +#define av1_pp_out_format		AV1_DEC_REG(322, 18, 0x1f)
>>>> +#define av1_ver_scale_mode		AV1_DEC_REG(322, 23, 0x3)
>>>> +#define av1_hor_scale_mode		AV1_DEC_REG(322, 25, 0x3)
>>>> +#define av1_pp_in_format		AV1_DEC_REG(322, 27, 0x1f)
>>>> +
>>>> +#define av1_pp_out_c_stride		AV1_DEC_REG(329, 0, 0xffff)
>>>> +#define av1_pp_out_y_stride		AV1_DEC_REG(329, 16, 0xffff)
>>>> +
>>>> +#define av1_pp_in_height		AV1_DEC_REG(331, 0, 0xffff)
>>>> +#define av1_pp_in_width			AV1_DEC_REG(331, 16, 0xffff)
>>>> +
>>>> +#define av1_pp_out_height		AV1_DEC_REG(332, 0, 0xffff)
>>>> +#define av1_pp_out_width		AV1_DEC_REG(332, 16, 0xffff)
>>>> +
>>>> +#define av1_pp1_dup_ver			AV1_DEC_REG(394, 0, 0xff)
>>>> +#define av1_pp1_dup_hor			AV1_DEC_REG(394, 8, 0xff)
>>>> +#define av1_pp0_dup_ver			AV1_DEC_REG(394, 16, 0xff)
>>>> +#define av1_pp0_dup_hor			AV1_DEC_REG(394, 24, 0xff)
>>>> +
>>>> +#define AV1_TILE_OUT_LU			(AV1_SWREG(65))
>>>> +#define AV1_REFERENCE_Y(i)		(AV1_SWREG(67) + ((i) * 0x8))
>>>> +#define AV1_SEGMENTATION		(AV1_SWREG(81))
>>>> +#define AV1_GLOBAL_MODEL		(AV1_SWREG(83))
>>>> +#define AV1_CDEF_COL			(AV1_SWREG(85))
>>>> +#define AV1_SR_COL			(AV1_SWREG(89))
>>>> +#define AV1_LR_COL			(AV1_SWREG(91))
>>>> +#define AV1_FILM_GRAIN			(AV1_SWREG(95))
>>>> +#define AV1_TILE_OUT_CH			(AV1_SWREG(99))
>>>> +#define AV1_REFERENCE_CB(i)		(AV1_SWREG(101) + ((i) * 0x8))
>>>> +#define AV1_TILE_OUT_MV			(AV1_SWREG(133))
>>>> +#define AV1_REFERENCE_MV(i)		(AV1_SWREG(135) + ((i) * 0x8))
>>>> +#define AV1_TILE_BASE			(AV1_SWREG(167))
>>>> +#define AV1_INPUT_STREAM		(AV1_SWREG(169))
>>>> +#define AV1_PROP_TABLE_OUT		(AV1_SWREG(171))
>>>> +#define AV1_PROP_TABLE			(AV1_SWREG(173))
>>>> +#define AV1_MC_SYNC_CURR		(AV1_SWREG(175))
>>>> +#define AV1_MC_SYNC_LEFT		(AV1_SWREG(177))
>>>> +#define AV1_DB_DATA_COL			(AV1_SWREG(179))
>>>> +#define AV1_DB_CTRL_COL			(AV1_SWREG(183))
>>>> +#define AV1_PP_OUT_LU			(AV1_SWREG(326))
>>>> +#define AV1_PP_OUT_CH			(AV1_SWREG(328))
>>>> +
>>>> +#endif /* _ROCKCHIP_VPU981_REGS_H_ */
Daniel Almeida Dec. 23, 2022, 11:15 p.m. UTC | #6
> > +	int cur_offset[V4L2_AV1_NUM_REF_FRAMES - 1];
> > +	int cur_roffset[V4L2_AV1_NUM_REF_FRAMES - 1];
> 
> This looks like V4L2_AV1_REFS_PER_FRAME. Daniel, should we remove
> this
> V4L2_AV1_NUM_REF_FRAMES? It's redundant with
> V4L2_AV1_TOTAL_REFS_PER_FRAME ...

Hi. These are different. NUM_REF_FRAMES is the size of the "DPB" while
TOTAL_REFS_PER_FRAME is the maximum number of references a frame can
use. It just so happens that in AV1 these two are close in absolute
value (i.e. 7 vs 8).

Using VP9 as a comparison, the DPB size is still 8, but REFS_PER_FRAME
is 3 (meaning a frame can specify LAST, GOLDEN and ALTREF values).

As this is per spec and a mere convenience, I vote for keeping it. 

-- Daniel
Daniel Almeida Dec. 23, 2022, 11:20 p.m. UTC | #7
Ah, I was too quick on that answer :/

I see that they have REFS_PER_FRAME (7), TOTAL_REFS_PER_FRAME (8) _and_
NUM_REF_FRAMES (8), in which case it is redundant indeed. I will remove
that on v4.

-- Daniel

On Fri, 2022-12-23 at 20:15 -0300, Daniel Almeida wrote:
> > > +       int cur_offset[V4L2_AV1_NUM_REF_FRAMES - 1];
> > > +       int cur_roffset[V4L2_AV1_NUM_REF_FRAMES - 1];
> > 
> > This looks like V4L2_AV1_REFS_PER_FRAME. Daniel, should we remove
> > this
> > V4L2_AV1_NUM_REF_FRAMES? It's redundant with
> > V4L2_AV1_TOTAL_REFS_PER_FRAME ...
> 
> Hi. These are different. NUM_REF_FRAMES is the size of the "DPB"
> while
> TOTAL_REFS_PER_FRAME is the maximum number of references a frame can
> use. It just so happens that in AV1 these two are close in absolute
> value (i.e. 7 vs 8).
> 
> Using VP9 as a comparison, the DPB size is still 8, but
> REFS_PER_FRAME
> is 3 (meaning a frame can specify LAST, GOLDEN and ALTREF values).
> 
> As this is per spec and a mere convenience, I vote for keeping it. 
> 
> -- Daniel
diff mbox series

Patch

diff --git a/drivers/media/platform/verisilicon/Makefile b/drivers/media/platform/verisilicon/Makefile
index d2b2679c00eb..c9a9806ab8c5 100644
--- a/drivers/media/platform/verisilicon/Makefile
+++ b/drivers/media/platform/verisilicon/Makefile
@@ -18,6 +18,7 @@  hantro-vpu-y += \
 		rockchip_vpu2_hw_h264_dec.o \
 		rockchip_vpu2_hw_mpeg2_dec.o \
 		rockchip_vpu2_hw_vp8_dec.o \
+		rockchip_vpu981_hw_av1_dec.o \
 		rockchip_av1_entropymode.o \
 		hantro_jpeg.o \
 		hantro_h264.o \
diff --git a/drivers/media/platform/verisilicon/hantro_hw.h b/drivers/media/platform/verisilicon/hantro_hw.h
index e395aeeae2f4..3c0a995998a5 100644
--- a/drivers/media/platform/verisilicon/hantro_hw.h
+++ b/drivers/media/platform/verisilicon/hantro_hw.h
@@ -37,6 +37,9 @@ 
 
 #define NUM_REF_PICTURES	(V4L2_HEVC_DPB_ENTRIES_NUM_MAX + 1)
 
+#define AV1_REF_LIST_SIZE	8
+#define AV1_MAX_FRAME_BUF_COUNT	(V4L2_AV1_TOTAL_REFS_PER_FRAME + 1)
+
 struct hantro_dev;
 struct hantro_ctx;
 struct hantro_buf;
@@ -250,23 +253,80 @@  struct hantro_vp9_dec_hw_ctx {
 };
 
 /**
- * hantro_av1_dec_hw_ctx
+ * struct hantro_av1_dec_ctrls
+ * @sequence:		AV1 Sequence
+ * @tile_group_entry:	AV1 Tile Group entry
+ * @frame:		AV1 Frame Header OBU
+ * @film_grain:		AV1 Film Grain
+ */
+struct hantro_av1_dec_ctrls {
+	const struct v4l2_ctrl_av1_sequence *sequence;
+	const struct v4l2_ctrl_av1_tile_group_entry *tile_group_entry;
+	const struct v4l2_ctrl_av1_frame *frame;
+	const struct v4l2_ctrl_av1_film_grain *film_grain;
+};
+
+struct hantro_av1_frame_ref {
+	int width;
+	int height;
+	u64 timestamp;
+	enum v4l2_av1_frame_type frame_type;
+	int ref_count;
+	u32 order_hint;
+	u32 order_hints[V4L2_AV1_NUM_REF_FRAMES];
+	int gm_mode;
+	struct vb2_v4l2_buffer *vb2_ref;
+};
+
+/**
+ * struct hantro_av1_dec_hw_ctx
+ * @db_data_col:	db tile col data buffer
+ * @db_ctrl_col:	db tile col ctrl buffer
+ * @cdef_col:		cdef tile col buffer
+ * @sr_col:		sr tile col buffer
+ * @lr_col:		lr tile col buffer
+ * @global_model:	global model buffer
+ * @tile_info:		tile info buffer
+ * @segment:		segmentation info buffer
+ * @prob_tbl:		probability table
+ * @prob_tbl_out:	probability table output
+ * @tile_buf:		tile buffer
+ * @ctrls:		V4L2 controls attached to a run
+ * @frame_refs:		reference frames info slots
+ * @ref_frame_sign_bias: array of sign bias
+ * @num_tile_cols_allocated: number of allocated tiles
  * @cdfs:		current probabilities structure
  * @cdfs_ndvc:		current mv probabilities structure
  * @default_cdfs:	default probabilities structure
  * @default_cdfs_ndvc:	default mv probabilties structure
  * @cdfs_last:		stored probabilities structures
  * @cdfs_last_ndvc:	stored mv probabilities structures
+ * @current_frame_index: index of the current in frame_refs array
  */
 struct hantro_av1_dec_hw_ctx {
+	struct hantro_aux_buf db_data_col;
+	struct hantro_aux_buf db_ctrl_col;
+	struct hantro_aux_buf cdef_col;
+	struct hantro_aux_buf sr_col;
+	struct hantro_aux_buf lr_col;
+	struct hantro_aux_buf global_model;
+	struct hantro_aux_buf tile_info;
+	struct hantro_aux_buf segment;
+	struct hantro_aux_buf prob_tbl;
+	struct hantro_aux_buf prob_tbl_out;
+	struct hantro_aux_buf tile_buf;
+	struct hantro_av1_dec_ctrls ctrls;
+	struct hantro_av1_frame_ref frame_refs[AV1_MAX_FRAME_BUF_COUNT];
+	uint32_t ref_frame_sign_bias[AV1_REF_LIST_SIZE];
+	unsigned int num_tile_cols_allocated;
 	struct av1cdfs *cdfs;
 	struct mvcdfs  *cdfs_ndvc;
 	struct av1cdfs default_cdfs;
 	struct mvcdfs  default_cdfs_ndvc;
 	struct av1cdfs cdfs_last[NUM_REF_FRAMES];
 	struct mvcdfs  cdfs_last_ndvc[NUM_REF_FRAMES];
+	int current_frame_index;
 };
-
 /**
  * struct hantro_postproc_ctx
  *
diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
new file mode 100644
index 000000000000..a183e4f35e00
--- /dev/null
+++ b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
@@ -0,0 +1,2067 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, Collabora
+ *
+ * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
+ */
+
+#include <media/v4l2-mem2mem.h>
+#include "hantro.h"
+#include "hantro_v4l2.h"
+#include "rockchip_vpu981_regs.h"
+
+#define AV1_DEC_MODE		17
+#define GM_GLOBAL_MODELS_PER_FRAME	7
+/* One global model record: six 32-bit parameters plus four 16-bit values */
+#define GLOBAL_MODEL_TOTAL_SIZE	(6 * 4 + 4 * 2)
+#define GLOBAL_MODEL_SIZE	ALIGN(GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE, 2048)
+#define AV1_MAX_TILES		128
+/* 16 bytes per tile entry */
+#define AV1_TILE_INFO_SIZE	(AV1_MAX_TILES * 16)
+#define AV1DEC_MAX_PIC_BUFFERS	24
+#define AV1_REF_SCALE_SHIFT	14
+/* Returned by the frame lookup helpers when no slot matches */
+#define AV1_INVALID_IDX		-1
+#define MAX_FRAME_DISTANCE	31
+#define AV1_PRIMARY_REF_NONE	7
+#define AV1_TILE_SIZE		ALIGN(32 * 128, 4096)
+/*
+ * These 3 values aren't defined in enum v4l2_av1_segment_feature because
+ * they are not part of the specification.
+ */
+#define V4L2_AV1_SEG_LVL_ALT_LF_Y_H	2
+#define V4L2_AV1_SEG_LVL_ALT_LF_U	3
+#define V4L2_AV1_SEG_LVL_ALT_LF_V	4
+
+/* Clamp v to the inclusive range [l, h]; arguments are evaluated more than once */
+#define CLIP3(l, h, v)      ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
+
+#define SUPERRES_SCALE_BITS 3
+#define SCALE_NUMERATOR 8
+#define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)
+
+#define RS_SUBPEL_BITS 6
+#define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
+#define RS_SCALE_SUBPEL_BITS 14
+#define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
+#define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
+#define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))
+
+/* True for key frames and intra-only frames */
+#define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
+
+/* Zero-based reference indices, counted from V4L2_AV1_REF_LAST_FRAME */
+#define LST_BUF_IDX (V4L2_AV1_REF_LAST_FRAME - V4L2_AV1_REF_LAST_FRAME)
+#define LST2_BUF_IDX (V4L2_AV1_REF_LAST2_FRAME - V4L2_AV1_REF_LAST_FRAME)
+#define LST3_BUF_IDX (V4L2_AV1_REF_LAST3_FRAME - V4L2_AV1_REF_LAST_FRAME)
+#define GLD_BUF_IDX (V4L2_AV1_REF_GOLDEN_FRAME - V4L2_AV1_REF_LAST_FRAME)
+#define BWD_BUF_IDX (V4L2_AV1_REF_BWDREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
+#define ALT2_BUF_IDX (V4L2_AV1_REF_ALTREF2_FRAME - V4L2_AV1_REF_LAST_FRAME)
+#define ALT_BUF_IDX (V4L2_AV1_REF_ALTREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
+
+/* Constants used by the shear parameter computation and its divisor table */
+#define DIV_LUT_PREC_BITS 14
+#define DIV_LUT_BITS 8
+#define DIV_LUT_NUM BIT(DIV_LUT_BITS)
+#define WARP_PARAM_REDUCE_BITS 6
+#define WARPEDMODEL_PREC_BITS 16
+
+/*
+ * Divide value by 2^n, rounding to nearest: half of 2^n is added before the
+ * right shift. Each argument is evaluated exactly once.
+ */
+#define AV1_DIV_ROUND_UP_POW2(value, n)			\
+({							\
+	typeof(n) _n  = n;				\
+	typeof(value) _value = value;			\
+	(_value + (BIT(_n) >> 1)) >> _n;		\
+})
+
+/*
+ * Signed variant of AV1_DIV_ROUND_UP_POW2(): negative values are negated,
+ * rounded as a magnitude, and negated back, so rounding is symmetric
+ * around zero.
+ */
+#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n)				\
+({									\
+	typeof(n) _n_  = n;						\
+	typeof(value) _value_ = value;					\
+	(((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_))	\
+		: AV1_DIV_ROUND_UP_POW2((_value_), (_n_)));		\
+})
+
+/*
+ * Film grain data block: three 256-entry 8-bit scaling lookup tables
+ * (Y, Cb, Cr) followed by the cropped luma and chroma grain blocks.
+ * NOTE(review): presumably this mirrors the layout the hardware reads;
+ * the user of this struct is outside this excerpt - confirm.
+ */
+struct rockchip_av1_film_grain {
+	uint8_t scaling_lut_y[256];
+	uint8_t scaling_lut_cb[256];
+	uint8_t scaling_lut_cr[256];
+	int16_t cropped_luma_grain_block[4096];
+	int16_t cropped_chroma_grain_block[1024 * 2];
+};
+
+/*
+ * Precomputed multiplier table indexed by
+ * rockchip_vpu981_av1_dec_resolve_divisor_32(). It holds DIV_LUT_NUM + 1
+ * entries that decrease from 16384 (1 << DIV_LUT_PREC_BITS) down to 8192.
+ */
+static const short div_lut[DIV_LUT_NUM + 1] = {
+	16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
+	15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
+	15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
+	14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
+	13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
+	13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
+	13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
+	12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
+	12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
+	11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
+	11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
+	11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
+	10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
+	10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
+	10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
+	9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
+	9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
+	9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
+	9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
+	9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
+	8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
+	8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
+	8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
+	8240,  8224,  8208,  8192,
+};
+
+/*
+ * Find the frame_refs slot that backs reference @ref of the current frame.
+ *
+ * @ref selects an entry of frame->ref_frame_idx[], which in turn selects a
+ * timestamp in frame->reference_frame_ts[]. The in-use frame_refs slots are
+ * then searched for that timestamp.
+ *
+ * Returns the slot index, or AV1_INVALID_IDX when the reference index is out
+ * of range or no in-use slot carries the timestamp.
+ */
+static int rockchip_vpu981_get_frame_index(struct hantro_ctx *ctx, int ref)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
+	u64 timestamp;
+	int i, idx = frame->ref_frame_idx[ref];
+
+	/*
+	 * idx indexes reference_frame_ts[], which holds
+	 * V4L2_AV1_TOTAL_REFS_PER_FRAME entries. AV1_MAX_FRAME_BUF_COUNT is
+	 * one larger, so checking against it would let idx read one element
+	 * past the end of the array.
+	 */
+	if (idx >= V4L2_AV1_TOTAL_REFS_PER_FRAME || idx < 0)
+		return AV1_INVALID_IDX;
+
+	timestamp = frame->reference_frame_ts[idx];
+	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
+		/* Free slots may still hold a stale timestamp; ignore them */
+		if (av1_dec->frame_refs[i].ref_count == 0)
+			continue;
+		if (av1_dec->frame_refs[i].timestamp == timestamp)
+			return i;
+	}
+
+	return AV1_INVALID_IDX;
+}
+
+/*
+ * Return the order hint recorded for reference @ref, or 0 when the
+ * reference cannot be resolved to a frame_refs slot.
+ */
+static int rockchip_vpu981_get_order_hint(struct hantro_ctx *ctx, int ref)
+{
+	int slot = rockchip_vpu981_get_frame_index(ctx, ref);
+
+	if (slot == AV1_INVALID_IDX)
+		return 0;
+
+	return ctx->av1_dec.frame_refs[slot].order_hint;
+}
+
+/*
+ * Claim the first free frame_refs slot for the frame being decoded.
+ *
+ * The slot records the frame size, @timestamp, frame type, order hint(s)
+ * and a global motion type taken from the current frame control, gets its
+ * reference count raised, and becomes current_frame_index.
+ *
+ * Returns the slot index, or AV1_INVALID_IDX when every slot is in use.
+ */
+static int rockchip_vpu981_av1_dec_frame_ref(struct hantro_ctx *ctx,
+					     u64 timestamp)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
+	int slot;
+
+	for (slot = 0; slot < AV1_MAX_FRAME_BUF_COUNT; slot++) {
+		struct hantro_av1_frame_ref *entry = &av1_dec->frame_refs[slot];
+		int k;
+
+		if (entry->ref_count != 0)
+			continue;
+
+		entry->width = frame->frame_width_minus_1 + 1;
+		entry->height = frame->frame_height_minus_1 + 1;
+		entry->timestamp = timestamp;
+		entry->frame_type = frame->frame_type;
+		entry->order_hint = frame->order_hint;
+		/*
+		 * NOTE(review): slot can reach AV1_MAX_FRAME_BUF_COUNT - 1,
+		 * so this index goes up to V4L2_AV1_REF_LAST_FRAME + that
+		 * value - confirm it stays inside global_motion.type[].
+		 */
+		entry->gm_mode =
+			frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME + slot];
+
+		/* A destination buffer already attached to the slot is kept */
+		if (!entry->vb2_ref)
+			entry->vb2_ref = hantro_get_dst_buf(ctx);
+
+		for (k = 0; k < V4L2_AV1_NUM_REF_FRAMES; k++)
+			entry->order_hints[k] = frame->order_hints[k];
+
+		entry->ref_count++;
+		av1_dec->current_frame_index = slot;
+		return slot;
+	}
+
+	return AV1_INVALID_IDX;
+}
+
+/*
+ * Drop one reference on frame_refs slot @idx. Negative indices are ignored;
+ * a count that falls below zero is reported with a warning.
+ */
+static void rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx *ctx, int idx)
+{
+	struct hantro_av1_frame_ref *entry;
+
+	if (idx < 0)
+		return;
+
+	entry = &ctx->av1_dec.frame_refs[idx];
+	if (--entry->ref_count < 0)
+		pr_warn("AV1 reference frames refcounting error (idx %d)\n", idx);
+}
+
+/*
+ * Release every in-use frame_refs slot whose timestamp no longer appears in
+ * the current frame's reference_frame_ts[] list.
+ */
+static void rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
+	int slot;
+
+	for (slot = 0; slot < AV1_MAX_FRAME_BUF_COUNT; slot++) {
+		const struct hantro_av1_frame_ref *entry = &av1_dec->frame_refs[slot];
+		bool still_referenced = false;
+		int r;
+
+		if (!entry->ref_count)
+			continue;
+
+		for (r = 0; r < V4L2_AV1_TOTAL_REFS_PER_FRAME; r++) {
+			if (frame->reference_frame_ts[r] == entry->timestamp) {
+				still_referenced = true;
+				break;
+			}
+		}
+
+		if (!still_referenced)
+			rockchip_vpu981_av1_dec_frame_unref(ctx, slot);
+	}
+}
+
+/*
+ * Size in bytes of the luma plane for the destination format:
+ * width * height * bit_depth / 8.
+ */
+static size_t rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx *ctx)
+{
+	return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8;
+}
+
+/*
+ * Returns 3/2 of the luma plane size, rounded up to a multiple of 64.
+ * NOTE(review): despite the name, the value covers luma plus half as much
+ * again; callers are outside this excerpt - confirm how it is used.
+ */
+static size_t rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx *ctx)
+{
+	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
+
+	return ALIGN((cr_offset * 3) / 2, 64);
+}
+
+/* Free one tile column buffer if it is allocated and mark it as released. */
+static void rockchip_vpu981_av1_dec_tile_buf_release(struct hantro_dev *vpu,
+						     struct hantro_aux_buf *buf)
+{
+	if (buf->cpu)
+		dma_free_coherent(vpu->dev, buf->size, buf->cpu, buf->dma);
+	buf->cpu = NULL;
+}
+
+/*
+ * Free the five per-tile-column DMA buffers (db data, db ctrl, cdef, sr and
+ * lr). Buffers that were never allocated are skipped, so the function can be
+ * called repeatedly.
+ */
+void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_dev *vpu = ctx->dev;
+
+	rockchip_vpu981_av1_dec_tile_buf_release(vpu, &av1_dec->db_data_col);
+	rockchip_vpu981_av1_dec_tile_buf_release(vpu, &av1_dec->db_ctrl_col);
+	rockchip_vpu981_av1_dec_tile_buf_release(vpu, &av1_dec->cdef_col);
+	rockchip_vpu981_av1_dec_tile_buf_release(vpu, &av1_dec->sr_col);
+	rockchip_vpu981_av1_dec_tile_buf_release(vpu, &av1_dec->lr_col);
+}
+
+/*
+ * Make sure the per-tile-column DMA buffers are large enough for the current
+ * frame.
+ *
+ * Nothing is done when the number of tile columns does not exceed what is
+ * already allocated. Otherwise all five buffers are freed and allocated
+ * again, with sizes derived from the 64-aligned frame height, the bit depth
+ * and the number of tile columns.
+ *
+ * Returns 0 on success or -ENOMEM, in which case no tile buffer is left
+ * allocated.
+ */
+static int rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+	/*
+	 * NOTE(review): the column count is derived as 1 << tile_col of the
+	 * first tile group entry - confirm this is the intended source.
+	 */
+	unsigned int num_tile_cols = 1 << ctrls->tile_group_entry->tile_col;
+	unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
+	unsigned int height_in_sb = height / 64;
+	unsigned int stripe_num = ((height + 8) + 63) / 64;
+	size_t size;
+
+	if (num_tile_cols <= av1_dec->num_tile_cols_allocated)
+		return 0;
+
+	rockchip_vpu981_av1_dec_tiles_free(ctx);
+	/*
+	 * The buffers are gone, so the recorded column count must go too.
+	 * Otherwise a failed allocation below would leave a stale count and
+	 * a later call would return early with NULL buffers.
+	 */
+	av1_dec->num_tile_cols_allocated = 0;
+
+	size = ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols;
+	av1_dec->db_data_col.cpu = dma_alloc_coherent(vpu->dev, size,
+						      &av1_dec->db_data_col.dma,
+						      GFP_KERNEL);
+	if (!av1_dec->db_data_col.cpu)
+		goto buffer_allocation_error;
+	av1_dec->db_data_col.size = size;
+
+	size = ALIGN(height * 2 * 16 / 4, 128) * num_tile_cols;
+	av1_dec->db_ctrl_col.cpu = dma_alloc_coherent(vpu->dev, size,
+						      &av1_dec->db_ctrl_col.dma,
+						      GFP_KERNEL);
+	if (!av1_dec->db_ctrl_col.cpu)
+		goto buffer_allocation_error;
+	av1_dec->db_ctrl_col.size = size;
+
+	size = ALIGN(height_in_sb * 44 * ctx->bit_depth * 16 / 8, 128) * num_tile_cols;
+	av1_dec->cdef_col.cpu = dma_alloc_coherent(vpu->dev, size,
+						   &av1_dec->cdef_col.dma,
+						   GFP_KERNEL);
+	if (!av1_dec->cdef_col.cpu)
+		goto buffer_allocation_error;
+	av1_dec->cdef_col.size = size;
+
+	size = ALIGN(height_in_sb * (3040 + 1280), 128) * num_tile_cols;
+	av1_dec->sr_col.cpu = dma_alloc_coherent(vpu->dev, size,
+						 &av1_dec->sr_col.dma,
+						 GFP_KERNEL);
+	if (!av1_dec->sr_col.cpu)
+		goto buffer_allocation_error;
+	av1_dec->sr_col.size = size;
+
+	size = ALIGN(stripe_num * 1536 * ctx->bit_depth / 8, 128) * num_tile_cols;
+	av1_dec->lr_col.cpu = dma_alloc_coherent(vpu->dev, size,
+						 &av1_dec->lr_col.dma,
+						 GFP_KERNEL);
+	if (!av1_dec->lr_col.cpu)
+		goto buffer_allocation_error;
+	av1_dec->lr_col.size = size;
+
+	av1_dec->num_tile_cols_allocated = num_tile_cols;
+	return 0;
+
+buffer_allocation_error:
+	rockchip_vpu981_av1_dec_tiles_free(ctx);
+	return -ENOMEM;
+}
+
+/*
+ * Free the frame-independent DMA buffers (global model, tile info, both
+ * probability tables and the tile buffer), then the per-tile-column buffers.
+ * Buffers that are not allocated are skipped.
+ */
+void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_dev *vpu = ctx->dev;
+	struct hantro_aux_buf *bufs[] = {
+		&av1_dec->global_model,
+		&av1_dec->tile_info,
+		&av1_dec->prob_tbl,
+		&av1_dec->prob_tbl_out,
+		&av1_dec->tile_buf,
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(bufs); i++) {
+		if (bufs[i]->cpu)
+			dma_free_coherent(vpu->dev, bufs[i]->size,
+					  bufs[i]->cpu, bufs[i]->dma);
+		bufs[i]->cpu = NULL;
+	}
+
+	rockchip_vpu981_av1_dec_tiles_free(ctx);
+}
+
+/*
+ * Reset the AV1 decoder context and allocate its frame-independent DMA
+ * buffers: global model, tile info, probability table (in and out) and tile
+ * buffer. The current CDF pointers are set to the default tables, which are
+ * filled in by rockchip_av1_set_default_cdfs().
+ *
+ * Returns 0 on success. On allocation failure every buffer allocated so far
+ * is released again and -ENOMEM is returned.
+ */
+int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+
+	memset(av1_dec, 0, sizeof(*av1_dec));
+
+	av1_dec->global_model.cpu = dma_alloc_coherent(vpu->dev, GLOBAL_MODEL_SIZE,
+						       &av1_dec->global_model.dma,
+						       GFP_KERNEL);
+	if (!av1_dec->global_model.cpu)
+		goto err_free;
+	av1_dec->global_model.size = GLOBAL_MODEL_SIZE;
+
+	/*
+	 * Each tile entry is 16 bytes, so the buffer needs
+	 * AV1_TILE_INFO_SIZE bytes; AV1_MAX_TILES bytes would only hold
+	 * eight tiles.
+	 */
+	av1_dec->tile_info.cpu = dma_alloc_coherent(vpu->dev, AV1_TILE_INFO_SIZE,
+						    &av1_dec->tile_info.dma,
+						    GFP_KERNEL);
+	if (!av1_dec->tile_info.cpu)
+		goto err_free;
+	av1_dec->tile_info.size = AV1_TILE_INFO_SIZE;
+
+	av1_dec->prob_tbl.cpu = dma_alloc_coherent(vpu->dev,
+						   ALIGN(sizeof(struct av1cdfs), 2048),
+						   &av1_dec->prob_tbl.dma,
+						   GFP_KERNEL);
+	if (!av1_dec->prob_tbl.cpu)
+		goto err_free;
+	av1_dec->prob_tbl.size = ALIGN(sizeof(struct av1cdfs), 2048);
+
+	av1_dec->prob_tbl_out.cpu = dma_alloc_coherent(vpu->dev,
+						       ALIGN(sizeof(struct av1cdfs), 2048),
+						       &av1_dec->prob_tbl_out.dma,
+						       GFP_KERNEL);
+	if (!av1_dec->prob_tbl_out.cpu)
+		goto err_free;
+	av1_dec->prob_tbl_out.size = ALIGN(sizeof(struct av1cdfs), 2048);
+	av1_dec->cdfs = &av1_dec->default_cdfs;
+	av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
+
+	rockchip_av1_set_default_cdfs(av1_dec->cdfs, av1_dec->cdfs_ndvc);
+
+	av1_dec->tile_buf.cpu = dma_alloc_coherent(vpu->dev,
+						   AV1_TILE_SIZE,
+						   &av1_dec->tile_buf.dma,
+						   GFP_KERNEL);
+	if (!av1_dec->tile_buf.cpu)
+		goto err_free;
+	av1_dec->tile_buf.size = AV1_TILE_SIZE;
+
+	return 0;
+
+err_free:
+	/* exit() skips buffers that were never allocated, so it is safe here */
+	rockchip_vpu981_av1_dec_exit(ctx);
+	return -ENOMEM;
+}
+
+/*
+ * Fetch the AV1 controls for this run and size the tile column buffers.
+ *
+ * The sequence, tile group entry and frame controls are mandatory: a missing
+ * one triggers a warning and -EINVAL. The film grain control is stored
+ * without being checked. Returns the result of the tile buffer reallocation
+ * otherwise.
+ */
+static int rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_ctrls *ctrls = &ctx->av1_dec.ctrls;
+
+	ctrls->sequence = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_SEQUENCE);
+	if (WARN_ON(!ctrls->sequence))
+		return -EINVAL;
+
+	ctrls->tile_group_entry = hantro_get_ctrl(ctx,
+						  V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
+	if (WARN_ON(!ctrls->tile_group_entry))
+		return -EINVAL;
+
+	ctrls->frame = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FRAME);
+	if (WARN_ON(!ctrls->frame))
+		return -EINVAL;
+
+	ctrls->film_grain = hantro_get_ctrl(ctx,
+					    V4L2_CID_STATELESS_AV1_FILM_GRAIN);
+
+	return rockchip_vpu981_av1_dec_tiles_reallocate(ctx);
+}
+
+/* Bit position of the most significant set bit of @n; 0 when @n is 0. */
+static inline int rockchip_vpu981_av1_dec_get_msb(u32 n)
+{
+	return n ? 31 ^ __builtin_clz(n) : 0;
+}
+
+/*
+ * Look up the div_lut multiplier for divisor @d.
+ *
+ * The table index is built from the DIV_LUT_BITS bits that follow the most
+ * significant set bit of @d. On return *@shift holds the position of that
+ * bit plus DIV_LUT_PREC_BITS.
+ *
+ * Returns the table entry, or -1 when the computed index exceeds
+ * DIV_LUT_NUM (in which case *@shift holds only the bit position).
+ */
+static short rockchip_vpu981_av1_dec_resolve_divisor_32(u32 d, short *shift)
+{
+	int f;
+	uint64_t e;
+
+	*shift = rockchip_vpu981_av1_dec_get_msb(d);
+	/* e is obtained from D after resetting the most significant 1 bit. */
+	e = d - ((u32)1 << *shift);
+	/* Get the most significant DIV_LUT_BITS (8) bits of e into f */
+	if (*shift > DIV_LUT_BITS)
+		f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS);
+	else
+		f = e << (DIV_LUT_BITS - *shift);
+	if (f > DIV_LUT_NUM)
+		return -1;
+	*shift += DIV_LUT_PREC_BITS;
+	/* Use f as lookup into the precomputed table of multipliers */
+	return div_lut[f];
+}
+
+/*
+ * Compute the four shear values (alpha, beta, gamma, delta) from the six
+ * warp parameters in @params, which are read as signed 32-bit values.
+ *
+ * Each value is clamped to the 16-bit signed range and then rounded to a
+ * multiple of 1 << WARP_PARAM_REDUCE_BITS. When params[2] is not strictly
+ * positive the function returns early and leaves all four outputs
+ * untouched.
+ */
+static void rockchip_vpu981_av1_dec_get_shear_params(const uint32_t *params,
+	int64_t *alpha, int64_t *beta, int64_t *gamma, int64_t *delta)
+{
+	const int *mat = params;
+	short shift;
+	short y;
+	long long gv, dv;
+
+	if (mat[2] <= 0)
+		return;
+
+	*alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
+	*beta = clamp_val(mat[3], S16_MIN, S16_MAX);
+
+	/* mat[2] is known to be positive here, so the sign factor is always 1 */
+	y = rockchip_vpu981_av1_dec_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
+
+	gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
+
+	*gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), S16_MIN, S16_MAX);
+
+	dv = ((long long)mat[3] * mat[4]) * y;
+	*delta = clamp_val(
+		mat[5] -
+		(int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - (1 << WARPEDMODEL_PREC_BITS),
+		S16_MIN, S16_MAX);
+
+	/* Round each value to a multiple of 1 << WARP_PARAM_REDUCE_BITS */
+	*alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS)
+		 * (1 << WARP_PARAM_REDUCE_BITS);
+	*beta = AV1_DIV_ROUND_UP_POW2_SIGNED(*beta, WARP_PARAM_REDUCE_BITS)
+		* (1 << WARP_PARAM_REDUCE_BITS);
+	*gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS)
+		 * (1 << WARP_PARAM_REDUCE_BITS);
+	*delta = AV1_DIV_ROUND_UP_POW2_SIGNED(*delta, WARP_PARAM_REDUCE_BITS)
+		* (1 << WARP_PARAM_REDUCE_BITS);
+}
+
+/*
+ * Fill the global model buffer and program its DMA address.
+ *
+ * The buffer is zeroed, then one record is written per reference frame
+ * (V4L2_AV1_REFS_PER_FRAME of them, starting at V4L2_AV1_REF_LAST_FRAME):
+ * six 32-bit global motion parameters followed by four 16-bit shear values.
+ */
+static void rockchip_vpu981_av1_dec_set_global_model(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
+	const struct v4l2_av1_global_motion *gm = &frame->global_motion;
+	uint8_t *dst = av1_dec->global_model.cpu;
+	struct hantro_dev *vpu = ctx->dev;
+	int ref_frame, i;
+
+	memset(dst, 0, GLOBAL_MODEL_SIZE);
+	for (ref_frame = 0; ref_frame < V4L2_AV1_REFS_PER_FRAME; ++ref_frame) {
+		/* Shear values stay 0 unless get_shear_params() fills them in */
+		int64_t alpha = 0, beta = 0, gamma = 0, delta = 0;
+
+		/* Parameters 2 and 3 are written in swapped order */
+		for (i = 0; i < 6; ++i) {
+			if (i == 2)
+				*(int32_t *)dst =
+					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][3];
+			else if (i == 3)
+				*(int32_t *)dst =
+					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][2];
+			else
+				*(int32_t *)dst =
+					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][i];
+			dst += 4;
+		}
+
+		if (gm->type[V4L2_AV1_REF_LAST_FRAME + ref_frame] <= V4L2_AV1_WARP_MODEL_AFFINE)
+			rockchip_vpu981_av1_dec_get_shear_params(
+					&gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][0],
+					&alpha, &beta, &gamma, &delta);
+
+		/* The 64-bit shear values are stored truncated to 16 bits */
+		*(int16_t *)dst = alpha;
+		dst += 2;
+		*(int16_t *)dst = beta;
+		dst += 2;
+		*(int16_t *)dst = gamma;
+		dst += 2;
+		*(int16_t *)dst = delta;
+		dst += 2;
+	}
+
+	hantro_write_addr(vpu, AV1_GLOBAL_MODEL, av1_dec->global_model.dma);
+}
+
+/*
+ * Fill the tile info buffer and program the tile related registers.
+ *
+ * One 16-byte entry is written per tile, iterating columns in the outer
+ * loop and rows in the inner loop: width and height in superblocks (32 bits
+ * each, little endian), then the tile start and end offsets relative to the
+ * first tile group entry.
+ * NOTE(review): tile_cols * tile_rows * 16 bytes are written - confirm the
+ * tile_info buffer is allocated large enough for the maximum tile count.
+ */
+static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+	/* Local copy of the tile info from the frame control */
+	struct v4l2_av1_tile_info tile_info = ctrls->frame->tile_info;
+	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
+	    ctrls->tile_group_entry;
+	/* Convert the row-major context update tile id to column-major order */
+	int context_update_y =
+	    tile_info.context_update_tile_id / tile_info.tile_cols;
+	int context_update_x =
+	    tile_info.context_update_tile_id % tile_info.tile_cols;
+	int context_update_tile_id =
+	    context_update_x * tile_info.tile_rows + context_update_y;
+	uint8_t *dst = av1_dec->tile_info.cpu;
+	struct hantro_dev *vpu = ctx->dev;
+	int tile0, tile1;
+
+	memset(dst, 0, av1_dec->tile_info.size);
+
+	for (tile0 = 0; tile0 < tile_info.tile_cols; tile0++) {
+		for (tile1 = 0; tile1 < tile_info.tile_rows; tile1++) {
+			/* Row-major id used to index the tile group entries */
+			int tile_id = tile1 * tile_info.tile_cols + tile0;
+			uint32_t start, end;
+			uint32_t y0 =
+			    tile_info.height_in_sbs_minus_1[tile1] + 1;
+			uint32_t x0 = tile_info.width_in_sbs_minus_1[tile0] + 1;
+
+			// tile size in SB units (width,height), low byte only
+			*dst++ = x0;
+			*dst++ = 0;
+			*dst++ = 0;
+			*dst++ = 0;
+			*dst++ = y0;
+			*dst++ = 0;
+			*dst++ = 0;
+			*dst++ = 0;
+
+			// tile start position
+			start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
+			*dst++ = start & 255;
+			*dst++ = (start >> 8) & 255;
+			*dst++ = (start >> 16) & 255;
+			*dst++ = (start >> 24) & 255;
+
+			// tile end position (start + tile size in bytes)
+			end = start + group_entry[tile_id].tile_size;
+			*dst++ = end & 255;
+			*dst++ = (end >> 8) & 255;
+			*dst++ = (end >> 16) & 255;
+			*dst++ = (end >> 24) & 255;
+		}
+	}
+
+	hantro_reg_write(vpu, &av1_multicore_expect_context_update,
+			 !!(context_update_x == 0));
+	hantro_reg_write(vpu, &av1_tile_enable, !!((tile_info.tile_cols > 1)
+						   || (tile_info.tile_rows > 1)));
+	hantro_reg_write(vpu, &av1_num_tile_cols_8k, tile_info.tile_cols);
+	hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info.tile_rows);
+	hantro_reg_write(vpu, &av1_context_update_tile_id,
+			 context_update_tile_id);
+	hantro_reg_write(vpu, &av1_tile_transpose, 1);
+	/* The size magnitude comes from the control only for a non-zero id */
+	if (context_update_tile_id) {
+		hantro_reg_write(vpu, &av1_dec_tile_size_mag,
+				 tile_info.tile_size_bytes);
+	} else
+		hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3);
+
+	hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma);
+}
+
+/*
+ * Return the signed distance between order hints @a and @b.
+ *
+ * The difference a - b is sign-extended from bit (order_hint_bits - 1),
+ * order_hint_bits being taken from the sequence control. The result is 0
+ * when order_hint_bits is 0.
+ */
+static int rockchip_vpu981_av1_dec_get_relative_dist(struct hantro_ctx *ctx,
+						     int a, int b)
+{
+	const struct v4l2_ctrl_av1_sequence *seq = ctx->av1_dec.ctrls.sequence;
+	int delta, sign_bit;
+
+	if (seq->order_hint_bits == 0)
+		return 0;
+
+	sign_bit = 1 << (seq->order_hint_bits - 1);
+	delta = a - b;
+
+	/* keep the low bits, subtract the weight of the sign bit if set */
+	return (delta & (sign_bit - 1)) - (delta & sign_bit);
+}
+
+/*
+ * Compute av1_dec->ref_frame_sign_bias[] for the current frame.
+ *
+ * Without order hints, or for an intra frame, every entry is cleared.
+ * Otherwise entry i + 1 is set to 1 when reference i has a valid frame
+ * index and its order hint is after the current frame's one, and to 0 when
+ * it is not after. Entries of references without a valid frame index are
+ * left untouched.
+ */
+static void rockchip_vpu981_av1_dec_set_frame_sign_bias(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
+	const struct v4l2_ctrl_av1_sequence *sequence = av1_dec->ctrls.sequence;
+	int ref;
+
+	if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
+		for (ref = 0; ref < AV1_REF_LIST_SIZE; ref++)
+			av1_dec->ref_frame_sign_bias[ref] = 0;
+
+		return;
+	}
+
+	for (ref = 0; ref < AV1_REF_LIST_SIZE - 1; ref++) {
+		int ref_hint, dist;
+
+		if (rockchip_vpu981_get_frame_index(ctx, ref) < 0)
+			continue;
+
+		ref_hint = rockchip_vpu981_get_order_hint(ctx, ref);
+		dist = rockchip_vpu981_av1_dec_get_relative_dist(ctx, ref_hint,
+								 frame->order_hint);
+		av1_dec->ref_frame_sign_bias[ref + 1] = dist > 0;
+	}
+}
+
+/*
+ * Program the registers describing reference slot @ref from the decoded
+ * buffer held in av1_dec->frame_refs[@idx].
+ *
+ * @width and @height are written to the per-reference size registers and
+ * are used, together with the current frame size, to compute rounded
+ * fixed-point ratios with AV1_REF_SCALE_SHIFT fractional bits. The luma,
+ * chroma and MV addresses of the reference buffer are written afterwards.
+ *
+ * Returns true when at least one of the two ratios differs from
+ * 1 << AV1_REF_SCALE_SHIFT, false otherwise.
+ */
+static bool
+rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
+				int width, int height)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
+	struct hantro_dev *vpu = ctx->dev;
+	struct hantro_decoded_buffer *dst;
+	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
+	/* Offsets, from the luma address, of the chroma and MV areas */
+	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
+	size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
+	int cur_width = frame->frame_width_minus_1 + 1;
+	int cur_height = frame->frame_height_minus_1 + 1;
+	/* reference / current ratio, rounded to nearest */
+	int scale_width =
+	    ((width << AV1_REF_SCALE_SHIFT) + cur_width / 2) / cur_width;
+	int scale_height =
+	    ((height << AV1_REF_SCALE_SHIFT) + cur_height / 2) / cur_height;
+
+	/*
+	 * NOTE(review): the width ratio is written to the *_ver_scale fields
+	 * and the height ratio to the *_hor_scale fields; confirm that this
+	 * pairing matches the hardware register definition.
+	 */
+	switch (ref) {
+	case 0:
+		hantro_reg_write(vpu, &av1_ref0_height, height);
+		hantro_reg_write(vpu, &av1_ref0_width, width);
+		hantro_reg_write(vpu, &av1_ref0_ver_scale, scale_width);
+		hantro_reg_write(vpu, &av1_ref0_hor_scale, scale_height);
+		break;
+	case 1:
+		hantro_reg_write(vpu, &av1_ref1_height, height);
+		hantro_reg_write(vpu, &av1_ref1_width, width);
+		hantro_reg_write(vpu, &av1_ref1_ver_scale, scale_width);
+		hantro_reg_write(vpu, &av1_ref1_hor_scale, scale_height);
+		break;
+	case 2:
+		hantro_reg_write(vpu, &av1_ref2_height, height);
+		hantro_reg_write(vpu, &av1_ref2_width, width);
+		hantro_reg_write(vpu, &av1_ref2_ver_scale, scale_width);
+		hantro_reg_write(vpu, &av1_ref2_hor_scale, scale_height);
+		break;
+	case 3:
+		hantro_reg_write(vpu, &av1_ref3_height, height);
+		hantro_reg_write(vpu, &av1_ref3_width, width);
+		hantro_reg_write(vpu, &av1_ref3_ver_scale, scale_width);
+		hantro_reg_write(vpu, &av1_ref3_hor_scale, scale_height);
+		break;
+	case 4:
+		hantro_reg_write(vpu, &av1_ref4_height, height);
+		hantro_reg_write(vpu, &av1_ref4_width, width);
+		hantro_reg_write(vpu, &av1_ref4_ver_scale, scale_width);
+		hantro_reg_write(vpu, &av1_ref4_hor_scale, scale_height);
+		break;
+	case 5:
+		hantro_reg_write(vpu, &av1_ref5_height, height);
+		hantro_reg_write(vpu, &av1_ref5_width, width);
+		hantro_reg_write(vpu, &av1_ref5_ver_scale, scale_width);
+		hantro_reg_write(vpu, &av1_ref5_hor_scale, scale_height);
+		break;
+	case 6:
+		hantro_reg_write(vpu, &av1_ref6_height, height);
+		hantro_reg_write(vpu, &av1_ref6_width, width);
+		hantro_reg_write(vpu, &av1_ref6_ver_scale, scale_width);
+		hantro_reg_write(vpu, &av1_ref6_hor_scale, scale_height);
+		break;
+	default:
+		/* Only warns: the address writes below still use @ref as is */
+		pr_warn("AV1 invalid reference frame index\n");
+	}
+
+	dst = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
+	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
+	chroma_addr = luma_addr + cr_offset;
+	mv_addr = luma_addr + mv_offset;
+
+	hantro_write_addr(vpu, AV1_REFERENCE_Y(ref), luma_addr);
+	hantro_write_addr(vpu, AV1_REFERENCE_CB(ref), chroma_addr);
+	hantro_write_addr(vpu, AV1_REFERENCE_MV(ref), mv_addr);
+
+	return (scale_width != (1 << AV1_REF_SCALE_SHIFT))
+		|| (scale_height != (1 << AV1_REF_SCALE_SHIFT));
+}
+
+/*
+ * Write @val to the sign-bias register of reference slot @ref.
+ *
+ * Slots 0 to 6 each have their own register; any other @ref only emits a
+ * warning and writes nothing.
+ */
+static void rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx *ctx,
+						  int ref, int val)
+{
+	struct hantro_dev *vpu = ctx->dev;
+
+	switch (ref) {
+	case 0:
+		hantro_reg_write(vpu, &av1_ref0_sign_bias, val);
+		break;
+	case 1:
+		hantro_reg_write(vpu, &av1_ref1_sign_bias, val);
+		break;
+	case 2:
+		hantro_reg_write(vpu, &av1_ref2_sign_bias, val);
+		break;
+	case 3:
+		hantro_reg_write(vpu, &av1_ref3_sign_bias, val);
+		break;
+	case 4:
+		hantro_reg_write(vpu, &av1_ref4_sign_bias, val);
+		break;
+	case 5:
+		hantro_reg_write(vpu, &av1_ref5_sign_bias, val);
+		break;
+	case 6:
+		hantro_reg_write(vpu, &av1_ref6_sign_bias, val);
+		break;
+	default:
+		pr_warn("AV1 invalid sign bias index\n");
+		break;
+	}
+}
+
+/*
+ * Program the segmentation registers for the current frame.
+ *
+ * When segmentation is enabled and primary_ref_frame designates a
+ * reference with a valid frame index, AV1_SEGMENTATION is pointed inside
+ * that reference buffer, at the offset returned by
+ * rockchip_vpu981_av1_dec_chroma_size(), and av1_use_temporal3_mvs is
+ * written as 1. That field is written back to 0 for intra or
+ * error-resilient frames.
+ *
+ * The per-segment feature values are gathered in segval[][], which stays
+ * all zeroes when segmentation is disabled, and are then written segment
+ * by segment together with the ALT_Q sign mask, the last active segment
+ * and the preskip flag.
+ */
+static void rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
+	const struct v4l2_av1_segmentation *seg = &frame->segmentation;
+	uint32_t segval[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX] = { 0 };
+	struct hantro_dev *vpu = ctx->dev;
+	uint8_t segsign = 0, preskip_segid = 0, last_active_seg = 0, i, j;
+
+	if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED)
+	    && (frame->primary_ref_frame < V4L2_AV1_REFS_PER_FRAME)) {
+		int idx = rockchip_vpu981_get_frame_index(ctx, frame->primary_ref_frame);
+
+		if (idx >= 0) {
+			/* Only the MV area of the reference buffer is needed */
+			dma_addr_t luma_addr, mv_addr;
+			size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
+
+			luma_addr =
+				hantro_get_dec_buf_addr(ctx,
+							&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
+			mv_addr = luma_addr + mv_offset;
+
+			hantro_write_addr(vpu, AV1_SEGMENTATION, mv_addr);
+			hantro_reg_write(vpu, &av1_use_temporal3_mvs, 1);
+		}
+	}
+
+	hantro_reg_write(vpu, &av1_segment_temp_upd_e,
+			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE));
+	hantro_reg_write(vpu, &av1_segment_upd_e,
+			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP));
+	hantro_reg_write(vpu, &av1_segment_e,
+			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED));
+
+	hantro_reg_write(vpu, &av1_error_resilient,
+			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE));
+
+	if (IS_INTRA(frame->frame_type)
+	    || !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE)) {
+		hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
+	}
+
+	if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED)) {
+		int s;
+
+		for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
+			/* ALT_Q: magnitude in segval, sign in bit s of segsign */
+			if (seg->feature_enabled[s] &
+			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
+				segval[s][V4L2_AV1_SEG_LVL_ALT_Q] =
+				    CLIP3(0, 255,
+					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q]));
+				segsign |=
+					(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q] < 0) << s;
+			}
+
+			/*
+			 * NOTE(review): the loop-filter deltas below go through
+			 * abs(), so the -63 lower bound of CLIP3() can never
+			 * apply and the sign is not forwarded; confirm this is
+			 * what the hardware expects.
+			 */
+			if (seg->feature_enabled[s] &
+			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_V))
+				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V] =
+					CLIP3(-63, 63,
+					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]));
+
+			if (seg->feature_enabled[s] &
+			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_H))
+				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H] =
+				    CLIP3(-63, 63,
+					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]));
+
+			if (seg->feature_enabled[s] &
+			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_U))
+				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_U] =
+				    CLIP3(-63, 63,
+					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_U]));
+
+			if (seg->feature_enabled[s] &
+			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_V))
+				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_V] =
+				    CLIP3(-63, 63,
+					abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_V]));
+
+			/* Takes segval from 0 to 1; feature_data is not read */
+			if (frame->frame_type && seg->feature_enabled[s] &
+			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_FRAME))
+				segval[s][V4L2_AV1_SEG_LVL_REF_FRAME]++;
+
+			if (seg->feature_enabled[s] &
+			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_SKIP))
+				segval[s][V4L2_AV1_SEG_LVL_REF_SKIP] = 1;
+
+			if (seg->feature_enabled[s] &
+			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_GLOBALMV))
+				segval[s][V4L2_AV1_SEG_LVL_REF_GLOBALMV] = 1;
+		}
+	}
+
+	/*
+	 * Highest segment with any feature enabled, and whether any enabled
+	 * feature is V4L2_AV1_SEG_LVL_REF_FRAME or above.
+	 */
+	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
+		for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++) {
+			if (seg->feature_enabled[i]
+			    & V4L2_AV1_SEGMENT_FEATURE_ENABLED(j)) {
+				preskip_segid |= (j >= V4L2_AV1_SEG_LVL_REF_FRAME);
+				last_active_seg = max(i, last_active_seg);
+			}
+		}
+	}
+
+	hantro_reg_write(vpu, &av1_last_active_seg, last_active_seg);
+	hantro_reg_write(vpu, &av1_preskip_segid, preskip_segid);
+
+	hantro_reg_write(vpu, &av1_seg_quant_sign, segsign);
+
+	/* Write QP, filter level, ref frame and skip for every segment */
+	hantro_reg_write(vpu, &av1_quant_seg0,
+			 segval[0][V4L2_AV1_SEG_LVL_ALT_Q]);
+	hantro_reg_write(vpu, &av1_filt_level_delta0_seg0,
+			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
+	hantro_reg_write(vpu, &av1_filt_level_delta1_seg0,
+			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
+	hantro_reg_write(vpu, &av1_filt_level_delta2_seg0,
+			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_U]);
+	hantro_reg_write(vpu, &av1_filt_level_delta3_seg0,
+			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_V]);
+	hantro_reg_write(vpu, &av1_refpic_seg0,
+			 segval[0][V4L2_AV1_SEG_LVL_REF_FRAME]);
+	hantro_reg_write(vpu, &av1_skip_seg0,
+			 segval[0][V4L2_AV1_SEG_LVL_REF_SKIP]);
+	hantro_reg_write(vpu, &av1_global_mv_seg0,
+			 segval[0][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
+
+	hantro_reg_write(vpu, &av1_quant_seg1,
+			 segval[1][V4L2_AV1_SEG_LVL_ALT_Q]);
+	hantro_reg_write(vpu, &av1_filt_level_delta0_seg1,
+			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
+	hantro_reg_write(vpu, &av1_filt_level_delta1_seg1,
+			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
+	hantro_reg_write(vpu, &av1_filt_level_delta2_seg1,
+			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_U]);
+	hantro_reg_write(vpu, &av1_filt_level_delta3_seg1,
+			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_V]);
+	hantro_reg_write(vpu, &av1_refpic_seg1,
+			 segval[1][V4L2_AV1_SEG_LVL_REF_FRAME]);
+	hantro_reg_write(vpu, &av1_skip_seg1,
+			 segval[1][V4L2_AV1_SEG_LVL_REF_SKIP]);
+	hantro_reg_write(vpu, &av1_global_mv_seg1,
+			 segval[1][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
+
+	hantro_reg_write(vpu, &av1_quant_seg2,
+			 segval[2][V4L2_AV1_SEG_LVL_ALT_Q]);
+	hantro_reg_write(vpu, &av1_filt_level_delta0_seg2,
+			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
+	hantro_reg_write(vpu, &av1_filt_level_delta1_seg2,
+			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
+	hantro_reg_write(vpu, &av1_filt_level_delta2_seg2,
+			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_U]);
+	hantro_reg_write(vpu, &av1_filt_level_delta3_seg2,
+			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_V]);
+	hantro_reg_write(vpu, &av1_refpic_seg2,
+			 segval[2][V4L2_AV1_SEG_LVL_REF_FRAME]);
+	hantro_reg_write(vpu, &av1_skip_seg2,
+			 segval[2][V4L2_AV1_SEG_LVL_REF_SKIP]);
+	hantro_reg_write(vpu, &av1_global_mv_seg2,
+			 segval[2][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
+
+	hantro_reg_write(vpu, &av1_quant_seg3,
+			 segval[3][V4L2_AV1_SEG_LVL_ALT_Q]);
+	hantro_reg_write(vpu, &av1_filt_level_delta0_seg3,
+			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
+	hantro_reg_write(vpu, &av1_filt_level_delta1_seg3,
+			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
+	hantro_reg_write(vpu, &av1_filt_level_delta2_seg3,
+			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_U]);
+	hantro_reg_write(vpu, &av1_filt_level_delta3_seg3,
+			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_V]);
+	hantro_reg_write(vpu, &av1_refpic_seg3,
+			 segval[3][V4L2_AV1_SEG_LVL_REF_FRAME]);
+	hantro_reg_write(vpu, &av1_skip_seg3,
+			 segval[3][V4L2_AV1_SEG_LVL_REF_SKIP]);
+	hantro_reg_write(vpu, &av1_global_mv_seg3,
+			 segval[3][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
+
+	hantro_reg_write(vpu, &av1_quant_seg4,
+			 segval[4][V4L2_AV1_SEG_LVL_ALT_Q]);
+	hantro_reg_write(vpu, &av1_filt_level_delta0_seg4,
+			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
+	hantro_reg_write(vpu, &av1_filt_level_delta1_seg4,
+			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
+	hantro_reg_write(vpu, &av1_filt_level_delta2_seg4,
+			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_U]);
+	hantro_reg_write(vpu, &av1_filt_level_delta3_seg4,
+			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_V]);
+	hantro_reg_write(vpu, &av1_refpic_seg4,
+			 segval[4][V4L2_AV1_SEG_LVL_REF_FRAME]);
+	hantro_reg_write(vpu, &av1_skip_seg4,
+			 segval[4][V4L2_AV1_SEG_LVL_REF_SKIP]);
+	hantro_reg_write(vpu, &av1_global_mv_seg4,
+			 segval[4][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
+
+	hantro_reg_write(vpu, &av1_quant_seg5,
+			 segval[5][V4L2_AV1_SEG_LVL_ALT_Q]);
+	hantro_reg_write(vpu, &av1_filt_level_delta0_seg5,
+			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
+	hantro_reg_write(vpu, &av1_filt_level_delta1_seg5,
+			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
+	hantro_reg_write(vpu, &av1_filt_level_delta2_seg5,
+			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_U]);
+	hantro_reg_write(vpu, &av1_filt_level_delta3_seg5,
+			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_V]);
+	hantro_reg_write(vpu, &av1_refpic_seg5,
+			 segval[5][V4L2_AV1_SEG_LVL_REF_FRAME]);
+	hantro_reg_write(vpu, &av1_skip_seg5,
+			 segval[5][V4L2_AV1_SEG_LVL_REF_SKIP]);
+	hantro_reg_write(vpu, &av1_global_mv_seg5,
+			 segval[5][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
+
+	hantro_reg_write(vpu, &av1_quant_seg6,
+			 segval[6][V4L2_AV1_SEG_LVL_ALT_Q]);
+	hantro_reg_write(vpu, &av1_filt_level_delta0_seg6,
+			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
+	hantro_reg_write(vpu, &av1_filt_level_delta1_seg6,
+			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
+	hantro_reg_write(vpu, &av1_filt_level_delta2_seg6,
+			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_U]);
+	hantro_reg_write(vpu, &av1_filt_level_delta3_seg6,
+			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_V]);
+	hantro_reg_write(vpu, &av1_refpic_seg6,
+			 segval[6][V4L2_AV1_SEG_LVL_REF_FRAME]);
+	hantro_reg_write(vpu, &av1_skip_seg6,
+			 segval[6][V4L2_AV1_SEG_LVL_REF_SKIP]);
+	hantro_reg_write(vpu, &av1_global_mv_seg6,
+			 segval[6][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
+
+	hantro_reg_write(vpu, &av1_quant_seg7,
+			 segval[7][V4L2_AV1_SEG_LVL_ALT_Q]);
+	hantro_reg_write(vpu, &av1_filt_level_delta0_seg7,
+			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
+	hantro_reg_write(vpu, &av1_filt_level_delta1_seg7,
+			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
+	hantro_reg_write(vpu, &av1_filt_level_delta2_seg7,
+			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_U]);
+	hantro_reg_write(vpu, &av1_filt_level_delta3_seg7,
+			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_V]);
+	hantro_reg_write(vpu, &av1_refpic_seg7,
+			 segval[7][V4L2_AV1_SEG_LVL_REF_FRAME]);
+	hantro_reg_write(vpu, &av1_skip_seg7,
+			 segval[7][V4L2_AV1_SEG_LVL_REF_SKIP]);
+	hantro_reg_write(vpu, &av1_global_mv_seg7,
+			 segval[7][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
+}
+
+/*
+ * Tell whether the current frame is lossless.
+ *
+ * Returns true only when, for every segment, the quantizer index
+ * (base_q_idx plus the segment ALT_Q feature data when that feature is
+ * enabled, clipped to 0..255) is zero and all the delta_q_* values of the
+ * quantization parameters are zero as well.
+ */
+static bool rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx *ctx)
+{
+	const struct v4l2_ctrl_av1_frame *frame = ctx->av1_dec.ctrls.frame;
+	const struct v4l2_av1_segmentation *seg = &frame->segmentation;
+	const struct v4l2_av1_quantization *quant = &frame->quantization;
+	/* The deltas do not depend on the segment: evaluate them once */
+	bool has_delta_q = quant->delta_q_y_dc || quant->delta_q_u_dc ||
+			   quant->delta_q_u_ac || quant->delta_q_v_dc ||
+			   quant->delta_q_v_ac;
+	int s;
+
+	for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
+		int qidx = quant->base_q_idx;
+
+		if (seg->feature_enabled[s] &
+		    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q))
+			qidx += seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q];
+
+		if (CLIP3(0, 255, qidx) || has_delta_q)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Program the loop filter registers for the current frame.
+ *
+ * Filtering is flagged as disabled when both level[0] and level[1] are 0.
+ * The reference and mode deltas are written only when delta is enabled in
+ * the loop filter flags, the frame is not lossless and intra block copy
+ * is not allowed; in every other case zeroes are written instead.
+ * The addresses of the db_data_col and db_ctrl_col buffers are written
+ * last.
+ */
+static void rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
+	const struct v4l2_av1_loop_filter *loop_filter = &frame->loop_filter;
+	bool filtering_dis = (loop_filter->level[0] == 0)
+			     && (loop_filter->level[1] == 0);
+	struct hantro_dev *vpu = ctx->dev;
+
+	hantro_reg_write(vpu, &av1_filtering_dis, filtering_dis);
+	hantro_reg_write(vpu, &av1_filt_level_base_gt32, loop_filter->level[0] > 32);
+	hantro_reg_write(vpu, &av1_filt_sharpness, loop_filter->sharpness);
+
+	hantro_reg_write(vpu, &av1_filt_level0, loop_filter->level[0]);
+	hantro_reg_write(vpu, &av1_filt_level1, loop_filter->level[1]);
+	hantro_reg_write(vpu, &av1_filt_level2, loop_filter->level[2]);
+	hantro_reg_write(vpu, &av1_filt_level3, loop_filter->level[3]);
+
+	if (loop_filter->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED
+	    && !rockchip_vpu981_av1_dec_is_lossless(ctx)
+	    && !(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC)) {
+		hantro_reg_write(vpu, &av1_filt_ref_adj_0,
+				 loop_filter->ref_deltas[0]);
+		hantro_reg_write(vpu, &av1_filt_ref_adj_1,
+				 loop_filter->ref_deltas[1]);
+		hantro_reg_write(vpu, &av1_filt_ref_adj_2,
+				 loop_filter->ref_deltas[2]);
+		hantro_reg_write(vpu, &av1_filt_ref_adj_3,
+				 loop_filter->ref_deltas[3]);
+		hantro_reg_write(vpu, &av1_filt_ref_adj_4,
+				 loop_filter->ref_deltas[4]);
+		hantro_reg_write(vpu, &av1_filt_ref_adj_5,
+				 loop_filter->ref_deltas[5]);
+		hantro_reg_write(vpu, &av1_filt_ref_adj_6,
+				 loop_filter->ref_deltas[6]);
+		hantro_reg_write(vpu, &av1_filt_ref_adj_7,
+				 loop_filter->ref_deltas[7]);
+		hantro_reg_write(vpu, &av1_filt_mb_adj_0,
+				 loop_filter->mode_deltas[0]);
+		hantro_reg_write(vpu, &av1_filt_mb_adj_1,
+				 loop_filter->mode_deltas[1]);
+	} else {
+		/* No delta applies: reset every adjustment field */
+		hantro_reg_write(vpu, &av1_filt_ref_adj_0, 0);
+		hantro_reg_write(vpu, &av1_filt_ref_adj_1, 0);
+		hantro_reg_write(vpu, &av1_filt_ref_adj_2, 0);
+		hantro_reg_write(vpu, &av1_filt_ref_adj_3, 0);
+		hantro_reg_write(vpu, &av1_filt_ref_adj_4, 0);
+		hantro_reg_write(vpu, &av1_filt_ref_adj_5, 0);
+		hantro_reg_write(vpu, &av1_filt_ref_adj_6, 0);
+		hantro_reg_write(vpu, &av1_filt_ref_adj_7, 0);
+		hantro_reg_write(vpu, &av1_filt_mb_adj_0, 0);
+		hantro_reg_write(vpu, &av1_filt_mb_adj_1, 0);
+	}
+
+	hantro_write_addr(vpu, AV1_DB_DATA_COL, av1_dec->db_data_col.dma);
+	hantro_write_addr(vpu, AV1_DB_CTRL_COL, av1_dec->db_ctrl_col.dma);
+}
+
+/*
+ * Save the CDFs found in the prob_tbl_out buffer.
+ *
+ * Nothing is done when the frame disables the end-of-frame CDF update or
+ * when refresh_frame_flags is empty. Otherwise the CDFs selected by the
+ * first slot set in refresh_frame_flags are replaced by the content of
+ * prob_tbl_out and stored through rockchip_av1_store_cdfs(). For intra
+ * frames the previous mv_cdf is kept and the mv_cdf of prob_tbl_out goes
+ * to cdfs_ndvc instead.
+ */
+static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
+	struct av1cdfs *hw_cdfs = (struct av1cdfs *)av1_dec->prob_tbl_out.cpu;
+	bool intra = IS_INTRA(frame->frame_type);
+	struct mvcdfs saved_mv_cdf;
+	int slot;
+
+	if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
+		return;
+
+	/* Only the first slot flagged in refresh_frame_flags is looked up */
+	for (slot = 0; slot < NUM_REF_FRAMES; slot++) {
+		if (frame->refresh_frame_flags & (1 << slot))
+			break;
+	}
+
+	if (slot == NUM_REF_FRAMES)
+		return;
+
+	rockchip_av1_get_cdfs(ctx, slot);
+	saved_mv_cdf = av1_dec->cdfs->mv_cdf;
+	*av1_dec->cdfs = *hw_cdfs;
+
+	if (intra) {
+		av1_dec->cdfs->mv_cdf = saved_mv_cdf;
+		*av1_dec->cdfs_ndvc = hw_cdfs->mv_cdf;
+	}
+
+	rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);
+}
+
+/*
+ * Non-static entry point that only forwards to
+ * rockchip_vpu981_av1_dec_update_prob() for @ctx.
+ *
+ * NOTE(review): presumably invoked once the hardware has finished decoding
+ * a frame - confirm against the caller.
+ */
+void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx)
+{
+	rockchip_vpu981_av1_dec_update_prob(ctx);
+}
+
+/*
+ * Select the CDFs used to decode the current frame and hand them to the
+ * hardware.
+ *
+ * The default CDFs are used for error-resilient frames, intra frames and
+ * frames without a primary reference; otherwise the CDFs of the primary
+ * reference are fetched. The selection is stored according to
+ * refresh_frame_flags, then copied to the prob_tbl buffer. For intra
+ * frames the MV part of that buffer is overwritten with cdfs_ndvc.
+ * Finally the addresses of prob_tbl_out and prob_tbl are written.
+ */
+static void rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
+	struct hantro_dev *vpu = ctx->dev;
+	bool intra = IS_INTRA(frame->frame_type);
+	bool use_defaults =
+		(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE) ||
+		intra || frame->primary_ref_frame == AV1_PRIMARY_REF_NONE;
+
+	if (!use_defaults) {
+		rockchip_av1_get_cdfs(ctx, frame->ref_frame_idx[frame->primary_ref_frame]);
+	} else {
+		av1_dec->cdfs = &av1_dec->default_cdfs;
+		av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
+		rockchip_av1_default_coeff_probs(frame->quantization.base_q_idx,
+						 av1_dec->cdfs);
+	}
+	rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);
+
+	memcpy(av1_dec->prob_tbl.cpu, av1_dec->cdfs, sizeof(struct av1cdfs));
+
+	/* Overwrite MV context area with intrabc MV context */
+	if (intra)
+		memcpy(av1_dec->prob_tbl.cpu + offsetof(struct av1cdfs, mv_cdf),
+		       av1_dec->cdfs_ndvc, sizeof(struct mvcdfs));
+
+	hantro_write_addr(vpu, AV1_PROP_TABLE_OUT, av1_dec->prob_tbl_out.dma);
+	hantro_write_addr(vpu, AV1_PROP_TABLE, av1_dec->prob_tbl.dma);
+}
+
+/*
+ * Program the CDEF registers for the current frame.
+ *
+ * The 1 << cdef->bits strength entries are packed into bit fields: 4 bits
+ * per entry for the primary strengths and 2 bits per entry for the
+ * secondary strengths, a secondary strength of 4 being encoded as 3.
+ * The address of the cdef_col buffer is written last.
+ *
+ * All shifts are done on unsigned values: the strengths are small integer
+ * types that would otherwise be promoted to int, and shifting entry 7 of
+ * a primary strength by 28 bits could then reach the sign bit.
+ *
+ * NOTE(review): this assumes cdef->bits is at most 3, so that every shift
+ * stays below 32 bits - confirm it is validated before reaching here.
+ */
+static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
+	const struct v4l2_av1_cdef *cdef = &frame->cdef;
+	struct hantro_dev *vpu = ctx->dev;
+	uint32_t luma_pri_strength = 0;
+	uint16_t luma_sec_strength = 0;
+	uint32_t chroma_pri_strength = 0;
+	uint16_t chroma_sec_strength = 0;
+	int i;
+
+	hantro_reg_write(vpu, &av1_cdef_bits, cdef->bits);
+	hantro_reg_write(vpu, &av1_cdef_damping, cdef->damping_minus_3);
+
+	for (i = 0; i < (1 << cdef->bits); i++) {
+		uint32_t y_sec = cdef->y_sec_strength[i];
+		uint32_t uv_sec = cdef->uv_sec_strength[i];
+
+		/* A secondary strength of 4 is stored as 3 */
+		if (y_sec == 4)
+			y_sec = 3;
+		if (uv_sec == 4)
+			uv_sec = 3;
+
+		luma_pri_strength |=
+			(uint32_t)cdef->y_pri_strength[i] << (i * 4);
+		luma_sec_strength |= y_sec << (i * 2);
+
+		chroma_pri_strength |=
+			(uint32_t)cdef->uv_pri_strength[i] << (i * 4);
+		chroma_sec_strength |= uv_sec << (i * 2);
+	}
+
+	hantro_reg_write(vpu, &av1_cdef_luma_primary_strength,
+			 luma_pri_strength);
+	hantro_reg_write(vpu, &av1_cdef_luma_secondary_strength,
+			 luma_sec_strength);
+	hantro_reg_write(vpu, &av1_cdef_chroma_primary_strength,
+			 chroma_pri_strength);
+	hantro_reg_write(vpu, &av1_cdef_chroma_secondary_strength,
+			 chroma_sec_strength);
+
+	hantro_write_addr(vpu, AV1_CDEF_COL, av1_dec->cdef_col.dma);
+}
+
+/*
+ * Program the loop restoration registers for the current frame.
+ *
+ * The restoration type and unit size of each plane are packed two bits
+ * per plane. The unit sizes default to 3 and are derived from
+ * lr_unit_shift (and lr_uv_shift for the last two planes) when the frame
+ * uses loop restoration. The address of the lr_col buffer is written
+ * last.
+ */
+static void rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
+	const struct v4l2_av1_loop_restoration *lr = &frame->loop_restoration;
+	struct hantro_dev *vpu = ctx->dev;
+	uint8_t unit_size[V4L2_AV1_NUM_PLANES_MAX] = { 3, 3, 3 };
+	uint16_t type_bits = 0, size_bits = 0;
+	int plane;
+
+	if (lr->flags & V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR) {
+		uint8_t luma_size = 1 + lr->lr_unit_shift;
+		uint8_t chroma_size = 1 + lr->lr_unit_shift - lr->lr_uv_shift;
+
+		unit_size[0] = luma_size;
+		unit_size[1] = chroma_size;
+		unit_size[2] = chroma_size;
+	}
+
+	for (plane = 0; plane < V4L2_AV1_NUM_PLANES_MAX; plane++) {
+		int shift = plane * 2;
+
+		type_bits |= lr->frame_restoration_type[plane] << shift;
+		size_bits |= unit_size[plane] << shift;
+	}
+
+	hantro_reg_write(vpu, &av1_lr_type, type_bits);
+	hantro_reg_write(vpu, &av1_lr_unit_size, size_bits);
+	hantro_write_addr(vpu, AV1_LR_COL, av1_dec->lr_col.dma);
+}
+
+/*
+ * Compute and program the super-resolution parameters.
+ *
+ * When the frame uses superres with a denominator above SCALE_NUMERATOR
+ * and the resulting downscaled width differs from upscaled_width, the
+ * luma and chroma steps, their inverses and the initial sub-pixel
+ * positions are derived from the upscaled and downscaled plane widths.
+ * In every other case the initial values of the locals are written
+ * unchanged. The address of the sr_col buffer is written last.
+ */
+static void rockchip_vpu981_av1_dec_set_superres_params(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
+	struct hantro_dev *vpu = ctx->dev;
+	uint8_t denom = SCALE_NUMERATOR;
+	int luma_step = RS_SCALE_SUBPEL_BITS;
+	int chroma_step = RS_SCALE_SUBPEL_BITS;
+	int luma_step_invra = RS_SCALE_SUBPEL_BITS;
+	int chroma_step_invra = RS_SCALE_SUBPEL_BITS;
+	int init_luma_subpel_x = 0;
+	int init_chroma_subpel_x = 0;
+	int is_scaled = 0;
+	int min_w = min_t(uint32_t, 16, frame->upscaled_width);
+	int up_luma_w, down_luma_w;
+	int up_chroma_w, down_chroma_w;
+	int err_luma, err_chroma;
+	int width = 0;
+
+	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
+		denom = frame->superres_denom;
+
+	if (denom <= SCALE_NUMERATOR)
+		goto write_regs;
+
+	/* Downscaled width, rounded to nearest and never below min_w */
+	width = (frame->upscaled_width * SCALE_NUMERATOR + (denom / 2)) / denom;
+
+	if (width < min_w)
+		width = min_w;
+
+	if (width == frame->upscaled_width)
+		goto write_regs;
+
+	is_scaled = 1;
+
+	/* Chroma planes are half the luma width, rounded up */
+	up_luma_w = frame->upscaled_width;
+	down_luma_w = width;
+	down_chroma_w = (down_luma_w + 1) >> 1;
+	up_chroma_w = (up_luma_w + 1) >> 1;
+
+	luma_step = ((down_luma_w << RS_SCALE_SUBPEL_BITS) + (up_luma_w / 2))
+		    / up_luma_w;
+	chroma_step = ((down_chroma_w << RS_SCALE_SUBPEL_BITS) + (up_chroma_w / 2))
+		      / up_chroma_w;
+
+	/* Rounding error accumulated by the step over the whole width */
+	err_luma = (up_luma_w * luma_step)
+		   - (down_luma_w << RS_SCALE_SUBPEL_BITS);
+	err_chroma = (up_chroma_w * chroma_step)
+		     - (down_chroma_w << RS_SCALE_SUBPEL_BITS);
+
+	init_luma_subpel_x =
+		((-((up_luma_w - down_luma_w) << (RS_SCALE_SUBPEL_BITS - 1))
+		  + up_luma_w / 2)
+		 / up_luma_w + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_luma / 2)
+		& RS_SCALE_SUBPEL_MASK;
+	init_chroma_subpel_x =
+		((-((up_chroma_w - down_chroma_w) << (RS_SCALE_SUBPEL_BITS - 1))
+		  + up_chroma_w / 2)
+		 / up_chroma_w + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_chroma / 2)
+		& RS_SCALE_SUBPEL_MASK;
+
+	luma_step_invra =
+		((up_luma_w << RS_SCALE_SUBPEL_BITS) + (down_luma_w / 2))
+		/ down_luma_w;
+	chroma_step_invra =
+		((up_chroma_w << RS_SCALE_SUBPEL_BITS) + (down_chroma_w / 2))
+		/ down_chroma_w;
+
+write_regs:
+	hantro_reg_write(vpu, &av1_superres_pic_width, frame->upscaled_width);
+
+	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
+		hantro_reg_write(vpu, &av1_scale_denom_minus9,
+				 frame->superres_denom - SUPERRES_SCALE_DENOMINATOR_MIN);
+	else
+		hantro_reg_write(vpu, &av1_scale_denom_minus9, frame->superres_denom);
+
+	hantro_reg_write(vpu, &av1_superres_luma_step, luma_step);
+	hantro_reg_write(vpu, &av1_superres_chroma_step, chroma_step);
+	hantro_reg_write(vpu, &av1_superres_luma_step_invra, luma_step_invra);
+	hantro_reg_write(vpu, &av1_superres_chroma_step_invra, chroma_step_invra);
+	hantro_reg_write(vpu, &av1_superres_init_luma_subpel_x, init_luma_subpel_x);
+	hantro_reg_write(vpu, &av1_superres_init_chroma_subpel_x, init_chroma_subpel_x);
+	hantro_reg_write(vpu, &av1_superres_is_scaled, is_scaled);
+
+	hantro_write_addr(vpu, AV1_SR_COL, av1_dec->sr_col.dma);
+}
+
+/*
+ * Program the picture size registers, then the superres parameters.
+ *
+ * The frame width and height are rounded up to a multiple of 8; the
+ * registers receive the rounded size divided by 8 and the number of
+ * padding pixels added by the rounding.
+ */
+static void rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx *ctx)
+{
+	const struct v4l2_ctrl_av1_frame *frame = ctx->av1_dec.ctrls.frame;
+	struct hantro_dev *vpu = ctx->dev;
+	int width_in_cbs, height_in_cbs;
+	int width_pad, height_pad;
+
+	width_in_cbs = ALIGN(frame->frame_width_minus_1 + 1, 8) >> 3;
+	width_pad = ALIGN(frame->frame_width_minus_1 + 1, 8)
+		    - (frame->frame_width_minus_1 + 1);
+
+	height_in_cbs = ALIGN(frame->frame_height_minus_1 + 1, 8) >> 3;
+	height_pad = ALIGN(frame->frame_height_minus_1 + 1, 8)
+		     - (frame->frame_height_minus_1 + 1);
+
+	hantro_reg_write(vpu, &av1_pic_width_in_cbs, width_in_cbs);
+	hantro_reg_write(vpu, &av1_pic_height_in_cbs, height_in_cbs);
+	hantro_reg_write(vpu, &av1_pic_width_pad, width_pad);
+	hantro_reg_write(vpu, &av1_pic_height_pad, height_pad);
+
+	rockchip_vpu981_av1_dec_set_superres_params(ctx);
+}
+
+/*
+ * Tell whether the reference held in frame_refs[idx] may be selected as one
+ * of the references programmed in the av1_mfN_* registers: it must span the
+ * same number of 8-pixel units as the current frame in both directions and
+ * must not be an intra frame.
+ *
+ * The caller must pass a valid (>= 0) idx.
+ */
+static bool
+rockchip_vpu981_av1_dec_is_mf_candidate(struct hantro_av1_dec_hw_ctx *av1_dec,
+					int idx, int cur_mi_cols,
+					int cur_mi_rows)
+{
+	int mi_cols = (av1_dec->frame_refs[idx].width + 7) >> 3;
+	int mi_rows = (av1_dec->frame_refs[idx].height + 7) >> 3;
+	bool intra_only = IS_INTRA(av1_dec->frame_refs[idx].frame_type);
+
+	if (mi_cols != cur_mi_cols || mi_rows != cur_mi_rows)
+		return false;
+
+	return !intra_only;
+}
+
+/*
+ * Program the order-hint distances between the current frame and each of
+ * its references (av1_cur_*_offset / av1_cur_*_roffset), and select up to
+ * three references whose own reference distances are written to the
+ * av1_mf1/mf2/mf3 register groups.
+ *
+ * Candidates are examined in the order LAST, BWDREF, ALTREF2, ALTREF, LAST2.
+ * ref_stamp bounds how many of them can be taken, so ref_ind never exceeds
+ * the size of refs_selected[] / mf_types[].
+ */
+static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
+	struct hantro_dev *vpu = ctx->dev;
+	bool use_ref_frame_mvs =
+	    !!(frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS);
+	int cur_frame_offset = frame->order_hint;
+	int alt_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT_BUF_IDX);
+	int gld_frame_offset = rockchip_vpu981_get_order_hint(ctx, GLD_BUF_IDX);
+	int bwd_frame_offset = rockchip_vpu981_get_order_hint(ctx, BWD_BUF_IDX);
+	int alt2_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT2_BUF_IDX);
+	int refs_selected[3] = { 0, 0, 0 };
+	int cur_mi_cols = (frame->frame_width_minus_1 + 8) >> 3;
+	int cur_mi_rows = (frame->frame_height_minus_1 + 8) >> 3;
+	int cur_offset[V4L2_AV1_NUM_REF_FRAMES - 1];
+	int cur_roffset[V4L2_AV1_NUM_REF_FRAMES - 1];
+	int mf_types[3] = { 0, 0, 0 };
+	int ref_stamp = 2;
+	int ref_ind = 0;
+	int rf, idx;
+
+	/*
+	 * LAST: not selected when its ALTREF order hint equals the order hint
+	 * of the current GOLDEN reference. A present LAST frame consumes one
+	 * ref_stamp even when it is not selected.
+	 */
+	idx = rockchip_vpu981_get_frame_index(ctx, LST_BUF_IDX);
+	if (idx >= 0) {
+		int alt_frame_offset_in_lst =
+			av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME];
+
+		if (alt_frame_offset_in_lst != gld_frame_offset &&
+		    rockchip_vpu981_av1_dec_is_mf_candidate(av1_dec, idx,
+							    cur_mi_cols,
+							    cur_mi_rows)) {
+			mf_types[ref_ind] = V4L2_AV1_REF_LAST_FRAME;
+			refs_selected[ref_ind++] = LST_BUF_IDX;
+		}
+		ref_stamp--;
+	}
+
+	/*
+	 * BWDREF, ALTREF2 and ALTREF are only considered when their relative
+	 * distance to the current frame is positive. A negative idx means the
+	 * reference is missing, so frame_refs[] must not be indexed with it.
+	 */
+	idx = rockchip_vpu981_get_frame_index(ctx, BWD_BUF_IDX);
+	if (idx >= 0 &&
+	    rockchip_vpu981_av1_dec_get_relative_dist(ctx, bwd_frame_offset,
+						      cur_frame_offset) > 0 &&
+	    rockchip_vpu981_av1_dec_is_mf_candidate(av1_dec, idx, cur_mi_cols,
+						    cur_mi_rows)) {
+		mf_types[ref_ind] = V4L2_AV1_REF_BWDREF_FRAME;
+		refs_selected[ref_ind++] = BWD_BUF_IDX;
+		ref_stamp--;
+	}
+
+	idx = rockchip_vpu981_get_frame_index(ctx, ALT2_BUF_IDX);
+	if (idx >= 0 &&
+	    rockchip_vpu981_av1_dec_get_relative_dist(ctx, alt2_frame_offset,
+						      cur_frame_offset) > 0 &&
+	    rockchip_vpu981_av1_dec_is_mf_candidate(av1_dec, idx, cur_mi_cols,
+						    cur_mi_rows)) {
+		mf_types[ref_ind] = V4L2_AV1_REF_ALTREF2_FRAME;
+		refs_selected[ref_ind++] = ALT2_BUF_IDX;
+		ref_stamp--;
+	}
+
+	idx = rockchip_vpu981_get_frame_index(ctx, ALT_BUF_IDX);
+	if (idx >= 0 && ref_stamp >= 0 &&
+	    rockchip_vpu981_av1_dec_get_relative_dist(ctx, alt_frame_offset,
+						      cur_frame_offset) > 0 &&
+	    rockchip_vpu981_av1_dec_is_mf_candidate(av1_dec, idx, cur_mi_cols,
+						    cur_mi_rows)) {
+		mf_types[ref_ind] = V4L2_AV1_REF_ALTREF_FRAME;
+		refs_selected[ref_ind++] = ALT_BUF_IDX;
+		ref_stamp--;
+	}
+
+	idx = rockchip_vpu981_get_frame_index(ctx, LST2_BUF_IDX);
+	if (idx >= 0 && ref_stamp >= 0 &&
+	    rockchip_vpu981_av1_dec_is_mf_candidate(av1_dec, idx, cur_mi_cols,
+						    cur_mi_rows)) {
+		mf_types[ref_ind] = V4L2_AV1_REF_LAST2_FRAME;
+		refs_selected[ref_ind++] = LST2_BUF_IDX;
+		ref_stamp--;
+	}
+
+	/*
+	 * Distance between the current frame and every reference, in both
+	 * directions. Missing references get a distance of 0.
+	 */
+	for (rf = 0; rf < V4L2_AV1_NUM_REF_FRAMES - 1; ++rf) {
+		idx = rockchip_vpu981_get_frame_index(ctx, rf);
+		if (idx >= 0) {
+			int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, rf);
+
+			cur_offset[rf] =
+			    rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+								      cur_frame_offset,
+								      rf_order_hint);
+			cur_roffset[rf] =
+			    rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+								      rf_order_hint,
+								      cur_frame_offset);
+		} else {
+			cur_offset[rf] = 0;
+			cur_roffset[rf] = 0;
+		}
+	}
+
+	hantro_reg_write(vpu, &av1_use_temporal0_mvs, 0);
+	hantro_reg_write(vpu, &av1_use_temporal1_mvs, 0);
+	hantro_reg_write(vpu, &av1_use_temporal2_mvs, 0);
+	hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
+
+	/* First selected reference: defaults, then the real distances. */
+	hantro_reg_write(vpu, &av1_mf1_last_offset, 0);
+	hantro_reg_write(vpu, &av1_mf1_last2_offset, 0);
+	hantro_reg_write(vpu, &av1_mf1_last3_offset, 0);
+	hantro_reg_write(vpu, &av1_mf1_golden_offset, 0);
+	hantro_reg_write(vpu, &av1_mf1_bwdref_offset, 0);
+	hantro_reg_write(vpu, &av1_mf1_altref2_offset, 0);
+	hantro_reg_write(vpu, &av1_mf1_altref_offset, 0);
+
+	/* ref_ind is tested first so mf_types[0] is known to be valid. */
+	if (use_ref_frame_mvs && ref_ind > 0 &&
+	    cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
+	    cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
+		int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, refs_selected[0]);
+		int val;
+
+		idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[0]);
+
+		hantro_reg_write(vpu, &av1_use_temporal0_mvs, 1);
+
+		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+				rf_order_hint,
+				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST_FRAME]);
+		hantro_reg_write(vpu, &av1_mf1_last_offset, val);
+
+		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+				rf_order_hint,
+				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST2_FRAME]);
+		hantro_reg_write(vpu, &av1_mf1_last2_offset, val);
+
+		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+				rf_order_hint,
+				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST3_FRAME]);
+		hantro_reg_write(vpu, &av1_mf1_last3_offset, val);
+
+		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+				rf_order_hint,
+				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_GOLDEN_FRAME]);
+		hantro_reg_write(vpu, &av1_mf1_golden_offset, val);
+
+		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+				rf_order_hint,
+				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_BWDREF_FRAME]);
+		hantro_reg_write(vpu, &av1_mf1_bwdref_offset, val);
+
+		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+				rf_order_hint,
+				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF2_FRAME]);
+		hantro_reg_write(vpu, &av1_mf1_altref2_offset, val);
+
+		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+				rf_order_hint,
+				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME]);
+		hantro_reg_write(vpu, &av1_mf1_altref_offset, val);
+	}
+
+	/* Second selected reference. */
+	hantro_reg_write(vpu, &av1_mf2_last_offset, 0);
+	hantro_reg_write(vpu, &av1_mf2_last2_offset, 0);
+	hantro_reg_write(vpu, &av1_mf2_last3_offset, 0);
+	hantro_reg_write(vpu, &av1_mf2_golden_offset, 0);
+	hantro_reg_write(vpu, &av1_mf2_bwdref_offset, 0);
+	hantro_reg_write(vpu, &av1_mf2_altref2_offset, 0);
+	hantro_reg_write(vpu, &av1_mf2_altref_offset, 0);
+
+	if (use_ref_frame_mvs && ref_ind > 1 &&
+	    cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
+	    cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
+		int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, refs_selected[1]);
+		int val;
+
+		idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[1]);
+
+		hantro_reg_write(vpu, &av1_use_temporal1_mvs, 1);
+
+		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+				rf_order_hint,
+				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST_FRAME]);
+		hantro_reg_write(vpu, &av1_mf2_last_offset, val);
+
+		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+				rf_order_hint,
+				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST2_FRAME]);
+		hantro_reg_write(vpu, &av1_mf2_last2_offset, val);
+
+		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+				rf_order_hint,
+				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST3_FRAME]);
+		hantro_reg_write(vpu, &av1_mf2_last3_offset, val);
+
+		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+				rf_order_hint,
+				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_GOLDEN_FRAME]);
+		hantro_reg_write(vpu, &av1_mf2_golden_offset, val);
+
+		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+				rf_order_hint,
+				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_BWDREF_FRAME]);
+		hantro_reg_write(vpu, &av1_mf2_bwdref_offset, val);
+
+		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+				rf_order_hint,
+				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF2_FRAME]);
+		hantro_reg_write(vpu, &av1_mf2_altref2_offset, val);
+
+		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+				rf_order_hint,
+				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME]);
+		hantro_reg_write(vpu, &av1_mf2_altref_offset, val);
+	}
+
+	/* Third selected reference. */
+	hantro_reg_write(vpu, &av1_mf3_last_offset, 0);
+	hantro_reg_write(vpu, &av1_mf3_last2_offset, 0);
+	hantro_reg_write(vpu, &av1_mf3_last3_offset, 0);
+	hantro_reg_write(vpu, &av1_mf3_golden_offset, 0);
+	hantro_reg_write(vpu, &av1_mf3_bwdref_offset, 0);
+	hantro_reg_write(vpu, &av1_mf3_altref2_offset, 0);
+	hantro_reg_write(vpu, &av1_mf3_altref_offset, 0);
+
+	if (use_ref_frame_mvs && ref_ind > 2 &&
+	    cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
+	    cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
+		int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, refs_selected[2]);
+		int val;
+
+		idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[2]);
+
+		hantro_reg_write(vpu, &av1_use_temporal2_mvs, 1);
+
+		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+				rf_order_hint,
+				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST_FRAME]);
+		hantro_reg_write(vpu, &av1_mf3_last_offset, val);
+
+		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+				rf_order_hint,
+				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST2_FRAME]);
+		hantro_reg_write(vpu, &av1_mf3_last2_offset, val);
+
+		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+				rf_order_hint,
+				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_LAST3_FRAME]);
+		hantro_reg_write(vpu, &av1_mf3_last3_offset, val);
+
+		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+				rf_order_hint,
+				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_GOLDEN_FRAME]);
+		hantro_reg_write(vpu, &av1_mf3_golden_offset, val);
+
+		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+				rf_order_hint,
+				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_BWDREF_FRAME]);
+		hantro_reg_write(vpu, &av1_mf3_bwdref_offset, val);
+
+		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+				rf_order_hint,
+				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF2_FRAME]);
+		hantro_reg_write(vpu, &av1_mf3_altref2_offset, val);
+
+		val = rockchip_vpu981_av1_dec_get_relative_dist(ctx,
+				rf_order_hint,
+				av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME]);
+		hantro_reg_write(vpu, &av1_mf3_altref_offset, val);
+	}
+
+	hantro_reg_write(vpu, &av1_cur_last_offset, cur_offset[0]);
+	hantro_reg_write(vpu, &av1_cur_last2_offset, cur_offset[1]);
+	hantro_reg_write(vpu, &av1_cur_last3_offset, cur_offset[2]);
+	hantro_reg_write(vpu, &av1_cur_golden_offset, cur_offset[3]);
+	hantro_reg_write(vpu, &av1_cur_bwdref_offset, cur_offset[4]);
+	hantro_reg_write(vpu, &av1_cur_altref2_offset, cur_offset[5]);
+	hantro_reg_write(vpu, &av1_cur_altref_offset, cur_offset[6]);
+
+	hantro_reg_write(vpu, &av1_cur_last_roffset, cur_roffset[0]);
+	hantro_reg_write(vpu, &av1_cur_last2_roffset, cur_roffset[1]);
+	hantro_reg_write(vpu, &av1_cur_last3_roffset, cur_roffset[2]);
+	hantro_reg_write(vpu, &av1_cur_golden_roffset, cur_roffset[3]);
+	hantro_reg_write(vpu, &av1_cur_bwdref_roffset, cur_roffset[4]);
+	hantro_reg_write(vpu, &av1_cur_altref2_roffset, cur_roffset[5]);
+	hantro_reg_write(vpu, &av1_cur_altref_roffset, cur_roffset[6]);
+
+	hantro_reg_write(vpu, &av1_mf1_type, mf_types[0] - V4L2_AV1_REF_LAST_FRAME);
+	hantro_reg_write(vpu, &av1_mf2_type, mf_types[1] - V4L2_AV1_REF_LAST_FRAME);
+	hantro_reg_write(vpu, &av1_mf3_type, mf_types[2] - V4L2_AV1_REF_LAST_FRAME);
+}
+
+/*
+ * Program the reference frame registers: the number of distinct reference
+ * buffers, then for every reference its buffer and size (through
+ * rockchip_vpu981_av1_dec_set_ref()), its sign bias and its global motion
+ * type, and finally the order-hint distances.
+ *
+ * Nothing is programmed for intra frames unless intra block copy is allowed;
+ * in that case every reference slot is pointed at the current frame.
+ */
+static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
+	int frame_type = frame->frame_type;
+	bool allow_intrabc = !!(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
+	int ref_count[AV1DEC_MAX_PIC_BUFFERS] = { 0 };
+	struct hantro_dev *vpu = ctx->dev;
+	int i, ref_frames = 0;
+	bool scale_enable = false;
+
+	if (IS_INTRA(frame_type) && !allow_intrabc)
+		return;
+
+	if (!allow_intrabc) {
+		/* Count how many different buffers are actually referenced. */
+		for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
+			int idx = rockchip_vpu981_get_frame_index(ctx, i);
+
+			if (idx >= 0)
+				ref_count[idx]++;
+		}
+
+		for (i = 0; i < AV1DEC_MAX_PIC_BUFFERS; i++) {
+			if (ref_count[i])
+				ref_frames++;
+		}
+	} else {
+		ref_frames = 1;
+	}
+	hantro_reg_write(vpu, &av1_ref_frames, ref_frames);
+
+	rockchip_vpu981_av1_dec_set_frame_sign_bias(ctx);
+
+	for (i = V4L2_AV1_REF_LAST_FRAME; i < V4L2_AV1_NUM_REF_FRAMES; i++) {
+		uint32_t ref = i - 1;
+		int idx = 0;
+		int width, height;
+
+		if (allow_intrabc) {
+			idx = av1_dec->current_frame_index;
+			width = frame->frame_width_minus_1 + 1;
+			height = frame->frame_height_minus_1 + 1;
+		} else {
+			int found = rockchip_vpu981_get_frame_index(ctx, ref);
+
+			/* A missing reference falls back to entry 0. */
+			if (found >= 0)
+				idx = found;
+			width = av1_dec->frame_refs[idx].width;
+			height = av1_dec->frame_refs[idx].height;
+		}
+
+		scale_enable |=
+		    rockchip_vpu981_av1_dec_set_ref(ctx, ref, idx, width,
+						    height);
+
+		rockchip_vpu981_av1_dec_set_sign_bias(ctx, ref,
+						      av1_dec->ref_frame_sign_bias[i]);
+	}
+	hantro_reg_write(vpu, &av1_ref_scaling_enable, scale_enable);
+
+	hantro_reg_write(vpu, &av1_ref0_gm_mode,
+			 frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME]);
+	hantro_reg_write(vpu, &av1_ref1_gm_mode,
+			 frame->global_motion.type[V4L2_AV1_REF_LAST2_FRAME]);
+	hantro_reg_write(vpu, &av1_ref2_gm_mode,
+			 frame->global_motion.type[V4L2_AV1_REF_LAST3_FRAME]);
+	hantro_reg_write(vpu, &av1_ref3_gm_mode,
+			 frame->global_motion.type[V4L2_AV1_REF_GOLDEN_FRAME]);
+	hantro_reg_write(vpu, &av1_ref4_gm_mode,
+			 frame->global_motion.type[V4L2_AV1_REF_BWDREF_FRAME]);
+	hantro_reg_write(vpu, &av1_ref5_gm_mode,
+			 frame->global_motion.type[V4L2_AV1_REF_ALTREF2_FRAME]);
+	hantro_reg_write(vpu, &av1_ref6_gm_mode,
+			 frame->global_motion.type[V4L2_AV1_REF_ALTREF_FRAME]);
+
+	rockchip_vpu981_av1_dec_set_other_frames(ctx);
+}
+
+/*
+ * Copy the frame-level and sequence-level AV1 coding tool switches,
+ * quantizer settings and skip-mode references from the V4L2 controls
+ * into the decoder registers.
+ */
+static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
+	struct hantro_dev *vpu = ctx->dev;
+	bool screen_content =
+	    !!(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS);
+	bool force_integer_mv =
+	    !!(frame->flags & V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV);
+	bool use_qmatrix =
+	    !!(frame->quantization.flags & V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX);
+	int depth_minus8 = ctx->bit_depth - 8;
+	u32 qm_y = 0xff, qm_u = 0xff, qm_v = 0xff;
+	u32 skip_ref0 = frame->skip_mode_frame[0];
+	u32 skip_ref1 = frame->skip_mode_frame[1];
+
+	/* Frame and loop filter delta flags */
+	hantro_reg_write(vpu, &av1_skip_mode,
+			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT));
+	hantro_reg_write(vpu, &av1_tempor_mvp_e,
+			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS));
+	hantro_reg_write(vpu, &av1_delta_lf_res_log,
+			 frame->loop_filter.delta_lf_res);
+	hantro_reg_write(vpu, &av1_delta_lf_multi,
+			 !!(frame->loop_filter.flags &
+			    V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI));
+	hantro_reg_write(vpu, &av1_delta_lf_present,
+			 !!(frame->loop_filter.flags &
+			    V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT));
+	hantro_reg_write(vpu, &av1_disable_cdf_update,
+			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE));
+	hantro_reg_write(vpu, &av1_allow_warp,
+			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION));
+	hantro_reg_write(vpu, &av1_show_frame,
+			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_SHOW_FRAME));
+	hantro_reg_write(vpu, &av1_switchable_motion_mode,
+			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE));
+
+	/* Sequence level tools */
+	hantro_reg_write(vpu, &av1_enable_cdef,
+			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF));
+	hantro_reg_write(vpu, &av1_allow_masked_compound,
+			 !!(ctrls->sequence->flags &
+			    V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND));
+	hantro_reg_write(vpu, &av1_allow_interintra,
+			 !!(ctrls->sequence->flags &
+			    V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND));
+	hantro_reg_write(vpu, &av1_enable_intra_edge_filter,
+			 !!(ctrls->sequence->flags &
+			    V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER));
+	hantro_reg_write(vpu, &av1_allow_filter_intra,
+			 !!(ctrls->sequence->flags &
+			    V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA));
+	hantro_reg_write(vpu, &av1_enable_jnt_comp,
+			 !!(ctrls->sequence->flags &
+			    V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP));
+	hantro_reg_write(vpu, &av1_enable_dual_filter,
+			 !!(ctrls->sequence->flags &
+			    V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER));
+
+	hantro_reg_write(vpu, &av1_reduced_tx_set_used,
+			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET));
+	hantro_reg_write(vpu, &av1_allow_screen_content_tools, screen_content);
+	hantro_reg_write(vpu, &av1_allow_intrabc,
+			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC));
+
+	/* Integer motion vectors are only honoured with screen content tools */
+	hantro_reg_write(vpu, &av1_force_interger_mv,
+			 screen_content && force_integer_mv);
+
+	hantro_reg_write(vpu, &av1_blackwhite_e, 0);
+	hantro_reg_write(vpu, &av1_delta_q_res_log,
+			 frame->quantization.delta_q_res);
+	hantro_reg_write(vpu, &av1_delta_q_present,
+			 !!(frame->quantization.flags &
+			    V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT));
+
+	hantro_reg_write(vpu, &av1_idr_pic_e, !frame->frame_type);
+	hantro_reg_write(vpu, &av1_quant_base_qindex,
+			 frame->quantization.base_q_idx);
+	hantro_reg_write(vpu, &av1_bit_depth_y_minus8, depth_minus8);
+	hantro_reg_write(vpu, &av1_bit_depth_c_minus8, depth_minus8);
+
+	hantro_reg_write(vpu, &av1_mcomp_filt_type, frame->interpolation_filter);
+	hantro_reg_write(vpu, &av1_high_prec_mv_e,
+			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV));
+	hantro_reg_write(vpu, &av1_comp_pred_mode,
+			 (frame->flags & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT) ? 2 : 0);
+	hantro_reg_write(vpu, &av1_transform_mode,
+			 (frame->tx_mode == 1) ? 3 : 4);
+	hantro_reg_write(vpu, &av1_max_cb_size,
+			 (ctrls->sequence->flags &
+			  V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK) ? 7 : 6);
+	hantro_reg_write(vpu, &av1_min_cb_size, 3);
+
+	/* Fields that are always cleared */
+	hantro_reg_write(vpu, &av1_comp_pred_fixed_ref, 0);
+	hantro_reg_write(vpu, &av1_comp_pred_var_ref0_av1, 0);
+	hantro_reg_write(vpu, &av1_comp_pred_var_ref1_av1, 0);
+	hantro_reg_write(vpu, &av1_filt_level_seg0, 0);
+	hantro_reg_write(vpu, &av1_filt_level_seg1, 0);
+	hantro_reg_write(vpu, &av1_filt_level_seg2, 0);
+	hantro_reg_write(vpu, &av1_filt_level_seg3, 0);
+	hantro_reg_write(vpu, &av1_filt_level_seg4, 0);
+	hantro_reg_write(vpu, &av1_filt_level_seg5, 0);
+	hantro_reg_write(vpu, &av1_filt_level_seg6, 0);
+	hantro_reg_write(vpu, &av1_filt_level_seg7, 0);
+
+	hantro_reg_write(vpu, &av1_qp_delta_y_dc_av1,
+			 frame->quantization.delta_q_y_dc);
+	hantro_reg_write(vpu, &av1_qp_delta_ch_dc_av1,
+			 frame->quantization.delta_q_u_dc);
+	hantro_reg_write(vpu, &av1_qp_delta_ch_ac_av1,
+			 frame->quantization.delta_q_u_ac);
+
+	/* 0xff is written to the qmlevel fields when no matrix is in use */
+	if (use_qmatrix) {
+		qm_y = frame->quantization.qm_y;
+		qm_u = frame->quantization.qm_u;
+		qm_v = frame->quantization.qm_v;
+	}
+	hantro_reg_write(vpu, &av1_qmlevel_y, qm_y);
+	hantro_reg_write(vpu, &av1_qmlevel_u, qm_u);
+	hantro_reg_write(vpu, &av1_qmlevel_v, qm_v);
+
+	hantro_reg_write(vpu, &av1_lossless_e,
+			 rockchip_vpu981_av1_dec_is_lossless(ctx));
+	hantro_reg_write(vpu, &av1_quant_delta_v_dc,
+			 frame->quantization.delta_q_v_dc);
+	hantro_reg_write(vpu, &av1_quant_delta_v_ac,
+			 frame->quantization.delta_q_v_ac);
+
+	/* A zero skip mode reference is replaced by 1 */
+	if (!skip_ref0)
+		skip_ref0 = 1;
+	if (!skip_ref1)
+		skip_ref1 = 1;
+	hantro_reg_write(vpu, &av1_skip_ref0, skip_ref0);
+	hantro_reg_write(vpu, &av1_skip_ref1, skip_ref1);
+
+	hantro_write_addr(vpu, AV1_MC_SYNC_CURR, av1_dec->tile_buf.dma);
+	hantro_write_addr(vpu, AV1_MC_SYNC_LEFT, av1_dec->tile_buf.dma);
+}
+
+/*
+ * Point the decoder at the bitstream held in vb2_src. The stream address is
+ * the plane address plus the offset of the first tile rounded down to a
+ * multiple of 16; the remainder is passed to the hardware as a start bit.
+ */
+static void
+rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx *ctx,
+					 struct vb2_v4l2_buffer *vb2_src)
+{
+	const struct v4l2_ctrl_av1_tile_group_entry *first_tile =
+	    ctx->av1_dec.ctrls.tile_group_entry;
+	struct hantro_dev *vpu = ctx->dev;
+	dma_addr_t stream_dma =
+	    vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0);
+	u32 payload = vb2_get_plane_payload(&vb2_src->vb2_buf, 0);
+	u32 capacity = vb2_plane_size(&vb2_src->vb2_buf, 0);
+	/* low 4 bits of the tile offset, expressed in bits */
+	int bit_skip = (first_tile->tile_offset & 0xf) * 8;
+	/* tile offset with its low 4 bits cleared */
+	int aligned_off = first_tile->tile_offset & ~0xf;
+
+	hantro_reg_write(vpu, &av1_strm_buffer_len, capacity);
+	hantro_reg_write(vpu, &av1_strm_start_bit, bit_skip);
+	hantro_reg_write(vpu, &av1_stream_len, payload);
+	hantro_reg_write(vpu, &av1_strm_start_offset, 0);
+	hantro_write_addr(vpu, AV1_INPUT_STREAM, stream_dma + aligned_off);
+}
+
+/*
+ * Program the luma, chroma and motion vector output addresses. All three
+ * live in the buffer attached to the current frame reference; chroma and
+ * motion vectors are placed at the offsets returned by the luma/chroma size
+ * helpers.
+ */
+static void
+rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx *ctx)
+{
+	size_t chroma_off = rockchip_vpu981_av1_dec_luma_size(ctx);
+	size_t mvs_off = rockchip_vpu981_av1_dec_chroma_size(ctx);
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct vb2_v4l2_buffer *cur_ref =
+	    av1_dec->frame_refs[av1_dec->current_frame_index].vb2_ref;
+	struct hantro_decoded_buffer *decoded =
+	    vb2_to_hantro_decoded_buf(&cur_ref->vb2_buf);
+	dma_addr_t base =
+	    hantro_get_dec_buf_addr(ctx, &decoded->base.vb.vb2_buf);
+
+	hantro_write_addr(ctx->dev, AV1_TILE_OUT_LU, base);
+	hantro_write_addr(ctx->dev, AV1_TILE_OUT_CH, base + chroma_off);
+	hantro_write_addr(ctx->dev, AV1_TILE_OUT_MV, base + mvs_off);
+}
+
+/*
+ * Configure the hardware for one AV1 frame and start decoding.
+ *
+ * Returns 0 once the decoder has been started. On failure the job is
+ * completed with VB2_BUF_STATE_ERROR and an error is returned: the value
+ * reported by rockchip_vpu981_av1_dec_prepare_run(), or -EINVAL when no
+ * source buffer is available.
+ */
+int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+	struct vb2_v4l2_buffer *vb2_src;
+	int ret;
+
+	hantro_start_prepare_run(ctx);
+
+	ret = rockchip_vpu981_av1_dec_prepare_run(ctx);
+	if (ret)
+		goto prepare_error;
+
+	vb2_src = hantro_get_src_buf(ctx);
+	if (!vb2_src) {
+		/* ret is still 0 here: do not report success on this path */
+		ret = -EINVAL;
+		goto prepare_error;
+	}
+
+	rockchip_vpu981_av1_dec_clean_refs(ctx);
+	rockchip_vpu981_av1_dec_frame_ref(ctx, vb2_src->vb2_buf.timestamp);
+
+	rockchip_vpu981_av1_dec_set_parameters(ctx);
+	rockchip_vpu981_av1_dec_set_global_model(ctx);
+	rockchip_vpu981_av1_dec_set_tile_info(ctx);
+	rockchip_vpu981_av1_dec_set_reference_frames(ctx);
+	rockchip_vpu981_av1_dec_set_segmentation(ctx);
+	rockchip_vpu981_av1_dec_set_loopfilter(ctx);
+	rockchip_vpu981_av1_dec_set_picture_dimensions(ctx);
+	rockchip_vpu981_av1_dec_set_cdef(ctx);
+	rockchip_vpu981_av1_dec_set_lr(ctx);
+	rockchip_vpu981_av1_dec_set_prob(ctx);
+
+	hantro_reg_write(vpu, &av1_dec_mode, AV1_DEC_MODE);
+	hantro_reg_write(vpu, &av1_dec_out_ec_byte_word, 0);
+	hantro_reg_write(vpu, &av1_write_mvs_e, 1);
+	hantro_reg_write(vpu, &av1_dec_out_ec_bypass, 1);
+	hantro_reg_write(vpu, &av1_dec_clk_gate_e, 1);
+
+	hantro_reg_write(vpu, &av1_dec_abort_e, 0);
+	hantro_reg_write(vpu, &av1_dec_tile_int_e, 0);
+
+	hantro_reg_write(vpu, &av1_dec_alignment, 64);
+	hantro_reg_write(vpu, &av1_apf_disable, 0);
+	hantro_reg_write(vpu, &av1_apf_threshold, 8);
+	hantro_reg_write(vpu, &av1_dec_buswidth, 2);
+	hantro_reg_write(vpu, &av1_dec_max_burst, 16);
+	hantro_reg_write(vpu, &av1_error_conceal_e, 0);
+	hantro_reg_write(vpu, &av1_axi_rd_ostd_threshold, 64);
+	hantro_reg_write(vpu, &av1_axi_wr_ostd_threshold, 64);
+
+	hantro_reg_write(vpu, &av1_ext_timeout_cycles, 0xfffffff);
+	hantro_reg_write(vpu, &av1_ext_timeout_override_e, 1);
+	hantro_reg_write(vpu, &av1_timeout_cycles, 0xfffffff);
+	hantro_reg_write(vpu, &av1_timeout_override_e, 1);
+
+	rockchip_vpu981_av1_dec_set_output_buffer(ctx);
+	rockchip_vpu981_av1_dec_set_input_buffer(ctx, vb2_src);
+
+	hantro_end_prepare_run(ctx);
+
+	/* Setting the enable bit is done last, after all other registers */
+	hantro_reg_write(vpu, &av1_dec_e, 1);
+
+	return 0;
+
+prepare_error:
+	hantro_end_prepare_run(ctx);
+	hantro_irq_done(vpu, VB2_BUF_STATE_ERROR);
+	return ret;
+}
+
+/*
+ * Turn the post-processor on and make it write into the destination buffer
+ * of the current job, using the size, stride and pixel format of
+ * ctx->dst_fmt.
+ */
+static void rockchip_vpu981_postproc_enable(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+	int out_w = ctx->dst_fmt.width;
+	int out_h = ctx->dst_fmt.height;
+	struct vb2_v4l2_buffer *dst_buf = hantro_get_dst_buf(ctx);
+	dma_addr_t luma_dma =
+	    vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
+	/* the chroma plane follows the luma lines in the same plane */
+	size_t chroma_off = ctx->dst_fmt.plane_fmt[0].bytesperline *
+	    ctx->dst_fmt.height;
+
+	/* enable post processor */
+	hantro_reg_write(vpu, &av1_pp_out_e, 1);
+	hantro_reg_write(vpu, &av1_pp_in_format, 0);
+	hantro_reg_write(vpu, &av1_pp0_dup_hor, 1);
+	hantro_reg_write(vpu, &av1_pp0_dup_ver, 1);
+
+	/* input size is programmed as half of the output size */
+	hantro_reg_write(vpu, &av1_pp_in_height, out_h / 2);
+	hantro_reg_write(vpu, &av1_pp_in_width, out_w / 2);
+	hantro_reg_write(vpu, &av1_pp_out_height, out_h);
+	hantro_reg_write(vpu, &av1_pp_out_width, out_w);
+	hantro_reg_write(vpu, &av1_pp_out_y_stride,
+			 ctx->dst_fmt.plane_fmt[0].bytesperline);
+	hantro_reg_write(vpu, &av1_pp_out_c_stride,
+			 ctx->dst_fmt.plane_fmt[0].bytesperline);
+
+	if (ctx->dst_fmt.pixelformat == V4L2_PIX_FMT_P010)
+		hantro_reg_write(vpu, &av1_pp_out_format, 1);
+	else if (ctx->dst_fmt.pixelformat == V4L2_PIX_FMT_NV12)
+		hantro_reg_write(vpu, &av1_pp_out_format, 3);
+	else
+		hantro_reg_write(vpu, &av1_pp_out_format, 0);
+
+	hantro_reg_write(vpu, &av1_ppd_blend_exist, 0);
+	hantro_reg_write(vpu, &av1_ppd_dith_exist, 0);
+	hantro_reg_write(vpu, &av1_ablend_crop_e, 0);
+	hantro_reg_write(vpu, &av1_pp_format_customer1_e, 0);
+	hantro_reg_write(vpu, &av1_pp_crop_exist, 0);
+	hantro_reg_write(vpu, &av1_pp_up_level, 0);
+	hantro_reg_write(vpu, &av1_pp_down_level, 0);
+	hantro_reg_write(vpu, &av1_pp_exist, 0);
+
+	hantro_write_addr(vpu, AV1_PP_OUT_LU, luma_dma);
+	hantro_write_addr(vpu, AV1_PP_OUT_CH, luma_dma + chroma_off);
+}
+
+/* Turn the post-processor off by clearing its output enable field. */
+static void rockchip_vpu981_postproc_disable(struct hantro_ctx *ctx)
+{
+	hantro_reg_write(ctx->dev, &av1_pp_out_e, 0);
+}
+
+/*
+ * Post-processor operations for the VPU981: .enable routes the output
+ * through the post-processor, .disable clears its output enable field.
+ */
+const struct hantro_postproc_ops rockchip_vpu981_postproc_ops = {
+	.enable = rockchip_vpu981_postproc_enable,
+	.disable = rockchip_vpu981_postproc_disable,
+};
diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h b/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
new file mode 100644
index 000000000000..182e6c830ff6
--- /dev/null
+++ b/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
@@ -0,0 +1,477 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022, Collabora
+ *
+ * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
+ */
+
+#ifndef _ROCKCHIP_VPU981_REGS_H_
+#define _ROCKCHIP_VPU981_REGS_H_
+
+#include "hantro.h"
+
+/* Offset of software register number @nr; consecutive registers are 4 apart. */
+#define AV1_SWREG(nr)	((nr) * 4)
+
+/*
+ * Describe one register bit field as a const struct hantro_reg compound
+ * literal: @reg is the register number (converted with AV1_SWREG()), @lsb the
+ * shift of the field inside that register and @bits the field mask, given
+ * right-aligned (i.e. not yet shifted), e.g. AV1_DEC_REG(3, 27, 0x1f).
+ */
+#define AV1_DEC_REG(reg, lsb, bits) \
+	((const struct hantro_reg) { \
+		.base = AV1_SWREG(reg), \
+		.shift = (lsb), \
+		.mask = (bits), \
+	})
+
+/* Offset of SWREG1 and the bit 12 flag inside it */
+#define AV1_REG_INTERRUPT		AV1_SWREG(1)
+#define AV1_REG_INTERRUPT_DEC_RDY_INT	BIT(12)
+
+/* Offset of SWREG2 and the bit 10 flag inside it */
+#define AV1_REG_CONFIG			AV1_SWREG(2)
+#define AV1_REG_CONFIG_DEC_CLK_GATE_E	BIT(10)
+
+/* SWREG1 single-bit fields (same register as AV1_REG_INTERRUPT) */
+#define av1_dec_e			AV1_DEC_REG(1, 0, 0x1)
+#define av1_dec_abort_e			AV1_DEC_REG(1, 5, 0x1)
+#define av1_dec_tile_int_e		AV1_DEC_REG(1, 7, 0x1)
+
+/* SWREG2 bit 10, the same bit as AV1_REG_CONFIG_DEC_CLK_GATE_E */
+#define av1_dec_clk_gate_e		AV1_DEC_REG(2, 10, 0x1)
+
+/* SWREG3: single-bit switches plus the 5-bit av1_dec_mode in bits 27-31 */
+#define av1_dec_out_ec_bypass		AV1_DEC_REG(3, 8,  0x1)
+#define av1_write_mvs_e			AV1_DEC_REG(3, 12, 0x1)
+#define av1_filtering_dis		AV1_DEC_REG(3, 14, 0x1)
+#define av1_dec_out_dis			AV1_DEC_REG(3, 15, 0x1)
+#define av1_dec_out_ec_byte_word	AV1_DEC_REG(3, 16, 0x1)
+#define av1_skip_mode			AV1_DEC_REG(3, 26, 0x1)
+#define av1_dec_mode			AV1_DEC_REG(3, 27, 0x1f)
+
+/* SWREG4: 4-bit av1_ref_frames and two 13-bit picture size fields */
+#define av1_ref_frames			AV1_DEC_REG(4, 0, 0xf)
+#define av1_pic_height_in_cbs		AV1_DEC_REG(4, 6, 0x1fff)
+#define av1_pic_width_in_cbs		AV1_DEC_REG(4, 19, 0x1fff)
+
+/*
+ * SWREG5: single-bit flags in bits 0-22, then the 2-bit av1_delta_lf_res_log
+ * (bits 23-24) and the 7-bit av1_strm_start_bit (bits 25-31).
+ */
+#define av1_ref_scaling_enable		AV1_DEC_REG(5, 0, 0x1)
+#define av1_filt_level_base_gt32	AV1_DEC_REG(5, 1, 0x1)
+#define av1_error_resilient		AV1_DEC_REG(5, 2, 0x1)
+#define av1_force_integer_mv		AV1_DEC_REG(5, 3, 0x1)
+/* Misspelled name of av1_force_integer_mv, kept so existing users still build */
+#define av1_force_interger_mv		av1_force_integer_mv
+#define av1_allow_intrabc		AV1_DEC_REG(5, 4, 0x1)
+#define av1_allow_screen_content_tools	AV1_DEC_REG(5, 5, 0x1)
+#define av1_reduced_tx_set_used		AV1_DEC_REG(5, 6, 0x1)
+#define av1_enable_dual_filter		AV1_DEC_REG(5, 7, 0x1)
+#define av1_enable_jnt_comp		AV1_DEC_REG(5, 8, 0x1)
+#define av1_allow_filter_intra		AV1_DEC_REG(5, 9, 0x1)
+#define av1_enable_intra_edge_filter	AV1_DEC_REG(5, 10, 0x1)
+#define av1_tempor_mvp_e		AV1_DEC_REG(5, 11, 0x1)
+#define av1_allow_interintra		AV1_DEC_REG(5, 12, 0x1)
+#define av1_allow_masked_compound	AV1_DEC_REG(5, 13, 0x1)
+#define av1_enable_cdef			AV1_DEC_REG(5, 14, 0x1)
+#define av1_switchable_motion_mode	AV1_DEC_REG(5, 15, 0x1)
+#define av1_show_frame			AV1_DEC_REG(5, 16, 0x1)
+#define av1_superres_is_scaled		AV1_DEC_REG(5, 17, 0x1)
+#define av1_allow_warp			AV1_DEC_REG(5, 18, 0x1)
+#define av1_disable_cdf_update		AV1_DEC_REG(5, 19, 0x1)
+#define av1_preskip_segid		AV1_DEC_REG(5, 20, 0x1)
+#define av1_delta_lf_present		AV1_DEC_REG(5, 21, 0x1)
+#define av1_delta_lf_multi		AV1_DEC_REG(5, 22, 0x1)
+#define av1_delta_lf_res_log		AV1_DEC_REG(5, 23, 0x3)
+#define av1_strm_start_bit		AV1_DEC_REG(5, 25, 0x7f)
+
+/* SWREG6: the full 32-bit register is one field */
+#define av1_stream_len			AV1_DEC_REG(6, 0, 0xffffffff)
+
+/*
+ * SWREG7: small flags/fields in bits 0-13, the 16-bit av1_random_seed in
+ * bits 14-29 and av1_blackwhite_e in bit 30.
+ */
+#define av1_delta_q_present		AV1_DEC_REG(7, 0, 0x1)
+#define av1_delta_q_res_log		AV1_DEC_REG(7, 1, 0x3)
+#define av1_cdef_damping		AV1_DEC_REG(7, 3, 0x3)
+#define av1_cdef_bits			AV1_DEC_REG(7, 5, 0x3)
+#define av1_apply_grain			AV1_DEC_REG(7, 7, 0x1)
+#define av1_num_y_points_b		AV1_DEC_REG(7, 8, 0x1)
+#define av1_num_cb_points_b		AV1_DEC_REG(7, 9, 0x1)
+#define av1_num_cr_points_b		AV1_DEC_REG(7, 10, 0x1)
+#define av1_overlap_flag		AV1_DEC_REG(7, 11, 0x1)
+#define av1_clip_to_restricted_range	AV1_DEC_REG(7, 12, 0x1)
+#define av1_chroma_scaling_from_luma	AV1_DEC_REG(7, 13, 0x1)
+#define av1_random_seed			AV1_DEC_REG(7, 14, 0xffff)
+#define av1_blackwhite_e		AV1_DEC_REG(7, 30, 0x1)
+
+/* SWREG8: fields are packed back to back and fill all 32 bits */
+#define av1_scaling_shift		AV1_DEC_REG(8, 0, 0xf)
+#define av1_bit_depth_c_minus8		AV1_DEC_REG(8, 4, 0x3)
+#define av1_bit_depth_y_minus8		AV1_DEC_REG(8, 6, 0x3)
+#define av1_quant_base_qindex		AV1_DEC_REG(8, 8, 0xff)
+#define av1_idr_pic_e			AV1_DEC_REG(8, 16, 0x1)
+#define av1_superres_pic_width		AV1_DEC_REG(8, 17, 0x7fff)
+
+/*
+ * SWREG9: only the ref4..ref6 sign bias bits live here (bits 2-4); the
+ * ref0..ref3 sign bias bits are defined in SWREG59.
+ */
+#define av1_ref4_sign_bias		AV1_DEC_REG(9, 2, 0x1)
+#define av1_ref5_sign_bias		AV1_DEC_REG(9, 3, 0x1)
+#define av1_ref6_sign_bias		AV1_DEC_REG(9, 4, 0x1)
+#define av1_mf1_type			AV1_DEC_REG(9, 5, 0x7)
+#define av1_mf2_type			AV1_DEC_REG(9, 8, 0x7)
+#define av1_mf3_type			AV1_DEC_REG(9, 11, 0x7)
+#define av1_scale_denom_minus9		AV1_DEC_REG(9, 14, 0x7)
+#define av1_last_active_seg		AV1_DEC_REG(9, 17, 0x7)
+#define av1_context_update_tile_id	AV1_DEC_REG(9, 20, 0xfff)
+
+/* SWREG10: two flag bits followed by four fields that fill bits 2-31 */
+#define av1_tile_transpose		AV1_DEC_REG(10, 0, 0x1)
+#define av1_tile_enable			AV1_DEC_REG(10, 1, 0x1)
+#define av1_multicore_full_width	AV1_DEC_REG(10, 2, 0xff)
+#define av1_num_tile_rows_8k		AV1_DEC_REG(10, 10, 0x7f)
+#define av1_num_tile_cols_8k		AV1_DEC_REG(10, 17, 0x7f)
+#define av1_multicore_tile_start_x	AV1_DEC_REG(10, 24, 0xff)
+
+/*
+ * SWREG11: note the av1_use_temporalN_mvs bits are in descending order
+ * (temporal3 is bit 0, temporal0 is bit 3).
+ */
+#define av1_use_temporal3_mvs		AV1_DEC_REG(11, 0, 0x1)
+#define av1_use_temporal2_mvs		AV1_DEC_REG(11, 1, 0x1)
+#define av1_use_temporal1_mvs		AV1_DEC_REG(11, 2, 0x1)
+#define av1_use_temporal0_mvs		AV1_DEC_REG(11, 3, 0x1)
+#define av1_comp_pred_mode		AV1_DEC_REG(11, 4, 0x3)
+#define av1_high_prec_mv_e		AV1_DEC_REG(11, 7, 0x1)
+#define av1_mcomp_filt_type		AV1_DEC_REG(11, 8, 0x7)
+#define av1_multicore_expect_context_update	AV1_DEC_REG(11, 11, 0x1)
+#define av1_multicore_sbx_offset	AV1_DEC_REG(11, 12, 0x7f)
+#define av1_multicore_tile_col		AV1_DEC_REG(11, 19, 0x7f)
+/* Misspelled name of av1_multicore_tile_col, kept so existing users still build */
+#define av1_ulticore_tile_col		av1_multicore_tile_col
+#define av1_transform_mode		AV1_DEC_REG(11, 27, 0x7)
+#define av1_dec_tile_size_mag		AV1_DEC_REG(11, 30, 0x3)
+
+/* SWREG12: fields start at bit 2 and are packed back to back up to bit 31 */
+#define av1_seg_quant_sign		AV1_DEC_REG(12, 2, 0xff)
+#define av1_max_cb_size			AV1_DEC_REG(12, 10, 0x7)
+#define av1_min_cb_size			AV1_DEC_REG(12, 13, 0x7)
+#define av1_comp_pred_fixed_ref		AV1_DEC_REG(12, 16, 0x7)
+#define av1_multicore_tile_width	AV1_DEC_REG(12, 19, 0x7f)
+#define av1_pic_height_pad		AV1_DEC_REG(12, 26, 0x7)
+#define av1_pic_width_pad		AV1_DEC_REG(12, 29, 0x7)
+
+/* SWREG13: flags in bits 0-9, then three 7-bit qp delta fields (bits 11-31) */
+#define av1_segment_e			AV1_DEC_REG(13, 0, 0x1)
+#define av1_segment_upd_e		AV1_DEC_REG(13, 1, 0x1)
+#define av1_segment_temp_upd_e		AV1_DEC_REG(13, 2, 0x1)
+#define av1_comp_pred_var_ref0_av1	AV1_DEC_REG(13, 3, 0x7)
+#define av1_comp_pred_var_ref1_av1	AV1_DEC_REG(13, 6, 0x7)
+#define av1_lossless_e			AV1_DEC_REG(13, 9, 0x1)
+#define av1_qp_delta_ch_ac_av1		AV1_DEC_REG(13, 11, 0x7f)
+#define av1_qp_delta_ch_dc_av1		AV1_DEC_REG(13, 18, 0x7f)
+#define av1_qp_delta_y_dc_av1		AV1_DEC_REG(13, 25, 0x7f)
+
+/*
+ * SWREG14-19: one register per segment index 0..5, all with the same layout
+ * in bits 0-25 (quant 8 bits, filt_level 6 bits, skip 1 bit, refpic 4 bits,
+ * filt_level_delta0 7 bits). Bits 26-31 carry a separate 6-bit field that
+ * differs per register (av1_filt_level0..3, av1_lr_type, av1_lr_unit_size).
+ * The matching fields for segments 6 and 7 are defined in SWREG31 and SWREG32.
+ */
+#define av1_quant_seg0			AV1_DEC_REG(14, 0, 0xff)
+#define av1_filt_level_seg0		AV1_DEC_REG(14, 8, 0x3f)
+#define av1_skip_seg0			AV1_DEC_REG(14, 14, 0x1)
+#define av1_refpic_seg0			AV1_DEC_REG(14, 15, 0xf)
+#define av1_filt_level_delta0_seg0	AV1_DEC_REG(14, 19, 0x7f)
+#define av1_filt_level0			AV1_DEC_REG(14, 26, 0x3f)
+
+#define av1_quant_seg1			AV1_DEC_REG(15, 0, 0xff)
+#define av1_filt_level_seg1		AV1_DEC_REG(15, 8, 0x3f)
+#define av1_skip_seg1			AV1_DEC_REG(15, 14, 0x1)
+#define av1_refpic_seg1			AV1_DEC_REG(15, 15, 0xf)
+#define av1_filt_level_delta0_seg1	AV1_DEC_REG(15, 19, 0x7f)
+#define av1_filt_level1			AV1_DEC_REG(15, 26, 0x3f)
+
+#define av1_quant_seg2			AV1_DEC_REG(16, 0, 0xff)
+#define av1_filt_level_seg2		AV1_DEC_REG(16, 8, 0x3f)
+#define av1_skip_seg2			AV1_DEC_REG(16, 14, 0x1)
+#define av1_refpic_seg2			AV1_DEC_REG(16, 15, 0xf)
+#define av1_filt_level_delta0_seg2	AV1_DEC_REG(16, 19, 0x7f)
+#define av1_filt_level2			AV1_DEC_REG(16, 26, 0x3f)
+
+#define av1_quant_seg3			AV1_DEC_REG(17, 0, 0xff)
+#define av1_filt_level_seg3		AV1_DEC_REG(17, 8, 0x3f)
+#define av1_skip_seg3			AV1_DEC_REG(17, 14, 0x1)
+#define av1_refpic_seg3			AV1_DEC_REG(17, 15, 0xf)
+#define av1_filt_level_delta0_seg3	AV1_DEC_REG(17, 19, 0x7f)
+#define av1_filt_level3			AV1_DEC_REG(17, 26, 0x3f)
+
+#define av1_quant_seg4			AV1_DEC_REG(18, 0, 0xff)
+#define av1_filt_level_seg4		AV1_DEC_REG(18, 8, 0x3f)
+#define av1_skip_seg4			AV1_DEC_REG(18, 14, 0x1)
+#define av1_refpic_seg4			AV1_DEC_REG(18, 15, 0xf)
+#define av1_filt_level_delta0_seg4	AV1_DEC_REG(18, 19, 0x7f)
+#define av1_lr_type			AV1_DEC_REG(18, 26, 0x3f)
+
+#define av1_quant_seg5			AV1_DEC_REG(19, 0, 0xff)
+#define av1_filt_level_seg5		AV1_DEC_REG(19, 8, 0x3f)
+#define av1_skip_seg5			AV1_DEC_REG(19, 14, 0x1)
+#define av1_refpic_seg5			AV1_DEC_REG(19, 15, 0xf)
+#define av1_filt_level_delta0_seg5	AV1_DEC_REG(19, 19, 0x7f)
+#define av1_lr_unit_size		AV1_DEC_REG(19, 26, 0x3f)
+
+/*
+ * SWREG20-27: one register per segment index 0..7, all with the same layout:
+ * three 7-bit filt_level_delta1..3 fields (bits 0-20), the global_mv flag
+ * (bit 21) and a 9-bit offset field in bits 22-30 (av1_mf1_* for SWREG20-26,
+ * av1_mf2_last_offset for SWREG27).
+ */
+#define av1_filt_level_delta1_seg0	AV1_DEC_REG(20, 0, 0x7f)
+#define av1_filt_level_delta2_seg0	AV1_DEC_REG(20, 7, 0x7f)
+#define av1_filt_level_delta3_seg0	AV1_DEC_REG(20, 14, 0x7f)
+#define av1_global_mv_seg0		AV1_DEC_REG(20, 21, 0x1)
+#define av1_mf1_last_offset		AV1_DEC_REG(20, 22, 0x1ff)
+
+#define av1_filt_level_delta1_seg1	AV1_DEC_REG(21, 0, 0x7f)
+#define av1_filt_level_delta2_seg1	AV1_DEC_REG(21, 7, 0x7f)
+#define av1_filt_level_delta3_seg1	AV1_DEC_REG(21, 14, 0x7f)
+#define av1_global_mv_seg1		AV1_DEC_REG(21, 21, 0x1)
+#define av1_mf1_last2_offset		AV1_DEC_REG(21, 22, 0x1ff)
+
+#define av1_filt_level_delta1_seg2	AV1_DEC_REG(22, 0, 0x7f)
+#define av1_filt_level_delta2_seg2	AV1_DEC_REG(22, 7, 0x7f)
+#define av1_filt_level_delta3_seg2	AV1_DEC_REG(22, 14, 0x7f)
+#define av1_global_mv_seg2		AV1_DEC_REG(22, 21, 0x1)
+#define av1_mf1_last3_offset		AV1_DEC_REG(22, 22, 0x1ff)
+
+#define av1_filt_level_delta1_seg3	AV1_DEC_REG(23, 0, 0x7f)
+#define av1_filt_level_delta2_seg3	AV1_DEC_REG(23, 7, 0x7f)
+#define av1_filt_level_delta3_seg3	AV1_DEC_REG(23, 14, 0x7f)
+#define av1_global_mv_seg3		AV1_DEC_REG(23, 21, 0x1)
+#define av1_mf1_golden_offset		AV1_DEC_REG(23, 22, 0x1ff)
+
+#define av1_filt_level_delta1_seg4	AV1_DEC_REG(24, 0, 0x7f)
+#define av1_filt_level_delta2_seg4	AV1_DEC_REG(24, 7, 0x7f)
+#define av1_filt_level_delta3_seg4	AV1_DEC_REG(24, 14, 0x7f)
+#define av1_global_mv_seg4		AV1_DEC_REG(24, 21, 0x1)
+#define av1_mf1_bwdref_offset		AV1_DEC_REG(24, 22, 0x1ff)
+
+#define av1_filt_level_delta1_seg5	AV1_DEC_REG(25, 0, 0x7f)
+#define av1_filt_level_delta2_seg5	AV1_DEC_REG(25, 7, 0x7f)
+#define av1_filt_level_delta3_seg5	AV1_DEC_REG(25, 14, 0x7f)
+#define av1_global_mv_seg5		AV1_DEC_REG(25, 21, 0x1)
+#define av1_mf1_altref2_offset		AV1_DEC_REG(25, 22, 0x1ff)
+
+#define av1_filt_level_delta1_seg6	AV1_DEC_REG(26, 0, 0x7f)
+#define av1_filt_level_delta2_seg6	AV1_DEC_REG(26, 7, 0x7f)
+#define av1_filt_level_delta3_seg6	AV1_DEC_REG(26, 14, 0x7f)
+#define av1_global_mv_seg6		AV1_DEC_REG(26, 21, 0x1)
+#define av1_mf1_altref_offset		AV1_DEC_REG(26, 22, 0x1ff)
+
+#define av1_filt_level_delta1_seg7	AV1_DEC_REG(27, 0, 0x7f)
+#define av1_filt_level_delta2_seg7	AV1_DEC_REG(27, 7, 0x7f)
+#define av1_filt_level_delta3_seg7	AV1_DEC_REG(27, 14, 0x7f)
+#define av1_global_mv_seg7		AV1_DEC_REG(27, 21, 0x1)
+#define av1_mf2_last_offset		AV1_DEC_REG(27, 22, 0x1ff)
+
+/*
+ * SWREG28 (cb) and SWREG29 (cr) share one layout: 9-bit offset, 8-bit
+ * luma_mult, 8-bit mult, and a 7-bit av1_quant_delta_v_* field in bits 25-31.
+ */
+#define av1_cb_offset			AV1_DEC_REG(28, 0, 0x1ff)
+#define av1_cb_luma_mult		AV1_DEC_REG(28, 9, 0xff)
+#define av1_cb_mult			AV1_DEC_REG(28, 17, 0xff)
+#define av1_quant_delta_v_dc		AV1_DEC_REG(28, 25, 0x7f)
+
+#define av1_cr_offset			AV1_DEC_REG(29, 0, 0x1ff)
+#define av1_cr_luma_mult		AV1_DEC_REG(29, 9, 0xff)
+#define av1_cr_mult			AV1_DEC_REG(29, 17, 0xff)
+#define av1_quant_delta_v_ac		AV1_DEC_REG(29, 25, 0x7f)
+
+/*
+ * SWREG30: four 7-bit adjustment fields and the 3-bit av1_filt_sharpness.
+ * Only av1_filt_ref_adj_4/5 are here; 0..3 are in SWREG59, 6/7 in SWREG49.
+ */
+#define av1_filt_ref_adj_5		AV1_DEC_REG(30, 0, 0x7f)
+#define av1_filt_ref_adj_4		AV1_DEC_REG(30, 7, 0x7f)
+#define av1_filt_mb_adj_1		AV1_DEC_REG(30, 14, 0x7f)
+#define av1_filt_mb_adj_0		AV1_DEC_REG(30, 21, 0x7f)
+#define av1_filt_sharpness		AV1_DEC_REG(30, 28, 0x7)
+
+/*
+ * SWREG31/32: segment 6 and 7 fields, using the same bit layout in bits 0-25
+ * as the segment 0..5 registers (SWREG14-19); bits 26-29 hold av1_skip_ref0/1.
+ */
+#define av1_quant_seg6			AV1_DEC_REG(31, 0, 0xff)
+#define av1_filt_level_seg6		AV1_DEC_REG(31, 8, 0x3f)
+#define av1_skip_seg6			AV1_DEC_REG(31, 14, 0x1)
+#define av1_refpic_seg6			AV1_DEC_REG(31, 15, 0xf)
+#define av1_filt_level_delta0_seg6	AV1_DEC_REG(31, 19, 0x7f)
+#define av1_skip_ref0			AV1_DEC_REG(31, 26, 0xf)
+
+#define av1_quant_seg7			AV1_DEC_REG(32, 0, 0xff)
+#define av1_filt_level_seg7		AV1_DEC_REG(32, 8, 0x3f)
+#define av1_skip_seg7			AV1_DEC_REG(32, 14, 0x1)
+#define av1_refpic_seg7			AV1_DEC_REG(32, 15, 0xf)
+#define av1_filt_level_delta0_seg7	AV1_DEC_REG(32, 19, 0x7f)
+#define av1_skip_ref1			AV1_DEC_REG(32, 26, 0xf)
+
+/*
+ * SWREG33-46: per-reference 16-bit pairs (low half-word first).
+ * The registers are not grouped by kind: ref0..ref2 sizes are in SWREG33-35,
+ * ref0..ref6 scale factors in SWREG36-42 and ref3..ref6 sizes in SWREG43-46.
+ */
+#define av1_ref0_height			AV1_DEC_REG(33, 0, 0xffff)
+#define av1_ref0_width			AV1_DEC_REG(33, 16, 0xffff)
+
+#define av1_ref1_height			AV1_DEC_REG(34, 0, 0xffff)
+#define av1_ref1_width			AV1_DEC_REG(34, 16, 0xffff)
+
+#define av1_ref2_height			AV1_DEC_REG(35, 0, 0xffff)
+#define av1_ref2_width			AV1_DEC_REG(35, 16, 0xffff)
+
+#define av1_ref0_ver_scale		AV1_DEC_REG(36, 0, 0xffff)
+#define av1_ref0_hor_scale		AV1_DEC_REG(36, 16, 0xffff)
+
+#define av1_ref1_ver_scale		AV1_DEC_REG(37, 0, 0xffff)
+#define av1_ref1_hor_scale		AV1_DEC_REG(37, 16, 0xffff)
+
+#define av1_ref2_ver_scale		AV1_DEC_REG(38, 0, 0xffff)
+#define av1_ref2_hor_scale		AV1_DEC_REG(38, 16, 0xffff)
+
+#define av1_ref3_ver_scale		AV1_DEC_REG(39, 0, 0xffff)
+#define av1_ref3_hor_scale		AV1_DEC_REG(39, 16, 0xffff)
+
+#define av1_ref4_ver_scale		AV1_DEC_REG(40, 0, 0xffff)
+#define av1_ref4_hor_scale		AV1_DEC_REG(40, 16, 0xffff)
+
+#define av1_ref5_ver_scale		AV1_DEC_REG(41, 0, 0xffff)
+#define av1_ref5_hor_scale		AV1_DEC_REG(41, 16, 0xffff)
+
+#define av1_ref6_ver_scale		AV1_DEC_REG(42, 0, 0xffff)
+#define av1_ref6_hor_scale		AV1_DEC_REG(42, 16, 0xffff)
+
+#define av1_ref3_height			AV1_DEC_REG(43, 0, 0xffff)
+#define av1_ref3_width			AV1_DEC_REG(43, 16, 0xffff)
+
+#define av1_ref4_height			AV1_DEC_REG(44, 0, 0xffff)
+#define av1_ref4_width			AV1_DEC_REG(44, 16, 0xffff)
+
+#define av1_ref5_height			AV1_DEC_REG(45, 0, 0xffff)
+#define av1_ref5_width			AV1_DEC_REG(45, 16, 0xffff)
+
+#define av1_ref6_height			AV1_DEC_REG(46, 0, 0xffff)
+#define av1_ref6_width			AV1_DEC_REG(46, 16, 0xffff)
+
+/*
+ * SWREG47/48: three 9-bit av1_mf2_* offsets each (continuing from
+ * av1_mf2_last_offset in SWREG27) plus a 4-bit qmlevel field in bits 27-30.
+ */
+#define av1_mf2_last2_offset		AV1_DEC_REG(47, 0, 0x1ff)
+#define av1_mf2_last3_offset		AV1_DEC_REG(47, 9, 0x1ff)
+#define av1_mf2_golden_offset		AV1_DEC_REG(47, 18, 0x1ff)
+#define av1_qmlevel_y			AV1_DEC_REG(47, 27, 0xf)
+
+#define av1_mf2_bwdref_offset		AV1_DEC_REG(48, 0, 0x1ff)
+#define av1_mf2_altref2_offset		AV1_DEC_REG(48, 9, 0x1ff)
+#define av1_mf2_altref_offset		AV1_DEC_REG(48, 18, 0x1ff)
+#define av1_qmlevel_u			AV1_DEC_REG(48, 27, 0xf)
+
+/* SWREG49: av1_filt_ref_adj_6/7 (7 bits each) and the third qmlevel field */
+#define av1_filt_ref_adj_6		AV1_DEC_REG(49, 0, 0x7f)
+#define av1_filt_ref_adj_7		AV1_DEC_REG(49, 7, 0x7f)
+#define av1_qmlevel_v			AV1_DEC_REG(49, 14, 0xf)
+
+/* SWREG51/52: two 14-bit fields each, chroma in bits 0-13, luma in 14-27 */
+#define av1_superres_chroma_step	AV1_DEC_REG(51, 0, 0x3fff)
+#define av1_superres_luma_step		AV1_DEC_REG(51, 14, 0x3fff)
+
+#define av1_superres_init_chroma_subpel_x	AV1_DEC_REG(52, 0, 0x3fff)
+#define av1_superres_init_luma_subpel_x		AV1_DEC_REG(52, 14, 0x3fff)
+
+/* SWREG53: two 16-bit fields, chroma in the low half, luma in the high half */
+#define av1_cdef_chroma_secondary_strength	AV1_DEC_REG(53, 0, 0xffff)
+#define av1_cdef_luma_secondary_strength	AV1_DEC_REG(53, 16, 0xffff)
+
+/*
+ * SWREG55: 16-bit threshold plus control bits.
+ * NOTE(review): av1_apf_single_pu_mode and av1_apf_disable are both defined
+ * as bit 30, so writing one also writes the other. One of the two is
+ * presumably at a different bit (bit 31 is unused here) - confirm against
+ * the hardware register map before relying on either field.
+ */
+#define av1_apf_threshold		AV1_DEC_REG(55, 0, 0xffff)
+#define av1_apf_single_pu_mode		AV1_DEC_REG(55, 30, 0x1)
+#define av1_apf_disable			AV1_DEC_REG(55, 30, 0x1)
+
+/* SWREG58: bus-related fields (burst, bus width, AXI id enables, multicore) */
+#define av1_dec_max_burst		AV1_DEC_REG(58, 0, 0xff)
+#define av1_dec_buswidth		AV1_DEC_REG(58, 8, 0x7)
+#define av1_dec_multicore_mode		AV1_DEC_REG(58, 11, 0x3)
+#define av1_dec_axi_wd_id_e		AV1_DEC_REG(58, 13, 0x1)
+#define av1_dec_axi_rd_id_e		AV1_DEC_REG(58, 14, 0x1)
+#define av1_dec_mc_polltime		AV1_DEC_REG(58, 17, 0x3ff)
+#define av1_dec_mc_pollmode		AV1_DEC_REG(58, 27, 0x3)
+
+/*
+ * SWREG59: av1_filt_ref_adj_0..3 in descending order, then the ref0..ref3
+ * sign bias bits (bits 28-31).
+ * NOTE(review): the four adjustment fields are placed 7 bits apart but use a
+ * 6-bit mask (0x3f), while av1_filt_ref_adj_4..7 (SWREG30/SWREG49) use 0x7f.
+ * With 0x3f bit 6 of each value is dropped - confirm the intended field
+ * width against the register documentation.
+ */
+#define av1_filt_ref_adj_3		AV1_DEC_REG(59, 0, 0x3f)
+#define av1_filt_ref_adj_2		AV1_DEC_REG(59, 7, 0x3f)
+#define av1_filt_ref_adj_1		AV1_DEC_REG(59, 14, 0x3f)
+#define av1_filt_ref_adj_0		AV1_DEC_REG(59, 21, 0x3f)
+#define av1_ref0_sign_bias		AV1_DEC_REG(59, 28, 0x1)
+#define av1_ref1_sign_bias		AV1_DEC_REG(59, 29, 0x1)
+#define av1_ref2_sign_bias		AV1_DEC_REG(59, 30, 0x1)
+#define av1_ref3_sign_bias		AV1_DEC_REG(59, 31, 0x1)
+
+/*
+ * Per-reference registers with one shared layout: 9-bit av1_cur_*_roffset,
+ * 9-bit av1_cur_*_offset, 9-bit av1_mf3_*_offset and a 2-bit refN_gm_mode in
+ * bits 27-28. They are not contiguous: last, last2, last3, golden and bwdref
+ * use SWREG184-188, altref2 uses SWREG257 and altref uses SWREG262.
+ */
+#define av1_cur_last_roffset		AV1_DEC_REG(184, 0, 0x1ff)
+#define av1_cur_last_offset		AV1_DEC_REG(184, 9, 0x1ff)
+#define av1_mf3_last_offset		AV1_DEC_REG(184, 18, 0x1ff)
+#define av1_ref0_gm_mode		AV1_DEC_REG(184, 27, 0x3)
+
+#define av1_cur_last2_roffset		AV1_DEC_REG(185, 0, 0x1ff)
+#define av1_cur_last2_offset		AV1_DEC_REG(185, 9, 0x1ff)
+#define av1_mf3_last2_offset		AV1_DEC_REG(185, 18, 0x1ff)
+#define av1_ref1_gm_mode		AV1_DEC_REG(185, 27, 0x3)
+
+#define av1_cur_last3_roffset		AV1_DEC_REG(186, 0, 0x1ff)
+#define av1_cur_last3_offset		AV1_DEC_REG(186, 9, 0x1ff)
+#define av1_mf3_last3_offset		AV1_DEC_REG(186, 18, 0x1ff)
+#define av1_ref2_gm_mode		AV1_DEC_REG(186, 27, 0x3)
+
+#define av1_cur_golden_roffset		AV1_DEC_REG(187, 0, 0x1ff)
+#define av1_cur_golden_offset		AV1_DEC_REG(187, 9, 0x1ff)
+#define av1_mf3_golden_offset		AV1_DEC_REG(187, 18, 0x1ff)
+#define av1_ref3_gm_mode		AV1_DEC_REG(187, 27, 0x3)
+
+#define av1_cur_bwdref_roffset		AV1_DEC_REG(188, 0, 0x1ff)
+#define av1_cur_bwdref_offset		AV1_DEC_REG(188, 9, 0x1ff)
+#define av1_mf3_bwdref_offset		AV1_DEC_REG(188, 18, 0x1ff)
+#define av1_ref4_gm_mode		AV1_DEC_REG(188, 27, 0x3)
+
+#define av1_cur_altref2_roffset		AV1_DEC_REG(257, 0, 0x1ff)
+#define av1_cur_altref2_offset		AV1_DEC_REG(257, 9, 0x1ff)
+#define av1_mf3_altref2_offset		AV1_DEC_REG(257, 18, 0x1ff)
+#define av1_ref5_gm_mode		AV1_DEC_REG(257, 27, 0x3)
+
+/* SWREG258/259: full 32-bit stream buffer length and start offset */
+#define av1_strm_buffer_len		AV1_DEC_REG(258, 0, 0xffffffff)
+
+#define av1_strm_start_offset		AV1_DEC_REG(259, 0, 0xffffffff)
+
+/* SWREG260: post-processor related bits in bits 21-31 */
+#define av1_ppd_blend_exist		AV1_DEC_REG(260, 21, 0x1)
+#define av1_ppd_dith_exist		AV1_DEC_REG(260, 23, 0x1)
+#define av1_ablend_crop_e		AV1_DEC_REG(260, 24, 0x1)
+#define av1_pp_format_p010_e		AV1_DEC_REG(260, 25, 0x1)
+#define av1_pp_format_customer1_e	AV1_DEC_REG(260, 26, 0x1)
+#define av1_pp_crop_exist		AV1_DEC_REG(260, 27, 0x1)
+#define av1_pp_up_level			AV1_DEC_REG(260, 28, 0x1)
+#define av1_pp_down_level		AV1_DEC_REG(260, 29, 0x3)
+#define av1_pp_exist			AV1_DEC_REG(260, 31, 0x1)
+
+#define av1_cur_altref_roffset		AV1_DEC_REG(262, 0, 0x1ff)
+#define av1_cur_altref_offset		AV1_DEC_REG(262, 9, 0x1ff)
+#define av1_mf3_altref_offset		AV1_DEC_REG(262, 18, 0x1ff)
+#define av1_ref6_gm_mode		AV1_DEC_REG(262, 27, 0x3)
+
+/* SWREG263/264: full 32-bit luma and chroma primary strength registers */
+#define av1_cdef_luma_primary_strength	AV1_DEC_REG(263, 0, 0xffffffff)
+
+#define av1_cdef_chroma_primary_strength AV1_DEC_REG(264, 0, 0xffffffff)
+
+/* SWREG265: AXI QoS values and 10-bit outstanding-transaction thresholds */
+#define av1_axi_arqos			AV1_DEC_REG(265, 0, 0xf)
+#define av1_axi_awqos			AV1_DEC_REG(265, 4, 0xf)
+#define av1_axi_wr_ostd_threshold	AV1_DEC_REG(265, 8, 0x3ff)
+#define av1_axi_rd_ostd_threshold	AV1_DEC_REG(265, 18, 0x3ff)
+#define av1_axi_wr_4k_dis		AV1_DEC_REG(265, 31, 0x1)
+
+/* SWREG266: three single-bit switches */
+#define av1_128bit_mode			AV1_DEC_REG(266, 5, 0x1)
+#define av1_wr_shaper_bypass		AV1_DEC_REG(266, 10, 0x1)
+#define av1_error_conceal_e		AV1_DEC_REG(266, 30, 0x1)
+
+/* SWREG298: two 16-bit fields, chroma in the low half, luma in the high half */
+#define av1_superres_chroma_step_invra	AV1_DEC_REG(298, 0, 0xffff)
+#define av1_superres_luma_step_invra	AV1_DEC_REG(298, 16, 0xffff)
+
+#define av1_dec_alignment		AV1_DEC_REG(314, 0, 0xffff)
+
+/* SWREG318/319: 31-bit cycle count with an override enable in bit 31 */
+#define av1_ext_timeout_cycles		AV1_DEC_REG(318, 0, 0x7fffffff)
+#define av1_ext_timeout_override_e	AV1_DEC_REG(318, 31, 0x1)
+
+#define av1_timeout_cycles		AV1_DEC_REG(319, 0, 0x7fffffff)
+#define av1_timeout_override_e		AV1_DEC_REG(319, 31, 0x1)
+
+/*
+ * SWREG320: post-processor control. av1_pp_out_e (bit 0) is the field the
+ * postproc enable/disable helpers write 1/0 to.
+ */
+#define av1_pp_out_e			AV1_DEC_REG(320, 0, 0x1)
+#define av1_pp_cr_first			AV1_DEC_REG(320, 1, 0x1)
+#define av1_pp_out_mode			AV1_DEC_REG(320, 2, 0x1)
+#define av1_pp_out_tile_e		AV1_DEC_REG(320, 3, 0x1)
+#define av1_pp_status			AV1_DEC_REG(320, 4, 0xf)
+#define av1_pp_in_blk_size		AV1_DEC_REG(320, 8, 0x7)
+#define av1_pp_out_p010_fmt		AV1_DEC_REG(320, 11, 0x3)
+#define av1_pp_out_rgb_fmt		AV1_DEC_REG(320, 13, 0x1f)
+#define av1_rgb_range_max		AV1_DEC_REG(320, 18, 0xfff)
+#define av1_pp_rgb_planar		AV1_DEC_REG(320, 30, 0x1)
+
+/* SWREG322: 18-bit scale ratio, then 5-bit output/input format selectors */
+#define av1_scale_hratio		AV1_DEC_REG(322, 0, 0x3ffff)
+#define av1_pp_out_format		AV1_DEC_REG(322, 18, 0x1f)
+#define av1_ver_scale_mode		AV1_DEC_REG(322, 23, 0x3)
+#define av1_hor_scale_mode		AV1_DEC_REG(322, 25, 0x3)
+#define av1_pp_in_format		AV1_DEC_REG(322, 27, 0x1f)
+
+/* SWREG329: 16-bit output strides, chroma in the low half, luma in the high */
+#define av1_pp_out_c_stride		AV1_DEC_REG(329, 0, 0xffff)
+#define av1_pp_out_y_stride		AV1_DEC_REG(329, 16, 0xffff)
+
+/* SWREG331/332: 16-bit height (low half) and width (high half) */
+#define av1_pp_in_height		AV1_DEC_REG(331, 0, 0xffff)
+#define av1_pp_in_width			AV1_DEC_REG(331, 16, 0xffff)
+
+#define av1_pp_out_height		AV1_DEC_REG(332, 0, 0xffff)
+#define av1_pp_out_width		AV1_DEC_REG(332, 16, 0xffff)
+
+/* SWREG394: four 8-bit dup fields; pp1 in the low half, pp0 in the high half */
+#define av1_pp1_dup_ver			AV1_DEC_REG(394, 0, 0xff)
+#define av1_pp1_dup_hor			AV1_DEC_REG(394, 8, 0xff)
+#define av1_pp0_dup_ver			AV1_DEC_REG(394, 16, 0xff)
+#define av1_pp0_dup_hor			AV1_DEC_REG(394, 24, 0xff)
+
+/*
+ * Plain register offsets (no shift/mask). AV1_PP_OUT_LU and AV1_PP_OUT_CH are
+ * passed to hantro_write_addr() together with a DMA address by the postproc
+ * enable helper; the others presumably hold buffer addresses too - confirm
+ * against their users. The indexed variants step by 0x8, i.e. two registers
+ * per index @i.
+ */
+#define AV1_TILE_OUT_LU			(AV1_SWREG(65))
+#define AV1_REFERENCE_Y(i)		(AV1_SWREG(67) + ((i) * 0x8))
+#define AV1_SEGMENTATION		(AV1_SWREG(81))
+#define AV1_GLOBAL_MODEL		(AV1_SWREG(83))
+#define AV1_CDEF_COL			(AV1_SWREG(85))
+#define AV1_SR_COL			(AV1_SWREG(89))
+#define AV1_LR_COL			(AV1_SWREG(91))
+#define AV1_FILM_GRAIN			(AV1_SWREG(95))
+#define AV1_TILE_OUT_CH			(AV1_SWREG(99))
+#define AV1_REFERENCE_CB(i)		(AV1_SWREG(101) + ((i) * 0x8))
+#define AV1_TILE_OUT_MV			(AV1_SWREG(133))
+#define AV1_REFERENCE_MV(i)		(AV1_SWREG(135) + ((i) * 0x8))
+#define AV1_TILE_BASE			(AV1_SWREG(167))
+#define AV1_INPUT_STREAM		(AV1_SWREG(169))
+#define AV1_PROP_TABLE_OUT		(AV1_SWREG(171))
+#define AV1_PROP_TABLE			(AV1_SWREG(173))
+#define AV1_MC_SYNC_CURR		(AV1_SWREG(175))
+#define AV1_MC_SYNC_LEFT		(AV1_SWREG(177))
+#define AV1_DB_DATA_COL			(AV1_SWREG(179))
+#define AV1_DB_CTRL_COL			(AV1_SWREG(183))
+#define AV1_PP_OUT_LU			(AV1_SWREG(326))
+#define AV1_PP_OUT_CH			(AV1_SWREG(328))
+
+#endif /* _ROCKCHIP_VPU981_REGS_H_ */