diff mbox series

[03/13] media: rockchip: rga: allocate DMA descriptors per buffer

Message ID 20230914-rockchip-rga-multiplanar-v1-3-abfd77260ae3@pengutronix.de (mailing list archive)
State New, archived
Headers show
Series media: rockchip: rga: add support for multi-planar formats | expand

Commit Message

Michael Tretter Sept. 14, 2023, 12:40 p.m. UTC
The RGA driver allocates two buffers for the DMA descriptors of the
input and output buffers. Whenever a new job is processed, the
descriptor list is updated for the current buffers.

By updating the descriptor list during buf_prepare, it is possible to
correctly fail DMABUF imports if the buffers that shall be imported are
not within the 32 bit address range that can be addressed by the RGA.

Managing the DMA descriptor list with the buffer also makes it easier to
track the buffer mapping and the plane offsets into this mapping.

The cost is that the driver now requires DMA coherent memory per buffer
for the descriptor list. However, the size scales with the size of the
video buffers and is not allocated if the RGA is not used.

While at it, use dma_alloc_coherent to allocate the descriptors and get
rid of the virt_to_phys calls to get the physical addresses.

Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
---
 drivers/media/platform/rockchip/rga/rga-buf.c | 78 +++++++++++++++++----------
 drivers/media/platform/rockchip/rga/rga-hw.c  | 26 ++++-----
 drivers/media/platform/rockchip/rga/rga.c     | 31 ++---------
 drivers/media/platform/rockchip/rga/rga.h     | 21 ++++++--
 4 files changed, 86 insertions(+), 70 deletions(-)

Comments

Robin Murphy Sept. 14, 2023, 3:12 p.m. UTC | #1
On 2023-09-14 13:40, Michael Tretter wrote:
> The RGA driver allocates two buffers for the DMA descriptors of the
> input and output buffers. Whenever a new job is processed, the
> descriptor list is updated for the current buffers.
> 
> By updating the descriptor list during buf_prepare, it is possible to
> correctly fail DMABUF imports if the buffers that shall be imported are
> not within the 32 bit address range that can be addressed by the RGA.
> 
> Managing the DMA descriptor list with the buffer also makes it easier to
> track the buffer mapping and the plane offsets into this mapping.
> 
> The cost is that the driver now requires DMA coherent memory per buffer
> for the descriptor list. However, the size scales with the size of the
> video buffers and is not allocated if the RGA is not used.
> 
> While at it, use dma_alloc_coherent to allocate the descriptors and get
> rid of the virt_to_phys calls to get the physical addresses.
> 
> Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
> ---
>   drivers/media/platform/rockchip/rga/rga-buf.c | 78 +++++++++++++++++----------
>   drivers/media/platform/rockchip/rga/rga-hw.c  | 26 ++++-----
>   drivers/media/platform/rockchip/rga/rga.c     | 31 ++---------
>   drivers/media/platform/rockchip/rga/rga.h     | 21 ++++++--
>   4 files changed, 86 insertions(+), 70 deletions(-)
> 
> diff --git a/drivers/media/platform/rockchip/rga/rga-buf.c b/drivers/media/platform/rockchip/rga/rga-buf.c
> index df5ebc90e32d..e8dcc0d5cb90 100644
> --- a/drivers/media/platform/rockchip/rga/rga-buf.c
> +++ b/drivers/media/platform/rockchip/rga/rga-buf.c
> @@ -55,16 +55,54 @@ rga_queue_setup(struct vb2_queue *vq,
>   	return 0;
>   }
>   
> +static int rga_buf_init(struct vb2_buffer *vb)
> +{
> +	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> +	struct rga_vb_buffer *rbuf = vb_to_rga(vbuf);
> +	struct rga_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
> +	struct rockchip_rga *rga = ctx->rga;
> +	struct rga_frame *f = rga_get_frame(ctx, vb->vb2_queue->type);
> +	int n_desc = 0;
> +
> +	n_desc = DIV_ROUND_UP(f->size, PAGE_SIZE);
> +
> +	rbuf->n_desc = n_desc;
> +	rbuf->dma_desc = dma_alloc_coherent(rga->dev,
> +					    rbuf->n_desc * sizeof(*rbuf->dma_desc),
> +					    &rbuf->dma_desc_pa, GFP_KERNEL);
> +	if (!rbuf->dma_desc)
> +		return -ENOMEM;
> +
> +	return 0;
> +}
> +
>   static int rga_buf_prepare(struct vb2_buffer *vb)
>   {
> +	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> +	struct rga_vb_buffer *rbuf = vb_to_rga(vbuf);
>   	struct rga_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
>   	struct rga_frame *f = rga_get_frame(ctx, vb->vb2_queue->type);
> +	struct rockchip_rga *rga = ctx->rga;
> +	int n_desc;
>   
>   	if (IS_ERR(f))
>   		return PTR_ERR(f);
>   
>   	vb2_set_plane_payload(vb, 0, f->size);
>   
> +	/* Create local MMU table for RGA */
> +	n_desc = fill_descriptors(rbuf->dma_desc,
> +				  vb2_dma_sg_plane_desc(vb, 0));
> +	if (n_desc < 0) {
> +		dev_err(rga->dev, "Failed to map buffer");
> +		return n_desc;
> +	}
> +
> +	/* sync local MMU table for RGA */
> +	dma_sync_single_for_device(rga->dev, rbuf->dma_desc_pa,
> +				   n_desc * sizeof(*rbuf->dma_desc),
> +				   DMA_BIDIRECTIONAL);

This sync is still wrong (and DMA_API_DEBUG should complain), but also 
now pointless if you're using a coherent buffer (as is the one in 
rga_cmd_set(), seemingly)

Thanks,
Robin.

> +
>   	return 0;
>   }
>   
> @@ -76,6 +114,17 @@ static void rga_buf_queue(struct vb2_buffer *vb)
>   	v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf);
>   }
>   
> +static void rga_buf_cleanup(struct vb2_buffer *vb)
> +{
> +	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> +	struct rga_vb_buffer *rbuf = vb_to_rga(vbuf);
> +	struct rga_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
> +	struct rockchip_rga *rga = ctx->rga;
> +
> +	dma_free_coherent(rga->dev, rbuf->n_desc * sizeof(*rbuf->dma_desc),
> +			  rbuf->dma_desc, rbuf->dma_desc_pa);
> +}
> +
>   static void rga_buf_return_buffers(struct vb2_queue *q,
>   				   enum vb2_buffer_state state)
>   {
> @@ -119,37 +168,12 @@ static void rga_buf_stop_streaming(struct vb2_queue *q)
>   
>   const struct vb2_ops rga_qops = {
>   	.queue_setup = rga_queue_setup,
> +	.buf_init = rga_buf_init,
>   	.buf_prepare = rga_buf_prepare,
>   	.buf_queue = rga_buf_queue,
> +	.buf_cleanup = rga_buf_cleanup,
>   	.wait_prepare = vb2_ops_wait_prepare,
>   	.wait_finish = vb2_ops_wait_finish,
>   	.start_streaming = rga_buf_start_streaming,
>   	.stop_streaming = rga_buf_stop_streaming,
>   };
> -
> -/* RGA MMU is a 1-Level MMU, so it can't be used through the IOMMU API.
> - * We use it more like a scatter-gather list.
> - */
> -void rga_buf_map(struct vb2_buffer *vb)
> -{
> -	struct rga_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
> -	struct rockchip_rga *rga = ctx->rga;
> -	struct rga_dma_desc *pages;
> -	unsigned int num_desc = 0;
> -
> -	if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
> -		pages = rga->src_mmu_pages;
> -	else
> -		pages = rga->dst_mmu_pages;
> -
> -	/* Create local MMU table for RGA */
> -	num_desc = fill_descriptors(pages, vb2_dma_sg_plane_desc(vb, 0));
> -	if (num_desc < 0) {
> -		dev_err(rga->dev, "Failed to map buffer");
> -		return;
> -	}
> -
> -	/* sync local MMU table for RGA */
> -	dma_sync_single_for_device(rga->dev, virt_to_phys(pages),
> -				   num_desc * sizeof(*pages), DMA_BIDIRECTIONAL);
> -}
> diff --git a/drivers/media/platform/rockchip/rga/rga-hw.c b/drivers/media/platform/rockchip/rga/rga-hw.c
> index aaa96f256356..b391d97d4632 100644
> --- a/drivers/media/platform/rockchip/rga/rga-hw.c
> +++ b/drivers/media/platform/rockchip/rga/rga-hw.c
> @@ -119,40 +119,40 @@ static struct rga_addr_offset *rga_lookup_draw_pos(struct
>   	return NULL;
>   }
>   
> -static void rga_cmd_set_src_addr(struct rga_ctx *ctx, void *mmu_pages)
> +static void rga_cmd_set_src_addr(struct rga_ctx *ctx, dma_addr_t dma_addr)
>   {
>   	struct rockchip_rga *rga = ctx->rga;
>   	u32 *dest = rga->cmdbuf_virt;
>   	unsigned int reg;
>   
>   	reg = RGA_MMU_SRC_BASE - RGA_MODE_BASE_REG;
> -	dest[reg >> 2] = virt_to_phys(mmu_pages) >> 4;
> +	dest[reg >> 2] = dma_addr >> 4;
>   
>   	reg = RGA_MMU_CTRL1 - RGA_MODE_BASE_REG;
>   	dest[reg >> 2] |= 0x7;
>   }
>   
> -static void rga_cmd_set_src1_addr(struct rga_ctx *ctx, void *mmu_pages)
> +static void rga_cmd_set_src1_addr(struct rga_ctx *ctx, dma_addr_t dma_addr)
>   {
>   	struct rockchip_rga *rga = ctx->rga;
>   	u32 *dest = rga->cmdbuf_virt;
>   	unsigned int reg;
>   
>   	reg = RGA_MMU_SRC1_BASE - RGA_MODE_BASE_REG;
> -	dest[reg >> 2] = virt_to_phys(mmu_pages) >> 4;
> +	dest[reg >> 2] = dma_addr >> 4;
>   
>   	reg = RGA_MMU_CTRL1 - RGA_MODE_BASE_REG;
>   	dest[reg >> 2] |= 0x7 << 4;
>   }
>   
> -static void rga_cmd_set_dst_addr(struct rga_ctx *ctx, void *mmu_pages)
> +static void rga_cmd_set_dst_addr(struct rga_ctx *ctx, dma_addr_t dma_addr)
>   {
>   	struct rockchip_rga *rga = ctx->rga;
>   	u32 *dest = rga->cmdbuf_virt;
>   	unsigned int reg;
>   
>   	reg = RGA_MMU_DST_BASE - RGA_MODE_BASE_REG;
> -	dest[reg >> 2] = virt_to_phys(mmu_pages) >> 4;
> +	dest[reg >> 2] = dma_addr >> 4;
>   
>   	reg = RGA_MMU_CTRL1 - RGA_MODE_BASE_REG;
>   	dest[reg >> 2] |= 0x7 << 8;
> @@ -375,20 +375,21 @@ static void rga_cmd_set_mode(struct rga_ctx *ctx)
>   	dest[(RGA_MODE_CTRL - RGA_MODE_BASE_REG) >> 2] = mode.val;
>   }
>   
> -static void rga_cmd_set(struct rga_ctx *ctx)
> +static void rga_cmd_set(struct rga_ctx *ctx,
> +			struct rga_vb_buffer *src, struct rga_vb_buffer *dst)
>   {
>   	struct rockchip_rga *rga = ctx->rga;
>   
>   	memset(rga->cmdbuf_virt, 0, RGA_CMDBUF_SIZE * 4);
>   
> -	rga_cmd_set_src_addr(ctx, rga->src_mmu_pages);
> +	rga_cmd_set_src_addr(ctx, src->dma_desc_pa);
>   	/*
>   	 * Due to hardware bug,
>   	 * src1 mmu also should be configured when using alpha blending.
>   	 */
> -	rga_cmd_set_src1_addr(ctx, rga->dst_mmu_pages);
> +	rga_cmd_set_src1_addr(ctx, dst->dma_desc_pa);
>   
> -	rga_cmd_set_dst_addr(ctx, rga->dst_mmu_pages);
> +	rga_cmd_set_dst_addr(ctx, dst->dma_desc_pa);
>   	rga_cmd_set_mode(ctx);
>   
>   	rga_cmd_set_trans_info(ctx);
> @@ -400,11 +401,12 @@ static void rga_cmd_set(struct rga_ctx *ctx)
>   		PAGE_SIZE, DMA_BIDIRECTIONAL);
>   }
>   
> -void rga_hw_start(struct rockchip_rga *rga)
> +void rga_hw_start(struct rockchip_rga *rga,
> +		  struct rga_vb_buffer *src, struct rga_vb_buffer *dst)
>   {
>   	struct rga_ctx *ctx = rga->curr;
>   
> -	rga_cmd_set(ctx);
> +	rga_cmd_set(ctx, src, dst);
>   
>   	rga_write(rga, RGA_SYS_CTRL, 0x00);
>   
> diff --git a/drivers/media/platform/rockchip/rga/rga.c b/drivers/media/platform/rockchip/rga/rga.c
> index 25f5b5eebf13..f18fccc7b204 100644
> --- a/drivers/media/platform/rockchip/rga/rga.c
> +++ b/drivers/media/platform/rockchip/rga/rga.c
> @@ -45,10 +45,7 @@ static void device_run(void *prv)
>   	src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
>   	dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
>   
> -	rga_buf_map(&src->vb2_buf);
> -	rga_buf_map(&dst->vb2_buf);
> -
> -	rga_hw_start(rga);
> +	rga_hw_start(rga, vb_to_rga(src), vb_to_rga(dst));
>   
>   	spin_unlock_irqrestore(&rga->ctrl_lock, flags);
>   }
> @@ -101,7 +98,7 @@ queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
>   	src_vq->drv_priv = ctx;
>   	src_vq->ops = &rga_qops;
>   	src_vq->mem_ops = &vb2_dma_sg_memops;
> -	src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
> +	src_vq->buf_struct_size = sizeof(struct rga_vb_buffer);
>   	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>   	src_vq->lock = &ctx->rga->mutex;
>   	src_vq->dev = ctx->rga->v4l2_dev.dev;
> @@ -115,7 +112,7 @@ queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
>   	dst_vq->drv_priv = ctx;
>   	dst_vq->ops = &rga_qops;
>   	dst_vq->mem_ops = &vb2_dma_sg_memops;
> -	dst_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
> +	dst_vq->buf_struct_size = sizeof(struct rga_vb_buffer);
>   	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>   	dst_vq->lock = &ctx->rga->mutex;
>   	dst_vq->dev = ctx->rga->v4l2_dev.dev;
> @@ -872,26 +869,13 @@ static int rga_probe(struct platform_device *pdev)
>   		goto rel_m2m;
>   	}
>   
> -	rga->src_mmu_pages =
> -		(unsigned int *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 3);
> -	if (!rga->src_mmu_pages) {
> -		ret = -ENOMEM;
> -		goto free_dma;
> -	}
> -	rga->dst_mmu_pages =
> -		(unsigned int *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 3);
> -	if (!rga->dst_mmu_pages) {
> -		ret = -ENOMEM;
> -		goto free_src_pages;
> -	}
> -
>   	def_frame.stride = (def_frame.width * def_frame.fmt->depth) >> 3;
>   	def_frame.size = def_frame.stride * def_frame.height;
>   
>   	ret = video_register_device(vfd, VFL_TYPE_VIDEO, -1);
>   	if (ret) {
>   		v4l2_err(&rga->v4l2_dev, "Failed to register video device\n");
> -		goto free_dst_pages;
> +		goto free_dma;
>   	}
>   
>   	v4l2_info(&rga->v4l2_dev, "Registered %s as /dev/%s\n",
> @@ -899,10 +883,6 @@ static int rga_probe(struct platform_device *pdev)
>   
>   	return 0;
>   
> -free_dst_pages:
> -	free_pages((unsigned long)rga->dst_mmu_pages, 3);
> -free_src_pages:
> -	free_pages((unsigned long)rga->src_mmu_pages, 3);
>   free_dma:
>   	dma_free_attrs(rga->dev, RGA_CMDBUF_SIZE, rga->cmdbuf_virt,
>   		       rga->cmdbuf_phy, DMA_ATTR_WRITE_COMBINE);
> @@ -925,9 +905,6 @@ static void rga_remove(struct platform_device *pdev)
>   	dma_free_attrs(rga->dev, RGA_CMDBUF_SIZE, rga->cmdbuf_virt,
>   		       rga->cmdbuf_phy, DMA_ATTR_WRITE_COMBINE);
>   
> -	free_pages((unsigned long)rga->src_mmu_pages, 3);
> -	free_pages((unsigned long)rga->dst_mmu_pages, 3);
> -
>   	v4l2_info(&rga->v4l2_dev, "Removing\n");
>   
>   	v4l2_m2m_release(rga->m2m_dev);
> diff --git a/drivers/media/platform/rockchip/rga/rga.h b/drivers/media/platform/rockchip/rga/rga.h
> index 22f7da28ac51..ae984d5a236d 100644
> --- a/drivers/media/platform/rockchip/rga/rga.h
> +++ b/drivers/media/platform/rockchip/rga/rga.h
> @@ -85,15 +85,27 @@ struct rockchip_rga {
>   	struct rga_ctx *curr;
>   	dma_addr_t cmdbuf_phy;
>   	void *cmdbuf_virt;
> -	struct rga_dma_desc *src_mmu_pages;
> -	struct rga_dma_desc *dst_mmu_pages;
>   };
>   
> +struct rga_vb_buffer {
> +	struct vb2_v4l2_buffer vb_buf;
> +	struct list_head queue;
> +
> +	/* RGA MMU mapping for this buffer */
> +	struct rga_dma_desc *dma_desc;
> +	dma_addr_t dma_desc_pa;
> +	int n_desc;
> +};
> +
> +static inline struct rga_vb_buffer *vb_to_rga(struct vb2_v4l2_buffer *vb)
> +{
> +	return container_of(vb, struct rga_vb_buffer, vb_buf);
> +}
> +
>   struct rga_frame *rga_get_frame(struct rga_ctx *ctx, enum v4l2_buf_type type);
>   
>   /* RGA Buffers Manage */
>   extern const struct vb2_ops rga_qops;
> -void rga_buf_map(struct vb2_buffer *vb);
>   
>   /* RGA Hardware */
>   static inline void rga_write(struct rockchip_rga *rga, u32 reg, u32 value)
> @@ -114,6 +126,7 @@ static inline void rga_mod(struct rockchip_rga *rga, u32 reg, u32 val, u32 mask)
>   	rga_write(rga, reg, temp);
>   };
>   
> -void rga_hw_start(struct rockchip_rga *rga);
> +void rga_hw_start(struct rockchip_rga *rga,
> +		  struct rga_vb_buffer *src, struct rga_vb_buffer *dst);
>   
>   #endif
>
diff mbox series

Patch

diff --git a/drivers/media/platform/rockchip/rga/rga-buf.c b/drivers/media/platform/rockchip/rga/rga-buf.c
index df5ebc90e32d..e8dcc0d5cb90 100644
--- a/drivers/media/platform/rockchip/rga/rga-buf.c
+++ b/drivers/media/platform/rockchip/rga/rga-buf.c
@@ -55,16 +55,54 @@  rga_queue_setup(struct vb2_queue *vq,
 	return 0;
 }
 
+static int rga_buf_init(struct vb2_buffer *vb)
+{
+	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
+	struct rga_vb_buffer *rbuf = vb_to_rga(vbuf);
+	struct rga_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
+	struct rockchip_rga *rga = ctx->rga;
+	struct rga_frame *f = rga_get_frame(ctx, vb->vb2_queue->type);
+	int n_desc = 0;
+
+	n_desc = DIV_ROUND_UP(f->size, PAGE_SIZE);
+
+	rbuf->n_desc = n_desc;
+	rbuf->dma_desc = dma_alloc_coherent(rga->dev,
+					    rbuf->n_desc * sizeof(*rbuf->dma_desc),
+					    &rbuf->dma_desc_pa, GFP_KERNEL);
+	if (!rbuf->dma_desc)
+		return -ENOMEM;
+
+	return 0;
+}
+
 static int rga_buf_prepare(struct vb2_buffer *vb)
 {
+	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
+	struct rga_vb_buffer *rbuf = vb_to_rga(vbuf);
 	struct rga_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
 	struct rga_frame *f = rga_get_frame(ctx, vb->vb2_queue->type);
+	struct rockchip_rga *rga = ctx->rga;
+	int n_desc;
 
 	if (IS_ERR(f))
 		return PTR_ERR(f);
 
 	vb2_set_plane_payload(vb, 0, f->size);
 
+	/* Create local MMU table for RGA */
+	n_desc = fill_descriptors(rbuf->dma_desc,
+				  vb2_dma_sg_plane_desc(vb, 0));
+	if (n_desc < 0) {
+		dev_err(rga->dev, "Failed to map buffer");
+		return n_desc;
+	}
+
+	/* sync local MMU table for RGA */
+	dma_sync_single_for_device(rga->dev, rbuf->dma_desc_pa,
+				   n_desc * sizeof(*rbuf->dma_desc),
+				   DMA_BIDIRECTIONAL);
+
 	return 0;
 }
 
@@ -76,6 +114,17 @@  static void rga_buf_queue(struct vb2_buffer *vb)
 	v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf);
 }
 
+static void rga_buf_cleanup(struct vb2_buffer *vb)
+{
+	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
+	struct rga_vb_buffer *rbuf = vb_to_rga(vbuf);
+	struct rga_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
+	struct rockchip_rga *rga = ctx->rga;
+
+	dma_free_coherent(rga->dev, rbuf->n_desc * sizeof(*rbuf->dma_desc),
+			  rbuf->dma_desc, rbuf->dma_desc_pa);
+}
+
 static void rga_buf_return_buffers(struct vb2_queue *q,
 				   enum vb2_buffer_state state)
 {
@@ -119,37 +168,12 @@  static void rga_buf_stop_streaming(struct vb2_queue *q)
 
 const struct vb2_ops rga_qops = {
 	.queue_setup = rga_queue_setup,
+	.buf_init = rga_buf_init,
 	.buf_prepare = rga_buf_prepare,
 	.buf_queue = rga_buf_queue,
+	.buf_cleanup = rga_buf_cleanup,
 	.wait_prepare = vb2_ops_wait_prepare,
 	.wait_finish = vb2_ops_wait_finish,
 	.start_streaming = rga_buf_start_streaming,
 	.stop_streaming = rga_buf_stop_streaming,
 };
-
-/* RGA MMU is a 1-Level MMU, so it can't be used through the IOMMU API.
- * We use it more like a scatter-gather list.
- */
-void rga_buf_map(struct vb2_buffer *vb)
-{
-	struct rga_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
-	struct rockchip_rga *rga = ctx->rga;
-	struct rga_dma_desc *pages;
-	unsigned int num_desc = 0;
-
-	if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
-		pages = rga->src_mmu_pages;
-	else
-		pages = rga->dst_mmu_pages;
-
-	/* Create local MMU table for RGA */
-	num_desc = fill_descriptors(pages, vb2_dma_sg_plane_desc(vb, 0));
-	if (num_desc < 0) {
-		dev_err(rga->dev, "Failed to map buffer");
-		return;
-	}
-
-	/* sync local MMU table for RGA */
-	dma_sync_single_for_device(rga->dev, virt_to_phys(pages),
-				   num_desc * sizeof(*pages), DMA_BIDIRECTIONAL);
-}
diff --git a/drivers/media/platform/rockchip/rga/rga-hw.c b/drivers/media/platform/rockchip/rga/rga-hw.c
index aaa96f256356..b391d97d4632 100644
--- a/drivers/media/platform/rockchip/rga/rga-hw.c
+++ b/drivers/media/platform/rockchip/rga/rga-hw.c
@@ -119,40 +119,40 @@  static struct rga_addr_offset *rga_lookup_draw_pos(struct
 	return NULL;
 }
 
-static void rga_cmd_set_src_addr(struct rga_ctx *ctx, void *mmu_pages)
+static void rga_cmd_set_src_addr(struct rga_ctx *ctx, dma_addr_t dma_addr)
 {
 	struct rockchip_rga *rga = ctx->rga;
 	u32 *dest = rga->cmdbuf_virt;
 	unsigned int reg;
 
 	reg = RGA_MMU_SRC_BASE - RGA_MODE_BASE_REG;
-	dest[reg >> 2] = virt_to_phys(mmu_pages) >> 4;
+	dest[reg >> 2] = dma_addr >> 4;
 
 	reg = RGA_MMU_CTRL1 - RGA_MODE_BASE_REG;
 	dest[reg >> 2] |= 0x7;
 }
 
-static void rga_cmd_set_src1_addr(struct rga_ctx *ctx, void *mmu_pages)
+static void rga_cmd_set_src1_addr(struct rga_ctx *ctx, dma_addr_t dma_addr)
 {
 	struct rockchip_rga *rga = ctx->rga;
 	u32 *dest = rga->cmdbuf_virt;
 	unsigned int reg;
 
 	reg = RGA_MMU_SRC1_BASE - RGA_MODE_BASE_REG;
-	dest[reg >> 2] = virt_to_phys(mmu_pages) >> 4;
+	dest[reg >> 2] = dma_addr >> 4;
 
 	reg = RGA_MMU_CTRL1 - RGA_MODE_BASE_REG;
 	dest[reg >> 2] |= 0x7 << 4;
 }
 
-static void rga_cmd_set_dst_addr(struct rga_ctx *ctx, void *mmu_pages)
+static void rga_cmd_set_dst_addr(struct rga_ctx *ctx, dma_addr_t dma_addr)
 {
 	struct rockchip_rga *rga = ctx->rga;
 	u32 *dest = rga->cmdbuf_virt;
 	unsigned int reg;
 
 	reg = RGA_MMU_DST_BASE - RGA_MODE_BASE_REG;
-	dest[reg >> 2] = virt_to_phys(mmu_pages) >> 4;
+	dest[reg >> 2] = dma_addr >> 4;
 
 	reg = RGA_MMU_CTRL1 - RGA_MODE_BASE_REG;
 	dest[reg >> 2] |= 0x7 << 8;
@@ -375,20 +375,21 @@  static void rga_cmd_set_mode(struct rga_ctx *ctx)
 	dest[(RGA_MODE_CTRL - RGA_MODE_BASE_REG) >> 2] = mode.val;
 }
 
-static void rga_cmd_set(struct rga_ctx *ctx)
+static void rga_cmd_set(struct rga_ctx *ctx,
+			struct rga_vb_buffer *src, struct rga_vb_buffer *dst)
 {
 	struct rockchip_rga *rga = ctx->rga;
 
 	memset(rga->cmdbuf_virt, 0, RGA_CMDBUF_SIZE * 4);
 
-	rga_cmd_set_src_addr(ctx, rga->src_mmu_pages);
+	rga_cmd_set_src_addr(ctx, src->dma_desc_pa);
 	/*
 	 * Due to hardware bug,
 	 * src1 mmu also should be configured when using alpha blending.
 	 */
-	rga_cmd_set_src1_addr(ctx, rga->dst_mmu_pages);
+	rga_cmd_set_src1_addr(ctx, dst->dma_desc_pa);
 
-	rga_cmd_set_dst_addr(ctx, rga->dst_mmu_pages);
+	rga_cmd_set_dst_addr(ctx, dst->dma_desc_pa);
 	rga_cmd_set_mode(ctx);
 
 	rga_cmd_set_trans_info(ctx);
@@ -400,11 +401,12 @@  static void rga_cmd_set(struct rga_ctx *ctx)
 		PAGE_SIZE, DMA_BIDIRECTIONAL);
 }
 
-void rga_hw_start(struct rockchip_rga *rga)
+void rga_hw_start(struct rockchip_rga *rga,
+		  struct rga_vb_buffer *src, struct rga_vb_buffer *dst)
 {
 	struct rga_ctx *ctx = rga->curr;
 
-	rga_cmd_set(ctx);
+	rga_cmd_set(ctx, src, dst);
 
 	rga_write(rga, RGA_SYS_CTRL, 0x00);
 
diff --git a/drivers/media/platform/rockchip/rga/rga.c b/drivers/media/platform/rockchip/rga/rga.c
index 25f5b5eebf13..f18fccc7b204 100644
--- a/drivers/media/platform/rockchip/rga/rga.c
+++ b/drivers/media/platform/rockchip/rga/rga.c
@@ -45,10 +45,7 @@  static void device_run(void *prv)
 	src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
 	dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
 
-	rga_buf_map(&src->vb2_buf);
-	rga_buf_map(&dst->vb2_buf);
-
-	rga_hw_start(rga);
+	rga_hw_start(rga, vb_to_rga(src), vb_to_rga(dst));
 
 	spin_unlock_irqrestore(&rga->ctrl_lock, flags);
 }
@@ -101,7 +98,7 @@  queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
 	src_vq->drv_priv = ctx;
 	src_vq->ops = &rga_qops;
 	src_vq->mem_ops = &vb2_dma_sg_memops;
-	src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
+	src_vq->buf_struct_size = sizeof(struct rga_vb_buffer);
 	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
 	src_vq->lock = &ctx->rga->mutex;
 	src_vq->dev = ctx->rga->v4l2_dev.dev;
@@ -115,7 +112,7 @@  queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
 	dst_vq->drv_priv = ctx;
 	dst_vq->ops = &rga_qops;
 	dst_vq->mem_ops = &vb2_dma_sg_memops;
-	dst_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
+	dst_vq->buf_struct_size = sizeof(struct rga_vb_buffer);
 	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
 	dst_vq->lock = &ctx->rga->mutex;
 	dst_vq->dev = ctx->rga->v4l2_dev.dev;
@@ -872,26 +869,13 @@  static int rga_probe(struct platform_device *pdev)
 		goto rel_m2m;
 	}
 
-	rga->src_mmu_pages =
-		(unsigned int *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 3);
-	if (!rga->src_mmu_pages) {
-		ret = -ENOMEM;
-		goto free_dma;
-	}
-	rga->dst_mmu_pages =
-		(unsigned int *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 3);
-	if (!rga->dst_mmu_pages) {
-		ret = -ENOMEM;
-		goto free_src_pages;
-	}
-
 	def_frame.stride = (def_frame.width * def_frame.fmt->depth) >> 3;
 	def_frame.size = def_frame.stride * def_frame.height;
 
 	ret = video_register_device(vfd, VFL_TYPE_VIDEO, -1);
 	if (ret) {
 		v4l2_err(&rga->v4l2_dev, "Failed to register video device\n");
-		goto free_dst_pages;
+		goto free_dma;
 	}
 
 	v4l2_info(&rga->v4l2_dev, "Registered %s as /dev/%s\n",
@@ -899,10 +883,6 @@  static int rga_probe(struct platform_device *pdev)
 
 	return 0;
 
-free_dst_pages:
-	free_pages((unsigned long)rga->dst_mmu_pages, 3);
-free_src_pages:
-	free_pages((unsigned long)rga->src_mmu_pages, 3);
 free_dma:
 	dma_free_attrs(rga->dev, RGA_CMDBUF_SIZE, rga->cmdbuf_virt,
 		       rga->cmdbuf_phy, DMA_ATTR_WRITE_COMBINE);
@@ -925,9 +905,6 @@  static void rga_remove(struct platform_device *pdev)
 	dma_free_attrs(rga->dev, RGA_CMDBUF_SIZE, rga->cmdbuf_virt,
 		       rga->cmdbuf_phy, DMA_ATTR_WRITE_COMBINE);
 
-	free_pages((unsigned long)rga->src_mmu_pages, 3);
-	free_pages((unsigned long)rga->dst_mmu_pages, 3);
-
 	v4l2_info(&rga->v4l2_dev, "Removing\n");
 
 	v4l2_m2m_release(rga->m2m_dev);
diff --git a/drivers/media/platform/rockchip/rga/rga.h b/drivers/media/platform/rockchip/rga/rga.h
index 22f7da28ac51..ae984d5a236d 100644
--- a/drivers/media/platform/rockchip/rga/rga.h
+++ b/drivers/media/platform/rockchip/rga/rga.h
@@ -85,15 +85,27 @@  struct rockchip_rga {
 	struct rga_ctx *curr;
 	dma_addr_t cmdbuf_phy;
 	void *cmdbuf_virt;
-	struct rga_dma_desc *src_mmu_pages;
-	struct rga_dma_desc *dst_mmu_pages;
 };
 
+struct rga_vb_buffer {
+	struct vb2_v4l2_buffer vb_buf;
+	struct list_head queue;
+
+	/* RGA MMU mapping for this buffer */
+	struct rga_dma_desc *dma_desc;
+	dma_addr_t dma_desc_pa;
+	int n_desc;
+};
+
+static inline struct rga_vb_buffer *vb_to_rga(struct vb2_v4l2_buffer *vb)
+{
+	return container_of(vb, struct rga_vb_buffer, vb_buf);
+}
+
 struct rga_frame *rga_get_frame(struct rga_ctx *ctx, enum v4l2_buf_type type);
 
 /* RGA Buffers Manage */
 extern const struct vb2_ops rga_qops;
-void rga_buf_map(struct vb2_buffer *vb);
 
 /* RGA Hardware */
 static inline void rga_write(struct rockchip_rga *rga, u32 reg, u32 value)
@@ -114,6 +126,7 @@  static inline void rga_mod(struct rockchip_rga *rga, u32 reg, u32 val, u32 mask)
 	rga_write(rga, reg, temp);
 };
 
-void rga_hw_start(struct rockchip_rga *rga);
+void rga_hw_start(struct rockchip_rga *rga,
+		  struct rga_vb_buffer *src, struct rga_vb_buffer *dst);
 
 #endif