diff mbox series

media: venus: use contig vb2 ops

Message ID 20201214125703.866998-1-acourbot@chromium.org (mailing list archive)
State Accepted
Commit cc82fd691a3a7e8b46bd94fe7cacb2835015df22
Headers show
Series media: venus: use contig vb2 ops | expand

Commit Message

Alexandre Courbot Dec. 14, 2020, 12:57 p.m. UTC
This driver uses the SG vb2 ops, but effectively only ever accesses the
first entry of the SG table, indicating that it expects a flat layout.
Switch it to use the contiguous ops to make sure this expected invariant
is always enforced. Since the device is supposed to be behind an IOMMU
this should have little to none practical consequences beyond making the
driver not rely on a particular behavior of the SG implementation.

Reported-by: Tomasz Figa <tfiga@chromium.org>
Signed-off-by: Alexandre Courbot <acourbot@chromium.org>
---
Hi everyone,

It probably doesn't hurt to fix this issue before some actual issue happens.
I have tested this patch on Chrome OS and playback was just as fine as with
the SG ops.

 drivers/media/platform/Kconfig              | 2 +-
 drivers/media/platform/qcom/venus/helpers.c | 9 ++-------
 drivers/media/platform/qcom/venus/vdec.c    | 6 +++---
 drivers/media/platform/qcom/venus/venc.c    | 6 +++---
 4 files changed, 9 insertions(+), 14 deletions(-)

Comments

Tomasz Figa Dec. 15, 2020, 9:02 a.m. UTC | #1
On Mon, Dec 14, 2020 at 9:57 PM Alexandre Courbot <acourbot@chromium.org> wrote:
>
> This driver uses the SG vb2 ops, but effectively only ever accesses the
> first entry of the SG table, indicating that it expects a flat layout.
> Switch it to use the contiguous ops to make sure this expected invariant
> is always enforced. Since the device is supposed to be behind an IOMMU
> this should have little to none practical consequences beyond making the
> driver not rely on a particular behavior of the SG implementation.
>
> Reported-by: Tomasz Figa <tfiga@chromium.org>
> Signed-off-by: Alexandre Courbot <acourbot@chromium.org>
> ---
> Hi everyone,
>
> It probably doesn't hurt to fix this issue before some actual issue happens.
> I have tested this patch on Chrome OS and playback was just as fine as with
> the SG ops.
>
>  drivers/media/platform/Kconfig              | 2 +-
>  drivers/media/platform/qcom/venus/helpers.c | 9 ++-------
>  drivers/media/platform/qcom/venus/vdec.c    | 6 +++---
>  drivers/media/platform/qcom/venus/venc.c    | 6 +++---
>  4 files changed, 9 insertions(+), 14 deletions(-)
>

Reviewed-by: Tomasz Figa <tfiga@chromium.org>

Best regards,
Tomasz

> diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
> index 35a18d388f3f..d9d7954111f2 100644
> --- a/drivers/media/platform/Kconfig
> +++ b/drivers/media/platform/Kconfig
> @@ -533,7 +533,7 @@ config VIDEO_QCOM_VENUS
>         depends on INTERCONNECT || !INTERCONNECT
>         select QCOM_MDT_LOADER if ARCH_QCOM
>         select QCOM_SCM if ARCH_QCOM
> -       select VIDEOBUF2_DMA_SG
> +       select VIDEOBUF2_DMA_CONTIG
>         select V4L2_MEM2MEM_DEV
>         help
>           This is a V4L2 driver for Qualcomm Venus video accelerator
> diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
> index 50439eb1ffea..859d260f002b 100644
> --- a/drivers/media/platform/qcom/venus/helpers.c
> +++ b/drivers/media/platform/qcom/venus/helpers.c
> @@ -7,7 +7,7 @@
>  #include <linux/mutex.h>
>  #include <linux/slab.h>
>  #include <linux/kernel.h>
> -#include <media/videobuf2-dma-sg.h>
> +#include <media/videobuf2-dma-contig.h>
>  #include <media/v4l2-mem2mem.h>
>  #include <asm/div64.h>
>
> @@ -1284,14 +1284,9 @@ int venus_helper_vb2_buf_init(struct vb2_buffer *vb)
>         struct venus_inst *inst = vb2_get_drv_priv(vb->vb2_queue);
>         struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
>         struct venus_buffer *buf = to_venus_buffer(vbuf);
> -       struct sg_table *sgt;
> -
> -       sgt = vb2_dma_sg_plane_desc(vb, 0);
> -       if (!sgt)
> -               return -EFAULT;
>
>         buf->size = vb2_plane_size(vb, 0);
> -       buf->dma_addr = sg_dma_address(sgt->sgl);
> +       buf->dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0);
>
>         if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
>                 list_add_tail(&buf->reg_list, &inst->registeredbufs);
> diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
> index 8488411204c3..3fb277c81aca 100644
> --- a/drivers/media/platform/qcom/venus/vdec.c
> +++ b/drivers/media/platform/qcom/venus/vdec.c
> @@ -13,7 +13,7 @@
>  #include <media/v4l2-event.h>
>  #include <media/v4l2-ctrls.h>
>  #include <media/v4l2-mem2mem.h>
> -#include <media/videobuf2-dma-sg.h>
> +#include <media/videobuf2-dma-contig.h>
>
>  #include "hfi_venus_io.h"
>  #include "hfi_parser.h"
> @@ -1461,7 +1461,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
>         src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
>         src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>         src_vq->ops = &vdec_vb2_ops;
> -       src_vq->mem_ops = &vb2_dma_sg_memops;
> +       src_vq->mem_ops = &vb2_dma_contig_memops;
>         src_vq->drv_priv = inst;
>         src_vq->buf_struct_size = sizeof(struct venus_buffer);
>         src_vq->allow_zero_bytesused = 1;
> @@ -1475,7 +1475,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
>         dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
>         dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>         dst_vq->ops = &vdec_vb2_ops;
> -       dst_vq->mem_ops = &vb2_dma_sg_memops;
> +       dst_vq->mem_ops = &vb2_dma_contig_memops;
>         dst_vq->drv_priv = inst;
>         dst_vq->buf_struct_size = sizeof(struct venus_buffer);
>         dst_vq->allow_zero_bytesused = 1;
> diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
> index 1c61602c5de1..a09550cd1dba 100644
> --- a/drivers/media/platform/qcom/venus/venc.c
> +++ b/drivers/media/platform/qcom/venus/venc.c
> @@ -10,7 +10,7 @@
>  #include <linux/pm_runtime.h>
>  #include <linux/slab.h>
>  #include <media/v4l2-mem2mem.h>
> -#include <media/videobuf2-dma-sg.h>
> +#include <media/videobuf2-dma-contig.h>
>  #include <media/v4l2-ioctl.h>
>  #include <media/v4l2-event.h>
>  #include <media/v4l2-ctrls.h>
> @@ -1001,7 +1001,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
>         src_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
>         src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>         src_vq->ops = &venc_vb2_ops;
> -       src_vq->mem_ops = &vb2_dma_sg_memops;
> +       src_vq->mem_ops = &vb2_dma_contig_memops;
>         src_vq->drv_priv = inst;
>         src_vq->buf_struct_size = sizeof(struct venus_buffer);
>         src_vq->allow_zero_bytesused = 1;
> @@ -1017,7 +1017,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
>         dst_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
>         dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>         dst_vq->ops = &venc_vb2_ops;
> -       dst_vq->mem_ops = &vb2_dma_sg_memops;
> +       dst_vq->mem_ops = &vb2_dma_contig_memops;
>         dst_vq->drv_priv = inst;
>         dst_vq->buf_struct_size = sizeof(struct venus_buffer);
>         dst_vq->allow_zero_bytesused = 1;
> --
> 2.29.2.684.gfbc64c5ab5-goog
>
Stanimir Varbanov Dec. 15, 2020, 11:16 a.m. UTC | #2
Hi,

Cc: Robin

On 12/14/20 2:57 PM, Alexandre Courbot wrote:
> This driver uses the SG vb2 ops, but effectively only ever accesses the
> first entry of the SG table, indicating that it expects a flat layout.
> Switch it to use the contiguous ops to make sure this expected invariant

Under what circumstances the sg table will has nents > 1? I came down to
[1] but not sure I got it right.

I'm afraid that for systems with low amount of system memory and when
the memory become fragmented, the driver will not work. That's why I
started with sg allocator.

[1]
https://elixir.bootlin.com/linux/v5.10.1/source/drivers/iommu/dma-iommu.c#L782

> is always enforced. Since the device is supposed to be behind an IOMMU
> this should have little to none practical consequences beyond making the
> driver not rely on a particular behavior of the SG implementation.
> 
> Reported-by: Tomasz Figa <tfiga@chromium.org>
> Signed-off-by: Alexandre Courbot <acourbot@chromium.org>
> ---
> Hi everyone,
> 
> It probably doesn't hurt to fix this issue before some actual issue happens.
> I have tested this patch on Chrome OS and playback was just as fine as with
> the SG ops.
> 
>  drivers/media/platform/Kconfig              | 2 +-
>  drivers/media/platform/qcom/venus/helpers.c | 9 ++-------
>  drivers/media/platform/qcom/venus/vdec.c    | 6 +++---
>  drivers/media/platform/qcom/venus/venc.c    | 6 +++---
>  4 files changed, 9 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
> index 35a18d388f3f..d9d7954111f2 100644
> --- a/drivers/media/platform/Kconfig
> +++ b/drivers/media/platform/Kconfig
> @@ -533,7 +533,7 @@ config VIDEO_QCOM_VENUS
>  	depends on INTERCONNECT || !INTERCONNECT
>  	select QCOM_MDT_LOADER if ARCH_QCOM
>  	select QCOM_SCM if ARCH_QCOM
> -	select VIDEOBUF2_DMA_SG
> +	select VIDEOBUF2_DMA_CONTIG
>  	select V4L2_MEM2MEM_DEV
>  	help
>  	  This is a V4L2 driver for Qualcomm Venus video accelerator
> diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
> index 50439eb1ffea..859d260f002b 100644
> --- a/drivers/media/platform/qcom/venus/helpers.c
> +++ b/drivers/media/platform/qcom/venus/helpers.c
> @@ -7,7 +7,7 @@
>  #include <linux/mutex.h>
>  #include <linux/slab.h>
>  #include <linux/kernel.h>
> -#include <media/videobuf2-dma-sg.h>
> +#include <media/videobuf2-dma-contig.h>
>  #include <media/v4l2-mem2mem.h>
>  #include <asm/div64.h>
>  
> @@ -1284,14 +1284,9 @@ int venus_helper_vb2_buf_init(struct vb2_buffer *vb)
>  	struct venus_inst *inst = vb2_get_drv_priv(vb->vb2_queue);
>  	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
>  	struct venus_buffer *buf = to_venus_buffer(vbuf);
> -	struct sg_table *sgt;
> -
> -	sgt = vb2_dma_sg_plane_desc(vb, 0);
> -	if (!sgt)
> -		return -EFAULT;
>  
>  	buf->size = vb2_plane_size(vb, 0);
> -	buf->dma_addr = sg_dma_address(sgt->sgl);

Can we do it:

	if (WARN_ON(sgt->nents > 1))
		return -EFAULT;

I understand that logically using dma-sg when the flat layout is
expected by the hardware is wrong, but I haven't seen issues until now.

> +	buf->dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0);
>  
>  	if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
>  		list_add_tail(&buf->reg_list, &inst->registeredbufs);
> diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
> index 8488411204c3..3fb277c81aca 100644
> --- a/drivers/media/platform/qcom/venus/vdec.c
> +++ b/drivers/media/platform/qcom/venus/vdec.c
> @@ -13,7 +13,7 @@
>  #include <media/v4l2-event.h>
>  #include <media/v4l2-ctrls.h>
>  #include <media/v4l2-mem2mem.h>
> -#include <media/videobuf2-dma-sg.h>
> +#include <media/videobuf2-dma-contig.h>
>  
>  #include "hfi_venus_io.h"
>  #include "hfi_parser.h"
> @@ -1461,7 +1461,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
>  	src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
>  	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>  	src_vq->ops = &vdec_vb2_ops;
> -	src_vq->mem_ops = &vb2_dma_sg_memops;
> +	src_vq->mem_ops = &vb2_dma_contig_memops;
>  	src_vq->drv_priv = inst;
>  	src_vq->buf_struct_size = sizeof(struct venus_buffer);
>  	src_vq->allow_zero_bytesused = 1;
> @@ -1475,7 +1475,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
>  	dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
>  	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>  	dst_vq->ops = &vdec_vb2_ops;
> -	dst_vq->mem_ops = &vb2_dma_sg_memops;
> +	dst_vq->mem_ops = &vb2_dma_contig_memops;
>  	dst_vq->drv_priv = inst;
>  	dst_vq->buf_struct_size = sizeof(struct venus_buffer);
>  	dst_vq->allow_zero_bytesused = 1;
> diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
> index 1c61602c5de1..a09550cd1dba 100644
> --- a/drivers/media/platform/qcom/venus/venc.c
> +++ b/drivers/media/platform/qcom/venus/venc.c
> @@ -10,7 +10,7 @@
>  #include <linux/pm_runtime.h>
>  #include <linux/slab.h>
>  #include <media/v4l2-mem2mem.h>
> -#include <media/videobuf2-dma-sg.h>
> +#include <media/videobuf2-dma-contig.h>
>  #include <media/v4l2-ioctl.h>
>  #include <media/v4l2-event.h>
>  #include <media/v4l2-ctrls.h>
> @@ -1001,7 +1001,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
>  	src_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
>  	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>  	src_vq->ops = &venc_vb2_ops;
> -	src_vq->mem_ops = &vb2_dma_sg_memops;
> +	src_vq->mem_ops = &vb2_dma_contig_memops;
>  	src_vq->drv_priv = inst;
>  	src_vq->buf_struct_size = sizeof(struct venus_buffer);
>  	src_vq->allow_zero_bytesused = 1;
> @@ -1017,7 +1017,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
>  	dst_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
>  	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>  	dst_vq->ops = &venc_vb2_ops;
> -	dst_vq->mem_ops = &vb2_dma_sg_memops;
> +	dst_vq->mem_ops = &vb2_dma_contig_memops;
>  	dst_vq->drv_priv = inst;
>  	dst_vq->buf_struct_size = sizeof(struct venus_buffer);
>  	dst_vq->allow_zero_bytesused = 1;
>
Robin Murphy Dec. 15, 2020, 11:34 a.m. UTC | #3
On 2020-12-15 11:16, Stanimir Varbanov wrote:
> Hi,
> 
> Cc: Robin
> 
> On 12/14/20 2:57 PM, Alexandre Courbot wrote:
>> This driver uses the SG vb2 ops, but effectively only ever accesses the
>> first entry of the SG table, indicating that it expects a flat layout.
>> Switch it to use the contiguous ops to make sure this expected invariant
> 
> Under what circumstances the sg table will has nents > 1? I came down to
> [1] but not sure I got it right.
> 
> I'm afraid that for systems with low amount of system memory and when
> the memory become fragmented, the driver will not work. That's why I
> started with sg allocator.

Despite what videobuf-dma-contig seems to assume, dma_alloc_coherent() 
makes no guarantee of providing physically contiguous memory. What it 
provides is *DMA contiguous* memory, i.e. from the point of view of the 
device. When an IOMMU is present and managed by the DMA API, such 
buffers may be assembled out of physically-scattered pages (particularly 
under memory pressure/fragmentation).

Robin.

> 
> [1]
> https://elixir.bootlin.com/linux/v5.10.1/source/drivers/iommu/dma-iommu.c#L782
> 
>> is always enforced. Since the device is supposed to be behind an IOMMU
>> this should have little to none practical consequences beyond making the
>> driver not rely on a particular behavior of the SG implementation.
>>
>> Reported-by: Tomasz Figa <tfiga@chromium.org>
>> Signed-off-by: Alexandre Courbot <acourbot@chromium.org>
>> ---
>> Hi everyone,
>>
>> It probably doesn't hurt to fix this issue before some actual issue happens.
>> I have tested this patch on Chrome OS and playback was just as fine as with
>> the SG ops.
>>
>>   drivers/media/platform/Kconfig              | 2 +-
>>   drivers/media/platform/qcom/venus/helpers.c | 9 ++-------
>>   drivers/media/platform/qcom/venus/vdec.c    | 6 +++---
>>   drivers/media/platform/qcom/venus/venc.c    | 6 +++---
>>   4 files changed, 9 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
>> index 35a18d388f3f..d9d7954111f2 100644
>> --- a/drivers/media/platform/Kconfig
>> +++ b/drivers/media/platform/Kconfig
>> @@ -533,7 +533,7 @@ config VIDEO_QCOM_VENUS
>>   	depends on INTERCONNECT || !INTERCONNECT
>>   	select QCOM_MDT_LOADER if ARCH_QCOM
>>   	select QCOM_SCM if ARCH_QCOM
>> -	select VIDEOBUF2_DMA_SG
>> +	select VIDEOBUF2_DMA_CONTIG
>>   	select V4L2_MEM2MEM_DEV
>>   	help
>>   	  This is a V4L2 driver for Qualcomm Venus video accelerator
>> diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
>> index 50439eb1ffea..859d260f002b 100644
>> --- a/drivers/media/platform/qcom/venus/helpers.c
>> +++ b/drivers/media/platform/qcom/venus/helpers.c
>> @@ -7,7 +7,7 @@
>>   #include <linux/mutex.h>
>>   #include <linux/slab.h>
>>   #include <linux/kernel.h>
>> -#include <media/videobuf2-dma-sg.h>
>> +#include <media/videobuf2-dma-contig.h>
>>   #include <media/v4l2-mem2mem.h>
>>   #include <asm/div64.h>
>>   
>> @@ -1284,14 +1284,9 @@ int venus_helper_vb2_buf_init(struct vb2_buffer *vb)
>>   	struct venus_inst *inst = vb2_get_drv_priv(vb->vb2_queue);
>>   	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
>>   	struct venus_buffer *buf = to_venus_buffer(vbuf);
>> -	struct sg_table *sgt;
>> -
>> -	sgt = vb2_dma_sg_plane_desc(vb, 0);
>> -	if (!sgt)
>> -		return -EFAULT;
>>   
>>   	buf->size = vb2_plane_size(vb, 0);
>> -	buf->dma_addr = sg_dma_address(sgt->sgl);
> 
> Can we do it:
> 
> 	if (WARN_ON(sgt->nents > 1))
> 		return -EFAULT;
> 
> I understand that logically using dma-sg when the flat layout is
> expected by the hardware is wrong, but I haven't seen issues until now.
> 
>> +	buf->dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0);
>>   
>>   	if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
>>   		list_add_tail(&buf->reg_list, &inst->registeredbufs);
>> diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
>> index 8488411204c3..3fb277c81aca 100644
>> --- a/drivers/media/platform/qcom/venus/vdec.c
>> +++ b/drivers/media/platform/qcom/venus/vdec.c
>> @@ -13,7 +13,7 @@
>>   #include <media/v4l2-event.h>
>>   #include <media/v4l2-ctrls.h>
>>   #include <media/v4l2-mem2mem.h>
>> -#include <media/videobuf2-dma-sg.h>
>> +#include <media/videobuf2-dma-contig.h>
>>   
>>   #include "hfi_venus_io.h"
>>   #include "hfi_parser.h"
>> @@ -1461,7 +1461,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
>>   	src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
>>   	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>>   	src_vq->ops = &vdec_vb2_ops;
>> -	src_vq->mem_ops = &vb2_dma_sg_memops;
>> +	src_vq->mem_ops = &vb2_dma_contig_memops;
>>   	src_vq->drv_priv = inst;
>>   	src_vq->buf_struct_size = sizeof(struct venus_buffer);
>>   	src_vq->allow_zero_bytesused = 1;
>> @@ -1475,7 +1475,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
>>   	dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
>>   	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>>   	dst_vq->ops = &vdec_vb2_ops;
>> -	dst_vq->mem_ops = &vb2_dma_sg_memops;
>> +	dst_vq->mem_ops = &vb2_dma_contig_memops;
>>   	dst_vq->drv_priv = inst;
>>   	dst_vq->buf_struct_size = sizeof(struct venus_buffer);
>>   	dst_vq->allow_zero_bytesused = 1;
>> diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
>> index 1c61602c5de1..a09550cd1dba 100644
>> --- a/drivers/media/platform/qcom/venus/venc.c
>> +++ b/drivers/media/platform/qcom/venus/venc.c
>> @@ -10,7 +10,7 @@
>>   #include <linux/pm_runtime.h>
>>   #include <linux/slab.h>
>>   #include <media/v4l2-mem2mem.h>
>> -#include <media/videobuf2-dma-sg.h>
>> +#include <media/videobuf2-dma-contig.h>
>>   #include <media/v4l2-ioctl.h>
>>   #include <media/v4l2-event.h>
>>   #include <media/v4l2-ctrls.h>
>> @@ -1001,7 +1001,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
>>   	src_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
>>   	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>>   	src_vq->ops = &venc_vb2_ops;
>> -	src_vq->mem_ops = &vb2_dma_sg_memops;
>> +	src_vq->mem_ops = &vb2_dma_contig_memops;
>>   	src_vq->drv_priv = inst;
>>   	src_vq->buf_struct_size = sizeof(struct venus_buffer);
>>   	src_vq->allow_zero_bytesused = 1;
>> @@ -1017,7 +1017,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
>>   	dst_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
>>   	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>>   	dst_vq->ops = &venc_vb2_ops;
>> -	dst_vq->mem_ops = &vb2_dma_sg_memops;
>> +	dst_vq->mem_ops = &vb2_dma_contig_memops;
>>   	dst_vq->drv_priv = inst;
>>   	dst_vq->buf_struct_size = sizeof(struct venus_buffer);
>>   	dst_vq->allow_zero_bytesused = 1;
>>
>
Tomasz Figa Dec. 15, 2020, 11:47 a.m. UTC | #4
On Tue, Dec 15, 2020 at 8:16 PM Stanimir Varbanov
<stanimir.varbanov@linaro.org> wrote:
>
> Hi,
>
> Cc: Robin
>
> On 12/14/20 2:57 PM, Alexandre Courbot wrote:
> > This driver uses the SG vb2 ops, but effectively only ever accesses the
> > first entry of the SG table, indicating that it expects a flat layout.
> > Switch it to use the contiguous ops to make sure this expected invariant
>
> Under what circumstances the sg table will has nents > 1? I came down to
> [1] but not sure I got it right.
>
> I'm afraid that for systems with low amount of system memory and when
> the memory become fragmented, the driver will not work. That's why I
> started with sg allocator.

It is exactly the opposite. The vb2-dma-contig allocator is "contig"
in terms of the DMA (aka IOVA) address space. In other words, it
guarantees that having one DMA address and length fully describes the
buffer. This seems to be the requirement of the hardware/firmware
handled by the venus driver. If the device is behind an IOMMU, which
is the case for the SoCs in question, the underlying DMA ops will
actually allocate a discontiguous set of pages, so it has nothing to
do to system memory amount or fragmentation. If for some reason the
IOMMU can't be used, there is no way around, the memory needs to be
contiguous because of the hardware/firmware/driver expectation.

On the other hand, the vb2-dma-sg allocator doesn't have any
continuity guarantees for the DMA, or any other, address space. The
current code works fine, because it calls dma_map_sg() on the whole
set of pages and that ends up mapping it contiguously in the IOVA
space, but that's just an implementation detail, not an API guarantee.

Best regards,
Tomasz

>
> [1]
> https://elixir.bootlin.com/linux/v5.10.1/source/drivers/iommu/dma-iommu.c#L782
>
> > is always enforced. Since the device is supposed to be behind an IOMMU
> > this should have little to none practical consequences beyond making the
> > driver not rely on a particular behavior of the SG implementation.
> >
> > Reported-by: Tomasz Figa <tfiga@chromium.org>
> > Signed-off-by: Alexandre Courbot <acourbot@chromium.org>
> > ---
> > Hi everyone,
> >
> > It probably doesn't hurt to fix this issue before some actual issue happens.
> > I have tested this patch on Chrome OS and playback was just as fine as with
> > the SG ops.
> >
> >  drivers/media/platform/Kconfig              | 2 +-
> >  drivers/media/platform/qcom/venus/helpers.c | 9 ++-------
> >  drivers/media/platform/qcom/venus/vdec.c    | 6 +++---
> >  drivers/media/platform/qcom/venus/venc.c    | 6 +++---
> >  4 files changed, 9 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
> > index 35a18d388f3f..d9d7954111f2 100644
> > --- a/drivers/media/platform/Kconfig
> > +++ b/drivers/media/platform/Kconfig
> > @@ -533,7 +533,7 @@ config VIDEO_QCOM_VENUS
> >       depends on INTERCONNECT || !INTERCONNECT
> >       select QCOM_MDT_LOADER if ARCH_QCOM
> >       select QCOM_SCM if ARCH_QCOM
> > -     select VIDEOBUF2_DMA_SG
> > +     select VIDEOBUF2_DMA_CONTIG
> >       select V4L2_MEM2MEM_DEV
> >       help
> >         This is a V4L2 driver for Qualcomm Venus video accelerator
> > diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
> > index 50439eb1ffea..859d260f002b 100644
> > --- a/drivers/media/platform/qcom/venus/helpers.c
> > +++ b/drivers/media/platform/qcom/venus/helpers.c
> > @@ -7,7 +7,7 @@
> >  #include <linux/mutex.h>
> >  #include <linux/slab.h>
> >  #include <linux/kernel.h>
> > -#include <media/videobuf2-dma-sg.h>
> > +#include <media/videobuf2-dma-contig.h>
> >  #include <media/v4l2-mem2mem.h>
> >  #include <asm/div64.h>
> >
> > @@ -1284,14 +1284,9 @@ int venus_helper_vb2_buf_init(struct vb2_buffer *vb)
> >       struct venus_inst *inst = vb2_get_drv_priv(vb->vb2_queue);
> >       struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> >       struct venus_buffer *buf = to_venus_buffer(vbuf);
> > -     struct sg_table *sgt;
> > -
> > -     sgt = vb2_dma_sg_plane_desc(vb, 0);
> > -     if (!sgt)
> > -             return -EFAULT;
> >
> >       buf->size = vb2_plane_size(vb, 0);
> > -     buf->dma_addr = sg_dma_address(sgt->sgl);
>
> Can we do it:
>
>         if (WARN_ON(sgt->nents > 1))
>                 return -EFAULT;
>
> I understand that logically using dma-sg when the flat layout is
> expected by the hardware is wrong, but I haven't seen issues until now.
>
> > +     buf->dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0);
> >
> >       if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
> >               list_add_tail(&buf->reg_list, &inst->registeredbufs);
> > diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
> > index 8488411204c3..3fb277c81aca 100644
> > --- a/drivers/media/platform/qcom/venus/vdec.c
> > +++ b/drivers/media/platform/qcom/venus/vdec.c
> > @@ -13,7 +13,7 @@
> >  #include <media/v4l2-event.h>
> >  #include <media/v4l2-ctrls.h>
> >  #include <media/v4l2-mem2mem.h>
> > -#include <media/videobuf2-dma-sg.h>
> > +#include <media/videobuf2-dma-contig.h>
> >
> >  #include "hfi_venus_io.h"
> >  #include "hfi_parser.h"
> > @@ -1461,7 +1461,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
> >       src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
> >       src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> >       src_vq->ops = &vdec_vb2_ops;
> > -     src_vq->mem_ops = &vb2_dma_sg_memops;
> > +     src_vq->mem_ops = &vb2_dma_contig_memops;
> >       src_vq->drv_priv = inst;
> >       src_vq->buf_struct_size = sizeof(struct venus_buffer);
> >       src_vq->allow_zero_bytesused = 1;
> > @@ -1475,7 +1475,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
> >       dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
> >       dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> >       dst_vq->ops = &vdec_vb2_ops;
> > -     dst_vq->mem_ops = &vb2_dma_sg_memops;
> > +     dst_vq->mem_ops = &vb2_dma_contig_memops;
> >       dst_vq->drv_priv = inst;
> >       dst_vq->buf_struct_size = sizeof(struct venus_buffer);
> >       dst_vq->allow_zero_bytesused = 1;
> > diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
> > index 1c61602c5de1..a09550cd1dba 100644
> > --- a/drivers/media/platform/qcom/venus/venc.c
> > +++ b/drivers/media/platform/qcom/venus/venc.c
> > @@ -10,7 +10,7 @@
> >  #include <linux/pm_runtime.h>
> >  #include <linux/slab.h>
> >  #include <media/v4l2-mem2mem.h>
> > -#include <media/videobuf2-dma-sg.h>
> > +#include <media/videobuf2-dma-contig.h>
> >  #include <media/v4l2-ioctl.h>
> >  #include <media/v4l2-event.h>
> >  #include <media/v4l2-ctrls.h>
> > @@ -1001,7 +1001,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
> >       src_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
> >       src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> >       src_vq->ops = &venc_vb2_ops;
> > -     src_vq->mem_ops = &vb2_dma_sg_memops;
> > +     src_vq->mem_ops = &vb2_dma_contig_memops;
> >       src_vq->drv_priv = inst;
> >       src_vq->buf_struct_size = sizeof(struct venus_buffer);
> >       src_vq->allow_zero_bytesused = 1;
> > @@ -1017,7 +1017,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
> >       dst_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
> >       dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> >       dst_vq->ops = &venc_vb2_ops;
> > -     dst_vq->mem_ops = &vb2_dma_sg_memops;
> > +     dst_vq->mem_ops = &vb2_dma_contig_memops;
> >       dst_vq->drv_priv = inst;
> >       dst_vq->buf_struct_size = sizeof(struct venus_buffer);
> >       dst_vq->allow_zero_bytesused = 1;
> >
>
> --
> regards,
> Stan
Robin Murphy Dec. 15, 2020, 1:32 p.m. UTC | #5
On 2020-12-15 11:47, Tomasz Figa wrote:
> On Tue, Dec 15, 2020 at 8:16 PM Stanimir Varbanov
> <stanimir.varbanov@linaro.org> wrote:
>>
>> Hi,
>>
>> Cc: Robin
>>
>> On 12/14/20 2:57 PM, Alexandre Courbot wrote:
>>> This driver uses the SG vb2 ops, but effectively only ever accesses the
>>> first entry of the SG table, indicating that it expects a flat layout.
>>> Switch it to use the contiguous ops to make sure this expected invariant
>>
>> Under what circumstances the sg table will has nents > 1? I came down to
>> [1] but not sure I got it right.
>>
>> I'm afraid that for systems with low amount of system memory and when
>> the memory become fragmented, the driver will not work. That's why I
>> started with sg allocator.
> 
> It is exactly the opposite. The vb2-dma-contig allocator is "contig"
> in terms of the DMA (aka IOVA) address space. In other words, it
> guarantees that having one DMA address and length fully describes the
> buffer. This seems to be the requirement of the hardware/firmware
> handled by the venus driver. If the device is behind an IOMMU, which
> is the case for the SoCs in question, the underlying DMA ops will
> actually allocate a discontiguous set of pages, so it has nothing to
> do to system memory amount or fragmentation. If for some reason the
> IOMMU can't be used, there is no way around, the memory needs to be
> contiguous because of the hardware/firmware/driver expectation.
> 
> On the other hand, the vb2-dma-sg allocator doesn't have any
> continuity guarantees for the DMA, or any other, address space.

Yes, intuitively one would assume that the sg code was for devices with 
native scatter-gather capability that can deal with an actual set of 
buffer descriptors, rather than just a single pointer (which is the 
original purpose of scatterlists, after all). I've always been slightly 
puzzled why the two seem to be quite so similar.

> The
> current code works fine, because it calls dma_map_sg() on the whole
> set of pages and that ends up mapping it contiguously in the IOVA
> space, but that's just an implementation detail, not an API guarantee.

Oh, the fun we've had over that implementation detail! :P

Robin.

> Best regards,
> Tomasz
> 
>>
>> [1]
>> https://elixir.bootlin.com/linux/v5.10.1/source/drivers/iommu/dma-iommu.c#L782
>>
>>> is always enforced. Since the device is supposed to be behind an IOMMU
>>> this should have little to none practical consequences beyond making the
>>> driver not rely on a particular behavior of the SG implementation.
>>>
>>> Reported-by: Tomasz Figa <tfiga@chromium.org>
>>> Signed-off-by: Alexandre Courbot <acourbot@chromium.org>
>>> ---
>>> Hi everyone,
>>>
>>> It probably doesn't hurt to fix this issue before some actual issue happens.
>>> I have tested this patch on Chrome OS and playback was just as fine as with
>>> the SG ops.
>>>
>>>   drivers/media/platform/Kconfig              | 2 +-
>>>   drivers/media/platform/qcom/venus/helpers.c | 9 ++-------
>>>   drivers/media/platform/qcom/venus/vdec.c    | 6 +++---
>>>   drivers/media/platform/qcom/venus/venc.c    | 6 +++---
>>>   4 files changed, 9 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
>>> index 35a18d388f3f..d9d7954111f2 100644
>>> --- a/drivers/media/platform/Kconfig
>>> +++ b/drivers/media/platform/Kconfig
>>> @@ -533,7 +533,7 @@ config VIDEO_QCOM_VENUS
>>>        depends on INTERCONNECT || !INTERCONNECT
>>>        select QCOM_MDT_LOADER if ARCH_QCOM
>>>        select QCOM_SCM if ARCH_QCOM
>>> -     select VIDEOBUF2_DMA_SG
>>> +     select VIDEOBUF2_DMA_CONTIG
>>>        select V4L2_MEM2MEM_DEV
>>>        help
>>>          This is a V4L2 driver for Qualcomm Venus video accelerator
>>> diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
>>> index 50439eb1ffea..859d260f002b 100644
>>> --- a/drivers/media/platform/qcom/venus/helpers.c
>>> +++ b/drivers/media/platform/qcom/venus/helpers.c
>>> @@ -7,7 +7,7 @@
>>>   #include <linux/mutex.h>
>>>   #include <linux/slab.h>
>>>   #include <linux/kernel.h>
>>> -#include <media/videobuf2-dma-sg.h>
>>> +#include <media/videobuf2-dma-contig.h>
>>>   #include <media/v4l2-mem2mem.h>
>>>   #include <asm/div64.h>
>>>
>>> @@ -1284,14 +1284,9 @@ int venus_helper_vb2_buf_init(struct vb2_buffer *vb)
>>>        struct venus_inst *inst = vb2_get_drv_priv(vb->vb2_queue);
>>>        struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
>>>        struct venus_buffer *buf = to_venus_buffer(vbuf);
>>> -     struct sg_table *sgt;
>>> -
>>> -     sgt = vb2_dma_sg_plane_desc(vb, 0);
>>> -     if (!sgt)
>>> -             return -EFAULT;
>>>
>>>        buf->size = vb2_plane_size(vb, 0);
>>> -     buf->dma_addr = sg_dma_address(sgt->sgl);
>>
>> Can we do it:
>>
>>          if (WARN_ON(sgt->nents > 1))
>>                  return -EFAULT;
>>
>> I understand that logically using dma-sg when the flat layout is
>> expected by the hardware is wrong, but I haven't seen issues until now.
>>
>>> +     buf->dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0);
>>>
>>>        if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
>>>                list_add_tail(&buf->reg_list, &inst->registeredbufs);
>>> diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
>>> index 8488411204c3..3fb277c81aca 100644
>>> --- a/drivers/media/platform/qcom/venus/vdec.c
>>> +++ b/drivers/media/platform/qcom/venus/vdec.c
>>> @@ -13,7 +13,7 @@
>>>   #include <media/v4l2-event.h>
>>>   #include <media/v4l2-ctrls.h>
>>>   #include <media/v4l2-mem2mem.h>
>>> -#include <media/videobuf2-dma-sg.h>
>>> +#include <media/videobuf2-dma-contig.h>
>>>
>>>   #include "hfi_venus_io.h"
>>>   #include "hfi_parser.h"
>>> @@ -1461,7 +1461,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
>>>        src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
>>>        src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>>>        src_vq->ops = &vdec_vb2_ops;
>>> -     src_vq->mem_ops = &vb2_dma_sg_memops;
>>> +     src_vq->mem_ops = &vb2_dma_contig_memops;
>>>        src_vq->drv_priv = inst;
>>>        src_vq->buf_struct_size = sizeof(struct venus_buffer);
>>>        src_vq->allow_zero_bytesused = 1;
>>> @@ -1475,7 +1475,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
>>>        dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
>>>        dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>>>        dst_vq->ops = &vdec_vb2_ops;
>>> -     dst_vq->mem_ops = &vb2_dma_sg_memops;
>>> +     dst_vq->mem_ops = &vb2_dma_contig_memops;
>>>        dst_vq->drv_priv = inst;
>>>        dst_vq->buf_struct_size = sizeof(struct venus_buffer);
>>>        dst_vq->allow_zero_bytesused = 1;
>>> diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
>>> index 1c61602c5de1..a09550cd1dba 100644
>>> --- a/drivers/media/platform/qcom/venus/venc.c
>>> +++ b/drivers/media/platform/qcom/venus/venc.c
>>> @@ -10,7 +10,7 @@
>>>   #include <linux/pm_runtime.h>
>>>   #include <linux/slab.h>
>>>   #include <media/v4l2-mem2mem.h>
>>> -#include <media/videobuf2-dma-sg.h>
>>> +#include <media/videobuf2-dma-contig.h>
>>>   #include <media/v4l2-ioctl.h>
>>>   #include <media/v4l2-event.h>
>>>   #include <media/v4l2-ctrls.h>
>>> @@ -1001,7 +1001,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
>>>        src_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
>>>        src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>>>        src_vq->ops = &venc_vb2_ops;
>>> -     src_vq->mem_ops = &vb2_dma_sg_memops;
>>> +     src_vq->mem_ops = &vb2_dma_contig_memops;
>>>        src_vq->drv_priv = inst;
>>>        src_vq->buf_struct_size = sizeof(struct venus_buffer);
>>>        src_vq->allow_zero_bytesused = 1;
>>> @@ -1017,7 +1017,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
>>>        dst_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
>>>        dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>>>        dst_vq->ops = &venc_vb2_ops;
>>> -     dst_vq->mem_ops = &vb2_dma_sg_memops;
>>> +     dst_vq->mem_ops = &vb2_dma_contig_memops;
>>>        dst_vq->drv_priv = inst;
>>>        dst_vq->buf_struct_size = sizeof(struct venus_buffer);
>>>        dst_vq->allow_zero_bytesused = 1;
>>>
>>
>> --
>> regards,
>> Stan
Stanimir Varbanov Dec. 15, 2020, 1:54 p.m. UTC | #6
Hi Tomasz,

On 12/15/20 1:47 PM, Tomasz Figa wrote:
> On Tue, Dec 15, 2020 at 8:16 PM Stanimir Varbanov
> <stanimir.varbanov@linaro.org> wrote:
>>
>> Hi,
>>
>> Cc: Robin
>>
>> On 12/14/20 2:57 PM, Alexandre Courbot wrote:
>>> This driver uses the SG vb2 ops, but effectively only ever accesses the
>>> first entry of the SG table, indicating that it expects a flat layout.
>>> Switch it to use the contiguous ops to make sure this expected invariant
>>
>> Under what circumstances the sg table will has nents > 1? I came down to
>> [1] but not sure I got it right.
>>
>> I'm afraid that for systems with low amount of system memory and when
>> the memory become fragmented, the driver will not work. That's why I
>> started with sg allocator.
> 
> It is exactly the opposite. The vb2-dma-contig allocator is "contig"
> in terms of the DMA (aka IOVA) address space. In other words, it
> guarantees that having one DMA address and length fully describes the

Ahh, I missed that part. Looks like I misunderstood videobu2 contig
allocator.

> buffer. This seems to be the requirement of the hardware/firmware
> handled by the venus driver. If the device is behind an IOMMU, which
> is the case for the SoCs in question, the underlying DMA ops will
> actually allocate a discontiguous set of pages, so it has nothing to
> do to system memory amount or fragmentation. If for some reason the
> IOMMU can't be used, there is no way around, the memory needs to be
> contiguous because of the hardware/firmware/driver expectation.
> 
> On the other hand, the vb2-dma-sg allocator doesn't have any
> continuity guarantees for the DMA, or any other, address space. The
> current code works fine, because it calls dma_map_sg() on the whole
> set of pages and that ends up mapping it contiguously in the IOVA
> space, but that's just an implementation detail, not an API guarantee.

It was good to know. Thanks for the explanation.

> 
> Best regards,
> Tomasz
> 
>>
>> [1]
>> https://elixir.bootlin.com/linux/v5.10.1/source/drivers/iommu/dma-iommu.c#L782
>>
>>> is always enforced. Since the device is supposed to be behind an IOMMU
>>> this should have little to none practical consequences beyond making the
>>> driver not rely on a particular behavior of the SG implementation.
>>>
>>> Reported-by: Tomasz Figa <tfiga@chromium.org>
>>> Signed-off-by: Alexandre Courbot <acourbot@chromium.org>
>>> ---
>>> Hi everyone,
>>>
>>> It probably doesn't hurt to fix this issue before some actual issue happens.
>>> I have tested this patch on Chrome OS and playback was just as fine as with
>>> the SG ops.
>>>
>>>  drivers/media/platform/Kconfig              | 2 +-
>>>  drivers/media/platform/qcom/venus/helpers.c | 9 ++-------
>>>  drivers/media/platform/qcom/venus/vdec.c    | 6 +++---
>>>  drivers/media/platform/qcom/venus/venc.c    | 6 +++---
>>>  4 files changed, 9 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
>>> index 35a18d388f3f..d9d7954111f2 100644
>>> --- a/drivers/media/platform/Kconfig
>>> +++ b/drivers/media/platform/Kconfig
>>> @@ -533,7 +533,7 @@ config VIDEO_QCOM_VENUS
>>>       depends on INTERCONNECT || !INTERCONNECT
>>>       select QCOM_MDT_LOADER if ARCH_QCOM
>>>       select QCOM_SCM if ARCH_QCOM
>>> -     select VIDEOBUF2_DMA_SG
>>> +     select VIDEOBUF2_DMA_CONTIG
>>>       select V4L2_MEM2MEM_DEV
>>>       help
>>>         This is a V4L2 driver for Qualcomm Venus video accelerator
>>> diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
>>> index 50439eb1ffea..859d260f002b 100644
>>> --- a/drivers/media/platform/qcom/venus/helpers.c
>>> +++ b/drivers/media/platform/qcom/venus/helpers.c
>>> @@ -7,7 +7,7 @@
>>>  #include <linux/mutex.h>
>>>  #include <linux/slab.h>
>>>  #include <linux/kernel.h>
>>> -#include <media/videobuf2-dma-sg.h>
>>> +#include <media/videobuf2-dma-contig.h>
>>>  #include <media/v4l2-mem2mem.h>
>>>  #include <asm/div64.h>
>>>
>>> @@ -1284,14 +1284,9 @@ int venus_helper_vb2_buf_init(struct vb2_buffer *vb)
>>>       struct venus_inst *inst = vb2_get_drv_priv(vb->vb2_queue);
>>>       struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
>>>       struct venus_buffer *buf = to_venus_buffer(vbuf);
>>> -     struct sg_table *sgt;
>>> -
>>> -     sgt = vb2_dma_sg_plane_desc(vb, 0);
>>> -     if (!sgt)
>>> -             return -EFAULT;
>>>
>>>       buf->size = vb2_plane_size(vb, 0);
>>> -     buf->dma_addr = sg_dma_address(sgt->sgl);
>>
>> Can we do it:
>>
>>         if (WARN_ON(sgt->nents > 1))
>>                 return -EFAULT;
>>
>> I understand that logically using dma-sg when the flat layout is
>> expected by the hardware is wrong, but I haven't seen issues until now.
>>
>>> +     buf->dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0);
>>>
>>>       if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
>>>               list_add_tail(&buf->reg_list, &inst->registeredbufs);
>>> diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
>>> index 8488411204c3..3fb277c81aca 100644
>>> --- a/drivers/media/platform/qcom/venus/vdec.c
>>> +++ b/drivers/media/platform/qcom/venus/vdec.c
>>> @@ -13,7 +13,7 @@
>>>  #include <media/v4l2-event.h>
>>>  #include <media/v4l2-ctrls.h>
>>>  #include <media/v4l2-mem2mem.h>
>>> -#include <media/videobuf2-dma-sg.h>
>>> +#include <media/videobuf2-dma-contig.h>
>>>
>>>  #include "hfi_venus_io.h"
>>>  #include "hfi_parser.h"
>>> @@ -1461,7 +1461,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
>>>       src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
>>>       src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>>>       src_vq->ops = &vdec_vb2_ops;
>>> -     src_vq->mem_ops = &vb2_dma_sg_memops;
>>> +     src_vq->mem_ops = &vb2_dma_contig_memops;
>>>       src_vq->drv_priv = inst;
>>>       src_vq->buf_struct_size = sizeof(struct venus_buffer);
>>>       src_vq->allow_zero_bytesused = 1;
>>> @@ -1475,7 +1475,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
>>>       dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
>>>       dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>>>       dst_vq->ops = &vdec_vb2_ops;
>>> -     dst_vq->mem_ops = &vb2_dma_sg_memops;
>>> +     dst_vq->mem_ops = &vb2_dma_contig_memops;
>>>       dst_vq->drv_priv = inst;
>>>       dst_vq->buf_struct_size = sizeof(struct venus_buffer);
>>>       dst_vq->allow_zero_bytesused = 1;
>>> diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
>>> index 1c61602c5de1..a09550cd1dba 100644
>>> --- a/drivers/media/platform/qcom/venus/venc.c
>>> +++ b/drivers/media/platform/qcom/venus/venc.c
>>> @@ -10,7 +10,7 @@
>>>  #include <linux/pm_runtime.h>
>>>  #include <linux/slab.h>
>>>  #include <media/v4l2-mem2mem.h>
>>> -#include <media/videobuf2-dma-sg.h>
>>> +#include <media/videobuf2-dma-contig.h>
>>>  #include <media/v4l2-ioctl.h>
>>>  #include <media/v4l2-event.h>
>>>  #include <media/v4l2-ctrls.h>
>>> @@ -1001,7 +1001,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
>>>       src_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
>>>       src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>>>       src_vq->ops = &venc_vb2_ops;
>>> -     src_vq->mem_ops = &vb2_dma_sg_memops;
>>> +     src_vq->mem_ops = &vb2_dma_contig_memops;
>>>       src_vq->drv_priv = inst;
>>>       src_vq->buf_struct_size = sizeof(struct venus_buffer);
>>>       src_vq->allow_zero_bytesused = 1;
>>> @@ -1017,7 +1017,7 @@ static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
>>>       dst_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
>>>       dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
>>>       dst_vq->ops = &venc_vb2_ops;
>>> -     dst_vq->mem_ops = &vb2_dma_sg_memops;
>>> +     dst_vq->mem_ops = &vb2_dma_contig_memops;
>>>       dst_vq->drv_priv = inst;
>>>       dst_vq->buf_struct_size = sizeof(struct venus_buffer);
>>>       dst_vq->allow_zero_bytesused = 1;
>>>
>>
>> --
>> regards,
>> Stan
Nicolas Dufresne Dec. 15, 2020, 7:21 p.m. UTC | #7
Le mardi 15 décembre 2020 à 15:54 +0200, Stanimir Varbanov a écrit :
> Hi Tomasz,
> 
> On 12/15/20 1:47 PM, Tomasz Figa wrote:
> > On Tue, Dec 15, 2020 at 8:16 PM Stanimir Varbanov
> > <stanimir.varbanov@linaro.org> wrote:
> > > 
> > > Hi,
> > > 
> > > Cc: Robin
> > > 
> > > On 12/14/20 2:57 PM, Alexandre Courbot wrote:
> > > > This driver uses the SG vb2 ops, but effectively only ever accesses the
> > > > first entry of the SG table, indicating that it expects a flat layout.
> > > > Switch it to use the contiguous ops to make sure this expected invariant
> > > 
> > > Under what circumstances the sg table will has nents > 1? I came down to
> > > [1] but not sure I got it right.
> > > 
> > > I'm afraid that for systems with low amount of system memory and when
> > > the memory become fragmented, the driver will not work. That's why I
> > > started with sg allocator.
> > 
> > It is exactly the opposite. The vb2-dma-contig allocator is "contig"
> > in terms of the DMA (aka IOVA) address space. In other words, it
> > guarantees that having one DMA address and length fully describes the
> 
> Ahh, I missed that part. Looks like I misunderstood videobu2 contig
> allocator.

I'm learning everyday too, but I'm surprised I don't see a call to
vb2_dma_contig_set_max_seg_size() in this driver (I could also just have missed
a patch when overlooking this thread) ?

The reason I'm asking, doc says it should be called by driver supporting IOMMU,
which seems to be the case for such drivers (MFC, exynos4-is, exynos-gsc, mtk-
mdp, s5p-g2d, hantro, rkvdec, zoran, ti-vpe, ..). I posting it, worst case it's
all covered and we are good, otherwise perhaps a downstream patch didn't make it
?

/**
 * vb2_dma_contig_set_max_seg_size() - configure DMA max segment size
 * @dev:        device for configuring DMA parameters
 * @size:       size of DMA max segment size to set
 *
 * To allow mapping the scatter-list into a single chunk in the DMA
 * address space, the device is required to have the DMA max segment
 * size parameter set to a value larger than the buffer size. Otherwise,
 * the DMA-mapping subsystem will split the mapping into max segment
 * size chunks. This function sets the DMA max segment size
 * parameter to let DMA-mapping map a buffer as a single chunk in DMA
 * address space.
 * This code assumes that the DMA-mapping subsystem will merge all
 * scatterlist segments if this is really possible (for example when
 * an IOMMU is available and enabled).
 * Ideally, this parameter should be set by the generic bus code, but it
 * is left with the default 64KiB value due to historical litmiations in
 * other subsystems (like limited USB host drivers) and there no good
 * place to set it to the proper value.
 * This function should be called from the drivers, which are known to
 * operate on platforms with IOMMU and provide access to shared buffers
 * (either USERPTR or DMABUF). This should be done before initializing
 * videobuf2 queue.
 */

regards,
Nicolas

> 
> > buffer. This seems to be the requirement of the hardware/firmware
> > handled by the venus driver. If the device is behind an IOMMU, which
> > is the case for the SoCs in question, the underlying DMA ops will
> > actually allocate a discontiguous set of pages, so it has nothing to
> > do to system memory amount or fragmentation. If for some reason the
> > IOMMU can't be used, there is no way around, the memory needs to be
> > contiguous because of the hardware/firmware/driver expectation.
> > 
> > On the other hand, the vb2-dma-sg allocator doesn't have any
> > continuity guarantees for the DMA, or any other, address space. The
> > current code works fine, because it calls dma_map_sg() on the whole
> > set of pages and that ends up mapping it contiguously in the IOVA
> > space, but that's just an implementation detail, not an API guarantee.
> 
> It was good to know. Thanks for the explanation.
> 
> > 
> > Best regards,
> > Tomasz
> > 
> > > 
> > > [1]
> > > https://elixir.bootlin.com/linux/v5.10.1/source/drivers/iommu/dma-iommu.c#L782
> > > 
> > > > is always enforced. Since the device is supposed to be behind an IOMMU
> > > > this should have little to none practical consequences beyond making the
> > > > driver not rely on a particular behavior of the SG implementation.
> > > > 
> > > > Reported-by: Tomasz Figa <tfiga@chromium.org>
> > > > Signed-off-by: Alexandre Courbot <acourbot@chromium.org>
> > > > ---
> > > > Hi everyone,
> > > > 
> > > > It probably doesn't hurt to fix this issue before some actual issue
> > > > happens.
> > > > I have tested this patch on Chrome OS and playback was just as fine as
> > > > with
> > > > the SG ops.
> > > > 
> > > >  drivers/media/platform/Kconfig              | 2 +-
> > > >  drivers/media/platform/qcom/venus/helpers.c | 9 ++-------
> > > >  drivers/media/platform/qcom/venus/vdec.c    | 6 +++---
> > > >  drivers/media/platform/qcom/venus/venc.c    | 6 +++---
> > > >  4 files changed, 9 insertions(+), 14 deletions(-)
> > > > 
> > > > diff --git a/drivers/media/platform/Kconfig
> > > > b/drivers/media/platform/Kconfig
> > > > index 35a18d388f3f..d9d7954111f2 100644
> > > > --- a/drivers/media/platform/Kconfig
> > > > +++ b/drivers/media/platform/Kconfig
> > > > @@ -533,7 +533,7 @@ config VIDEO_QCOM_VENUS
> > > >       depends on INTERCONNECT || !INTERCONNECT
> > > >       select QCOM_MDT_LOADER if ARCH_QCOM
> > > >       select QCOM_SCM if ARCH_QCOM
> > > > -     select VIDEOBUF2_DMA_SG
> > > > +     select VIDEOBUF2_DMA_CONTIG
> > > >       select V4L2_MEM2MEM_DEV
> > > >       help
> > > >         This is a V4L2 driver for Qualcomm Venus video accelerator
> > > > diff --git a/drivers/media/platform/qcom/venus/helpers.c
> > > > b/drivers/media/platform/qcom/venus/helpers.c
> > > > index 50439eb1ffea..859d260f002b 100644
> > > > --- a/drivers/media/platform/qcom/venus/helpers.c
> > > > +++ b/drivers/media/platform/qcom/venus/helpers.c
> > > > @@ -7,7 +7,7 @@
> > > >  #include <linux/mutex.h>
> > > >  #include <linux/slab.h>
> > > >  #include <linux/kernel.h>
> > > > -#include <media/videobuf2-dma-sg.h>
> > > > +#include <media/videobuf2-dma-contig.h>
> > > >  #include <media/v4l2-mem2mem.h>
> > > >  #include <asm/div64.h>
> > > > 
> > > > @@ -1284,14 +1284,9 @@ int venus_helper_vb2_buf_init(struct vb2_buffer
> > > > *vb)
> > > >       struct venus_inst *inst = vb2_get_drv_priv(vb->vb2_queue);
> > > >       struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> > > >       struct venus_buffer *buf = to_venus_buffer(vbuf);
> > > > -     struct sg_table *sgt;
> > > > -
> > > > -     sgt = vb2_dma_sg_plane_desc(vb, 0);
> > > > -     if (!sgt)
> > > > -             return -EFAULT;
> > > > 
> > > >       buf->size = vb2_plane_size(vb, 0);
> > > > -     buf->dma_addr = sg_dma_address(sgt->sgl);
> > > 
> > > Can we do it:
> > > 
> > >         if (WARN_ON(sgt->nents > 1))
> > >                 return -EFAULT;
> > > 
> > > I understand that logically using dma-sg when the flat layout is
> > > expected by the hardware is wrong, but I haven't seen issues until now.
> > > 
> > > > +     buf->dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0);
> > > > 
> > > >       if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
> > > >               list_add_tail(&buf->reg_list, &inst->registeredbufs);
> > > > diff --git a/drivers/media/platform/qcom/venus/vdec.c
> > > > b/drivers/media/platform/qcom/venus/vdec.c
> > > > index 8488411204c3..3fb277c81aca 100644
> > > > --- a/drivers/media/platform/qcom/venus/vdec.c
> > > > +++ b/drivers/media/platform/qcom/venus/vdec.c
> > > > @@ -13,7 +13,7 @@
> > > >  #include <media/v4l2-event.h>
> > > >  #include <media/v4l2-ctrls.h>
> > > >  #include <media/v4l2-mem2mem.h>
> > > > -#include <media/videobuf2-dma-sg.h>
> > > > +#include <media/videobuf2-dma-contig.h>
> > > > 
> > > >  #include "hfi_venus_io.h"
> > > >  #include "hfi_parser.h"
> > > > @@ -1461,7 +1461,7 @@ static int m2m_queue_init(void *priv, struct
> > > > vb2_queue *src_vq,
> > > >       src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
> > > >       src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> > > >       src_vq->ops = &vdec_vb2_ops;
> > > > -     src_vq->mem_ops = &vb2_dma_sg_memops;
> > > > +     src_vq->mem_ops = &vb2_dma_contig_memops;
> > > >       src_vq->drv_priv = inst;
> > > >       src_vq->buf_struct_size = sizeof(struct venus_buffer);
> > > >       src_vq->allow_zero_bytesused = 1;
> > > > @@ -1475,7 +1475,7 @@ static int m2m_queue_init(void *priv, struct
> > > > vb2_queue *src_vq,
> > > >       dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
> > > >       dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> > > >       dst_vq->ops = &vdec_vb2_ops;
> > > > -     dst_vq->mem_ops = &vb2_dma_sg_memops;
> > > > +     dst_vq->mem_ops = &vb2_dma_contig_memops;
> > > >       dst_vq->drv_priv = inst;
> > > >       dst_vq->buf_struct_size = sizeof(struct venus_buffer);
> > > >       dst_vq->allow_zero_bytesused = 1;
> > > > diff --git a/drivers/media/platform/qcom/venus/venc.c
> > > > b/drivers/media/platform/qcom/venus/venc.c
> > > > index 1c61602c5de1..a09550cd1dba 100644
> > > > --- a/drivers/media/platform/qcom/venus/venc.c
> > > > +++ b/drivers/media/platform/qcom/venus/venc.c
> > > > @@ -10,7 +10,7 @@
> > > >  #include <linux/pm_runtime.h>
> > > >  #include <linux/slab.h>
> > > >  #include <media/v4l2-mem2mem.h>
> > > > -#include <media/videobuf2-dma-sg.h>
> > > > +#include <media/videobuf2-dma-contig.h>
> > > >  #include <media/v4l2-ioctl.h>
> > > >  #include <media/v4l2-event.h>
> > > >  #include <media/v4l2-ctrls.h>
> > > > @@ -1001,7 +1001,7 @@ static int m2m_queue_init(void *priv, struct
> > > > vb2_queue *src_vq,
> > > >       src_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
> > > >       src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> > > >       src_vq->ops = &venc_vb2_ops;
> > > > -     src_vq->mem_ops = &vb2_dma_sg_memops;
> > > > +     src_vq->mem_ops = &vb2_dma_contig_memops;
> > > >       src_vq->drv_priv = inst;
> > > >       src_vq->buf_struct_size = sizeof(struct venus_buffer);
> > > >       src_vq->allow_zero_bytesused = 1;
> > > > @@ -1017,7 +1017,7 @@ static int m2m_queue_init(void *priv, struct
> > > > vb2_queue *src_vq,
> > > >       dst_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
> > > >       dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> > > >       dst_vq->ops = &venc_vb2_ops;
> > > > -     dst_vq->mem_ops = &vb2_dma_sg_memops;
> > > > +     dst_vq->mem_ops = &vb2_dma_contig_memops;
> > > >       dst_vq->drv_priv = inst;
> > > >       dst_vq->buf_struct_size = sizeof(struct venus_buffer);
> > > >       dst_vq->allow_zero_bytesused = 1;
> > > > 
> > > 
> > > --
> > > regards,
> > > Stan
>
Tomasz Figa Dec. 16, 2020, 3:15 a.m. UTC | #8
On Wed, Dec 16, 2020 at 4:21 AM Nicolas Dufresne <nicolas@ndufresne.ca> wrote:
>
> Le mardi 15 décembre 2020 à 15:54 +0200, Stanimir Varbanov a écrit :
> > Hi Tomasz,
> >
> > On 12/15/20 1:47 PM, Tomasz Figa wrote:
> > > On Tue, Dec 15, 2020 at 8:16 PM Stanimir Varbanov
> > > <stanimir.varbanov@linaro.org> wrote:
> > > >
> > > > Hi,
> > > >
> > > > Cc: Robin
> > > >
> > > > On 12/14/20 2:57 PM, Alexandre Courbot wrote:
> > > > > This driver uses the SG vb2 ops, but effectively only ever accesses the
> > > > > first entry of the SG table, indicating that it expects a flat layout.
> > > > > Switch it to use the contiguous ops to make sure this expected invariant
> > > >
> > > > Under what circumstances the sg table will has nents > 1? I came down to
> > > > [1] but not sure I got it right.
> > > >
> > > > I'm afraid that for systems with low amount of system memory and when
> > > > the memory become fragmented, the driver will not work. That's why I
> > > > started with sg allocator.
> > >
> > > It is exactly the opposite. The vb2-dma-contig allocator is "contig"
> > > in terms of the DMA (aka IOVA) address space. In other words, it
> > > guarantees that having one DMA address and length fully describes the
> >
> > Ahh, I missed that part. Looks like I misunderstood videobu2 contig
> > allocator.
>
> I'm learning everyday too, but I'm surprised I don't see a call to
> vb2_dma_contig_set_max_seg_size() in this driver (I could also just have missed
> a patch when overlooking this thread) ?
>
> The reason I'm asking, doc says it should be called by driver supporting IOMMU,
> which seems to be the case for such drivers (MFC, exynos4-is, exynos-gsc, mtk-
> mdp, s5p-g2d, hantro, rkvdec, zoran, ti-vpe, ..). I posting it, worst case it's
> all covered and we are good, otherwise perhaps a downstream patch didn't make it
> ?
>
> /**
>  * vb2_dma_contig_set_max_seg_size() - configure DMA max segment size
>  * @dev:        device for configuring DMA parameters
>  * @size:       size of DMA max segment size to set
>  *
>  * To allow mapping the scatter-list into a single chunk in the DMA
>  * address space, the device is required to have the DMA max segment
>  * size parameter set to a value larger than the buffer size. Otherwise,
>  * the DMA-mapping subsystem will split the mapping into max segment
>  * size chunks. This function sets the DMA max segment size
>  * parameter to let DMA-mapping map a buffer as a single chunk in DMA
>  * address space.
>  * This code assumes that the DMA-mapping subsystem will merge all
>  * scatterlist segments if this is really possible (for example when
>  * an IOMMU is available and enabled).
>  * Ideally, this parameter should be set by the generic bus code, but it
>  * is left with the default 64KiB value due to historical litmiations in
>  * other subsystems (like limited USB host drivers) and there no good
>  * place to set it to the proper value.
>  * This function should be called from the drivers, which are known to
>  * operate on platforms with IOMMU and provide access to shared buffers
>  * (either USERPTR or DMABUF). This should be done before initializing
>  * videobuf2 queue.
>  */

It does call dma_set_max_seg_size() directly:
https://elixir.bootlin.com/linux/latest/source/drivers/media/platform/qcom/venus/core.c#L230

Actually, why do we even need a vb2 helper for this?

>
> regards,
> Nicolas
>
> >
> > > buffer. This seems to be the requirement of the hardware/firmware
> > > handled by the venus driver. If the device is behind an IOMMU, which
> > > is the case for the SoCs in question, the underlying DMA ops will
> > > actually allocate a discontiguous set of pages, so it has nothing to
> > > do to system memory amount or fragmentation. If for some reason the
> > > IOMMU can't be used, there is no way around, the memory needs to be
> > > contiguous because of the hardware/firmware/driver expectation.
> > >
> > > On the other hand, the vb2-dma-sg allocator doesn't have any
> > > continuity guarantees for the DMA, or any other, address space. The
> > > current code works fine, because it calls dma_map_sg() on the whole
> > > set of pages and that ends up mapping it contiguously in the IOVA
> > > space, but that's just an implementation detail, not an API guarantee.
> >
> > It was good to know. Thanks for the explanation.
> >
> > >
> > > Best regards,
> > > Tomasz
> > >
> > > >
> > > > [1]
> > > > https://elixir.bootlin.com/linux/v5.10.1/source/drivers/iommu/dma-iommu.c#L782
> > > >
> > > > > is always enforced. Since the device is supposed to be behind an IOMMU
> > > > > this should have little to none practical consequences beyond making the
> > > > > driver not rely on a particular behavior of the SG implementation.
> > > > >
> > > > > Reported-by: Tomasz Figa <tfiga@chromium.org>
> > > > > Signed-off-by: Alexandre Courbot <acourbot@chromium.org>
> > > > > ---
> > > > > Hi everyone,
> > > > >
> > > > > It probably doesn't hurt to fix this issue before some actual issue
> > > > > happens.
> > > > > I have tested this patch on Chrome OS and playback was just as fine as
> > > > > with
> > > > > the SG ops.
> > > > >
> > > > >  drivers/media/platform/Kconfig              | 2 +-
> > > > >  drivers/media/platform/qcom/venus/helpers.c | 9 ++-------
> > > > >  drivers/media/platform/qcom/venus/vdec.c    | 6 +++---
> > > > >  drivers/media/platform/qcom/venus/venc.c    | 6 +++---
> > > > >  4 files changed, 9 insertions(+), 14 deletions(-)
> > > > >
> > > > > diff --git a/drivers/media/platform/Kconfig
> > > > > b/drivers/media/platform/Kconfig
> > > > > index 35a18d388f3f..d9d7954111f2 100644
> > > > > --- a/drivers/media/platform/Kconfig
> > > > > +++ b/drivers/media/platform/Kconfig
> > > > > @@ -533,7 +533,7 @@ config VIDEO_QCOM_VENUS
> > > > >       depends on INTERCONNECT || !INTERCONNECT
> > > > >       select QCOM_MDT_LOADER if ARCH_QCOM
> > > > >       select QCOM_SCM if ARCH_QCOM
> > > > > -     select VIDEOBUF2_DMA_SG
> > > > > +     select VIDEOBUF2_DMA_CONTIG
> > > > >       select V4L2_MEM2MEM_DEV
> > > > >       help
> > > > >         This is a V4L2 driver for Qualcomm Venus video accelerator
> > > > > diff --git a/drivers/media/platform/qcom/venus/helpers.c
> > > > > b/drivers/media/platform/qcom/venus/helpers.c
> > > > > index 50439eb1ffea..859d260f002b 100644
> > > > > --- a/drivers/media/platform/qcom/venus/helpers.c
> > > > > +++ b/drivers/media/platform/qcom/venus/helpers.c
> > > > > @@ -7,7 +7,7 @@
> > > > >  #include <linux/mutex.h>
> > > > >  #include <linux/slab.h>
> > > > >  #include <linux/kernel.h>
> > > > > -#include <media/videobuf2-dma-sg.h>
> > > > > +#include <media/videobuf2-dma-contig.h>
> > > > >  #include <media/v4l2-mem2mem.h>
> > > > >  #include <asm/div64.h>
> > > > >
> > > > > @@ -1284,14 +1284,9 @@ int venus_helper_vb2_buf_init(struct vb2_buffer
> > > > > *vb)
> > > > >       struct venus_inst *inst = vb2_get_drv_priv(vb->vb2_queue);
> > > > >       struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> > > > >       struct venus_buffer *buf = to_venus_buffer(vbuf);
> > > > > -     struct sg_table *sgt;
> > > > > -
> > > > > -     sgt = vb2_dma_sg_plane_desc(vb, 0);
> > > > > -     if (!sgt)
> > > > > -             return -EFAULT;
> > > > >
> > > > >       buf->size = vb2_plane_size(vb, 0);
> > > > > -     buf->dma_addr = sg_dma_address(sgt->sgl);
> > > >
> > > > Can we do it:
> > > >
> > > >         if (WARN_ON(sgt->nents > 1))
> > > >                 return -EFAULT;
> > > >
> > > > I understand that logically using dma-sg when the flat layout is
> > > > expected by the hardware is wrong, but I haven't seen issues until now.
> > > >
> > > > > +     buf->dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0);
> > > > >
> > > > >       if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
> > > > >               list_add_tail(&buf->reg_list, &inst->registeredbufs);
> > > > > diff --git a/drivers/media/platform/qcom/venus/vdec.c
> > > > > b/drivers/media/platform/qcom/venus/vdec.c
> > > > > index 8488411204c3..3fb277c81aca 100644
> > > > > --- a/drivers/media/platform/qcom/venus/vdec.c
> > > > > +++ b/drivers/media/platform/qcom/venus/vdec.c
> > > > > @@ -13,7 +13,7 @@
> > > > >  #include <media/v4l2-event.h>
> > > > >  #include <media/v4l2-ctrls.h>
> > > > >  #include <media/v4l2-mem2mem.h>
> > > > > -#include <media/videobuf2-dma-sg.h>
> > > > > +#include <media/videobuf2-dma-contig.h>
> > > > >
> > > > >  #include "hfi_venus_io.h"
> > > > >  #include "hfi_parser.h"
> > > > > @@ -1461,7 +1461,7 @@ static int m2m_queue_init(void *priv, struct
> > > > > vb2_queue *src_vq,
> > > > >       src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
> > > > >       src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> > > > >       src_vq->ops = &vdec_vb2_ops;
> > > > > -     src_vq->mem_ops = &vb2_dma_sg_memops;
> > > > > +     src_vq->mem_ops = &vb2_dma_contig_memops;
> > > > >       src_vq->drv_priv = inst;
> > > > >       src_vq->buf_struct_size = sizeof(struct venus_buffer);
> > > > >       src_vq->allow_zero_bytesused = 1;
> > > > > @@ -1475,7 +1475,7 @@ static int m2m_queue_init(void *priv, struct
> > > > > vb2_queue *src_vq,
> > > > >       dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
> > > > >       dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> > > > >       dst_vq->ops = &vdec_vb2_ops;
> > > > > -     dst_vq->mem_ops = &vb2_dma_sg_memops;
> > > > > +     dst_vq->mem_ops = &vb2_dma_contig_memops;
> > > > >       dst_vq->drv_priv = inst;
> > > > >       dst_vq->buf_struct_size = sizeof(struct venus_buffer);
> > > > >       dst_vq->allow_zero_bytesused = 1;
> > > > > diff --git a/drivers/media/platform/qcom/venus/venc.c
> > > > > b/drivers/media/platform/qcom/venus/venc.c
> > > > > index 1c61602c5de1..a09550cd1dba 100644
> > > > > --- a/drivers/media/platform/qcom/venus/venc.c
> > > > > +++ b/drivers/media/platform/qcom/venus/venc.c
> > > > > @@ -10,7 +10,7 @@
> > > > >  #include <linux/pm_runtime.h>
> > > > >  #include <linux/slab.h>
> > > > >  #include <media/v4l2-mem2mem.h>
> > > > > -#include <media/videobuf2-dma-sg.h>
> > > > > +#include <media/videobuf2-dma-contig.h>
> > > > >  #include <media/v4l2-ioctl.h>
> > > > >  #include <media/v4l2-event.h>
> > > > >  #include <media/v4l2-ctrls.h>
> > > > > @@ -1001,7 +1001,7 @@ static int m2m_queue_init(void *priv, struct
> > > > > vb2_queue *src_vq,
> > > > >       src_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
> > > > >       src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> > > > >       src_vq->ops = &venc_vb2_ops;
> > > > > -     src_vq->mem_ops = &vb2_dma_sg_memops;
> > > > > +     src_vq->mem_ops = &vb2_dma_contig_memops;
> > > > >       src_vq->drv_priv = inst;
> > > > >       src_vq->buf_struct_size = sizeof(struct venus_buffer);
> > > > >       src_vq->allow_zero_bytesused = 1;
> > > > > @@ -1017,7 +1017,7 @@ static int m2m_queue_init(void *priv, struct
> > > > > vb2_queue *src_vq,
> > > > >       dst_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
> > > > >       dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> > > > >       dst_vq->ops = &venc_vb2_ops;
> > > > > -     dst_vq->mem_ops = &vb2_dma_sg_memops;
> > > > > +     dst_vq->mem_ops = &vb2_dma_contig_memops;
> > > > >       dst_vq->drv_priv = inst;
> > > > >       dst_vq->buf_struct_size = sizeof(struct venus_buffer);
> > > > >       dst_vq->allow_zero_bytesused = 1;
> > > > >
> > > >
> > > > --
> > > > regards,
> > > > Stan
> >
>
>
Tomasz Figa March 1, 2021, 9:23 a.m. UTC | #9
Hi Alex, Stanimir,

On Wed, Dec 16, 2020 at 12:15 PM Tomasz Figa <tfiga@chromium.org> wrote:
>
> On Wed, Dec 16, 2020 at 4:21 AM Nicolas Dufresne <nicolas@ndufresne.ca> wrote:
> >
> > Le mardi 15 décembre 2020 à 15:54 +0200, Stanimir Varbanov a écrit :
> > > Hi Tomasz,
> > >
> > > On 12/15/20 1:47 PM, Tomasz Figa wrote:
> > > > On Tue, Dec 15, 2020 at 8:16 PM Stanimir Varbanov
> > > > <stanimir.varbanov@linaro.org> wrote:
> > > > >
> > > > > Hi,
> > > > >
> > > > > Cc: Robin
> > > > >
> > > > > On 12/14/20 2:57 PM, Alexandre Courbot wrote:
> > > > > > This driver uses the SG vb2 ops, but effectively only ever accesses the
> > > > > > first entry of the SG table, indicating that it expects a flat layout.
> > > > > > Switch it to use the contiguous ops to make sure this expected invariant
> > > > >
> > > > > Under what circumstances the sg table will has nents > 1? I came down to
> > > > > [1] but not sure I got it right.
> > > > >
> > > > > I'm afraid that for systems with low amount of system memory and when
> > > > > the memory become fragmented, the driver will not work. That's why I
> > > > > started with sg allocator.
> > > >
> > > > It is exactly the opposite. The vb2-dma-contig allocator is "contig"
> > > > in terms of the DMA (aka IOVA) address space. In other words, it
> > > > guarantees that having one DMA address and length fully describes the
> > >
> > > Ahh, I missed that part. Looks like I misunderstood videobu2 contig
> > > allocator.
> >
> > I'm learning everyday too, but I'm surprised I don't see a call to
> > vb2_dma_contig_set_max_seg_size() in this driver (I could also just have missed
> > a patch when overlooking this thread) ?
> >
> > The reason I'm asking, doc says it should be called by driver supporting IOMMU,
> > which seems to be the case for such drivers (MFC, exynos4-is, exynos-gsc, mtk-
> > mdp, s5p-g2d, hantro, rkvdec, zoran, ti-vpe, ..). I posting it, worst case it's
> > all covered and we are good, otherwise perhaps a downstream patch didn't make it
> > ?
> >
> > /**
> >  * vb2_dma_contig_set_max_seg_size() - configure DMA max segment size
> >  * @dev:        device for configuring DMA parameters
> >  * @size:       size of DMA max segment size to set
> >  *
> >  * To allow mapping the scatter-list into a single chunk in the DMA
> >  * address space, the device is required to have the DMA max segment
> >  * size parameter set to a value larger than the buffer size. Otherwise,
> >  * the DMA-mapping subsystem will split the mapping into max segment
> >  * size chunks. This function sets the DMA max segment size
> >  * parameter to let DMA-mapping map a buffer as a single chunk in DMA
> >  * address space.
> >  * This code assumes that the DMA-mapping subsystem will merge all
> >  * scatterlist segments if this is really possible (for example when
> >  * an IOMMU is available and enabled).
> >  * Ideally, this parameter should be set by the generic bus code, but it
> >  * is left with the default 64KiB value due to historical litmiations in
> >  * other subsystems (like limited USB host drivers) and there no good
> >  * place to set it to the proper value.
> >  * This function should be called from the drivers, which are known to
> >  * operate on platforms with IOMMU and provide access to shared buffers
> >  * (either USERPTR or DMABUF). This should be done before initializing
> >  * videobuf2 queue.
> >  */
>
> It does call dma_set_max_seg_size() directly:
> https://elixir.bootlin.com/linux/latest/source/drivers/media/platform/qcom/venus/core.c#L230
>
> Actually, why do we even need a vb2 helper for this?
>

What's the plan for this patch?

Best regards,
Tomasz

> >
> > regards,
> > Nicolas
> >
> > >
> > > > buffer. This seems to be the requirement of the hardware/firmware
> > > > handled by the venus driver. If the device is behind an IOMMU, which
> > > > is the case for the SoCs in question, the underlying DMA ops will
> > > > actually allocate a discontiguous set of pages, so it has nothing to
> > > > do to system memory amount or fragmentation. If for some reason the
> > > > IOMMU can't be used, there is no way around, the memory needs to be
> > > > contiguous because of the hardware/firmware/driver expectation.
> > > >
> > > > On the other hand, the vb2-dma-sg allocator doesn't have any
> > > > continuity guarantees for the DMA, or any other, address space. The
> > > > current code works fine, because it calls dma_map_sg() on the whole
> > > > set of pages and that ends up mapping it contiguously in the IOVA
> > > > space, but that's just an implementation detail, not an API guarantee.
> > >
> > > It was good to know. Thanks for the explanation.
> > >
> > > >
> > > > Best regards,
> > > > Tomasz
> > > >
> > > > >
> > > > > [1]
> > > > > https://elixir.bootlin.com/linux/v5.10.1/source/drivers/iommu/dma-iommu.c#L782
> > > > >
> > > > > > is always enforced. Since the device is supposed to be behind an IOMMU
> > > > > > this should have little to none practical consequences beyond making the
> > > > > > driver not rely on a particular behavior of the SG implementation.
> > > > > >
> > > > > > Reported-by: Tomasz Figa <tfiga@chromium.org>
> > > > > > Signed-off-by: Alexandre Courbot <acourbot@chromium.org>
> > > > > > ---
> > > > > > Hi everyone,
> > > > > >
> > > > > > It probably doesn't hurt to fix this issue before some actual issue
> > > > > > happens.
> > > > > > I have tested this patch on Chrome OS and playback was just as fine as
> > > > > > with
> > > > > > the SG ops.
> > > > > >
> > > > > >  drivers/media/platform/Kconfig              | 2 +-
> > > > > >  drivers/media/platform/qcom/venus/helpers.c | 9 ++-------
> > > > > >  drivers/media/platform/qcom/venus/vdec.c    | 6 +++---
> > > > > >  drivers/media/platform/qcom/venus/venc.c    | 6 +++---
> > > > > >  4 files changed, 9 insertions(+), 14 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/media/platform/Kconfig
> > > > > > b/drivers/media/platform/Kconfig
> > > > > > index 35a18d388f3f..d9d7954111f2 100644
> > > > > > --- a/drivers/media/platform/Kconfig
> > > > > > +++ b/drivers/media/platform/Kconfig
> > > > > > @@ -533,7 +533,7 @@ config VIDEO_QCOM_VENUS
> > > > > >       depends on INTERCONNECT || !INTERCONNECT
> > > > > >       select QCOM_MDT_LOADER if ARCH_QCOM
> > > > > >       select QCOM_SCM if ARCH_QCOM
> > > > > > -     select VIDEOBUF2_DMA_SG
> > > > > > +     select VIDEOBUF2_DMA_CONTIG
> > > > > >       select V4L2_MEM2MEM_DEV
> > > > > >       help
> > > > > >         This is a V4L2 driver for Qualcomm Venus video accelerator
> > > > > > diff --git a/drivers/media/platform/qcom/venus/helpers.c
> > > > > > b/drivers/media/platform/qcom/venus/helpers.c
> > > > > > index 50439eb1ffea..859d260f002b 100644
> > > > > > --- a/drivers/media/platform/qcom/venus/helpers.c
> > > > > > +++ b/drivers/media/platform/qcom/venus/helpers.c
> > > > > > @@ -7,7 +7,7 @@
> > > > > >  #include <linux/mutex.h>
> > > > > >  #include <linux/slab.h>
> > > > > >  #include <linux/kernel.h>
> > > > > > -#include <media/videobuf2-dma-sg.h>
> > > > > > +#include <media/videobuf2-dma-contig.h>
> > > > > >  #include <media/v4l2-mem2mem.h>
> > > > > >  #include <asm/div64.h>
> > > > > >
> > > > > > @@ -1284,14 +1284,9 @@ int venus_helper_vb2_buf_init(struct vb2_buffer
> > > > > > *vb)
> > > > > >       struct venus_inst *inst = vb2_get_drv_priv(vb->vb2_queue);
> > > > > >       struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> > > > > >       struct venus_buffer *buf = to_venus_buffer(vbuf);
> > > > > > -     struct sg_table *sgt;
> > > > > > -
> > > > > > -     sgt = vb2_dma_sg_plane_desc(vb, 0);
> > > > > > -     if (!sgt)
> > > > > > -             return -EFAULT;
> > > > > >
> > > > > >       buf->size = vb2_plane_size(vb, 0);
> > > > > > -     buf->dma_addr = sg_dma_address(sgt->sgl);
> > > > >
> > > > > Can we do it:
> > > > >
> > > > >         if (WARN_ON(sgt->nents > 1))
> > > > >                 return -EFAULT;
> > > > >
> > > > > I understand that logically using dma-sg when the flat layout is
> > > > > expected by the hardware is wrong, but I haven't seen issues until now.
> > > > >
> > > > > > +     buf->dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0);
> > > > > >
> > > > > >       if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
> > > > > >               list_add_tail(&buf->reg_list, &inst->registeredbufs);
> > > > > > diff --git a/drivers/media/platform/qcom/venus/vdec.c
> > > > > > b/drivers/media/platform/qcom/venus/vdec.c
> > > > > > index 8488411204c3..3fb277c81aca 100644
> > > > > > --- a/drivers/media/platform/qcom/venus/vdec.c
> > > > > > +++ b/drivers/media/platform/qcom/venus/vdec.c
> > > > > > @@ -13,7 +13,7 @@
> > > > > >  #include <media/v4l2-event.h>
> > > > > >  #include <media/v4l2-ctrls.h>
> > > > > >  #include <media/v4l2-mem2mem.h>
> > > > > > -#include <media/videobuf2-dma-sg.h>
> > > > > > +#include <media/videobuf2-dma-contig.h>
> > > > > >
> > > > > >  #include "hfi_venus_io.h"
> > > > > >  #include "hfi_parser.h"
> > > > > > @@ -1461,7 +1461,7 @@ static int m2m_queue_init(void *priv, struct
> > > > > > vb2_queue *src_vq,
> > > > > >       src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
> > > > > >       src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> > > > > >       src_vq->ops = &vdec_vb2_ops;
> > > > > > -     src_vq->mem_ops = &vb2_dma_sg_memops;
> > > > > > +     src_vq->mem_ops = &vb2_dma_contig_memops;
> > > > > >       src_vq->drv_priv = inst;
> > > > > >       src_vq->buf_struct_size = sizeof(struct venus_buffer);
> > > > > >       src_vq->allow_zero_bytesused = 1;
> > > > > > @@ -1475,7 +1475,7 @@ static int m2m_queue_init(void *priv, struct
> > > > > > vb2_queue *src_vq,
> > > > > >       dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
> > > > > >       dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> > > > > >       dst_vq->ops = &vdec_vb2_ops;
> > > > > > -     dst_vq->mem_ops = &vb2_dma_sg_memops;
> > > > > > +     dst_vq->mem_ops = &vb2_dma_contig_memops;
> > > > > >       dst_vq->drv_priv = inst;
> > > > > >       dst_vq->buf_struct_size = sizeof(struct venus_buffer);
> > > > > >       dst_vq->allow_zero_bytesused = 1;
> > > > > > diff --git a/drivers/media/platform/qcom/venus/venc.c
> > > > > > b/drivers/media/platform/qcom/venus/venc.c
> > > > > > index 1c61602c5de1..a09550cd1dba 100644
> > > > > > --- a/drivers/media/platform/qcom/venus/venc.c
> > > > > > +++ b/drivers/media/platform/qcom/venus/venc.c
> > > > > > @@ -10,7 +10,7 @@
> > > > > >  #include <linux/pm_runtime.h>
> > > > > >  #include <linux/slab.h>
> > > > > >  #include <media/v4l2-mem2mem.h>
> > > > > > -#include <media/videobuf2-dma-sg.h>
> > > > > > +#include <media/videobuf2-dma-contig.h>
> > > > > >  #include <media/v4l2-ioctl.h>
> > > > > >  #include <media/v4l2-event.h>
> > > > > >  #include <media/v4l2-ctrls.h>
> > > > > > @@ -1001,7 +1001,7 @@ static int m2m_queue_init(void *priv, struct
> > > > > > vb2_queue *src_vq,
> > > > > >       src_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
> > > > > >       src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> > > > > >       src_vq->ops = &venc_vb2_ops;
> > > > > > -     src_vq->mem_ops = &vb2_dma_sg_memops;
> > > > > > +     src_vq->mem_ops = &vb2_dma_contig_memops;
> > > > > >       src_vq->drv_priv = inst;
> > > > > >       src_vq->buf_struct_size = sizeof(struct venus_buffer);
> > > > > >       src_vq->allow_zero_bytesused = 1;
> > > > > > @@ -1017,7 +1017,7 @@ static int m2m_queue_init(void *priv, struct
> > > > > > vb2_queue *src_vq,
> > > > > >       dst_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
> > > > > >       dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> > > > > >       dst_vq->ops = &venc_vb2_ops;
> > > > > > -     dst_vq->mem_ops = &vb2_dma_sg_memops;
> > > > > > +     dst_vq->mem_ops = &vb2_dma_contig_memops;
> > > > > >       dst_vq->drv_priv = inst;
> > > > > >       dst_vq->buf_struct_size = sizeof(struct venus_buffer);
> > > > > >       dst_vq->allow_zero_bytesused = 1;
> > > > > >
> > > > >
> > > > > --
> > > > > regards,
> > > > > Stan
> > >
> >
> >
Stanimir Varbanov March 1, 2021, 10:22 a.m. UTC | #10
On 3/1/21 11:23 AM, Tomasz Figa wrote:
> Hi Alex, Stanimir,
> 
> On Wed, Dec 16, 2020 at 12:15 PM Tomasz Figa <tfiga@chromium.org> wrote:
>>
>> On Wed, Dec 16, 2020 at 4:21 AM Nicolas Dufresne <nicolas@ndufresne.ca> wrote:
>>>
>>> Le mardi 15 décembre 2020 à 15:54 +0200, Stanimir Varbanov a écrit :
>>>> Hi Tomasz,
>>>>
>>>> On 12/15/20 1:47 PM, Tomasz Figa wrote:
>>>>> On Tue, Dec 15, 2020 at 8:16 PM Stanimir Varbanov
>>>>> <stanimir.varbanov@linaro.org> wrote:
>>>>>>
>>>>>> Hi,
>>>>>>
>>>>>> Cc: Robin
>>>>>>
>>>>>> On 12/14/20 2:57 PM, Alexandre Courbot wrote:
>>>>>>> This driver uses the SG vb2 ops, but effectively only ever accesses the
>>>>>>> first entry of the SG table, indicating that it expects a flat layout.
>>>>>>> Switch it to use the contiguous ops to make sure this expected invariant
>>>>>>
>>>>>> Under what circumstances the sg table will has nents > 1? I came down to
>>>>>> [1] but not sure I got it right.
>>>>>>
>>>>>> I'm afraid that for systems with low amount of system memory and when
>>>>>> the memory become fragmented, the driver will not work. That's why I
>>>>>> started with sg allocator.
>>>>>
>>>>> It is exactly the opposite. The vb2-dma-contig allocator is "contig"
>>>>> in terms of the DMA (aka IOVA) address space. In other words, it
>>>>> guarantees that having one DMA address and length fully describes the
>>>>
>>>> Ahh, I missed that part. Looks like I misunderstood videobu2 contig
>>>> allocator.
>>>
>>> I'm learning everyday too, but I'm surprised I don't see a call to
>>> vb2_dma_contig_set_max_seg_size() in this driver (I could also just have missed
>>> a patch when overlooking this thread) ?
>>>
>>> The reason I'm asking, doc says it should be called by driver supporting IOMMU,
>>> which seems to be the case for such drivers (MFC, exynos4-is, exynos-gsc, mtk-
>>> mdp, s5p-g2d, hantro, rkvdec, zoran, ti-vpe, ..). I posting it, worst case it's
>>> all covered and we are good, otherwise perhaps a downstream patch didn't make it
>>> ?
>>>
>>> /**
>>>  * vb2_dma_contig_set_max_seg_size() - configure DMA max segment size
>>>  * @dev:        device for configuring DMA parameters
>>>  * @size:       size of DMA max segment size to set
>>>  *
>>>  * To allow mapping the scatter-list into a single chunk in the DMA
>>>  * address space, the device is required to have the DMA max segment
>>>  * size parameter set to a value larger than the buffer size. Otherwise,
>>>  * the DMA-mapping subsystem will split the mapping into max segment
>>>  * size chunks. This function sets the DMA max segment size
>>>  * parameter to let DMA-mapping map a buffer as a single chunk in DMA
>>>  * address space.
>>>  * This code assumes that the DMA-mapping subsystem will merge all
>>>  * scatterlist segments if this is really possible (for example when
>>>  * an IOMMU is available and enabled).
>>>  * Ideally, this parameter should be set by the generic bus code, but it
>>>  * is left with the default 64KiB value due to historical litmiations in
>>>  * other subsystems (like limited USB host drivers) and there no good
>>>  * place to set it to the proper value.
>>>  * This function should be called from the drivers, which are known to
>>>  * operate on platforms with IOMMU and provide access to shared buffers
>>>  * (either USERPTR or DMABUF). This should be done before initializing
>>>  * videobuf2 queue.
>>>  */
>>
>> It does call dma_set_max_seg_size() directly:
>> https://elixir.bootlin.com/linux/latest/source/drivers/media/platform/qcom/venus/core.c#L230
>>
>> Actually, why do we even need a vb2 helper for this?
>>
> 
> What's the plan for this patch?

It will be part of v5.12.
Tomasz Figa March 1, 2021, 10:26 a.m. UTC | #11
On Mon, Mar 1, 2021 at 7:22 PM Stanimir Varbanov
<stanimir.varbanov@linaro.org> wrote:
>
>
>
> On 3/1/21 11:23 AM, Tomasz Figa wrote:
> > Hi Alex, Stanimir,
> >
> > On Wed, Dec 16, 2020 at 12:15 PM Tomasz Figa <tfiga@chromium.org> wrote:
> >>
> >> On Wed, Dec 16, 2020 at 4:21 AM Nicolas Dufresne <nicolas@ndufresne.ca> wrote:
> >>>
> >>> Le mardi 15 décembre 2020 à 15:54 +0200, Stanimir Varbanov a écrit :
> >>>> Hi Tomasz,
> >>>>
> >>>> On 12/15/20 1:47 PM, Tomasz Figa wrote:
> >>>>> On Tue, Dec 15, 2020 at 8:16 PM Stanimir Varbanov
> >>>>> <stanimir.varbanov@linaro.org> wrote:
> >>>>>>
> >>>>>> Hi,
> >>>>>>
> >>>>>> Cc: Robin
> >>>>>>
> >>>>>> On 12/14/20 2:57 PM, Alexandre Courbot wrote:
> >>>>>>> This driver uses the SG vb2 ops, but effectively only ever accesses the
> >>>>>>> first entry of the SG table, indicating that it expects a flat layout.
> >>>>>>> Switch it to use the contiguous ops to make sure this expected invariant
> >>>>>>
> >>>>>> Under what circumstances the sg table will has nents > 1? I came down to
> >>>>>> [1] but not sure I got it right.
> >>>>>>
> >>>>>> I'm afraid that for systems with low amount of system memory and when
> >>>>>> the memory become fragmented, the driver will not work. That's why I
> >>>>>> started with sg allocator.
> >>>>>
> >>>>> It is exactly the opposite. The vb2-dma-contig allocator is "contig"
> >>>>> in terms of the DMA (aka IOVA) address space. In other words, it
> >>>>> guarantees that having one DMA address and length fully describes the
> >>>>
> >>>> Ahh, I missed that part. Looks like I misunderstood videobu2 contig
> >>>> allocator.
> >>>
> >>> I'm learning everyday too, but I'm surprised I don't see a call to
> >>> vb2_dma_contig_set_max_seg_size() in this driver (I could also just have missed
> >>> a patch when overlooking this thread) ?
> >>>
> >>> The reason I'm asking, doc says it should be called by driver supporting IOMMU,
> >>> which seems to be the case for such drivers (MFC, exynos4-is, exynos-gsc, mtk-
> >>> mdp, s5p-g2d, hantro, rkvdec, zoran, ti-vpe, ..). I posting it, worst case it's
> >>> all covered and we are good, otherwise perhaps a downstream patch didn't make it
> >>> ?
> >>>
> >>> /**
> >>>  * vb2_dma_contig_set_max_seg_size() - configure DMA max segment size
> >>>  * @dev:        device for configuring DMA parameters
> >>>  * @size:       size of DMA max segment size to set
> >>>  *
> >>>  * To allow mapping the scatter-list into a single chunk in the DMA
> >>>  * address space, the device is required to have the DMA max segment
> >>>  * size parameter set to a value larger than the buffer size. Otherwise,
> >>>  * the DMA-mapping subsystem will split the mapping into max segment
> >>>  * size chunks. This function sets the DMA max segment size
> >>>  * parameter to let DMA-mapping map a buffer as a single chunk in DMA
> >>>  * address space.
> >>>  * This code assumes that the DMA-mapping subsystem will merge all
> >>>  * scatterlist segments if this is really possible (for example when
> >>>  * an IOMMU is available and enabled).
> >>>  * Ideally, this parameter should be set by the generic bus code, but it
> >>>  * is left with the default 64KiB value due to historical litmiations in
> >>>  * other subsystems (like limited USB host drivers) and there no good
> >>>  * place to set it to the proper value.
> >>>  * This function should be called from the drivers, which are known to
> >>>  * operate on platforms with IOMMU and provide access to shared buffers
> >>>  * (either USERPTR or DMABUF). This should be done before initializing
> >>>  * videobuf2 queue.
> >>>  */
> >>
> >> It does call dma_set_max_seg_size() directly:
> >> https://elixir.bootlin.com/linux/latest/source/drivers/media/platform/qcom/venus/core.c#L230
> >>
> >> Actually, why do we even need a vb2 helper for this?
> >>
> >
> > What's the plan for this patch?
>
> It will be part of v5.12.

Great, thanks!
patchwork-bot+linux-arm-msm@kernel.org March 1, 2021, 7:59 p.m. UTC | #12
Hello:

This patch was applied to qcom/linux.git (refs/heads/for-next):

On Mon, 14 Dec 2020 21:57:03 +0900 you wrote:
> This driver uses the SG vb2 ops, but effectively only ever accesses the
> first entry of the SG table, indicating that it expects a flat layout.
> Switch it to use the contiguous ops to make sure this expected invariant
> is always enforced. Since the device is supposed to be behind an IOMMU
> this should have little to none practical consequences beyond making the
> driver not rely on a particular behavior of the SG implementation.
> 
> [...]

Here is the summary with links:
  - media: venus: use contig vb2 ops
    https://git.kernel.org/qcom/c/cc82fd691a3a

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
diff mbox series

Patch

diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
index 35a18d388f3f..d9d7954111f2 100644
--- a/drivers/media/platform/Kconfig
+++ b/drivers/media/platform/Kconfig
@@ -533,7 +533,7 @@  config VIDEO_QCOM_VENUS
 	depends on INTERCONNECT || !INTERCONNECT
 	select QCOM_MDT_LOADER if ARCH_QCOM
 	select QCOM_SCM if ARCH_QCOM
-	select VIDEOBUF2_DMA_SG
+	select VIDEOBUF2_DMA_CONTIG
 	select V4L2_MEM2MEM_DEV
 	help
 	  This is a V4L2 driver for Qualcomm Venus video accelerator
diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
index 50439eb1ffea..859d260f002b 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -7,7 +7,7 @@ 
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/kernel.h>
-#include <media/videobuf2-dma-sg.h>
+#include <media/videobuf2-dma-contig.h>
 #include <media/v4l2-mem2mem.h>
 #include <asm/div64.h>
 
@@ -1284,14 +1284,9 @@  int venus_helper_vb2_buf_init(struct vb2_buffer *vb)
 	struct venus_inst *inst = vb2_get_drv_priv(vb->vb2_queue);
 	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
 	struct venus_buffer *buf = to_venus_buffer(vbuf);
-	struct sg_table *sgt;
-
-	sgt = vb2_dma_sg_plane_desc(vb, 0);
-	if (!sgt)
-		return -EFAULT;
 
 	buf->size = vb2_plane_size(vb, 0);
-	buf->dma_addr = sg_dma_address(sgt->sgl);
+	buf->dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0);
 
 	if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
 		list_add_tail(&buf->reg_list, &inst->registeredbufs);
diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
index 8488411204c3..3fb277c81aca 100644
--- a/drivers/media/platform/qcom/venus/vdec.c
+++ b/drivers/media/platform/qcom/venus/vdec.c
@@ -13,7 +13,7 @@ 
 #include <media/v4l2-event.h>
 #include <media/v4l2-ctrls.h>
 #include <media/v4l2-mem2mem.h>
-#include <media/videobuf2-dma-sg.h>
+#include <media/videobuf2-dma-contig.h>
 
 #include "hfi_venus_io.h"
 #include "hfi_parser.h"
@@ -1461,7 +1461,7 @@  static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
 	src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
 	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
 	src_vq->ops = &vdec_vb2_ops;
-	src_vq->mem_ops = &vb2_dma_sg_memops;
+	src_vq->mem_ops = &vb2_dma_contig_memops;
 	src_vq->drv_priv = inst;
 	src_vq->buf_struct_size = sizeof(struct venus_buffer);
 	src_vq->allow_zero_bytesused = 1;
@@ -1475,7 +1475,7 @@  static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
 	dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
 	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
 	dst_vq->ops = &vdec_vb2_ops;
-	dst_vq->mem_ops = &vb2_dma_sg_memops;
+	dst_vq->mem_ops = &vb2_dma_contig_memops;
 	dst_vq->drv_priv = inst;
 	dst_vq->buf_struct_size = sizeof(struct venus_buffer);
 	dst_vq->allow_zero_bytesused = 1;
diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
index 1c61602c5de1..a09550cd1dba 100644
--- a/drivers/media/platform/qcom/venus/venc.c
+++ b/drivers/media/platform/qcom/venus/venc.c
@@ -10,7 +10,7 @@ 
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <media/v4l2-mem2mem.h>
-#include <media/videobuf2-dma-sg.h>
+#include <media/videobuf2-dma-contig.h>
 #include <media/v4l2-ioctl.h>
 #include <media/v4l2-event.h>
 #include <media/v4l2-ctrls.h>
@@ -1001,7 +1001,7 @@  static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
 	src_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
 	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
 	src_vq->ops = &venc_vb2_ops;
-	src_vq->mem_ops = &vb2_dma_sg_memops;
+	src_vq->mem_ops = &vb2_dma_contig_memops;
 	src_vq->drv_priv = inst;
 	src_vq->buf_struct_size = sizeof(struct venus_buffer);
 	src_vq->allow_zero_bytesused = 1;
@@ -1017,7 +1017,7 @@  static int m2m_queue_init(void *priv, struct vb2_queue *src_vq,
 	dst_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF;
 	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
 	dst_vq->ops = &venc_vb2_ops;
-	dst_vq->mem_ops = &vb2_dma_sg_memops;
+	dst_vq->mem_ops = &vb2_dma_contig_memops;
 	dst_vq->drv_priv = inst;
 	dst_vq->buf_struct_size = sizeof(struct venus_buffer);
 	dst_vq->allow_zero_bytesused = 1;