diff mbox

[1/2] videobuf2-dma-sg: Allocate pages as contiguous as possible

Message ID 1374253355-3788-2-git-send-email-ricardo.ribalda@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ricardo Ribalda Delgado July 19, 2013, 5:02 p.m. UTC
Most DMA engines have limitations regarding the number of DMA segments
(sg-buffers) that they can handle. Videobuffers can easily spread
through houndreds of pages.

In the previous aproach, the pages were allocated individually, this
could led to the creation houndreds of dma segments (sg-buffers) that
could not be handled by some DMA engines.

This patch tries to minimize the number of DMA segments by using
alloc_pages. In the worst case it will behave as before, but most
of the times it will reduce the number of dma segments

Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
---
 drivers/media/v4l2-core/videobuf2-dma-sg.c |   60 +++++++++++++++++++++++-----
 1 file changed, 49 insertions(+), 11 deletions(-)

Comments

Jonathan Corbet July 19, 2013, 8:16 p.m. UTC | #1
On Fri, 19 Jul 2013 19:02:33 +0200
Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com> wrote:

> Most DMA engines have limitations regarding the number of DMA segments
> (sg-buffers) that they can handle. Videobuffers can easily spread
> through houndreds of pages.
> 
> In the previous aproach, the pages were allocated individually, this
> could led to the creation houndreds of dma segments (sg-buffers) that
> could not be handled by some DMA engines.
> 
> This patch tries to minimize the number of DMA segments by using
> alloc_pages. In the worst case it will behave as before, but most
> of the times it will reduce the number of dma segments

So I looked this over and I have a few questions...

> diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> index 16ae3dc..c053605 100644
> --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
> +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> @@ -42,10 +42,55 @@ struct vb2_dma_sg_buf {
>  
>  static void vb2_dma_sg_put(void *buf_priv);
>  
> +static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
> +		gfp_t gfp_flags)
> +{
> +	unsigned int last_page = 0;
> +	int size = buf->sg_desc.size;
> +
> +	while (size > 0) {
> +		struct page *pages;
> +		int order;
> +		int i;
> +
> +		order = get_order(size);
> +		/* Dont over allocate*/
> +		if ((PAGE_SIZE << order) > size)
> +			order--;

Terrible things will happen if size < PAGE_SIZE.  Presumably that should
never happen, or perhaps one could say any caller who does that will get
what they deserve.

Have you considered alloc_pages_exact(), though?  That might result in
fewer segments overall.

> +		pages = NULL;
> +		while (!pages) {
> +			pages = alloc_pages(GFP_KERNEL | __GFP_ZERO |
> +					__GFP_NOWARN | gfp_flags, order);
> +			if (pages)
> +				break;
> +
> +			if (order == 0)
> +				while (last_page--) {
> +					__free_page(buf->pages[last_page]);

If I understand things, this is wrong; you relly need free_pages() with the
correct order.  Or, at least, that would be the case if you kept the pages
together, but that leads to my biggest question...

> +					return -ENOMEM;
> +				}
> +			order--;
> +		}
> +
> +		split_page(pages, order);
> +		for (i = 0; i < (1<<order); i++) {
> +			buf->pages[last_page] = pages[i];
> +			sg_set_page(&buf->sg_desc.sglist[last_page],
> +					buf->pages[last_page], PAGE_SIZE, 0);
> +			last_page++;
> +		}

You've gone to all this trouble to get a higher-order allocation so you'd
have fewer segments, then you undo it all by splitting things apart into
individual pages.  Why?  Clearly I'm missing something, this seems to
defeat the purpose of the whole exercise?

Thanks,

jon
--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ricardo Ribalda Delgado July 19, 2013, 9:57 p.m. UTC | #2
Hello Jonathan:

Thanks for your review. I am making a driver for a camera that
produces 4Mpx images with up to 10 bytes per pixel!. The camera has a
dma engine capable of moving up to 255 sg sectors.

In the original implementation of vb2-dma-sg, every page was allocated
independently, dividing the memory in more than 255 sectors.

If the memory is very segmented, then there is nothing I can do, but
if there are high order pages available I would like to use them.

The original assumption is that all the pages that compose a high
order page are contiguous in physical memory.

On Fri, Jul 19, 2013 at 10:16 PM, Jonathan Corbet <corbet@lwn.net> wrote:
> On Fri, 19 Jul 2013 19:02:33 +0200
> Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com> wrote:
>
>> Most DMA engines have limitations regarding the number of DMA segments
>> (sg-buffers) that they can handle. Videobuffers can easily spread
>> through houndreds of pages.
>>
>> In the previous aproach, the pages were allocated individually, this
>> could led to the creation houndreds of dma segments (sg-buffers) that
>> could not be handled by some DMA engines.
>>
>> This patch tries to minimize the number of DMA segments by using
>> alloc_pages. In the worst case it will behave as before, but most
>> of the times it will reduce the number of dma segments
>
> So I looked this over and I have a few questions...
>
>> diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
>> index 16ae3dc..c053605 100644
>> --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
>> +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
>> @@ -42,10 +42,55 @@ struct vb2_dma_sg_buf {
>>
>>  static void vb2_dma_sg_put(void *buf_priv);
>>
>> +static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
>> +             gfp_t gfp_flags)
>> +{
>> +     unsigned int last_page = 0;
>> +     int size = buf->sg_desc.size;
>> +
>> +     while (size > 0) {
>> +             struct page *pages;
>> +             int order;
>> +             int i;
>> +
>> +             order = get_order(size);
>> +             /* Dont over allocate*/
>> +             if ((PAGE_SIZE << order) > size)
>> +                     order--;
>
> Terrible things will happen if size < PAGE_SIZE.  Presumably that should
> never happen, or perhaps one could say any caller who does that will get
> what they deserve.

The caller function is vb2_dma_sg_alloc which according to its
comments is already page aligned, so that should be covered.
https://linuxtv.org/patch/18095/

>
> Have you considered alloc_pages_exact(), though?  That might result in
> fewer segments overall.

In the previous implementation I used alloc_pages_exact, but there
were two things that made me change my mind. One is that the comments
of the function says that you should free the pages with
free_pages_exact, so  should get track of the segments. The other is
that alloc_pages_exact split the highest pages into order 0, so there
could be a situation that for only one page I would split a higher
order page, and those are scarce.

>
>> +             pages = NULL;
>> +             while (!pages) {
>> +                     pages = alloc_pages(GFP_KERNEL | __GFP_ZERO |
>> +                                     __GFP_NOWARN | gfp_flags, order);
>> +                     if (pages)
>> +                             break;
>> +
>> +                     if (order == 0)
>> +                             while (last_page--) {
>> +                                     __free_page(buf->pages[last_page]);
>
> If I understand things, this is wrong; you relly need free_pages() with the
> correct order.  Or, at least, that would be the case if you kept the pages
> together, but that leads to my biggest question...

Pages are splitted, so I believe that this is right.

>
>> +                                     return -ENOMEM;
>> +                             }
>> +                     order--;
>> +             }
>> +
>> +             split_page(pages, order);
>> +             for (i = 0; i < (1<<order); i++) {
>> +                     buf->pages[last_page] = pages[i];
>> +                     sg_set_page(&buf->sg_desc.sglist[last_page],
>> +                                     buf->pages[last_page], PAGE_SIZE, 0);
>> +                     last_page++;
>> +             }
>
> You've gone to all this trouble to get a higher-order allocation so you'd
> have fewer segments, then you undo it all by splitting things apart into
> individual pages.  Why?  Clearly I'm missing something, this seems to
> defeat the purpose of the whole exercise?

I got to all this trouble to get memory as physically contiguous as
possible. I don't care if they belong to one or multiple pages, in
fact my dma controller only understand about physical addresses.

 If I don't split the pages then the calls to vm_map_ram and
vm_insert_page will fail, please take a look to:
https://lkml.org/lkml/2013/7/17/285 there I post the code that does
not split the pages and  to
http://marc.info/?l=linux-mm&m=124404111608622 where another poor guy
complains about not been able to use vm_insert_page on higher order
pages :).


Again, thank you very much for your review.

>
> Thanks,
>
> jon



--
Ricardo Ribalda
--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marek Szyprowski July 29, 2013, 11:27 a.m. UTC | #3
Hello,

On 7/19/2013 10:16 PM, Jonathan Corbet wrote:
> On Fri, 19 Jul 2013 19:02:33 +0200
> Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com> wrote:
>
> > Most DMA engines have limitations regarding the number of DMA segments
> > (sg-buffers) that they can handle. Videobuffers can easily spread
> > through houndreds of pages.
> >
> > In the previous aproach, the pages were allocated individually, this
> > could led to the creation houndreds of dma segments (sg-buffers) that
> > could not be handled by some DMA engines.
> >
> > This patch tries to minimize the number of DMA segments by using
> > alloc_pages. In the worst case it will behave as before, but most
> > of the times it will reduce the number of dma segments
>
> So I looked this over and I have a few questions...
>
> > diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> > index 16ae3dc..c053605 100644
> > --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
> > +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> > @@ -42,10 +42,55 @@ struct vb2_dma_sg_buf {
> >
> >  static void vb2_dma_sg_put(void *buf_priv);
> >
> > +static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
> > +		gfp_t gfp_flags)
> > +{
> > +	unsigned int last_page = 0;
> > +	int size = buf->sg_desc.size;
> > +
> > +	while (size > 0) {
> > +		struct page *pages;
> > +		int order;
> > +		int i;
> > +
> > +		order = get_order(size);
> > +		/* Dont over allocate*/
> > +		if ((PAGE_SIZE << order) > size)
> > +			order--;
>
> Terrible things will happen if size < PAGE_SIZE.  Presumably that should
> never happen, or perhaps one could say any caller who does that will get
> what they deserve.

I think that page size alignment for requested buffer size should be added
at vb2 core. V4L2 buffer API is page oriented and it really makes no sense
to allocate buffers which are not a multiple of page size.


> Have you considered alloc_pages_exact(), though?  That might result in
> fewer segments overall.
>
> > +		pages = NULL;
> > +		while (!pages) {
> > +			pages = alloc_pages(GFP_KERNEL | __GFP_ZERO |
> > +					__GFP_NOWARN | gfp_flags, order);
> > +			if (pages)
> > +				break;
> > +
> > +			if (order == 0)
> > +				while (last_page--) {
> > +					__free_page(buf->pages[last_page]);
>
> If I understand things, this is wrong; you relly need free_pages() with the
> correct order.  Or, at least, that would be the case if you kept the pages
> together, but that leads to my biggest question...
>
> > +					return -ENOMEM;
> > +				}
> > +			order--;
> > +		}
> > +
> > +		split_page(pages, order);
> > +		for (i = 0; i < (1<<order); i++) {
> > +			buf->pages[last_page] = pages[i];
> > +			sg_set_page(&buf->sg_desc.sglist[last_page],
> > +					buf->pages[last_page], PAGE_SIZE, 0);
> > +			last_page++;
> > +		}
>
> You've gone to all this trouble to get a higher-order allocation so you'd
> have fewer segments, then you undo it all by splitting things apart into
> individual pages.  Why?  Clearly I'm missing something, this seems to
> defeat the purpose of the whole exercise?

Individual zero-order pages are required to get them mapped to userspace in
mmap callback.

Best regards
Jonathan Corbet July 29, 2013, 3:16 p.m. UTC | #4
On Mon, 29 Jul 2013 13:27:12 +0200
Marek Szyprowski <m.szyprowski@samsung.com> wrote:

> > You've gone to all this trouble to get a higher-order allocation so you'd
> > have fewer segments, then you undo it all by splitting things apart into
> > individual pages.  Why?  Clearly I'm missing something, this seems to
> > defeat the purpose of the whole exercise?  
> 
> Individual zero-order pages are required to get them mapped to userspace in
> mmap callback.

Yeah, Ricardo explained that too.  The right solution might be to fix that
problem rather than work around it, but I can see why one might shy at that
task! :)

I do wonder, though, if an intermediate solution using huge pages might be
the best approach?  That would get the number of segments down pretty far,
and using huge pages for buffers would reduce TLB pressure significantly
if the CPU is working through the data at all.  Meanwhile, inserting huge
pages into the process's address space should work easily.  Just a thought.

jon
--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sakari Ailus July 31, 2013, 6:37 a.m. UTC | #5
Hi Jon and Sylwester,

On Mon, Jul 29, 2013 at 09:16:44AM -0600, Jonathan Corbet wrote:
> On Mon, 29 Jul 2013 13:27:12 +0200
> Marek Szyprowski <m.szyprowski@samsung.com> wrote:
> 
> > > You've gone to all this trouble to get a higher-order allocation so you'd
> > > have fewer segments, then you undo it all by splitting things apart into
> > > individual pages.  Why?  Clearly I'm missing something, this seems to
> > > defeat the purpose of the whole exercise?  
> > 
> > Individual zero-order pages are required to get them mapped to userspace in
> > mmap callback.
> 
> Yeah, Ricardo explained that too.  The right solution might be to fix that
> problem rather than work around it, but I can see why one might shy at that
> task! :)
> 
> I do wonder, though, if an intermediate solution using huge pages might be
> the best approach?  That would get the number of segments down pretty far,
> and using huge pages for buffers would reduce TLB pressure significantly
> if the CPU is working through the data at all.  Meanwhile, inserting huge
> pages into the process's address space should work easily.  Just a thought.

My ack to that.

And in the case of dma-buf the buffer doesn't need to be mapped to user
space. It'd be quite nice to be able to share higher order allocations even
if they couldn't be mapped to user space as such.

Using 2 MiB pages would probably solve Ricardo's issue, but used alone
they'd waste lots of memory for small buffers. If small pages (in Ricardo's
case) were used when 2 MiB pages would be too big, e.g. 1 MiB buffer would
already have 256 pages in it. Perhaps it'd be useful to specify whether
large pages should be always preferred over smaller ones.
Ricardo Ribalda Delgado Aug. 1, 2013, 1:59 p.m. UTC | #6
Hi Sakarius

I think the whole point of the videobuf2 is sharing pages with the
user space, so until vm_insert_page does not support high order pages
we should split them. Unfortunately the mm is completely out of my
topic, so I don't think that I could be very useful there.

With my patch, in the worst case scenario, the image is split in as
many blocks as today, but in a normal setup the ammount of blocks is 1
or two blocks in total!.

Best regards.





On Wed, Jul 31, 2013 at 8:37 AM, Sakari Ailus <sakari.ailus@iki.fi> wrote:
> Hi Jon and Sylwester,
>
> On Mon, Jul 29, 2013 at 09:16:44AM -0600, Jonathan Corbet wrote:
>> On Mon, 29 Jul 2013 13:27:12 +0200
>> Marek Szyprowski <m.szyprowski@samsung.com> wrote:
>>
>> > > You've gone to all this trouble to get a higher-order allocation so you'd
>> > > have fewer segments, then you undo it all by splitting things apart into
>> > > individual pages.  Why?  Clearly I'm missing something, this seems to
>> > > defeat the purpose of the whole exercise?
>> >
>> > Individual zero-order pages are required to get them mapped to userspace in
>> > mmap callback.
>>
>> Yeah, Ricardo explained that too.  The right solution might be to fix that
>> problem rather than work around it, but I can see why one might shy at that
>> task! :)
>>
>> I do wonder, though, if an intermediate solution using huge pages might be
>> the best approach?  That would get the number of segments down pretty far,
>> and using huge pages for buffers would reduce TLB pressure significantly
>> if the CPU is working through the data at all.  Meanwhile, inserting huge
>> pages into the process's address space should work easily.  Just a thought.
>
> My ack to that.
>
> And in the case of dma-buf the buffer doesn't need to be mapped to user
> space. It'd be quite nice to be able to share higher order allocations even
> if they couldn't be mapped to user space as such.
>
> Using 2 MiB pages would probably solve Ricardo's issue, but used alone
> they'd waste lots of memory for small buffers. If small pages (in Ricardo's
> case) were used when 2 MiB pages would be too big, e.g. 1 MiB buffer would
> already have 256 pages in it. Perhaps it'd be useful to specify whether
> large pages should be always preferred over smaller ones.
>
> --
> Kind regards,
>
> Sakari Ailus
> e-mail: sakari.ailus@iki.fi     XMPP: sailus@retiisi.org.uk
Marek Szyprowski Aug. 2, 2013, 6:38 a.m. UTC | #7
Hello,

On 7/19/2013 7:02 PM, Ricardo Ribalda Delgado wrote:
> Most DMA engines have limitations regarding the number of DMA segments
> (sg-buffers) that they can handle. Videobuffers can easily spread
> through houndreds of pages.
>
> In the previous aproach, the pages were allocated individually, this
> could led to the creation houndreds of dma segments (sg-buffers) that
> could not be handled by some DMA engines.
>
> This patch tries to minimize the number of DMA segments by using
> alloc_pages. In the worst case it will behave as before, but most
> of the times it will reduce the number of dma segments
>
> Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>

Acked-by: Marek Szyprowski <m.szyprowski@samsung.com>

> ---
>   drivers/media/v4l2-core/videobuf2-dma-sg.c |   60 +++++++++++++++++++++++-----
>   1 file changed, 49 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> index 16ae3dc..c053605 100644
> --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
> +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> @@ -42,10 +42,55 @@ struct vb2_dma_sg_buf {
>   
>   static void vb2_dma_sg_put(void *buf_priv);
>   
> +static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
> +		gfp_t gfp_flags)
> +{
> +	unsigned int last_page = 0;
> +	int size = buf->sg_desc.size;
> +
> +	while (size > 0) {
> +		struct page *pages;
> +		int order;
> +		int i;
> +
> +		order = get_order(size);
> +		/* Dont over allocate*/
> +		if ((PAGE_SIZE << order) > size)
> +			order--;
> +
> +		pages = NULL;
> +		while (!pages) {
> +			pages = alloc_pages(GFP_KERNEL | __GFP_ZERO |
> +					__GFP_NOWARN | gfp_flags, order);
> +			if (pages)
> +				break;
> +
> +			if (order == 0)
> +				while (last_page--) {
> +					__free_page(buf->pages[last_page]);
> +					return -ENOMEM;
> +				}
> +			order--;
> +		}
> +
> +		split_page(pages, order);
> +		for (i = 0; i < (1<<order); i++) {
> +			buf->pages[last_page] = pages[i];
> +			sg_set_page(&buf->sg_desc.sglist[last_page],
> +					buf->pages[last_page], PAGE_SIZE, 0);
> +			last_page++;
> +		}
> +
> +		size -= PAGE_SIZE << order;
> +	}
> +
> +	return 0;
> +}
> +
>   static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_flags)
>   {
>   	struct vb2_dma_sg_buf *buf;
> -	int i;
> +	int ret;
>   
>   	buf = kzalloc(sizeof *buf, GFP_KERNEL);
>   	if (!buf)
> @@ -69,14 +114,9 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
>   	if (!buf->pages)
>   		goto fail_pages_array_alloc;
>   
> -	for (i = 0; i < buf->sg_desc.num_pages; ++i) {
> -		buf->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO |
> -					   __GFP_NOWARN | gfp_flags);
> -		if (NULL == buf->pages[i])
> -			goto fail_pages_alloc;
> -		sg_set_page(&buf->sg_desc.sglist[i],
> -			    buf->pages[i], PAGE_SIZE, 0);
> -	}
> +	ret = vb2_dma_sg_alloc_compacted(buf, gfp_flags);
> +	if (ret)
> +		goto fail_pages_alloc;
>   
>   	buf->handler.refcount = &buf->refcount;
>   	buf->handler.put = vb2_dma_sg_put;
> @@ -89,8 +129,6 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
>   	return buf;
>   
>   fail_pages_alloc:
> -	while (--i >= 0)
> -		__free_page(buf->pages[i]);
>   	kfree(buf->pages);
>   
>   fail_pages_array_alloc:

Best regards
Andre Heider Aug. 2, 2013, 8:46 a.m. UTC | #8
Hi Ricardo,

sorry for the late answer, but the leak I mentioned in my first reply is still there, see below.

On Fri, Jul 19, 2013 at 07:02:33PM +0200, Ricardo Ribalda Delgado wrote:
> Most DMA engines have limitations regarding the number of DMA segments
> (sg-buffers) that they can handle. Videobuffers can easily spread
> through houndreds of pages.
> 
> In the previous aproach, the pages were allocated individually, this
> could led to the creation houndreds of dma segments (sg-buffers) that
> could not be handled by some DMA engines.
> 
> This patch tries to minimize the number of DMA segments by using
> alloc_pages. In the worst case it will behave as before, but most
> of the times it will reduce the number of dma segments
> 
> Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
> ---
>  drivers/media/v4l2-core/videobuf2-dma-sg.c |   60 +++++++++++++++++++++++-----
>  1 file changed, 49 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> index 16ae3dc..c053605 100644
> --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
> +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> @@ -42,10 +42,55 @@ struct vb2_dma_sg_buf {
>  
>  static void vb2_dma_sg_put(void *buf_priv);
>  
> +static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
> +		gfp_t gfp_flags)
> +{
> +	unsigned int last_page = 0;
> +	int size = buf->sg_desc.size;
> +
> +	while (size > 0) {
> +		struct page *pages;
> +		int order;
> +		int i;
> +
> +		order = get_order(size);
> +		/* Dont over allocate*/
> +		if ((PAGE_SIZE << order) > size)
> +			order--;
> +
> +		pages = NULL;
> +		while (!pages) {
> +			pages = alloc_pages(GFP_KERNEL | __GFP_ZERO |
> +					__GFP_NOWARN | gfp_flags, order);
> +			if (pages)
> +				break;
> +
> +			if (order == 0)
> +				while (last_page--) {
> +					__free_page(buf->pages[last_page]);
> +					return -ENOMEM;
> +				}

The return statement doesn't make sense in the while() scope, that way you wouldn't need the loop at all.

To prevent leaking pages of prior iterations (those with higher orders), pull the return out of there:

			while (last_page--)
				__free_page(buf->pages[last_page]);
			return -ENOMEM;

Regards,
Andre
--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ricardo Ribalda Delgado Aug. 2, 2013, 11:30 a.m. UTC | #9
Hi Andre,

Nice catch! thanks.

I have just uploaded a new version.

https://patchwork.linuxtv.org/patch/19502/
https://patchwork.linuxtv.org/patch/19503/


Thanks for your help

On Fri, Aug 2, 2013 at 10:46 AM, Andre Heider <a.heider@gmail.com> wrote:
> Hi Ricardo,
>
> sorry for the late answer, but the leak I mentioned in my first reply is still there, see below.
>
> On Fri, Jul 19, 2013 at 07:02:33PM +0200, Ricardo Ribalda Delgado wrote:
>> Most DMA engines have limitations regarding the number of DMA segments
>> (sg-buffers) that they can handle. Videobuffers can easily spread
>> through houndreds of pages.
>>
>> In the previous aproach, the pages were allocated individually, this
>> could led to the creation houndreds of dma segments (sg-buffers) that
>> could not be handled by some DMA engines.
>>
>> This patch tries to minimize the number of DMA segments by using
>> alloc_pages. In the worst case it will behave as before, but most
>> of the times it will reduce the number of dma segments
>>
>> Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
>> ---
>>  drivers/media/v4l2-core/videobuf2-dma-sg.c |   60 +++++++++++++++++++++++-----
>>  1 file changed, 49 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
>> index 16ae3dc..c053605 100644
>> --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
>> +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
>> @@ -42,10 +42,55 @@ struct vb2_dma_sg_buf {
>>
>>  static void vb2_dma_sg_put(void *buf_priv);
>>
>> +static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
>> +             gfp_t gfp_flags)
>> +{
>> +     unsigned int last_page = 0;
>> +     int size = buf->sg_desc.size;
>> +
>> +     while (size > 0) {
>> +             struct page *pages;
>> +             int order;
>> +             int i;
>> +
>> +             order = get_order(size);
>> +             /* Dont over allocate*/
>> +             if ((PAGE_SIZE << order) > size)
>> +                     order--;
>> +
>> +             pages = NULL;
>> +             while (!pages) {
>> +                     pages = alloc_pages(GFP_KERNEL | __GFP_ZERO |
>> +                                     __GFP_NOWARN | gfp_flags, order);
>> +                     if (pages)
>> +                             break;
>> +
>> +                     if (order == 0)
>> +                             while (last_page--) {
>> +                                     __free_page(buf->pages[last_page]);
>> +                                     return -ENOMEM;
>> +                             }
>
> The return statement doesn't make sense in the while() scope, that way you wouldn't need the loop at all.
>
> To prevent leaking pages of prior iterations (those with higher orders), pull the return out of there:
>
>                         while (last_page--)
>                                 __free_page(buf->pages[last_page]);
>                         return -ENOMEM;
>
> Regards,
> Andre
Andre Heider Aug. 2, 2013, 1:47 p.m. UTC | #10
Hi Ricardo,

I messed up one thing in my initial reply, sorry :(

And two additional nitpicks, while we're at it.

On Fri, Jul 19, 2013 at 07:02:33PM +0200, Ricardo Ribalda Delgado wrote:
> Most DMA engines have limitations regarding the number of DMA segments
> (sg-buffers) that they can handle. Videobuffers can easily spread
> through houndreds of pages.
> 
> In the previous aproach, the pages were allocated individually, this
> could led to the creation houndreds of dma segments (sg-buffers) that
> could not be handled by some DMA engines.

s/houndreds/hundreds/

> 
> This patch tries to minimize the number of DMA segments by using
> alloc_pages. In the worst case it will behave as before, but most
> of the times it will reduce the number of dma segments
> 
> Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>

With those changes you can add:

Reviewed-by: Andre Heider <a.heider@gmail.com>

> ---
>  drivers/media/v4l2-core/videobuf2-dma-sg.c |   60 +++++++++++++++++++++++-----
>  1 file changed, 49 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> index 16ae3dc..c053605 100644
> --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
> +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
> @@ -42,10 +42,55 @@ struct vb2_dma_sg_buf {
>  
>  static void vb2_dma_sg_put(void *buf_priv);
>  
> +static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
> +		gfp_t gfp_flags)
> +{
> +	unsigned int last_page = 0;
> +	int size = buf->sg_desc.size;
> +
> +	while (size > 0) {
> +		struct page *pages;
> +		int order;
> +		int i;
> +
> +		order = get_order(size);
> +		/* Dont over allocate*/
> +		if ((PAGE_SIZE << order) > size)
> +			order--;
> +
> +		pages = NULL;
> +		while (!pages) {
> +			pages = alloc_pages(GFP_KERNEL | __GFP_ZERO |
> +					__GFP_NOWARN | gfp_flags, order);
> +			if (pages)
> +				break;
> +
> +			if (order == 0)
> +				while (last_page--) {
> +					__free_page(buf->pages[last_page]);
> +					return -ENOMEM;
> +				}
> +			order--;
> +		}
> +
> +		split_page(pages, order);
> +		for (i = 0; i < (1<<order); i++) {

whitespace nit: "(1 << order)"

> +			buf->pages[last_page] = pages[i];

My fault, it should read:

			buf->pages[last_page] = &pages[i];

> +			sg_set_page(&buf->sg_desc.sglist[last_page],
> +					buf->pages[last_page], PAGE_SIZE, 0);
> +			last_page++;
> +		}
> +
> +		size -= PAGE_SIZE << order;
> +	}
> +
> +	return 0;
> +}
> +
>  static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_flags)
>  {
>  	struct vb2_dma_sg_buf *buf;
> -	int i;
> +	int ret;
>  
>  	buf = kzalloc(sizeof *buf, GFP_KERNEL);
>  	if (!buf)
> @@ -69,14 +114,9 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
>  	if (!buf->pages)
>  		goto fail_pages_array_alloc;
>  
> -	for (i = 0; i < buf->sg_desc.num_pages; ++i) {
> -		buf->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO |
> -					   __GFP_NOWARN | gfp_flags);
> -		if (NULL == buf->pages[i])
> -			goto fail_pages_alloc;
> -		sg_set_page(&buf->sg_desc.sglist[i],
> -			    buf->pages[i], PAGE_SIZE, 0);
> -	}
> +	ret = vb2_dma_sg_alloc_compacted(buf, gfp_flags);
> +	if (ret)
> +		goto fail_pages_alloc;
>  
>  	buf->handler.refcount = &buf->refcount;
>  	buf->handler.put = vb2_dma_sg_put;
> @@ -89,8 +129,6 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
>  	return buf;
>  
>  fail_pages_alloc:
> -	while (--i >= 0)
> -		__free_page(buf->pages[i]);
>  	kfree(buf->pages);
>  
>  fail_pages_array_alloc:
> -- 
> 1.7.10.4
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ricardo Ribalda Delgado Aug. 2, 2013, 2:20 p.m. UTC | #11
Thanks, I have just send a new version.

Regards!

On Fri, Aug 2, 2013 at 3:47 PM, Andre Heider <a.heider@gmail.com> wrote:
> Hi Ricardo,
>
> I messed up one thing in my initial reply, sorry :(
>
> And two additional nitpicks, while we're at it.
>
> On Fri, Jul 19, 2013 at 07:02:33PM +0200, Ricardo Ribalda Delgado wrote:
>> Most DMA engines have limitations regarding the number of DMA segments
>> (sg-buffers) that they can handle. Videobuffers can easily spread
>> through houndreds of pages.
>>
>> In the previous aproach, the pages were allocated individually, this
>> could led to the creation houndreds of dma segments (sg-buffers) that
>> could not be handled by some DMA engines.
>
> s/houndreds/hundreds/
>
>>
>> This patch tries to minimize the number of DMA segments by using
>> alloc_pages. In the worst case it will behave as before, but most
>> of the times it will reduce the number of dma segments
>>
>> Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
>
> With those changes you can add:
>
> Reviewed-by: Andre Heider <a.heider@gmail.com>
>
>> ---
>>  drivers/media/v4l2-core/videobuf2-dma-sg.c |   60 +++++++++++++++++++++++-----
>>  1 file changed, 49 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
>> index 16ae3dc..c053605 100644
>> --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
>> +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
>> @@ -42,10 +42,55 @@ struct vb2_dma_sg_buf {
>>
>>  static void vb2_dma_sg_put(void *buf_priv);
>>
>> +static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
>> +             gfp_t gfp_flags)
>> +{
>> +     unsigned int last_page = 0;
>> +     int size = buf->sg_desc.size;
>> +
>> +     while (size > 0) {
>> +             struct page *pages;
>> +             int order;
>> +             int i;
>> +
>> +             order = get_order(size);
>> +             /* Dont over allocate*/
>> +             if ((PAGE_SIZE << order) > size)
>> +                     order--;
>> +
>> +             pages = NULL;
>> +             while (!pages) {
>> +                     pages = alloc_pages(GFP_KERNEL | __GFP_ZERO |
>> +                                     __GFP_NOWARN | gfp_flags, order);
>> +                     if (pages)
>> +                             break;
>> +
>> +                     if (order == 0)
>> +                             while (last_page--) {
>> +                                     __free_page(buf->pages[last_page]);
>> +                                     return -ENOMEM;
>> +                             }
>> +                     order--;
>> +             }
>> +
>> +             split_page(pages, order);
>> +             for (i = 0; i < (1<<order); i++) {
>
> whitespace nit: "(1 << order)"
>
>> +                     buf->pages[last_page] = pages[i];
>
> My fault, it should read:
>
>                         buf->pages[last_page] = &pages[i];
>
>> +                     sg_set_page(&buf->sg_desc.sglist[last_page],
>> +                                     buf->pages[last_page], PAGE_SIZE, 0);
>> +                     last_page++;
>> +             }
>> +
>> +             size -= PAGE_SIZE << order;
>> +     }
>> +
>> +     return 0;
>> +}
>> +
>>  static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_flags)
>>  {
>>       struct vb2_dma_sg_buf *buf;
>> -     int i;
>> +     int ret;
>>
>>       buf = kzalloc(sizeof *buf, GFP_KERNEL);
>>       if (!buf)
>> @@ -69,14 +114,9 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
>>       if (!buf->pages)
>>               goto fail_pages_array_alloc;
>>
>> -     for (i = 0; i < buf->sg_desc.num_pages; ++i) {
>> -             buf->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO |
>> -                                        __GFP_NOWARN | gfp_flags);
>> -             if (NULL == buf->pages[i])
>> -                     goto fail_pages_alloc;
>> -             sg_set_page(&buf->sg_desc.sglist[i],
>> -                         buf->pages[i], PAGE_SIZE, 0);
>> -     }
>> +     ret = vb2_dma_sg_alloc_compacted(buf, gfp_flags);
>> +     if (ret)
>> +             goto fail_pages_alloc;
>>
>>       buf->handler.refcount = &buf->refcount;
>>       buf->handler.put = vb2_dma_sg_put;
>> @@ -89,8 +129,6 @@ static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
>>       return buf;
>>
>>  fail_pages_alloc:
>> -     while (--i >= 0)
>> -             __free_page(buf->pages[i]);
>>       kfree(buf->pages);
>>
>>  fail_pages_array_alloc:
>> --
>> 1.7.10.4
>>
diff mbox

Patch

diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c
index 16ae3dc..c053605 100644
--- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
+++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
@@ -42,10 +42,55 @@  struct vb2_dma_sg_buf {
 
 static void vb2_dma_sg_put(void *buf_priv);
 
+static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf,
+		gfp_t gfp_flags)
+{
+	unsigned int last_page = 0;
+	int size = buf->sg_desc.size;
+
+	while (size > 0) {
+		struct page *pages;
+		int order;
+		int i;
+
+		order = get_order(size);
+		/* Dont over allocate*/
+		if ((PAGE_SIZE << order) > size)
+			order--;
+
+		pages = NULL;
+		while (!pages) {
+			pages = alloc_pages(GFP_KERNEL | __GFP_ZERO |
+					__GFP_NOWARN | gfp_flags, order);
+			if (pages)
+				break;
+
+			if (order == 0)
+				while (last_page--) {
+					__free_page(buf->pages[last_page]);
+					return -ENOMEM;
+				}
+			order--;
+		}
+
+		split_page(pages, order);
+		for (i = 0; i < (1<<order); i++) {
+			buf->pages[last_page] = pages[i];
+			sg_set_page(&buf->sg_desc.sglist[last_page],
+					buf->pages[last_page], PAGE_SIZE, 0);
+			last_page++;
+		}
+
+		size -= PAGE_SIZE << order;
+	}
+
+	return 0;
+}
+
 static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_flags)
 {
 	struct vb2_dma_sg_buf *buf;
-	int i;
+	int ret;
 
 	buf = kzalloc(sizeof *buf, GFP_KERNEL);
 	if (!buf)
@@ -69,14 +114,9 @@  static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
 	if (!buf->pages)
 		goto fail_pages_array_alloc;
 
-	for (i = 0; i < buf->sg_desc.num_pages; ++i) {
-		buf->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO |
-					   __GFP_NOWARN | gfp_flags);
-		if (NULL == buf->pages[i])
-			goto fail_pages_alloc;
-		sg_set_page(&buf->sg_desc.sglist[i],
-			    buf->pages[i], PAGE_SIZE, 0);
-	}
+	ret = vb2_dma_sg_alloc_compacted(buf, gfp_flags);
+	if (ret)
+		goto fail_pages_alloc;
 
 	buf->handler.refcount = &buf->refcount;
 	buf->handler.put = vb2_dma_sg_put;
@@ -89,8 +129,6 @@  static void *vb2_dma_sg_alloc(void *alloc_ctx, unsigned long size, gfp_t gfp_fla
 	return buf;
 
 fail_pages_alloc:
-	while (--i >= 0)
-		__free_page(buf->pages[i]);
 	kfree(buf->pages);
 
 fail_pages_array_alloc: