[v2,1/2] dma/iommu: Add pgsize_bitmap confirmation in __iommu_dma_alloc_pages
diff mbox

Message ID 1459146732-15620-1-git-send-email-yong.wu@mediatek.com
State New
Headers show

Commit Message

Yong Wu March 28, 2016, 6:32 a.m. UTC
Currently __iommu_dma_alloc_pages assumes that all the IOMMU support
the granule of PAGE_SIZE. It call alloc_page to try allocating memory
in the last time. Fortunately the mininum pagesize in all the
current IOMMU is SZ_4K, so this works well.

But there may be a case in which the mininum granule of IOMMU may be
larger than PAGE_SIZE, then it will abort as the IOMMU cann't map the
discontinuous memory within a granule. For example, the pgsize_bitmap
of the IOMMU only has SZ_16K while the PAGE_SIZE is SZ_4K, then we
have to prepare SZ_16K continuous memory at least for each a granule
iommu mapping.

Signed-off-by: Yong Wu <yong.wu@mediatek.com>

---
v2:
 -rebase on v4.6-rc1.
 -add a new patch([1/2] add pgsize_bitmap) here.

 drivers/iommu/dma-iommu.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

Comments

Will Deacon March 29, 2016, 5:02 p.m. UTC | #1
On Mon, Mar 28, 2016 at 02:32:11PM +0800, Yong Wu wrote:
> Currently __iommu_dma_alloc_pages assumes that all the IOMMU support
> the granule of PAGE_SIZE. It call alloc_page to try allocating memory
> in the last time. Fortunately the mininum pagesize in all the
> current IOMMU is SZ_4K, so this works well.
> 
> But there may be a case in which the mininum granule of IOMMU may be
> larger than PAGE_SIZE, then it will abort as the IOMMU cann't map the
> discontinuous memory within a granule. For example, the pgsize_bitmap
> of the IOMMU only has SZ_16K while the PAGE_SIZE is SZ_4K, then we
> have to prepare SZ_16K continuous memory at least for each a granule
> iommu mapping.

Did you find a driver/platform that actually needs this? I certainly
think it's possible, but we don't necessarily need to fix it if it's
not broken yet! Just adding a comment would be enough.

Either way, a couple of review comments inline.

> 
> Signed-off-by: Yong Wu <yong.wu@mediatek.com>
> 
> ---
> v2:
>  -rebase on v4.6-rc1.
>  -add a new patch([1/2] add pgsize_bitmap) here.
> 
>  drivers/iommu/dma-iommu.c | 22 +++++++++++++---------
>  1 file changed, 13 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index 72d6182..75ce71e 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -190,11 +190,13 @@ static void __iommu_dma_free_pages(struct page **pages, int count)
>  	kvfree(pages);
>  }
>  
> -static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
> +static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp,
> +					     unsigned long pgsize_bitmap)
>  {
>  	struct page **pages;
>  	unsigned int i = 0, array_size = count * sizeof(*pages);
> -	unsigned int order = MAX_ORDER;
> +	int min_order = get_order(1 << __ffs(pgsize_bitmap));

1UL

> +	int order = MAX_ORDER;
>  
>  	if (array_size <= PAGE_SIZE)
>  		pages = kzalloc(array_size, GFP_KERNEL);
> @@ -213,13 +215,16 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
>  		/*
>  		 * Higher-order allocations are a convenience rather
>  		 * than a necessity, hence using __GFP_NORETRY until
> -		 * falling back to single-page allocations.
> +		 * falling back to min size allocations.
>  		 */
> -		for (order = min_t(unsigned int, order, __fls(count));
> -		     order > 0; order--) {
> -			page = alloc_pages(gfp | __GFP_NORETRY, order);
> +		for (order = min_t(int, order, __fls(count));
> +		     order >= min_order; order--) {
> +			page = alloc_pages((order == min_order) ? gfp :
> +					   gfp | __GFP_NORETRY, order);
>  			if (!page)
>  				continue;
> +			if (!order)
> +				break;

Isn't this handled by the loop condition?

Will
Doug Anderson April 5, 2016, 5:03 p.m. UTC | #2
Will,

On Tue, Mar 29, 2016 at 10:02 AM, Will Deacon <will.deacon@arm.com> wrote:
> On Mon, Mar 28, 2016 at 02:32:11PM +0800, Yong Wu wrote:
>> Currently __iommu_dma_alloc_pages assumes that all the IOMMU support
>> the granule of PAGE_SIZE. It call alloc_page to try allocating memory
>> in the last time. Fortunately the mininum pagesize in all the
>> current IOMMU is SZ_4K, so this works well.
>>
>> But there may be a case in which the mininum granule of IOMMU may be
>> larger than PAGE_SIZE, then it will abort as the IOMMU cann't map the
>> discontinuous memory within a granule. For example, the pgsize_bitmap
>> of the IOMMU only has SZ_16K while the PAGE_SIZE is SZ_4K, then we
>> have to prepare SZ_16K continuous memory at least for each a granule
>> iommu mapping.
>
> Did you find a driver/platform that actually needs this? I certainly
> think it's possible, but we don't necessarily need to fix it if it's
> not broken yet! Just adding a comment would be enough.

I think Yong Wu was adding this patch in response to your request from
v1 of <https://patchwork.kernel.org/patch/8487511/>.  Fixing this
seemed like a separate issue so I asked Yong Wu to make two patches
rather than sticking this fix in the patch as his original.  Hopefully
this makes sense.

> Either way, a couple of review comments inline.
>
>>
>> Signed-off-by: Yong Wu <yong.wu@mediatek.com>
>>
>> ---
>> v2:
>>  -rebase on v4.6-rc1.
>>  -add a new patch([1/2] add pgsize_bitmap) here.
>>
>>  drivers/iommu/dma-iommu.c | 22 +++++++++++++---------
>>  1 file changed, 13 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
>> index 72d6182..75ce71e 100644
>> --- a/drivers/iommu/dma-iommu.c
>> +++ b/drivers/iommu/dma-iommu.c
>> @@ -190,11 +190,13 @@ static void __iommu_dma_free_pages(struct page **pages, int count)
>>       kvfree(pages);
>>  }
>>
>> -static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
>> +static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp,
>> +                                          unsigned long pgsize_bitmap)
>>  {
>>       struct page **pages;
>>       unsigned int i = 0, array_size = count * sizeof(*pages);
>> -     unsigned int order = MAX_ORDER;
>> +     int min_order = get_order(1 << __ffs(pgsize_bitmap));
>
> 1UL

Yong: presumably you will spin with this fix?


>> +     int order = MAX_ORDER;
>>
>>       if (array_size <= PAGE_SIZE)
>>               pages = kzalloc(array_size, GFP_KERNEL);
>> @@ -213,13 +215,16 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
>>               /*
>>                * Higher-order allocations are a convenience rather
>>                * than a necessity, hence using __GFP_NORETRY until
>> -              * falling back to single-page allocations.
>> +              * falling back to min size allocations.
>>                */
>> -             for (order = min_t(unsigned int, order, __fls(count));
>> -                  order > 0; order--) {
>> -                     page = alloc_pages(gfp | __GFP_NORETRY, order);
>> +             for (order = min_t(int, order, __fls(count));
>> +                  order >= min_order; order--) {
>> +                     page = alloc_pages((order == min_order) ? gfp :
>> +                                        gfp | __GFP_NORETRY, order);
>>                       if (!page)
>>                               continue;
>> +                     if (!order)
>> +                             break;
>
> Isn't this handled by the loop condition?

He changed the loop condition to be ">= min_order" instead of "> 0",
so now we can get here with an order == 0.  This makes sense because
when min_order is not 0 you still want to run the code to split the
pages and it is sane not to duplicate that below.

Maybe I'm misunderstanding, though.  Perhaps you can explain how you
think this code should look?

-Doug
Will Deacon April 8, 2016, 1:07 p.m. UTC | #3
On Tue, Apr 05, 2016 at 10:03:32AM -0700, Doug Anderson wrote:
> On Tue, Mar 29, 2016 at 10:02 AM, Will Deacon <will.deacon@arm.com> wrote:
> > On Mon, Mar 28, 2016 at 02:32:11PM +0800, Yong Wu wrote:
> >> @@ -213,13 +215,16 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
> >>               /*
> >>                * Higher-order allocations are a convenience rather
> >>                * than a necessity, hence using __GFP_NORETRY until
> >> -              * falling back to single-page allocations.
> >> +              * falling back to min size allocations.
> >>                */
> >> -             for (order = min_t(unsigned int, order, __fls(count));
> >> -                  order > 0; order--) {
> >> -                     page = alloc_pages(gfp | __GFP_NORETRY, order);
> >> +             for (order = min_t(int, order, __fls(count));
> >> +                  order >= min_order; order--) {
> >> +                     page = alloc_pages((order == min_order) ? gfp :
> >> +                                        gfp | __GFP_NORETRY, order);
> >>                       if (!page)
> >>                               continue;
> >> +                     if (!order)
> >> +                             break;
> >
> > Isn't this handled by the loop condition?
> 
> He changed the loop condition to be ">= min_order" instead of "> 0",
> so now we can get here with an order == 0.  This makes sense because
> when min_order is not 0 you still want to run the code to split the
> pages and it is sane not to duplicate that below.
> 
> Maybe I'm misunderstanding, though.  Perhaps you can explain how you
> think this code should look?

My reading of the code was that we require order >= min_order to enter
the loop. Given that order doesn't change between the loop header and the
if (!order) check, then that must mean we can enter the loop body with
order == 0 and order >= min_order, which means that min_order is allowed
to be negative. That feels weird.

Am I barking up the wrong tree?

Will
Doug Anderson April 8, 2016, 4:50 p.m. UTC | #4
Will,

On Fri, Apr 8, 2016 at 6:07 AM, Will Deacon <will.deacon@arm.com> wrote:
> On Tue, Apr 05, 2016 at 10:03:32AM -0700, Doug Anderson wrote:
>> On Tue, Mar 29, 2016 at 10:02 AM, Will Deacon <will.deacon@arm.com> wrote:
>> > On Mon, Mar 28, 2016 at 02:32:11PM +0800, Yong Wu wrote:
>> >> @@ -213,13 +215,16 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
>> >>               /*
>> >>                * Higher-order allocations are a convenience rather
>> >>                * than a necessity, hence using __GFP_NORETRY until
>> >> -              * falling back to single-page allocations.
>> >> +              * falling back to min size allocations.
>> >>                */
>> >> -             for (order = min_t(unsigned int, order, __fls(count));
>> >> -                  order > 0; order--) {
>> >> -                     page = alloc_pages(gfp | __GFP_NORETRY, order);
>> >> +             for (order = min_t(int, order, __fls(count));
>> >> +                  order >= min_order; order--) {
>> >> +                     page = alloc_pages((order == min_order) ? gfp :
>> >> +                                        gfp | __GFP_NORETRY, order);
>> >>                       if (!page)
>> >>                               continue;
>> >> +                     if (!order)
>> >> +                             break;
>> >
>> > Isn't this handled by the loop condition?
>>
>> He changed the loop condition to be ">= min_order" instead of "> 0",
>> so now we can get here with an order == 0.  This makes sense because
>> when min_order is not 0 you still want to run the code to split the
>> pages and it is sane not to duplicate that below.
>>
>> Maybe I'm misunderstanding, though.  Perhaps you can explain how you
>> think this code should look?
>
> My reading of the code was that we require order >= min_order to enter
> the loop. Given that order doesn't change between the loop header and the
> if (!order) check, then that must mean we can enter the loop body with
> order == 0 and order >= min_order, which means that min_order is allowed
> to be negative. That feels weird.
>
> Am I barking up the wrong tree?

I don't think min_order can be negative.  Certainly we could enter the
loop with order == 0 and min_order == 0, though.


Some examples:

order = 0, min_order = 0
-> Want alloc_pages _without_ __GFP_NORETRY.  OK
-> If alloc_pages fails, return NULL.  OK
-> If alloc pages succeeds, don't need splitting since single page.  OK

order = 1, min_order = 1
-> Want alloc_pages _without_ __GFP_NORETRY.  OK
-> If alloc_pages fails, return NULL.  OK
-> If alloc pages succeeds, DO need splitting.  OK

order = 1, min_order = 0
-> Want alloc_pages with __GFP_NORETRY.  OK
-> If alloc_pages fails, try order = 0.  OK
-> If alloc pages succeeds, DO need splitting.  OK

order = 2, min_order = 1
-> Want alloc_pages with __GFP_NORETRY.  OK
-> If alloc_pages fails, try order = 1.  OK
-> If alloc pages succeeds, DO need splitting.  OK


I think those are all right.  Did I mess up?  You could certainly
structure the loop in a different way but you need to make sure you
handle all of those cases.  If you have an alternate structure that
handles all those, let's consider it.

-Doug
Will Deacon April 8, 2016, 5:30 p.m. UTC | #5
On Fri, Apr 08, 2016 at 09:50:43AM -0700, Doug Anderson wrote:
> On Fri, Apr 8, 2016 at 6:07 AM, Will Deacon <will.deacon@arm.com> wrote:
> > On Tue, Apr 05, 2016 at 10:03:32AM -0700, Doug Anderson wrote:
> >> On Tue, Mar 29, 2016 at 10:02 AM, Will Deacon <will.deacon@arm.com> wrote:
> >> > On Mon, Mar 28, 2016 at 02:32:11PM +0800, Yong Wu wrote:
> >> >> @@ -213,13 +215,16 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
> >> >>               /*
> >> >>                * Higher-order allocations are a convenience rather
> >> >>                * than a necessity, hence using __GFP_NORETRY until
> >> >> -              * falling back to single-page allocations.
> >> >> +              * falling back to min size allocations.
> >> >>                */
> >> >> -             for (order = min_t(unsigned int, order, __fls(count));
> >> >> -                  order > 0; order--) {
> >> >> -                     page = alloc_pages(gfp | __GFP_NORETRY, order);
> >> >> +             for (order = min_t(int, order, __fls(count));
> >> >> +                  order >= min_order; order--) {
> >> >> +                     page = alloc_pages((order == min_order) ? gfp :
> >> >> +                                        gfp | __GFP_NORETRY, order);
> >> >>                       if (!page)
> >> >>                               continue;
> >> >> +                     if (!order)
> >> >> +                             break;
> >> >
> >> > Isn't this handled by the loop condition?
> >>
> >> He changed the loop condition to be ">= min_order" instead of "> 0",
> >> so now we can get here with an order == 0.  This makes sense because
> >> when min_order is not 0 you still want to run the code to split the
> >> pages and it is sane not to duplicate that below.
> >>
> >> Maybe I'm misunderstanding, though.  Perhaps you can explain how you
> >> think this code should look?
> >
> > My reading of the code was that we require order >= min_order to enter
> > the loop. Given that order doesn't change between the loop header and the
> > if (!order) check, then that must mean we can enter the loop body with
> > order == 0 and order >= min_order, which means that min_order is allowed
> > to be negative. That feels weird.
> >
> > Am I barking up the wrong tree?
> 
> I don't think min_order can be negative.  Certainly we could enter the
> loop with order == 0 and min_order == 0, though.

... and in that case, PageCompound will be false, and we'll call split_page
which won't do anything, so we break out.

> 
> Some examples:
> 
> order = 0, min_order = 0
> -> Want alloc_pages _without_ __GFP_NORETRY.  OK
> -> If alloc_pages fails, return NULL.  OK
> -> If alloc pages succeeds, don't need splitting since single page.  OK

[...]

> I think those are all right.  Did I mess up?  You could certainly
> structure the loop in a different way but you need to make sure you
> handle all of those cases.  If you have an alternate structure that
> handles all those, let's consider it.

Right, I don't think the code is broken, I just think the !order check is
confusing and not needed.

Will
Doug Anderson April 8, 2016, 5:34 p.m. UTC | #6
Hi,

On Fri, Apr 8, 2016 at 10:30 AM, Will Deacon <will.deacon@arm.com> wrote:
>> > Am I barking up the wrong tree?
>>
>> I don't think min_order can be negative.  Certainly we could enter the
>> loop with order == 0 and min_order == 0, though.
>
> ... and in that case, PageCompound will be false, and we'll call split_page
> which won't do anything, so we break out.
>
>>
>> Some examples:
>>
>> order = 0, min_order = 0
>> -> Want alloc_pages _without_ __GFP_NORETRY.  OK
>> -> If alloc_pages fails, return NULL.  OK
>> -> If alloc pages succeeds, don't need splitting since single page.  OK
>
> [...]
>
>> I think those are all right.  Did I mess up?  You could certainly
>> structure the loop in a different way but you need to make sure you
>> handle all of those cases.  If you have an alternate structure that
>> handles all those, let's consider it.
>
> Right, I don't think the code is broken, I just think the !order check is
> confusing and not needed.

Ah ha!  Got it.  I didn't dig into split_page() to see that it was a
no-op when "order == 0".  I just know that the old code didn't call
split_page() with order == 0 so I assumed that was wise to keep.  If
we don't need to keep that then agreed that the "if" test can simply
be removed.  :)

-Doug
Yong Wu April 11, 2016, 7:40 a.m. UTC | #7
On Fri, 2016-04-08 at 10:34 -0700, Doug Anderson wrote:
> Hi,
> 
> On Fri, Apr 8, 2016 at 10:30 AM, Will Deacon <will.deacon@arm.com> wrote:
> >> > Am I barking up the wrong tree?
> >>
> >> I don't think min_order can be negative.  Certainly we could enter the
> >> loop with order == 0 and min_order == 0, though.
> >
> > ... and in that case, PageCompound will be false, and we'll call split_page
> > which won't do anything, so we break out.
> >
> >>
> >> Some examples:
> >>
> >> order = 0, min_order = 0
> >> -> Want alloc_pages _without_ __GFP_NORETRY.  OK
> >> -> If alloc_pages fails, return NULL.  OK
> >> -> If alloc pages succeeds, don't need splitting since single page.  OK
> >
> > [...]
> >
> >> I think those are all right.  Did I mess up?  You could certainly
> >> structure the loop in a different way but you need to make sure you
> >> handle all of those cases.  If you have an alternate structure that
> >> handles all those, let's consider it.
> >
> > Right, I don't think the code is broken, I just think the !order check is
> > confusing and not needed.
> 
> Ah ha!  Got it.  I didn't dig into split_page() to see that it was a
> no-op when "order == 0".  I just know that the old code didn't call
> split_page() with order == 0 so I assumed that was wise to keep.  If
> we don't need to keep that then agreed that the "if" test can simply
> be removed.  :)
> 
> -Doug

Hi Will, Doug,

Thanks very much for review this patch, and Thanks Robin's work,
Currently this one is obsoleted since it's included in [1].

[1]:http://lists.linuxfoundation.org/pipermail/iommu/2016-April/016402.html

Patch
diff mbox

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 72d6182..75ce71e 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -190,11 +190,13 @@  static void __iommu_dma_free_pages(struct page **pages, int count)
 	kvfree(pages);
 }
 
-static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
+static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp,
+					     unsigned long pgsize_bitmap)
 {
 	struct page **pages;
 	unsigned int i = 0, array_size = count * sizeof(*pages);
-	unsigned int order = MAX_ORDER;
+	int min_order = get_order(1 << __ffs(pgsize_bitmap));
+	int order = MAX_ORDER;
 
 	if (array_size <= PAGE_SIZE)
 		pages = kzalloc(array_size, GFP_KERNEL);
@@ -213,13 +215,16 @@  static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
 		/*
 		 * Higher-order allocations are a convenience rather
 		 * than a necessity, hence using __GFP_NORETRY until
-		 * falling back to single-page allocations.
+		 * falling back to min size allocations.
 		 */
-		for (order = min_t(unsigned int, order, __fls(count));
-		     order > 0; order--) {
-			page = alloc_pages(gfp | __GFP_NORETRY, order);
+		for (order = min_t(int, order, __fls(count));
+		     order >= min_order; order--) {
+			page = alloc_pages((order == min_order) ? gfp :
+					   gfp | __GFP_NORETRY, order);
 			if (!page)
 				continue;
+			if (!order)
+				break;
 			if (PageCompound(page)) {
 				if (!split_huge_page(page))
 					break;
@@ -229,8 +234,6 @@  static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
 				break;
 			}
 		}
-		if (!page)
-			page = alloc_page(gfp);
 		if (!page) {
 			__iommu_dma_free_pages(pages, i);
 			return NULL;
@@ -292,7 +295,8 @@  struct page **iommu_dma_alloc(struct device *dev, size_t size,
 
 	*handle = DMA_ERROR_CODE;
 
-	pages = __iommu_dma_alloc_pages(count, gfp);
+	pages = __iommu_dma_alloc_pages(count, gfp,
+					domain->ops->pgsize_bitmap);
 	if (!pages)
 		return NULL;