diff mbox series

[rfc,v3,2/4] page_pool: add interface for getting and setting pagecnt_bias

Message ID 1626092196-44697-3-git-send-email-linyunsheng@huawei.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series add frag page support in page pool | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Guessed tree name to be net-next
netdev/subject_prefix warning Target tree name not specified in the subject
netdev/cc_maintainers success CCed 14 of 14 maintainers
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 6118 this patch: 6118
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 58 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 6175 this patch: 6175
netdev/header_inline success Link

Commit Message

Yunsheng Lin July 12, 2021, 12:16 p.m. UTC
As suggested by Alexander, "A DMA mapping should be page
aligned anyway so the lower 12 bits would be reserved 0",
so it might make more sense to repurpose the lower 12 bits
of the dma address to store the pagecnt_bias for frag page
support in page pool.

As the newly added page_pool_get_pagecnt_bias() may be called
outside of the softirq context, annotate the accesses to
page->dma_addr[0] with READ_ONCE() and WRITE_ONCE().

page_pool_pagecnt_bias_ptr() is also added, to allow the
pagecnt_bias to be updated atomically when a page is passed to
the user.

The other three interfaces using page->dma_addr[0] are only called
in the softirq context during normal rx processing; hopefully
the barrier in the rx processing will ensure the correct ordering
between getting and setting pagecnt_bias.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
 include/net/page_pool.h | 29 +++++++++++++++++++++++++++--
 net/core/page_pool.c    |  8 +++++++-
 2 files changed, 34 insertions(+), 3 deletions(-)

Comments

Alexander Duyck July 12, 2021, 4:02 p.m. UTC | #1
On Mon, Jul 12, 2021 at 5:17 AM Yunsheng Lin <linyunsheng@huawei.com> wrote:
>
> As suggested by Alexander, "A DMA mapping should be page
> aligned anyway so the lower 12 bits would be reserved 0",
> so it might make more sense to repurpose the lower 12 bits
> of the dma address to store the pagecnt_bias for frag page
> support in page pool.
>
> As newly added page_pool_get_pagecnt_bias() may be called
> outside of the softirq context, so annotate the access to
> page->dma_addr[0] with READ_ONCE() and WRITE_ONCE().
>
> And page_pool_get_pagecnt_bias_ptr() is added to implement
> the pagecnt_bias atomic updating when a page is passsed to
> the user.
>
> Other three interfaces using page->dma_addr[0] is only called
> in the softirq context during normal rx processing, hopefully
> the barrier in the rx processing will ensure the correct order
> between getting and setting pagecnt_bias.
>
> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> ---
>  include/net/page_pool.h | 29 +++++++++++++++++++++++++++--
>  net/core/page_pool.c    |  8 +++++++-
>  2 files changed, 34 insertions(+), 3 deletions(-)
>
> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
> index 8d7744d..84cd972 100644
> --- a/include/net/page_pool.h
> +++ b/include/net/page_pool.h
> @@ -200,17 +200,42 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
>
>  static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>  {
> -       dma_addr_t ret = page->dma_addr[0];
> +       dma_addr_t ret = READ_ONCE(page->dma_addr[0]) & PAGE_MASK;
>         if (sizeof(dma_addr_t) > sizeof(unsigned long))
>                 ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
>         return ret;
>  }
>
> -static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
> +static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
>  {
> +       if (WARN_ON(addr & ~PAGE_MASK))
> +               return false;
> +
>         page->dma_addr[0] = addr;
>         if (sizeof(dma_addr_t) > sizeof(unsigned long))
>                 page->dma_addr[1] = upper_32_bits(addr);
> +
> +       return true;
> +}
> +

Rather than making this a part of the check here it might make more
sense to pull this out and perform the WARN_ON after the check for
dma_mapping_error.

Also it occurs to me that we only really have to do this in the case
where dma_addr_t is larger than the size of a long. Otherwise we could
just have the code split things so that dma_addr[0] is the dma_addr
and dma_addr[1] is our pagecnt_bias value in which case we could
probably just skip the check.

> +static inline int page_pool_get_pagecnt_bias(struct page *page)
> +{
> +       return READ_ONCE(page->dma_addr[0]) & ~PAGE_MASK;
> +}
> +
> +static inline unsigned long *page_pool_pagecnt_bias_ptr(struct page *page)
> +{
> +       return page->dma_addr;
> +}
> +
> +static inline void page_pool_set_pagecnt_bias(struct page *page, int bias)
> +{
> +       unsigned long dma_addr_0 = READ_ONCE(page->dma_addr[0]);
> +
> +       dma_addr_0 &= PAGE_MASK;
> +       dma_addr_0 |= bias;
> +
> +       WRITE_ONCE(page->dma_addr[0], dma_addr_0);
>  }
>
>  static inline bool is_page_pool_compiled_in(void)
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index 78838c6..1abefc6 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -198,7 +198,13 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
>         if (dma_mapping_error(pool->p.dev, dma))
>                 return false;
>

So instead of adding to the function below you could just add your
WARN_ON check here with the unmapping call.

> -       page_pool_set_dma_addr(page, dma);
> +       if (unlikely(!page_pool_set_dma_addr(page, dma))) {
> +               dma_unmap_page_attrs(pool->p.dev, dma,
> +                                    PAGE_SIZE << pool->p.order,
> +                                    pool->p.dma_dir,
> +                                    DMA_ATTR_SKIP_CPU_SYNC);
> +               return false;
> +       }
>
>         if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
>                 page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
> --
> 2.7.4
>
Jesper Dangaard Brouer July 12, 2021, 5:20 p.m. UTC | #2
On 12/07/2021 18.02, Alexander Duyck wrote:
> On Mon, Jul 12, 2021 at 5:17 AM Yunsheng Lin <linyunsheng@huawei.com> wrote:
>>
>> As suggested by Alexander, "A DMA mapping should be page
>> aligned anyway so the lower 12 bits would be reserved 0",
>> so it might make more sense to repurpose the lower 12 bits
>> of the dma address to store the pagecnt_bias for frag page
>> support in page pool.
>>
>> As newly added page_pool_get_pagecnt_bias() may be called
>> outside of the softirq context, so annotate the access to
>> page->dma_addr[0] with READ_ONCE() and WRITE_ONCE().
>>
>> And page_pool_get_pagecnt_bias_ptr() is added to implement
>> the pagecnt_bias atomic updating when a page is passsed to
>> the user.
>>
>> Other three interfaces using page->dma_addr[0] is only called
>> in the softirq context during normal rx processing, hopefully
>> the barrier in the rx processing will ensure the correct order
>> between getting and setting pagecnt_bias.
>>
>> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
>> ---
>>   include/net/page_pool.h | 29 +++++++++++++++++++++++++++--
>>   net/core/page_pool.c    |  8 +++++++-
>>   2 files changed, 34 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
>> index 8d7744d..84cd972 100644
>> --- a/include/net/page_pool.h
>> +++ b/include/net/page_pool.h
>> @@ -200,17 +200,42 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
>>
>>   static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>>   {
>> -       dma_addr_t ret = page->dma_addr[0];
>> +       dma_addr_t ret = READ_ONCE(page->dma_addr[0]) & PAGE_MASK;
>>          if (sizeof(dma_addr_t) > sizeof(unsigned long))
>>                  ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
>>          return ret;
>>   }
>>
>> -static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
>> +static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
>>   {
>> +       if (WARN_ON(addr & ~PAGE_MASK))
>> +               return false;
>> +
>>          page->dma_addr[0] = addr;
>>          if (sizeof(dma_addr_t) > sizeof(unsigned long))
>>                  page->dma_addr[1] = upper_32_bits(addr);
>> +
>> +       return true;
>> +}
>> +
> 
> Rather than making this a part of the check here it might make more
> sense to pull this out and perform the WARN_ON after the check for
> dma_mapping_error.

I need to point out that I don't like WARN_ON and BUG_ON code in
fast-path code, because the compiler adds 'ud2' assembler instructions
that influence the instruction-cache fetching in the CPU.  Yes, I have
seen a measurable impact from this before.


> Also it occurs to me that we only really have to do this in the case
> where dma_addr_t is larger than the size of a long. Otherwise we could
> just have the code split things so that dma_addr[0] is the dma_addr
> and dma_addr[1] is our pagecnt_bias value in which case we could
> probably just skip the check.

The dance to get 64-bit DMA addr on 32-bit systems is rather ugly and 
confusing, sadly.  We could take advantage of this, I just hope this 
will not make it uglier.


>> +static inline int page_pool_get_pagecnt_bias(struct page *page)
>> +{
>> +       return READ_ONCE(page->dma_addr[0]) & ~PAGE_MASK;
>> +}
>> +
>> +static inline unsigned long *page_pool_pagecnt_bias_ptr(struct page *page)
>> +{
>> +       return page->dma_addr;
>> +}
>> +
>> +static inline void page_pool_set_pagecnt_bias(struct page *page, int bias)
>> +{
>> +       unsigned long dma_addr_0 = READ_ONCE(page->dma_addr[0]);
>> +
>> +       dma_addr_0 &= PAGE_MASK;
>> +       dma_addr_0 |= bias;
>> +
>> +       WRITE_ONCE(page->dma_addr[0], dma_addr_0);
>>   }
>>
>>   static inline bool is_page_pool_compiled_in(void)
>> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
>> index 78838c6..1abefc6 100644
>> --- a/net/core/page_pool.c
>> +++ b/net/core/page_pool.c
>> @@ -198,7 +198,13 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
>>          if (dma_mapping_error(pool->p.dev, dma))
>>                  return false;
>>
> 
> So instead of adding to the function below you could just add your
> WARN_ON check here with the unmapping call.
> 
>> -       page_pool_set_dma_addr(page, dma);
>> +       if (unlikely(!page_pool_set_dma_addr(page, dma))) {
>> +               dma_unmap_page_attrs(pool->p.dev, dma,
>> +                                    PAGE_SIZE << pool->p.order,
>> +                                    pool->p.dma_dir,
>> +                                    DMA_ATTR_SKIP_CPU_SYNC);
>> +               return false;
>> +       }
>>
>>          if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
>>                  page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
>> --
>> 2.7.4
>>
>
Ilias Apalodimas July 12, 2021, 6:08 p.m. UTC | #3
[...]
> > > +static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
> > >   {
> > > +       if (WARN_ON(addr & ~PAGE_MASK))
> > > +               return false;
> > > +
> > >          page->dma_addr[0] = addr;
> > >          if (sizeof(dma_addr_t) > sizeof(unsigned long))
> > >                  page->dma_addr[1] = upper_32_bits(addr);
> > > +
> > > +       return true;
> > > +}
> > > +
> > 
> > Rather than making this a part of the check here it might make more
> > sense to pull this out and perform the WARN_ON after the check for
> > dma_mapping_error.
> 
> I need to point out that I don't like WARN_ON and BUG_ON code in fast-path
> code, because compiler adds 'ud2' assembler instructions that influences the
> instruction-cache fetching in the CPU.  Yes, I have seen a measuresable
> impact from this before.
> 
> 
> > Also it occurs to me that we only really have to do this in the case
> > where dma_addr_t is larger than the size of a long. Otherwise we could
> > just have the code split things so that dma_addr[0] is the dma_addr
> > and dma_addr[1] is our pagecnt_bias value in which case we could
> > probably just skip the check.
> 
> The dance to get 64-bit DMA addr on 32-bit systems is rather ugly and
> confusing, sadly.  We could take advantage of this, I just hope this will
> not make it uglier.

Note here that we can only use this because dma_addr is not aliased to
compound page anymore (after the initial page_pool recycling patchset).
We must keep this in mind if we ever restructure struct page.

Can we do something more radical for this? The 64/32 bit dance is only
there for 32 bit systems with 64 bit dma.  Since the last time we asked
about this no one seemed to care about these, and I really doubt we'll get
an ethernet driver for them (that needs recycling....), can we *only* support
frag allocation and recycling for 'normal' systems? We could always just
re-purpose dma_addr[1] for those.

Regards
/Ilias

> 
> 
> > > +static inline int page_pool_get_pagecnt_bias(struct page *page)
> > > +{
> > > +       return READ_ONCE(page->dma_addr[0]) & ~PAGE_MASK;
> > > +}
> > > +
> > > +static inline unsigned long *page_pool_pagecnt_bias_ptr(struct page *page)
> > > +{
> > > +       return page->dma_addr;
> > > +}
> > > +
> > > +static inline void page_pool_set_pagecnt_bias(struct page *page, int bias)
> > > +{
> > > +       unsigned long dma_addr_0 = READ_ONCE(page->dma_addr[0]);
> > > +
> > > +       dma_addr_0 &= PAGE_MASK;
> > > +       dma_addr_0 |= bias;
> > > +
> > > +       WRITE_ONCE(page->dma_addr[0], dma_addr_0);
> > >   }
> > > 
> > >   static inline bool is_page_pool_compiled_in(void)
> > > diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> > > index 78838c6..1abefc6 100644
> > > --- a/net/core/page_pool.c
> > > +++ b/net/core/page_pool.c
> > > @@ -198,7 +198,13 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
> > >          if (dma_mapping_error(pool->p.dev, dma))
> > >                  return false;
> > > 
> > 
> > So instead of adding to the function below you could just add your
> > WARN_ON check here with the unmapping call.
> > 
> > > -       page_pool_set_dma_addr(page, dma);
> > > +       if (unlikely(!page_pool_set_dma_addr(page, dma))) {
> > > +               dma_unmap_page_attrs(pool->p.dev, dma,
> > > +                                    PAGE_SIZE << pool->p.order,
> > > +                                    pool->p.dma_dir,
> > > +                                    DMA_ATTR_SKIP_CPU_SYNC);
> > > +               return false;
> > > +       }
> > > 
> > >          if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
> > >                  page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
> > > --
> > > 2.7.4
> > > 
> > 
>
Yunsheng Lin July 13, 2021, 6:38 a.m. UTC | #4
On 2021/7/13 2:08, Ilias Apalodimas wrote:
> [...]
>>>> +static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
>>>>   {
>>>> +       if (WARN_ON(addr & ~PAGE_MASK))
>>>> +               return false;
>>>> +
>>>>          page->dma_addr[0] = addr;
>>>>          if (sizeof(dma_addr_t) > sizeof(unsigned long))
>>>>                  page->dma_addr[1] = upper_32_bits(addr);
>>>> +
>>>> +       return true;
>>>> +}
>>>> +
>>>
>>> Rather than making this a part of the check here it might make more
>>> sense to pull this out and perform the WARN_ON after the check for
>>> dma_mapping_error.
>>
>> I need to point out that I don't like WARN_ON and BUG_ON code in fast-path
>> code, because compiler adds 'ud2' assembler instructions that influences the
>> instruction-cache fetching in the CPU.  Yes, I have seen a measuresable
>> impact from this before.
>>
>>
>>> Also it occurs to me that we only really have to do this in the case
>>> where dma_addr_t is larger than the size of a long. Otherwise we could
>>> just have the code split things so that dma_addr[0] is the dma_addr
>>> and dma_addr[1] is our pagecnt_bias value in which case we could
>>> probably just skip the check.
>>
>> The dance to get 64-bit DMA addr on 32-bit systems is rather ugly and
>> confusing, sadly.  We could take advantage of this, I just hope this will
>> not make it uglier.
> 
> Note here that we can only use this because dma_addr is not aliased to
> compound page anymore (after the initial page_pool recycling patchset). 
> We must keep this in mind if we even restructure struct page.
> 
> Can we do something more radical for this? The 64/32 bit dance is only
> there for 32 bit systems with 64 bit dma.  Since the last time we asked
> about this no one seemed to care about these, and I really doubt we'll get
> an ethernet driver for them (that needs recycling....), can we *only* support 
> frag allocation and recycling for 'normal' systems? We could always just r
> e-purpose dma_addr[1] for those.

I will define a macro for "sizeof(dma_addr_t) > sizeof(unsigned long)" to
decide whether to use dma_addr[1]; hopefully the compiler will optimize
out the unused code on any given system.

> 
> Regards
> /Ilias
> 
>>
>>
>>>> +static inline int page_pool_get_pagecnt_bias(struct page *page)
>>>> +{
>>>> +       return READ_ONCE(page->dma_addr[0]) & ~PAGE_MASK;
>>>> +}
>>>> +
>>>> +static inline unsigned long *page_pool_pagecnt_bias_ptr(struct page *page)
>>>> +{
>>>> +       return page->dma_addr;
>>>> +}
>>>> +
>>>> +static inline void page_pool_set_pagecnt_bias(struct page *page, int bias)
>>>> +{
>>>> +       unsigned long dma_addr_0 = READ_ONCE(page->dma_addr[0]);
>>>> +
>>>> +       dma_addr_0 &= PAGE_MASK;
>>>> +       dma_addr_0 |= bias;
>>>> +
>>>> +       WRITE_ONCE(page->dma_addr[0], dma_addr_0);
>>>>   }
>>>>
>>>>   static inline bool is_page_pool_compiled_in(void)
>>>> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
>>>> index 78838c6..1abefc6 100644
>>>> --- a/net/core/page_pool.c
>>>> +++ b/net/core/page_pool.c
>>>> @@ -198,7 +198,13 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
>>>>          if (dma_mapping_error(pool->p.dev, dma))
>>>>                  return false;
>>>>
>>>
>>> So instead of adding to the function below you could just add your
>>> WARN_ON check here with the unmapping call.

Ok.

>>>
>>>> -       page_pool_set_dma_addr(page, dma);
>>>> +       if (unlikely(!page_pool_set_dma_addr(page, dma))) {
>>>> +               dma_unmap_page_attrs(pool->p.dev, dma,
>>>> +                                    PAGE_SIZE << pool->p.order,
>>>> +                                    pool->p.dma_dir,
>>>> +                                    DMA_ATTR_SKIP_CPU_SYNC);
>>>> +               return false;
>>>> +       }
>>>>
>>>>          if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
>>>>                  page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
>>>> --
>>>> 2.7.4
>>>>
>>>
>>
> .
>
diff mbox series

Patch

diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 8d7744d..84cd972 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -200,17 +200,42 @@  static inline void page_pool_recycle_direct(struct page_pool *pool,
 
 static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
 {
-	dma_addr_t ret = page->dma_addr[0];
+	dma_addr_t ret = READ_ONCE(page->dma_addr[0]) & PAGE_MASK;
 	if (sizeof(dma_addr_t) > sizeof(unsigned long))
 		ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
 	return ret;
 }
 
-static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
 {
+	if (WARN_ON(addr & ~PAGE_MASK))
+		return false;
+
 	page->dma_addr[0] = addr;
 	if (sizeof(dma_addr_t) > sizeof(unsigned long))
 		page->dma_addr[1] = upper_32_bits(addr);
+
+	return true;
+}
+
+static inline int page_pool_get_pagecnt_bias(struct page *page)
+{
+	return READ_ONCE(page->dma_addr[0]) & ~PAGE_MASK;
+}
+
+static inline unsigned long *page_pool_pagecnt_bias_ptr(struct page *page)
+{
+	return page->dma_addr;
+}
+
+static inline void page_pool_set_pagecnt_bias(struct page *page, int bias)
+{
+	unsigned long dma_addr_0 = READ_ONCE(page->dma_addr[0]);
+
+	dma_addr_0 &= PAGE_MASK;
+	dma_addr_0 |= bias;
+
+	WRITE_ONCE(page->dma_addr[0], dma_addr_0);
 }
 
 static inline bool is_page_pool_compiled_in(void)
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 78838c6..1abefc6 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -198,7 +198,13 @@  static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
 	if (dma_mapping_error(pool->p.dev, dma))
 		return false;
 
-	page_pool_set_dma_addr(page, dma);
+	if (unlikely(!page_pool_set_dma_addr(page, dma))) {
+		dma_unmap_page_attrs(pool->p.dev, dma,
+				     PAGE_SIZE << pool->p.order,
+				     pool->p.dma_dir,
+				     DMA_ATTR_SKIP_CPU_SYNC);
+		return false;
+	}
 
 	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
 		page_pool_dma_sync_for_device(pool, page, pool->p.max_len);