
[v2] net: ethernet: mtk_eth_soc: fix misuse of mem alloc interface netdev[napi]_alloc_frag

Message ID 1654245968-8067-1-git-send-email-chen45464546@163.com (mailing list archive)
State New, archived
Series [v2] net: ethernet: mtk_eth_soc: fix misuse of mem alloc interface netdev[napi]_alloc_frag

Commit Message

Chen Lin June 3, 2022, 8:46 a.m. UTC
When rx_flag == MTK_RX_FLAGS_HWLRO,
rx_data_len = MTK_MAX_LRO_RX_LENGTH (4096 * 3) > PAGE_SIZE.
netdev_alloc_frag() is only meant for allocating page fragments;
see other drivers and Documentation/vm/page_frags.rst for reference.

Branch to alloc_pages() when ring->frag_size > PAGE_SIZE.

Signed-off-by: Chen Lin <chen45464546@163.com>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c |   22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

Comments

Alexander H Duyck June 3, 2022, 3:25 p.m. UTC | #1
On Fri, Jun 3, 2022 at 2:03 AM Chen Lin <chen45464546@163.com> wrote:
>
> When rx_flag == MTK_RX_FLAGS_HWLRO,
> rx_data_len = MTK_MAX_LRO_RX_LENGTH(4096 * 3) > PAGE_SIZE.
> netdev_alloc_frag is for alloction of page fragment only.
> Reference to other drivers and Documentation/vm/page_frags.rst
>
> Branch to use alloc_pages when ring->frag_size > PAGE_SIZE.
>
> Signed-off-by: Chen Lin <chen45464546@163.com>
> ---
>  drivers/net/ethernet/mediatek/mtk_eth_soc.c |   22 ++++++++++++++++++++--
>  1 file changed, 20 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
> index b3b3c07..772d903 100644
> --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
> +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
> @@ -1467,7 +1467,16 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
>                         goto release_desc;
>
>                 /* alloc new buffer */
> -               new_data = napi_alloc_frag(ring->frag_size);
> +               if (ring->frag_size <= PAGE_SIZE) {
> +                       new_data = napi_alloc_frag(ring->frag_size);
> +               } else {
> +                       struct page *page;
> +                       unsigned int order = get_order(ring->frag_size);
> +
> +                       page = alloc_pages(GFP_ATOMIC | __GFP_COMP |
> +                                           __GFP_NOWARN, order);
> +                       new_data = page ? page_address(page) : NULL;
> +               }
>                 if (unlikely(!new_data)) {
>                         netdev->stats.rx_dropped++;
>                         goto release_desc;
> @@ -1914,7 +1923,16 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
>                 return -ENOMEM;
>
>         for (i = 0; i < rx_dma_size; i++) {
> -               ring->data[i] = netdev_alloc_frag(ring->frag_size);
> +               if (ring->frag_size <= PAGE_SIZE) {
> +                       ring->data[i] = netdev_alloc_frag(ring->frag_size);
> +               } else {
> +                       struct page *page;
> +                       unsigned int order = get_order(ring->frag_size);
> +
> +                       page = alloc_pages(GFP_KERNEL | __GFP_COMP |
> +                                           __GFP_NOWARN, order);
> +                       ring->data[i] = page ? page_address(page) : NULL;
> +               }
>                 if (!ring->data[i])
>                         return -ENOMEM;
>         }

Actually I looked closer at this driver. Is it able to receive frames
larger than 2K? If not there isn't any point in this change.

Based on commit 4fd59792097a ("net: ethernet: mediatek: support
setting MTU") it looks like it doesn't, so odds are this patch is not
necessary.
Alexander H Duyck June 3, 2022, 3:33 p.m. UTC | #2
On Fri, Jun 3, 2022 at 8:25 AM Alexander Duyck
<alexander.duyck@gmail.com> wrote:
>
> On Fri, Jun 3, 2022 at 2:03 AM Chen Lin <chen45464546@163.com> wrote:
> >
> > When rx_flag == MTK_RX_FLAGS_HWLRO,
> > rx_data_len = MTK_MAX_LRO_RX_LENGTH(4096 * 3) > PAGE_SIZE.
> > netdev_alloc_frag is for alloction of page fragment only.
> > Reference to other drivers and Documentation/vm/page_frags.rst
> >
> > Branch to use alloc_pages when ring->frag_size > PAGE_SIZE.
> >
> > Signed-off-by: Chen Lin <chen45464546@163.com>
> > ---
> >  drivers/net/ethernet/mediatek/mtk_eth_soc.c |   22 ++++++++++++++++++++--
> >  1 file changed, 20 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
> > index b3b3c07..772d903 100644
> > --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
> > +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
> > @@ -1467,7 +1467,16 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
> >                         goto release_desc;
> >
> >                 /* alloc new buffer */
> > -               new_data = napi_alloc_frag(ring->frag_size);
> > +               if (ring->frag_size <= PAGE_SIZE) {
> > +                       new_data = napi_alloc_frag(ring->frag_size);
> > +               } else {
> > +                       struct page *page;
> > +                       unsigned int order = get_order(ring->frag_size);
> > +
> > +                       page = alloc_pages(GFP_ATOMIC | __GFP_COMP |
> > +                                           __GFP_NOWARN, order);
> > +                       new_data = page ? page_address(page) : NULL;
> > +               }
> >                 if (unlikely(!new_data)) {
> >                         netdev->stats.rx_dropped++;
> >                         goto release_desc;
> > @@ -1914,7 +1923,16 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
> >                 return -ENOMEM;
> >
> >         for (i = 0; i < rx_dma_size; i++) {
> > -               ring->data[i] = netdev_alloc_frag(ring->frag_size);
> > +               if (ring->frag_size <= PAGE_SIZE) {
> > +                       ring->data[i] = netdev_alloc_frag(ring->frag_size);
> > +               } else {
> > +                       struct page *page;
> > +                       unsigned int order = get_order(ring->frag_size);
> > +
> > +                       page = alloc_pages(GFP_KERNEL | __GFP_COMP |
> > +                                           __GFP_NOWARN, order);
> > +                       ring->data[i] = page ? page_address(page) : NULL;
> > +               }
> >                 if (!ring->data[i])
> >                         return -ENOMEM;
> >         }
>
> Actually I looked closer at this driver. Is it able to receive frames
> larger than 2K? If not there isn't any point in this change.
>
> Based on commit 4fd59792097a ("net: ethernet: mediatek: support
> setting MTU") it looks like it doesn't, so odds are this patch is not
> necessary.

I spoke too soon. I had overlooked the LRO part. With that being the
case you can probably optimize this code to do away with the get_order
piece entirely, at least during runtime. My main concern is that doing
that in the fast-path will be expensive so you would be much better
off doing something like
get_order(mtk_max_frag_size(MTK_RX_FLAGS_HWLRO)) which would be
converted into a constant at compile time since everything else would
be less than 1 page in size.

Also you could then replace alloc_pages with __get_free_pages which
would take care of the page_address call for you.
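
Concretely, that suggestion could look roughly like this in mtk_poll_rx() (a
sketch, not a posted patch; it assumes the HWLRO configuration is the only
case where frag_size exceeds PAGE_SIZE, and that mtk_max_frag_size() takes
the maximum LRO length rather than the ring flag):

		/* alloc new buffer */
		if (ring->frag_size <= PAGE_SIZE) {
			new_data = napi_alloc_frag(ring->frag_size);
		} else {
			/* the order folds to a compile-time constant and
			 * __get_free_pages() already returns a virtual
			 * address, so page_address() is not needed
			 */
			new_data = (void *)__get_free_pages(GFP_ATOMIC |
					__GFP_COMP | __GFP_NOWARN,
					get_order(mtk_max_frag_size(MTK_MAX_LRO_RX_LENGTH)));
		}
		if (unlikely(!new_data)) {
			netdev->stats.rx_dropped++;
			goto release_desc;
		}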
Eric Dumazet June 3, 2022, 5:25 p.m. UTC | #3
On Fri, Jun 3, 2022 at 1:46 AM Chen Lin <chen45464546@163.com> wrote:
>
> When rx_flag == MTK_RX_FLAGS_HWLRO,
> rx_data_len = MTK_MAX_LRO_RX_LENGTH(4096 * 3) > PAGE_SIZE.
> netdev_alloc_frag is for alloction of page fragment only.
> Reference to other drivers and Documentation/vm/page_frags.rst
>
> Branch to use alloc_pages when ring->frag_size > PAGE_SIZE.
>
> Signed-off-by: Chen Lin <chen45464546@163.com>

...

>                         goto release_desc;
> @@ -1914,7 +1923,16 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
>                 return -ENOMEM;
>
>         for (i = 0; i < rx_dma_size; i++) {
> -               ring->data[i] = netdev_alloc_frag(ring->frag_size);

Note aside, calling netdev_alloc_frag() in a loop like that is adding
GFP_ATOMIC pressure.

mtk_rx_alloc() being in process context, using GFP_KERNEL allocations
would be less aggressive and
have more chances to succeed.

We probably should offer a generic helper. This could be used from
driver/net/tun.c and others.
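
Until such a helper exists, a driver could get the less aggressive behaviour
in its setup path by keeping a private page_frag_cache and calling
page_frag_alloc() with GFP_KERNEL. A minimal sketch, assuming the cache and
function names below (they are illustrative, not existing code):

#include <linux/gfp.h>	/* page_frag_alloc() */

static struct page_frag_cache mtk_setup_frag_cache;

/* process-context-only frag allocation for ring setup; callers must
 * serialize access to the cache themselves (e.g. under rtnl_lock)
 */
static void *mtk_setup_alloc_frag(unsigned int fragsz)
{
	return page_frag_alloc(&mtk_setup_frag_cache, fragsz,
			       GFP_KERNEL | __GFP_NOWARN);
}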
Jakub Kicinski June 3, 2022, 6:59 p.m. UTC | #4
On Fri, 3 Jun 2022 10:25:16 -0700 Eric Dumazet wrote:
> >                         goto release_desc;
> > @@ -1914,7 +1923,16 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
> >                 return -ENOMEM;
> >
> >         for (i = 0; i < rx_dma_size; i++) {
> > -               ring->data[i] = netdev_alloc_frag(ring->frag_size);  
> 
> Note aside, calling netdev_alloc_frag() in a loop like that is adding
> GFP_ATOMIC pressure.
> 
> mtk_rx_alloc() being in process context, using GFP_KERNEL allocations
> would be less aggressive and
> have more chances to succeed.
> 
> We probably should offer a generic helper. This could be used from
> driver/net/tun.c and others.

Do cases where netdev_alloc_frag() is not run from a process context
come to your mind? My feeling is that the prevailing pattern is what
this driver does, which is netdev_alloc_frag() at startup / open and
napi_alloc_frag() from the datapath. So maybe we can even spare the
detail in the API and have netdev_alloc_frag() assume GFP_KERNEL by
default?
Eric Dumazet June 3, 2022, 7:11 p.m. UTC | #5
On Fri, Jun 3, 2022 at 11:59 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Fri, 3 Jun 2022 10:25:16 -0700 Eric Dumazet wrote:
> > >                         goto release_desc;
> > > @@ -1914,7 +1923,16 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
> > >                 return -ENOMEM;
> > >
> > >         for (i = 0; i < rx_dma_size; i++) {
> > > -               ring->data[i] = netdev_alloc_frag(ring->frag_size);
> >
> > Note aside, calling netdev_alloc_frag() in a loop like that is adding
> > GFP_ATOMIC pressure.
> >
> > mtk_rx_alloc() being in process context, using GFP_KERNEL allocations
> > would be less aggressive and
> > have more chances to succeed.
> >
> > We probably should offer a generic helper. This could be used from
> > driver/net/tun.c and others.
>
> Do cases where netdev_alloc_frag() is not run from a process context
> come to your mind? My feeling is that the prevailing pattern is what
> this driver does, which is netdev_alloc_frag() at startup / open and
> napi_alloc_frag() from the datapath. So maybe we can even spare the
> detail in the API and have netdev_alloc_frag() assume GFP_KERNEL by
> default?

Yes, we only have to review callers and change the documentation and
implementation.

The confusion/overhead/generalization came with :

commit 7ba7aeabbaba484347cc98fbe9045769ca0d118d
Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date:   Fri Jun 7 21:20:34 2019 +0200

    net: Don't disable interrupts in napi_alloc_frag()

    netdev_alloc_frag() can be used from any context and is used by NAPI
    and non-NAPI drivers. Non-NAPI drivers use it in interrupt context
    and NAPI drivers use it during initial allocation (->ndo_open() or
    ->ndo_change_mtu()). Some NAPI drivers share the same function for the
    initial allocation and the allocation in their NAPI callback.

    The interrupts are disabled in order to ensure locked access from every
    context to `netdev_alloc_cache'.

    Let netdev_alloc_frag() check if interrupts are disabled. If they are,
    use `netdev_alloc_cache' otherwise disable BH and invoke
    __napi_alloc_frag() for the allocation. The IRQ check is cheaper
    compared to disabling & enabling interrupts and memory allocation with
    disabled interrupts does not work on -RT.

    Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
    Signed-off-by: David S. Miller <davem@davemloft.net>
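
For reference, the behaviour that commit message describes amounts to roughly
the following (a paraphrase of the described logic, not a verbatim copy of
net/core/skbuff.c):

void *netdev_alloc_frag(unsigned int fragsz)
{
	void *data;

	fragsz = SKB_DATA_ALIGN(fragsz);
	if (in_irq() || irqs_disabled()) {
		/* hard-IRQ or IRQs-off callers keep using the dedicated
		 * netdev_alloc_cache
		 */
		data = page_frag_alloc(this_cpu_ptr(&netdev_alloc_cache),
				       fragsz, GFP_ATOMIC);
	} else {
		/* everyone else disables BH and shares the NAPI cache */
		local_bh_disable();
		data = __napi_alloc_frag(fragsz, GFP_ATOMIC);
		local_bh_enable();
	}
	return data;
}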
Jakub Kicinski June 3, 2022, 7:55 p.m. UTC | #6
On Fri, 3 Jun 2022 12:11:43 -0700 Eric Dumazet wrote:
> Yes, we only have to review callers and change the documentation and
> implementation.
> 
> The confusion/overhead/generalization came with :
> 
> commit 7ba7aeabbaba484347cc98fbe9045769ca0d118d
> Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> Date:   Fri Jun 7 21:20:34 2019 +0200
> 
>     net: Don't disable interrupts in napi_alloc_frag()
> 
>     netdev_alloc_frag() can be used from any context and is used by NAPI
>     and non-NAPI drivers. Non-NAPI drivers use it in interrupt context
>     and NAPI drivers use it during initial allocation (->ndo_open() or
>     ->ndo_change_mtu()). Some NAPI drivers share the same function for the  
>     initial allocation and the allocation in their NAPI callback.
> 
>     The interrupts are disabled in order to ensure locked access from every
>     context to `netdev_alloc_cache'.
> 
>     Let netdev_alloc_frag() check if interrupts are disabled. If they are,
>     use `netdev_alloc_cache' otherwise disable BH and invoke
>     __napi_alloc_frag() for the allocation. The IRQ check is cheaper
>     compared to disabling & enabling interrupts and memory allocation with
>     disabled interrupts does not work on -RT.

Hm, should have looked at the code. Were you thinking of adding a
helper which would replace both netdev_ and napi_ variants and DTRT
internally?

An option for getting GFP_KERNEL in there would be having an rtnl frag
cache. Users who need frags on the reconfig path should be under rtnl,
they can call rtnl_alloc_frag(), which can use GFP_KERNEL internally.
Otherwise the GFP_KERNEL frag cache would need to be protected by
another mutex, I presume. Pre-allocating memory before using the napi
cache seems hard.
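
A hypothetical shape for that rtnl_alloc_frag() idea, with RTNL itself acting
as the lock around a GFP_KERNEL frag cache (the name comes from the mail
above; the body is a guess, not existing kernel code):

static struct page_frag_cache rtnl_frag_cache;

void *rtnl_alloc_frag(unsigned int fragsz)
{
	/* reconfig paths already hold RTNL, so it doubles as the lock
	 * protecting the cache and GFP_KERNEL is safe to use
	 */
	ASSERT_RTNL();

	return page_frag_alloc(&rtnl_frag_cache, fragsz,
			       GFP_KERNEL | __GFP_NOWARN);
}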
Chen Lin June 5, 2022, 2:22 a.m. UTC | #7
At 2022-06-03 23:33:25, "Alexander Duyck" <alexander.duyck@gmail.com> wrote:
>On Fri, Jun 3, 2022 at 8:25 AM Alexander Duyck
><alexander.duyck@gmail.com> wrote:
>>
>> On Fri, Jun 3, 2022 at 2:03 AM Chen Lin <chen45464546@163.com> wrote:
>> >
>> > When rx_flag == MTK_RX_FLAGS_HWLRO,
>> > rx_data_len = MTK_MAX_LRO_RX_LENGTH(4096 * 3) > PAGE_SIZE.
>> > netdev_alloc_frag is for alloction of page fragment only.
>> > Reference to other drivers and Documentation/vm/page_frags.rst
>> >
>> > Branch to use alloc_pages when ring->frag_size > PAGE_SIZE.
>> >
>> > Signed-off-by: Chen Lin <chen45464546@163.com>
>> > ---
>> >  drivers/net/ethernet/mediatek/mtk_eth_soc.c |   22 ++++++++++++++++++++--
>> >  1 file changed, 20 insertions(+), 2 deletions(-)
>> >
>> > diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
>> > index b3b3c07..772d903 100644
>> > --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
>> > +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
>> > @@ -1467,7 +1467,16 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
>> >                         goto release_desc;
>> >
>> >                 /* alloc new buffer */
>> > -               new_data = napi_alloc_frag(ring->frag_size);
>> > +               if (ring->frag_size <= PAGE_SIZE) {
>> > +                       new_data = napi_alloc_frag(ring->frag_size);
>> > +               } else {
>> > +                       struct page *page;
>> > +                       unsigned int order = get_order(ring->frag_size);
>> > +
>> > +                       page = alloc_pages(GFP_ATOMIC | __GFP_COMP |
>> > +                                           __GFP_NOWARN, order);
>> > +                       new_data = page ? page_address(page) : NULL;
>> > +               }
>> >                 if (unlikely(!new_data)) {
>> >                         netdev->stats.rx_dropped++;
>> >                         goto release_desc;
>> > @@ -1914,7 +1923,16 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
>> >                 return -ENOMEM;
>> >
>> >         for (i = 0; i < rx_dma_size; i++) {
>> > -               ring->data[i] = netdev_alloc_frag(ring->frag_size);
>> > +               if (ring->frag_size <= PAGE_SIZE) {
>> > +                       ring->data[i] = netdev_alloc_frag(ring->frag_size);
>> > +               } else {
>> > +                       struct page *page;
>> > +                       unsigned int order = get_order(ring->frag_size);
>> > +
>> > +                       page = alloc_pages(GFP_KERNEL | __GFP_COMP |
>> > +                                           __GFP_NOWARN, order);
>> > +                       ring->data[i] = page ? page_address(page) : NULL;
>> > +               }
>> >                 if (!ring->data[i])
>> >                         return -ENOMEM;
>> >         }
>>
>> Actually I looked closer at this driver. Is it able to receive frames
>> larger than 2K? If not there isn't any point in this change.
>>
>> Based on commit 4fd59792097a ("net: ethernet: mediatek: support
>> setting MTU") it looks like it doesn't, so odds are this patch is not
>> necessary.
>
>I spoke too soon. I had overlooked the LRO part. With that being the
>case you can probably optimize this code to do away with the get_order
>piece entirely, at least during runtime. My main concern is that doing
>that in the fast-path will be expensive so you would be much better
>off doing something like
>get_order(mtk_max_frag_size(MTK_RX_FLAGS_HWLRO)) which would be
>converted into a constant at compile time since everything else would
>be less than 1 page in size.
>
>Also you could then replace alloc_pages with __get_free_pages which
>would take care of the page_address call for you.

Thanks for the tips. I'll try again.
It also shows how easy it is to get the fragsz parameter wrong.
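
One possible shape for that follow-up, combining the compile-time order,
__get_free_pages() and an explicit gfp mask into a helper shared by
mtk_rx_alloc() and mtk_poll_rx() (a sketch; the helper name and placement are
assumptions, not taken from a posted v3):

static void *mtk_max_lro_buf_alloc(gfp_t gfp_mask)
{
	/* constant at compile time: only the HWLRO configuration needs
	 * more than one page per RX buffer
	 */
	unsigned int size = mtk_max_frag_size(MTK_MAX_LRO_RX_LENGTH);
	unsigned long data;

	data = __get_free_pages(gfp_mask | __GFP_COMP | __GFP_NOWARN,
				get_order(size));

	return (void *)data;
}

mtk_rx_alloc() would call it with GFP_KERNEL in the ring-fill loop, and
mtk_poll_rx() with GFP_ATOMIC in the frag_size > PAGE_SIZE branch.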

Patch

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index b3b3c07..772d903 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1467,7 +1467,16 @@  static int mtk_poll_rx(struct napi_struct *napi, int budget,
 			goto release_desc;
 
 		/* alloc new buffer */
-		new_data = napi_alloc_frag(ring->frag_size);
+		if (ring->frag_size <= PAGE_SIZE) {
+			new_data = napi_alloc_frag(ring->frag_size);
+		} else {
+			struct page *page;
+			unsigned int order = get_order(ring->frag_size);
+
+			page = alloc_pages(GFP_ATOMIC | __GFP_COMP |
+					    __GFP_NOWARN, order);
+			new_data = page ? page_address(page) : NULL;
+		}
 		if (unlikely(!new_data)) {
 			netdev->stats.rx_dropped++;
 			goto release_desc;
@@ -1914,7 +1923,16 @@  static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
 		return -ENOMEM;
 
 	for (i = 0; i < rx_dma_size; i++) {
-		ring->data[i] = netdev_alloc_frag(ring->frag_size);
+		if (ring->frag_size <= PAGE_SIZE) {
+			ring->data[i] = netdev_alloc_frag(ring->frag_size);
+		} else {
+			struct page *page;
+			unsigned int order = get_order(ring->frag_size);
+
+			page = alloc_pages(GFP_KERNEL | __GFP_COMP |
+					    __GFP_NOWARN, order);
+			ring->data[i] = page ? page_address(page) : NULL;
+		}
 		if (!ring->data[i])
 			return -ENOMEM;
 	}