diff mbox series

[v2,2/2] mt76: usb: do not always copy the first part of received frames

Message ID 2ed0b595a12944a8cfea14e066bcc4fa24f0ba44.1559293385.git.lorenzo@kernel.org (mailing list archive)
State Superseded
Delegated to: Kalle Valo
Headers show
Series mt76: usb: fix A-MSDU support | expand

Commit Message

Lorenzo Bianconi May 31, 2019, 9:38 a.m. UTC
Set usb buffer size taking into account skb_shared_info in order to
not always copy the first part of received frames if A-MSDU is enabled
for SG capable devices

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 drivers/net/wireless/mediatek/mt76/mt76.h |  3 +++
 drivers/net/wireless/mediatek/mt76/usb.c  | 12 ++++++++----
 2 files changed, 11 insertions(+), 4 deletions(-)

Comments

Stanislaw Gruszka June 12, 2019, 9:10 a.m. UTC | #1
On Fri, May 31, 2019 at 11:38:23AM +0200, Lorenzo Bianconi wrote:
> Set usb buffer size taking into account skb_shared_info in order to
> not always copy the first part of received frames if A-MSDU is enabled
> for SG capable devices
> 
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
>  drivers/net/wireless/mediatek/mt76/mt76.h |  3 +++
>  drivers/net/wireless/mediatek/mt76/usb.c  | 12 ++++++++----
>  2 files changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> index 74d4edf941d6..7899e9b88b54 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> @@ -32,6 +32,9 @@
>  #define MT_RX_BUF_SIZE      2048
>  #define MT_SKB_HEAD_LEN     128
>  
> +#define MT_BUF_WITH_OVERHEAD(x) \
> +	((x) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
> +
>  struct mt76_dev;
>  struct mt76_wcid;
>  
> diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
> index 2bfc8214c0d8..5081643ce701 100644
> --- a/drivers/net/wireless/mediatek/mt76/usb.c
> +++ b/drivers/net/wireless/mediatek/mt76/usb.c
> @@ -286,7 +286,7 @@ static int
>  mt76u_fill_rx_sg(struct mt76_dev *dev, struct mt76_queue *q, struct urb *urb,
>  		 int nsgs, gfp_t gfp)
>  {
> -	int i;
> +	int i, data_size = SKB_WITH_OVERHEAD(q->buf_size);
>  
>  	for (i = 0; i < nsgs; i++) {
>  		struct page *page;
> @@ -299,7 +299,7 @@ mt76u_fill_rx_sg(struct mt76_dev *dev, struct mt76_queue *q, struct urb *urb,
>  
>  		page = virt_to_head_page(data);
>  		offset = data - page_address(page);
> -		sg_set_page(&urb->sg[i], page, q->buf_size, offset);
> +		sg_set_page(&urb->sg[i], page, data_size, offset);
>  	}
>  
>  	if (i < nsgs) {
> @@ -311,7 +311,7 @@ mt76u_fill_rx_sg(struct mt76_dev *dev, struct mt76_queue *q, struct urb *urb,
>  	}
>  
>  	urb->num_sgs = max_t(int, i, urb->num_sgs);
> -	urb->transfer_buffer_length = urb->num_sgs * q->buf_size,
> +	urb->transfer_buffer_length = urb->num_sgs * data_size;
>  	sg_init_marker(urb->sg, urb->num_sgs);
>  
>  	return i ? : -ENOMEM;
> @@ -611,8 +611,12 @@ static int mt76u_alloc_rx(struct mt76_dev *dev)
>  	if (!q->entry)
>  		return -ENOMEM;
>  
> -	q->buf_size = dev->usb.sg_en ? MT_RX_BUF_SIZE : PAGE_SIZE;
> +	if (dev->usb.sg_en)
> +		q->buf_size = MT_BUF_WITH_OVERHEAD(MT_RX_BUF_SIZE);

I strongly recommend to not doing this. While this should work
in theory creating buffer with size of 2k + some bytes might
trigger various bugs in dma mapping or other low level code.

And skb_shered_info is needed only in first buffer IIUC.

Also this patch seems to make first patch unnecessary except for
non sg_en case (in which I think rx AMSDU is broken anyway),
so I would prefer just to apply first patch.

Stanislaw
Lorenzo Bianconi June 12, 2019, 9:53 a.m. UTC | #2
> On Fri, May 31, 2019 at 11:38:23AM +0200, Lorenzo Bianconi wrote:

[...]

> >  	}
> >  
> >  	urb->num_sgs = max_t(int, i, urb->num_sgs);
> > -	urb->transfer_buffer_length = urb->num_sgs * q->buf_size,
> > +	urb->transfer_buffer_length = urb->num_sgs * data_size;
> >  	sg_init_marker(urb->sg, urb->num_sgs);
> >  
> >  	return i ? : -ENOMEM;
> > @@ -611,8 +611,12 @@ static int mt76u_alloc_rx(struct mt76_dev *dev)
> >  	if (!q->entry)
> >  		return -ENOMEM;
> >  
> > -	q->buf_size = dev->usb.sg_en ? MT_RX_BUF_SIZE : PAGE_SIZE;
> > +	if (dev->usb.sg_en)
> > +		q->buf_size = MT_BUF_WITH_OVERHEAD(MT_RX_BUF_SIZE);
> 
> I strongly recommend to not doing this. While this should work
> in theory creating buffer with size of 2k + some bytes might
> trigger various bugs in dma mapping or other low level code.

even in practice actually :) but we can be more cautious since probably copying
the first 128B will not make any difference

> 
> And skb_shered_info is needed only in first buffer IIUC.
> 
> Also this patch seems to make first patch unnecessary except for
> non sg_en case (in which I think rx AMSDU is broken anyway),
> so I would prefer just to apply first patch.

I do not think rx AMSDU is broken for non sg_en case since the max rx value
allowed should be 3839 IIRC and we alloc one page in this case

Regards,
Lorenzo

> 
> Stanislaw
>
Stanislaw Gruszka June 12, 2019, 10:25 a.m. UTC | #3
On Wed, Jun 12, 2019 at 11:53:03AM +0200, Lorenzo Bianconi wrote:
> > On Fri, May 31, 2019 at 11:38:23AM +0200, Lorenzo Bianconi wrote:
> 
> [...]
> 
> > >  	}
> > >  
> > >  	urb->num_sgs = max_t(int, i, urb->num_sgs);
> > > -	urb->transfer_buffer_length = urb->num_sgs * q->buf_size,
> > > +	urb->transfer_buffer_length = urb->num_sgs * data_size;
> > >  	sg_init_marker(urb->sg, urb->num_sgs);
> > >  
> > >  	return i ? : -ENOMEM;
> > > @@ -611,8 +611,12 @@ static int mt76u_alloc_rx(struct mt76_dev *dev)
> > >  	if (!q->entry)
> > >  		return -ENOMEM;
> > >  
> > > -	q->buf_size = dev->usb.sg_en ? MT_RX_BUF_SIZE : PAGE_SIZE;
> > > +	if (dev->usb.sg_en)
> > > +		q->buf_size = MT_BUF_WITH_OVERHEAD(MT_RX_BUF_SIZE);
> > 
> > I strongly recommend to not doing this. While this should work
> > in theory creating buffer with size of 2k + some bytes might
> > trigger various bugs in dma mapping or other low level code.
> 
> even in practice actually :)

I wouldn't be sure about this. It's not common to have buffers of
such size and crossing pages boundaries. It really can trigger
nasty bugs on various IOMMU drivers.

> but we can be more cautious since probably copying
> the first 128B will not make any difference

Not sure if I understand what you mean.

> > And skb_shered_info is needed only in first buffer IIUC.
> > 
> > Also this patch seems to make first patch unnecessary except for
> > non sg_en case (in which I think rx AMSDU is broken anyway),
> > so I would prefer just to apply first patch.
> 
> I do not think rx AMSDU is broken for non sg_en case since the max rx value
> allowed should be 3839 IIRC and we alloc one page in this case

If that's the case we should be fine, but then I do not understand
why we allocate 8*2k buffers for sg_en case, isn't that AP can
sent AMSDU frame 16k big?

Stanislaw
Lorenzo Bianconi June 12, 2019, 10:49 a.m. UTC | #4
> On Wed, Jun 12, 2019 at 11:53:03AM +0200, Lorenzo Bianconi wrote:
> > > On Fri, May 31, 2019 at 11:38:23AM +0200, Lorenzo Bianconi wrote:
> > 
> > [...]
> > 
> > > >  	}
> > > >  
> > > >  	urb->num_sgs = max_t(int, i, urb->num_sgs);
> > > > -	urb->transfer_buffer_length = urb->num_sgs * q->buf_size,
> > > > +	urb->transfer_buffer_length = urb->num_sgs * data_size;
> > > >  	sg_init_marker(urb->sg, urb->num_sgs);
> > > >  
> > > >  	return i ? : -ENOMEM;
> > > > @@ -611,8 +611,12 @@ static int mt76u_alloc_rx(struct mt76_dev *dev)
> > > >  	if (!q->entry)
> > > >  		return -ENOMEM;
> > > >  
> > > > -	q->buf_size = dev->usb.sg_en ? MT_RX_BUF_SIZE : PAGE_SIZE;
> > > > +	if (dev->usb.sg_en)
> > > > +		q->buf_size = MT_BUF_WITH_OVERHEAD(MT_RX_BUF_SIZE);
> > > 
> > > I strongly recommend to not doing this. While this should work
> > > in theory creating buffer with size of 2k + some bytes might
> > > trigger various bugs in dma mapping or other low level code.
> > 
> > even in practice actually :)
> 
> I wouldn't be sure about this. It's not common to have buffers of
> such size and crossing pages boundaries. It really can trigger
> nasty bugs on various IOMMU drivers.

I was just joking, I mean that it worked in the tests I carried out, but I
agree it can trigger some issues in buggy IOMMU drivers

> 
> > but we can be more cautious since probably copying
> > the first 128B will not make any difference
> 
> Not sure if I understand what you mean.

Please correct me if I am wrong but I think max amsdu rx size is 3839B for
mt76. For the sg_en case this frame will span over multiple sg buffers since
sg buffer size is 2048B (2 sg buffers). Moreover if we do not take into account
skb_shared_info when configuring the sg buffer size we will need to always copy
the first 128B of the first buffer since received len will be set to 2048 and
the following if condition will always fail:

if (SKB_WITH_OVERHEAD(buf_size) >= MT_DMA_HDR_LEN + len) {
}

> 
> > > And skb_shered_info is needed only in first buffer IIUC.
> > > 
> > > Also this patch seems to make first patch unnecessary except for
> > > non sg_en case (in which I think rx AMSDU is broken anyway),
> > > so I would prefer just to apply first patch.
> > 
> > I do not think rx AMSDU is broken for non sg_en case since the max rx value
> > allowed should be 3839 IIRC and we alloc one page in this case
> 
> If that's the case we should be fine, but then I do not understand
> why we allocate 8*2k buffers for sg_en case, isn't that AP can
> sent AMSDU frame 16k big?

Sorry I did not get what you mean here, could you please explain?

Regards,
Lorenzo

> 
> Stanislaw
>
Stanislaw Gruszka June 12, 2019, 11:51 a.m. UTC | #5
On Wed, Jun 12, 2019 at 12:49:22PM +0200, Lorenzo Bianconi wrote:
> > On Wed, Jun 12, 2019 at 11:53:03AM +0200, Lorenzo Bianconi wrote:
> > > > On Fri, May 31, 2019 at 11:38:23AM +0200, Lorenzo Bianconi wrote:
> > > 
> > > [...]
> > > 
> > > > >  	}
> > > > >  
> > > > >  	urb->num_sgs = max_t(int, i, urb->num_sgs);
> > > > > -	urb->transfer_buffer_length = urb->num_sgs * q->buf_size,
> > > > > +	urb->transfer_buffer_length = urb->num_sgs * data_size;
> > > > >  	sg_init_marker(urb->sg, urb->num_sgs);
> > > > >  
> > > > >  	return i ? : -ENOMEM;
> > > > > @@ -611,8 +611,12 @@ static int mt76u_alloc_rx(struct mt76_dev *dev)
> > > > >  	if (!q->entry)
> > > > >  		return -ENOMEM;
> > > > >  
> > > > > -	q->buf_size = dev->usb.sg_en ? MT_RX_BUF_SIZE : PAGE_SIZE;
> > > > > +	if (dev->usb.sg_en)
> > > > > +		q->buf_size = MT_BUF_WITH_OVERHEAD(MT_RX_BUF_SIZE);
> > > > 
> > > > I strongly recommend to not doing this. While this should work
> > > > in theory creating buffer with size of 2k + some bytes might
> > > > trigger various bugs in dma mapping or other low level code.
> > > 
> > > even in practice actually :)
> > 
> > I wouldn't be sure about this. It's not common to have buffers of
> > such size and crossing pages boundaries. It really can trigger
> > nasty bugs on various IOMMU drivers.
> 
> I was just joking, I mean that it worked in the tests I carried out, but I
> agree it can trigger some issues in buggy IOMMU drivers

My sense of humor declined quite drastically lastly ;-/

> > > but we can be more cautious since probably copying
> > > the first 128B will not make any difference
> > 
> > Not sure if I understand what you mean.
> 
> Please correct me if I am wrong but I think max amsdu rx size is 3839B for
> mt76. For the sg_en case this frame will span over multiple sg buffers since
> sg buffer size is 2048B (2 sg buffers). Moreover if we do not take into account
> skb_shared_info when configuring the sg buffer size we will need to always copy
> the first 128B of the first buffer since received len will be set to 2048 and
> the following if condition will always fail:
> 
> if (SKB_WITH_OVERHEAD(buf_size) >= MT_DMA_HDR_LEN + len) {
> }

Ok, that I understand.

> > > > And skb_shered_info is needed only in first buffer IIUC.
> > > > 
> > > > Also this patch seems to make first patch unnecessary except for
> > > > non sg_en case (in which I think rx AMSDU is broken anyway),
> > > > so I would prefer just to apply first patch.
> > > 
> > > I do not think rx AMSDU is broken for non sg_en case since the max rx value
> > > allowed should be 3839 IIRC and we alloc one page in this case
> > 
> > If that's the case we should be fine, but then I do not understand
> > why we allocate 8*2k buffers for sg_en case, isn't that AP can
> > sent AMSDU frame 16k big?
> 
> Sorry I did not get what you mean here, could you please explain?

If max RX AMSDU size is 3839B I do not see reason why we allocate
MT_SG_MAX_SIZE=8 of MT_RX_BUF_SIZE=2k buffers for sg_en case.
I thought the reason is that max AMSDU size is 16kB so it fit into
8 sg buffers of 2k.

In other words, for me, looks like either
- we can not handle AMSDU for non sg case because we do not
allocate big enough buffer
or
- we can just use one PAGE_SIZE buffer for rx and remove sg
buffers for rx completely 

Do I miss something ?

Stanislaw
Lorenzo Bianconi June 12, 2019, 12:28 p.m. UTC | #6
[...]
> 
> My sense of humor declined quite drastically lastly ;-/
> 
> > > > but we can be more cautious since probably copying
> > > > the first 128B will not make any difference
> > > 
> > > Not sure if I understand what you mean.
> > 
> > Please correct me if I am wrong but I think max amsdu rx size is 3839B for
> > mt76. For the sg_en case this frame will span over multiple sg buffers since
> > sg buffer size is 2048B (2 sg buffers). Moreover if we do not take into account
> > skb_shared_info when configuring the sg buffer size we will need to always copy
> > the first 128B of the first buffer since received len will be set to 2048 and
> > the following if condition will always fail:
> > 
> > if (SKB_WITH_OVERHEAD(buf_size) >= MT_DMA_HDR_LEN + len) {
> > }
> 
> Ok, that I understand.
> 
> > > > > And skb_shered_info is needed only in first buffer IIUC.
> > > > > 
> > > > > Also this patch seems to make first patch unnecessary except for
> > > > > non sg_en case (in which I think rx AMSDU is broken anyway),
> > > > > so I would prefer just to apply first patch.
> > > > 
> > > > I do not think rx AMSDU is broken for non sg_en case since the max rx value
> > > > allowed should be 3839 IIRC and we alloc one page in this case
> > > 
> > > If that's the case we should be fine, but then I do not understand
> > > why we allocate 8*2k buffers for sg_en case, isn't that AP can
> > > sent AMSDU frame 16k big?
> > 
> > Sorry I did not get what you mean here, could you please explain?
> 
> If max RX AMSDU size is 3839B I do not see reason why we allocate
> MT_SG_MAX_SIZE=8 of MT_RX_BUF_SIZE=2k buffers for sg_en case.
> I thought the reason is that max AMSDU size is 16kB so it fit into
> 8 sg buffers of 2k.
> 
> In other words, for me, looks like either
> - we can not handle AMSDU for non sg case because we do not
> allocate big enough buffer

I think AMSDU is mandatory and we currently support it even for non-sg case
(since max rx AMSDU is 3839B)

> or
> - we can just use one PAGE_SIZE buffer for rx and remove sg
> buffers for rx completely 

using sg buffers we can support bigger rx AMSDU size in the future without using
huge buffers (e.g. we can try to use IEEE80211_MAX_MPDU_LEN_HT_7935 with
mt76x2u)

Regards,
Lorenzo

> 
> Do I miss something ?
> 
> Stanislaw
Stanislaw Gruszka June 12, 2019, 12:59 p.m. UTC | #7
On Wed, Jun 12, 2019 at 02:28:48PM +0200, Lorenzo Bianconi wrote:
> [...]
> > 
> > My sense of humor declined quite drastically lastly ;-/
> > 
> > > > > but we can be more cautious since probably copying
> > > > > the first 128B will not make any difference
> > > > 
> > > > Not sure if I understand what you mean.
> > > 
> > > Please correct me if I am wrong but I think max amsdu rx size is 3839B for
> > > mt76. For the sg_en case this frame will span over multiple sg buffers since
> > > sg buffer size is 2048B (2 sg buffers). Moreover if we do not take into account
> > > skb_shared_info when configuring the sg buffer size we will need to always copy
> > > the first 128B of the first buffer since received len will be set to 2048 and
> > > the following if condition will always fail:
> > > 
> > > if (SKB_WITH_OVERHEAD(buf_size) >= MT_DMA_HDR_LEN + len) {
> > > }
> > 
> > Ok, that I understand.
> > 
> > > > > > And skb_shered_info is needed only in first buffer IIUC.
> > > > > > 
> > > > > > Also this patch seems to make first patch unnecessary except for
> > > > > > non sg_en case (in which I think rx AMSDU is broken anyway),
> > > > > > so I would prefer just to apply first patch.
> > > > > 
> > > > > I do not think rx AMSDU is broken for non sg_en case since the max rx value
> > > > > allowed should be 3839 IIRC and we alloc one page in this case
> > > > 
> > > > If that's the case we should be fine, but then I do not understand
> > > > why we allocate 8*2k buffers for sg_en case, isn't that AP can
> > > > sent AMSDU frame 16k big?
> > > 
> > > Sorry I did not get what you mean here, could you please explain?
> > 
> > If max RX AMSDU size is 3839B I do not see reason why we allocate
> > MT_SG_MAX_SIZE=8 of MT_RX_BUF_SIZE=2k buffers for sg_en case.
> > I thought the reason is that max AMSDU size is 16kB so it fit into
> > 8 sg buffers of 2k.
> > 
> > In other words, for me, looks like either
> > - we can not handle AMSDU for non sg case because we do not
> > allocate big enough buffer
> 
> I think AMSDU is mandatory and we currently support it even for non-sg case
> (since max rx AMSDU is 3839B)
> 
> > or
> > - we can just use one PAGE_SIZE buffer for rx and remove sg
> > buffers for rx completely 
> 
> using sg buffers we can support bigger rx AMSDU size in the future without using
> huge buffers (e.g. we can try to use IEEE80211_MAX_MPDU_LEN_HT_7935 with
> mt76x2u)

I think it would be simpler just to allocate 2 pages for 7935B .

Stanislaw
Stanislaw Gruszka June 12, 2019, 2:21 p.m. UTC | #8
On Wed, Jun 12, 2019 at 02:59:05PM +0200, Stanislaw Gruszka wrote:
> > > If max RX AMSDU size is 3839B I do not see reason why we allocate
> > > MT_SG_MAX_SIZE=8 of MT_RX_BUF_SIZE=2k buffers for sg_en case.
> > > I thought the reason is that max AMSDU size is 16kB so it fit into
> > > 8 sg buffers of 2k.
> > > 
> > > In other words, for me, looks like either
> > > - we can not handle AMSDU for non sg case because we do not
> > > allocate big enough buffer
> > 
> > I think AMSDU is mandatory and we currently support it even for non-sg case
> > (since max rx AMSDU is 3839B)
> > 
> > > or
> > > - we can just use one PAGE_SIZE buffer for rx and remove sg
> > > buffers for rx completely 
> > 
> > using sg buffers we can support bigger rx AMSDU size in the future without using
> > huge buffers (e.g. we can try to use IEEE80211_MAX_MPDU_LEN_HT_7935 with
> > mt76x2u)
> 
> I think it would be simpler just to allocate 2 pages for 7935B .

And if we could determine that there is no true need to use sg for rx,
I think best approach here would be revert f8f527b16db5 in v5.2 to fix
regression and remove rx sg in -next. That would make code simpler,
allocate 4k instead 16k per packet, allow to use build_skb (4096 - 3839
give enough space for shared info) and not use usb hcd bounce buffer.

Stanislaw
Lorenzo Bianconi June 12, 2019, 2:27 p.m. UTC | #9
> On Wed, Jun 12, 2019 at 02:28:48PM +0200, Lorenzo Bianconi wrote:

[...]

> > 
> > using sg buffers we can support bigger rx AMSDU size in the future without using
> > huge buffers (e.g. we can try to use IEEE80211_MAX_MPDU_LEN_HT_7935 with
> > mt76x2u)
> 
> I think it would be simpler just to allocate 2 pages for 7935B .
> 

We should avoid increasing buffer size to more than PAGE_SIZE for
performance reasons. As suggested by Felix what about of setting buf_size to
PAGE_SIZE for both sg and non-sg cases and for sg setting usb data size to

SKB_WITH_OVERHEAD(q->buf_size) & (usb_endpoint_maxp() - 1);

Regards,
Lorenzo

> Stanislaw
Lorenzo Bianconi June 12, 2019, 2:44 p.m. UTC | #10
On Jun 12, Stanislaw Gruszka wrote:
> On Wed, Jun 12, 2019 at 02:59:05PM +0200, Stanislaw Gruszka wrote:
> > > > If max RX AMSDU size is 3839B I do not see reason why we allocate
> > > > MT_SG_MAX_SIZE=8 of MT_RX_BUF_SIZE=2k buffers for sg_en case.
> > > > I thought the reason is that max AMSDU size is 16kB so it fit into
> > > > 8 sg buffers of 2k.
> > > > 
> > > > In other words, for me, looks like either
> > > > - we can not handle AMSDU for non sg case because we do not
> > > > allocate big enough buffer
> > > 
> > > I think AMSDU is mandatory and we currently support it even for non-sg case
> > > (since max rx AMSDU is 3839B)
> > > 
> > > > or
> > > > - we can just use one PAGE_SIZE buffer for rx and remove sg
> > > > buffers for rx completely 
> > > 
> > > using sg buffers we can support bigger rx AMSDU size in the future without using
> > > huge buffers (e.g. we can try to use IEEE80211_MAX_MPDU_LEN_HT_7935 with
> > > mt76x2u)
> > 
> > I think it would be simpler just to allocate 2 pages for 7935B .
> 
> And if we could determine that there is no true need to use sg for rx,
> I think best approach here would be revert f8f527b16db5 in v5.2 to fix
> regression and remove rx sg in -next. That would make code simpler,
> allocate 4k instead 16k per packet, allow to use build_skb (4096 - 3839
> give enough space for shared info) and not use usb hcd bounce buffer.

I do not think we should drop sg support since:
- it allow us to rx huge amsdu frames (e.g. IEEE80211_MAX_MPDU_LEN_VHT_11454)
  using multiple one page buffer. I think there will be new usb devices where we can
  increase amsdu size (we can increase it even on mt76x2u usb 3.0 devices)
- without SG we can't use build_skb() without copying a given size of the packet since the
  space needed for skb_shared_info is 320B on a x86_64 device
- the fix for f8f527b16db5 has been already tested

Regards,
Lorenzo

> 
> Stanislaw
Stanislaw Gruszka June 13, 2019, 7:51 a.m. UTC | #11
On Wed, Jun 12, 2019 at 04:44:01PM +0200, Lorenzo Bianconi wrote:
> On Jun 12, Stanislaw Gruszka wrote:
> > On Wed, Jun 12, 2019 at 02:59:05PM +0200, Stanislaw Gruszka wrote:
> > > > > If max RX AMSDU size is 3839B I do not see reason why we allocate
> > > > > MT_SG_MAX_SIZE=8 of MT_RX_BUF_SIZE=2k buffers for sg_en case.
> > > > > I thought the reason is that max AMSDU size is 16kB so it fit into
> > > > > 8 sg buffers of 2k.
> > > > > 
> > > > > In other words, for me, looks like either
> > > > > - we can not handle AMSDU for non sg case because we do not
> > > > > allocate big enough buffer
> > > > 
> > > > I think AMSDU is mandatory and we currently support it even for non-sg case
> > > > (since max rx AMSDU is 3839B)
> > > > 
> > > > > or
> > > > > - we can just use one PAGE_SIZE buffer for rx and remove sg
> > > > > buffers for rx completely 
> > > > 
> > > > using sg buffers we can support bigger rx AMSDU size in the future without using
> > > > huge buffers (e.g. we can try to use IEEE80211_MAX_MPDU_LEN_HT_7935 with
> > > > mt76x2u)
> > > 
> > > I think it would be simpler just to allocate 2 pages for 7935B .
> > 
> > And if we could determine that there is no true need to use sg for rx,
> > I think best approach here would be revert f8f527b16db5 in v5.2 to fix
> > regression and remove rx sg in -next. That would make code simpler,
> > allocate 4k instead 16k per packet, allow to use build_skb (4096 - 3839
> > give enough space for shared info) and not use usb hcd bounce buffer.
> 
> I do not think we should drop sg support since:
> - it allow us to rx huge amsdu frames (e.g. IEEE80211_MAX_MPDU_LEN_VHT_11454)
>   using multiple one page buffer. I think there will be new usb devices where we can
>   increase amsdu size (we can increase it even on mt76x2u usb 3.0 devices)

I looked at intel wifi drivers and this is handled by amsdu_size module
parameter, supported values are 4k, 8k and 12k. RX allocation size and
proper values in vht_cap & ht_cap are set accordingly. Assuming (some)
mt76 HW and FW can handle bigger AMSDUs I think we should do similar
thing.

Otherwise looks for me, we just waste memory and have not needed code
for no true reason. 

>   space needed for skb_shared_info is 320B on a x86_64 device

Uhh, I haven't expected that sk_shared_info() is that big, so indeed build_skb
could not used and 128B copy fallback will be necessary.


Stanislaw
Stanislaw Gruszka June 13, 2019, 7:55 a.m. UTC | #12
On Wed, Jun 12, 2019 at 04:27:42PM +0200, Lorenzo Bianconi wrote:
> > On Wed, Jun 12, 2019 at 02:28:48PM +0200, Lorenzo Bianconi wrote:
> 
> [...]
> 
> > > 
> > > using sg buffers we can support bigger rx AMSDU size in the future without using
> > > huge buffers (e.g. we can try to use IEEE80211_MAX_MPDU_LEN_HT_7935 with
> > > mt76x2u)
> > I think it would be simpler just to allocate 2 pages for 7935B .
> > 
> 
> We should avoid increasing buffer size to more than PAGE_SIZE for
> performance reasons.

Interesting, at what place and how this affect performance negatively ?

> As suggested by Felix what about of setting buf_size to
> PAGE_SIZE for both sg and non-sg cases and for sg setting usb data size to
> 
> SKB_WITH_OVERHEAD(q->buf_size) & (usb_endpoint_maxp() - 1);

Increasing to PAGE_SIZE for sg looks reasonable to me.

Not sure if understand data_size logic. If this mean 'PAGE_SIZE - usb_endpint_maxp()',
looks ok to me as well (for first segment), but would require one extra segment
to avoid coping (i.e. 2 pages for 3839 , 3 pages for 7935 ...) If we would 
like to stay with 128B copy fallback, we can have 1 page for 3839 , 2 for 7935 ...

It would be interesting how frequently AMSDU of max size is sent by
remote station. If this is rare situation I would be opting for
smaller allocation and 128B copy fallback. If this is frequent
for bigger allocation to assure we can always use build_skb().

Stanislaw
Lorenzo Bianconi June 13, 2019, 8:26 a.m. UTC | #13
[...]

> I looked at intel wifi drivers and this is handled by amsdu_size module
> parameter, supported values are 4k, 8k and 12k. RX allocation size and
> proper values in vht_cap & ht_cap are set accordingly. Assuming (some)
> mt76 HW and FW can handle bigger AMSDUs I think we should do similar
> thing.
> 
> Otherwise looks for me, we just waste memory and have not needed code
> for no true reason. 
> 
> >   space needed for skb_shared_info is 320B on a x86_64 device
> 
> Uhh, I haven't expected that sk_shared_info() is that big, so indeed build_skb
> could not used and 128B copy fallback will be necessary.

Hi Stanislaw,

reviewing the original patch I think we can't trigger any IOMMU bug since the
usb buffer length is actually 2048 and not 2048 + skb_shared_info_size:

in mt76u_fill_rx_sg()
data_size = SKB_WITH_OVERHEAD(q->buf_size);

q->buf_size = MT_BUF_WITH_OVERHEAD(MT_RX_BUF_SIZE);

where MT_BUF_WITH_OVERHEAD is
((x) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))

anyway we can even set
q->buf_size = PAGE_SIZE
data_size = rounddown(SKB_WITH_OVERHEAD(q->buf_size), usb_endpoint_maxp())

Regards,
Lorenzo

> 
> 
> Stanislaw
Stanislaw Gruszka June 13, 2019, 9:02 a.m. UTC | #14
Hi

On Thu, Jun 13, 2019 at 10:26:38AM +0200, Lorenzo Bianconi wrote:
> [...]
> 
> > I looked at intel wifi drivers and this is handled by amsdu_size module
> > parameter, supported values are 4k, 8k and 12k. RX allocation size and
> > proper values in vht_cap & ht_cap are set accordingly. Assuming (some)
> > mt76 HW and FW can handle bigger AMSDUs I think we should do similar
> > thing.
> > 
> > Otherwise looks for me, we just waste memory and have not needed code
> > for no true reason. 
> > 
> > >   space needed for skb_shared_info is 320B on a x86_64 device
> > 
> > Uhh, I haven't expected that sk_shared_info() is that big, so indeed build_skb
> > could not used and 128B copy fallback will be necessary.
> 
> Hi Stanislaw,
> 
> reviewing the original patch I think we can't trigger any IOMMU bug since the
> usb buffer length is actually 2048 and not 2048 + skb_shared_info_size:

I'm concerned about alignment and crossing pages boundaries. If you
allocate via page_frag_alloc() buffers, except first one, will have
'not natural' alignment and every second will be spanned across
two pages.

Stanislaw
Lorenzo Bianconi June 13, 2019, 9:06 a.m. UTC | #15
> Hi
> 
> On Thu, Jun 13, 2019 at 10:26:38AM +0200, Lorenzo Bianconi wrote:
> > [...]
> > 
> > > I looked at intel wifi drivers and this is handled by amsdu_size module
> > > parameter, supported values are 4k, 8k and 12k. RX allocation size and
> > > proper values in vht_cap & ht_cap are set accordingly. Assuming (some)
> > > mt76 HW and FW can handle bigger AMSDUs I think we should do similar
> > > thing.
> > > 
> > > Otherwise looks for me, we just waste memory and have not needed code
> > > for no true reason. 
> > > 
> > > >   space needed for skb_shared_info is 320B on a x86_64 device
> > > 
> > > Uhh, I haven't expected that sk_shared_info() is that big, so indeed build_skb
> > > could not used and 128B copy fallback will be necessary.
> > 
> > Hi Stanislaw,
> > 
> > reviewing the original patch I think we can't trigger any IOMMU bug since the
> > usb buffer length is actually 2048 and not 2048 + skb_shared_info_size:
> 
> I'm concerned about alignment and crossing pages boundaries. If you
> allocate via page_frag_alloc() buffers, except first one, will have
> 'not natural' alignment and every second will be spanned across
> two pages.

ack, so I think the second approach will be safer (using roundup instead of
rounddown :))

Regards,
Lorenzo

> 
> Stanislaw
>
diff mbox series

Patch

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 74d4edf941d6..7899e9b88b54 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -32,6 +32,9 @@ 
 #define MT_RX_BUF_SIZE      2048
 #define MT_SKB_HEAD_LEN     128
 
+#define MT_BUF_WITH_OVERHEAD(x) \
+	((x) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
 struct mt76_dev;
 struct mt76_wcid;
 
diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
index 2bfc8214c0d8..5081643ce701 100644
--- a/drivers/net/wireless/mediatek/mt76/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/usb.c
@@ -286,7 +286,7 @@  static int
 mt76u_fill_rx_sg(struct mt76_dev *dev, struct mt76_queue *q, struct urb *urb,
 		 int nsgs, gfp_t gfp)
 {
-	int i;
+	int i, data_size = SKB_WITH_OVERHEAD(q->buf_size);
 
 	for (i = 0; i < nsgs; i++) {
 		struct page *page;
@@ -299,7 +299,7 @@  mt76u_fill_rx_sg(struct mt76_dev *dev, struct mt76_queue *q, struct urb *urb,
 
 		page = virt_to_head_page(data);
 		offset = data - page_address(page);
-		sg_set_page(&urb->sg[i], page, q->buf_size, offset);
+		sg_set_page(&urb->sg[i], page, data_size, offset);
 	}
 
 	if (i < nsgs) {
@@ -311,7 +311,7 @@  mt76u_fill_rx_sg(struct mt76_dev *dev, struct mt76_queue *q, struct urb *urb,
 	}
 
 	urb->num_sgs = max_t(int, i, urb->num_sgs);
-	urb->transfer_buffer_length = urb->num_sgs * q->buf_size,
+	urb->transfer_buffer_length = urb->num_sgs * data_size;
 	sg_init_marker(urb->sg, urb->num_sgs);
 
 	return i ? : -ENOMEM;
@@ -611,8 +611,12 @@  static int mt76u_alloc_rx(struct mt76_dev *dev)
 	if (!q->entry)
 		return -ENOMEM;
 
-	q->buf_size = dev->usb.sg_en ? MT_RX_BUF_SIZE : PAGE_SIZE;
+	if (dev->usb.sg_en)
+		q->buf_size = MT_BUF_WITH_OVERHEAD(MT_RX_BUF_SIZE);
+	else
+		q->buf_size = PAGE_SIZE;
 	q->ndesc = MT_NUM_RX_ENTRIES;
+
 	for (i = 0; i < q->ndesc; i++) {
 		err = mt76u_rx_urb_alloc(dev, &q->entry[i]);
 		if (err < 0)