diff mbox series

[v2,1/2] mt76: usb: fix rx A-MSDU support

Message ID 52ea155d9889aa15df44b4910806b74fa2fd9056.1559293385.git.lorenzo@kernel.org (mailing list archive)
State Superseded
Delegated to: Kalle Valo
Headers show
Series mt76: usb: fix A-MSDU support | expand

Commit Message

Lorenzo Bianconi May 31, 2019, 9:38 a.m. UTC
Commit f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes
for rx") breaks A-MSDU support. When A-MSDU is enable the device can
receive frames up to q->buf_size but they will be discarded in
mt76u_process_rx_entry since there is no enough room for
skb_shared_info. Fix the issue reallocating the skb and copying in the
linear area the first 128B of the received frames and in the frag_list
the remaining part.

Fixes: f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes for rx")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 drivers/net/wireless/mediatek/mt76/mt76.h |  1 +
 drivers/net/wireless/mediatek/mt76/usb.c  | 52 +++++++++++++++++++----
 2 files changed, 44 insertions(+), 9 deletions(-)

Comments

Stanislaw Gruszka June 12, 2019, 8:58 a.m. UTC | #1
Hi and sorry for late comment.

On Fri, May 31, 2019 at 11:38:22AM +0200, Lorenzo Bianconi wrote:
> Commit f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes
> for rx") breaks A-MSDU support. When A-MSDU is enable the device can
> receive frames up to q->buf_size but they will be discarded in
> mt76u_process_rx_entry since there is no enough room for
> skb_shared_info. Fix the issue reallocating the skb and copying in the
> linear area the first 128B of the received frames and in the frag_list
> the remaining part.
> 
> Fixes: f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes for rx")
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
>  drivers/net/wireless/mediatek/mt76/mt76.h |  1 +
>  drivers/net/wireless/mediatek/mt76/usb.c  | 52 +++++++++++++++++++----
>  2 files changed, 44 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> index 97a1296562d0..74d4edf941d6 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> @@ -30,6 +30,7 @@
>  #define MT_TX_RING_SIZE     256
>  #define MT_MCU_RING_SIZE    32
>  #define MT_RX_BUF_SIZE      2048
> +#define MT_SKB_HEAD_LEN     128
>  
>  struct mt76_dev;
>  struct mt76_wcid;
> diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
> index bbaa1365bbda..2bfc8214c0d8 100644
> --- a/drivers/net/wireless/mediatek/mt76/usb.c
> +++ b/drivers/net/wireless/mediatek/mt76/usb.c
> @@ -429,6 +429,48 @@ static int mt76u_get_rx_entry_len(u8 *data, u32 data_len)
>  	return dma_len;
>  }
>  
> +static struct sk_buff *
> +mt76u_build_rx_skb(u8 *data, int len, int buf_size,
> +		   int *nsgs)
> +{
> +	int data_len = min(len, MT_SKB_HEAD_LEN);
> +	struct sk_buff *skb;
> +
> +	if (SKB_WITH_OVERHEAD(buf_size) >= MT_DMA_HDR_LEN + len) {
> +		/* fast path */
> +		skb = build_skb(data, buf_size);
> +		if (!skb)
> +			return NULL;
> +
> +		skb_reserve(skb, MT_DMA_HDR_LEN);
> +		__skb_put(skb, len);
> +
> +		return skb;
> +	}
> +
> +	/* slow path, not enough space for data and
> +	 * skb_shared_info
> +	 */
> +	skb = alloc_skb(data_len, GFP_ATOMIC);
> +	if (!skb)
> +		return NULL;
> +
> +	skb_put_data(skb, data + MT_DMA_HDR_LEN, data_len);
mt7601u and iwlmvm just copy hdrlen + 8 and put the rest
of the buffer in fragment, which supose to be more efficient,
see comment in iwl_mvm_pass_packet_to_mac80211().
 
> +	data += (data_len + MT_DMA_HDR_LEN);
> +	len -= data_len;
> +	if (len > 0) {
> +		struct page *page = virt_to_head_page(data);
> +		int offset = data - (u8 *)page_address(page);
u8 cast not needed.

> +		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
> +				page, offset, len, buf_size);
> +	} else {
> +		*nsgs = 0;
This seems to be not necessary or a bug (use first buffer 2 times). 

Stanislaw
Lorenzo Bianconi June 12, 2019, 9:45 a.m. UTC | #2
> Hi and sorry for late comment.
> 

Hi Stanislaw,

> On Fri, May 31, 2019 at 11:38:22AM +0200, Lorenzo Bianconi wrote:
> > Commit f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes
> > for rx") breaks A-MSDU support. When A-MSDU is enable the device can
> > receive frames up to q->buf_size but they will be discarded in
> > mt76u_process_rx_entry since there is no enough room for
> > skb_shared_info. Fix the issue reallocating the skb and copying in the
> > linear area the first 128B of the received frames and in the frag_list
> > the remaining part.
> > 
> > Fixes: f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes for rx")
> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > ---
> >  drivers/net/wireless/mediatek/mt76/mt76.h |  1 +
> >  drivers/net/wireless/mediatek/mt76/usb.c  | 52 +++++++++++++++++++----
> >  2 files changed, 44 insertions(+), 9 deletions(-)
> > 
> > diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> > index 97a1296562d0..74d4edf941d6 100644
> > --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> > +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> > @@ -30,6 +30,7 @@
> >  #define MT_TX_RING_SIZE     256
> >  #define MT_MCU_RING_SIZE    32
> >  #define MT_RX_BUF_SIZE      2048
> > +#define MT_SKB_HEAD_LEN     128
> >  
> >  struct mt76_dev;
> >  struct mt76_wcid;
> > diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
> > index bbaa1365bbda..2bfc8214c0d8 100644
> > --- a/drivers/net/wireless/mediatek/mt76/usb.c
> > +++ b/drivers/net/wireless/mediatek/mt76/usb.c
> > @@ -429,6 +429,48 @@ static int mt76u_get_rx_entry_len(u8 *data, u32 data_len)
> >  	return dma_len;
> >  }
> >  
> > +static struct sk_buff *
> > +mt76u_build_rx_skb(u8 *data, int len, int buf_size,
> > +		   int *nsgs)
> > +{
> > +	int data_len = min(len, MT_SKB_HEAD_LEN);
> > +	struct sk_buff *skb;
> > +
> > +	if (SKB_WITH_OVERHEAD(buf_size) >= MT_DMA_HDR_LEN + len) {
> > +		/* fast path */
> > +		skb = build_skb(data, buf_size);
> > +		if (!skb)
> > +			return NULL;
> > +
> > +		skb_reserve(skb, MT_DMA_HDR_LEN);
> > +		__skb_put(skb, len);
> > +
> > +		return skb;
> > +	}
> > +
> > +	/* slow path, not enough space for data and
> > +	 * skb_shared_info
> > +	 */
> > +	skb = alloc_skb(data_len, GFP_ATOMIC);
> > +	if (!skb)
> > +		return NULL;
> > +
> > +	skb_put_data(skb, data + MT_DMA_HDR_LEN, data_len);
> mt7601u and iwlmvm just copy hdrlen + 8 and put the rest
> of the buffer in fragment, which supose to be more efficient,
> see comment in iwl_mvm_pass_packet_to_mac80211().

Right here we copy 128B instead of 32 but I think it is good to have L3 and L4
header in the linear area of the skb since otherwise the stack will need to
align them

>  
> > +	data += (data_len + MT_DMA_HDR_LEN);
> > +	len -= data_len;
> > +	if (len > 0) {
> > +		struct page *page = virt_to_head_page(data);
> > +		int offset = data - (u8 *)page_address(page);
> u8 cast not needed.
> 
> > +		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
> > +				page, offset, len, buf_size);
> > +	} else {
> > +		*nsgs = 0;
> This seems to be not necessary or a bug (use first buffer 2 times). 

maybe I have been a little bit too paranoid here :)

Regards,
Lorenzo

> 
> Stanislaw
Stanislaw Gruszka June 12, 2019, 10 a.m. UTC | #3
On Wed, Jun 12, 2019 at 11:45:21AM +0200, Lorenzo Bianconi wrote:
> > > +mt76u_build_rx_skb(u8 *data, int len, int buf_size,
> > > +		   int *nsgs)
> > > +{
> > > +	int data_len = min(len, MT_SKB_HEAD_LEN);

Oh, and this looks unneeded as well as for len < MT_SKB_HEAD_LEN=128
we will go through fast path.

> > mt7601u and iwlmvm just copy hdrlen + 8 and put the rest
> > of the buffer in fragment, which supose to be more efficient,
> > see comment in iwl_mvm_pass_packet_to_mac80211().
> 
> Right here we copy 128B instead of 32 but I think it is good to have L3 and L4
> header in the linear area of the skb since otherwise the stack will need to
> align them

Not sure if understand, I think aliment of L3 & L4 headers will be
the same, assuming ieee80211 header is aligned the same in fragment
buffer and in linear area. But if you think this is better to copy those
to linear area I'm ok with that.

Stanislaw
Lorenzo Bianconi June 12, 2019, 10:21 a.m. UTC | #4
On Jun 12, Stanislaw Gruszka wrote:
> On Wed, Jun 12, 2019 at 11:45:21AM +0200, Lorenzo Bianconi wrote:
> > > > +mt76u_build_rx_skb(u8 *data, int len, int buf_size,
> > > > +		   int *nsgs)
> > > > +{
> > > > +	int data_len = min(len, MT_SKB_HEAD_LEN);
> 
> Oh, and this looks unneeded as well as for len < MT_SKB_HEAD_LEN=128
> we will go through fast path.

I guess if we remove data_len = min(len, MT_SKB_HEAD_LEN) and even *nsgs = 0 at
the end we are making some assumptions on the value of MT_SKB_HEAD_LEN and
buf_size. In the patch I just avoided them but maybe we can just assume that
MT_SKB_HEAD_LEN and buf_size will not changed in the future. What do you
think?

> 
> > > mt7601u and iwlmvm just copy hdrlen + 8 and put the rest
> > > of the buffer in fragment, which supose to be more efficient,
> > > see comment in iwl_mvm_pass_packet_to_mac80211().
> > 
> > Right here we copy 128B instead of 32 but I think it is good to have L3 and L4
> > header in the linear area of the skb since otherwise the stack will need to
> > align them
> 
> Not sure if understand, I think aliment of L3 & L4 headers will be
> the same, assuming ieee80211 header is aligned the same in fragment
> buffer and in linear area. But if you think this is better to copy those
> to linear area I'm ok with that.

Sorry I have not been so clear. I mean in the stack before accessing a given
header we will run pskb_may_pull() that can end up copying the skb if there is
not enough space in the skb->head

Regards,
Lorenzo

> 
> Stanislaw
Stanislaw Gruszka June 12, 2019, 10:58 a.m. UTC | #5
On Wed, Jun 12, 2019 at 12:21:34PM +0200, Lorenzo Bianconi wrote:
> On Jun 12, Stanislaw Gruszka wrote:
> > On Wed, Jun 12, 2019 at 11:45:21AM +0200, Lorenzo Bianconi wrote:
> > > > > +mt76u_build_rx_skb(u8 *data, int len, int buf_size,
> > > > > +		   int *nsgs)
> > > > > +{
> > > > > +	int data_len = min(len, MT_SKB_HEAD_LEN);
> > 
> > Oh, and this looks unneeded as well as for len < MT_SKB_HEAD_LEN=128
> > we will go through fast path.
> 
> I guess if we remove data_len = min(len, MT_SKB_HEAD_LEN) and even *nsgs = 0 at
> the end we are making some assumptions on the value of MT_SKB_HEAD_LEN and
> buf_size. In the patch I just avoided them but maybe we can just assume that
> MT_SKB_HEAD_LEN and buf_size will not changed in the future. What do you
> think?

Yes, sure. Other drivers just use 128 value directly and don't even
create a macro for that. And if somebody will decide to change
buf_size it will not be small value.

> > > > mt7601u and iwlmvm just copy hdrlen + 8 and put the rest
> > > > of the buffer in fragment, which supose to be more efficient,
> > > > see comment in iwl_mvm_pass_packet_to_mac80211().
> > > 
> > > Right here we copy 128B instead of 32 but I think it is good to have L3 and L4
> > > header in the linear area of the skb since otherwise the stack will need to
> > > align them
> > 
> > Not sure if understand, I think aliment of L3 & L4 headers will be
> > the same, assuming ieee80211 header is aligned the same in fragment
> > buffer and in linear area. But if you think this is better to copy those
> > to linear area I'm ok with that.
> 
> Sorry I have not been so clear. I mean in the stack before accessing a given
> header we will run pskb_may_pull() that can end up copying the skb if there is
> not enough space in the skb->head

Ok, so L3 and L4 headers should be in linear area of skb and if not 
network stack will copy them from fragment. But I wonder why other
drivers just copy ieee80211_hdr and SNAP ? Isn't that if we copy 
128B then is possible that part of the payload will be in linear
area and part in fragment, whereas is expected that payload
will not be broken into two parts?

Stanislaw
Lorenzo Bianconi June 12, 2019, 11:17 a.m. UTC | #6
> On Wed, Jun 12, 2019 at 12:21:34PM +0200, Lorenzo Bianconi wrote:
> > On Jun 12, Stanislaw Gruszka wrote:
> > > On Wed, Jun 12, 2019 at 11:45:21AM +0200, Lorenzo Bianconi wrote:
> > > > > > +mt76u_build_rx_skb(u8 *data, int len, int buf_size,
> > > > > > +		   int *nsgs)
> > > > > > +{
> > > > > > +	int data_len = min(len, MT_SKB_HEAD_LEN);
> > > 
> > > Oh, and this looks unneeded as well as for len < MT_SKB_HEAD_LEN=128
> > > we will go through fast path.
> > 
> > I guess if we remove data_len = min(len, MT_SKB_HEAD_LEN) and even *nsgs = 0 at
> > the end we are making some assumptions on the value of MT_SKB_HEAD_LEN and
> > buf_size. In the patch I just avoided them but maybe we can just assume that
> > MT_SKB_HEAD_LEN and buf_size will not changed in the future. What do you
> > think?
> 
> Yes, sure. Other drivers just use 128 value directly and don't even
> create a macro for that. And if somebody will decide to change
> buf_size it will not be small value.

Ok, I will simplify it in v2

> 
> > > > > mt7601u and iwlmvm just copy hdrlen + 8 and put the rest
> > > > > of the buffer in fragment, which supose to be more efficient,
> > > > > see comment in iwl_mvm_pass_packet_to_mac80211().
> > > > 
> > > > Right here we copy 128B instead of 32 but I think it is good to have L3 and L4
> > > > header in the linear area of the skb since otherwise the stack will need to
> > > > align them
> > > 
> > > Not sure if understand, I think aliment of L3 & L4 headers will be
> > > the same, assuming ieee80211 header is aligned the same in fragment
> > > buffer and in linear area. But if you think this is better to copy those
> > > to linear area I'm ok with that.
> > 
> > Sorry I have not been so clear. I mean in the stack before accessing a given
> > header we will run pskb_may_pull() that can end up copying the skb if there is
> > not enough space in the skb->head
> 
> Ok, so L3 and L4 headers should be in linear area of skb and if not 
> network stack will copy them from fragment. But I wonder why other
> drivers just copy ieee80211_hdr and SNAP ? Isn't that if we copy 
> 128B then is possible that part of the payload will be in linear
> area and part in fragment, whereas is expected that payload
> will not be broken into two parts?

I think the payload can be split in multiple skb fragments, the constraint is
just related to header parsing

Regards,
Lorenzo



> 
> Stanislaw
diff mbox series

Patch

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 97a1296562d0..74d4edf941d6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -30,6 +30,7 @@ 
 #define MT_TX_RING_SIZE     256
 #define MT_MCU_RING_SIZE    32
 #define MT_RX_BUF_SIZE      2048
+#define MT_SKB_HEAD_LEN     128
 
 struct mt76_dev;
 struct mt76_wcid;
diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
index bbaa1365bbda..2bfc8214c0d8 100644
--- a/drivers/net/wireless/mediatek/mt76/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/usb.c
@@ -429,6 +429,48 @@  static int mt76u_get_rx_entry_len(u8 *data, u32 data_len)
 	return dma_len;
 }
 
+static struct sk_buff *
+mt76u_build_rx_skb(u8 *data, int len, int buf_size,
+		   int *nsgs)
+{
+	int data_len = min(len, MT_SKB_HEAD_LEN);
+	struct sk_buff *skb;
+
+	if (SKB_WITH_OVERHEAD(buf_size) >= MT_DMA_HDR_LEN + len) {
+		/* fast path */
+		skb = build_skb(data, buf_size);
+		if (!skb)
+			return NULL;
+
+		skb_reserve(skb, MT_DMA_HDR_LEN);
+		__skb_put(skb, len);
+
+		return skb;
+	}
+
+	/* slow path, not enough space for data and
+	 * skb_shared_info
+	 */
+	skb = alloc_skb(data_len, GFP_ATOMIC);
+	if (!skb)
+		return NULL;
+
+	skb_put_data(skb, data + MT_DMA_HDR_LEN, data_len);
+	data += (data_len + MT_DMA_HDR_LEN);
+	len -= data_len;
+	if (len > 0) {
+		struct page *page = virt_to_head_page(data);
+		int offset = data - (u8 *)page_address(page);
+
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+				page, offset, len, buf_size);
+	} else {
+		*nsgs = 0;
+	}
+
+	return skb;
+}
+
 static int
 mt76u_process_rx_entry(struct mt76_dev *dev, struct urb *urb)
 {
@@ -446,19 +488,11 @@  mt76u_process_rx_entry(struct mt76_dev *dev, struct urb *urb)
 		return 0;
 
 	data_len = min_t(int, len, data_len - MT_DMA_HDR_LEN);
-	if (MT_DMA_HDR_LEN + data_len > SKB_WITH_OVERHEAD(q->buf_size)) {
-		dev_err_ratelimited(dev->dev, "rx data too big %d\n", data_len);
-		return 0;
-	}
-
-	skb = build_skb(data, q->buf_size);
+	skb = mt76u_build_rx_skb(data, data_len, q->buf_size, &nsgs);
 	if (!skb)
 		return 0;
 
-	skb_reserve(skb, MT_DMA_HDR_LEN);
-	__skb_put(skb, data_len);
 	len -= data_len;
-
 	while (len > 0 && nsgs < urb->num_sgs) {
 		data_len = min_t(int, len, urb->sg[nsgs].length);
 		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,