diff mbox series

[v3,wireless-drivers,1/3] mt76: usb: fix rx A-MSDU support

Message ID 66fc02e45fb5ce0d6176395b5ac43acbd53b3e66.1560461404.git.lorenzo@kernel.org (mailing list archive)
State Superseded
Delegated to: Felix Fietkau
Headers show
Series mt76: usb: fix A-MSDU support | expand

Commit Message

Lorenzo Bianconi June 13, 2019, 9:43 p.m. UTC
Commit f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes
for rx") breaks A-MSDU support. When A-MSDU is enable the device can
receive frames up to q->buf_size but they will be discarded in
mt76u_process_rx_entry since there is no enough room for
skb_shared_info. Fix the issue reallocating the skb and copying in the
linear area the first 128B of the received frames and in the frag_list
the remaining part.

Fixes: f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes for rx")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 drivers/net/wireless/mediatek/mt76/mt76.h |  1 +
 drivers/net/wireless/mediatek/mt76/usb.c  | 49 ++++++++++++++++++-----
 2 files changed, 41 insertions(+), 9 deletions(-)

Comments

Stanislaw Gruszka June 14, 2019, 7:24 a.m. UTC | #1
On Thu, Jun 13, 2019 at 11:43:11PM +0200, Lorenzo Bianconi wrote:
> Commit f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes
> for rx") breaks A-MSDU support. When A-MSDU is enable the device can
> receive frames up to q->buf_size but they will be discarded in
> mt76u_process_rx_entry since there is no enough room for
> skb_shared_info. Fix the issue reallocating the skb and copying in the
> linear area the first 128B of the received frames and in the frag_list
> the remaining part.
> 
> Fixes: f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes for rx")
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
>  drivers/net/wireless/mediatek/mt76/mt76.h |  1 +
>  drivers/net/wireless/mediatek/mt76/usb.c  | 49 ++++++++++++++++++-----
>  2 files changed, 41 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> index 8ecbf81a906f..889b76deb703 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> @@ -30,6 +30,7 @@
>  #define MT_TX_RING_SIZE     256
>  #define MT_MCU_RING_SIZE    32
>  #define MT_RX_BUF_SIZE      2048
> +#define MT_SKB_HEAD_LEN     128
>  
>  struct mt76_dev;
>  struct mt76_wcid;
> diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
> index bbaa1365bbda..12d60d31cb51 100644
> --- a/drivers/net/wireless/mediatek/mt76/usb.c
> +++ b/drivers/net/wireless/mediatek/mt76/usb.c
> @@ -429,6 +429,45 @@ static int mt76u_get_rx_entry_len(u8 *data, u32 data_len)
>  	return dma_len;
>  }
>  
> +static struct sk_buff *
> +mt76u_build_rx_skb(u8 *data, int len, int buf_size)
> +{
> +	struct sk_buff *skb;
> +
> +	if (SKB_WITH_OVERHEAD(buf_size) < MT_DMA_HDR_LEN + len) {
> +		struct page *page;
> +		int offset;
> +
> +		/* slow path, not enough space for data and
> +		 * skb_shared_info
> +		 */
> +		skb = alloc_skb(MT_SKB_HEAD_LEN, GFP_ATOMIC);
> +		if (!skb)
> +			return NULL;
> +
> +		skb_put_data(skb, data + MT_DMA_HDR_LEN, MT_SKB_HEAD_LEN);

I looked how rx amsdu is processed in mac80211 and it is decomposed and
copied into newly allocated individual skb's, see ieee80211_amsdu_to_8023s()

So copy L3 & L4 headers doesn't do anything good here, actually seems to
be better to have them in frag as __ieee80211_amsdu_copy_frag() do some
magic to avid copy.

Stanislaw
Lorenzo Bianconi June 14, 2019, 10:11 a.m. UTC | #2
> On Thu, Jun 13, 2019 at 11:43:11PM +0200, Lorenzo Bianconi wrote:
> > Commit f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes
> > for rx") breaks A-MSDU support. When A-MSDU is enable the device can
> > receive frames up to q->buf_size but they will be discarded in
> > mt76u_process_rx_entry since there is no enough room for
> > skb_shared_info. Fix the issue reallocating the skb and copying in the
> > linear area the first 128B of the received frames and in the frag_list
> > the remaining part.
> > 
> > Fixes: f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes for rx")
> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > ---
> >  drivers/net/wireless/mediatek/mt76/mt76.h |  1 +
> >  drivers/net/wireless/mediatek/mt76/usb.c  | 49 ++++++++++++++++++-----
> >  2 files changed, 41 insertions(+), 9 deletions(-)
> > 
> > diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> > index 8ecbf81a906f..889b76deb703 100644
> > --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> > +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> > @@ -30,6 +30,7 @@
> >  #define MT_TX_RING_SIZE     256
> >  #define MT_MCU_RING_SIZE    32
> >  #define MT_RX_BUF_SIZE      2048
> > +#define MT_SKB_HEAD_LEN     128
> >  
> >  struct mt76_dev;
> >  struct mt76_wcid;
> > diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
> > index bbaa1365bbda..12d60d31cb51 100644
> > --- a/drivers/net/wireless/mediatek/mt76/usb.c
> > +++ b/drivers/net/wireless/mediatek/mt76/usb.c
> > @@ -429,6 +429,45 @@ static int mt76u_get_rx_entry_len(u8 *data, u32 data_len)
> >  	return dma_len;
> >  }
> >  
> > +static struct sk_buff *
> > +mt76u_build_rx_skb(u8 *data, int len, int buf_size)
> > +{
> > +	struct sk_buff *skb;
> > +
> > +	if (SKB_WITH_OVERHEAD(buf_size) < MT_DMA_HDR_LEN + len) {
> > +		struct page *page;
> > +		int offset;
> > +
> > +		/* slow path, not enough space for data and
> > +		 * skb_shared_info
> > +		 */
> > +		skb = alloc_skb(MT_SKB_HEAD_LEN, GFP_ATOMIC);
> > +		if (!skb)
> > +			return NULL;
> > +
> > +		skb_put_data(skb, data + MT_DMA_HDR_LEN, MT_SKB_HEAD_LEN);
> 
> I looked how rx amsdu is processed in mac80211 and it is decomposed and
> copied into newly allocated individual skb's, see ieee80211_amsdu_to_8023s()
> 
> So copy L3 & L4 headers doesn't do anything good here, actually seems to
> be better to have them in frag as __ieee80211_amsdu_copy_frag() do some
> magic to avid copy.

Looking at __ieee80211_amsdu_copy() now I got why other drivers copy hdrlen +
8, thx :)
In our case reuse_frag is true in __ieee80211_amsdu_copy, so we will end up
copying 32B + ether_len. Anyway I think 32 is a little bit too low and we could get
better performances increasing it a little bit.
A typical use case (e.g IPv6 + TCP):

IPv6 = 40B, TCP = 32B --> so 72B..I guess 128B is a good value :)
@Felix, Johannes: what do you think?

Regarding the patch I guess let's apply it as it is in order to fix the pending
issue and then we will figure out how to proceed (copy hdr_len + 3 or increase
the value in __ieee80211_amsdu_copy)

Regards,
Lorenzo

> 
> Stanislaw
Johannes Berg June 14, 2019, 10:20 a.m. UTC | #3
On Fri, 2019-06-14 at 12:11 +0200, Lorenzo Bianconi wrote:

> Looking at __ieee80211_amsdu_copy() now I got why other drivers copy hdrlen +
> 8, thx :)
> In our case reuse_frag is true in __ieee80211_amsdu_copy, so we will end up
> copying 32B + ether_len. Anyway I think 32 is a little bit too low and we could get
> better performances increasing it a little bit.
> A typical use case (e.g IPv6 + TCP):
> 
> IPv6 = 40B, TCP = 32B --> so 72B..I guess 128B is a good value :)
> @Felix, Johannes: what do you think?

I think while we might *allocate* more, I don't think we should *copy*
more, since then the TCP payload will no longer be in pages.

It'd probably be better to implement leaving enough tailroom (allocate
128), but copying nothing, unless the *entire* packet fits.

johannes
Stanislaw Gruszka June 14, 2019, 11:14 a.m. UTC | #4
On Fri, Jun 14, 2019 at 12:11:17PM +0200, Lorenzo Bianconi wrote:
> > On Thu, Jun 13, 2019 at 11:43:11PM +0200, Lorenzo Bianconi wrote:
> > > Commit f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes
> > > for rx") breaks A-MSDU support. When A-MSDU is enable the device can
> > > receive frames up to q->buf_size but they will be discarded in
> > > mt76u_process_rx_entry since there is no enough room for
> > > skb_shared_info. Fix the issue reallocating the skb and copying in the
> > > linear area the first 128B of the received frames and in the frag_list
> > > the remaining part.
> > > 
> > > Fixes: f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes for rx")
> > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > > ---
> > >  drivers/net/wireless/mediatek/mt76/mt76.h |  1 +
> > >  drivers/net/wireless/mediatek/mt76/usb.c  | 49 ++++++++++++++++++-----
> > >  2 files changed, 41 insertions(+), 9 deletions(-)
> > > 
> > > diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> > > index 8ecbf81a906f..889b76deb703 100644
> > > --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> > > +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> > > @@ -30,6 +30,7 @@
> > >  #define MT_TX_RING_SIZE     256
> > >  #define MT_MCU_RING_SIZE    32
> > >  #define MT_RX_BUF_SIZE      2048
> > > +#define MT_SKB_HEAD_LEN     128
> > >  
> > >  struct mt76_dev;
> > >  struct mt76_wcid;
> > > diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
> > > index bbaa1365bbda..12d60d31cb51 100644
> > > --- a/drivers/net/wireless/mediatek/mt76/usb.c
> > > +++ b/drivers/net/wireless/mediatek/mt76/usb.c
> > > @@ -429,6 +429,45 @@ static int mt76u_get_rx_entry_len(u8 *data, u32 data_len)
> > >  	return dma_len;
> > >  }
> > >  
> > > +static struct sk_buff *
> > > +mt76u_build_rx_skb(u8 *data, int len, int buf_size)
> > > +{
> > > +	struct sk_buff *skb;
> > > +
> > > +	if (SKB_WITH_OVERHEAD(buf_size) < MT_DMA_HDR_LEN + len) {
> > > +		struct page *page;
> > > +		int offset;
> > > +
> > > +		/* slow path, not enough space for data and
> > > +		 * skb_shared_info
> > > +		 */
> > > +		skb = alloc_skb(MT_SKB_HEAD_LEN, GFP_ATOMIC);
> > > +		if (!skb)
> > > +			return NULL;
> > > +
> > > +		skb_put_data(skb, data + MT_DMA_HDR_LEN, MT_SKB_HEAD_LEN);
> > 
> > I looked how rx amsdu is processed in mac80211 and it is decomposed and
> > copied into newly allocated individual skb's, see ieee80211_amsdu_to_8023s()
> > 
> > So copy L3 & L4 headers doesn't do anything good here, actually seems to
> > be better to have them in frag as __ieee80211_amsdu_copy_frag() do some
> > magic to avid copy.
> 
> Looking at __ieee80211_amsdu_copy() now I got why other drivers copy hdrlen +
> 8, thx :)
> In our case reuse_frag is true in __ieee80211_amsdu_copy, so we will end up

I don't think reuse_frag is true in our case since skb->head_frag is
not set when we use alloc_skb(). 

Stanislaw
Stanislaw Gruszka June 14, 2019, 11:31 a.m. UTC | #5
On Fri, Jun 14, 2019 at 12:20:59PM +0200, Johannes Berg wrote:
> On Fri, 2019-06-14 at 12:11 +0200, Lorenzo Bianconi wrote:
> 
> > Looking at __ieee80211_amsdu_copy() now I got why other drivers copy hdrlen +
> > 8, thx :)
> > In our case reuse_frag is true in __ieee80211_amsdu_copy, so we will end up
> > copying 32B + ether_len. Anyway I think 32 is a little bit too low and we could get
> > better performances increasing it a little bit.
> > A typical use case (e.g IPv6 + TCP):
> > 
> > IPv6 = 40B, TCP = 32B --> so 72B..I guess 128B is a good value :)
> > @Felix, Johannes: what do you think?
> 
> I think while we might *allocate* more, I don't think we should *copy*
> more, since then the TCP payload will no longer be in pages.
> 
> It'd probably be better to implement leaving enough tailroom (allocate
> 128), but copying nothing, unless the *entire* packet fits.

iwl4965 put entire packet in fragment in il4965_pass_packet_to_mac80211() .
Initially I thought this is a bug, since mac80211 require header be
in the linear area, but looks like ieee80211_rx_monitor() copy header
before rest of mac80211 check it, so 4965 is fine.

Anyway I think the driver should put ieee80211 header in linear area
and iwlwifi & mt7601u implementation is somewhat optimal.

Stanislaw
Johannes Berg June 14, 2019, 11:34 a.m. UTC | #6
On Fri, 2019-06-14 at 13:31 +0200, Stanislaw Gruszka wrote:
> On Fri, Jun 14, 2019 at 12:20:59PM +0200, Johannes Berg wrote:
> > On Fri, 2019-06-14 at 12:11 +0200, Lorenzo Bianconi wrote:
> > 
> > > Looking at __ieee80211_amsdu_copy() now I got why other drivers copy hdrlen +
> > > 8, thx :)
> > > In our case reuse_frag is true in __ieee80211_amsdu_copy, so we will end up
> > > copying 32B + ether_len. Anyway I think 32 is a little bit too low and we could get
> > > better performances increasing it a little bit.
> > > A typical use case (e.g IPv6 + TCP):
> > > 
> > > IPv6 = 40B, TCP = 32B --> so 72B..I guess 128B is a good value :)
> > > @Felix, Johannes: what do you think?
> > 
> > I think while we might *allocate* more, I don't think we should *copy*
> > more, since then the TCP payload will no longer be in pages.
> > 
> > It'd probably be better to implement leaving enough tailroom (allocate
> > 128), but copying nothing, unless the *entire* packet fits.
> 
> iwl4965 put entire packet in fragment in il4965_pass_packet_to_mac80211() .
> Initially I thought this is a bug, since mac80211 require header be
> in the linear area, but looks like ieee80211_rx_monitor() copy header
> before rest of mac80211 check it, so 4965 is fine.

Mac80211 should not assume anything about header being present or not,
just like the rest of the network stack.

> Anyway I think the driver should put ieee80211 header in linear area
> and iwlwifi & mt7601u implementation is somewhat optimal.

Yes, it's just an optimisation to do the copy-break (copy small packets
completely) or to copy the header already (since we may have better ways
to do so than skb_copy_bits if we still have a virt pointer to the page,
rather than just the page pointer).

johannes
Lorenzo Bianconi June 14, 2019, 12:32 p.m. UTC | #7
> On Fri, Jun 14, 2019 at 12:11:17PM +0200, Lorenzo Bianconi wrote:
> > > On Thu, Jun 13, 2019 at 11:43:11PM +0200, Lorenzo Bianconi wrote:
> > > > Commit f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes
> > > > for rx") breaks A-MSDU support. When A-MSDU is enable the device can
> > > > receive frames up to q->buf_size but they will be discarded in
> > > > mt76u_process_rx_entry since there is no enough room for
> > > > skb_shared_info. Fix the issue reallocating the skb and copying in the
> > > > linear area the first 128B of the received frames and in the frag_list
> > > > the remaining part.
> > > > 
> > > > Fixes: f8f527b16db5 ("mt76: usb: use EP max packet aligned buffer sizes for rx")
> > > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > > > ---
> > > >  drivers/net/wireless/mediatek/mt76/mt76.h |  1 +
> > > >  drivers/net/wireless/mediatek/mt76/usb.c  | 49 ++++++++++++++++++-----
> > > >  2 files changed, 41 insertions(+), 9 deletions(-)
> > > > 
> > > > diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> > > > index 8ecbf81a906f..889b76deb703 100644
> > > > --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> > > > +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> > > > @@ -30,6 +30,7 @@
> > > >  #define MT_TX_RING_SIZE     256
> > > >  #define MT_MCU_RING_SIZE    32
> > > >  #define MT_RX_BUF_SIZE      2048
> > > > +#define MT_SKB_HEAD_LEN     128
> > > >  
> > > >  struct mt76_dev;
> > > >  struct mt76_wcid;
> > > > diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
> > > > index bbaa1365bbda..12d60d31cb51 100644
> > > > --- a/drivers/net/wireless/mediatek/mt76/usb.c
> > > > +++ b/drivers/net/wireless/mediatek/mt76/usb.c
> > > > @@ -429,6 +429,45 @@ static int mt76u_get_rx_entry_len(u8 *data, u32 data_len)
> > > >  	return dma_len;
> > > >  }
> > > >  
> > > > +static struct sk_buff *
> > > > +mt76u_build_rx_skb(u8 *data, int len, int buf_size)
> > > > +{
> > > > +	struct sk_buff *skb;
> > > > +
> > > > +	if (SKB_WITH_OVERHEAD(buf_size) < MT_DMA_HDR_LEN + len) {
> > > > +		struct page *page;
> > > > +		int offset;
> > > > +
> > > > +		/* slow path, not enough space for data and
> > > > +		 * skb_shared_info
> > > > +		 */
> > > > +		skb = alloc_skb(MT_SKB_HEAD_LEN, GFP_ATOMIC);
> > > > +		if (!skb)
> > > > +			return NULL;
> > > > +
> > > > +		skb_put_data(skb, data + MT_DMA_HDR_LEN, MT_SKB_HEAD_LEN);
> > > 
> > > I looked how rx amsdu is processed in mac80211 and it is decomposed and
> > > copied into newly allocated individual skb's, see ieee80211_amsdu_to_8023s()
> > > 
> > > So copy L3 & L4 headers doesn't do anything good here, actually seems to
> > > be better to have them in frag as __ieee80211_amsdu_copy_frag() do some
> > > magic to avid copy.
> > 
> > Looking at __ieee80211_amsdu_copy() now I got why other drivers copy hdrlen +
> > 8, thx :)
> > In our case reuse_frag is true in __ieee80211_amsdu_copy, so we will end up
> 
> I don't think reuse_frag is true in our case since skb->head_frag is
> not set when we use alloc_skb(). 

Oh, right. In this case it is probably better to use netdev_alloc_skb().
I will repost using the approach used in mt7601u since for the moment it will
not make any difference to copy more data.

Regards,
Lorenzo

> 
> Stanislaw
Lorenzo Bianconi June 15, 2019, 12:06 p.m. UTC | #8
>
> On Fri, Jun 14, 2019 at 12:20:59PM +0200, Johannes Berg wrote:
> > On Fri, 2019-06-14 at 12:11 +0200, Lorenzo Bianconi wrote:
> >
> > > Looking at __ieee80211_amsdu_copy() now I got why other drivers copy hdrlen +
> > > 8, thx :)
> > > In our case reuse_frag is true in __ieee80211_amsdu_copy, so we will end up
> > > copying 32B + ether_len. Anyway I think 32 is a little bit too low and we could get
> > > better performances increasing it a little bit.
> > > A typical use case (e.g IPv6 + TCP):
> > >
> > > IPv6 = 40B, TCP = 32B --> so 72B..I guess 128B is a good value :)
> > > @Felix, Johannes: what do you think?
> >
> > I think while we might *allocate* more, I don't think we should *copy*
> > more, since then the TCP payload will no longer be in pages.
> >
> > It'd probably be better to implement leaving enough tailroom (allocate
> > 128), but copying nothing, unless the *entire* packet fits.
>
> iwl4965 put entire packet in fragment in il4965_pass_packet_to_mac80211() .
> Initially I thought this is a bug, since mac80211 require header be
> in the linear area, but looks like ieee80211_rx_monitor() copy header
> before rest of mac80211 check it, so 4965 is fine.
>
> Anyway I think the driver should put ieee80211 header in linear area
> and iwlwifi & mt7601u implementation is somewhat optimal.

Actually the case is a little bit different for mt76 since we need
even mt76x02_rxwi in the linear area of the received skb.
Taking that into account the requested size to copy will be:
32 + 802.11 hdr + SNAP hdr  = ~ 70B
Moreover to pass rxwi size to usb module we need to add a field in
mt76_driver_ops (e.g rxwi_size).
I will carry out some tests and if there are no differences I will
post a single patch to wireless-drivers using 128B as default size

Regards,
Lorenzo

>
> Stanislaw
diff mbox series

Patch

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 8ecbf81a906f..889b76deb703 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -30,6 +30,7 @@ 
 #define MT_TX_RING_SIZE     256
 #define MT_MCU_RING_SIZE    32
 #define MT_RX_BUF_SIZE      2048
+#define MT_SKB_HEAD_LEN     128
 
 struct mt76_dev;
 struct mt76_wcid;
diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
index bbaa1365bbda..12d60d31cb51 100644
--- a/drivers/net/wireless/mediatek/mt76/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/usb.c
@@ -429,6 +429,45 @@  static int mt76u_get_rx_entry_len(u8 *data, u32 data_len)
 	return dma_len;
 }
 
+static struct sk_buff *
+mt76u_build_rx_skb(u8 *data, int len, int buf_size)
+{
+	struct sk_buff *skb;
+
+	if (SKB_WITH_OVERHEAD(buf_size) < MT_DMA_HDR_LEN + len) {
+		struct page *page;
+		int offset;
+
+		/* slow path, not enough space for data and
+		 * skb_shared_info
+		 */
+		skb = alloc_skb(MT_SKB_HEAD_LEN, GFP_ATOMIC);
+		if (!skb)
+			return NULL;
+
+		skb_put_data(skb, data + MT_DMA_HDR_LEN, MT_SKB_HEAD_LEN);
+		data += (MT_SKB_HEAD_LEN + MT_DMA_HDR_LEN);
+		page = virt_to_head_page(data);
+		offset = data - (u8 *)page_address(page);
+
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+				page, offset, len - MT_SKB_HEAD_LEN,
+				buf_size);
+
+		return skb;
+	}
+
+	/* fast path */
+	skb = build_skb(data, buf_size);
+	if (!skb)
+		return NULL;
+
+	skb_reserve(skb, MT_DMA_HDR_LEN);
+	__skb_put(skb, len);
+
+	return skb;
+}
+
 static int
 mt76u_process_rx_entry(struct mt76_dev *dev, struct urb *urb)
 {
@@ -446,19 +485,11 @@  mt76u_process_rx_entry(struct mt76_dev *dev, struct urb *urb)
 		return 0;
 
 	data_len = min_t(int, len, data_len - MT_DMA_HDR_LEN);
-	if (MT_DMA_HDR_LEN + data_len > SKB_WITH_OVERHEAD(q->buf_size)) {
-		dev_err_ratelimited(dev->dev, "rx data too big %d\n", data_len);
-		return 0;
-	}
-
-	skb = build_skb(data, q->buf_size);
+	skb = mt76u_build_rx_skb(data, data_len, q->buf_size);
 	if (!skb)
 		return 0;
 
-	skb_reserve(skb, MT_DMA_HDR_LEN);
-	__skb_put(skb, data_len);
 	len -= data_len;
-
 	while (len > 0 && nsgs < urb->num_sgs) {
 		data_len = min_t(int, len, urb->sg[nsgs].length);
 		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,