diff mbox series

[V3,net-next,3/4] net: mana: Enable RX path to handle various MTU sizes

Message ID 1681334163-31084-4-git-send-email-haiyangz@microsoft.com (mailing list archive)
State Not Applicable
Headers show
Series net: mana: Add support for jumbo frame | expand

Commit Message

Haiyang Zhang April 12, 2023, 9:16 p.m. UTC
Update RX data path to allocate and use RX queue DMA buffers with
proper size based on potentially various MTU sizes.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
---
V3:
Refectored to multiple patches for readability. Suggested by Jacob Keller.

V2:
Refectored to multiple patches for readability. Suggested by Yunsheng Lin.

---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 38 ++++++++++++++-----
 include/net/mana/mana.h                       |  7 ++++
 2 files changed, 35 insertions(+), 10 deletions(-)

Comments

Jakub Kicinski April 15, 2023, 2:06 a.m. UTC | #1
On Wed, 12 Apr 2023 14:16:02 -0700 Haiyang Zhang wrote:
> +	} else if (rxq->alloc_size > PAGE_SIZE) {
> +		if (is_napi)
> +			va = napi_alloc_frag(rxq->alloc_size);

Allocating frag larger than a page is not safe.
Frag allocator falls back to allocating single pages, doesn't it?
Haiyang Zhang April 15, 2023, 2:25 p.m. UTC | #2
> -----Original Message-----
> From: Jakub Kicinski <kuba@kernel.org>
> Sent: Friday, April 14, 2023 10:06 PM
> To: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: linux-hyperv@vger.kernel.org; netdev@vger.kernel.org; Dexuan Cui
> <decui@microsoft.com>; KY Srinivasan <kys@microsoft.com>; Paul Rosswurm
> <paulros@microsoft.com>; olaf@aepfle.de; vkuznets@redhat.com;
> davem@davemloft.net; wei.liu@kernel.org; edumazet@google.com;
> pabeni@redhat.com; leon@kernel.org; Long Li <longli@microsoft.com>;
> ssengar@linux.microsoft.com; linux-rdma@vger.kernel.org;
> daniel@iogearbox.net; john.fastabend@gmail.com; bpf@vger.kernel.org;
> ast@kernel.org; Ajay Sharma <sharmaajay@microsoft.com>;
> hawk@kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH V3,net-next, 3/4] net: mana: Enable RX path to handle
> various MTU sizes
> 
> On Wed, 12 Apr 2023 14:16:02 -0700 Haiyang Zhang wrote:
> > +	} else if (rxq->alloc_size > PAGE_SIZE) {
> > +		if (is_napi)
> > +			va = napi_alloc_frag(rxq->alloc_size);
> 
> Allocating frag larger than a page is not safe.

 I saw other drivers doing this - use napi_alloc_frag for size bigger than a page.
And it returns compound page. Why it's not safe? Should we use other allocator
when need compound pages?

> Frag allocator falls back to allocating single pages, doesn't it?

Actually I checked it. Compound page is still returned for size smaller than PAGE_SIZE,
so I used single page allocation for that.

Thanks,
- Haiyang
Jakub Kicinski April 17, 2023, 5:52 p.m. UTC | #3
On Sat, 15 Apr 2023 14:25:29 +0000 Haiyang Zhang wrote:
> > Allocating frag larger than a page is not safe.  
> 
>  I saw other drivers doing this - use napi_alloc_frag for size bigger than a page.
> And it returns compound page. Why it's not safe? Should we use other allocator
> when need compound pages?

I believe so. There was a thread about this within the last year.
Someone was trying to fix the page frag allocator to not fall back
to order 0 pages in case of failure if requested size is > PAGE_SIZE.
But there was push back and folks were saying that it's simply not 
a case supported by the frag allocator. 
Haiyang Zhang April 17, 2023, 7:52 p.m. UTC | #4
> -----Original Message-----
> From: Jakub Kicinski <kuba@kernel.org>
> Sent: Monday, April 17, 2023 1:52 PM
> To: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: linux-hyperv@vger.kernel.org; netdev@vger.kernel.org; Dexuan Cui
> <decui@microsoft.com>; KY Srinivasan <kys@microsoft.com>; Paul Rosswurm
> <paulros@microsoft.com>; olaf@aepfle.de; vkuznets@redhat.com;
> davem@davemloft.net; wei.liu@kernel.org; edumazet@google.com;
> pabeni@redhat.com; leon@kernel.org; Long Li <longli@microsoft.com>;
> ssengar@linux.microsoft.com; linux-rdma@vger.kernel.org;
> daniel@iogearbox.net; john.fastabend@gmail.com; bpf@vger.kernel.org;
> ast@kernel.org; Ajay Sharma <sharmaajay@microsoft.com>;
> hawk@kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH V3,net-next, 3/4] net: mana: Enable RX path to handle
> various MTU sizes
> 
> On Sat, 15 Apr 2023 14:25:29 +0000 Haiyang Zhang wrote:
> > > Allocating frag larger than a page is not safe.
> >
> >  I saw other drivers doing this - use napi_alloc_frag for size bigger than a
> page.
> > And it returns compound page. Why it's not safe? Should we use other
> allocator
> > when need compound pages?
> 
> I believe so. There was a thread about this within the last year.
> Someone was trying to fix the page frag allocator to not fall back
> to order 0 pages in case of failure if requested size is > PAGE_SIZE.
> But there was push back and folks were saying that it's simply not
> a case supported by the frag allocator. 
diff mbox series

Patch

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 911954ff84ee..8e7fa6e9c3b5 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1185,10 +1185,10 @@  static void mana_post_pkt_rxq(struct mana_rxq *rxq)
 	WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1);
 }
 
-static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len,
-				      struct xdp_buff *xdp)
+static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va,
+				      uint pkt_len, struct xdp_buff *xdp)
 {
-	struct sk_buff *skb = napi_build_skb(buf_va, PAGE_SIZE);
+	struct sk_buff *skb = napi_build_skb(buf_va, rxq->alloc_size);
 
 	if (!skb)
 		return NULL;
@@ -1196,11 +1196,12 @@  static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len,
 	if (xdp->data_hard_start) {
 		skb_reserve(skb, xdp->data - xdp->data_hard_start);
 		skb_put(skb, xdp->data_end - xdp->data);
-	} else {
-		skb_reserve(skb, XDP_PACKET_HEADROOM);
-		skb_put(skb, pkt_len);
+		return skb;
 	}
 
+	skb_reserve(skb, rxq->headroom);
+	skb_put(skb, pkt_len);
+
 	return skb;
 }
 
@@ -1233,7 +1234,7 @@  static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
 	if (act != XDP_PASS && act != XDP_TX)
 		goto drop_xdp;
 
-	skb = mana_build_skb(buf_va, pkt_len, &xdp);
+	skb = mana_build_skb(rxq, buf_va, pkt_len, &xdp);
 
 	if (!skb)
 		goto drop;
@@ -1301,6 +1302,14 @@  static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
 	if (rxq->xdp_save_va) {
 		va = rxq->xdp_save_va;
 		rxq->xdp_save_va = NULL;
+	} else if (rxq->alloc_size > PAGE_SIZE) {
+		if (is_napi)
+			va = napi_alloc_frag(rxq->alloc_size);
+		else
+			va = netdev_alloc_frag(rxq->alloc_size);
+
+		if (!va)
+			return NULL;
 	} else {
 		page = dev_alloc_page();
 		if (!page)
@@ -1309,7 +1318,7 @@  static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
 		va = page_to_virt(page);
 	}
 
-	*da = dma_map_single(dev, va + XDP_PACKET_HEADROOM, rxq->datasize,
+	*da = dma_map_single(dev, va + rxq->headroom, rxq->datasize,
 			     DMA_FROM_DEVICE);
 
 	if (dma_mapping_error(dev, *da)) {
@@ -1732,7 +1741,7 @@  static int mana_alloc_rx_wqe(struct mana_port_context *apc,
 	u32 buf_idx;
 	int ret;
 
-	WARN_ON(rxq->datasize == 0 || rxq->datasize > PAGE_SIZE);
+	WARN_ON(rxq->datasize == 0);
 
 	*rxq_size = 0;
 	*cq_size = 0;
@@ -1788,6 +1797,7 @@  static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 	struct gdma_dev *gd = apc->ac->gdma_dev;
 	struct mana_obj_spec wq_spec;
 	struct mana_obj_spec cq_spec;
+	unsigned int mtu = ndev->mtu;
 	struct gdma_queue_spec spec;
 	struct mana_cq *cq = NULL;
 	struct gdma_context *gc;
@@ -1807,7 +1817,15 @@  static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 	rxq->rxq_idx = rxq_idx;
 	rxq->rxobj = INVALID_MANA_HANDLE;
 
-	rxq->datasize = ALIGN(ETH_FRAME_LEN, 64);
+	rxq->datasize = ALIGN(mtu + ETH_HLEN, 64);
+
+	if (mtu > MANA_XDP_MTU_MAX) {
+		rxq->alloc_size = mtu + MANA_RXBUF_PAD;
+		rxq->headroom = 0;
+	} else {
+		rxq->alloc_size = mtu + MANA_RXBUF_PAD + XDP_PACKET_HEADROOM;
+		rxq->headroom = XDP_PACKET_HEADROOM;
+	}
 
 	err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size);
 	if (err)
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 037bcabf6b98..fee99d704281 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -291,6 +291,11 @@  struct mana_recv_buf_oob {
 	struct gdma_posted_wqe_info wqe_inf;
 };
 
+#define MANA_RXBUF_PAD (SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) \
+			+ ETH_HLEN)
+
+#define MANA_XDP_MTU_MAX (PAGE_SIZE - MANA_RXBUF_PAD - XDP_PACKET_HEADROOM)
+
 struct mana_rxq {
 	struct gdma_queue *gdma_rq;
 	/* Cache the gdma receive queue id */
@@ -300,6 +305,8 @@  struct mana_rxq {
 	u32 rxq_idx;
 
 	u32 datasize;
+	u32 alloc_size;
+	u32 headroom;
 
 	mana_handle_t rxobj;