diff mbox series

[net-next,v2] xsk: support use vaddr as ring

Message ID 20230212031232.3007-1-xuanzhuo@linux.alibaba.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net-next,v2] xsk: support use vaddr as ring | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 2 this patch: 2
netdev/cc_maintainers success CCed 14 of 14 maintainers
netdev/build_clang success Errors and warnings before: 1 this patch: 1
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 2 this patch: 2
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 65 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Xuan Zhuo Feb. 12, 2023, 3:12 a.m. UTC
When we try to start AF_XDP on some machines that have been running for a
long time, memory fragmentation means there is not enough contiguous
physical memory available, which causes the start to fail.

If the size of the queue is 8 * 1024, then the size of the desc[] is
8 * 1024 * 8 = 16 * PAGE, but we also add the size of struct xdp_ring, so
it is more than 16 pages. This makes it necessary to request a 4-order
memory allocation. If there are a lot of queues, this is difficult to
satisfy on machines that have been running for a long time.

Moreover, we actually waste 15 pages here: 4-order memory is 32 pages, but
we only use 17 pages.

This patch replaces __get_free_pages() by vmalloc() to allocate memory
to solve these problems.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/oe-kbuild-all/202302091850.0HBmsDAq-lkp@intel.com
---

v2:
    1. remove __get_free_pages() @Magnus Karlsson

 net/xdp/xsk.c       |  9 ++-------
 net/xdp/xsk_queue.c | 10 ++++------
 net/xdp/xsk_queue.h |  1 +
 3 files changed, 7 insertions(+), 13 deletions(-)

--
2.32.0.3.g01195cf9f

Comments

Magnus Karlsson Feb. 13, 2023, 1:07 p.m. UTC | #1
On Sun, 12 Feb 2023 at 04:22, Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> When we try to start AF_XDP on some machines with long running time, due
> to the machine's memory fragmentation problem, there is no sufficient
> continuous physical memory that will cause the start failure.

nit: contiguous

But perfectly understandable without fixing this.

> If the size of the queue is 8 * 1024, then the size of the desc[] is
> 8 * 1024 * 8 = 16 * PAGE, but we also add struct xdp_ring size, so it is
> 16page+. This is necessary to apply for a 4-order memory. If there are a
> lot of queues, it is difficult to these machine with long running time.
>
> Here, that we actually waste 15 pages. 4-Order memory is 32 pages, but
> we only use 17 pages.
>
> This patch replaces __get_free_pages() by vmalloc() to allocate memory
> to solve these problems.

Thanks for improving/fixing this.

Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>

> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> Reported-by: kernel test robot <lkp@intel.com>
> Link: https://lore.kernel.org/oe-kbuild-all/202302091850.0HBmsDAq-lkp@intel.com
> ---
>
> v2:
>     1. remove __get_free_pages() @Magnus Karlsson
>
>  net/xdp/xsk.c       |  9 ++-------
>  net/xdp/xsk_queue.c | 10 ++++------
>  net/xdp/xsk_queue.h |  1 +
>  3 files changed, 7 insertions(+), 13 deletions(-)
>
> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> index 9f0561b67c12..6a588b99b670 100644
> --- a/net/xdp/xsk.c
> +++ b/net/xdp/xsk.c
> @@ -1295,8 +1295,6 @@ static int xsk_mmap(struct file *file, struct socket *sock,
>         unsigned long size = vma->vm_end - vma->vm_start;
>         struct xdp_sock *xs = xdp_sk(sock->sk);
>         struct xsk_queue *q = NULL;
> -       unsigned long pfn;
> -       struct page *qpg;
>
>         if (READ_ONCE(xs->state) != XSK_READY)
>                 return -EBUSY;
> @@ -1319,13 +1317,10 @@ static int xsk_mmap(struct file *file, struct socket *sock,
>
>         /* Matches the smp_wmb() in xsk_init_queue */
>         smp_rmb();
> -       qpg = virt_to_head_page(q->ring);
> -       if (size > page_size(qpg))
> +       if (size > PAGE_ALIGN(q->ring_size))
>                 return -EINVAL;
>
> -       pfn = virt_to_phys(q->ring) >> PAGE_SHIFT;
> -       return remap_pfn_range(vma, vma->vm_start, pfn,
> -                              size, vma->vm_page_prot);
> +       return remap_vmalloc_range(vma, q->ring, 0);
>  }
>
>  static int xsk_notifier(struct notifier_block *this,
> diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c
> index 6cf9586e5027..247316bdfcbe 100644
> --- a/net/xdp/xsk_queue.c
> +++ b/net/xdp/xsk_queue.c
> @@ -7,6 +7,7 @@
>  #include <linux/slab.h>
>  #include <linux/overflow.h>
>  #include <net/xdp_sock_drv.h>
> +#include <linux/vmalloc.h>
>
>  #include "xsk_queue.h"
>
> @@ -23,7 +24,6 @@ static size_t xskq_get_ring_size(struct xsk_queue *q, bool umem_queue)
>  struct xsk_queue *xskq_create(u32 nentries, bool umem_queue)
>  {
>         struct xsk_queue *q;
> -       gfp_t gfp_flags;
>         size_t size;
>
>         q = kzalloc(sizeof(*q), GFP_KERNEL);
> @@ -33,12 +33,10 @@ struct xsk_queue *xskq_create(u32 nentries, bool umem_queue)
>         q->nentries = nentries;
>         q->ring_mask = nentries - 1;
>
> -       gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN |
> -                   __GFP_COMP  | __GFP_NORETRY;
>         size = xskq_get_ring_size(q, umem_queue);
>
> -       q->ring = (struct xdp_ring *)__get_free_pages(gfp_flags,
> -                                                     get_order(size));
> +       q->ring_size = size;
> +       q->ring = (struct xdp_ring *)vmalloc_user(size);
>         if (!q->ring) {
>                 kfree(q);
>                 return NULL;
> @@ -52,6 +50,6 @@ void xskq_destroy(struct xsk_queue *q)
>         if (!q)
>                 return;
>
> -       page_frag_free(q->ring);
> +       vfree(q->ring);
>         kfree(q);
>  }
> diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
> index c6fb6b763658..35922b8b92a8 100644
> --- a/net/xdp/xsk_queue.h
> +++ b/net/xdp/xsk_queue.h
> @@ -45,6 +45,7 @@ struct xsk_queue {
>         struct xdp_ring *ring;
>         u64 invalid_descs;
>         u64 queue_empty_descs;
> +       size_t ring_size;
>  };
>
>  /* The structure of the shared state of the rings are a simple
> --
> 2.32.0.3.g01195cf9f
>
diff mbox series

Patch

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 9f0561b67c12..6a588b99b670 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1295,8 +1295,6 @@  static int xsk_mmap(struct file *file, struct socket *sock,
 	unsigned long size = vma->vm_end - vma->vm_start;
 	struct xdp_sock *xs = xdp_sk(sock->sk);
 	struct xsk_queue *q = NULL;
-	unsigned long pfn;
-	struct page *qpg;

 	if (READ_ONCE(xs->state) != XSK_READY)
 		return -EBUSY;
@@ -1319,13 +1317,10 @@  static int xsk_mmap(struct file *file, struct socket *sock,

 	/* Matches the smp_wmb() in xsk_init_queue */
 	smp_rmb();
-	qpg = virt_to_head_page(q->ring);
-	if (size > page_size(qpg))
+	if (size > PAGE_ALIGN(q->ring_size))
 		return -EINVAL;

-	pfn = virt_to_phys(q->ring) >> PAGE_SHIFT;
-	return remap_pfn_range(vma, vma->vm_start, pfn,
-			       size, vma->vm_page_prot);
+	return remap_vmalloc_range(vma, q->ring, 0);
 }

 static int xsk_notifier(struct notifier_block *this,
diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c
index 6cf9586e5027..247316bdfcbe 100644
--- a/net/xdp/xsk_queue.c
+++ b/net/xdp/xsk_queue.c
@@ -7,6 +7,7 @@ 
 #include <linux/slab.h>
 #include <linux/overflow.h>
 #include <net/xdp_sock_drv.h>
+#include <linux/vmalloc.h>

 #include "xsk_queue.h"

@@ -23,7 +24,6 @@  static size_t xskq_get_ring_size(struct xsk_queue *q, bool umem_queue)
 struct xsk_queue *xskq_create(u32 nentries, bool umem_queue)
 {
 	struct xsk_queue *q;
-	gfp_t gfp_flags;
 	size_t size;

 	q = kzalloc(sizeof(*q), GFP_KERNEL);
@@ -33,12 +33,10 @@  struct xsk_queue *xskq_create(u32 nentries, bool umem_queue)
 	q->nentries = nentries;
 	q->ring_mask = nentries - 1;

-	gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN |
-		    __GFP_COMP  | __GFP_NORETRY;
 	size = xskq_get_ring_size(q, umem_queue);

-	q->ring = (struct xdp_ring *)__get_free_pages(gfp_flags,
-						      get_order(size));
+	q->ring_size = size;
+	q->ring = (struct xdp_ring *)vmalloc_user(size);
 	if (!q->ring) {
 		kfree(q);
 		return NULL;
@@ -52,6 +50,6 @@  void xskq_destroy(struct xsk_queue *q)
 	if (!q)
 		return;

-	page_frag_free(q->ring);
+	vfree(q->ring);
 	kfree(q);
 }
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index c6fb6b763658..35922b8b92a8 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -45,6 +45,7 @@  struct xsk_queue {
 	struct xdp_ring *ring;
 	u64 invalid_descs;
 	u64 queue_empty_descs;
+	size_t ring_size;
 };

 /* The structure of the shared state of the rings are a simple