
[v7,net-next,1/4] net: add generic percpu page_pool allocator

Message ID 1d34b717f8f842b9c3e9f70f0e8ffd245a5d2460.1706861261.git.lorenzo@kernel.org (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series add multi-buff support for xdp running in generic mode

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 1200 this patch: 1202
netdev/build_tools success Errors and warnings before: 1 this patch: 0
netdev/cc_maintainers success CCed 0 of 0 maintainers
netdev/build_clang success Errors and warnings before: 1066 this patch: 1066
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 1231 this patch: 1233
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 157 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Lorenzo Bianconi Feb. 2, 2024, 8:12 a.m. UTC
Introduce a generic percpu page_pool allocator.
Moreover, add page_pool_create_percpu() and a cpuid field to the page_pool
struct in order to recycle pages into the page_pool "hot" cache when
napi_pp_put_page() is running on the same CPU.
This is a preliminary patch to add XDP multi-buff support for XDP running
in generic mode.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 include/net/page_pool/types.h |  3 +++
 net/core/dev.c                | 45 +++++++++++++++++++++++++++++++++++
 net/core/page_pool.c          | 23 ++++++++++++++----
 net/core/skbuff.c             |  5 ++--
 4 files changed, 70 insertions(+), 6 deletions(-)
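
As a usage illustration (not part of the patch itself), here is a minimal
sketch of how a caller could instantiate one pool per possible CPU with the
new constructor; the pool_size value and the way the resulting pointer is
stored are assumptions made only for the example:

	struct page_pool_params params = {
		.pool_size = 256,	/* example value */
		.nid = NUMA_NO_NODE,
	};
	struct page_pool *pool;
	int cpu;

	for_each_possible_cpu(cpu) {
		pool = page_pool_create_percpu(&params, cpu);
		if (IS_ERR(pool))
			break;	/* unwind any pools created so far */
		/* Stash pool, e.g. in a per-CPU variable; pages freed via
		 * napi_pp_put_page() on this CPU can then be recycled
		 * straight into the pool's "hot" cache.
		 */
	}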

Comments

Jesper Dangaard Brouer Feb. 2, 2024, 8:59 a.m. UTC | #1
On 02/02/2024 09.12, Lorenzo Bianconi wrote:
> Introduce a generic percpu page_pool allocator.
> Moreover, add page_pool_create_percpu() and a cpuid field to the page_pool
> struct in order to recycle pages into the page_pool "hot" cache when
> napi_pp_put_page() is running on the same CPU.
> This is a preliminary patch to add XDP multi-buff support for XDP running
> in generic mode.
> 
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
>   include/net/page_pool/types.h |  3 +++
>   net/core/dev.c                | 45 +++++++++++++++++++++++++++++++++++
>   net/core/page_pool.c          | 23 ++++++++++++++----
>   net/core/skbuff.c             |  5 ++--
>   4 files changed, 70 insertions(+), 6 deletions(-)
> 
> diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
> index 76481c465375..3828396ae60c 100644
> --- a/include/net/page_pool/types.h
> +++ b/include/net/page_pool/types.h
> @@ -128,6 +128,7 @@ struct page_pool_stats {
>   struct page_pool {
>   	struct page_pool_params_fast p;
>   
> +	int cpuid;
>   	bool has_init_callback;
>   
>   	long frag_users;
> @@ -203,6 +204,8 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
>   struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
>   				  unsigned int size, gfp_t gfp);
>   struct page_pool *page_pool_create(const struct page_pool_params *params);
> +struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
> +					  int cpuid);
>   
>   struct xdp_mem_info;
>   
> diff --git a/net/core/dev.c b/net/core/dev.c
> index b53b9c94de40..5a100360389f 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -153,6 +153,8 @@
>   #include <linux/prandom.h>
>   #include <linux/once_lite.h>
>   #include <net/netdev_rx_queue.h>
> +#include <net/page_pool/types.h>
> +#include <net/page_pool/helpers.h>
>   
>   #include "dev.h"
>   #include "net-sysfs.h"
> @@ -450,6 +452,12 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
>   DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
>   EXPORT_PER_CPU_SYMBOL(softnet_data);
>   
> +/* Page_pool has a lockless array/stack to alloc/recycle pages.
> + * PP consumers must pay attention to run APIs in the appropriate context
> + * (e.g. NAPI context).
> + */
> +static DEFINE_PER_CPU_ALIGNED(struct page_pool *, system_page_pool);

Thanks for adding the comment.

Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
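
To illustrate the context requirement called out in the comment quoted above,
a sketch of the intended consumption pattern inside net/core/dev.c; the helper
name is hypothetical and not part of this series:

	/* Sketch only: must be called from softirq/NAPI context, so that the
	 * task cannot migrate away from the CPU owning the lockless cache.
	 */
	static struct page *example_alloc_from_system_pool(gfp_t gfp)
	{
		struct page_pool *pp = this_cpu_read(system_page_pool);

		return page_pool_alloc_pages(pp, gfp);
	}
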
Toke Høiland-Jørgensen Feb. 2, 2024, 11:38 a.m. UTC | #2
Lorenzo Bianconi <lorenzo@kernel.org> writes:

> Introduce a generic percpu page_pool allocator.
> Moreover, add page_pool_create_percpu() and a cpuid field to the page_pool
> struct in order to recycle pages into the page_pool "hot" cache when
> napi_pp_put_page() is running on the same CPU.
> This is a preliminary patch to add XDP multi-buff support for XDP running
> in generic mode.
>
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>

Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
kernel test robot Feb. 3, 2024, 2:52 p.m. UTC | #3
Hi Lorenzo,

kernel test robot noticed the following build warnings:

[auto build test WARNING on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Lorenzo-Bianconi/net-add-generic-percpu-page_pool-allocator/20240202-162516
base:   net-next/main
patch link:    https://lore.kernel.org/r/1d34b717f8f842b9c3e9f70f0e8ffd245a5d2460.1706861261.git.lorenzo%40kernel.org
patch subject: [PATCH v7 net-next 1/4] net: add generic percpu page_pool allocator
config: x86_64-randconfig-121-20240203 (https://download.01.org/0day-ci/archive/20240203/202402032223.Imbb9JgJ-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240203/202402032223.Imbb9JgJ-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202402032223.Imbb9JgJ-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
   net/core/dev.c:3364:23: sparse: sparse: incorrect type in argument 4 (different base types) @@     expected restricted __wsum [usertype] csum @@     got unsigned int @@
   net/core/dev.c:3364:23: sparse:     expected restricted __wsum [usertype] csum
   net/core/dev.c:3364:23: sparse:     got unsigned int
   net/core/dev.c:3364:23: sparse: sparse: cast from restricted __wsum
>> net/core/dev.c:11809:34: sparse: sparse: incorrect type in initializer (different address spaces) @@     expected void const [noderef] __percpu *__vpp_verify @@     got struct page_pool * @@
   net/core/dev.c:11809:34: sparse:     expected void const [noderef] __percpu *__vpp_verify
   net/core/dev.c:11809:34: sparse:     got struct page_pool *
   net/core/dev.c: note: in included file (through include/linux/smp.h, include/linux/lockdep.h, include/linux/spinlock.h, ...):
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   net/core/dev.c:205:9: sparse: sparse: context imbalance in 'unlist_netdevice' - different lock contexts for basic block
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   net/core/dev.c:3804:17: sparse: sparse: context imbalance in '__dev_queue_xmit' - different lock contexts for basic block
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   net/core/dev.c:5184:17: sparse: sparse: context imbalance in 'net_tx_action' - different lock contexts for basic block
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
>> net/core/dev.c:11809:34: sparse: sparse: dereference of noderef expression

vim +11809 net/core/dev.c

 11722	
 11723	/*
 11724	 *       This is called single threaded during boot, so no need
 11725	 *       to take the rtnl semaphore.
 11726	 */
 11727	static int __init net_dev_init(void)
 11728	{
 11729		int i, rc = -ENOMEM;
 11730	
 11731		BUG_ON(!dev_boot_phase);
 11732	
 11733		net_dev_struct_check();
 11734	
 11735		if (dev_proc_init())
 11736			goto out;
 11737	
 11738		if (netdev_kobject_init())
 11739			goto out;
 11740	
 11741		INIT_LIST_HEAD(&ptype_all);
 11742		for (i = 0; i < PTYPE_HASH_SIZE; i++)
 11743			INIT_LIST_HEAD(&ptype_base[i]);
 11744	
 11745		if (register_pernet_subsys(&netdev_net_ops))
 11746			goto out;
 11747	
 11748		/*
 11749		 *	Initialise the packet receive queues.
 11750		 */
 11751	
 11752		for_each_possible_cpu(i) {
 11753			struct work_struct *flush = per_cpu_ptr(&flush_works, i);
 11754			struct softnet_data *sd = &per_cpu(softnet_data, i);
 11755	
 11756			INIT_WORK(flush, flush_backlog);
 11757	
 11758			skb_queue_head_init(&sd->input_pkt_queue);
 11759			skb_queue_head_init(&sd->process_queue);
 11760	#ifdef CONFIG_XFRM_OFFLOAD
 11761			skb_queue_head_init(&sd->xfrm_backlog);
 11762	#endif
 11763			INIT_LIST_HEAD(&sd->poll_list);
 11764			sd->output_queue_tailp = &sd->output_queue;
 11765	#ifdef CONFIG_RPS
 11766			INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
 11767			sd->cpu = i;
 11768	#endif
 11769			INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
 11770			spin_lock_init(&sd->defer_lock);
 11771	
 11772			init_gro_hash(&sd->backlog);
 11773			sd->backlog.poll = process_backlog;
 11774			sd->backlog.weight = weight_p;
 11775	
 11776			if (net_page_pool_create(i))
 11777				goto out;
 11778		}
 11779	
 11780		dev_boot_phase = 0;
 11781	
 11782		/* The loopback device is special if any other network devices
 11783		 * is present in a network namespace the loopback device must
 11784		 * be present. Since we now dynamically allocate and free the
 11785		 * loopback device ensure this invariant is maintained by
 11786		 * keeping the loopback device as the first device on the
 11787		 * list of network devices.  Ensuring the loopback devices
 11788		 * is the first device that appears and the last network device
 11789		 * that disappears.
 11790		 */
 11791		if (register_pernet_device(&loopback_net_ops))
 11792			goto out;
 11793	
 11794		if (register_pernet_device(&default_device_ops))
 11795			goto out;
 11796	
 11797		open_softirq(NET_TX_SOFTIRQ, net_tx_action);
 11798		open_softirq(NET_RX_SOFTIRQ, net_rx_action);
 11799	
 11800		rc = cpuhp_setup_state_nocalls(CPUHP_NET_DEV_DEAD, "net/dev:dead",
 11801					       NULL, dev_cpu_dead);
 11802		WARN_ON(rc < 0);
 11803		rc = 0;
 11804	out:
 11805		if (rc < 0) {
 11806			for_each_possible_cpu(i) {
 11807				struct page_pool *pp_ptr;
 11808	
 11809				pp_ptr = per_cpu_ptr(system_page_pool, i);
 11810				if (!pp_ptr)
 11811					continue;
 11812	
 11813				page_pool_destroy(pp_ptr);
 11814				per_cpu(system_page_pool, i) = NULL;
 11815			}
 11816		}
 11817	
 11818		return rc;
 11819	}
 11820
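
The two new sparse complaints both point at the per_cpu_ptr() call on line
11809: system_page_pool is a per-CPU pointer variable, so its plain value
(a struct page_pool *) is being passed where a __percpu-annotated pointer is
expected. One plausible way to silence the warnings, offered here only as an
assumption and not necessarily what a later revision will do, is to read the
per-CPU slot with per_cpu() instead:

	for_each_possible_cpu(i) {
		struct page_pool *pp_ptr;

		/* per_cpu() reads the pointer stored in CPU i's slot */
		pp_ptr = per_cpu(system_page_pool, i);
		if (!pp_ptr)
			continue;

		page_pool_destroy(pp_ptr);
		per_cpu(system_page_pool, i) = NULL;
	}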

Patch

diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 76481c465375..3828396ae60c 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -128,6 +128,7 @@  struct page_pool_stats {
 struct page_pool {
 	struct page_pool_params_fast p;
 
+	int cpuid;
 	bool has_init_callback;
 
 	long frag_users;
@@ -203,6 +204,8 @@  struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
 struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
 				  unsigned int size, gfp_t gfp);
 struct page_pool *page_pool_create(const struct page_pool_params *params);
+struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
+					  int cpuid);
 
 struct xdp_mem_info;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index b53b9c94de40..5a100360389f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -153,6 +153,8 @@ 
 #include <linux/prandom.h>
 #include <linux/once_lite.h>
 #include <net/netdev_rx_queue.h>
+#include <net/page_pool/types.h>
+#include <net/page_pool/helpers.h>
 
 #include "dev.h"
 #include "net-sysfs.h"
@@ -450,6 +452,12 @@  static RAW_NOTIFIER_HEAD(netdev_chain);
 DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 EXPORT_PER_CPU_SYMBOL(softnet_data);
 
+/* Page_pool has a lockless array/stack to alloc/recycle pages.
+ * PP consumers must pay attention to run APIs in the appropriate context
+ * (e.g. NAPI context).
+ */
+static DEFINE_PER_CPU_ALIGNED(struct page_pool *, system_page_pool);
+
 #ifdef CONFIG_LOCKDEP
 /*
  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
@@ -11691,6 +11699,27 @@  static void __init net_dev_struct_check(void)
  *
  */
 
+/* We allocate 256 pages for each CPU if PAGE_SHIFT is 12 */
+#define SYSTEM_PERCPU_PAGE_POOL_SIZE	((1 << 20) / PAGE_SIZE)
+
+static int net_page_pool_create(int cpuid)
+{
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+	struct page_pool_params page_pool_params = {
+		.pool_size = SYSTEM_PERCPU_PAGE_POOL_SIZE,
+		.nid = NUMA_NO_NODE,
+	};
+	struct page_pool *pp_ptr;
+
+	pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid);
+	if (IS_ERR(pp_ptr))
+		return -ENOMEM;
+
+	per_cpu(system_page_pool, cpuid) = pp_ptr;
+#endif
+	return 0;
+}
+
 /*
  *       This is called single threaded during boot, so no need
  *       to take the rtnl semaphore.
@@ -11743,6 +11772,9 @@  static int __init net_dev_init(void)
 		init_gro_hash(&sd->backlog);
 		sd->backlog.poll = process_backlog;
 		sd->backlog.weight = weight_p;
+
+		if (net_page_pool_create(i))
+			goto out;
 	}
 
 	dev_boot_phase = 0;
@@ -11770,6 +11802,19 @@  static int __init net_dev_init(void)
 	WARN_ON(rc < 0);
 	rc = 0;
 out:
+	if (rc < 0) {
+		for_each_possible_cpu(i) {
+			struct page_pool *pp_ptr;
+
+			pp_ptr = per_cpu_ptr(system_page_pool, i);
+			if (!pp_ptr)
+				continue;
+
+			page_pool_destroy(pp_ptr);
+			per_cpu(system_page_pool, i) = NULL;
+		}
+	}
+
 	return rc;
 }
 
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 4933762e5a6b..89c835fcf094 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -171,13 +171,16 @@  static void page_pool_producer_unlock(struct page_pool *pool,
 }
 
 static int page_pool_init(struct page_pool *pool,
-			  const struct page_pool_params *params)
+			  const struct page_pool_params *params,
+			  int cpuid)
 {
 	unsigned int ring_qsize = 1024; /* Default */
 
 	memcpy(&pool->p, &params->fast, sizeof(pool->p));
 	memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
 
+	pool->cpuid = cpuid;
+
 	/* Validate only known flags were used */
 	if (pool->p.flags & ~(PP_FLAG_ALL))
 		return -EINVAL;
@@ -253,10 +256,12 @@  static void page_pool_uninit(struct page_pool *pool)
 }
 
 /**
- * page_pool_create() - create a page pool.
+ * page_pool_create_percpu() - create a page pool for a given cpu.
  * @params: parameters, see struct page_pool_params
+ * @cpuid: cpu identifier
  */
-struct page_pool *page_pool_create(const struct page_pool_params *params)
+struct page_pool *
+page_pool_create_percpu(const struct page_pool_params *params, int cpuid)
 {
 	struct page_pool *pool;
 	int err;
@@ -265,7 +270,7 @@  struct page_pool *page_pool_create(const struct page_pool_params *params)
 	if (!pool)
 		return ERR_PTR(-ENOMEM);
 
-	err = page_pool_init(pool, params);
+	err = page_pool_init(pool, params, cpuid);
 	if (err < 0)
 		goto err_free;
 
@@ -282,6 +287,16 @@  struct page_pool *page_pool_create(const struct page_pool_params *params)
 	kfree(pool);
 	return ERR_PTR(err);
 }
+EXPORT_SYMBOL(page_pool_create_percpu);
+
+/**
+ * page_pool_create() - create a page pool
+ * @params: parameters, see struct page_pool_params
+ */
+struct page_pool *page_pool_create(const struct page_pool_params *params)
+{
+	return page_pool_create_percpu(params, -1);
+}
 EXPORT_SYMBOL(page_pool_create);
 
 static void page_pool_return_page(struct page_pool *pool, struct page *page);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index edbbef563d4d..9e5eb47b4025 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -923,9 +923,10 @@  bool napi_pp_put_page(struct page *page, bool napi_safe)
 	 */
 	if (napi_safe || in_softirq()) {
 		const struct napi_struct *napi = READ_ONCE(pp->p.napi);
+		unsigned int cpuid = smp_processor_id();
 
-		allow_direct = napi &&
-			READ_ONCE(napi->list_owner) == smp_processor_id();
+		allow_direct = napi && READ_ONCE(napi->list_owner) == cpuid;
+		allow_direct |= (pp->cpuid == cpuid);
 	}
 
 	/* Driver set this to memory recycling info. Reset it on recycle.
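
Read together with the cpuid field set in page_pool_init(), the resulting
check in napi_pp_put_page() works out as sketched below (reconstructed from
the hunk above, with explanatory comments added): pools created through
page_pool_create() keep cpuid == -1 and behave as before, while the new
percpu system pools, which have no NAPI instance attached, can still recycle
directly when the free happens on their owning CPU.

	if (napi_safe || in_softirq()) {
		const struct napi_struct *napi = READ_ONCE(pp->p.napi);
		unsigned int cpuid = smp_processor_id();

		/* Existing case: the pool is bound to a NAPI instance and the
		 * free runs on the CPU owning that NAPI context.
		 */
		allow_direct = napi && READ_ONCE(napi->list_owner) == cpuid;
		/* New case: percpu pools carry the owning CPU in pp->cpuid
		 * (-1 for ordinary pools, so this never matches for them).
		 */
		allow_direct |= (pp->cpuid == cpuid);
	}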