[net-next,v6,1/2] page_pool: Add page_pool stats

Message ID 1645574424-60857-2-git-send-email-jdamato@fastly.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Series page_pool: Add page pool stats

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 5979 this patch: 5979
netdev/cc_maintainers success CCed 5 of 5 maintainers
netdev/build_clang success Errors and warnings before: 878 this patch: 878
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6130 this patch: 6130
netdev/checkpatch warning CHECK: Macro argument 'pool' may be better as '(pool)' to avoid precedence issues
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Joe Damato Feb. 23, 2022, midnight UTC
Add per-cpu per-pool statistics counters for the allocation path of a page
pool.

These statistics are disabled by default, and a kernel config option is
provided for users who wish to enable them.

The statistics added are:
	- fast: successful fast path allocations
	- slow: slow path order-0 allocations
	- slow_high_order: slow path high order allocations
	- empty: ptr ring is empty, so a slow path allocation was forced.
	- refill: an allocation which triggered a refill of the cache
	- waive: pages obtained from the ptr ring that cannot be added to
	  the cache due to a NUMA mismatch.

Signed-off-by: Joe Damato <jdamato@fastly.com>
---
 include/net/page_pool.h | 18 ++++++++++++++++++
 net/Kconfig             | 13 +++++++++++++
 net/core/page_pool.c    | 37 +++++++++++++++++++++++++++++++++----
 3 files changed, 64 insertions(+), 4 deletions(-)

Comments

Jesper Dangaard Brouer Feb. 23, 2022, 4:05 p.m. UTC | #1
On 23/02/2022 01.00, Joe Damato wrote:
> Add per-cpu per-pool statistics counters for the allocation path of a page
> pool.
> 
> These statistics are disabled by default, and a kernel config option is
> provided for users who wish to enable them.
> 
> The statistics added are:
> 	- fast: successful fast path allocations
> 	- slow: slow path order-0 allocations
> 	- slow_high_order: slow path high order allocations
> 	- empty: ptr ring is empty, so a slow path allocation was forced.
> 	- refill: an allocation which triggered a refill of the cache
> 	- waive: pages obtained from the ptr ring that cannot be added to
> 	  the cache due to a NUMA mismatch.
> 
> Signed-off-by: Joe Damato <jdamato@fastly.com>
> ---
>   include/net/page_pool.h | 18 ++++++++++++++++++
>   net/Kconfig             | 13 +++++++++++++
>   net/core/page_pool.c    | 37 +++++++++++++++++++++++++++++++++----
>   3 files changed, 64 insertions(+), 4 deletions(-)
> 
> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
> index 97c3c19..bedc82f 100644
> --- a/include/net/page_pool.h
> +++ b/include/net/page_pool.h
> @@ -135,7 +135,25 @@ struct page_pool {
>   	refcount_t user_cnt;
>   
>   	u64 destroy_cnt;
> +#ifdef CONFIG_PAGE_POOL_STATS
> +	struct page_pool_stats __percpu *stats ____cacheline_aligned_in_smp;
> +#endif
> +};

Adding this to the end of the struct and using the attribute
____cacheline_aligned_in_smp causes the structure to have a lot of
wasted padding at the end.

I recommend using the tool pahole to see the struct layout.
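
For example (assuming a vmlinux built with debug info):

  $ pahole -C page_pool vmlinux

pahole prints each member's offset and size, marks any holes, and ends
with a size/cachelines summary, so the tail padding added by
____cacheline_aligned_in_smp is easy to spot.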


> +
> +#ifdef CONFIG_PAGE_POOL_STATS
> +struct page_pool_stats {
> +	struct {
> +		u64 fast; /* fast path allocations */
> +		u64 slow; /* slow-path order 0 allocations */
> +		u64 slow_high_order; /* slow-path high order allocations */
> +		u64 empty; /* failed refills due to empty ptr ring, forcing
> +			    * slow path allocation
> +			    */
> +		u64 refill; /* allocations via successful refill */
> +		u64 waive;  /* failed refills due to numa zone mismatch */
> +	} alloc;
>   };
> +#endif

All of these stats are for the page_pool allocation "RX" side, which
is protected by softirq/NAPI.
Thus, I find it unnecessary to use __percpu stats.


As Ilias has pointed out before, the __percpu stats first become
relevant once we want stats for the free/"return" path ... which is not
part of this patchset.
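
To illustrate the alternative (a sketch only, not code from this
series): since a pool's allocation side is serialized by softirq/NAPI,
plain counters embedded in the pool would be enough here, e.g.:

struct page_pool_alloc_stats {
	u64 fast;		/* fast path allocations */
	u64 slow;		/* slow-path order-0 allocations */
	u64 slow_high_order;	/* slow-path high order allocations */
	u64 empty;		/* failed refills due to empty ptr ring */
	u64 refill;		/* allocations via successful refill */
	u64 waive;		/* failed refills due to NUMA mismatch */
};

/* embedded directly in struct page_pool next to the other hot
 * alloc-path fields, so each increment is a plain add, e.g.:
 *
 *	pool->alloc_stats.empty++;
 */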

--Jesper
Joe Damato Feb. 23, 2022, 5:05 p.m. UTC | #2
On Wed, Feb 23, 2022 at 8:06 AM Jesper Dangaard Brouer
<jbrouer@redhat.com> wrote:
>
>
>
> On 23/02/2022 01.00, Joe Damato wrote:
> > Add per-cpu per-pool statistics counters for the allocation path of a page
> > pool.
> >
> > These statistics are disabled by default, and a kernel config option is
> > provided for users who wish to enable them.
> >
> > The statistics added are:
> >       - fast: successful fast path allocations
> >       - slow: slow path order-0 allocations
> >       - slow_high_order: slow path high order allocations
> >       - empty: ptr ring is empty, so a slow path allocation was forced.
> >       - refill: an allocation which triggered a refill of the cache
> >       - waive: pages obtained from the ptr ring that cannot be added to
> >         the cache due to a NUMA mismatch.
> >
> > Signed-off-by: Joe Damato <jdamato@fastly.com>
> > ---
> >   include/net/page_pool.h | 18 ++++++++++++++++++
> >   net/Kconfig             | 13 +++++++++++++
> >   net/core/page_pool.c    | 37 +++++++++++++++++++++++++++++++++----
> >   3 files changed, 64 insertions(+), 4 deletions(-)
> >
> > diff --git a/include/net/page_pool.h b/include/net/page_pool.h
> > index 97c3c19..bedc82f 100644
> > --- a/include/net/page_pool.h
> > +++ b/include/net/page_pool.h
> > @@ -135,7 +135,25 @@ struct page_pool {
> >       refcount_t user_cnt;
> >
> >       u64 destroy_cnt;
> > +#ifdef CONFIG_PAGE_POOL_STATS
> > +     struct page_pool_stats __percpu *stats ____cacheline_aligned_in_smp;
> > +#endif
> > +};
>
> Adding this to the end of the struct and using the attribute
> ____cacheline_aligned_in_smp causes the structure to have a lot of
> wasted padding at the end.
>
> I recommend using the tool pahole to see the struct layout.

I mentioned placement near xdp_mem_id in the cover letter for this
code and in my reply on the v5 [1], but I didn't hear back, so I
really had no idea what you preferred.

I'll move it near xdp_mem_id for the v7, and hope that's what you are
saying here.

> > +
> > +#ifdef CONFIG_PAGE_POOL_STATS
> > +struct page_pool_stats {
> > +     struct {
> > +             u64 fast; /* fast path allocations */
> > +             u64 slow; /* slow-path order 0 allocations */
> > +             u64 slow_high_order; /* slow-path high order allocations */
> > +             u64 empty; /* failed refills due to empty ptr ring, forcing
> > +                         * slow path allocation
> > +                         */
> > +             u64 refill; /* allocations via successful refill */
> > +             u64 waive;  /* failed refills due to numa zone mismatch */
> > +     } alloc;
> >   };
> > +#endif
>
> All of these stats are for the page_pool allocation "RX" side, which
> is protected by softirq/NAPI.

Yes.

> Thus, I find it unnecessary to use __percpu stats.

Unnecessary, sure, but it doesn't seem harmful, and it allows us to
expand this to other stats in a future change.

> As Ilias has pointed out before, the __percpu stats first become
> relevant once we want stats for the free/"return" path ... which is not
> part of this patchset.

Does that need to be covered in this patchset?

I believe Ilias' response, which mentioned the recycle stats, indicates
that they can be added in the future [2]. I took this to mean a
separate patchset, assuming that this first change is accepted.

As I understand your comment, then, this change will not be accepted
unless it is expanded to include recycle stats or the per-cpu
designation is removed.

Are the cache-line placement and per-cpu designations the only
remaining issues with this change?

[1]: https://lore.kernel.org/all/CALALjgzUQUuEVkNXous0kOcHHqiSrTem+n9MjQh6q-8+Azi-sg@mail.gmail.com/
[2]: https://lore.kernel.org/all/YfJhIpBGW6suBwkY@hades/
Jakub Kicinski Feb. 23, 2022, 5:40 p.m. UTC | #3
On Wed, 23 Feb 2022 09:05:06 -0800 Joe Damato wrote:
> Are the cache-line placement and per-cpu designations the only
> remaining issues with this change?

page_pool_get_stats() has no callers as is; I'm not sure how we can
merge it in its current form.

Maybe I'm missing the bigger picture or some former discussion.
Joe Damato Feb. 23, 2022, 5:45 p.m. UTC | #4
On Wed, Feb 23, 2022 at 9:40 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Wed, 23 Feb 2022 09:05:06 -0800 Joe Damato wrote:
> > Are the cache-line placement and per-cpu designations the only
> > remaining issues with this change?
>
> page_pool_get_stats() has no callers as is; I'm not sure how we can
> merge it in its current form.
>
> Maybe I'm missing the bigger picture or some former discussion.

I wrote the mlx5 code to call this function and export the data via
ethtool. I had assumed the mlx5 changes should be in a separate
patchset. I can include that code as part of this change, if needed.
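
Roughly, the driver side would look like this (a sketch only:
page_pool_get_stats() is not shown in this patch, so its signature
here is an assumption, and drv_priv plus the stat names are
illustrative, not the actual mlx5 code):

#include <linux/ethtool.h>
#include <net/page_pool.h>

/* assumed getter: sums the per-cpu counters into *stats */
bool page_pool_get_stats(struct page_pool *pool,
			 struct page_pool_stats *stats);

static const char pp_stats_strings[][ETH_GSTRING_LEN] = {
	"rx_pp_alloc_fast",
	"rx_pp_alloc_slow",
	"rx_pp_alloc_slow_high_order",
	"rx_pp_alloc_empty",
	"rx_pp_alloc_refill",
	"rx_pp_alloc_waive",
};

static void drv_get_ethtool_stats(struct net_device *dev,
				  struct ethtool_stats *es, u64 *data)
{
	struct drv_priv *priv = netdev_priv(dev); /* hypothetical priv */
	struct page_pool_stats stats = {};

	if (!page_pool_get_stats(priv->page_pool, &stats))
		return;

	/* order must match pp_stats_strings[] above */
	*data++ = stats.alloc.fast;
	*data++ = stats.alloc.slow;
	*data++ = stats.alloc.slow_high_order;
	*data++ = stats.alloc.empty;
	*data++ = stats.alloc.refill;
	*data++ = stats.alloc.waive;
}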

Thanks,
Joe
Jesper Dangaard Brouer Feb. 23, 2022, 6:10 p.m. UTC | #5
On 23/02/2022 18.45, Joe Damato wrote:
> On Wed, Feb 23, 2022 at 9:40 AM Jakub Kicinski <kuba@kernel.org> wrote:
>>
>> On Wed, 23 Feb 2022 09:05:06 -0800 Joe Damato wrote:
>>> Are the cache-line placement and per-cpu designations the only
>>> remaining issues with this change?
>>
>> page_pool_get_stats() has no callers as is; I'm not sure how we can
>> merge it in its current form.
>>
>> Maybe I'm missing the bigger picture or some former discussion.
> 
> I wrote the mlx5 code to call this function and export the data via
> ethtool. I had assumed the mlx5 changes should be in a separate
> patchset. I can include that code as part of this change, if needed.

I agree with Jakub that we need to see how this is used by drivers.

--Jesper
Joe Damato Feb. 23, 2022, 6:16 p.m. UTC | #6
On Wed, Feb 23, 2022 at 10:10 AM Jesper Dangaard Brouer
<jbrouer@redhat.com> wrote:
>
>
>
> On 23/02/2022 18.45, Joe Damato wrote:
> > On Wed, Feb 23, 2022 at 9:40 AM Jakub Kicinski <kuba@kernel.org> wrote:
> >>
> >> On Wed, 23 Feb 2022 09:05:06 -0800 Joe Damato wrote:
> >>> Are the cache-line placement and per-cpu designations the only
> >>> remaining issues with this change?
> >>
> >> page_pool_get_stats() has no callers as is; I'm not sure how we can
> >> merge it in its current form.
> >>
> >> Maybe I'm missing the bigger picture or some former discussion.
> >
> > I wrote the mlx5 code to call this function and export the data via
> > ethtool. I had assumed the mlx5 changes should be in a separate
> > patchset. I can include that code as part of this change, if needed.
>
> I agree with Jakub we need to see how this is used by drivers.

OK. I'll include the mlx5 code which uses this API in the v7.
Ilias Apalodimas Feb. 23, 2022, 7:01 p.m. UTC | #7
Hi all

[...]
> > Signed-off-by: Joe Damato <jdamato@fastly.com>
> > ---
> >   include/net/page_pool.h | 18 ++++++++++++++++++
> >   net/Kconfig             | 13 +++++++++++++
> >   net/core/page_pool.c    | 37 +++++++++++++++++++++++++++++++++----
> >   3 files changed, 64 insertions(+), 4 deletions(-)
> >
> > diff --git a/include/net/page_pool.h b/include/net/page_pool.h
> > index 97c3c19..bedc82f 100644
> > --- a/include/net/page_pool.h
> > +++ b/include/net/page_pool.h
> > @@ -135,7 +135,25 @@ struct page_pool {
> >       refcount_t user_cnt;
> >
> >       u64 destroy_cnt;
> > +#ifdef CONFIG_PAGE_POOL_STATS
> > +     struct page_pool_stats __percpu *stats ____cacheline_aligned_in_smp;
> > +#endif
> > +};
>
> Adding this to the end of the struct and using the attribute
> ____cacheline_aligned_in_smp causes the structure to have a lot of
> wasted padding at the end.
>
> I recommend using the tool pahole to see the struct layout.
>
>
> > +
> > +#ifdef CONFIG_PAGE_POOL_STATS
> > +struct page_pool_stats {
> > +     struct {
> > +             u64 fast; /* fast path allocations */
> > +             u64 slow; /* slow-path order 0 allocations */
> > +             u64 slow_high_order; /* slow-path high order allocations */
> > +             u64 empty; /* failed refills due to empty ptr ring, forcing
> > +                         * slow path allocation
> > +                         */
> > +             u64 refill; /* allocations via successful refill */
> > +             u64 waive;  /* failed refills due to numa zone mismatch */
> > +     } alloc;
> >   };
> > +#endif
>
> All of these stats are for the page_pool allocation "RX" side, which
> is protected by softirq/NAPI.
> Thus, I find it unnecessary to use __percpu stats.
>
>
> As Ilias has pointed out before, the __percpu stats first become
> relevant once we want stats for the free/"return" path ... which is not
> part of this patchset.

Do we really mind, though? The point was to have the percpu variables
in order to plug in any stats that aren't NAPI-protected. I think we
can always plug in the recycling stats later? OTOH, if you prefer
having them in now, I can work with Joe and we can get that supported
as well.
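
Either way, the read side stays simple; summing the per-cpu copies is
all a getter has to do. A sketch, assuming the struct from this patch
(the helper name is made up for illustration):

#ifdef CONFIG_PAGE_POOL_STATS
static void page_pool_sum_alloc_stats(struct page_pool *pool,
				      struct page_pool_stats *total)
{
	int cpu;

	memset(total, 0, sizeof(*total));
	for_each_possible_cpu(cpu) {
		const struct page_pool_stats *s =
			per_cpu_ptr(pool->stats, cpu);

		total->alloc.fast            += s->alloc.fast;
		total->alloc.slow            += s->alloc.slow;
		total->alloc.slow_high_order += s->alloc.slow_high_order;
		total->alloc.empty           += s->alloc.empty;
		total->alloc.refill          += s->alloc.refill;
		total->alloc.waive           += s->alloc.waive;
	}
}
#endif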

Cheers
/Ilias
>
> --Jesper
>

Patch

diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 97c3c19..bedc82f 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -135,7 +135,25 @@  struct page_pool {
 	refcount_t user_cnt;
 
 	u64 destroy_cnt;
+#ifdef CONFIG_PAGE_POOL_STATS
+	struct page_pool_stats __percpu *stats ____cacheline_aligned_in_smp;
+#endif
+};
+
+#ifdef CONFIG_PAGE_POOL_STATS
+struct page_pool_stats {
+	struct {
+		u64 fast; /* fast path allocations */
+		u64 slow; /* slow-path order 0 allocations */
+		u64 slow_high_order; /* slow-path high order allocations */
+		u64 empty; /* failed refills due to empty ptr ring, forcing
+			    * slow path allocation
+			    */
+		u64 refill; /* allocations via successful refill */
+		u64 waive;  /* failed refills due to numa zone mismatch */
+	} alloc;
 };
+#endif
 
 struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
 
diff --git a/net/Kconfig b/net/Kconfig
index 8a1f9d0..8c07308 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -434,6 +434,19 @@  config NET_DEVLINK
 config PAGE_POOL
 	bool
 
+config PAGE_POOL_STATS
+	default n
+	bool "Page pool stats"
+	depends on PAGE_POOL
+	help
+	  Enable page pool statistics to track allocations. This option
+	  incurs additional CPU cost in allocation paths and additional
+	  memory cost to store the statistics. These statistics are only
+	  available if this option is enabled and if the driver using
+	  the page pool supports exporting this data.
+
+	  If unsure, say N.
+
 config FAILOVER
 	tristate "Generic failover module"
 	help
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index e25d359..f29dff9 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -26,6 +26,17 @@ 
 
 #define BIAS_MAX	LONG_MAX
 
+#ifdef CONFIG_PAGE_POOL_STATS
+/* this_cpu_inc_alloc_stat is intended to be used in softirq context */
+#define this_cpu_inc_alloc_stat(pool, __stat)				\
+	do {								\
+		struct page_pool_stats __percpu *s = pool->stats;	\
+		__this_cpu_inc(s->alloc.__stat);			\
+	} while (0)
+#else
+#define this_cpu_inc_alloc_stat(pool, __stat)
+#endif
+
 static int page_pool_init(struct page_pool *pool,
 			  const struct page_pool_params *params)
 {
@@ -73,6 +84,12 @@  static int page_pool_init(struct page_pool *pool,
 	    pool->p.flags & PP_FLAG_PAGE_FRAG)
 		return -EINVAL;
 
+#ifdef CONFIG_PAGE_POOL_STATS
+	pool->stats = alloc_percpu(struct page_pool_stats);
+	if (!pool->stats)
+		return -ENOMEM;
+#endif
+
 	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
 		return -ENOMEM;
 
@@ -117,8 +134,10 @@  static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
 	int pref_nid; /* preferred NUMA node */
 
 	/* Quicker fallback, avoid locks when ring is empty */
-	if (__ptr_ring_empty(r))
+	if (__ptr_ring_empty(r)) {
+		this_cpu_inc_alloc_stat(pool, empty);
 		return NULL;
+	}
 
 	/* Softirq guarantee CPU and thus NUMA node is stable. This,
 	 * assumes CPU refilling driver RX-ring will also run RX-NAPI.
@@ -145,14 +164,17 @@  static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
 			 * This limit stress on page buddy alloactor.
 			 */
 			page_pool_return_page(pool, page);
+			this_cpu_inc_alloc_stat(pool, waive);
 			page = NULL;
 			break;
 		}
 	} while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);
 
 	/* Return last page */
-	if (likely(pool->alloc.count > 0))
+	if (likely(pool->alloc.count > 0)) {
 		page = pool->alloc.cache[--pool->alloc.count];
+		this_cpu_inc_alloc_stat(pool, refill);
+	}
 
 	return page;
 }
@@ -166,6 +188,7 @@  static struct page *__page_pool_get_cached(struct page_pool *pool)
 	if (likely(pool->alloc.count)) {
 		/* Fast-path */
 		page = pool->alloc.cache[--pool->alloc.count];
+		this_cpu_inc_alloc_stat(pool, fast);
 	} else {
 		page = page_pool_refill_alloc_cache(pool);
 	}
@@ -239,6 +262,7 @@  static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
 		return NULL;
 	}
 
+	this_cpu_inc_alloc_stat(pool, slow_high_order);
 	page_pool_set_pp_info(pool, page);
 
 	/* Track how many pages are held 'in-flight' */
@@ -293,10 +317,12 @@  static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
 	}
 
 	/* Return last page */
-	if (likely(pool->alloc.count > 0))
+	if (likely(pool->alloc.count > 0)) {
 		page = pool->alloc.cache[--pool->alloc.count];
-	else
+		this_cpu_inc_alloc_stat(pool, slow);
+	} else {
 		page = NULL;
+	}
 
 	/* When page just alloc'ed is should/must have refcnt 1. */
 	return page;
@@ -620,6 +646,9 @@  static void page_pool_free(struct page_pool *pool)
 	if (pool->p.flags & PP_FLAG_DMA_MAP)
 		put_device(pool->p.dev);
 
+#ifdef CONFIG_PAGE_POOL_STATS
+	free_percpu(pool->stats);
+#endif
 	kfree(pool);
 }
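
The checkpatch warning in the checks above flags the macro's bare use
of 'pool'; wrapping the argument (a sketch, not part of this posting)
avoids operator-precedence surprises when the macro is passed an
expression:

#ifdef CONFIG_PAGE_POOL_STATS
/* this_cpu_inc_alloc_stat is intended to be used in softirq context */
#define this_cpu_inc_alloc_stat(pool, __stat)				\
	do {								\
		struct page_pool_stats __percpu *s = (pool)->stats;	\
		__this_cpu_inc(s->alloc.__stat);			\
	} while (0)
#else
#define this_cpu_inc_alloc_stat(pool, __stat)
#endif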