Message ID | 20230725131258.31306-1-linyunsheng@huawei.com (mailing list archive) |
---|---|
State | Not Applicable |
Delegated to: | Kalle Valo |
Headers | show |
Series | [net-next,v2] page_pool: split types and declarations from page_pool.h | expand |
Hi Yunsheng, all On Tue, Jul 25, 2023 at 09:12:55PM +0800, Yunsheng Lin wrote: > Split types and pure function declarations from page_pool.h > and add them in page_pool/types.h, so that C sources can > include page_pool.h and headers should generally only include > page_pool/types.h as suggested by jakub. > Apologies for the very late replies, I was on long vacation with limited internet access. Yunsheng, since there's been a few mails and I lost track, this is instead of [0] right? If so, I prefer this approach. It looks ok on a first quick pass, I'll have a closer look later. [0] https://lore.kernel.org/netdev/20230714170853.866018-2-aleksander.lobakin@intel.com/ Thanks /Ilias > Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> > Suggested-by: Jakub Kicinski <kuba@kernel.org> > CC: Alexander Lobakin <aleksander.lobakin@intel.com> > --- > V2: Move from page_pool_types.h to page_pool/types.h, fix > some typo and alphabetic sorting. > --- > MAINTAINERS | 1 + > drivers/net/ethernet/engleder/tsnep_main.c | 1 + > drivers/net/ethernet/freescale/fec_main.c | 1 + > .../marvell/octeontx2/nic/otx2_common.c | 1 + > .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 1 + > .../ethernet/mellanox/mlx5/core/en/params.c | 1 + > .../net/ethernet/mellanox/mlx5/core/en/xdp.c | 1 + > drivers/net/wireless/mediatek/mt76/mt76.h | 1 + > include/linux/skbuff.h | 2 +- > include/net/page_pool.h | 192 +---------------- > include/net/page_pool/types.h | 193 ++++++++++++++++++ > 11 files changed, 206 insertions(+), 189 deletions(-) > create mode 100644 include/net/page_pool/types.h > > diff --git a/MAINTAINERS b/MAINTAINERS > index d0553ad37865..1dbfe7fcb10e 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -16016,6 +16016,7 @@ L: netdev@vger.kernel.org > S: Supported > F: Documentation/networking/page_pool.rst > F: include/net/page_pool.h > +F: include/net/page_pool_types.h > F: include/trace/events/page_pool.h > F: net/core/page_pool.c > > diff --git 
a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c > index 079f9f6ae21a..934b890ba2ab 100644 > --- a/drivers/net/ethernet/engleder/tsnep_main.c > +++ b/drivers/net/ethernet/engleder/tsnep_main.c > @@ -28,6 +28,7 @@ > #include <linux/iopoll.h> > #include <linux/bpf.h> > #include <linux/bpf_trace.h> > +#include <net/page_pool.h> > #include <net/xdp_sock_drv.h> > > #define TSNEP_RX_OFFSET (max(NET_SKB_PAD, XDP_PACKET_HEADROOM) + NET_IP_ALIGN) > diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c > index 03ac7690b5c4..68b79bda632a 100644 > --- a/drivers/net/ethernet/freescale/fec_main.c > +++ b/drivers/net/ethernet/freescale/fec_main.c > @@ -38,6 +38,7 @@ > #include <linux/in.h> > #include <linux/ip.h> > #include <net/ip.h> > +#include <net/page_pool.h> > #include <net/selftests.h> > #include <net/tso.h> > #include <linux/tcp.h> > diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c > index 8cdd92dd9762..d4f1baf0f987 100644 > --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c > +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c > @@ -7,6 +7,7 @@ > > #include <linux/interrupt.h> > #include <linux/pci.h> > +#include <net/page_pool.h> > #include <net/tso.h> > #include <linux/bitfield.h> > > diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c > index 9551b422622a..8807e40b1174 100644 > --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c > +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c > @@ -16,6 +16,7 @@ > #include <linux/bpf.h> > #include <linux/bpf_trace.h> > #include <linux/bitfield.h> > +#include <net/page_pool.h> > > #include "otx2_reg.h" > #include "otx2_common.h" > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c > index 
5ce28ff7685f..0f152f14165b 100644 > --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c > @@ -6,6 +6,7 @@ > #include "en/port.h" > #include "en_accel/en_accel.h" > #include "en_accel/ipsec.h" > +#include <net/page_pool.h> > #include <net/xdp_sock_drv.h> > > static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev) > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c > index 40589cebb773..16038c23b7d8 100644 > --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c > @@ -35,6 +35,7 @@ > #include "en/xdp.h" > #include "en/params.h" > #include <linux/bitfield.h> > +#include <net/page_pool.h> > > int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) > { > diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h > index 6b07b8fafec2..95c16f11d156 100644 > --- a/drivers/net/wireless/mediatek/mt76/mt76.h > +++ b/drivers/net/wireless/mediatek/mt76/mt76.h > @@ -15,6 +15,7 @@ > #include <linux/average.h> > #include <linux/soc/mediatek/mtk_wed.h> > #include <net/mac80211.h> > +#include <net/page_pool.h> > #include "util.h" > #include "testmode.h" > > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h > index faaba050f843..864c51c95ac4 100644 > --- a/include/linux/skbuff.h > +++ b/include/linux/skbuff.h > @@ -32,7 +32,7 @@ > #include <linux/if_packet.h> > #include <linux/llist.h> > #include <net/flow.h> > -#include <net/page_pool.h> > +#include <net/page_pool/types.h> > #if IS_ENABLED(CONFIG_NF_CONNTRACK) > #include <linux/netfilter/nf_conntrack_common.h> > #endif > diff --git a/include/net/page_pool.h b/include/net/page_pool.h > index f1d5cc1fa13b..dd70474c67cc 100644 > --- a/include/net/page_pool.h > +++ b/include/net/page_pool.h > @@ -29,107 +29,9 @@ > #ifndef _NET_PAGE_POOL_H > #define _NET_PAGE_POOL_H > > 
-#include <linux/mm.h> /* Needed by ptr_ring */ > -#include <linux/ptr_ring.h> > -#include <linux/dma-direction.h> > - > -#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA > - * map/unmap > - */ > -#define PP_FLAG_DMA_SYNC_DEV BIT(1) /* If set all pages that the driver gets > - * from page_pool will be > - * DMA-synced-for-device according to > - * the length provided by the device > - * driver. > - * Please note DMA-sync-for-CPU is still > - * device driver responsibility > - */ > -#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */ > -#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\ > - PP_FLAG_DMA_SYNC_DEV |\ > - PP_FLAG_PAGE_FRAG) > - > -/* > - * Fast allocation side cache array/stack > - * > - * The cache size and refill watermark is related to the network > - * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX > - * ring is usually refilled and the max consumed elements will be 64, > - * thus a natural max size of objects needed in the cache. > - * > - * Keeping room for more objects, is due to XDP_DROP use-case. As > - * XDP_DROP allows the opportunity to recycle objects directly into > - * this array, as it shares the same softirq/NAPI protection. If > - * cache is already full (or partly full) then the XDP_DROP recycles > - * would have to take a slower code path. 
> - */ > -#define PP_ALLOC_CACHE_SIZE 128 > -#define PP_ALLOC_CACHE_REFILL 64 > -struct pp_alloc_cache { > - u32 count; > - struct page *cache[PP_ALLOC_CACHE_SIZE]; > -}; > - > -struct page_pool_params { > - unsigned int flags; > - unsigned int order; > - unsigned int pool_size; > - int nid; /* Numa node id to allocate from pages from */ > - struct device *dev; /* device, for DMA pre-mapping purposes */ > - struct napi_struct *napi; /* Sole consumer of pages, otherwise NULL */ > - enum dma_data_direction dma_dir; /* DMA mapping direction */ > - unsigned int max_len; /* max DMA sync memory size */ > - unsigned int offset; /* DMA addr offset */ > - void (*init_callback)(struct page *page, void *arg); > - void *init_arg; > -}; > - > -#ifdef CONFIG_PAGE_POOL_STATS > -struct page_pool_alloc_stats { > - u64 fast; /* fast path allocations */ > - u64 slow; /* slow-path order 0 allocations */ > - u64 slow_high_order; /* slow-path high order allocations */ > - u64 empty; /* failed refills due to empty ptr ring, forcing > - * slow path allocation > - */ > - u64 refill; /* allocations via successful refill */ > - u64 waive; /* failed refills due to numa zone mismatch */ > -}; > - > -struct page_pool_recycle_stats { > - u64 cached; /* recycling placed page in the cache. */ > - u64 cache_full; /* cache was full */ > - u64 ring; /* recycling placed page back into ptr ring */ > - u64 ring_full; /* page was released from page-pool because > - * PTR ring was full. > - */ > - u64 released_refcnt; /* page released because of elevated > - * refcnt > - */ > -}; > - > -/* This struct wraps the above stats structs so users of the > - * page_pool_get_stats API can pass a single argument when requesting the > - * stats for the page pool. 
> - */ > -struct page_pool_stats { > - struct page_pool_alloc_stats alloc_stats; > - struct page_pool_recycle_stats recycle_stats; > -}; > - > -int page_pool_ethtool_stats_get_count(void); > -u8 *page_pool_ethtool_stats_get_strings(u8 *data); > -u64 *page_pool_ethtool_stats_get(u64 *data, void *stats); > - > -/* > - * Drivers that wish to harvest page pool stats and report them to users > - * (perhaps via ethtool, debugfs, or another mechanism) can allocate a > - * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool. > - */ > -bool page_pool_get_stats(struct page_pool *pool, > - struct page_pool_stats *stats); > -#else > +#include <net/page_pool/types.h> > > +#ifndef CONFIG_PAGE_POOL_STATS > static inline int page_pool_ethtool_stats_get_count(void) > { > return 0; > @@ -144,72 +46,7 @@ static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats) > { > return data; > } > - > -#endif > - > -struct page_pool { > - struct page_pool_params p; > - > - struct delayed_work release_dw; > - void (*disconnect)(void *); > - unsigned long defer_start; > - unsigned long defer_warn; > - > - u32 pages_state_hold_cnt; > - unsigned int frag_offset; > - struct page *frag_page; > - long frag_users; > - > -#ifdef CONFIG_PAGE_POOL_STATS > - /* these stats are incremented while in softirq context */ > - struct page_pool_alloc_stats alloc_stats; > -#endif > - u32 xdp_mem_id; > - > - /* > - * Data structure for allocation side > - * > - * Drivers allocation side usually already perform some kind > - * of resource protection. Piggyback on this protection, and > - * require driver to protect allocation side. > - * > - * For NIC drivers this means, allocate a page_pool per > - * RX-queue. As the RX-queue is already protected by > - * Softirq/BH scheduling and napi_schedule. NAPI schedule > - * guarantee that a single napi_struct will only be scheduled > - * on a single CPU (see napi_schedule). 
> - */ > - struct pp_alloc_cache alloc ____cacheline_aligned_in_smp; > - > - /* Data structure for storing recycled pages. > - * > - * Returning/freeing pages is more complicated synchronization > - * wise, because free's can happen on remote CPUs, with no > - * association with allocation resource. > - * > - * Use ptr_ring, as it separates consumer and producer > - * effeciently, it a way that doesn't bounce cache-lines. > - * > - * TODO: Implement bulk return pages into this structure. > - */ > - struct ptr_ring ring; > - > -#ifdef CONFIG_PAGE_POOL_STATS > - /* recycle stats are per-cpu to avoid locking */ > - struct page_pool_recycle_stats __percpu *recycle_stats; > #endif > - atomic_t pages_state_release_cnt; > - > - /* A page_pool is strictly tied to a single RX-queue being > - * protected by NAPI, due to above pp_alloc_cache. This > - * refcnt serves purpose is to simplify drivers error handling. > - */ > - refcount_t user_cnt; > - > - u64 destroy_cnt; > -}; > - > -struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); > > static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) > { > @@ -218,9 +55,6 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) > return page_pool_alloc_pages(pool, gfp); > } > > -struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, > - unsigned int size, gfp_t gfp); > - > static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, > unsigned int *offset, > unsigned int size) > @@ -239,20 +73,7 @@ inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool) > return pool->p.dma_dir; > } > > -bool page_pool_return_skb_page(struct page *page, bool napi_safe); > - > -struct page_pool *page_pool_create(const struct page_pool_params *params); > - > -struct xdp_mem_info; > - > -#ifdef CONFIG_PAGE_POOL > -void page_pool_unlink_napi(struct page_pool *pool); > -void page_pool_destroy(struct page_pool *pool); > -void 
page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), > - struct xdp_mem_info *mem); > -void page_pool_put_page_bulk(struct page_pool *pool, void **data, > - int count); > -#else > +#ifndef CONFIG_PAGE_POOL > static inline void page_pool_unlink_napi(struct page_pool *pool) > { > } > @@ -261,6 +82,7 @@ static inline void page_pool_destroy(struct page_pool *pool) > { > } > > +struct xdp_mem_info; > static inline void page_pool_use_xdp_mem(struct page_pool *pool, > void (*disconnect)(void *), > struct xdp_mem_info *mem) > @@ -273,10 +95,6 @@ static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, > } > #endif > > -void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, > - unsigned int dma_sync_size, > - bool allow_direct); > - > /* pp_frag_count represents the number of writers who can update the page > * either by updating skb->data or via DMA mappings for the device. > * We can't rely on the page refcnt for that as we don't know who might be > @@ -385,8 +203,6 @@ static inline bool page_pool_put(struct page_pool *pool) > return refcount_dec_and_test(&pool->user_cnt); > } > > -/* Caller must provide appropriate safe context, e.g. NAPI. 
*/ > -void page_pool_update_nid(struct page_pool *pool, int new_nid); > static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) > { > if (unlikely(pool->p.nid != new_nid)) > diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h > new file mode 100644 > index 000000000000..1d54ba0708db > --- /dev/null > +++ b/include/net/page_pool/types.h > @@ -0,0 +1,193 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > + > +#ifndef _NET_PAGE_POOL_TYPES_H > +#define _NET_PAGE_POOL_TYPES_H > + > +#include <linux/dma-direction.h> > +#include <linux/ptr_ring.h> > + > +#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA > + * map/unmap > + */ > +#define PP_FLAG_DMA_SYNC_DEV BIT(1) /* If set all pages that the driver gets > + * from page_pool will be > + * DMA-synced-for-device according to > + * the length provided by the device > + * driver. > + * Please note DMA-sync-for-CPU is still > + * device driver responsibility > + */ > +#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */ > +#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\ > + PP_FLAG_DMA_SYNC_DEV |\ > + PP_FLAG_PAGE_FRAG) > + > +/* > + * Fast allocation side cache array/stack > + * > + * The cache size and refill watermark is related to the network > + * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX > + * ring is usually refilled and the max consumed elements will be 64, > + * thus a natural max size of objects needed in the cache. > + * > + * Keeping room for more objects, is due to XDP_DROP use-case. As > + * XDP_DROP allows the opportunity to recycle objects directly into > + * this array, as it shares the same softirq/NAPI protection. If > + * cache is already full (or partly full) then the XDP_DROP recycles > + * would have to take a slower code path. 
> + */ > +#define PP_ALLOC_CACHE_SIZE 128 > +#define PP_ALLOC_CACHE_REFILL 64 > +struct pp_alloc_cache { > + u32 count; > + struct page *cache[PP_ALLOC_CACHE_SIZE]; > +}; > + > +struct page_pool_params { > + unsigned int flags; > + unsigned int order; > + unsigned int pool_size; > + int nid; /* Numa node id to allocate from pages from */ > + struct device *dev; /* device, for DMA pre-mapping purposes */ > + struct napi_struct *napi; /* Sole consumer of pages, otherwise NULL */ > + enum dma_data_direction dma_dir; /* DMA mapping direction */ > + unsigned int max_len; /* max DMA sync memory size */ > + unsigned int offset; /* DMA addr offset */ > + void (*init_callback)(struct page *page, void *arg); > + void *init_arg; > +}; > + > +#ifdef CONFIG_PAGE_POOL_STATS > +struct page_pool_alloc_stats { > + u64 fast; /* fast path allocations */ > + u64 slow; /* slow-path order 0 allocations */ > + u64 slow_high_order; /* slow-path high order allocations */ > + u64 empty; /* failed refills due to empty ptr ring, forcing > + * slow path allocation > + */ > + u64 refill; /* allocations via successful refill */ > + u64 waive; /* failed refills due to numa zone mismatch */ > +}; > + > +struct page_pool_recycle_stats { > + u64 cached; /* recycling placed page in the cache. */ > + u64 cache_full; /* cache was full */ > + u64 ring; /* recycling placed page back into ptr ring */ > + u64 ring_full; /* page was released from page-pool because > + * PTR ring was full. > + */ > + u64 released_refcnt; /* page released because of elevated > + * refcnt > + */ > +}; > + > +/* This struct wraps the above stats structs so users of the > + * page_pool_get_stats API can pass a single argument when requesting the > + * stats for the page pool. 
> + */ > +struct page_pool_stats { > + struct page_pool_alloc_stats alloc_stats; > + struct page_pool_recycle_stats recycle_stats; > +}; > + > +int page_pool_ethtool_stats_get_count(void); > +u8 *page_pool_ethtool_stats_get_strings(u8 *data); > +u64 *page_pool_ethtool_stats_get(u64 *data, void *stats); > + > +/* > + * Drivers that wish to harvest page pool stats and report them to users > + * (perhaps via ethtool, debugfs, or another mechanism) can allocate a > + * struct page_pool_stats call page_pool_get_stats to get stats for the > + * specified pool. > + */ > +bool page_pool_get_stats(struct page_pool *pool, > + struct page_pool_stats *stats); > +#endif > + > +struct page_pool { > + struct page_pool_params p; > + > + struct delayed_work release_dw; > + void (*disconnect)(void *); > + unsigned long defer_start; > + unsigned long defer_warn; > + > + u32 pages_state_hold_cnt; > + unsigned int frag_offset; > + struct page *frag_page; > + long frag_users; > + > +#ifdef CONFIG_PAGE_POOL_STATS > + /* these stats are incremented while in softirq context */ > + struct page_pool_alloc_stats alloc_stats; > +#endif > + u32 xdp_mem_id; > + > + /* > + * Data structure for allocation side > + * > + * Drivers allocation side usually already perform some kind > + * of resource protection. Piggyback on this protection, and > + * require driver to protect allocation side. > + * > + * For NIC drivers this means, allocate a page_pool per > + * RX-queue. As the RX-queue is already protected by > + * Softirq/BH scheduling and napi_schedule. NAPI schedule > + * guarantee that a single napi_struct will only be scheduled > + * on a single CPU (see napi_schedule). > + */ > + struct pp_alloc_cache alloc ____cacheline_aligned_in_smp; > + > + /* Data structure for storing recycled pages. > + * > + * Returning/freeing pages is more complicated synchronization > + * wise, because free's can happen on remote CPUs, with no > + * association with allocation resource. 
> + * > + * Use ptr_ring, as it separates consumer and producer > + * efficiently, it a way that doesn't bounce cache-lines. > + * > + * TODO: Implement bulk return pages into this structure. > + */ > + struct ptr_ring ring; > + > +#ifdef CONFIG_PAGE_POOL_STATS > + /* recycle stats are per-cpu to avoid locking */ > + struct page_pool_recycle_stats __percpu *recycle_stats; > +#endif > + atomic_t pages_state_release_cnt; > + > + /* A page_pool is strictly tied to a single RX-queue being > + * protected by NAPI, due to above pp_alloc_cache. This > + * refcnt serves purpose is to simplify drivers error handling. > + */ > + refcount_t user_cnt; > + > + u64 destroy_cnt; > +}; > + > +struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); > +struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, > + unsigned int size, gfp_t gfp); > +bool page_pool_return_skb_page(struct page *page, bool napi_safe); > +struct page_pool *page_pool_create(const struct page_pool_params *params); > + > +#ifdef CONFIG_PAGE_POOL > +void page_pool_unlink_napi(struct page_pool *pool); > +void page_pool_destroy(struct page_pool *pool); > + > +struct xdp_mem_info; > +void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), > + struct xdp_mem_info *mem); > +void page_pool_put_page_bulk(struct page_pool *pool, void **data, > + int count); > +#endif > + > +void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, > + unsigned int dma_sync_size, > + bool allow_direct); > + > +/* Caller must provide appropriate safe context, e.g. NAPI. */ > +void page_pool_update_nid(struct page_pool *pool, int new_nid); > + > +#endif /* _NET_PAGE_POOL_H */ > -- > 2.33.0 >
On Tue, 2023-07-25 at 21:12 +0800, Yunsheng Lin wrote: > Split types and pure function declarations from page_pool.h > and add them in page_pool/types.h, so that C sources can > include page_pool.h and headers should generally only include > page_pool/types.h as suggested by jakub. > > Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> > Suggested-by: Jakub Kicinski <kuba@kernel.org> > CC: Alexander Lobakin <aleksander.lobakin@intel.com> > --- > V2: Move from page_pool_types.h to page_pool/types.h, fix > some typo and alphabetic sorting. > --- > MAINTAINERS | 1 + > drivers/net/ethernet/engleder/tsnep_main.c | 1 + > drivers/net/ethernet/freescale/fec_main.c | 1 + > .../marvell/octeontx2/nic/otx2_common.c | 1 + > .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 1 + > .../ethernet/mellanox/mlx5/core/en/params.c | 1 + > .../net/ethernet/mellanox/mlx5/core/en/xdp.c | 1 + > drivers/net/wireless/mediatek/mt76/mt76.h | 1 + > include/linux/skbuff.h | 2 +- > include/net/page_pool.h | 192 +---------------- > include/net/page_pool/types.h | 193 ++++++++++++++++++ > 11 files changed, 206 insertions(+), 189 deletions(-) > create mode 100644 include/net/page_pool/types.h > > diff --git a/MAINTAINERS b/MAINTAINERS > index d0553ad37865..1dbfe7fcb10e 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -16016,6 +16016,7 @@ L: netdev@vger.kernel.org > S: Supported > F: Documentation/networking/page_pool.rst > F: include/net/page_pool.h > +F: include/net/page_pool_types.h > F: include/trace/events/page_pool.h > F: net/core/page_pool.c > The file name in this MAINTAINERS entry (include/net/page_pool_types.h) doesn't match the file the patch creates below (include/net/page_pool/types.h). I think you swapped a '/' for a '_'. 
<...> > diff --git a/include/net/page_pool.h b/include/net/page_pool.h > index f1d5cc1fa13b..dd70474c67cc 100644 > --- a/include/net/page_pool.h > +++ b/include/net/page_pool.h > @@ -29,107 +29,9 @@ > #ifndef _NET_PAGE_POOL_H > #define _NET_PAGE_POOL_H > > -#include <linux/mm.h> /* Needed by ptr_ring */ > -#include <linux/ptr_ring.h> > -#include <linux/dma-direction.h> > - > -#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA > - * map/unmap > - */ > -#define PP_FLAG_DMA_SYNC_DEV BIT(1) /* If set all pages that the driver gets > - * from page_pool will be > - * DMA-synced-for-device according to > - * the length provided by the device > - * driver. > - * Please note DMA-sync-for-CPU is still > - * device driver responsibility > - */ > -#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */ > -#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\ > - PP_FLAG_DMA_SYNC_DEV |\ > - PP_FLAG_PAGE_FRAG) > - > -/* > - * Fast allocation side cache array/stack > - * > - * The cache size and refill watermark is related to the network > - * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX > - * ring is usually refilled and the max consumed elements will be 64, > - * thus a natural max size of objects needed in the cache. > - * > - * Keeping room for more objects, is due to XDP_DROP use-case. As > - * XDP_DROP allows the opportunity to recycle objects directly into > - * this array, as it shares the same softirq/NAPI protection. If > - * cache is already full (or partly full) then the XDP_DROP recycles > - * would have to take a slower code path. 
> - */ > -#define PP_ALLOC_CACHE_SIZE 128 > -#define PP_ALLOC_CACHE_REFILL 64 > -struct pp_alloc_cache { > - u32 count; > - struct page *cache[PP_ALLOC_CACHE_SIZE]; > -}; > - > -struct page_pool_params { > - unsigned int flags; > - unsigned int order; > - unsigned int pool_size; > - int nid; /* Numa node id to allocate from pages from */ > - struct device *dev; /* device, for DMA pre-mapping purposes */ > - struct napi_struct *napi; /* Sole consumer of pages, otherwise NULL */ > - enum dma_data_direction dma_dir; /* DMA mapping direction */ > - unsigned int max_len; /* max DMA sync memory size */ > - unsigned int offset; /* DMA addr offset */ > - void (*init_callback)(struct page *page, void *arg); > - void *init_arg; > -}; > - > -#ifdef CONFIG_PAGE_POOL_STATS > -struct page_pool_alloc_stats { > - u64 fast; /* fast path allocations */ > - u64 slow; /* slow-path order 0 allocations */ > - u64 slow_high_order; /* slow-path high order allocations */ > - u64 empty; /* failed refills due to empty ptr ring, forcing > - * slow path allocation > - */ > - u64 refill; /* allocations via successful refill */ > - u64 waive; /* failed refills due to numa zone mismatch */ > -}; > - > -struct page_pool_recycle_stats { > - u64 cached; /* recycling placed page in the cache. */ > - u64 cache_full; /* cache was full */ > - u64 ring; /* recycling placed page back into ptr ring */ > - u64 ring_full; /* page was released from page-pool because > - * PTR ring was full. > - */ > - u64 released_refcnt; /* page released because of elevated > - * refcnt > - */ > -}; > - > -/* This struct wraps the above stats structs so users of the > - * page_pool_get_stats API can pass a single argument when requesting the > - * stats for the page pool. 
> - */ > -struct page_pool_stats { > - struct page_pool_alloc_stats alloc_stats; > - struct page_pool_recycle_stats recycle_stats; > -}; > - > -int page_pool_ethtool_stats_get_count(void); > -u8 *page_pool_ethtool_stats_get_strings(u8 *data); > -u64 *page_pool_ethtool_stats_get(u64 *data, void *stats); > - > -/* > - * Drivers that wish to harvest page pool stats and report them to users > - * (perhaps via ethtool, debugfs, or another mechanism) can allocate a > - * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool. > - */ > -bool page_pool_get_stats(struct page_pool *pool, > - struct page_pool_stats *stats); > -#else > +#include <net/page_pool/types.h> > > +#ifndef CONFIG_PAGE_POOL_STATS > static inline int page_pool_ethtool_stats_get_count(void) > { > return 0; > @@ -144,72 +46,7 @@ static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats) > { > return data; > } > - > -#endif > - > -struct page_pool { > - struct page_pool_params p; > - > - struct delayed_work release_dw; > - void (*disconnect)(void *); > - unsigned long defer_start; > - unsigned long defer_warn; > - > - u32 pages_state_hold_cnt; > - unsigned int frag_offset; > - struct page *frag_page; > - long frag_users; > - > -#ifdef CONFIG_PAGE_POOL_STATS > - /* these stats are incremented while in softirq context */ > - struct page_pool_alloc_stats alloc_stats; > -#endif > - u32 xdp_mem_id; > - > - /* > - * Data structure for allocation side > - * > - * Drivers allocation side usually already perform some kind > - * of resource protection. Piggyback on this protection, and > - * require driver to protect allocation side. > - * > - * For NIC drivers this means, allocate a page_pool per > - * RX-queue. As the RX-queue is already protected by > - * Softirq/BH scheduling and napi_schedule. NAPI schedule > - * guarantee that a single napi_struct will only be scheduled > - * on a single CPU (see napi_schedule). 
> - */ > - struct pp_alloc_cache alloc ____cacheline_aligned_in_smp; > - > - /* Data structure for storing recycled pages. > - * > - * Returning/freeing pages is more complicated synchronization > - * wise, because free's can happen on remote CPUs, with no > - * association with allocation resource. > - * > - * Use ptr_ring, as it separates consumer and producer > - * effeciently, it a way that doesn't bounce cache-lines. > - * > - * TODO: Implement bulk return pages into this structure. > - */ > - struct ptr_ring ring; > - > -#ifdef CONFIG_PAGE_POOL_STATS > - /* recycle stats are per-cpu to avoid locking */ > - struct page_pool_recycle_stats __percpu *recycle_stats; > #endif > - atomic_t pages_state_release_cnt; > - > - /* A page_pool is strictly tied to a single RX-queue being > - * protected by NAPI, due to above pp_alloc_cache. This > - * refcnt serves purpose is to simplify drivers error handling. > - */ > - refcount_t user_cnt; > - > - u64 destroy_cnt; > -}; > - > -struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); > > static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) > { > @@ -218,9 +55,6 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) > return page_pool_alloc_pages(pool, gfp); > } > > -struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, > - unsigned int size, gfp_t gfp); > - > static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, > unsigned int *offset, > unsigned int size) > @@ -239,20 +73,7 @@ inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool) > return pool->p.dma_dir; > } > > -bool page_pool_return_skb_page(struct page *page, bool napi_safe); > - > -struct page_pool *page_pool_create(const struct page_pool_params *params); > - > -struct xdp_mem_info; > - > -#ifdef CONFIG_PAGE_POOL > -void page_pool_unlink_napi(struct page_pool *pool); > -void page_pool_destroy(struct page_pool *pool); > -void 
page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), > - struct xdp_mem_info *mem); > -void page_pool_put_page_bulk(struct page_pool *pool, void **data, > - int count); > -#else > +#ifndef CONFIG_PAGE_POOL > static inline void page_pool_unlink_napi(struct page_pool *pool) > { > } > @@ -261,6 +82,7 @@ static inline void page_pool_destroy(struct page_pool *pool) > { > } > > +struct xdp_mem_info; > static inline void page_pool_use_xdp_mem(struct page_pool *pool, > void (*disconnect)(void *), > struct xdp_mem_info *mem) > @@ -273,10 +95,6 @@ static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, > } > #endif > > -void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, > - unsigned int dma_sync_size, > - bool allow_direct); > - > /* pp_frag_count represents the number of writers who can update the page > * either by updating skb->data or via DMA mappings for the device. > * We can't rely on the page refcnt for that as we don't know who might be > @@ -385,8 +203,6 @@ static inline bool page_pool_put(struct page_pool *pool) > return refcount_dec_and_test(&pool->user_cnt); > } > > -/* Caller must provide appropriate safe context, e.g. NAPI. 
*/ > -void page_pool_update_nid(struct page_pool *pool, int new_nid); > static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) > { > if (unlikely(pool->p.nid != new_nid)) > diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h > new file mode 100644 > index 000000000000..1d54ba0708db > --- /dev/null > +++ b/include/net/page_pool/types.h > @@ -0,0 +1,193 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > + > +#ifndef _NET_PAGE_POOL_TYPES_H > +#define _NET_PAGE_POOL_TYPES_H > + > +#include <linux/dma-direction.h> > +#include <linux/ptr_ring.h> > + > +#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA > + * map/unmap > + */ > +#define PP_FLAG_DMA_SYNC_DEV BIT(1) /* If set all pages that the driver gets > + * from page_pool will be > + * DMA-synced-for-device according to > + * the length provided by the device > + * driver. > + * Please note DMA-sync-for-CPU is still > + * device driver responsibility > + */ > +#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */ > +#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\ > + PP_FLAG_DMA_SYNC_DEV |\ > + PP_FLAG_PAGE_FRAG) > + > +/* > + * Fast allocation side cache array/stack > + * > + * The cache size and refill watermark is related to the network > + * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX > + * ring is usually refilled and the max consumed elements will be 64, > + * thus a natural max size of objects needed in the cache. > + * > + * Keeping room for more objects, is due to XDP_DROP use-case. As > + * XDP_DROP allows the opportunity to recycle objects directly into > + * this array, as it shares the same softirq/NAPI protection. If > + * cache is already full (or partly full) then the XDP_DROP recycles > + * would have to take a slower code path. 
> + */ > +#define PP_ALLOC_CACHE_SIZE 128 > +#define PP_ALLOC_CACHE_REFILL 64 > +struct pp_alloc_cache { > + u32 count; > + struct page *cache[PP_ALLOC_CACHE_SIZE]; > +}; > + > +struct page_pool_params { > + unsigned int flags; > + unsigned int order; > + unsigned int pool_size; > + int nid; /* Numa node id to allocate from pages from */ > + struct device *dev; /* device, for DMA pre-mapping purposes */ > + struct napi_struct *napi; /* Sole consumer of pages, otherwise NULL */ > + enum dma_data_direction dma_dir; /* DMA mapping direction */ > + unsigned int max_len; /* max DMA sync memory size */ > + unsigned int offset; /* DMA addr offset */ > + void (*init_callback)(struct page *page, void *arg); > + void *init_arg; > +}; > + > +#ifdef CONFIG_PAGE_POOL_STATS > +struct page_pool_alloc_stats { > + u64 fast; /* fast path allocations */ > + u64 slow; /* slow-path order 0 allocations */ > + u64 slow_high_order; /* slow-path high order allocations */ > + u64 empty; /* failed refills due to empty ptr ring, forcing > + * slow path allocation > + */ > + u64 refill; /* allocations via successful refill */ > + u64 waive; /* failed refills due to numa zone mismatch */ > +}; > + > +struct page_pool_recycle_stats { > + u64 cached; /* recycling placed page in the cache. */ > + u64 cache_full; /* cache was full */ > + u64 ring; /* recycling placed page back into ptr ring */ > + u64 ring_full; /* page was released from page-pool because > + * PTR ring was full. > + */ > + u64 released_refcnt; /* page released because of elevated > + * refcnt > + */ > +}; > + > +/* This struct wraps the above stats structs so users of the > + * page_pool_get_stats API can pass a single argument when requesting the > + * stats for the page pool. 
> + */ > +struct page_pool_stats { > + struct page_pool_alloc_stats alloc_stats; > + struct page_pool_recycle_stats recycle_stats; > +}; > + > +int page_pool_ethtool_stats_get_count(void); > +u8 *page_pool_ethtool_stats_get_strings(u8 *data); > +u64 *page_pool_ethtool_stats_get(u64 *data, void *stats); > + > +/* > + * Drivers that wish to harvest page pool stats and report them to users > + * (perhaps via ethtool, debugfs, or another mechanism) can allocate a > + * struct page_pool_stats call page_pool_get_stats to get stats for the > + * specified pool. > + */ > +bool page_pool_get_stats(struct page_pool *pool, > + struct page_pool_stats *stats); > +#endif > + > +struct page_pool { > + struct page_pool_params p; > + > + struct delayed_work release_dw; > + void (*disconnect)(void *); > + unsigned long defer_start; > + unsigned long defer_warn; > + > + u32 pages_state_hold_cnt; > + unsigned int frag_offset; > + struct page *frag_page; > + long frag_users; > + > +#ifdef CONFIG_PAGE_POOL_STATS > + /* these stats are incremented while in softirq context */ > + struct page_pool_alloc_stats alloc_stats; > +#endif > + u32 xdp_mem_id; > + > + /* > + * Data structure for allocation side > + * > + * Drivers allocation side usually already perform some kind > + * of resource protection. Piggyback on this protection, and > + * require driver to protect allocation side. > + * > + * For NIC drivers this means, allocate a page_pool per > + * RX-queue. As the RX-queue is already protected by > + * Softirq/BH scheduling and napi_schedule. NAPI schedule > + * guarantee that a single napi_struct will only be scheduled > + * on a single CPU (see napi_schedule). > + */ > + struct pp_alloc_cache alloc ____cacheline_aligned_in_smp; > + > + /* Data structure for storing recycled pages. > + * > + * Returning/freeing pages is more complicated synchronization > + * wise, because free's can happen on remote CPUs, with no > + * association with allocation resource. 
> + * > + * Use ptr_ring, as it separates consumer and producer > + * efficiently, it a way that doesn't bounce cache-lines. > + * > + * TODO: Implement bulk return pages into this structure. > + */ > + struct ptr_ring ring; > + > +#ifdef CONFIG_PAGE_POOL_STATS > + /* recycle stats are per-cpu to avoid locking */ > + struct page_pool_recycle_stats __percpu *recycle_stats; > +#endif > + atomic_t pages_state_release_cnt; > + > + /* A page_pool is strictly tied to a single RX-queue being > + * protected by NAPI, due to above pp_alloc_cache. This > + * refcnt serves purpose is to simplify drivers error handling. > + */ > + refcount_t user_cnt; > + > + u64 destroy_cnt; > +}; > + > +struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); > +struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, > + unsigned int size, gfp_t gfp); > +bool page_pool_return_skb_page(struct page *page, bool napi_safe); > +struct page_pool *page_pool_create(const struct page_pool_params *params); > + > +#ifdef CONFIG_PAGE_POOL > +void page_pool_unlink_napi(struct page_pool *pool); > +void page_pool_destroy(struct page_pool *pool); > + > +struct xdp_mem_info; > +void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), > + struct xdp_mem_info *mem); > +void page_pool_put_page_bulk(struct page_pool *pool, void **data, > + int count); > +#endif > + > +void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, > + unsigned int dma_sync_size, > + bool allow_direct); > + > +/* Caller must provide appropriate safe context, e.g. NAPI. */ > +void page_pool_update_nid(struct page_pool *pool, int new_nid); > + > +#endif /* _NET_PAGE_POOL_H */ This seems kind of overkill for what is needed. It seems like the general thought process with splitting this was so that you had just the minimum of what is needed to support skbuff.h and the functions declared there. 
The rest of this would then be added via the .h to the .c files that will actually be calling the functions. By that logic I think the only thing we really need is the function declaration for page_pool_return_skb_page moved into skbuff.h. We could then just remove page_pool.h from skbuff.h, couldn't we? Another thing we could consider doing is looking at splitting things up so that we had an include file in net/core/page_pool.h to handle some of the cases where we are just linking the page_pool bits to other core file bits such as xdp.c and skbuff.c.
On Tue, 25 Jul 2023 17:42:28 +0300 Ilias Apalodimas wrote: > Apologies for the very late replies, I was on long vacation with limited > internet access. > Yunsheng, since there's been a few mails and I lost track, this is instead of > [0] right? If so, I prefer this approach. It looks ok on a first quick pass, > I'll have a closer look later. > > [0] https://lore.kernel.org/netdev/20230714170853.866018-2-aleksander.lobakin@intel.com/ I prefer the more systematic approach of creating a separate types.h file, so I don't have to keep chasing people or cleaning up the include hell myself. I think it should be adopted more widely going forward, it's not just about the page pool.
Hi Jakub, On Wed, 26 Jul 2023 at 00:12, Jakub Kicinski <kuba@kernel.org> wrote: > > On Tue, 25 Jul 2023 17:42:28 +0300 Ilias Apalodimas wrote: > > Apologies for the very late replies, I was on long vacation with limited > > internet access. > > Yunsheng, since there's been a few mails and I lost track, this is instead of > > [0] right? If so, I prefer this approach. It looks ok on a first quick pass, > > I'll have a closer look later. > > > > [0] https://lore.kernel.org/netdev/20230714170853.866018-2-aleksander.lobakin@intel.com/ > > > I prefer the more systematic approach of creating a separate types.h > file, so I don't have to keep chasing people or cleaning up the include > hell myself. I think it should be adopted more widely going forward, > it's not just about the page pool. Right, my bad. We share the same opinion. What I meant by "this" is the split proposed by Yunsheng, but reading my email again that wasn't very clear ... Cheers /Ilias
From: Alexander H Duyck <alexander.duyck@gmail.com> Date: Tue, 25 Jul 2023 08:47:46 -0700 > On Tue, 2023-07-25 at 21:12 +0800, Yunsheng Lin wrote: >> Split types and pure function declarations from page_pool.h >> and add them in page_pool/types.h, so that C sources can >> include page_pool.h and headers should generally only include >> page_pool/types.h as suggested by jakub. >> >> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> >> Suggested-by: Jakub Kicinski <kuba@kernel.org> >> CC: Alexander Lobakin <aleksander.lobakin@intel.com> [...] >> +/* Caller must provide appropriate safe context, e.g. NAPI. */ >> +void page_pool_update_nid(struct page_pool *pool, int new_nid); >> + >> +#endif /* _NET_PAGE_POOL_H */ > > > This seems kind of overkill for what is needed. It seems like the > general thought process with splitting this was so that you had just > the minimum of what is needed to support skbuff.h and the functions > declared there. The rest of this would then be added via the .h to the > .c files that will actually be calling the functions. > > By that logic I think the only thing we really need is the function > declaration for page_pool_return_skb_page moved into skbuff.h. We could > then just remove page_pool.h from skbuff.h couldn't we? This patch is not to drop page_pool.h include from skbuff.h. This is more future-proof (since I'm dropping this include anyway in my series) to have includes organized and prevent cases like that one with skbuff.h from happening. And to save some CPU cycles on preprocessing if that makes sense. > > Another thing we could consider doing is looking at splitting things up > so that we had a include file in net/core/page_pool.h to handle some of > the cases where we are just linking the page_pool bits to other core > file bits such as xdp.c and skbuff.c. Thanks, Olek
From: Jakub Kicinski <kuba@kernel.org> Date: Tue, 25 Jul 2023 14:12:23 -0700 > On Tue, 25 Jul 2023 17:42:28 +0300 Ilias Apalodimas wrote: >> Apologies for the very late replies, I was on long vacation with limited >> internet access. >> Yunsheng, since there's been a few mails and I lost track, this is instead of >> [0] right? If so, I prefer this approach. It looks ok on a first quick pass, >> I'll have a closer look later. >> >> [0] https://lore.kernel.org/netdev/20230714170853.866018-2-aleksander.lobakin@intel.com/ > > > I prefer the more systematic approach of creating a separate types.h > file, so I don't have to keep chasing people or cleaning up the include > hell myself. I think it should be adopted more widely going forward, > it's not just about the page pool. I have this patch reworked to introduce include/net/page_pool/{types,helpers}.h in my tree, maybe someone could take a quick look[0] and say if this works while I'm preparing the next version for sending? Not the most MLish way, I know :s [0] https://github.com/alobakin/linux/commit/19741ee072c32eb1d30033cd4fcb236d1c00bfbf Thanks, Olek
On 2023/7/26 18:43, Alexander Lobakin wrote: > From: Alexander H Duyck <alexander.duyck@gmail.com> > Date: Tue, 25 Jul 2023 08:47:46 -0700 > >> On Tue, 2023-07-25 at 21:12 +0800, Yunsheng Lin wrote: >>> Split types and pure function declarations from page_pool.h >>> and add them in page_pool/types.h, so that C sources can >>> include page_pool.h and headers should generally only include >>> page_pool/types.h as suggested by jakub. >>> >>> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> >>> Suggested-by: Jakub Kicinski <kuba@kernel.org> >>> CC: Alexander Lobakin <aleksander.lobakin@intel.com> > > [...] > >>> +/* Caller must provide appropriate safe context, e.g. NAPI. */ >>> +void page_pool_update_nid(struct page_pool *pool, int new_nid); >>> + >>> +#endif /* _NET_PAGE_POOL_H */ >> >> >> This seems kind of overkill for what is needed. It seems like the >> general thought process with splitting this was so that you had just >> the minimum of what is needed to support skbuff.h and the functions >> declared there. The rest of this would then be added via the .h to the >> .c files that will actually be calling the functions. >> >> By that logic I think the only thing we really need is the function >> declaration for page_pool_return_skb_page moved into skbuff.h. We could >> then just remove page_pool.h from skbuff.h couldn't we? > > This patch is not to drop page_pool.h include from skbuff.h. > This is more future-proof (since I'm dropping this include anyway in my > series) to have includes organized and prevent cases like that one with > skbuff.h from happening. And to save some CPU cycles on preprocessing if > that makes sense. 
The suggestion is from below: https://lore.kernel.org/all/20230710113841.482cbeac@kernel.org/ > >> >> Another thing we could consider doing is looking at splitting things up >> so that we had a include file in net/core/page_pool.h to handle some of >> the cases where we are just linking the page_pool bits to other core >> file bits such as xdp.c and skbuff.c. I suppose the above suggestion is about splitting or naming by the user, as discussed in the thread below? https://lore.kernel.org/all/20230721182942.0ca57663@kernel.org/ > > Thanks, > Olek > . >
On Wed, Jul 26, 2023 at 4:23 AM Yunsheng Lin <linyunsheng@huawei.com> wrote: > > On 2023/7/26 18:43, Alexander Lobakin wrote: > > From: Alexander H Duyck <alexander.duyck@gmail.com> > > Date: Tue, 25 Jul 2023 08:47:46 -0700 > > > >> On Tue, 2023-07-25 at 21:12 +0800, Yunsheng Lin wrote: > >>> Split types and pure function declarations from page_pool.h > >>> and add them in page_pool/types.h, so that C sources can > >>> include page_pool.h and headers should generally only include > >>> page_pool/types.h as suggested by jakub. > >>> > >>> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> > >>> Suggested-by: Jakub Kicinski <kuba@kernel.org> > >>> CC: Alexander Lobakin <aleksander.lobakin@intel.com> > > > > [...] > > > >>> +/* Caller must provide appropriate safe context, e.g. NAPI. */ > >>> +void page_pool_update_nid(struct page_pool *pool, int new_nid); > >>> + > >>> +#endif /* _NET_PAGE_POOL_H */ > >> > >> > >> This seems kind of overkill for what is needed. It seems like the > >> general thought process with splitting this was so that you had just > >> the minimum of what is needed to support skbuff.h and the functions > >> declared there. The rest of this would then be added via the .h to the > >> .c files that will actually be calling the functions. > >> > >> By that logic I think the only thing we really need is the function > >> declaration for page_pool_return_skb_page moved into skbuff.h. We could > >> then just remove page_pool.h from skbuff.h couldn't we? > > > > This patch is not to drop page_pool.h include from skbuff.h. > > This is more future-proof (since I'm dropping this include anyway in my > > series) to have includes organized and prevent cases like that one with > > skbuff.h from happening. And to save some CPU cycles on preprocessing if > > that makes sense. > > The suggestion is from below: > > https://lore.kernel.org/all/20230710113841.482cbeac@kernel.org/ I get that. But it seemed like your types.h is full of inline functions. 
That is what I was responding to. I would leave the inline functions in page_pool.h unless there is some significant need for them. > > > >> > >> Another thing we could consider doing is looking at splitting things up > >> so that we had a include file in net/core/page_pool.h to handle some of > >> the cases where we are just linking the page_pool bits to other core > >> file bits such as xdp.c and skbuff.c. > > I suppose the above suggestion is about splitting or naming by > the user as the discussed in the below thread? > https://lore.kernel.org/all/20230721182942.0ca57663@kernel.org/ Actually my suggestion is more about defining boundaries for what is meant to be used by drivers and what isn't. The stuff you could keep in net/core/page_pool.h would only be usable by the files in net/core/ whereas the stuff you are keeping in the include/net/ folder is usable by drivers. It is meant to prevent things like what you were complaining about with the Mellanox drivers making use of interfaces you didn't intend them to use. So for example you could pull out functions like page_pool_return_skb_page, page_pool_use_xdp_mem, page_pool_update_nid, and the like and look at relocating them into the net/core/ folder and thereby prevent abuse of those functions by drivers.
On Wed, Jul 26, 2023 at 8:30 AM Alexander Duyck <alexander.duyck@gmail.com> wrote: > > On Wed, Jul 26, 2023 at 4:23 AM Yunsheng Lin <linyunsheng@huawei.com> wrote: > > > > On 2023/7/26 18:43, Alexander Lobakin wrote: > > > From: Alexander H Duyck <alexander.duyck@gmail.com> > > > Date: Tue, 25 Jul 2023 08:47:46 -0700 > > > > > >> On Tue, 2023-07-25 at 21:12 +0800, Yunsheng Lin wrote: > > >>> Split types and pure function declarations from page_pool.h > > >>> and add them in page_pool/types.h, so that C sources can > > >>> include page_pool.h and headers should generally only include > > >>> page_pool/types.h as suggested by jakub. > > >>> > > >>> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> > > >>> Suggested-by: Jakub Kicinski <kuba@kernel.org> > > >>> CC: Alexander Lobakin <aleksander.lobakin@intel.com> > > > > > > [...] > > > > > >>> +/* Caller must provide appropriate safe context, e.g. NAPI. */ > > >>> +void page_pool_update_nid(struct page_pool *pool, int new_nid); > > >>> + > > >>> +#endif /* _NET_PAGE_POOL_H */ > > >> > > >> > > >> This seems kind of overkill for what is needed. It seems like the > > >> general thought process with splitting this was so that you had just > > >> the minimum of what is needed to support skbuff.h and the functions > > >> declared there. The rest of this would then be added via the .h to the > > >> .c files that will actually be calling the functions. > > >> > > >> By that logic I think the only thing we really need is the function > > >> declaration for page_pool_return_skb_page moved into skbuff.h. We could > > >> then just remove page_pool.h from skbuff.h couldn't we? > > > > > > This patch is not to drop page_pool.h include from skbuff.h. > > > This is more future-proof (since I'm dropping this include anyway in my > > > series) to have includes organized and prevent cases like that one with > > > skbuff.h from happening. And to save some CPU cycles on preprocessing if > > > that makes sense. 
> > > > The suggestion is from below: > > > > https://lore.kernel.org/all/20230710113841.482cbeac@kernel.org/ > > I get that. But it seemed like your types.h is full of inline > functions. That is what I was responding to. I would leave the inline > functions in page_pool.h unless there is some significant need for > them. > > > > > > >> > > >> Another thing we could consider doing is looking at splitting things up > > >> so that we had a include file in net/core/page_pool.h to handle some of > > >> the cases where we are just linking the page_pool bits to other core > > >> file bits such as xdp.c and skbuff.c. > > > > I suppose the above suggestion is about splitting or naming by > > the user as the discussed in the below thread? > > https://lore.kernel.org/all/20230721182942.0ca57663@kernel.org/ > > Actually my suggestion is more about defining boundaries for what is > meant to be used by drivers and what isn't. The stuff you could keep > in net/core/page_pool.h would only be usable by the files in net/core/ > whereas the stuff you are keeping in the include/net/ folder is usable > by drivers. It is meant to prevent things like what you were > complaining about with the Mellanox drivers making use of interfaces > you didn't intend them to use. > > So for example you could pull out functions like > page_pool_return_skb_page, page_pool_use_xdp_mem, > page_pool_update_nid, and the like and look at relocating them into > the net/core/ folder and thereby prevent abuse of those functions by > drivers. Okay, maybe not page_pool_update_nid. It looks like that is already in use in the form of page_pool_nid_changed by drivers..
From: Alexander Duyck <alexander.duyck@gmail.com> Date: Wed, 26 Jul 2023 08:30:17 -0700 > On Wed, Jul 26, 2023 at 4:23 AM Yunsheng Lin <linyunsheng@huawei.com> wrote: >> >> On 2023/7/26 18:43, Alexander Lobakin wrote: >>> From: Alexander H Duyck <alexander.duyck@gmail.com> >>> Date: Tue, 25 Jul 2023 08:47:46 -0700 >>> >>>> On Tue, 2023-07-25 at 21:12 +0800, Yunsheng Lin wrote: >>>>> Split types and pure function declarations from page_pool.h >>>>> and add them in page_pool/types.h, so that C sources can >>>>> include page_pool.h and headers should generally only include >>>>> page_pool/types.h as suggested by jakub. >>>>> >>>>> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> >>>>> Suggested-by: Jakub Kicinski <kuba@kernel.org> >>>>> CC: Alexander Lobakin <aleksander.lobakin@intel.com> >>> >>> [...] >>> >>>>> +/* Caller must provide appropriate safe context, e.g. NAPI. */ >>>>> +void page_pool_update_nid(struct page_pool *pool, int new_nid); >>>>> + >>>>> +#endif /* _NET_PAGE_POOL_H */ >>>> >>>> >>>> This seems kind of overkill for what is needed. It seems like the >>>> general thought process with splitting this was so that you had just >>>> the minimum of what is needed to support skbuff.h and the functions >>>> declared there. The rest of this would then be added via the .h to the >>>> .c files that will actually be calling the functions. >>>> >>>> By that logic I think the only thing we really need is the function >>>> declaration for page_pool_return_skb_page moved into skbuff.h. We could >>>> then just remove page_pool.h from skbuff.h couldn't we? >>> >>> This patch is not to drop page_pool.h include from skbuff.h. >>> This is more future-proof (since I'm dropping this include anyway in my >>> series) to have includes organized and prevent cases like that one with >>> skbuff.h from happening. And to save some CPU cycles on preprocessing if >>> that makes sense. 
>> >> The suggestion is from below: >> >> https://lore.kernel.org/all/20230710113841.482cbeac@kernel.org/ > > I get that. But it seemed like your types.h is full of inline > functions. That is what I was responding to. I would leave the inline Ah, okay. So this was reply to my proposal, not Yunsheng's. I missed that ._. > functions in page_pool.h unless there is some significant need for > them. Only in order to not have the same functions defined in either types.h or helpers.h depending on the kernel configuration -- this is counter-intuitive and doesn't allow to use types.h when we don't need driver-facing API. Those inlines don't require any includes and 99% of them are empty (with 1% returning true/false depending on the kernel conf). What's the point of breaking declaration consistency if we don't win anything? > >>> >>>> >>>> Another thing we could consider doing is looking at splitting things up >>>> so that we had a include file in net/core/page_pool.h to handle some of >>>> the cases where we are just linking the page_pool bits to other core >>>> file bits such as xdp.c and skbuff.c. >> >> I suppose the above suggestion is about splitting or naming by >> the user as the discussed in the below thread? >> https://lore.kernel.org/all/20230721182942.0ca57663@kernel.org/ > > Actually my suggestion is more about defining boundaries for what is > meant to be used by drivers and what isn't. The stuff you could keep helpers.h is to be used by drivers, types.h by kernel core. Mostly :D > in net/core/page_pool.h would only be usable by the files in net/core/ > whereas the stuff you are keeping in the include/net/ folder is usable > by drivers. It is meant to prevent things like what you were > complaining about with the Mellanox drivers making use of interfaces You mean abusing DMA allocation function by manual rewriting of device's NUMA node? If you want to avoid that, you need to make struct device private, I don't think it's a good idea. 
Otherwise, there will always be room for exploiting the internals up to some point. > you didn't intend them to use. > > So for example you could pull out functions like > page_pool_return_skb_page, page_pool_use_xdp_mem, > page_pool_update_nid, and the like and look at relocating them into update_nid() is used by drivers. > the net/core/ folder and thereby prevent abuse of those functions by > drivers. I don't think there are so many internal functions that it would be worth a separate header. Thanks, Olek
On Wed, 26 Jul 2023 12:48:05 +0200 Alexander Lobakin wrote: > > I prefer the more systematic approach of creating a separate types.h > > file, so I don't have to keep chasing people or cleaning up the include > > hell myself. I think it should be adopted more widely going forward, > > it's not just about the page pool. > > I have this patch reworked to introduce > include/net/page_pool/{types,helpers}.h in my tree, maybe someone could > take a quick look[0] and say if this works while I'm preparing the next > version for sending? Not the most MLish way, I know :s > > [0] > https://github.com/alobakin/linux/commit/19741ee072c32eb1d30033cd4fcb236d1c00bfbf LGTM!
On Wed, 26 Jul 2023 08:39:43 -0700 Alexander Duyck wrote: > > > I suppose the above suggestion is about splitting or naming by > > > the user as the discussed in the below thread? > > > https://lore.kernel.org/all/20230721182942.0ca57663@kernel.org/ > > > > Actually my suggestion is more about defining boundaries for what is > > meant to be used by drivers and what isn't. The stuff you could keep > > in net/core/page_pool.h would only be usable by the files in net/core/ > > whereas the stuff you are keeping in the include/net/ folder is usable > > by drivers. It is meant to prevent things like what you were > > complaining about with the Mellanox drivers making use of interfaces > > you didn't intend them to use. FWIW moving stuff which is only supposed to be used by core (xdp, skb, etc.) to net/core/page_pool.h is a good idea, too. Seems a bit independent from splitting the main header, tho. > > So for example you could pull out functions like > > page_pool_return_skb_page, page_pool_use_xdp_mem, > > page_pool_update_nid, and the like and look at relocating them into > > the net/core/ folder and thereby prevent abuse of those functions by > > drivers. > > Okay, maybe not page_pool_update_nid. It looks like that is already in > use in the form of page_pool_nid_changed by drivers..
On Wed, Jul 26, 2023 at 8:50 AM Jakub Kicinski <kuba@kernel.org> wrote: > > On Wed, 26 Jul 2023 08:39:43 -0700 Alexander Duyck wrote: > > > > I suppose the above suggestion is about splitting or naming by > > > > the user as the discussed in the below thread? > > > > https://lore.kernel.org/all/20230721182942.0ca57663@kernel.org/ > > > > > > Actually my suggestion is more about defining boundaries for what is > > > meant to be used by drivers and what isn't. The stuff you could keep > > > in net/core/page_pool.h would only be usable by the files in net/core/ > > > whereas the stuff you are keeping in the include/net/ folder is usable > > > by drivers. It is meant to prevent things like what you were > > > complaining about with the Mellanox drivers making use of interfaces > > > you didn't intend them to use. > > FWIW moving stuff which is only supposed to be used by core (xdp, skb, > etc.) to net/core/page_pool.h is a good idea, too. > Seems a bit independent from splitting the main header, tho. It seems a bit independent, but I was reacting only because I feel like this is just adding to the technical debt on this. Basically, before we can really just go ahead and split it, the header file itself should probably be cleaned up a bit. The reason why it occurred to me is that I noticed things like page_pool_use_xdp_mem and the forward declaration for xdp_mem_info were being picked up and moved into the types.h file in the move. The whole block was in a #if/#else statement w/ definitions for the PAGE_POOL and non-PAGE_POOL cases. We also have functions that don't really need to be included, such as page_pool_unlink_napi, which is exported but not used outside of page_pool.c from what I can tell.
On 2023/7/26 23:47, Jakub Kicinski wrote: > On Wed, 26 Jul 2023 12:48:05 +0200 Alexander Lobakin wrote: >>> I prefer the more systematic approach of creating a separate types.h >>> file, so I don't have to keep chasing people or cleaning up the include >>> hell myself. I think it should be adopted more widely going forward, >>> it's not just about the page pool. >> >> I have this patch reworked to introduce >> include/net/page_pool/{types,helpers}.h in my tree, maybe someone could >> take a quick look[0] and say if this works while I'm preparing the next >> version for sending? Not the most MLish way, I know :s >> >> [0] >> https://github.com/alobakin/linux/commit/19741ee072c32eb1d30033cd4fcb236d1c00bfbf > > LGTM! Hi, Alexander It seems you have taken it and adjusted it accordingly. Do you mind sending the next version along with your patchset, so that there are fewer patch conflicts for both of us? :) > > . >
From: Yunsheng Lin <linyunsheng@huawei.com> Date: Thu, 27 Jul 2023 19:47:23 +0800 > On 2023/7/26 23:47, Jakub Kicinski wrote: >> On Wed, 26 Jul 2023 12:48:05 +0200 Alexander Lobakin wrote: >>>> I prefer the more systematic approach of creating a separate types.h >>>> file, so I don't have to keep chasing people or cleaning up the include >>>> hell myself. I think it should be adopted more widely going forward, >>>> it's not just about the page pool. >>> >>> I have this patch reworked to introduce >>> include/net/page_pool/{types,helpers}.h in my tree, maybe someone could >>> take a quick look[0] and say if this works while I'm preparing the next >>> version for sending? Not the most MLish way, I know :s >>> >>> [0] >>> https://github.com/alobakin/linux/commit/19741ee072c32eb1d30033cd4fcb236d1c00bfbf >> >> LGTM! > > Hi, Alexander > It seems you have taken it and adjust it accordingly, do you mind sending > the next version along with your patchset, so that there is less patch > conflict for both of us:) Sure, I'm planning to do that, just had some work burden. I'm sending the next rev today if everything goes fine. > >> >> . >> Thanks, Olek
diff --git a/MAINTAINERS b/MAINTAINERS index d0553ad37865..1dbfe7fcb10e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16016,6 +16016,7 @@ L: netdev@vger.kernel.org S: Supported F: Documentation/networking/page_pool.rst F: include/net/page_pool.h +F: include/net/page_pool_types.h F: include/trace/events/page_pool.h F: net/core/page_pool.c diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index 079f9f6ae21a..934b890ba2ab 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -28,6 +28,7 @@ #include <linux/iopoll.h> #include <linux/bpf.h> #include <linux/bpf_trace.h> +#include <net/page_pool.h> #include <net/xdp_sock_drv.h> #define TSNEP_RX_OFFSET (max(NET_SKB_PAD, XDP_PACKET_HEADROOM) + NET_IP_ALIGN) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 03ac7690b5c4..68b79bda632a 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -38,6 +38,7 @@ #include <linux/in.h> #include <linux/ip.h> #include <net/ip.h> +#include <net/page_pool.h> #include <net/selftests.h> #include <net/tso.h> #include <linux/tcp.h> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 8cdd92dd9762..d4f1baf0f987 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -7,6 +7,7 @@ #include <linux/interrupt.h> #include <linux/pci.h> +#include <net/page_pool.h> #include <net/tso.h> #include <linux/bitfield.h> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 9551b422622a..8807e40b1174 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -16,6 +16,7 @@ #include <linux/bpf.h> #include 
<linux/bpf_trace.h> #include <linux/bitfield.h> +#include <net/page_pool.h> #include "otx2_reg.h" #include "otx2_common.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 5ce28ff7685f..0f152f14165b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -6,6 +6,7 @@ #include "en/port.h" #include "en_accel/en_accel.h" #include "en_accel/ipsec.h" +#include <net/page_pool.h> #include <net/xdp_sock_drv.h> static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 40589cebb773..16038c23b7d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -35,6 +35,7 @@ #include "en/xdp.h" #include "en/params.h" #include <linux/bitfield.h> +#include <net/page_pool.h> int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 6b07b8fafec2..95c16f11d156 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -15,6 +15,7 @@ #include <linux/average.h> #include <linux/soc/mediatek/mtk_wed.h> #include <net/mac80211.h> +#include <net/page_pool.h> #include "util.h" #include "testmode.h" diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index faaba050f843..864c51c95ac4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -32,7 +32,7 @@ #include <linux/if_packet.h> #include <linux/llist.h> #include <net/flow.h> -#include <net/page_pool.h> +#include <net/page_pool/types.h> #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <linux/netfilter/nf_conntrack_common.h> #endif diff --git a/include/net/page_pool.h b/include/net/page_pool.h index f1d5cc1fa13b..dd70474c67cc 
100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -29,107 +29,9 @@ #ifndef _NET_PAGE_POOL_H #define _NET_PAGE_POOL_H -#include <linux/mm.h> /* Needed by ptr_ring */ -#include <linux/ptr_ring.h> -#include <linux/dma-direction.h> - -#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA - * map/unmap - */ -#define PP_FLAG_DMA_SYNC_DEV BIT(1) /* If set all pages that the driver gets - * from page_pool will be - * DMA-synced-for-device according to - * the length provided by the device - * driver. - * Please note DMA-sync-for-CPU is still - * device driver responsibility - */ -#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */ -#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\ - PP_FLAG_DMA_SYNC_DEV |\ - PP_FLAG_PAGE_FRAG) - -/* - * Fast allocation side cache array/stack - * - * The cache size and refill watermark is related to the network - * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX - * ring is usually refilled and the max consumed elements will be 64, - * thus a natural max size of objects needed in the cache. - * - * Keeping room for more objects, is due to XDP_DROP use-case. As - * XDP_DROP allows the opportunity to recycle objects directly into - * this array, as it shares the same softirq/NAPI protection. If - * cache is already full (or partly full) then the XDP_DROP recycles - * would have to take a slower code path. 
- */ -#define PP_ALLOC_CACHE_SIZE 128 -#define PP_ALLOC_CACHE_REFILL 64 -struct pp_alloc_cache { - u32 count; - struct page *cache[PP_ALLOC_CACHE_SIZE]; -}; - -struct page_pool_params { - unsigned int flags; - unsigned int order; - unsigned int pool_size; - int nid; /* Numa node id to allocate from pages from */ - struct device *dev; /* device, for DMA pre-mapping purposes */ - struct napi_struct *napi; /* Sole consumer of pages, otherwise NULL */ - enum dma_data_direction dma_dir; /* DMA mapping direction */ - unsigned int max_len; /* max DMA sync memory size */ - unsigned int offset; /* DMA addr offset */ - void (*init_callback)(struct page *page, void *arg); - void *init_arg; -}; - -#ifdef CONFIG_PAGE_POOL_STATS -struct page_pool_alloc_stats { - u64 fast; /* fast path allocations */ - u64 slow; /* slow-path order 0 allocations */ - u64 slow_high_order; /* slow-path high order allocations */ - u64 empty; /* failed refills due to empty ptr ring, forcing - * slow path allocation - */ - u64 refill; /* allocations via successful refill */ - u64 waive; /* failed refills due to numa zone mismatch */ -}; - -struct page_pool_recycle_stats { - u64 cached; /* recycling placed page in the cache. */ - u64 cache_full; /* cache was full */ - u64 ring; /* recycling placed page back into ptr ring */ - u64 ring_full; /* page was released from page-pool because - * PTR ring was full. - */ - u64 released_refcnt; /* page released because of elevated - * refcnt - */ -}; - -/* This struct wraps the above stats structs so users of the - * page_pool_get_stats API can pass a single argument when requesting the - * stats for the page pool. 
- */ -struct page_pool_stats { - struct page_pool_alloc_stats alloc_stats; - struct page_pool_recycle_stats recycle_stats; -}; - -int page_pool_ethtool_stats_get_count(void); -u8 *page_pool_ethtool_stats_get_strings(u8 *data); -u64 *page_pool_ethtool_stats_get(u64 *data, void *stats); - -/* - * Drivers that wish to harvest page pool stats and report them to users - * (perhaps via ethtool, debugfs, or another mechanism) can allocate a - * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool. - */ -bool page_pool_get_stats(struct page_pool *pool, - struct page_pool_stats *stats); -#else +#include <net/page_pool/types.h> +#ifndef CONFIG_PAGE_POOL_STATS static inline int page_pool_ethtool_stats_get_count(void) { return 0; @@ -144,72 +46,7 @@ static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats) { return data; } - -#endif - -struct page_pool { - struct page_pool_params p; - - struct delayed_work release_dw; - void (*disconnect)(void *); - unsigned long defer_start; - unsigned long defer_warn; - - u32 pages_state_hold_cnt; - unsigned int frag_offset; - struct page *frag_page; - long frag_users; - -#ifdef CONFIG_PAGE_POOL_STATS - /* these stats are incremented while in softirq context */ - struct page_pool_alloc_stats alloc_stats; -#endif - u32 xdp_mem_id; - - /* - * Data structure for allocation side - * - * Drivers allocation side usually already perform some kind - * of resource protection. Piggyback on this protection, and - * require driver to protect allocation side. - * - * For NIC drivers this means, allocate a page_pool per - * RX-queue. As the RX-queue is already protected by - * Softirq/BH scheduling and napi_schedule. NAPI schedule - * guarantee that a single napi_struct will only be scheduled - * on a single CPU (see napi_schedule). - */ - struct pp_alloc_cache alloc ____cacheline_aligned_in_smp; - - /* Data structure for storing recycled pages. 
- * - * Returning/freeing pages is more complicated synchronization - * wise, because free's can happen on remote CPUs, with no - * association with allocation resource. - * - * Use ptr_ring, as it separates consumer and producer - * effeciently, it a way that doesn't bounce cache-lines. - * - * TODO: Implement bulk return pages into this structure. - */ - struct ptr_ring ring; - -#ifdef CONFIG_PAGE_POOL_STATS - /* recycle stats are per-cpu to avoid locking */ - struct page_pool_recycle_stats __percpu *recycle_stats; #endif - atomic_t pages_state_release_cnt; - - /* A page_pool is strictly tied to a single RX-queue being - * protected by NAPI, due to above pp_alloc_cache. This - * refcnt serves purpose is to simplify drivers error handling. - */ - refcount_t user_cnt; - - u64 destroy_cnt; -}; - -struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) { @@ -218,9 +55,6 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) return page_pool_alloc_pages(pool, gfp); } -struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, - unsigned int size, gfp_t gfp); - static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, unsigned int *offset, unsigned int size) @@ -239,20 +73,7 @@ inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool) return pool->p.dma_dir; } -bool page_pool_return_skb_page(struct page *page, bool napi_safe); - -struct page_pool *page_pool_create(const struct page_pool_params *params); - -struct xdp_mem_info; - -#ifdef CONFIG_PAGE_POOL -void page_pool_unlink_napi(struct page_pool *pool); -void page_pool_destroy(struct page_pool *pool); -void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), - struct xdp_mem_info *mem); -void page_pool_put_page_bulk(struct page_pool *pool, void **data, - int count); -#else +#ifndef CONFIG_PAGE_POOL static inline void 
page_pool_unlink_napi(struct page_pool *pool) { } @@ -261,6 +82,7 @@ static inline void page_pool_destroy(struct page_pool *pool) { } +struct xdp_mem_info; static inline void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), struct xdp_mem_info *mem) @@ -273,10 +95,6 @@ static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, } #endif -void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, - unsigned int dma_sync_size, - bool allow_direct); - /* pp_frag_count represents the number of writers who can update the page * either by updating skb->data or via DMA mappings for the device. * We can't rely on the page refcnt for that as we don't know who might be @@ -385,8 +203,6 @@ static inline bool page_pool_put(struct page_pool *pool) return refcount_dec_and_test(&pool->user_cnt); } -/* Caller must provide appropriate safe context, e.g. NAPI. */ -void page_pool_update_nid(struct page_pool *pool, int new_nid); static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) { if (unlikely(pool->p.nid != new_nid)) diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h new file mode 100644 index 000000000000..1d54ba0708db --- /dev/null +++ b/include/net/page_pool/types.h @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _NET_PAGE_POOL_TYPES_H +#define _NET_PAGE_POOL_TYPES_H + +#include <linux/dma-direction.h> +#include <linux/ptr_ring.h> + +#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA + * map/unmap + */ +#define PP_FLAG_DMA_SYNC_DEV BIT(1) /* If set all pages that the driver gets + * from page_pool will be + * DMA-synced-for-device according to + * the length provided by the device + * driver. 
+ * Please note DMA-sync-for-CPU is still + * device driver responsibility + */ +#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */ +#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\ + PP_FLAG_DMA_SYNC_DEV |\ + PP_FLAG_PAGE_FRAG) + +/* + * Fast allocation side cache array/stack + * + * The cache size and refill watermark is related to the network + * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX + * ring is usually refilled and the max consumed elements will be 64, + * thus a natural max size of objects needed in the cache. + * + * Keeping room for more objects, is due to XDP_DROP use-case. As + * XDP_DROP allows the opportunity to recycle objects directly into + * this array, as it shares the same softirq/NAPI protection. If + * cache is already full (or partly full) then the XDP_DROP recycles + * would have to take a slower code path. + */ +#define PP_ALLOC_CACHE_SIZE 128 +#define PP_ALLOC_CACHE_REFILL 64 +struct pp_alloc_cache { + u32 count; + struct page *cache[PP_ALLOC_CACHE_SIZE]; +}; + +struct page_pool_params { + unsigned int flags; + unsigned int order; + unsigned int pool_size; + int nid; /* Numa node id to allocate from pages from */ + struct device *dev; /* device, for DMA pre-mapping purposes */ + struct napi_struct *napi; /* Sole consumer of pages, otherwise NULL */ + enum dma_data_direction dma_dir; /* DMA mapping direction */ + unsigned int max_len; /* max DMA sync memory size */ + unsigned int offset; /* DMA addr offset */ + void (*init_callback)(struct page *page, void *arg); + void *init_arg; +}; + +#ifdef CONFIG_PAGE_POOL_STATS +struct page_pool_alloc_stats { + u64 fast; /* fast path allocations */ + u64 slow; /* slow-path order 0 allocations */ + u64 slow_high_order; /* slow-path high order allocations */ + u64 empty; /* failed refills due to empty ptr ring, forcing + * slow path allocation + */ + u64 refill; /* allocations via successful refill */ + u64 waive; /* failed refills due to numa zone mismatch */ +}; + +struct 
page_pool_recycle_stats { + u64 cached; /* recycling placed page in the cache. */ + u64 cache_full; /* cache was full */ + u64 ring; /* recycling placed page back into ptr ring */ + u64 ring_full; /* page was released from page-pool because + * PTR ring was full. + */ + u64 released_refcnt; /* page released because of elevated + * refcnt + */ +}; + +/* This struct wraps the above stats structs so users of the + * page_pool_get_stats API can pass a single argument when requesting the + * stats for the page pool. + */ +struct page_pool_stats { + struct page_pool_alloc_stats alloc_stats; + struct page_pool_recycle_stats recycle_stats; +}; + +int page_pool_ethtool_stats_get_count(void); +u8 *page_pool_ethtool_stats_get_strings(u8 *data); +u64 *page_pool_ethtool_stats_get(u64 *data, void *stats); + +/* + * Drivers that wish to harvest page pool stats and report them to users + * (perhaps via ethtool, debugfs, or another mechanism) can allocate a + * struct page_pool_stats and call page_pool_get_stats to get stats for the + * specified pool. + */ +bool page_pool_get_stats(struct page_pool *pool, + struct page_pool_stats *stats); +#endif + +struct page_pool { + struct page_pool_params p; + + struct delayed_work release_dw; + void (*disconnect)(void *); + unsigned long defer_start; + unsigned long defer_warn; + + u32 pages_state_hold_cnt; + unsigned int frag_offset; + struct page *frag_page; + long frag_users; + +#ifdef CONFIG_PAGE_POOL_STATS + /* these stats are incremented while in softirq context */ + struct page_pool_alloc_stats alloc_stats; +#endif + u32 xdp_mem_id; + + /* + * Data structure for allocation side + * + * Drivers allocation side usually already perform some kind + * of resource protection. Piggyback on this protection, and + * require driver to protect allocation side. + * + * For NIC drivers this means, allocate a page_pool per + * RX-queue. As the RX-queue is already protected by + * Softirq/BH scheduling and napi_schedule. 
NAPI schedule + * guarantee that a single napi_struct will only be scheduled + * on a single CPU (see napi_schedule). + */ + struct pp_alloc_cache alloc ____cacheline_aligned_in_smp; + + /* Data structure for storing recycled pages. + * + * Returning/freeing pages is more complicated synchronization + * wise, because free's can happen on remote CPUs, with no + * association with allocation resource. + * + * Use ptr_ring, as it separates consumer and producer + * efficiently, in a way that doesn't bounce cache-lines. + * + * TODO: Implement bulk return pages into this structure. + */ + struct ptr_ring ring; + +#ifdef CONFIG_PAGE_POOL_STATS + /* recycle stats are per-cpu to avoid locking */ + struct page_pool_recycle_stats __percpu *recycle_stats; +#endif + atomic_t pages_state_release_cnt; + + /* A page_pool is strictly tied to a single RX-queue being + * protected by NAPI, due to above pp_alloc_cache. This + * refcnt serves to simplify drivers error handling. + */ + refcount_t user_cnt; + + u64 destroy_cnt; +}; + +struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); +struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, + unsigned int size, gfp_t gfp); +bool page_pool_return_skb_page(struct page *page, bool napi_safe); +struct page_pool *page_pool_create(const struct page_pool_params *params); + +#ifdef CONFIG_PAGE_POOL +void page_pool_unlink_napi(struct page_pool *pool); +void page_pool_destroy(struct page_pool *pool); + +struct xdp_mem_info; +void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), + struct xdp_mem_info *mem); +void page_pool_put_page_bulk(struct page_pool *pool, void **data, + int count); +#endif + +void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, + unsigned int dma_sync_size, + bool allow_direct); + +/* Caller must provide appropriate safe context, e.g. NAPI. 
*/ +void page_pool_update_nid(struct page_pool *pool, int new_nid); + +#endif /* _NET_PAGE_POOL_TYPES_H */
Split types and pure function declarations from page_pool.h and add them in page_pool/types.h, so that C sources can include page_pool.h while headers should generally only include page_pool/types.h, as suggested by Jakub. Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> Suggested-by: Jakub Kicinski <kuba@kernel.org> CC: Alexander Lobakin <aleksander.lobakin@intel.com> --- V2: Move from page_pool_types.h to page_pool/types.h, fix some typos and alphabetical sorting. --- MAINTAINERS | 1 + drivers/net/ethernet/engleder/tsnep_main.c | 1 + drivers/net/ethernet/freescale/fec_main.c | 1 + .../marvell/octeontx2/nic/otx2_common.c | 1 + .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 1 + .../ethernet/mellanox/mlx5/core/en/params.c | 1 + .../net/ethernet/mellanox/mlx5/core/en/xdp.c | 1 + drivers/net/wireless/mediatek/mt76/mt76.h | 1 + include/linux/skbuff.h | 2 +- include/net/page_pool.h | 192 +---------------- include/net/page_pool/types.h | 193 ++++++++++++++++++ 11 files changed, 206 insertions(+), 189 deletions(-) create mode 100644 include/net/page_pool/types.h