Message ID | 20241203173733.3181246-10-aleksander.lobakin@intel.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | xdp: a fistful of generic changes pt. I | expand |
Very nice in general, I'll apply the previous 8 but I'd like to offer some alternatives here.. On Tue, 3 Dec 2024 18:37:32 +0100 Alexander Lobakin wrote: > +void page_pool_put_netmem_bulk(netmem_ref *data, u32 count) > { > - int i, bulk_len = 0; > - bool allow_direct; > - bool in_softirq; > + bool allow_direct, in_softirq, again = false; > + netmem_ref bulk[XDP_BULK_QUEUE_SIZE]; > + u32 i, bulk_len, foreign; > + struct page_pool *pool; > > - allow_direct = page_pool_napi_local(pool); > +again: > + pool = NULL; > + bulk_len = 0; > + foreign = 0; > > for (i = 0; i < count; i++) { > - netmem_ref netmem = netmem_compound_head(data[i]); > + struct page_pool *netmem_pp; > + netmem_ref netmem; > + > + if (!again) { > + netmem = netmem_compound_head(data[i]); > > - /* It is not the last user for the page frag case */ > - if (!page_pool_is_last_ref(netmem)) > + /* It is not the last user for the page frag case */ > + if (!page_pool_is_last_ref(netmem)) > + continue; We check the "again" condition potentially n^2 times, is it written this way because we expect no mixing? Would it not be fewer cycles to do a first pass, convert all buffers to heads, filter out all non-last refs, and delete the "again" check? Minor benefit is that it removes a few of the long lines so it'd be feasible to drop the "goto again" as well and just turn this function into a while (count) loop. > + } else { > + netmem = data[i]; > + } > + > + netmem_pp = netmem_get_pp(netmem); nit: netmem_pp is not a great name. Ain't nothing especially netmem about it, it's just the _current_ page pool. > + if (unlikely(!pool)) { > + pool = netmem_pp; > + allow_direct = page_pool_napi_local(pool); > + } else if (netmem_pp != pool) { > + /* > + * If the netmem belongs to a different page_pool, save > + * it for another round after the main loop. > + */ > + data[foreign++] = netmem; > continue; > + } > > netmem = __page_pool_put_page(pool, netmem, -1, allow_direct); > /* Approved for bulk recycling in ptr_ring cache */ > if (netmem) > - data[bulk_len++] = netmem; > + bulk[bulk_len++] = netmem; > } > > if (!bulk_len) You can invert this condition, and move all the code from here to the out label into a small helper with just 3 params (pool, bulk, bulk_len). Naming will be the tricky part but you can save us a bunch of gotos. > - return; > + goto out; > > /* Bulk producer into ptr_ring page_pool cache */ > in_softirq = page_pool_producer_lock(pool); > for (i = 0; i < bulk_len; i++) { > - if (__ptr_ring_produce(&pool->ring, (__force void *)data[i])) { > + if (__ptr_ring_produce(&pool->ring, (__force void *)bulk[i])) { > /* ring full */ > recycle_stat_inc(pool, ring_full); > break; > @@ -893,13 +915,22 @@ void page_pool_put_netmem_bulk(struct page_pool *pool, netmem_ref *data, > > /* Hopefully all pages was return into ptr_ring */ > if (likely(i == bulk_len)) > - return; > + goto out; > > /* ptr_ring cache full, free remaining pages outside producer lock > * since put_page() with refcnt == 1 can be an expensive operation > */ > for (; i < bulk_len; i++) > - page_pool_return_page(pool, data[i]); > + page_pool_return_page(pool, bulk[i]); > + > +out: > + if (!foreign) > + return; > + > + count = foreign; > + again = true; > + > + goto again; > } > EXPORT_SYMBOL(page_pool_put_netmem_bulk);
From: Jakub Kicinski <kuba@kernel.org> Date: Thu, 5 Dec 2024 18:40:16 -0800 > Very nice in general, I'll apply the previous 8 but I'd like to offer > some alternatives here.. Great suggestions, I addressed most of them already and the function looks much better now. One note below. > > On Tue, 3 Dec 2024 18:37:32 +0100 Alexander Lobakin wrote: >> +void page_pool_put_netmem_bulk(netmem_ref *data, u32 count) >> { >> - int i, bulk_len = 0; >> - bool allow_direct; >> - bool in_softirq; >> + bool allow_direct, in_softirq, again = false; >> + netmem_ref bulk[XDP_BULK_QUEUE_SIZE]; >> + u32 i, bulk_len, foreign; >> + struct page_pool *pool; >> >> - allow_direct = page_pool_napi_local(pool); >> +again: >> + pool = NULL; >> + bulk_len = 0; >> + foreign = 0; >> >> for (i = 0; i < count; i++) { >> - netmem_ref netmem = netmem_compound_head(data[i]); >> + struct page_pool *netmem_pp; >> + netmem_ref netmem; >> + >> + if (!again) { >> + netmem = netmem_compound_head(data[i]); >> >> - /* It is not the last user for the page frag case */ >> - if (!page_pool_is_last_ref(netmem)) >> + /* It is not the last user for the page frag case */ >> + if (!page_pool_is_last_ref(netmem)) >> + continue; > > We check the "again" condition potentially n^2 times, is it written > this way because we expect no mixing? Would it not be fewer cycles > to do a first pass, convert all buffers to heads, filter out all > non-last refs, and delete the "again" check? > > Minor benefit is that it removes a few of the long lines so it'd be > feasible to drop the "goto again" as well and just turn this function > into a while (count) loop. > >> + } else { >> + netmem = data[i]; >> + } >> + >> + netmem_pp = netmem_get_pp(netmem); > > nit: netmem_pp is not a great name. Ain't nothing especially netmem > about it, it's just the _current_ page pool. It's the page_pool of the @netmem we're processing on this iteration. "This netmem's PP" => netmem_pp. Current page_pool which we'll use for recycling is @pool. > >> + if (unlikely(!pool)) { >> + pool = netmem_pp; >> + allow_direct = page_pool_napi_local(pool); >> + } else if (netmem_pp != pool) { >> + /* >> + * If the netmem belongs to a different page_pool, save >> + * it for another round after the main loop. >> + */ >> + data[foreign++] = netmem; >> continue; >> + } >> >> netmem = __page_pool_put_page(pool, netmem, -1, allow_direct); >> /* Approved for bulk recycling in ptr_ring cache */ >> if (netmem) >> - data[bulk_len++] = netmem; >> + bulk[bulk_len++] = netmem; >> } Thanks, Olek
On Fri, 6 Dec 2024 14:49:18 +0100 Alexander Lobakin wrote: > > nit: netmem_pp is not a great name. Ain't nothing especially netmem > > about it, it's just the _current_ page pool. > > It's the page_pool of the @netmem we're processing on this iteration. > "This netmem's PP" => netmem_pp. > Current page_pool which we'll use for recycling is @pool. Heh, yes, I guess there are levels to current..ness :) Maybe instead of current the one we're servicing could be called tgt_pp and the one from iterator just pp? No big deal either way, tho, this is very nit picky and subjective...
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 1ea16b0e9c79..05a864031271 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -259,8 +259,7 @@ void page_pool_disable_direct_recycling(struct page_pool *pool); void page_pool_destroy(struct page_pool *pool); void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), const struct xdp_mem_info *mem); -void page_pool_put_netmem_bulk(struct page_pool *pool, netmem_ref *data, - u32 count); +void page_pool_put_netmem_bulk(netmem_ref *data, u32 count); #else static inline void page_pool_destroy(struct page_pool *pool) { @@ -272,8 +271,7 @@ static inline void page_pool_use_xdp_mem(struct page_pool *pool, { } -static inline void page_pool_put_netmem_bulk(struct page_pool *pool, - netmem_ref *data, u32 count) +static inline void page_pool_put_netmem_bulk(netmem_ref *data, u32 count) { } #endif diff --git a/include/net/xdp.h b/include/net/xdp.h index f4020b29122f..9e7eb8223513 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -11,6 +11,8 @@ #include <linux/netdevice.h> #include <linux/skbuff.h> /* skb_shared_info */ +#include <net/page_pool/types.h> + /** * DOC: XDP RX-queue information * @@ -193,14 +195,12 @@ xdp_frame_is_frag_pfmemalloc(const struct xdp_frame *frame) #define XDP_BULK_QUEUE_SIZE 16 struct xdp_frame_bulk { int count; - void *xa; netmem_ref q[XDP_BULK_QUEUE_SIZE]; }; static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq) { - /* bq->count will be zero'ed when bq->xa gets updated */ - bq->xa = NULL; + bq->count = 0; } static inline struct skb_shared_info * @@ -317,10 +317,18 @@ void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); void xdp_return_buff(struct xdp_buff *xdp); -void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq); void xdp_return_frame_bulk(struct xdp_frame *xdpf, struct xdp_frame_bulk *bq); +static inline void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq) +{ + if (unlikely(!bq->count)) + return; + + page_pool_put_netmem_bulk(bq->q, bq->count); + bq->count = 0; +} + static __always_inline unsigned int xdp_get_frame_len(const struct xdp_frame *xdpf) { diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 4c85b77cfdac..62cd1fcb9e97 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -841,7 +841,6 @@ EXPORT_SYMBOL(page_pool_put_unrefed_page); /** * page_pool_put_netmem_bulk() - release references on multiple netmems - * @pool: pool from which pages were allocated * @data: array holding netmem references * @count: number of entries in @data * @@ -854,35 +853,58 @@ EXPORT_SYMBOL(page_pool_put_unrefed_page); * Please note the caller must not use data area after running * page_pool_put_netmem_bulk(), as this function overwrites it. */ -void page_pool_put_netmem_bulk(struct page_pool *pool, netmem_ref *data, - u32 count) +void page_pool_put_netmem_bulk(netmem_ref *data, u32 count) { - int i, bulk_len = 0; - bool allow_direct; - bool in_softirq; + bool allow_direct, in_softirq, again = false; + netmem_ref bulk[XDP_BULK_QUEUE_SIZE]; + u32 i, bulk_len, foreign; + struct page_pool *pool; - allow_direct = page_pool_napi_local(pool); +again: + pool = NULL; + bulk_len = 0; + foreign = 0; for (i = 0; i < count; i++) { - netmem_ref netmem = netmem_compound_head(data[i]); + struct page_pool *netmem_pp; + netmem_ref netmem; + + if (!again) { + netmem = netmem_compound_head(data[i]); - /* It is not the last user for the page frag case */ - if (!page_pool_is_last_ref(netmem)) + /* It is not the last user for the page frag case */ + if (!page_pool_is_last_ref(netmem)) + continue; + } else { + netmem = data[i]; + } + + netmem_pp = netmem_get_pp(netmem); + if (unlikely(!pool)) { + pool = netmem_pp; + allow_direct = page_pool_napi_local(pool); + } else if (netmem_pp != pool) { + /* + * If the netmem belongs to a different page_pool, save + * it for another round after the main loop. + */ + data[foreign++] = netmem; continue; + } netmem = __page_pool_put_page(pool, netmem, -1, allow_direct); /* Approved for bulk recycling in ptr_ring cache */ if (netmem) - data[bulk_len++] = netmem; + bulk[bulk_len++] = netmem; } if (!bulk_len) - return; + goto out; /* Bulk producer into ptr_ring page_pool cache */ in_softirq = page_pool_producer_lock(pool); for (i = 0; i < bulk_len; i++) { - if (__ptr_ring_produce(&pool->ring, (__force void *)data[i])) { + if (__ptr_ring_produce(&pool->ring, (__force void *)bulk[i])) { /* ring full */ recycle_stat_inc(pool, ring_full); break; @@ -893,13 +915,22 @@ void page_pool_put_netmem_bulk(struct page_pool *pool, netmem_ref *data, /* Hopefully all pages was return into ptr_ring */ if (likely(i == bulk_len)) - return; + goto out; /* ptr_ring cache full, free remaining pages outside producer lock * since put_page() with refcnt == 1 can be an expensive operation */ for (; i < bulk_len; i++) - page_pool_return_page(pool, data[i]); + page_pool_return_page(pool, bulk[i]); + +out: + if (!foreign) + return; + + count = foreign; + again = true; + + goto again; } EXPORT_SYMBOL(page_pool_put_netmem_bulk); diff --git a/net/core/xdp.c b/net/core/xdp.c index 938ad15c9857..56127e8ec85f 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -511,46 +511,19 @@ EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); * xdp_frame_bulk is usually stored/allocated on the function * call-stack to avoid locking penalties. */ -void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq) -{ - struct xdp_mem_allocator *xa = bq->xa; - - if (unlikely(!xa || !bq->count)) - return; - - page_pool_put_netmem_bulk(xa->page_pool, bq->q, bq->count); - /* bq->xa is not cleared to save lookup, if mem.id same in next bulk */ - bq->count = 0; -} -EXPORT_SYMBOL_GPL(xdp_flush_frame_bulk); /* Must be called with rcu_read_lock held */ void xdp_return_frame_bulk(struct xdp_frame *xdpf, struct xdp_frame_bulk *bq) { - struct xdp_mem_info *mem = &xdpf->mem; - struct xdp_mem_allocator *xa; - - if (mem->type != MEM_TYPE_PAGE_POOL) { + if (xdpf->mem.type != MEM_TYPE_PAGE_POOL) { xdp_return_frame(xdpf); return; } - xa = bq->xa; - if (unlikely(!xa)) { - xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); - bq->count = 0; - bq->xa = xa; - } - if (bq->count == XDP_BULK_QUEUE_SIZE) xdp_flush_frame_bulk(bq); - if (unlikely(mem->id != xa->mem.id)) { - xdp_flush_frame_bulk(bq); - bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); - } - if (unlikely(xdp_frame_has_frags(xdpf))) { struct skb_shared_info *sinfo; int i;