Message ID | 20231208005250.2910004-3-almasrymina@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Device Memory TCP | expand |
Hi Mina, Apologies for not participating in the party earlier. On Fri, 8 Dec 2023 at 02:52, Mina Almasry <almasrymina@google.com> wrote: > > From: Jakub Kicinski <kuba@kernel.org> > > The page providers which try to reuse the same pages will > need to hold onto the ref, even if page gets released from > the pool - as in releasing the page from the pp just transfers > the "ownership" reference from pp to the provider, and provider > will wait for other references to be gone before feeding this > page back into the pool. > > Signed-off-by: Jakub Kicinski <kuba@kernel.org> > Signed-off-by: Mina Almasry <almasrymina@google.com> > > --- > > This is implemented by Jakub in his RFC: > https://lore.kernel.org/netdev/f8270765-a27b-6ccf-33ea-cda097168d79@redhat.com/T/ > > I take no credit for the idea or implementation; I only added minor > edits to make this workable with device memory TCP, and removed some > hacky test code. This is a critical dependency of device memory TCP > and thus I'm pulling it into this series to make it revewable and > mergable. > > RFC v3 -> v1 > - Removed unusued mem_provider. (Yunsheng). > - Replaced memory_provider & mp_priv with netdev_rx_queue (Jakub). > > --- > include/net/page_pool/types.h | 12 ++++++++++ > net/core/page_pool.c | 43 +++++++++++++++++++++++++++++++---- > 2 files changed, 50 insertions(+), 5 deletions(-) > > diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h > index ac286ea8ce2d..0e9fa79a5ef1 100644 > --- a/include/net/page_pool/types.h > +++ b/include/net/page_pool/types.h > @@ -51,6 +51,7 @@ struct pp_alloc_cache { > * @dev: device, for DMA pre-mapping purposes > * @netdev: netdev this pool will serve (leave as NULL if none or multiple) > * @napi: NAPI which is the sole consumer of pages, otherwise NULL > + * @queue: struct netdev_rx_queue this page_pool is being created for. > * @dma_dir: DMA mapping direction > * @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV > * @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV > @@ -63,6 +64,7 @@ struct page_pool_params { > int nid; > struct device *dev; > struct napi_struct *napi; > + struct netdev_rx_queue *queue; > enum dma_data_direction dma_dir; > unsigned int max_len; > unsigned int offset; > @@ -125,6 +127,13 @@ struct page_pool_stats { > }; > #endif > > +struct memory_provider_ops { > + int (*init)(struct page_pool *pool); > + void (*destroy)(struct page_pool *pool); > + struct page *(*alloc_pages)(struct page_pool *pool, gfp_t gfp); > + bool (*release_page)(struct page_pool *pool, struct page *page); > +}; > + > struct page_pool { > struct page_pool_params_fast p; > > @@ -174,6 +183,9 @@ struct page_pool { > */ > struct ptr_ring ring; > > + void *mp_priv; > + const struct memory_provider_ops *mp_ops; > + > #ifdef CONFIG_PAGE_POOL_STATS > /* recycle stats are per-cpu to avoid locking */ > struct page_pool_recycle_stats __percpu *recycle_stats; > diff --git a/net/core/page_pool.c b/net/core/page_pool.c > index ca1b3b65c9b5..f5c84d2a4510 100644 > --- a/net/core/page_pool.c > +++ b/net/core/page_pool.c > @@ -25,6 +25,8 @@ > > #include "page_pool_priv.h" > > +static DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers); We could add the existing page pool mechanisms as another 'provider', but I assume this is coded like this for performance reasons (IOW skip the expensive ptr call for the default case?) > + > #define DEFER_TIME (msecs_to_jiffies(1000)) > #define DEFER_WARN_INTERVAL (60 * HZ) > > @@ -174,6 +176,7 @@ static int page_pool_init(struct page_pool *pool, > const struct page_pool_params *params) > { > unsigned int ring_qsize = 1024; /* Default */ > + int err; > > memcpy(&pool->p, ¶ms->fast, sizeof(pool->p)); > memcpy(&pool->slow, ¶ms->slow, sizeof(pool->slow)); > @@ -234,10 +237,25 @@ static int page_pool_init(struct page_pool *pool, > /* Driver calling page_pool_create() also call page_pool_destroy() */ > refcount_set(&pool->user_cnt, 1); > > + if (pool->mp_ops) { > + err = pool->mp_ops->init(pool); > + if (err) { > + pr_warn("%s() mem-provider init failed %d\n", > + __func__, err); > + goto free_ptr_ring; > + } > + > + static_branch_inc(&page_pool_mem_providers); > + } > + > if (pool->p.flags & PP_FLAG_DMA_MAP) > get_device(pool->p.dev); > > return 0; > + > +free_ptr_ring: > + ptr_ring_cleanup(&pool->ring, NULL); > + return err; > } > > static void page_pool_uninit(struct page_pool *pool) > @@ -519,7 +537,10 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) > return page; > > /* Slow-path: cache empty, do real allocation */ > - page = __page_pool_alloc_pages_slow(pool, gfp); > + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) Why do we need && pool->mp_ops? On the init function, we only bump page_pool_mem_providers if the ops are there > + page = pool->mp_ops->alloc_pages(pool, gfp); > + else > + page = __page_pool_alloc_pages_slow(pool, gfp); > return page; > } > EXPORT_SYMBOL(page_pool_alloc_pages); > @@ -576,10 +597,13 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page) > void page_pool_return_page(struct page_pool *pool, struct page *page) > { > int count; > + bool put; > > - __page_pool_release_page_dma(pool, page); > - > - page_pool_clear_pp_info(page); > + put = true; > + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) ditto > + put = pool->mp_ops->release_page(pool, page); > + else > + __page_pool_release_page_dma(pool, page); > > /* This may be the last page returned, releasing the pool, so > * it is not safe to reference pool afterwards. > @@ -587,7 +611,10 @@ void page_pool_return_page(struct page_pool *pool, struct page *page) > count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt); > trace_page_pool_state_release(pool, page, count); > > - put_page(page); > + if (put) { > + page_pool_clear_pp_info(page); > + put_page(page); > + } > /* An optimization would be to call __free_pages(page, pool->p.order) > * knowing page is not part of page-cache (thus avoiding a > * __page_cache_release() call). > @@ -857,6 +884,12 @@ static void __page_pool_destroy(struct page_pool *pool) > > page_pool_unlist(pool); > page_pool_uninit(pool); > + > + if (pool->mp_ops) { Same here. Using a mix of pool->mp_ops and page_pool_mem_providers will work, but since we always check the ptr on init, can't we simply rely on page_pool_mem_providers for the rest of the code? Thanks /Ilias > + pool->mp_ops->destroy(pool); > + static_branch_dec(&page_pool_mem_providers); > + } > + > kfree(pool); > } > > -- > 2.43.0.472.g3155946c3a-goog >
On Tue, Dec 12, 2023 at 12:07 AM Ilias Apalodimas <ilias.apalodimas@linaro.org> wrote: > > Hi Mina, > > Apologies for not participating in the party earlier. > No worries, thanks for looking. > On Fri, 8 Dec 2023 at 02:52, Mina Almasry <almasrymina@google.com> wrote: > > > > From: Jakub Kicinski <kuba@kernel.org> > > > > The page providers which try to reuse the same pages will > > need to hold onto the ref, even if page gets released from > > the pool - as in releasing the page from the pp just transfers > > the "ownership" reference from pp to the provider, and provider > > will wait for other references to be gone before feeding this > > page back into the pool. > > > > Signed-off-by: Jakub Kicinski <kuba@kernel.org> > > Signed-off-by: Mina Almasry <almasrymina@google.com> > > > > --- > > > > This is implemented by Jakub in his RFC: > > https://lore.kernel.org/netdev/f8270765-a27b-6ccf-33ea-cda097168d79@redhat.com/T/ > > > > I take no credit for the idea or implementation; I only added minor > > edits to make this workable with device memory TCP, and removed some > > hacky test code. This is a critical dependency of device memory TCP > > and thus I'm pulling it into this series to make it revewable and > > mergable. > > > > RFC v3 -> v1 > > - Removed unusued mem_provider. (Yunsheng). > > - Replaced memory_provider & mp_priv with netdev_rx_queue (Jakub). > > > > --- > > include/net/page_pool/types.h | 12 ++++++++++ > > net/core/page_pool.c | 43 +++++++++++++++++++++++++++++++---- > > 2 files changed, 50 insertions(+), 5 deletions(-) > > > > diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h > > index ac286ea8ce2d..0e9fa79a5ef1 100644 > > --- a/include/net/page_pool/types.h > > +++ b/include/net/page_pool/types.h > > @@ -51,6 +51,7 @@ struct pp_alloc_cache { > > * @dev: device, for DMA pre-mapping purposes > > * @netdev: netdev this pool will serve (leave as NULL if none or multiple) > > * @napi: NAPI which is the sole consumer of pages, otherwise NULL > > + * @queue: struct netdev_rx_queue this page_pool is being created for. > > * @dma_dir: DMA mapping direction > > * @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV > > * @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV > > @@ -63,6 +64,7 @@ struct page_pool_params { > > int nid; > > struct device *dev; > > struct napi_struct *napi; > > + struct netdev_rx_queue *queue; > > enum dma_data_direction dma_dir; > > unsigned int max_len; > > unsigned int offset; > > @@ -125,6 +127,13 @@ struct page_pool_stats { > > }; > > #endif > > > > +struct memory_provider_ops { > > + int (*init)(struct page_pool *pool); > > + void (*destroy)(struct page_pool *pool); > > + struct page *(*alloc_pages)(struct page_pool *pool, gfp_t gfp); > > + bool (*release_page)(struct page_pool *pool, struct page *page); > > +}; > > + > > struct page_pool { > > struct page_pool_params_fast p; > > > > @@ -174,6 +183,9 @@ struct page_pool { > > */ > > struct ptr_ring ring; > > > > + void *mp_priv; > > + const struct memory_provider_ops *mp_ops; > > + > > #ifdef CONFIG_PAGE_POOL_STATS > > /* recycle stats are per-cpu to avoid locking */ > > struct page_pool_recycle_stats __percpu *recycle_stats; > > diff --git a/net/core/page_pool.c b/net/core/page_pool.c > > index ca1b3b65c9b5..f5c84d2a4510 100644 > > --- a/net/core/page_pool.c > > +++ b/net/core/page_pool.c > > @@ -25,6 +25,8 @@ > > > > #include "page_pool_priv.h" > > > > +static DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers); > > We could add the existing page pool mechanisms as another 'provider', > but I assume this is coded like this for performance reasons (IOW skip > the expensive ptr call for the default case?) > Correct, it's done like this for performance reasons. > > + > > #define DEFER_TIME (msecs_to_jiffies(1000)) > > #define DEFER_WARN_INTERVAL (60 * HZ) > > > > @@ -174,6 +176,7 @@ static int page_pool_init(struct page_pool *pool, > > const struct page_pool_params *params) > > { > > unsigned int ring_qsize = 1024; /* Default */ > > + int err; > > > > memcpy(&pool->p, ¶ms->fast, sizeof(pool->p)); > > memcpy(&pool->slow, ¶ms->slow, sizeof(pool->slow)); > > @@ -234,10 +237,25 @@ static int page_pool_init(struct page_pool *pool, > > /* Driver calling page_pool_create() also call page_pool_destroy() */ > > refcount_set(&pool->user_cnt, 1); > > > > + if (pool->mp_ops) { > > + err = pool->mp_ops->init(pool); > > + if (err) { > > + pr_warn("%s() mem-provider init failed %d\n", > > + __func__, err); > > + goto free_ptr_ring; > > + } > > + > > + static_branch_inc(&page_pool_mem_providers); > > + } > > + > > if (pool->p.flags & PP_FLAG_DMA_MAP) > > get_device(pool->p.dev); > > > > return 0; > > + > > +free_ptr_ring: > > + ptr_ring_cleanup(&pool->ring, NULL); > > + return err; > > } > > > > static void page_pool_uninit(struct page_pool *pool) > > @@ -519,7 +537,10 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) > > return page; > > > > /* Slow-path: cache empty, do real allocation */ > > - page = __page_pool_alloc_pages_slow(pool, gfp); > > + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) > > Why do we need && pool->mp_ops? On the init function, we only bump > page_pool_mem_providers if the ops are there > Note that page_pool_mem_providers is a static variable (not part of the page_pool struct), so if you have 2 page_pools on the system, one using devmem and one not, we need to check pool->mp_ops to make sure this page_pool is using a memory provider. > > + page = pool->mp_ops->alloc_pages(pool, gfp); > > + else > > + page = __page_pool_alloc_pages_slow(pool, gfp); > > return page; > > } > > EXPORT_SYMBOL(page_pool_alloc_pages); > > @@ -576,10 +597,13 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page) > > void page_pool_return_page(struct page_pool *pool, struct page *page) > > { > > int count; > > + bool put; > > > > - __page_pool_release_page_dma(pool, page); > > - > > - page_pool_clear_pp_info(page); > > + put = true; > > + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) > > ditto > > > + put = pool->mp_ops->release_page(pool, page); > > + else > > + __page_pool_release_page_dma(pool, page); > > > > /* This may be the last page returned, releasing the pool, so > > * it is not safe to reference pool afterwards. > > @@ -587,7 +611,10 @@ void page_pool_return_page(struct page_pool *pool, struct page *page) > > count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt); > > trace_page_pool_state_release(pool, page, count); > > > > - put_page(page); > > + if (put) { > > + page_pool_clear_pp_info(page); > > + put_page(page); > > + } > > /* An optimization would be to call __free_pages(page, pool->p.order) > > * knowing page is not part of page-cache (thus avoiding a > > * __page_cache_release() call). > > @@ -857,6 +884,12 @@ static void __page_pool_destroy(struct page_pool *pool) > > > > page_pool_unlist(pool); > > page_pool_uninit(pool); > > + > > + if (pool->mp_ops) { > > Same here. Using a mix of pool->mp_ops and page_pool_mem_providers > will work, but since we always check the ptr on init, can't we simply > rely on page_pool_mem_providers for the rest of the code? > > Thanks > /Ilias > > + pool->mp_ops->destroy(pool); > > + static_branch_dec(&page_pool_mem_providers); > > + } > > + > > kfree(pool); > > } > > > > -- > > 2.43.0.472.g3155946c3a-goog > > -- Thanks, Mina
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index ac286ea8ce2d..0e9fa79a5ef1 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -51,6 +51,7 @@ struct pp_alloc_cache { * @dev: device, for DMA pre-mapping purposes * @netdev: netdev this pool will serve (leave as NULL if none or multiple) * @napi: NAPI which is the sole consumer of pages, otherwise NULL + * @queue: struct netdev_rx_queue this page_pool is being created for. * @dma_dir: DMA mapping direction * @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV * @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV @@ -63,6 +64,7 @@ struct page_pool_params { int nid; struct device *dev; struct napi_struct *napi; + struct netdev_rx_queue *queue; enum dma_data_direction dma_dir; unsigned int max_len; unsigned int offset; @@ -125,6 +127,13 @@ struct page_pool_stats { }; #endif +struct memory_provider_ops { + int (*init)(struct page_pool *pool); + void (*destroy)(struct page_pool *pool); + struct page *(*alloc_pages)(struct page_pool *pool, gfp_t gfp); + bool (*release_page)(struct page_pool *pool, struct page *page); +}; + struct page_pool { struct page_pool_params_fast p; @@ -174,6 +183,9 @@ struct page_pool { */ struct ptr_ring ring; + void *mp_priv; + const struct memory_provider_ops *mp_ops; + #ifdef CONFIG_PAGE_POOL_STATS /* recycle stats are per-cpu to avoid locking */ struct page_pool_recycle_stats __percpu *recycle_stats; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index ca1b3b65c9b5..f5c84d2a4510 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -25,6 +25,8 @@ #include "page_pool_priv.h" +static DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers); + #define DEFER_TIME (msecs_to_jiffies(1000)) #define DEFER_WARN_INTERVAL (60 * HZ) @@ -174,6 +176,7 @@ static int page_pool_init(struct page_pool *pool, const struct page_pool_params *params) { unsigned int ring_qsize = 1024; /* Default */ + int err; memcpy(&pool->p, ¶ms->fast, sizeof(pool->p)); memcpy(&pool->slow, ¶ms->slow, sizeof(pool->slow)); @@ -234,10 +237,25 @@ static int page_pool_init(struct page_pool *pool, /* Driver calling page_pool_create() also call page_pool_destroy() */ refcount_set(&pool->user_cnt, 1); + if (pool->mp_ops) { + err = pool->mp_ops->init(pool); + if (err) { + pr_warn("%s() mem-provider init failed %d\n", + __func__, err); + goto free_ptr_ring; + } + + static_branch_inc(&page_pool_mem_providers); + } + if (pool->p.flags & PP_FLAG_DMA_MAP) get_device(pool->p.dev); return 0; + +free_ptr_ring: + ptr_ring_cleanup(&pool->ring, NULL); + return err; } static void page_pool_uninit(struct page_pool *pool) @@ -519,7 +537,10 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) return page; /* Slow-path: cache empty, do real allocation */ - page = __page_pool_alloc_pages_slow(pool, gfp); + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) + page = pool->mp_ops->alloc_pages(pool, gfp); + else + page = __page_pool_alloc_pages_slow(pool, gfp); return page; } EXPORT_SYMBOL(page_pool_alloc_pages); @@ -576,10 +597,13 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page) void page_pool_return_page(struct page_pool *pool, struct page *page) { int count; + bool put; - __page_pool_release_page_dma(pool, page); - - page_pool_clear_pp_info(page); + put = true; + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) + put = pool->mp_ops->release_page(pool, page); + else + __page_pool_release_page_dma(pool, page); /* This may be the last page returned, releasing the pool, so * it is not safe to reference pool afterwards. @@ -587,7 +611,10 @@ void page_pool_return_page(struct page_pool *pool, struct page *page) count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt); trace_page_pool_state_release(pool, page, count); - put_page(page); + if (put) { + page_pool_clear_pp_info(page); + put_page(page); + } /* An optimization would be to call __free_pages(page, pool->p.order) * knowing page is not part of page-cache (thus avoiding a * __page_cache_release() call). @@ -857,6 +884,12 @@ static void __page_pool_destroy(struct page_pool *pool) page_pool_unlist(pool); page_pool_uninit(pool); + + if (pool->mp_ops) { + pool->mp_ops->destroy(pool); + static_branch_dec(&page_pool_mem_providers); + } + kfree(pool); }