
[v4,net-next,1/4] net: core: page_pool: add user cnt preventing pool deletion

Message ID 20190625175948.24771-2-ivan.khoronzhuk@linaro.org (mailing list archive)
State New, archived
Series net: ethernet: ti: cpsw: Add XDP support

Commit Message

Ivan Khoronzhuk June 25, 2019, 5:59 p.m. UTC
Add a user counter that allows deleting the pool only when it has no
users. It doesn't prevent the pool from being flushed; it only
prevents freeing the pool instance. This helps when there is no need
to delete the pool, and it becomes the user's responsibility to free
it by calling page_pool_free() during the destroy procedure. It also
makes the page_pool_free() call explicit, not fully hidden in xdp
unreg, which looks more correct paired with the page pool "create" routine.

Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 +++++---
 include/net/page_pool.h                           | 7 +++++++
 net/core/page_pool.c                              | 7 +++++++
 net/core/xdp.c                                    | 3 +++
 4 files changed, 22 insertions(+), 3 deletions(-)
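
The calling pattern this creates, per the commit message, is roughly the
following (a sketch using hypothetical driver code, not lines from the patch):

	/* page_pool_create() takes the first reference, and
	 * xdp_rxq_info_reg_mem_model() takes a second one, so the
	 * driver must drop its own reference with page_pool_free()
	 * once the pool is no longer needed.
	 */
	pool = page_pool_create(&params);		/* user_cnt -> 1 */
	err = xdp_rxq_info_reg_mem_model(&rxq, MEM_TYPE_PAGE_POOL,
					 pool);		/* user_cnt -> 2 */
	...
	xdp_rxq_info_unreg(&rxq);	/* xdp core drops its reference
					 * once in-flight pages are gone */
	page_pool_free(pool);		/* driver drops its reference;
					 * the pool is freed at zero */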

Comments

Willem de Bruijn June 26, 2019, 1:36 a.m. UTC | #1
On Tue, Jun 25, 2019 at 2:00 PM Ivan Khoronzhuk
<ivan.khoronzhuk@linaro.org> wrote:
>
> Add a user counter that allows deleting the pool only when it has no
> users. It doesn't prevent the pool from being flushed; it only
> prevents freeing the pool instance. This helps when there is no need
> to delete the pool, and it becomes the user's responsibility to free
> it by calling page_pool_free() during the destroy procedure. It also
> makes the page_pool_free() call explicit, not fully hidden in xdp
> unreg, which looks more correct paired with the page pool "create" routine.
>
> Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
> ---

> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
> index f07c518ef8a5..1ec838e9927e 100644
> --- a/include/net/page_pool.h
> +++ b/include/net/page_pool.h
> @@ -101,6 +101,7 @@ struct page_pool {
>         struct ptr_ring ring;
>
>         atomic_t pages_state_release_cnt;
> +       atomic_t user_cnt;

refcount_t?
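
A refcount_t version of these helpers could look like the sketch below
(not part of the posted patch; note that refcount_inc() WARNs when
incrementing from zero, so page_pool_init() would need to start the
counter at 1 via refcount_set() instead of 0):

	/* sketch: user_cnt as refcount_t instead of atomic_t */
	static inline void page_pool_get(struct page_pool *pool)
	{
		refcount_inc(&pool->user_cnt);
	}

	/* in __page_pool_free(), the dec-and-test would become: */
	if (!refcount_dec_and_test(&pool->user_cnt))
		return;	/* other users remain */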

>  };
>
>  struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
> @@ -183,6 +184,12 @@ static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>         return page->dma_addr;
>  }
>
> +/* used to prevent pool from deallocation */
> +static inline void page_pool_get(struct page_pool *pool)
> +{
> +       atomic_inc(&pool->user_cnt);
> +}
> +
>  static inline bool is_page_pool_compiled_in(void)
>  {
>  #ifdef CONFIG_PAGE_POOL
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index b366f59885c1..169b0e3c870e 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -48,6 +48,7 @@ static int page_pool_init(struct page_pool *pool,
>                 return -ENOMEM;
>
>         atomic_set(&pool->pages_state_release_cnt, 0);
> +       atomic_set(&pool->user_cnt, 0);
>
>         if (pool->p.flags & PP_FLAG_DMA_MAP)
>                 get_device(pool->p.dev);
> @@ -70,6 +71,8 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
>                 kfree(pool);
>                 return ERR_PTR(err);
>         }
> +
> +       page_pool_get(pool);
>         return pool;
>  }
>  EXPORT_SYMBOL(page_pool_create);
> @@ -356,6 +359,10 @@ static void __warn_in_flight(struct page_pool *pool)
>
>  void __page_pool_free(struct page_pool *pool)
>  {
> +       /* free only if no users */
> +       if (!atomic_dec_and_test(&pool->user_cnt))
> +               return;
> +
>         WARN(pool->alloc.count, "API usage violation");
>         WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
>
> diff --git a/net/core/xdp.c b/net/core/xdp.c
> index 829377cc83db..04bdcd784d2e 100644
> --- a/net/core/xdp.c
> +++ b/net/core/xdp.c
> @@ -372,6 +372,9 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
>
>         mutex_unlock(&mem_id_lock);
>
> +       if (type == MEM_TYPE_PAGE_POOL)
> +               page_pool_get(xdp_alloc->page_pool);
> +

need an analogous page_pool_put in xdp_rxq_info_unreg_mem_model? mlx5
does not use that inverse function, but the Intel drivers do.

>         trace_mem_connect(xdp_alloc, xdp_rxq);
>         return 0;
>  err:
> --
> 2.17.1
>
Jesper Dangaard Brouer June 26, 2019, 10:42 a.m. UTC | #2
On Tue, 25 Jun 2019 20:59:45 +0300
Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org> wrote:

> Add a user counter that allows deleting the pool only when it has no
> users. It doesn't prevent the pool from being flushed; it only
> prevents freeing the pool instance. This helps when there is no need
> to delete the pool, and it becomes the user's responsibility to free
> it by calling page_pool_free() during the destroy procedure. It also
> makes the page_pool_free() call explicit, not fully hidden in xdp
> unreg, which looks more correct paired with the page pool "create" routine.

No, this is wrong.

> Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
> ---
>  drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 +++++---
>  include/net/page_pool.h                           | 7 +++++++
>  net/core/page_pool.c                              | 7 +++++++
>  net/core/xdp.c                                    | 3 +++
>  4 files changed, 22 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index 5e40db8f92e6..cb028de64a1d 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@ -545,10 +545,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
>  	}
>  	err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
>  					 MEM_TYPE_PAGE_POOL, rq->page_pool);
> -	if (err) {
> -		page_pool_free(rq->page_pool);
> +	if (err)
>  		goto err_free;
> -	}
>  
>  	for (i = 0; i < wq_sz; i++) {
>  		if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
> @@ -613,6 +611,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
>  	if (rq->xdp_prog)
>  		bpf_prog_put(rq->xdp_prog);
>  	xdp_rxq_info_unreg(&rq->xdp_rxq);
> +	if (rq->page_pool)
> +		page_pool_free(rq->page_pool);
>  	mlx5_wq_destroy(&rq->wq_ctrl);
>  
>  	return err;
> @@ -643,6 +643,8 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
>  	}
>  
>  	xdp_rxq_info_unreg(&rq->xdp_rxq);
> +	if (rq->page_pool)
> +		page_pool_free(rq->page_pool);

No, this is wrong.  The whole point of the merged page_pool fixes
patchset was that page_pool_free() needs to be delayed until no more
in-flight packets exist.


>  	mlx5_wq_destroy(&rq->wq_ctrl);
>  }
>  
> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
> index f07c518ef8a5..1ec838e9927e 100644
> --- a/include/net/page_pool.h
> +++ b/include/net/page_pool.h
> @@ -101,6 +101,7 @@ struct page_pool {
>  	struct ptr_ring ring;
>  
>  	atomic_t pages_state_release_cnt;
> +	atomic_t user_cnt;
>  };
>  
>  struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
> @@ -183,6 +184,12 @@ static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>  	return page->dma_addr;
>  }
>  
> +/* used to prevent pool from deallocation */
> +static inline void page_pool_get(struct page_pool *pool)
> +{
> +	atomic_inc(&pool->user_cnt);
> +}
> +
>  static inline bool is_page_pool_compiled_in(void)
>  {
>  #ifdef CONFIG_PAGE_POOL
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index b366f59885c1..169b0e3c870e 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -48,6 +48,7 @@ static int page_pool_init(struct page_pool *pool,
>  		return -ENOMEM;
>  
>  	atomic_set(&pool->pages_state_release_cnt, 0);
> +	atomic_set(&pool->user_cnt, 0);
>  
>  	if (pool->p.flags & PP_FLAG_DMA_MAP)
>  		get_device(pool->p.dev);
> @@ -70,6 +71,8 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
>  		kfree(pool);
>  		return ERR_PTR(err);
>  	}
> +
> +	page_pool_get(pool);
>  	return pool;
>  }
>  EXPORT_SYMBOL(page_pool_create);
> @@ -356,6 +359,10 @@ static void __warn_in_flight(struct page_pool *pool)
>  
>  void __page_pool_free(struct page_pool *pool)
>  {
> +	/* free only if no users */
> +	if (!atomic_dec_and_test(&pool->user_cnt))
> +		return;
> +
>  	WARN(pool->alloc.count, "API usage violation");
>  	WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
>  
> diff --git a/net/core/xdp.c b/net/core/xdp.c
> index 829377cc83db..04bdcd784d2e 100644
> --- a/net/core/xdp.c
> +++ b/net/core/xdp.c
> @@ -372,6 +372,9 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
>  
>  	mutex_unlock(&mem_id_lock);
>  
> +	if (type == MEM_TYPE_PAGE_POOL)
> +		page_pool_get(xdp_alloc->page_pool);
> +
>  	trace_mem_connect(xdp_alloc, xdp_rxq);
>  	return 0;
>  err:
Ivan Khoronzhuk June 26, 2019, 10:49 a.m. UTC | #3
On Wed, Jun 26, 2019 at 12:42:16PM +0200, Jesper Dangaard Brouer wrote:
>On Tue, 25 Jun 2019 20:59:45 +0300
>Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org> wrote:
>
>> Add a user counter that allows deleting the pool only when it has no
>> users. It doesn't prevent the pool from being flushed; it only
>> prevents freeing the pool instance. This helps when there is no need
>> to delete the pool, and it becomes the user's responsibility to free
>> it by calling page_pool_free() during the destroy procedure. It also
>> makes the page_pool_free() call explicit, not fully hidden in xdp
>> unreg, which looks more correct paired with the page pool "create" routine.
>
>No, this is wrong.
See below.

>
>> Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
>> ---
>>  drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 +++++---
>>  include/net/page_pool.h                           | 7 +++++++
>>  net/core/page_pool.c                              | 7 +++++++
>>  net/core/xdp.c                                    | 3 +++
>>  4 files changed, 22 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>> index 5e40db8f92e6..cb028de64a1d 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>> @@ -545,10 +545,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
>>  	}
>>  	err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
>>  					 MEM_TYPE_PAGE_POOL, rq->page_pool);
>> -	if (err) {
>> -		page_pool_free(rq->page_pool);
>> +	if (err)
>>  		goto err_free;
>> -	}
>>
>>  	for (i = 0; i < wq_sz; i++) {
>>  		if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
>> @@ -613,6 +611,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
>>  	if (rq->xdp_prog)
>>  		bpf_prog_put(rq->xdp_prog);
>>  	xdp_rxq_info_unreg(&rq->xdp_rxq);
>> +	if (rq->page_pool)
>> +		page_pool_free(rq->page_pool);
>>  	mlx5_wq_destroy(&rq->wq_ctrl);
>>
>>  	return err;
>> @@ -643,6 +643,8 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
>>  	}
>>
>>  	xdp_rxq_info_unreg(&rq->xdp_rxq);
>> +	if (rq->page_pool)
>> +		page_pool_free(rq->page_pool);
>
>No, this is wrong.  The whole point of the merged page_pool fixes
>patchset was that page_pool_free() needs to be delayed until no more
>in-flight packets exist.

Probably it's not so obvious, but it's still delayed, and the pool is
deleted only after no more in-flight packets exist. The only question
here is who is able to do this first, based on the refcnt.
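
In other words, two references make the final free order-independent;
a summary of the patch's semantics:

	/* After setup there are two references:
	 *   page_pool_create()           -> user_cnt = 1 (driver)
	 *   xdp_rxq_info_reg_mem_model() -> user_cnt = 2 (xdp core)
	 *
	 * On teardown, in either order:
	 *   page_pool_free() from the driver  -> user_cnt = 1, early return
	 *   page_pool_free() from xdp's deferred workqueue, only after
	 *   all in-flight pages have returned -> user_cnt = 0, pool freed
	 */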

>
>
>>  	mlx5_wq_destroy(&rq->wq_ctrl);
>>  }
>>
>> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
>> index f07c518ef8a5..1ec838e9927e 100644
>> --- a/include/net/page_pool.h
>> +++ b/include/net/page_pool.h
>> @@ -101,6 +101,7 @@ struct page_pool {
>>  	struct ptr_ring ring;
>>
>>  	atomic_t pages_state_release_cnt;
>> +	atomic_t user_cnt;
>>  };
>>
>>  struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
>> @@ -183,6 +184,12 @@ static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>>  	return page->dma_addr;
>>  }
>>
>> +/* used to prevent pool from deallocation */
>> +static inline void page_pool_get(struct page_pool *pool)
>> +{
>> +	atomic_inc(&pool->user_cnt);
>> +}
>> +
>>  static inline bool is_page_pool_compiled_in(void)
>>  {
>>  #ifdef CONFIG_PAGE_POOL
>> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
>> index b366f59885c1..169b0e3c870e 100644
>> --- a/net/core/page_pool.c
>> +++ b/net/core/page_pool.c
>> @@ -48,6 +48,7 @@ static int page_pool_init(struct page_pool *pool,
>>  		return -ENOMEM;
>>
>>  	atomic_set(&pool->pages_state_release_cnt, 0);
>> +	atomic_set(&pool->user_cnt, 0);
>>
>>  	if (pool->p.flags & PP_FLAG_DMA_MAP)
>>  		get_device(pool->p.dev);
>> @@ -70,6 +71,8 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
>>  		kfree(pool);
>>  		return ERR_PTR(err);
>>  	}
>> +
>> +	page_pool_get(pool);
>>  	return pool;
>>  }
>>  EXPORT_SYMBOL(page_pool_create);
>> @@ -356,6 +359,10 @@ static void __warn_in_flight(struct page_pool *pool)
>>
>>  void __page_pool_free(struct page_pool *pool)
>>  {
>> +	/* free only if no users */
>> +	if (!atomic_dec_and_test(&pool->user_cnt))
>> +		return;
>> +
>>  	WARN(pool->alloc.count, "API usage violation");
>>  	WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
>>
>> diff --git a/net/core/xdp.c b/net/core/xdp.c
>> index 829377cc83db..04bdcd784d2e 100644
>> --- a/net/core/xdp.c
>> +++ b/net/core/xdp.c
>> @@ -372,6 +372,9 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
>>
>>  	mutex_unlock(&mem_id_lock);
>>
>> +	if (type == MEM_TYPE_PAGE_POOL)
>> +		page_pool_get(xdp_alloc->page_pool);
>> +
>>  	trace_mem_connect(xdp_alloc, xdp_rxq);
>>  	return 0;
>>  err:
>
>
>
>-- 
>Best regards,
>  Jesper Dangaard Brouer
>  MSc.CS, Principal Kernel Engineer at Red Hat
>  LinkedIn: http://www.linkedin.com/in/brouer
Jesper Dangaard Brouer June 26, 2019, 11:51 a.m. UTC | #4
On Wed, 26 Jun 2019 13:49:49 +0300
Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org> wrote:

> On Wed, Jun 26, 2019 at 12:42:16PM +0200, Jesper Dangaard Brouer wrote:
> >On Tue, 25 Jun 2019 20:59:45 +0300
> >Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org> wrote:
> >  
> >> Add a user counter that allows deleting the pool only when it has no
> >> users. It doesn't prevent the pool from being flushed; it only
> >> prevents freeing the pool instance. This helps when there is no need
> >> to delete the pool, and it becomes the user's responsibility to free
> >> it by calling page_pool_free() during the destroy procedure. It also
> >> makes the page_pool_free() call explicit, not fully hidden in xdp
> >> unreg, which looks more correct paired with the page pool "create" routine.
> >
> >No, this is wrong.  
> below.
> 
> >  
> >> Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
> >> ---
> >>  drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 +++++---
> >>  include/net/page_pool.h                           | 7 +++++++
> >>  net/core/page_pool.c                              | 7 +++++++
> >>  net/core/xdp.c                                    | 3 +++
> >>  4 files changed, 22 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> >> index 5e40db8f92e6..cb028de64a1d 100644
> >> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> >> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> >> @@ -545,10 +545,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
> >>  	}
> >>  	err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
> >>  					 MEM_TYPE_PAGE_POOL, rq->page_pool);
> >> -	if (err) {
> >> -		page_pool_free(rq->page_pool);
> >> +	if (err)
> >>  		goto err_free;
> >> -	}
> >>
> >>  	for (i = 0; i < wq_sz; i++) {
> >>  		if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
> >> @@ -613,6 +611,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
> >>  	if (rq->xdp_prog)
> >>  		bpf_prog_put(rq->xdp_prog);
> >>  	xdp_rxq_info_unreg(&rq->xdp_rxq);
> >> +	if (rq->page_pool)
> >> +		page_pool_free(rq->page_pool);
> >>  	mlx5_wq_destroy(&rq->wq_ctrl);
> >>
> >>  	return err;
> >> @@ -643,6 +643,8 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
> >>  	}
> >>
> >>  	xdp_rxq_info_unreg(&rq->xdp_rxq);
> >> +	if (rq->page_pool)
> >> +		page_pool_free(rq->page_pool);  
> >
> >No, this is wrong.  The whole point of the merged page_pool fixes
> >patchset was that page_pool_free() needs to be delayed until no more
> >in-flight packets exist.
> 
> Probably it's not so obvious, but it's still delayed, and the pool is
> deleted only after no more in-flight packets exist. The only question
> here is who is able to do this first, based on the refcnt.

Hmm... then I find this API rather misleading; even the function name
page_pool_free is misleading ("free"). (Now I do see, below, that
page_pool_create() takes an extra reference.)

But it is still wrong / problematic, as you allow
__page_pool_request_shutdown() to be called with an elevated refcnt.
Your use-case is to have more than one xdp_rxq_info struct using the
same page_pool.  Then you have to call xdp_rxq_info_unreg_mem_model()
for each, which will call __page_pool_request_shutdown().

For this to be safe, your driver has to stop RX for all the
xdp_rxq_info structs that share the page_pool.  The page_pool already
has this requirement, but it comes as a natural step when shutting down
an RXQ.  With your change, you have to take care of stopping the RXQs
first, and then call xdp_rxq_info_unreg_mem_model() for each
xdp_rxq_info afterwards.  I assume you do this, but it is just a driver
bug waiting to happen.
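
Spelled out as hypothetical driver code (all names invented for
illustration), the required ordering is:

	/* Stop RX on every queue sharing the pool first... */
	for (i = 0; i < num_rxqs; i++)
		my_driver_stop_rx(priv, i);

	/* ...only then unregister each xdp_rxq_info... */
	for (i = 0; i < num_rxqs; i++)
		xdp_rxq_info_unreg(&priv->rxq[i].xdp_rxq);

	/* ...and finally drop the driver's own pool reference. */
	page_pool_free(priv->page_pool);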


> >> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> >> index b366f59885c1..169b0e3c870e 100644
> >> --- a/net/core/page_pool.c
> >> +++ b/net/core/page_pool.c
[...]
> >> @@ -70,6 +71,8 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
> >>  		kfree(pool);
> >>  		return ERR_PTR(err);
> >>  	}
> >> +
> >> +	page_pool_get(pool);
> >>  	return pool;
> >>  }
> >>  EXPORT_SYMBOL(page_pool_create);

The thing I (perhaps) like about your API change is that you also allow
the driver to explicitly keep the page_pool object across/after an
xdp_rxq_info_unreg_mem_model() call, and this way possibly reuse it for
another RXQ.  The problem is of course that on driver shutdown, this
will force drivers to implement the same shutdown logic with
schedule_delayed_work as the core xdp.c code already does.
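
Such driver-side logic would look roughly like the following sketch,
modeled on the defer_wq handling in net/core/xdp.c (the driver-side
names here are hypothetical):

	static void my_pool_free_work(struct work_struct *work)
	{
		struct my_priv *priv = container_of(to_delayed_work(work),
						    struct my_priv,
						    pool_free_dwork);

		/* page_pool_request_shutdown() returns true once no
		 * in-flight pages remain and the pool is safe to free.
		 */
		if (!page_pool_request_shutdown(priv->page_pool)) {
			schedule_delayed_work(&priv->pool_free_dwork, HZ);
			return;
		}
		page_pool_free(priv->page_pool);
	}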
Ivan Khoronzhuk June 26, 2019, 12:39 p.m. UTC | #5
On Wed, Jun 26, 2019 at 01:51:28PM +0200, Jesper Dangaard Brouer wrote:
>On Wed, 26 Jun 2019 13:49:49 +0300
>Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org> wrote:
>
>> On Wed, Jun 26, 2019 at 12:42:16PM +0200, Jesper Dangaard Brouer wrote:
>> >On Tue, 25 Jun 2019 20:59:45 +0300
>> >Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org> wrote:
>> >
>> >> Add a user counter that allows deleting the pool only when it has no
>> >> users. It doesn't prevent the pool from being flushed; it only
>> >> prevents freeing the pool instance. This helps when there is no need
>> >> to delete the pool, and it becomes the user's responsibility to free
>> >> it by calling page_pool_free() during the destroy procedure. It also
>> >> makes the page_pool_free() call explicit, not fully hidden in xdp
>> >> unreg, which looks more correct paired with the page pool "create" routine.
>> >
>> >No, this is wrong.
>> below.
>>
>> >
>> >> Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
>> >> ---
>> >>  drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 +++++---
>> >>  include/net/page_pool.h                           | 7 +++++++
>> >>  net/core/page_pool.c                              | 7 +++++++
>> >>  net/core/xdp.c                                    | 3 +++
>> >>  4 files changed, 22 insertions(+), 3 deletions(-)
>> >>
>> >> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>> >> index 5e40db8f92e6..cb028de64a1d 100644
>> >> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>> >> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>> >> @@ -545,10 +545,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
>> >>  	}
>> >>  	err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
>> >>  					 MEM_TYPE_PAGE_POOL, rq->page_pool);
>> >> -	if (err) {
>> >> -		page_pool_free(rq->page_pool);
>> >> +	if (err)
>> >>  		goto err_free;
>> >> -	}
>> >>
>> >>  	for (i = 0; i < wq_sz; i++) {
>> >>  		if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
>> >> @@ -613,6 +611,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
>> >>  	if (rq->xdp_prog)
>> >>  		bpf_prog_put(rq->xdp_prog);
>> >>  	xdp_rxq_info_unreg(&rq->xdp_rxq);
>> >> +	if (rq->page_pool)
>> >> +		page_pool_free(rq->page_pool);
>> >>  	mlx5_wq_destroy(&rq->wq_ctrl);
>> >>
>> >>  	return err;
>> >> @@ -643,6 +643,8 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
>> >>  	}
>> >>
>> >>  	xdp_rxq_info_unreg(&rq->xdp_rxq);
>> >> +	if (rq->page_pool)
>> >> +		page_pool_free(rq->page_pool);
>> >
>> >No, this is wrong.  The whole point of the merged page_pool fixes
>> >patchset was that page_pool_free() needs to be delayed until no more
>> >in-flight packets exist.
>>
>> Probably it's not so obvious, but it's still delayed, and the pool is
>> deleted only after no more in-flight packets exist. The only question
>> here is who is able to do this first, based on the refcnt.
>
>Hmm... then I find this API rather misleading; even the function name
>page_pool_free is misleading ("free"). (Now I do see, below, that
>page_pool_create() takes an extra reference.)
In general, "free" doesn't look bad after "create".
It's called after "create" if there is an error registering the pool
with the rxq, and it looks logical when it's called once the pool is
no longer needed, e.g.:

obj = create()
 /* a lot of different stuff */
free(obj);


>
>But it is still wrong / problematic, as you allow
>__page_pool_request_shutdown() to be called with an elevated refcnt.
>Your use-case is to have more than one xdp_rxq_info struct using the
>same page_pool.  Then you have to call xdp_rxq_info_unreg_mem_model()
>for each, which will call __page_pool_request_shutdown().
>
>For this to be safe, your driver has to stop RX for all the
>xdp_rxq_info structs that share the page_pool.  The page_pool already
>has this requirement, but it comes as a natural step when shutting down
>an RXQ.  With your change, you have to take care of stopping the RXQs
>first, and then call xdp_rxq_info_unreg_mem_model() for each
>xdp_rxq_info afterwards.  I assume you do this, but it is just a driver
>bug waiting to happen.
All RX queues are stopped before this, and only after that are the
pools freed, exactly as required for a single
xdp_rxq_info_unreg_mem_model(), without exceptions, as the API requires.

>
>> >> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
>> >> index b366f59885c1..169b0e3c870e 100644
>> >> --- a/net/core/page_pool.c
>> >> +++ b/net/core/page_pool.c
>[...]
>> >> @@ -70,6 +71,8 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
>> >>  		kfree(pool);
>> >>  		return ERR_PTR(err);
>> >>  	}
>> >> +
>> >> +	page_pool_get(pool);
>> >>  	return pool;
>> >>  }
>> >>  EXPORT_SYMBOL(page_pool_create);
>
>The thing I (perhaps) like about your API change is that you also allow
>the driver to explicitly keep the page_pool object across/after an
>xdp_rxq_info_unreg_mem_model() call, and this way possibly reuse it for
>another RXQ.
>The problem is of course that on driver shutdown, this
>will force drivers to implement the same shutdown logic with
>schedule_delayed_work as the core xdp.c code already does.
I see.

The cpsw driver doesn't reuse it, so here all is fine. But if a driver
needs to reuse it again, let's suppose, as can happen, the pool needs
to be registered with xdp_rxq_info_reg_mem_model() again, and for that
a check on in-flight packets or some registration state could
potentially be added... but it would be better to mention somewhere not
to do this; frankly, I don't know where that should be at the moment.
Ivan Khoronzhuk June 26, 2019, 2:01 p.m. UTC | #6
On Tue, Jun 25, 2019 at 09:36:15PM -0400, Willem de Bruijn wrote:
>On Tue, Jun 25, 2019 at 2:00 PM Ivan Khoronzhuk
><ivan.khoronzhuk@linaro.org> wrote:
>>
>> Add a user counter that allows deleting the pool only when it has no
>> users. It doesn't prevent the pool from being flushed; it only
>> prevents freeing the pool instance. This helps when there is no need
>> to delete the pool, and it becomes the user's responsibility to free
>> it by calling page_pool_free() during the destroy procedure. It also
>> makes the page_pool_free() call explicit, not fully hidden in xdp
>> unreg, which looks more correct paired with the page pool "create" routine.
>>
>> Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
>> ---
>
>> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
>> index f07c518ef8a5..1ec838e9927e 100644
>> --- a/include/net/page_pool.h
>> +++ b/include/net/page_pool.h
>> @@ -101,6 +101,7 @@ struct page_pool {
>>         struct ptr_ring ring;
>>
>>         atomic_t pages_state_release_cnt;
>> +       atomic_t user_cnt;
>
>refcount_t?
Yes, thanks.

>
>>  };
>>
>>  struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
>> @@ -183,6 +184,12 @@ static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>>         return page->dma_addr;
>>  }
>>
>> +/* used to prevent pool from deallocation */
>> +static inline void page_pool_get(struct page_pool *pool)
>> +{
>> +       atomic_inc(&pool->user_cnt);
>> +}
>> +
>>  static inline bool is_page_pool_compiled_in(void)
>>  {
>>  #ifdef CONFIG_PAGE_POOL
>> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
>> index b366f59885c1..169b0e3c870e 100644
>> --- a/net/core/page_pool.c
>> +++ b/net/core/page_pool.c
>> @@ -48,6 +48,7 @@ static int page_pool_init(struct page_pool *pool,
>>                 return -ENOMEM;
>>
>>         atomic_set(&pool->pages_state_release_cnt, 0);
>> +       atomic_set(&pool->user_cnt, 0);
>>
>>         if (pool->p.flags & PP_FLAG_DMA_MAP)
>>                 get_device(pool->p.dev);
>> @@ -70,6 +71,8 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
>>                 kfree(pool);
>>                 return ERR_PTR(err);
>>         }
>> +
>> +       page_pool_get(pool);
>>         return pool;
>>  }
>>  EXPORT_SYMBOL(page_pool_create);
>> @@ -356,6 +359,10 @@ static void __warn_in_flight(struct page_pool *pool)
>>
>>  void __page_pool_free(struct page_pool *pool)
>>  {
>> +       /* free only if no users */
>> +       if (!atomic_dec_and_test(&pool->user_cnt))
>> +               return;
>> +
>>         WARN(pool->alloc.count, "API usage violation");
>>         WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
>>
>> diff --git a/net/core/xdp.c b/net/core/xdp.c
>> index 829377cc83db..04bdcd784d2e 100644
>> --- a/net/core/xdp.c
>> +++ b/net/core/xdp.c
>> @@ -372,6 +372,9 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
>>
>>         mutex_unlock(&mem_id_lock);
>>
>> +       if (type == MEM_TYPE_PAGE_POOL)
>> +               page_pool_get(xdp_alloc->page_pool);
>> +
>
>need an analogous page_pool_put in xdp_rxq_info_unreg_mem_model? mlx5
>does not use that inverse function, but the Intel drivers do.
No need; it's put by the call to page_pool_free() in the unreg workqueue.
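
The path being referred to is roughly the following call chain (as of
this series):

	xdp_rxq_info_unreg()
	  -> xdp_rxq_info_unreg_mem_model()
	    -> __mem_id_disconnect()	/* retried via defer_wq while
					 * in-flight pages remain */
	      -> call_rcu(..., __xdp_mem_allocator_rcu_free)
	        -> page_pool_free()	/* drops the reference taken at
					 * registration time */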

>
>>         trace_mem_connect(xdp_alloc, xdp_rxq);
>>         return 0;
>>  err:
>> --
>> 2.17.1
>>
Jesper Dangaard Brouer June 27, 2019, 7:44 p.m. UTC | #7
On Tue, 25 Jun 2019 20:59:45 +0300
Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org> wrote:

> Add a user counter that allows deleting the pool only when it has no
> users. It doesn't prevent the pool from being flushed; it only
> prevents freeing the pool instance. This helps when there is no need
> to delete the pool, and it becomes the user's responsibility to free
> it by calling page_pool_free() during the destroy procedure. It also
> makes the page_pool_free() call explicit, not fully hidden in xdp
> unreg, which looks more correct paired with the page pool "create" routine.

I don't think the "create" and "free" routine pairing looks "more
correct" together.

Maybe we can scale back your solution(?) by creating a page_pool_get()
and page_pool_put() API that can be used by your driver, to keep the
page_pool object after an xdp_rxq_info_unreg() call.  Then you can use
it for two xdp_rxq_info structs, and call page_pool_put() after you
have unregistered both.

The API would basically be:

diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index b366f59885c1..691ddacfb5a6 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -357,6 +357,10 @@ static void __warn_in_flight(struct page_pool *pool)
 void __page_pool_free(struct page_pool *pool)
 {
        WARN(pool->alloc.count, "API usage violation");
+
+       if (atomic_read(&pool->user_cnt) != 0)
+               return;
+
        WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
 
        /* Can happen due to forced shutdown */
@@ -372,6 +376,19 @@ void __page_pool_free(struct page_pool *pool)
 }
 EXPORT_SYMBOL(__page_pool_free);
 
+void page_pool_put(struct page_pool *pool)
+{
+       if (atomic_dec_and_test(&pool->user_cnt))
+               __page_pool_free(pool);
+}
+EXPORT_SYMBOL(page_pool_put);
+
+void page_pool_get(struct page_pool *pool)
+{
+       atomic_inc(&pool->user_cnt);
+}
+EXPORT_SYMBOL(page_pool_get);
+
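
Driver usage under this proposal would then be roughly (a hypothetical
sketch):

	pool = page_pool_create(&params);
	page_pool_get(pool);	/* keep the pool across xdp unreg */

	xdp_rxq_info_reg_mem_model(&rxq0, MEM_TYPE_PAGE_POOL, pool);
	xdp_rxq_info_reg_mem_model(&rxq1, MEM_TYPE_PAGE_POOL, pool);
	...
	xdp_rxq_info_unreg(&rxq0);	/* free skipped, user_cnt != 0 */
	xdp_rxq_info_unreg(&rxq1);
	page_pool_put(pool);		/* last user gone, pool is freed */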
Ivan Khoronzhuk June 27, 2019, 10:02 p.m. UTC | #8
Hi Jesper, thanks for remembering about it.

>
>I don't think that "create" and "free" routines paring looks "more
>correct" together.
>
>Maybe we can scale back your solution(?), via creating a page_pool_get()
>and page_pool_put() API that can be used by your driver, to keep the
>page_pool object after a xdp_rxq_info_unreg() call.  Then you can use
>it for two xdp_rxq_info structs, and call page_pool_put() after you
>have unregistered both.
>
>The API would basically be:
>
>diff --git a/net/core/page_pool.c b/net/core/page_pool.c
>index b366f59885c1..691ddacfb5a6 100644
>--- a/net/core/page_pool.c
>+++ b/net/core/page_pool.c
>@@ -357,6 +357,10 @@ static void __warn_in_flight(struct page_pool *pool)
> void __page_pool_free(struct page_pool *pool)
> {
>        WARN(pool->alloc.count, "API usage violation");
>+
>+       if (atomic_read(&pool->user_cnt) != 0)
>+               return;
>+
>        WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
>
>        /* Can happen due to forced shutdown */
>@@ -372,6 +376,19 @@ void __page_pool_free(struct page_pool *pool)
> }
> EXPORT_SYMBOL(__page_pool_free);
>
>+void page_pool_put(struct page_pool *pool)
>+{
>+       if (atomic_dec_and_test(&pool->user_cnt))
>+               __page_pool_free(pool);
>+}
>+EXPORT_SYMBOL(page_pool_put);
>+
>+void page_pool_get(struct page_pool *pool)
>+{
>+       atomic_inc(&pool->user_cnt);
>+}
>+EXPORT_SYMBOL(page_pool_get);
>+

I have another solution that doesn't touch the page pool and instead
modifies the xdp allocator. To me it looks better and works more
widely; I also don't need to think about this in the driver.

It assumes the allocator works as before: no changes to mlx5 or the
page_pool API and its usage, and it seems to fit your requirements.
It still assumes the allocator runs under the same NAPI softirq, but
it allows the allocator to be reused.

I haven't verified it yet, but it looks like:

diff --git a/include/net/xdp_priv.h b/include/net/xdp_priv.h
index 6a8cba6ea79a..995b21da2f27 100644
--- a/include/net/xdp_priv.h
+++ b/include/net/xdp_priv.h
@@ -18,6 +18,7 @@ struct xdp_mem_allocator {
 	struct rcu_head rcu;
 	struct delayed_work defer_wq;
 	unsigned long defer_warn;
+	unsigned long refcnt;
 };
 
 #endif /* __LINUX_NET_XDP_PRIV_H__ */
diff --git a/net/core/xdp.c b/net/core/xdp.c
index f98ab6b98674..6239483e3793 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -98,6 +98,12 @@ static bool __mem_id_disconnect(int id, bool force)
 		WARN(1, "Request remove non-existing id(%d), driver bug?", id);
 		return true;
 	}
+
+	if (--xa->refcnt) {
+		mutex_unlock(&mem_id_lock);
+		return true;
+	}
+
 	xa->disconnect_cnt++;
 
 	/* Detects in-flight packet-pages for page_pool */
@@ -312,6 +318,33 @@ static bool __is_supported_mem_type(enum xdp_mem_type type)
 	return true;
 }
 
+static struct xdp_mem_allocator *xdp_allocator_get(void *allocator)
+{
+	struct xdp_mem_allocator *xae, *xa = NULL;
+	struct rhashtable_iter iter;
+
+	mutex_lock(&mem_id_lock);
+	rhashtable_walk_enter(mem_id_ht, &iter);
+	do {
+		rhashtable_walk_start(&iter);
+
+		while ((xae = rhashtable_walk_next(&iter)) && !IS_ERR(xae)) {
+			if (xae->allocator == allocator) {
+				xae->refcnt++;
+				xa = xae;
+				break;
+			}
+		}
+
+		rhashtable_walk_stop(&iter);
+
+	} while (xae == ERR_PTR(-EAGAIN));
+	rhashtable_walk_exit(&iter);
+	mutex_unlock(&mem_id_lock);
+
+	return xa;
+}
+
 int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 			       enum xdp_mem_type type, void *allocator)
 {
@@ -347,6 +380,9 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 		}
 	}
 
+	if (xdp_allocator_get(allocator))
+		return 0;
+
 	xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
 	if (!xdp_alloc)
 		return -ENOMEM;
@@ -360,6 +396,7 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 	xdp_rxq->mem.id = id;
 	xdp_alloc->mem  = xdp_rxq->mem;
 	xdp_alloc->allocator = allocator;
+	xdp_alloc->refcnt = 1;
 
 	/* Insert allocator into ID lookup table */
 	ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
Jesper Dangaard Brouer June 28, 2019, 6:35 a.m. UTC | #9
On Fri, 28 Jun 2019 01:02:47 +0300
Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org> wrote:

> Hi Jesper, thanks for remembering about it.
> 
> >
> >I don't think that "create" and "free" routines paring looks "more
> >correct" together.
> >
> >Maybe we can scale back your solution(?), via creating a page_pool_get()
> >and page_pool_put() API that can be used by your driver, to keep the
> >page_pool object after a xdp_rxq_info_unreg() call.  Then you can use
> >it for two xdp_rxq_info structs, and call page_pool_put() after you
> >have unregistered both.
> >
> >The API would basically be:
> >
> >diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> >index b366f59885c1..691ddacfb5a6 100644
> >--- a/net/core/page_pool.c
> >+++ b/net/core/page_pool.c
> >@@ -357,6 +357,10 @@ static void __warn_in_flight(struct page_pool *pool)
> > void __page_pool_free(struct page_pool *pool)
> > {
> >        WARN(pool->alloc.count, "API usage violation");
> >+
> >+       if (atomic_read(&pool->user_cnt) != 0)
> >+               return;
> >+
> >        WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
> >
> >        /* Can happen due to forced shutdown */
> >@@ -372,6 +376,19 @@ void __page_pool_free(struct page_pool *pool)
> > }
> > EXPORT_SYMBOL(__page_pool_free);
> >
> >+void page_pool_put(struct page_pool *pool)
> >+{
> >+       if (atomic_dec_and_test(&pool->user_cnt))
> >+               __page_pool_free(pool);
> >+}
> >+EXPORT_SYMBOL(page_pool_put);
> >+
> >+void page_pool_get(struct page_pool *pool)
> >+{
> >+       atomic_inc(&pool->user_cnt);
> >+}
> >+EXPORT_SYMBOL(page_pool_get);
> >+  
> 
> I have another solution that doesn't touch the page pool and instead
> modifies the xdp allocator. To me it looks better and works more
> widely; I also don't need to think about this in the driver.
>
> It assumes the allocator works as before: no changes to mlx5 or the
> page_pool API and its usage, and it seems to fit your requirements.
> It still assumes the allocator runs under the same NAPI softirq, but
> it allows the allocator to be reused.
>
> I haven't verified it yet, but it looks like:
> 
> diff --git a/include/net/xdp_priv.h b/include/net/xdp_priv.h
> index 6a8cba6ea79a..995b21da2f27 100644
> --- a/include/net/xdp_priv.h
> +++ b/include/net/xdp_priv.h
> @@ -18,6 +18,7 @@ struct xdp_mem_allocator {
>  	struct rcu_head rcu;
>  	struct delayed_work defer_wq;
>  	unsigned long defer_warn;
> +	unsigned long refcnt;
>  };
>  
>  #endif /* __LINUX_NET_XDP_PRIV_H__ */
> diff --git a/net/core/xdp.c b/net/core/xdp.c
> index f98ab6b98674..6239483e3793 100644
> --- a/net/core/xdp.c
> +++ b/net/core/xdp.c
> @@ -98,6 +98,12 @@ static bool __mem_id_disconnect(int id, bool force)
>  		WARN(1, "Request remove non-existing id(%d), driver bug?", id);
>  		return true;
>  	}
> +
> +	if (--xa->refcnt) {
> +		mutex_unlock(&mem_id_lock);
> +		return true;

This doesn't work.  This function __mem_id_disconnect() can be called
multiple times. E.g. if there are in-flight packets.


> +	}
> +
>  	xa->disconnect_cnt++;
>  
>  	/* Detects in-flight packet-pages for page_pool */
> @@ -312,6 +318,33 @@ static bool __is_supported_mem_type(enum xdp_mem_type type)
>  	return true;
>  }
>  
> +static struct xdp_mem_allocator *xdp_allocator_get(void *allocator)
> +{
> +	struct xdp_mem_allocator *xae, *xa = NULL;
> +	struct rhashtable_iter iter;
> +
> +	mutex_lock(&mem_id_lock);
> +	rhashtable_walk_enter(mem_id_ht, &iter);
> +	do {
> +		rhashtable_walk_start(&iter);
> +
> +		while ((xae = rhashtable_walk_next(&iter)) && !IS_ERR(xae)) {
> +			if (xae->allocator == allocator) {
> +				xae->refcnt++;
> +				xa = xae;
> +				break;
> +			}
> +		}
> +
> +		rhashtable_walk_stop(&iter);
> +
> +	} while (xae == ERR_PTR(-EAGAIN));
> +	rhashtable_walk_exit(&iter);
> +	mutex_unlock(&mem_id_lock);
> +
> +	return xa;
> +}
> +
>  int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
>  			       enum xdp_mem_type type, void *allocator)
>  {
> @@ -347,6 +380,9 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
>  		}
>  	}
>  
> +	if (xdp_allocator_get(allocator))
> +		return 0;
> +
>  	xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
>  	if (!xdp_alloc)
>  		return -ENOMEM;
> @@ -360,6 +396,7 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
>  	xdp_rxq->mem.id = id;
>  	xdp_alloc->mem  = xdp_rxq->mem;
>  	xdp_alloc->allocator = allocator;
> +	xdp_alloc->refcnt = 1;
>  
>  	/* Insert allocator into ID lookup table */
>  	ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
> 
>
Ivan Khoronzhuk June 28, 2019, 8:53 a.m. UTC | #10
On Fri, Jun 28, 2019 at 08:35:20AM +0200, Jesper Dangaard Brouer wrote:
>On Fri, 28 Jun 2019 01:02:47 +0300
>Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org> wrote:
>
>> Hi Jesper, thanks for remembering about it.
>>
>> >
>> >I don't think that "create" and "free" routines paring looks "more
>> >correct" together.
>> >
>> >Maybe we can scale back your solution(?), via creating a page_pool_get()
>> >and page_pool_put() API that can be used by your driver, to keep the
>> >page_pool object after a xdp_rxq_info_unreg() call.  Then you can use
>> >it for two xdp_rxq_info structs, and call page_pool_put() after you
>> >have unregistered both.
>> >
>> >The API would basically be:
>> >
>> >diff --git a/net/core/page_pool.c b/net/core/page_pool.c
>> >index b366f59885c1..691ddacfb5a6 100644
>> >--- a/net/core/page_pool.c
>> >+++ b/net/core/page_pool.c
>> >@@ -357,6 +357,10 @@ static void __warn_in_flight(struct page_pool *pool)
>> > void __page_pool_free(struct page_pool *pool)
>> > {
>> >        WARN(pool->alloc.count, "API usage violation");
>> >+
>> >+       if (atomic_read(&pool->user_cnt) != 0)
>> >+               return;
>> >+
>> >        WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
>> >
>> >        /* Can happen due to forced shutdown */
>> >@@ -372,6 +376,19 @@ void __page_pool_free(struct page_pool *pool)
>> > }
>> > EXPORT_SYMBOL(__page_pool_free);
>> >
>> >+void page_pool_put(struct page_pool *pool)
>> >+{
>> >+       if (atomic_dec_and_test(&pool->user_cnt))
>> >+               __page_pool_free(pool);
>> >+}
>> >+EXPORT_SYMBOL(page_pool_put);
>> >+
>> >+void page_pool_get(struct page_pool *pool)
>> >+{
>> >+       atomic_inc(&pool->user_cnt);
>> >+}
>> >+EXPORT_SYMBOL(page_pool_get);
>> >+
>>
>> I have another solution that doesn't touch the page pool and instead
>> modifies the xdp allocator. To me it looks better and works more
>> widely; I also don't need to think about this in the driver.
>>
>> It assumes the allocator works as before: no changes to mlx5 or the
>> page_pool API and its usage, and it seems to fit your requirements.
>> It still assumes the allocator runs under the same NAPI softirq, but
>> it allows the allocator to be reused.
>>
>> I haven't verified it yet, but it looks like:
>>
>> diff --git a/include/net/xdp_priv.h b/include/net/xdp_priv.h
>> index 6a8cba6ea79a..995b21da2f27 100644
>> --- a/include/net/xdp_priv.h
>> +++ b/include/net/xdp_priv.h
>> @@ -18,6 +18,7 @@ struct xdp_mem_allocator {
>>  	struct rcu_head rcu;
>>  	struct delayed_work defer_wq;
>>  	unsigned long defer_warn;
>> +	unsigned long refcnt;
>>  };
>>
>>  #endif /* __LINUX_NET_XDP_PRIV_H__ */
>> diff --git a/net/core/xdp.c b/net/core/xdp.c
>> index f98ab6b98674..6239483e3793 100644
>> --- a/net/core/xdp.c
>> +++ b/net/core/xdp.c
>> @@ -98,6 +98,12 @@ static bool __mem_id_disconnect(int id, bool force)
>>  		WARN(1, "Request remove non-existing id(%d), driver bug?", id);
>>  		return true;
>>  	}
>> +
>> +	if (--xa->refcnt) {
>> +		mutex_unlock(&mem_id_lock);
>> +		return true;
>
>This doesn't work.  This function __mem_id_disconnect() can be called
>multiple times. E.g. if there are in-flight packets.
Yes, it was a draft. I still haven't completely verified it due to
several changes in cpsw and a holiday on my side; the second draft
looks like:

Subject: [PATCH] net: core: xdp: allow same allocator for rxq

An XDP rxq can be the same for ndevs running under the same RX NAPI
softirq. But there is no way to register the same allocator for both
rxqs; in fact it's the same rxq, it just has a different ndev as a
reference.

Due to recent changes, allocator destruction can be deferred until the
moment all packets are recycled by the destination interface, after
which it's freed. In order to schedule allocator destruction only after
all users are unregistered, add a refcnt to the allocator object and
start the destroy only when it reaches 0.

Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
---
 include/net/xdp_priv.h |  1 +
 net/core/xdp.c         | 46 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/include/net/xdp_priv.h b/include/net/xdp_priv.h
index 6a8cba6ea79a..995b21da2f27 100644
--- a/include/net/xdp_priv.h
+++ b/include/net/xdp_priv.h
@@ -18,6 +18,7 @@ struct xdp_mem_allocator {
 	struct rcu_head rcu;
 	struct delayed_work defer_wq;
 	unsigned long defer_warn;
+	unsigned long refcnt;
 };
 
 #endif /* __LINUX_NET_XDP_PRIV_H__ */
diff --git a/net/core/xdp.c b/net/core/xdp.c
index f98ab6b98674..7b0185eec124 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -98,6 +98,18 @@ static bool __mem_id_disconnect(int id, bool force)
 		WARN(1, "Request remove non-existing id(%d), driver bug?", id);
 		return true;
 	}
+
+	/* To avoid a second hash lookup, do the refcnt dec here, but not
+	 * when it's already 0, as this can be called again from the
+	 * workqueue afterwards.
+	 */
+	if (xa->refcnt)
+		xa->refcnt--;
+
+	if (xa->refcnt) {
+		mutex_unlock(&mem_id_lock);
+		return true;
+	}
+
 	xa->disconnect_cnt++;
 
 	/* Detects in-flight packet-pages for page_pool */
@@ -312,6 +324,33 @@ static bool __is_supported_mem_type(enum xdp_mem_type type)
 	return true;
 }
 
+static struct xdp_mem_allocator *xdp_allocator_get(void *allocator)
+{
+	struct xdp_mem_allocator *xae, *xa = NULL;
+	struct rhashtable_iter iter;
+
+	mutex_lock(&mem_id_lock);
+	rhashtable_walk_enter(mem_id_ht, &iter);
+	do {
+		rhashtable_walk_start(&iter);
+
+		while ((xae = rhashtable_walk_next(&iter)) && !IS_ERR(xae)) {
+			if (xae->allocator == allocator) {
+				xae->refcnt++;
+				xa = xae;
+				break;
+			}
+		}
+
+		rhashtable_walk_stop(&iter);
+
+	} while (xae == ERR_PTR(-EAGAIN));
+	rhashtable_walk_exit(&iter);
+	mutex_unlock(&mem_id_lock);
+
+	return xa;
+}
+
 int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 			       enum xdp_mem_type type, void *allocator)
 {
@@ -347,6 +386,12 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 		}
 	}
 
+	xdp_alloc = xdp_allocator_get(allocator);
+	if (xdp_alloc) {
+		xdp_rxq->mem.id = xdp_alloc->mem.id;
+		return 0;
+	}
+
 	xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
 	if (!xdp_alloc)
 		return -ENOMEM;
@@ -360,6 +405,7 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 	xdp_rxq->mem.id = id;
 	xdp_alloc->mem  = xdp_rxq->mem;
 	xdp_alloc->allocator = allocator;
+	xdp_alloc->refcnt = 1;
 
 	/* Insert allocator into ID lookup table */
 	ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
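
For the cpsw use case, the effect of this draft would be roughly the
following (hypothetical driver-side sketch; both rxqs run under the
same NAPI):

	xdp_rxq_info_reg(&rxq0, ndev0, 0);
	xdp_rxq_info_reg_mem_model(&rxq0, MEM_TYPE_PAGE_POOL,
				   pool);	/* allocator refcnt = 1 */

	xdp_rxq_info_reg(&rxq1, ndev1, 0);
	xdp_rxq_info_reg_mem_model(&rxq1, MEM_TYPE_PAGE_POOL,
				   pool);	/* same allocator found,
						 * refcnt = 2 */
	...
	xdp_rxq_info_unreg(&rxq0);	/* refcnt 2 -> 1, allocator kept */
	xdp_rxq_info_unreg(&rxq1);	/* refcnt 1 -> 0, shutdown starts */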

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5e40db8f92e6..cb028de64a1d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -545,10 +545,8 @@  static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 	}
 	err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
 					 MEM_TYPE_PAGE_POOL, rq->page_pool);
-	if (err) {
-		page_pool_free(rq->page_pool);
+	if (err)
 		goto err_free;
-	}
 
 	for (i = 0; i < wq_sz; i++) {
 		if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
@@ -613,6 +611,8 @@  static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 	if (rq->xdp_prog)
 		bpf_prog_put(rq->xdp_prog);
 	xdp_rxq_info_unreg(&rq->xdp_rxq);
+	if (rq->page_pool)
+		page_pool_free(rq->page_pool);
 	mlx5_wq_destroy(&rq->wq_ctrl);
 
 	return err;
@@ -643,6 +643,8 @@  static void mlx5e_free_rq(struct mlx5e_rq *rq)
 	}
 
 	xdp_rxq_info_unreg(&rq->xdp_rxq);
+	if (rq->page_pool)
+		page_pool_free(rq->page_pool);
 	mlx5_wq_destroy(&rq->wq_ctrl);
 }
 
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index f07c518ef8a5..1ec838e9927e 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -101,6 +101,7 @@  struct page_pool {
 	struct ptr_ring ring;
 
 	atomic_t pages_state_release_cnt;
+	atomic_t user_cnt;
 };
 
 struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
@@ -183,6 +184,12 @@  static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
 	return page->dma_addr;
 }
 
+/* used to prevent pool from deallocation */
+static inline void page_pool_get(struct page_pool *pool)
+{
+	atomic_inc(&pool->user_cnt);
+}
+
 static inline bool is_page_pool_compiled_in(void)
 {
 #ifdef CONFIG_PAGE_POOL
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index b366f59885c1..169b0e3c870e 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -48,6 +48,7 @@  static int page_pool_init(struct page_pool *pool,
 		return -ENOMEM;
 
 	atomic_set(&pool->pages_state_release_cnt, 0);
+	atomic_set(&pool->user_cnt, 0);
 
 	if (pool->p.flags & PP_FLAG_DMA_MAP)
 		get_device(pool->p.dev);
@@ -70,6 +71,8 @@  struct page_pool *page_pool_create(const struct page_pool_params *params)
 		kfree(pool);
 		return ERR_PTR(err);
 	}
+
+	page_pool_get(pool);
 	return pool;
 }
 EXPORT_SYMBOL(page_pool_create);
@@ -356,6 +359,10 @@  static void __warn_in_flight(struct page_pool *pool)
 
 void __page_pool_free(struct page_pool *pool)
 {
+	/* free only if no users */
+	if (!atomic_dec_and_test(&pool->user_cnt))
+		return;
+
 	WARN(pool->alloc.count, "API usage violation");
 	WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
 
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 829377cc83db..04bdcd784d2e 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -372,6 +372,9 @@  int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 
 	mutex_unlock(&mem_id_lock);
 
+	if (type == MEM_TYPE_PAGE_POOL)
+		page_pool_get(xdp_alloc->page_pool);
+
 	trace_mem_connect(xdp_alloc, xdp_rxq);
 	return 0;
 err: