diff mbox series

[PATCHv2,1/1] IB: rxe: remove pool state

Message ID 1545313307-11562-1-git-send-email-yanjun.zhu@oracle.com (mailing list archive)
State Changes Requested
Headers show
Series [PATCHv2,1/1] IB: rxe: remove pool state | expand

Commit Message

Zhu Yanjun Dec. 20, 2018, 1:41 p.m. UTC
The pool state is the duplicate of pool ref_cnt. If ref_cnt > 0,
it indicates that state is valid. If ref_cnt = 0, it indicates
that state is invalid.

Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
---
V1->V2: Follow Jason's advice, the state is replaced with ref_cnt.
---
 drivers/infiniband/sw/rxe/rxe_pool.c | 20 +++-----------------
 drivers/infiniband/sw/rxe/rxe_pool.h |  6 ------
 2 files changed, 3 insertions(+), 23 deletions(-)

Comments

Yuval Shaia Dec. 20, 2018, 6:04 p.m. UTC | #1
On Thu, Dec 20, 2018 at 08:41:47AM -0500, Yanjun Zhu wrote:
> The pool state is the duplicate of pool ref_cnt. If ref_cnt > 0,
> it indicates that state is valid. If ref_cnt = 0, it indicates
> that state is invalid.
> 
> Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
> ---
> V1->V2: Follow Jason's advice, the state is replaced with ref_cnt.

Is it v2 the patch "IB/rxe: Remove duplicate pool invalidation"?

> ---
>  drivers/infiniband/sw/rxe/rxe_pool.c | 20 +++-----------------
>  drivers/infiniband/sw/rxe/rxe_pool.h |  6 ------
>  2 files changed, 3 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
> index 36b53fb..d8f969d 100644
> --- a/drivers/infiniband/sw/rxe/rxe_pool.c
> +++ b/drivers/infiniband/sw/rxe/rxe_pool.c
> @@ -222,8 +222,6 @@ int rxe_pool_init(
>  		pool->key_size = rxe_type_info[type].key_size;
>  	}
>  
> -	pool->state = RXE_POOL_STATE_VALID;
> -
>  out:
>  	return err;
>  }
> @@ -232,7 +230,6 @@ static void rxe_pool_release(struct kref *kref)
>  {
>  	struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt);
>  
> -	pool->state = RXE_POOL_STATE_INVALID;
>  	kfree(pool->table);
>  }
>  
> @@ -243,14 +240,9 @@ static void rxe_pool_put(struct rxe_pool *pool)
>  
>  int rxe_pool_cleanup(struct rxe_pool *pool)
>  {
> -	unsigned long flags;
> -
> -	write_lock_irqsave(&pool->pool_lock, flags);
> -	pool->state = RXE_POOL_STATE_INVALID;
>  	if (atomic_read(&pool->num_elem) > 0)
>  		pr_warn("%s pool destroyed with unfree'd elem\n",
>  			pool_name(pool));
> -	write_unlock_irqrestore(&pool->pool_lock, flags);

I wonder why it was surrounding the atomic_read call in the first place.

>  
>  	rxe_pool_put(pool);
>  
> @@ -380,17 +372,11 @@ void rxe_drop_index(void *arg)
>  void *rxe_alloc(struct rxe_pool *pool)
>  {
>  	struct rxe_pool_entry *elem;
> -	unsigned long flags;
>  
>  	might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
>  
> -	read_lock_irqsave(&pool->pool_lock, flags);
> -	if (pool->state != RXE_POOL_STATE_VALID) {
> -		read_unlock_irqrestore(&pool->pool_lock, flags);
> +	if (!kref_get_unless_zero(&pool->ref_cnt))
>  		return NULL;
> -	}
> -	kref_get(&pool->ref_cnt);
> -	read_unlock_irqrestore(&pool->pool_lock, flags);
>  
>  	kref_get(&pool->rxe->ref_cnt);
>  
> @@ -438,7 +424,7 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
>  
>  	read_lock_irqsave(&pool->pool_lock, flags);
>  
> -	if (pool->state != RXE_POOL_STATE_VALID)
> +	if (!kref_read(&pool->ref_cnt))

So ref_cnt cannot go below 0,right?

Will it be safer to check <= 0, just to protect against incorrect calls to
rxe_pool_cleanup?

>  		goto out;
>  
>  	node = pool->tree.rb_node;
> @@ -470,7 +456,7 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
>  
>  	read_lock_irqsave(&pool->pool_lock, flags);
>  
> -	if (pool->state != RXE_POOL_STATE_VALID)
> +	if (!kref_read(&pool->ref_cnt))

Ditto.

>  		goto out;
>  
>  	node = pool->tree.rb_node;
> diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
> index aa4ba30..eeca733 100644
> --- a/drivers/infiniband/sw/rxe/rxe_pool.h
> +++ b/drivers/infiniband/sw/rxe/rxe_pool.h
> @@ -73,11 +73,6 @@ struct rxe_type_info {
>  
>  extern struct rxe_type_info rxe_type_info[];
>  
> -enum rxe_pool_state {
> -	RXE_POOL_STATE_INVALID,
> -	RXE_POOL_STATE_VALID,
> -};
> -
>  struct rxe_pool_entry {
>  	struct rxe_pool		*pool;
>  	struct kref		ref_cnt;
> @@ -94,7 +89,6 @@ struct rxe_pool {
>  	size_t			elem_size;
>  	struct kref		ref_cnt;
>  	void			(*cleanup)(struct rxe_pool_entry *obj);
> -	enum rxe_pool_state	state;
>  	enum rxe_pool_flags	flags;
>  	enum rxe_elem_type	type;
>  
> -- 
> 2.7.4
>
Jason Gunthorpe Dec. 20, 2018, 9:09 p.m. UTC | #2
On Thu, Dec 20, 2018 at 08:41:47AM -0500, Zhu Yanjun wrote:
> The pool state is the duplicate of pool ref_cnt. If ref_cnt > 0,
> it indicates that state is valid. If ref_cnt = 0, it indicates
> that state is invalid.
> 
> Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
> V1->V2: Follow Jason's advice, the state is replaced with ref_cnt.
>  drivers/infiniband/sw/rxe/rxe_pool.c | 20 +++-----------------
>  drivers/infiniband/sw/rxe/rxe_pool.h |  6 ------
>  2 files changed, 3 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
> index 36b53fb..d8f969d 100644
> +++ b/drivers/infiniband/sw/rxe/rxe_pool.c
> @@ -222,8 +222,6 @@ int rxe_pool_init(
>  		pool->key_size = rxe_type_info[type].key_size;
>  	}
>  
> -	pool->state = RXE_POOL_STATE_VALID;
> -
>  out:
>  	return err;
>  }
> @@ -232,7 +230,6 @@ static void rxe_pool_release(struct kref *kref)
>  {
>  	struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt);
>  
> -	pool->state = RXE_POOL_STATE_INVALID;
>  	kfree(pool->table);
>  }
>  
> @@ -243,14 +240,9 @@ static void rxe_pool_put(struct rxe_pool *pool)
>  
>  int rxe_pool_cleanup(struct rxe_pool *pool)
>  {
> -	unsigned long flags;
> -
> -	write_lock_irqsave(&pool->pool_lock, flags);
> -	pool->state = RXE_POOL_STATE_INVALID;
>  	if (atomic_read(&pool->num_elem) > 0)
>  		pr_warn("%s pool destroyed with unfree'd elem\n",
>  			pool_name(pool));
> -	write_unlock_irqrestore(&pool->pool_lock, flags);
>  
>  	rxe_pool_put(pool);
>  
> @@ -380,17 +372,11 @@ void rxe_drop_index(void *arg)
>  void *rxe_alloc(struct rxe_pool *pool)
>  {
>  	struct rxe_pool_entry *elem;
> -	unsigned long flags;
>  
>  	might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
>  
> -	read_lock_irqsave(&pool->pool_lock, flags);
> -	if (pool->state != RXE_POOL_STATE_VALID) {
> -		read_unlock_irqrestore(&pool->pool_lock, flags);
> +	if (!kref_get_unless_zero(&pool->ref_cnt))
>  		return NULL;
> -	}
> -	kref_get(&pool->ref_cnt);
> -	read_unlock_irqrestore(&pool->pool_lock, flags);
>  
>  	kref_get(&pool->rxe->ref_cnt);
>  
> @@ -438,7 +424,7 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
>  
>  	read_lock_irqsave(&pool->pool_lock, flags);
>  
> -	if (pool->state != RXE_POOL_STATE_VALID)
> +	if (!kref_read(&pool->ref_cnt))
>  		goto out;

These kref_reads make no sense, the caller has to be holding a kref on
pool to call this API, otherwise it is already a free'd pointer.  So
there is no reason to check the kref.

Did you audit that all callers hold the kref?

Jason
Zhu Yanjun Dec. 21, 2018, 7:14 a.m. UTC | #3
On 2018/12/21 2:04, Yuval Shaia wrote:
> On Thu, Dec 20, 2018 at 08:41:47AM -0500, Yanjun Zhu wrote:
>> The pool state is the duplicate of pool ref_cnt. If ref_cnt > 0,
>> it indicates that state is valid. If ref_cnt = 0, it indicates
>> that state is invalid.
>>
>> Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
>> ---
>> V1->V2: Follow Jason's advice, the state is replaced with ref_cnt.
> Is it v2 the patch "IB/rxe: Remove duplicate pool invalidation"?

Yeah. The patch "IB/rxe: Remove duplicate pool invalidation" is included 
in this patch.

In fact, the V1 patch is "IB: rxe: replace read/write locks with atomic 
bitops".

>
>> ---
>>   drivers/infiniband/sw/rxe/rxe_pool.c | 20 +++-----------------
>>   drivers/infiniband/sw/rxe/rxe_pool.h |  6 ------
>>   2 files changed, 3 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
>> index 36b53fb..d8f969d 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_pool.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_pool.c
>> @@ -222,8 +222,6 @@ int rxe_pool_init(
>>   		pool->key_size = rxe_type_info[type].key_size;
>>   	}
>>   
>> -	pool->state = RXE_POOL_STATE_VALID;
>> -
>>   out:
>>   	return err;
>>   }
>> @@ -232,7 +230,6 @@ static void rxe_pool_release(struct kref *kref)
>>   {
>>   	struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt);
>>   
>> -	pool->state = RXE_POOL_STATE_INVALID;
>>   	kfree(pool->table);
>>   }
>>   
>> @@ -243,14 +240,9 @@ static void rxe_pool_put(struct rxe_pool *pool)
>>   
>>   int rxe_pool_cleanup(struct rxe_pool *pool)
>>   {
>> -	unsigned long flags;
>> -
>> -	write_lock_irqsave(&pool->pool_lock, flags);
>> -	pool->state = RXE_POOL_STATE_INVALID;
>>   	if (atomic_read(&pool->num_elem) > 0)
>>   		pr_warn("%s pool destroyed with unfree'd elem\n",
>>   			pool_name(pool));
>> -	write_unlock_irqrestore(&pool->pool_lock, flags);
> I wonder why it was surrounding the atomic_read call in the first place.

Yeah. atomic_read only gets the value of pool->num_elem. And if 
pool->num_elem > 0, a log will pop out.

It does not change anything.

>
>>   
>>   	rxe_pool_put(pool);
>>   
>> @@ -380,17 +372,11 @@ void rxe_drop_index(void *arg)
>>   void *rxe_alloc(struct rxe_pool *pool)
>>   {
>>   	struct rxe_pool_entry *elem;
>> -	unsigned long flags;
>>   
>>   	might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
>>   
>> -	read_lock_irqsave(&pool->pool_lock, flags);
>> -	if (pool->state != RXE_POOL_STATE_VALID) {
>> -		read_unlock_irqrestore(&pool->pool_lock, flags);
>> +	if (!kref_get_unless_zero(&pool->ref_cnt))
>>   		return NULL;
>> -	}
>> -	kref_get(&pool->ref_cnt);
>> -	read_unlock_irqrestore(&pool->pool_lock, flags);
>>   
>>   	kref_get(&pool->rxe->ref_cnt);
>>   
>> @@ -438,7 +424,7 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
>>   
>>   	read_lock_irqsave(&pool->pool_lock, flags);
>>   
>> -	if (pool->state != RXE_POOL_STATE_VALID)
>> +	if (!kref_read(&pool->ref_cnt))
> So ref_cnt cannot go below 0,right?
>
> Will it be safer to check <= 0, just to protect against incorrect calls to
> rxe_pool_cleanup?

Sure. I agree with you. But I am not sure whether it is necessary to 
change to check <=0 since the root cause is "rxe_pool_cleanup 
incorrectly called".

Thanks for your review.

Zhu Yanjun

>
>>   		goto out;
>>   
>>   	node = pool->tree.rb_node;
>> @@ -470,7 +456,7 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
>>   
>>   	read_lock_irqsave(&pool->pool_lock, flags);
>>   
>> -	if (pool->state != RXE_POOL_STATE_VALID)
>> +	if (!kref_read(&pool->ref_cnt))
> Ditto.
>
>>   		goto out;
>>   
>>   	node = pool->tree.rb_node;
>> diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
>> index aa4ba30..eeca733 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_pool.h
>> +++ b/drivers/infiniband/sw/rxe/rxe_pool.h
>> @@ -73,11 +73,6 @@ struct rxe_type_info {
>>   
>>   extern struct rxe_type_info rxe_type_info[];
>>   
>> -enum rxe_pool_state {
>> -	RXE_POOL_STATE_INVALID,
>> -	RXE_POOL_STATE_VALID,
>> -};
>> -
>>   struct rxe_pool_entry {
>>   	struct rxe_pool		*pool;
>>   	struct kref		ref_cnt;
>> @@ -94,7 +89,6 @@ struct rxe_pool {
>>   	size_t			elem_size;
>>   	struct kref		ref_cnt;
>>   	void			(*cleanup)(struct rxe_pool_entry *obj);
>> -	enum rxe_pool_state	state;
>>   	enum rxe_pool_flags	flags;
>>   	enum rxe_elem_type	type;
>>   
>> -- 
>> 2.7.4
>>
Zhu Yanjun Dec. 21, 2018, 8 a.m. UTC | #4
On 2018/12/21 5:09, Jason Gunthorpe wrote:
> On Thu, Dec 20, 2018 at 08:41:47AM -0500, Zhu Yanjun wrote:
>> The pool state is the duplicate of pool ref_cnt. If ref_cnt > 0,
>> it indicates that state is valid. If ref_cnt = 0, it indicates
>> that state is invalid.
>>
>> Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
>> V1->V2: Follow Jason's advice, the state is replaced with ref_cnt.
>>   drivers/infiniband/sw/rxe/rxe_pool.c | 20 +++-----------------
>>   drivers/infiniband/sw/rxe/rxe_pool.h |  6 ------
>>   2 files changed, 3 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
>> index 36b53fb..d8f969d 100644
>> +++ b/drivers/infiniband/sw/rxe/rxe_pool.c
>> @@ -222,8 +222,6 @@ int rxe_pool_init(
>>   		pool->key_size = rxe_type_info[type].key_size;
>>   	}
>>   
>> -	pool->state = RXE_POOL_STATE_VALID;
>> -
>>   out:
>>   	return err;
>>   }
>> @@ -232,7 +230,6 @@ static void rxe_pool_release(struct kref *kref)
>>   {
>>   	struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt);
>>   
>> -	pool->state = RXE_POOL_STATE_INVALID;
>>   	kfree(pool->table);
>>   }
>>   
>> @@ -243,14 +240,9 @@ static void rxe_pool_put(struct rxe_pool *pool)
>>   
>>   int rxe_pool_cleanup(struct rxe_pool *pool)
>>   {
>> -	unsigned long flags;
>> -
>> -	write_lock_irqsave(&pool->pool_lock, flags);
>> -	pool->state = RXE_POOL_STATE_INVALID;
>>   	if (atomic_read(&pool->num_elem) > 0)
>>   		pr_warn("%s pool destroyed with unfree'd elem\n",
>>   			pool_name(pool));
>> -	write_unlock_irqrestore(&pool->pool_lock, flags);
>>   
>>   	rxe_pool_put(pool);
>>   
>> @@ -380,17 +372,11 @@ void rxe_drop_index(void *arg)
>>   void *rxe_alloc(struct rxe_pool *pool)
>>   {
>>   	struct rxe_pool_entry *elem;
>> -	unsigned long flags;
>>   
>>   	might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
>>   
>> -	read_lock_irqsave(&pool->pool_lock, flags);
>> -	if (pool->state != RXE_POOL_STATE_VALID) {
>> -		read_unlock_irqrestore(&pool->pool_lock, flags);
>> +	if (!kref_get_unless_zero(&pool->ref_cnt))
>>   		return NULL;
>> -	}
>> -	kref_get(&pool->ref_cnt);
>> -	read_unlock_irqrestore(&pool->pool_lock, flags);
>>   
>>   	kref_get(&pool->rxe->ref_cnt);
>>   
>> @@ -438,7 +424,7 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
>>   
>>   	read_lock_irqsave(&pool->pool_lock, flags);
>>   
>> -	if (pool->state != RXE_POOL_STATE_VALID)
>> +	if (!kref_read(&pool->ref_cnt))
>>   		goto out;
> These kref_reads make no sense, the caller has to be holding a kref on
> pool to call this API, otherwise it is already a free'd pointer.  So
> there is no reason to check the kref.
>
> Did you audit that all callers hold the kref?

No. Take pg->pool as an example.

In drivers/infiniband/sw/rxe/rxe_verbs.c:

"

static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
                                    struct ib_qp_init_attr *init,
                                    struct ib_udata *udata)
{
         int err;
         struct rxe_dev *rxe = to_rdev(ibpd->device);
         struct rxe_pd *pd = to_rpd(ibpd);
         struct rxe_qp *qp;
         struct rxe_create_qp_resp __user *uresp = NULL;

         if (udata) {
                 if (udata->outlen < sizeof(*uresp))
                         return ERR_PTR(-EINVAL);
                 uresp = udata->outbuf;
         }

         err = rxe_qp_chk_init(rxe, init);
         if (err)
                 goto err1;

         qp = rxe_alloc(&rxe->qp_pool); <---This will call rxe_alloc 
function.
         if (!qp) {
                 err = -ENOMEM;
                 goto err1;
         }

         if (udata) {
                 if (udata->inlen) {
                         err = -EINVAL;
                         goto err2;
                 }
                 qp->is_user = 1;
         }

         rxe_add_index(qp);

...

"

Before qp = rxe_alloc(&rxe->qp_pool);, there is no any holding a kref on 
pool.

And qp_pool is not pointer variable. So it will not be freed.

drivers/infiniband/sw/rxe/rxe_verbs.h:

"

struct rxe_dev {
         struct ib_device        ib_dev;
         struct ib_device_attr   attr;
         int                     max_ucontext;
         int                     max_inline_data;
         struct kref             ref_cnt;
         struct mutex    usdev_lock;

         struct net_device       *ndev;

         int                     xmit_errors;

         struct rxe_pool         uc_pool;
         struct rxe_pool         pd_pool;
         struct rxe_pool         ah_pool;
         struct rxe_pool         srq_pool;
         struct rxe_pool         qp_pool; <----This is not a pointer 
variable.
         struct rxe_pool         cq_pool;
         struct rxe_pool         mr_pool;
         struct rxe_pool         mw_pool;
         struct rxe_pool         mc_grp_pool;
         struct rxe_pool         mc_elem_pool;

...

"

And in rxe_pool_put

static void rxe_pool_put(struct rxe_pool *pool)
{
         kref_put(&pool->ref_cnt, rxe_pool_release);
}

The function will decrease pool->fef_cnt. It is possible that 
pool->ref_cnt is decreased to zero.

So it is necessary to test kref_read(&pool->ref_cnt).

If I am wrong, please let me know.

Thanks a lot.

Zhu Yanjun

>
> Jason
Jason Gunthorpe Dec. 21, 2018, 4:45 p.m. UTC | #5
On Fri, Dec 21, 2018 at 04:00:23PM +0800, Yanjun Zhu wrote:
> 
> On 2018/12/21 5:09, Jason Gunthorpe wrote:
> > On Thu, Dec 20, 2018 at 08:41:47AM -0500, Zhu Yanjun wrote:
> > > The pool state is the duplicate of pool ref_cnt. If ref_cnt > 0,
> > > it indicates that state is valid. If ref_cnt = 0, it indicates
> > > that state is invalid.
> > > 
> > > Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
> > > V1->V2: Follow Jason's advice, the state is replaced with ref_cnt.
> > >   drivers/infiniband/sw/rxe/rxe_pool.c | 20 +++-----------------
> > >   drivers/infiniband/sw/rxe/rxe_pool.h |  6 ------
> > >   2 files changed, 3 insertions(+), 23 deletions(-)
> > > 
> > > diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
> > > index 36b53fb..d8f969d 100644
> > > +++ b/drivers/infiniband/sw/rxe/rxe_pool.c
> > > @@ -222,8 +222,6 @@ int rxe_pool_init(
> > >   		pool->key_size = rxe_type_info[type].key_size;
> > >   	}
> > > -	pool->state = RXE_POOL_STATE_VALID;
> > > -
> > >   out:
> > >   	return err;
> > >   }
> > > @@ -232,7 +230,6 @@ static void rxe_pool_release(struct kref *kref)
> > >   {
> > >   	struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt);
> > > -	pool->state = RXE_POOL_STATE_INVALID;
> > >   	kfree(pool->table);
> > >   }
> > > @@ -243,14 +240,9 @@ static void rxe_pool_put(struct rxe_pool *pool)
> > >   int rxe_pool_cleanup(struct rxe_pool *pool)
> > >   {
> > > -	unsigned long flags;
> > > -
> > > -	write_lock_irqsave(&pool->pool_lock, flags);
> > > -	pool->state = RXE_POOL_STATE_INVALID;
> > >   	if (atomic_read(&pool->num_elem) > 0)
> > >   		pr_warn("%s pool destroyed with unfree'd elem\n",
> > >   			pool_name(pool));
> > > -	write_unlock_irqrestore(&pool->pool_lock, flags);
> > >   	rxe_pool_put(pool);
> > > @@ -380,17 +372,11 @@ void rxe_drop_index(void *arg)
> > >   void *rxe_alloc(struct rxe_pool *pool)
> > >   {
> > >   	struct rxe_pool_entry *elem;
> > > -	unsigned long flags;
> > >   	might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
> > > -	read_lock_irqsave(&pool->pool_lock, flags);
> > > -	if (pool->state != RXE_POOL_STATE_VALID) {
> > > -		read_unlock_irqrestore(&pool->pool_lock, flags);
> > > +	if (!kref_get_unless_zero(&pool->ref_cnt))
> > >   		return NULL;
> > > -	}
> > > -	kref_get(&pool->ref_cnt);
> > > -	read_unlock_irqrestore(&pool->pool_lock, flags);
> > >   	kref_get(&pool->rxe->ref_cnt);
> > > @@ -438,7 +424,7 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
> > >   	read_lock_irqsave(&pool->pool_lock, flags);
> > > -	if (pool->state != RXE_POOL_STATE_VALID)
> > > +	if (!kref_read(&pool->ref_cnt))
> > >   		goto out;
> > These kref_reads make no sense, the caller has to be holding a kref on
> > pool to call this API, otherwise it is already a free'd pointer.  So
> > there is no reason to check the kref.
> > 
> > Did you audit that all callers hold the kref?
> 
> No. Take pg->pool as an example.
> 
> In drivers/infiniband/sw/rxe/rxe_verbs.c:
> 
> struct rxe_dev {
>         struct ib_device        ib_dev;
>         struct ib_device_attr   attr;
>         int                     max_ucontext;
>         int                     max_inline_data;
>         struct kref             ref_cnt;
>         struct mutex    usdev_lock;
> 
>         struct net_device       *ndev;
> 
>         int                     xmit_errors;
> 
>         struct rxe_pool         uc_pool;
>         struct rxe_pool         pd_pool;
>         struct rxe_pool         ah_pool;
>         struct rxe_pool         srq_pool;
>         struct rxe_pool         qp_pool; <----This is not a pointer
> variable.
>         struct rxe_pool         cq_pool;
>         struct rxe_pool         mr_pool;
>         struct rxe_pool         mw_pool;
>         struct rxe_pool         mc_grp_pool;
>         struct rxe_pool         mc_elem_pool;

Oh. Use only one kref per struct.

Delete the sub-kref and move the table freeing to the release
function of rxe_dev's kref..

The pool should be functional as long as the rxe_dev exists, no need
for the invalid thing.

Jason
Zhu Yanjun Dec. 24, 2018, 9:19 a.m. UTC | #6
On 2018/12/22 0:45, Jason Gunthorpe wrote:
> On Fri, Dec 21, 2018 at 04:00:23PM +0800, Yanjun Zhu wrote:
>> On 2018/12/21 5:09, Jason Gunthorpe wrote:
>>> On Thu, Dec 20, 2018 at 08:41:47AM -0500, Zhu Yanjun wrote:
>>>> The pool state is the duplicate of pool ref_cnt. If ref_cnt > 0,
>>>> it indicates that state is valid. If ref_cnt = 0, it indicates
>>>> that state is invalid.
>>>>
>>>> Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
>>>> V1->V2: Follow Jason's advice, the state is replaced with ref_cnt.
>>>>    drivers/infiniband/sw/rxe/rxe_pool.c | 20 +++-----------------
>>>>    drivers/infiniband/sw/rxe/rxe_pool.h |  6 ------
>>>>    2 files changed, 3 insertions(+), 23 deletions(-)
>>>>
>>>> diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
>>>> index 36b53fb..d8f969d 100644
>>>> +++ b/drivers/infiniband/sw/rxe/rxe_pool.c
>>>> @@ -222,8 +222,6 @@ int rxe_pool_init(
>>>>    		pool->key_size = rxe_type_info[type].key_size;
>>>>    	}
>>>> -	pool->state = RXE_POOL_STATE_VALID;
>>>> -
>>>>    out:
>>>>    	return err;
>>>>    }
>>>> @@ -232,7 +230,6 @@ static void rxe_pool_release(struct kref *kref)
>>>>    {
>>>>    	struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt);
>>>> -	pool->state = RXE_POOL_STATE_INVALID;
>>>>    	kfree(pool->table);
>>>>    }
>>>> @@ -243,14 +240,9 @@ static void rxe_pool_put(struct rxe_pool *pool)
>>>>    int rxe_pool_cleanup(struct rxe_pool *pool)
>>>>    {
>>>> -	unsigned long flags;
>>>> -
>>>> -	write_lock_irqsave(&pool->pool_lock, flags);
>>>> -	pool->state = RXE_POOL_STATE_INVALID;
>>>>    	if (atomic_read(&pool->num_elem) > 0)
>>>>    		pr_warn("%s pool destroyed with unfree'd elem\n",
>>>>    			pool_name(pool));
>>>> -	write_unlock_irqrestore(&pool->pool_lock, flags);
>>>>    	rxe_pool_put(pool);
>>>> @@ -380,17 +372,11 @@ void rxe_drop_index(void *arg)
>>>>    void *rxe_alloc(struct rxe_pool *pool)
>>>>    {
>>>>    	struct rxe_pool_entry *elem;
>>>> -	unsigned long flags;
>>>>    	might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
>>>> -	read_lock_irqsave(&pool->pool_lock, flags);
>>>> -	if (pool->state != RXE_POOL_STATE_VALID) {
>>>> -		read_unlock_irqrestore(&pool->pool_lock, flags);
>>>> +	if (!kref_get_unless_zero(&pool->ref_cnt))
>>>>    		return NULL;
>>>> -	}
>>>> -	kref_get(&pool->ref_cnt);
>>>> -	read_unlock_irqrestore(&pool->pool_lock, flags);
>>>>    	kref_get(&pool->rxe->ref_cnt);
>>>> @@ -438,7 +424,7 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
>>>>    	read_lock_irqsave(&pool->pool_lock, flags);
>>>> -	if (pool->state != RXE_POOL_STATE_VALID)
>>>> +	if (!kref_read(&pool->ref_cnt))
>>>>    		goto out;
>>> These kref_reads make no sense, the caller has to be holding a kref on
>>> pool to call this API, otherwise it is already a free'd pointer.  So
>>> there is no reason to check the kref.
>>>
>>> Did you audit that all callers hold the kref?
>> No. Take pg->pool as an example.
>>
>> In drivers/infiniband/sw/rxe/rxe_verbs.c:
>>
>> struct rxe_dev {
>>          struct ib_device        ib_dev;
>>          struct ib_device_attr   attr;
>>          int                     max_ucontext;
>>          int                     max_inline_data;
>>          struct kref             ref_cnt;
>>          struct mutex    usdev_lock;
>>
>>          struct net_device       *ndev;
>>
>>          int                     xmit_errors;
>>
>>          struct rxe_pool         uc_pool;
>>          struct rxe_pool         pd_pool;
>>          struct rxe_pool         ah_pool;
>>          struct rxe_pool         srq_pool;
>>          struct rxe_pool         qp_pool; <----This is not a pointer
>> variable.
>>          struct rxe_pool         cq_pool;
>>          struct rxe_pool         mr_pool;
>>          struct rxe_pool         mw_pool;
>>          struct rxe_pool         mc_grp_pool;
>>          struct rxe_pool         mc_elem_pool;
> Oh. Use only one kref per struct.
>
> Delete the sub-kref and move the table freeing to the release
> function of rxe_dev's kref..
>
> The pool should be functional as long as the rxe_dev exists, no need
> for the invalid thing.

Sure. I agree with you. RXE_POOL_STATE_VALID/RXE_POOL_STATE_INVALID are 
not necessary.

Zhu Yanjun

>
> Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 36b53fb..d8f969d 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -222,8 +222,6 @@  int rxe_pool_init(
 		pool->key_size = rxe_type_info[type].key_size;
 	}
 
-	pool->state = RXE_POOL_STATE_VALID;
-
 out:
 	return err;
 }
@@ -232,7 +230,6 @@  static void rxe_pool_release(struct kref *kref)
 {
 	struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt);
 
-	pool->state = RXE_POOL_STATE_INVALID;
 	kfree(pool->table);
 }
 
@@ -243,14 +240,9 @@  static void rxe_pool_put(struct rxe_pool *pool)
 
 int rxe_pool_cleanup(struct rxe_pool *pool)
 {
-	unsigned long flags;
-
-	write_lock_irqsave(&pool->pool_lock, flags);
-	pool->state = RXE_POOL_STATE_INVALID;
 	if (atomic_read(&pool->num_elem) > 0)
 		pr_warn("%s pool destroyed with unfree'd elem\n",
 			pool_name(pool));
-	write_unlock_irqrestore(&pool->pool_lock, flags);
 
 	rxe_pool_put(pool);
 
@@ -380,17 +372,11 @@  void rxe_drop_index(void *arg)
 void *rxe_alloc(struct rxe_pool *pool)
 {
 	struct rxe_pool_entry *elem;
-	unsigned long flags;
 
 	might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
 
-	read_lock_irqsave(&pool->pool_lock, flags);
-	if (pool->state != RXE_POOL_STATE_VALID) {
-		read_unlock_irqrestore(&pool->pool_lock, flags);
+	if (!kref_get_unless_zero(&pool->ref_cnt))
 		return NULL;
-	}
-	kref_get(&pool->ref_cnt);
-	read_unlock_irqrestore(&pool->pool_lock, flags);
 
 	kref_get(&pool->rxe->ref_cnt);
 
@@ -438,7 +424,7 @@  void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
 
 	read_lock_irqsave(&pool->pool_lock, flags);
 
-	if (pool->state != RXE_POOL_STATE_VALID)
+	if (!kref_read(&pool->ref_cnt))
 		goto out;
 
 	node = pool->tree.rb_node;
@@ -470,7 +456,7 @@  void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
 
 	read_lock_irqsave(&pool->pool_lock, flags);
 
-	if (pool->state != RXE_POOL_STATE_VALID)
+	if (!kref_read(&pool->ref_cnt))
 		goto out;
 
 	node = pool->tree.rb_node;
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index aa4ba30..eeca733 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -73,11 +73,6 @@  struct rxe_type_info {
 
 extern struct rxe_type_info rxe_type_info[];
 
-enum rxe_pool_state {
-	RXE_POOL_STATE_INVALID,
-	RXE_POOL_STATE_VALID,
-};
-
 struct rxe_pool_entry {
 	struct rxe_pool		*pool;
 	struct kref		ref_cnt;
@@ -94,7 +89,6 @@  struct rxe_pool {
 	size_t			elem_size;
 	struct kref		ref_cnt;
 	void			(*cleanup)(struct rxe_pool_entry *obj);
-	enum rxe_pool_state	state;
 	enum rxe_pool_flags	flags;
 	enum rxe_elem_type	type;