diff mbox series

[1/1] RDMA/rxe: Fix the error "trying to register non-static key in rxe_cleanup_task"

Message ID 20230401024417.3334889-1-yanjun.zhu@intel.com (mailing list archive)
State Superseded
Headers show
Series [1/1] RDMA/rxe: Fix the error "trying to register non-static key in rxe_cleanup_task" | expand

Commit Message

Zhu Yanjun April 1, 2023, 2:44 a.m. UTC
From: Zhu Yanjun <yanjun.zhu@linux.dev>

In the function rxe_create_qp(), rxe_qp_from_init() is called to
initialize the qp. Internally, things like rxe_init_task() are not set up
until rxe_qp_init_req().

If an error occurs before this point, the unwind will call
rxe_cleanup() and eventually rxe_qp_do_cleanup()/rxe_cleanup_task(),
which will oops when trying to access the uninitialized spinlock.

With this patch, if rxe_init_task() has not been executed, rxe_cleanup_task() is not called.

Reported-by: syzbot+cfcc1a3c85be15a40cba@syzkaller.appspotmail.com
Link: https://syzkaller.appspot.com/bug?id=fd85757b74b3eb59f904138486f755f71e090df8

Fixes: 8700e3e7c485 ("Soft RoCE driver")
Fixes: 2d4b21e0a291 ("IB/rxe: Prevent from completer to operate on non valid QP")
Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
---
 drivers/infiniband/sw/rxe/rxe_qp.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

Comments

Leon Romanovsky April 3, 2023, 6:10 p.m. UTC | #1
On Sat, Apr 01, 2023 at 10:44:17AM +0800, Zhu Yanjun wrote:
> From: Zhu Yanjun <yanjun.zhu@linux.dev>
> 
> In the function rxe_create_qp(), rxe_qp_from_init() is called to
> initialize qp, internally things like rxe_init_task are not setup until
> rxe_qp_init_req().
> 
> If an error occures before this point then the unwind will call
> rxe_cleanup() and eventually to rxe_qp_do_cleanup()/rxe_cleanup_task()
> which will oops when trying to access the uninitialized spinlock.
> 
> If rxe_init_task is not executed, rxe_cleanup_task will not be called.
> 
> Reported-by: syzbot+cfcc1a3c85be15a40cba@syzkaller.appspotmail.com
> Link: https://syzkaller.appspot.com/bug?id=fd85757b74b3eb59f904138486f755f71e090df8
> 
> Fixes: 8700e3e7c485 ("Soft RoCE driver")
> Fixes: 2d4b21e0a291 ("IB/rxe: Prevent from completer to operate on non valid QP")
> Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
> ---
>  drivers/infiniband/sw/rxe/rxe_qp.c | 15 ++++++++++++---
>  1 file changed, 12 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
> index ab72db68b58f..7856c02c1b46 100644
> --- a/drivers/infiniband/sw/rxe/rxe_qp.c
> +++ b/drivers/infiniband/sw/rxe/rxe_qp.c
> @@ -176,6 +176,10 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
>  	spin_lock_init(&qp->rq.producer_lock);
>  	spin_lock_init(&qp->rq.consumer_lock);
>  
> +	memset(&qp->req.task, 0, sizeof(struct rxe_task));
> +	memset(&qp->comp.task, 0, sizeof(struct rxe_task));
> +	memset(&qp->resp.task, 0, sizeof(struct rxe_task));

IMHO QP is already zeroed here.

Please don't send patches as reply-to.

Thanks

> +
>  	atomic_set(&qp->ssn, 0);
>  	atomic_set(&qp->skb_out, 0);
>  }
> @@ -773,15 +777,20 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
>  
>  	qp->valid = 0;
>  	qp->qp_timeout_jiffies = 0;
> -	rxe_cleanup_task(&qp->resp.task);
> +
> +	if (qp->resp.task.func)
> +		rxe_cleanup_task(&qp->resp.task);
>  
>  	if (qp_type(qp) == IB_QPT_RC) {
>  		del_timer_sync(&qp->retrans_timer);
>  		del_timer_sync(&qp->rnr_nak_timer);
>  	}
>  
> -	rxe_cleanup_task(&qp->req.task);
> -	rxe_cleanup_task(&qp->comp.task);
> +	if (qp->req.task.func)
> +		rxe_cleanup_task(&qp->req.task);
> +
> +	if (qp->comp.task.func)
> +		rxe_cleanup_task(&qp->comp.task);
>  
>  	/* flush out any receive wr's or pending requests */
>  	if (qp->req.task.func)
> -- 
> 2.27.0
>
Zhu Yanjun April 4, 2023, 12:13 a.m. UTC | #2
在 2023/4/4 2:10, Leon Romanovsky 写道:
> On Sat, Apr 01, 2023 at 10:44:17AM +0800, Zhu Yanjun wrote:
>> From: Zhu Yanjun <yanjun.zhu@linux.dev>
>>
>> In the function rxe_create_qp(), rxe_qp_from_init() is called to
>> initialize qp, internally things like rxe_init_task are not setup until
>> rxe_qp_init_req().
>>
>> If an error occures before this point then the unwind will call
>> rxe_cleanup() and eventually to rxe_qp_do_cleanup()/rxe_cleanup_task()
>> which will oops when trying to access the uninitialized spinlock.
>>
>> If rxe_init_task is not executed, rxe_cleanup_task will not be called.
>>
>> Reported-by: syzbot+cfcc1a3c85be15a40cba@syzkaller.appspotmail.com
>> Link: https://syzkaller.appspot.com/bug?id=fd85757b74b3eb59f904138486f755f71e090df8
>>
>> Fixes: 8700e3e7c485 ("Soft RoCE driver")
>> Fixes: 2d4b21e0a291 ("IB/rxe: Prevent from completer to operate on non valid QP")
>> Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
>> ---
>>   drivers/infiniband/sw/rxe/rxe_qp.c | 15 ++++++++++++---
>>   1 file changed, 12 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
>> index ab72db68b58f..7856c02c1b46 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_qp.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_qp.c
>> @@ -176,6 +176,10 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
>>   	spin_lock_init(&qp->rq.producer_lock);
>>   	spin_lock_init(&qp->rq.consumer_lock);
>>   
>> +	memset(&qp->req.task, 0, sizeof(struct rxe_task));
>> +	memset(&qp->comp.task, 0, sizeof(struct rxe_task));
>> +	memset(&qp->resp.task, 0, sizeof(struct rxe_task));
> IMHO QP is already zeroed here.

Yes, exactly. Here I just wanted to make sure that req.task, comp.task and
resp.task are zeroed explicitly.

If you think it is better to remove these memset() calls, I will follow
your advice.

Please let me know your advice.

> Please don't send patches as reply-to.

Got it. I will follow your advice.

Thanks,

Zhu Yanjun

>
> Thanks
>
>> +
>>   	atomic_set(&qp->ssn, 0);
>>   	atomic_set(&qp->skb_out, 0);
>>   }
>> @@ -773,15 +777,20 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
>>   
>>   	qp->valid = 0;
>>   	qp->qp_timeout_jiffies = 0;
>> -	rxe_cleanup_task(&qp->resp.task);
>> +
>> +	if (qp->resp.task.func)
>> +		rxe_cleanup_task(&qp->resp.task);
>>   
>>   	if (qp_type(qp) == IB_QPT_RC) {
>>   		del_timer_sync(&qp->retrans_timer);
>>   		del_timer_sync(&qp->rnr_nak_timer);
>>   	}
>>   
>> -	rxe_cleanup_task(&qp->req.task);
>> -	rxe_cleanup_task(&qp->comp.task);
>> +	if (qp->req.task.func)
>> +		rxe_cleanup_task(&qp->req.task);
>> +
>> +	if (qp->comp.task.func)
>> +		rxe_cleanup_task(&qp->comp.task);
>>   
>>   	/* flush out any receive wr's or pending requests */
>>   	if (qp->req.task.func)
>> -- 
>> 2.27.0
>>
Leon Romanovsky April 4, 2023, 5:58 a.m. UTC | #3
On Tue, Apr 04, 2023 at 08:13:22AM +0800, Zhu Yanjun wrote:
> 
> 在 2023/4/4 2:10, Leon Romanovsky 写道:
> > On Sat, Apr 01, 2023 at 10:44:17AM +0800, Zhu Yanjun wrote:
> > > From: Zhu Yanjun <yanjun.zhu@linux.dev>
> > > 
> > > In the function rxe_create_qp(), rxe_qp_from_init() is called to
> > > initialize qp, internally things like rxe_init_task are not setup until
> > > rxe_qp_init_req().
> > > 
> > > If an error occures before this point then the unwind will call
> > > rxe_cleanup() and eventually to rxe_qp_do_cleanup()/rxe_cleanup_task()
> > > which will oops when trying to access the uninitialized spinlock.
> > > 
> > > If rxe_init_task is not executed, rxe_cleanup_task will not be called.
> > > 
> > > Reported-by: syzbot+cfcc1a3c85be15a40cba@syzkaller.appspotmail.com
> > > Link: https://syzkaller.appspot.com/bug?id=fd85757b74b3eb59f904138486f755f71e090df8
> > > 
> > > Fixes: 8700e3e7c485 ("Soft RoCE driver")
> > > Fixes: 2d4b21e0a291 ("IB/rxe: Prevent from completer to operate on non valid QP")
> > > Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
> > > ---
> > >   drivers/infiniband/sw/rxe/rxe_qp.c | 15 ++++++++++++---
> > >   1 file changed, 12 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
> > > index ab72db68b58f..7856c02c1b46 100644
> > > --- a/drivers/infiniband/sw/rxe/rxe_qp.c
> > > +++ b/drivers/infiniband/sw/rxe/rxe_qp.c
> > > @@ -176,6 +176,10 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
> > >   	spin_lock_init(&qp->rq.producer_lock);
> > >   	spin_lock_init(&qp->rq.consumer_lock);
> > > +	memset(&qp->req.task, 0, sizeof(struct rxe_task));
> > > +	memset(&qp->comp.task, 0, sizeof(struct rxe_task));
> > > +	memset(&qp->resp.task, 0, sizeof(struct rxe_task));
> > IMHO QP is already zeroed here.
> 
> Sure. Exactly. Here I just confirm that req.task, comp.task and resp.task
> are zeroed explicitly.

There is no need to do so. It is quite misleading to read the code and
see these memset() calls, as they give the false impression that the QP is
not zeroed.

> 
> If you think it had better remove these memset functions, I will follow your
> advice.

Yes, please.

Thanks
Zhu Yanjun April 4, 2023, 6:42 a.m. UTC | #4
在 2023/4/4 13:58, Leon Romanovsky 写道:
> On Tue, Apr 04, 2023 at 08:13:22AM +0800, Zhu Yanjun wrote:
>> 在 2023/4/4 2:10, Leon Romanovsky 写道:
>>> On Sat, Apr 01, 2023 at 10:44:17AM +0800, Zhu Yanjun wrote:
>>>> From: Zhu Yanjun <yanjun.zhu@linux.dev>
>>>>
>>>> In the function rxe_create_qp(), rxe_qp_from_init() is called to
>>>> initialize qp, internally things like rxe_init_task are not setup until
>>>> rxe_qp_init_req().
>>>>
>>>> If an error occures before this point then the unwind will call
>>>> rxe_cleanup() and eventually to rxe_qp_do_cleanup()/rxe_cleanup_task()
>>>> which will oops when trying to access the uninitialized spinlock.
>>>>
>>>> If rxe_init_task is not executed, rxe_cleanup_task will not be called.
>>>>
>>>> Reported-by: syzbot+cfcc1a3c85be15a40cba@syzkaller.appspotmail.com
>>>> Link: https://syzkaller.appspot.com/bug?id=fd85757b74b3eb59f904138486f755f71e090df8
>>>>
>>>> Fixes: 8700e3e7c485 ("Soft RoCE driver")
>>>> Fixes: 2d4b21e0a291 ("IB/rxe: Prevent from completer to operate on non valid QP")
>>>> Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
>>>> ---
>>>>    drivers/infiniband/sw/rxe/rxe_qp.c | 15 ++++++++++++---
>>>>    1 file changed, 12 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
>>>> index ab72db68b58f..7856c02c1b46 100644
>>>> --- a/drivers/infiniband/sw/rxe/rxe_qp.c
>>>> +++ b/drivers/infiniband/sw/rxe/rxe_qp.c
>>>> @@ -176,6 +176,10 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
>>>>    	spin_lock_init(&qp->rq.producer_lock);
>>>>    	spin_lock_init(&qp->rq.consumer_lock);
>>>> +	memset(&qp->req.task, 0, sizeof(struct rxe_task));
>>>> +	memset(&qp->comp.task, 0, sizeof(struct rxe_task));
>>>> +	memset(&qp->resp.task, 0, sizeof(struct rxe_task));
>>> IMHO QP is already zeroed here.
>> Sure. Exactly. Here I just confirm that req.task, comp.task and resp.task
>> are zeroed explicitly.
> There is no need to do so. It is quite misleading to read the code and
> see these memset() functions as they give false impression that QP is
> not zeroed.

I will remove these memset() calls in the latest commit.

Thanks,

Zhu Yanjun

>
>> If you think it had better remove these memset functions, I will follow your
>> advice.
> Yes, please.
>
> Thanks
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index ab72db68b58f..7856c02c1b46 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -176,6 +176,10 @@  static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
 	spin_lock_init(&qp->rq.producer_lock);
 	spin_lock_init(&qp->rq.consumer_lock);
 
+	memset(&qp->req.task, 0, sizeof(struct rxe_task));
+	memset(&qp->comp.task, 0, sizeof(struct rxe_task));
+	memset(&qp->resp.task, 0, sizeof(struct rxe_task));
+
 	atomic_set(&qp->ssn, 0);
 	atomic_set(&qp->skb_out, 0);
 }
@@ -773,15 +777,20 @@  static void rxe_qp_do_cleanup(struct work_struct *work)
 
 	qp->valid = 0;
 	qp->qp_timeout_jiffies = 0;
-	rxe_cleanup_task(&qp->resp.task);
+
+	if (qp->resp.task.func)
+		rxe_cleanup_task(&qp->resp.task);
 
 	if (qp_type(qp) == IB_QPT_RC) {
 		del_timer_sync(&qp->retrans_timer);
 		del_timer_sync(&qp->rnr_nak_timer);
 	}
 
-	rxe_cleanup_task(&qp->req.task);
-	rxe_cleanup_task(&qp->comp.task);
+	if (qp->req.task.func)
+		rxe_cleanup_task(&qp->req.task);
+
+	if (qp->comp.task.func)
+		rxe_cleanup_task(&qp->comp.task);
 
 	/* flush out any receive wr's or pending requests */
 	if (qp->req.task.func)