nbd: pass nbd_sock to nbd_read_reply() instead of index

Message ID 20230911023308.3467802-1-linan666@huaweicloud.com (mailing list archive)
State New, archived
Series nbd: pass nbd_sock to nbd_read_reply() instead of index

Commit Message

Li Nan Sept. 11, 2023, 2:33 a.m. UTC
From: Li Nan <linan122@huawei.com>

If a socket is processing ioctl 'NBD_SET_SOCK', config->socks might be
krealloc'd in nbd_add_socket(), and if a garbage reply is received at that
moment, a UAF may occur.

  T1
  nbd_ioctl
   __nbd_ioctl
    nbd_add_socket
     blk_mq_freeze_queue
				T2
  				recv_work
  				 nbd_read_reply
  				  sock_xmit
     krealloc config->socks
				   deref config->socks

Pass nbd_sock to nbd_read_reply(), and introduce a new helper
__sock_xmit(), which differs from sock_xmit() only in how it gets the
socket.

==================================================================
BUG: KASAN: use-after-free in sock_xmit+0x525/0x550
Read of size 8 at addr ffff8880188ec428 by task kworker/u12:1/18779

Workqueue: knbd4-recv recv_work
Call Trace:
 __dump_stack
 dump_stack+0xbe/0xfd
 print_address_description.constprop.0+0x19/0x170
 __kasan_report.cold+0x6c/0x84
 kasan_report+0x3a/0x50
 sock_xmit+0x525/0x550
 nbd_read_reply+0xfe/0x2c0
 recv_work+0x1c2/0x750
 process_one_work+0x6b6/0xf10
 worker_thread+0xdd/0xd80
 kthread+0x30a/0x410
 ret_from_fork+0x22/0x30

Allocated by task 18784:
 kasan_save_stack+0x1b/0x40
 kasan_set_track
 set_alloc_info
 __kasan_kmalloc
 __kasan_kmalloc.constprop.0+0xf0/0x130
 slab_post_alloc_hook
 slab_alloc_node
 slab_alloc
 __kmalloc_track_caller+0x157/0x550
 __do_krealloc
 krealloc+0x37/0xb0
 nbd_add_socket
 +0x2d3/0x880
 __nbd_ioctl
 nbd_ioctl+0x584/0x8e0
 __blkdev_driver_ioctl
 blkdev_ioctl+0x2a0/0x6e0
 block_ioctl+0xee/0x130
 vfs_ioctl
 __do_sys_ioctl
 __se_sys_ioctl+0x138/0x190
 do_syscall_64+0x33/0x40
 entry_SYSCALL_64_after_hwframe+0x61/0xc6

Freed by task 18784:
 kasan_save_stack+0x1b/0x40
 kasan_set_track+0x1c/0x30
 kasan_set_free_info+0x20/0x40
 __kasan_slab_free.part.0+0x13f/0x1b0
 slab_free_hook
 slab_free_freelist_hook
 slab_free
 kfree+0xcb/0x6c0
 krealloc+0x56/0xb0
 nbd_add_socket+0x2d3/0x880
 __nbd_ioctl
 nbd_ioctl+0x584/0x8e0
 __blkdev_driver_ioctl
 blkdev_ioctl+0x2a0/0x6e0
 block_ioctl+0xee/0x130
 vfs_ioctl
 __do_sys_ioctl
 __se_sys_ioctl+0x138/0x190
 do_syscall_64+0x33/0x40
 entry_SYSCALL_64_after_hwframe+0x61/0xc6

Signed-off-by: Li Nan <linan122@huawei.com>
---
 drivers/block/nbd.c | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)
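
For context, a minimal userspace analogue of the race (illustrative only,
not kernel code; all names below are made up). T1 grows a pointer array
with realloc() while T2 still reads through a stale copy of the array
pointer, which is the same pattern KASAN flags above. Building with
gcc -fsanitize=address -pthread may report the use-after-free:

#include <pthread.h>
#include <stdlib.h>

struct sock_stub { int fd; };		/* stands in for nbd_sock */

static struct sock_stub **socks;	/* stands in for config->socks */
static int num_socks = 1;

/* stands in for nbd_add_socket(): grows the pointer array */
static void *grow_socks(void *arg)
{
	for (int i = 0; i < 1000; i++) {
		struct sock_stub **tmp =
			realloc(socks, (num_socks + 1) * sizeof(*socks));
		if (!tmp)
			exit(1);
		socks = tmp;		/* the old array may now be freed */
		socks[num_socks] = calloc(1, sizeof(struct sock_stub));
		num_socks++;
	}
	return NULL;
}

/* stands in for recv_work()/sock_xmit(): reads via a stale array pointer */
static void *reader(void *arg)
{
	struct sock_stub **stale = socks;	/* snapshot of the old array */

	for (int i = 0; i < 1000; i++) {
		volatile int fd = stale[0]->fd;	/* may read freed memory */
		(void)fd;
	}
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	socks = calloc(1, sizeof(*socks));
	socks[0] = calloc(1, sizeof(struct sock_stub));
	pthread_create(&t1, NULL, grow_socks, NULL);
	pthread_create(&t2, NULL, reader, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}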

Comments

Li Nan Sept. 28, 2023, 3:40 a.m. UTC | #1
Friendly ping ...

On 2023/9/11 10:33, linan666@huaweicloud.com wrote:
> From: Li Nan <linan122@huawei.com>
> 
> [...]
Ming Lei Sept. 28, 2023, 4:05 a.m. UTC | #2
On Mon, Sep 11, 2023 at 10:33:08AM +0800, linan666@huaweicloud.com wrote:
> From: Li Nan <linan122@huawei.com>
> 
> If a socket is processing ioctl 'NBD_SET_SOCK', config->socks might be
> krealloc'd in nbd_add_socket(), and if a garbage reply is received at that
> moment, a UAF may occur.
> 
> [...]
> 

I am wondering why we don't grab the queue usage counter before calling
nbd_read_reply() to avoid such an issue, something like the following change:

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index df1cd0f718b8..09215b605b12 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -837,9 +837,6 @@ static void recv_work(struct work_struct *work)
 	while (1) {
 		struct nbd_reply reply;
 
-		if (nbd_read_reply(nbd, args->index, &reply))
-			break;
-
 		/*
 		 * Grab .q_usage_counter so request pool won't go away, then no
 		 * request use-after-free is possible during nbd_handle_reply().
@@ -852,6 +849,9 @@ static void recv_work(struct work_struct *work)
 			break;
 		}
 
+		if (nbd_read_reply(nbd, args->index, &reply))
+			break;
+
 		cmd = nbd_handle_reply(nbd, args->index, &reply);
 		if (IS_ERR(cmd)) {
 			percpu_ref_put(&q->q_usage_counter);

Thanks,
Ming
Yu Kuai Sept. 28, 2023, 6:03 a.m. UTC | #3
Hi,

On 2023/09/28 12:05, Ming Lei wrote:
> On Mon, Sep 11, 2023 at 10:33:08AM +0800, linan666@huaweicloud.com wrote:
>> [...]
> 
> I am wondering why we don't grab the queue usage counter before calling
> nbd_read_reply() to avoid such an issue, something like the following change:
> 
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index df1cd0f718b8..09215b605b12 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -837,9 +837,6 @@ static void recv_work(struct work_struct *work)
>   	while (1) {
>   		struct nbd_reply reply;
>   
> -		if (nbd_read_reply(nbd, args->index, &reply))
> -			break;
> -
>   		/*
>   		 * Grab .q_usage_counter so request pool won't go away, then no
>   		 * request use-after-free is possible during nbd_handle_reply().
> @@ -852,6 +849,9 @@ static void recv_work(struct work_struct *work)
>   			break;
>   		}
>   

This breaks how nbd works. If there is no reply yet, recv_work() will
wait for a reply in:

nbd_read_reply
  sock_xmit
   sock_recvmsg

After this change, recv_work() will just return if there is no I/O.

Thanks,
Kuai

> +		if (nbd_read_reply(nbd, args->index, &reply))
> +			break;
> +
>   		cmd = nbd_handle_reply(nbd, args->index, &reply);
>   		if (IS_ERR(cmd)) {
>   			percpu_ref_put(&q->q_usage_counter);
> 
> Thanks,
> Ming
> 
Ming Lei Sept. 28, 2023, 7:40 a.m. UTC | #4
On Thu, Sep 28, 2023 at 02:03:28PM +0800, Yu Kuai wrote:
> Hi,
> 
> On 2023/09/28 12:05, Ming Lei wrote:
> > On Mon, Sep 11, 2023 at 10:33:08AM +0800, linan666@huaweicloud.com wrote:
> > > [...]
> > I am wondering why we don't grab the queue usage counter before calling
> > nbd_read_reply() to avoid such an issue, something like the following change:
> > 
> > [...]
> 
> This breaks how nbd works. If there is no reply yet, recv_work() will
> wait for a reply in:
> 
> nbd_read_reply
>  sock_xmit
>   sock_recvmsg
> 
> After this change, recv_work() will just return if there is no I/O.

OK, got it, thanks for the input.

But I feel it is unnecessary and fragile to store one extra reference to nsock in
`recv_thread_args`.

From a quick look, the only potential UAF on config->socks should be in recv_work(),
so you can retrieve the `nsock` reference at the entry of recv_work(),
and just pass it (a local variable) to nbd_read_reply() and nbd_handle_reply()
since `nsock` won't be freed.


Thanks,
Ming
Yu Kuai Sept. 28, 2023, 8:55 a.m. UTC | #5
Hi,

On 2023/09/28 15:40, Ming Lei wrote:
> On Thu, Sep 28, 2023 at 02:03:28PM +0800, Yu Kuai wrote:
>> [...]
> 
> OK, got it, thanks for the input.
> 
> But I feel it is unnecessary and fragile to store one extra reference to nsock in
> `recv_thread_args`.
> 
> From a quick look, the only potential UAF on config->socks should be in recv_work(),
> so you can retrieve the `nsock` reference at the entry of recv_work(),

I don't understand what you mean by retrieving the 'nsock'. Is the
following what you expected?

blk_queue_enter() -> prevents concurrency with nbd_add_socket()
nsock = config->socks[args->index]
blk_queue_exit()

while (1) {
	...
	// pass nsock to nbd_read_reply() and nbd_handle_reply()
}

> and just pass it (a local variable) to nbd_read_reply() and nbd_handle_reply()
> since `nsock` won't be freed.
> 
> 
> Thanks,
> Ming
> 
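
A fleshed-out version of the sketch above (hypothetical, not the merged
code; as noted later in the thread, blk_queue_enter() is not exported to
drivers, which is why the final patch passes nsock through
recv_thread_args instead):

static void recv_work(struct work_struct *work)
{
	struct recv_thread_args *args = container_of(work,
						     struct recv_thread_args,
						     work);
	struct nbd_device *nbd = args->nbd;
	struct request_queue *q = nbd->disk->queue;
	struct nbd_sock *nsock;

	/* Pin the queue so nbd_add_socket() -- which freezes the queue
	 * before krealloc'ing config->socks -- cannot run while the
	 * snapshot below is taken. */
	if (blk_queue_enter(q, 0))
		return;
	nsock = nbd->config->socks[args->index];
	blk_queue_exit(q);

	while (1) {
		struct nbd_reply reply;

		/* Only the local nsock is used from here on: krealloc()
		 * may free the socks[] pointer array, but not the
		 * nbd_sock objects it points to. */
		if (nbd_read_reply(nbd, nsock->sock, &reply))
			break;

		/* reply handling as in the existing loop */
	}
}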
Ming Lei Sept. 28, 2023, 8:57 a.m. UTC | #6
On Thu, Sep 28, 2023 at 04:55:03PM +0800, Yu Kuai wrote:
> Hi,
> 
> [...]
> 
> I don't understand what you mean by retrieving the 'nsock'. Is the
> following what you expected?
> 
> blk_queue_enter() -> prevents concurrency with nbd_add_socket()
> nsock = config->socks[args->index]
> blk_queue_exit()

Yeah, turns out you do understand, :-)

Thanks,
Ming
Yu Kuai Sept. 28, 2023, 9:06 a.m. UTC | #7
Hi,

On 2023/09/28 16:57, Ming Lei wrote:
> On Thu, Sep 28, 2023 at 04:55:03PM +0800, Yu Kuai wrote:
>> [...]
>>
>> I don't understand what you mean by retrieving the 'nsock'. Is the
>> following what you expected?
>>
>> blk_queue_enter() -> prevents concurrency with nbd_add_socket()
>> nsock = config->socks[args->index]
>> blk_queue_exit()
> 
> Yeah, turns out you do understand, :-)

OK, I was not sure about this blk_queue_enter(). By the way, this
reminds me of what you did to fix the UAF of accessing queue->mq_hctx[] by
converting the array to an xarray.


Maybe it's better to convert config->socks[] to an xarray to fix this UAF as
well?

Thanks,
Kuai
Ming Lei Sept. 28, 2023, 9:24 a.m. UTC | #8
On Thu, Sep 28, 2023 at 05:06:40PM +0800, Yu Kuai wrote:
> Hi,
> 
> [...]
> 
> OK, I was not sure about this blk_queue_enter(). By the way, this

blk_queue_enter() isn't exported, but you can grab ->config_lock
to get the `nsock`.

> reminds me of what you did to fix the UAF of accessing queue->mq_hctx[] by
> converting the array to an xarray.
> 
> 
> Maybe it's better to convert config->socks[] to an xarray to fix this UAF as
> well?

->socks[idx] is needed in the nbd fast path, so an xarray may not be a good idea
since xa_load() introduces extra overhead; besides, the ->socks[] UAF
most likely only exists in recv_work(). For the other cases, the active
block request holds the queue usage counter.


Thanks,
Ming
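
To make the trade-off concrete (illustrative only; socks_xa is an assumed
field name, not something that exists in the driver):

	/* today's fast path (see the sock_xmit() hunk above): plain loads */
	struct socket *sock = config->socks[index]->sock;

	/* hypothetical xarray conversion: every lookup walks the xarray's
	 * internal tree via xa_load() instead of indexing an array */
	struct nbd_sock *nsock = xa_load(&config->socks_xa, index);
	struct socket *xsock = nsock->sock;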
Yu Kuai Sept. 28, 2023, 9:40 a.m. UTC | #9
Hi,

On 2023/09/28 17:24, Ming Lei wrote:
> On Thu, Sep 28, 2023 at 05:06:40PM +0800, Yu Kuai wrote:
>> [...]
>> OK, I was not sure about this blk_queue_enter(). By the way, this
> 
> blk_queue_enter() isn't exported, but you can grab ->config_lock
> to get the `nsock`.
> 
>> reminds me of what you did to fix the UAF of accessing queue->mq_hctx[] by
>> converting the array to an xarray.
>>
>>
>> Maybe it's better to convert config->socks[] to an xarray to fix this UAF as
>> well?
> 
> ->socks[idx] is needed in the nbd fast path, so an xarray may not be a good idea
> since xa_load() introduces extra overhead; besides, the ->socks[] UAF
> most likely only exists in recv_work(). For the other cases, the active
> block request holds the queue usage counter.

Thanks for the explanation, grabbing 'config_lock' to get 'nsock' at the
beginning sounds good to me.

Kuai

> 
> 
> Thanks,
> Ming
> 
Yu Kuai Oct. 30, 2023, 2:07 a.m. UTC | #10
Hi,

On 2023/09/28 17:40, Yu Kuai wrote:
> [...]
> 
> Thanks for the explanation, grabbing 'config_lock' to get 'nsock' at the
> beginning sounds good to me.

After reviewing some code, I found that it's wrong to grab config_lock,
because other contexts will grab that lock and then flush_workqueue(), and
there is no guarantee that recv_work() will grab the lock first.

Will it be acceptable to export blk_queue_enter()? I can't think of
another way to retrieve the `nsock` reference at the entry of recv_work().

Thanks,
Kuai

Ming Lei Oct. 30, 2023, 12:42 p.m. UTC | #11
On Mon, Oct 30, 2023 at 10:07:13AM +0800, Yu Kuai wrote:
> Hi,
> 
> [...]
> 
> After reviewing some code, I found that it's wrong to grab config_lock,
> because other contexts will grab that lock and then flush_workqueue(), and
> there is no guarantee that recv_work() will grab the lock first.
> 
> Will it be acceptable to export blk_queue_enter()? I can't think of
> another way to retrieve the `nsock` reference at the entry of recv_work().

Then I think it is easier to pass `nsock` from `recv_thread_args`, which
can be thought of as a local variable too.

Reviewed-by: Ming Lei <ming.lei@redhat.com>


Thanks,
Ming
Yu Kuai Oct. 30, 2023, 1:16 p.m. UTC | #12
On 2023/10/30 20:42, Ming Lei wrote:

>> After reviewing some code, I found that it's wrong to grab config_lock,
>> because other contexts will grab that lock and then flush_workqueue(), and
>> there is no guarantee that recv_work() will grab the lock first.
>>
>> Will it be acceptable to export blk_queue_enter()? I can't think of
>> another way to retrieve the `nsock` reference at the entry of recv_work().
> 
> Then I think it is easier to pass `nsock` from `recv_thread_args`, which
> can be thought of as a local variable too.
> 
> Reviewed-by: Ming Lei <ming.lei@redhat.com>

Agreed

Reviewed-by: Yu Kuai <yukuai3@huawei.com>
Li Nan Nov. 21, 2023, 6:16 a.m. UTC | #13
Hi, Jens

This patch has been reviewed by Yu Kuai and Ming Lei. Could you please
consider applying it?

On 2023/10/30 21:16, Yu Kuai wrote:
> On 2023/10/30 20:42, Ming Lei wrote:
> 
>>> After reviewing some code, I found that it's wrong to grab config_lock,
>>> because other contexts will grab that lock and then flush_workqueue(), and
>>> there is no guarantee that recv_work() will grab the lock first.
>>>
>>> Will it be acceptable to export blk_queue_enter()? I can't think of
>>> another way to retrieve the `nsock` reference at the entry of recv_work().
>>
>> Then I think it is easier to pass `nsock` from `recv_thread_args`, which
>> can be thought of as a local variable too.
>>
>> Reviewed-by: Ming Lei <ming.lei@redhat.com>
> 
> Agreed
> 
> Reviewed-by: Yu Kuai <yukuai3@huawei.com>
Jens Axboe Nov. 21, 2023, 2:42 p.m. UTC | #14
On Mon, 11 Sep 2023 10:33:08 +0800, linan666@huaweicloud.com wrote:
> If a socket is processing ioctl 'NBD_SET_SOCK', config->socks might be
> krealloc'd in nbd_add_socket(), and if a garbage reply is received at that
> moment, a UAF may occur.
> 
>   T1
>   nbd_ioctl
>    __nbd_ioctl
>     nbd_add_socket
>      blk_mq_freeze_queue
> 				T2
>   				recv_work
>   				 nbd_read_reply
>   				  sock_xmit
>      krealloc config->socks
> 				   deref config->socks
> 
> [...]

Applied, thanks!

[1/1] nbd: pass nbd_sock to nbd_read_reply() instead of index
      commit: 98c598afc22d4e43c2ad91860b65996d0c099a5d

Best regards,

Patch

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index a346dbd73543..712b2d164eed 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -67,6 +67,7 @@  struct nbd_sock {
 struct recv_thread_args {
 	struct work_struct work;
 	struct nbd_device *nbd;
+	struct nbd_sock *nsock;
 	int index;
 };
 
@@ -490,15 +491,9 @@  static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req)
 	return BLK_EH_DONE;
 }
 
-/*
- *  Send or receive packet. Return a positive value on success and
- *  negtive value on failue, and never return 0.
- */
-static int sock_xmit(struct nbd_device *nbd, int index, int send,
-		     struct iov_iter *iter, int msg_flags, int *sent)
+static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send,
+		       struct iov_iter *iter, int msg_flags, int *sent)
 {
-	struct nbd_config *config = nbd->config;
-	struct socket *sock = config->socks[index]->sock;
 	int result;
 	struct msghdr msg;
 	unsigned int noreclaim_flag;
@@ -541,6 +536,19 @@  static int sock_xmit(struct nbd_device *nbd, int index, int send,
 	return result;
 }
 
+/*
+ *  Send or receive packet. Return a positive value on success and
+ *  negtive value on failure, and never return 0.
+ */
+static int sock_xmit(struct nbd_device *nbd, int index, int send,
+		     struct iov_iter *iter, int msg_flags, int *sent)
+{
+	struct nbd_config *config = nbd->config;
+	struct socket *sock = config->socks[index]->sock;
+
+	return __sock_xmit(nbd, sock, send, iter, msg_flags, sent);
+}
+
 /*
  * Different settings for sk->sk_sndtimeo can result in different return values
  * if there is a signal pending when we enter sendmsg, because reasons?
@@ -697,7 +705,7 @@  static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 	return 0;
 }
 
-static int nbd_read_reply(struct nbd_device *nbd, int index,
+static int nbd_read_reply(struct nbd_device *nbd, struct socket *sock,
 			  struct nbd_reply *reply)
 {
 	struct kvec iov = {.iov_base = reply, .iov_len = sizeof(*reply)};
@@ -706,7 +714,7 @@  static int nbd_read_reply(struct nbd_device *nbd, int index,
 
 	reply->magic = 0;
 	iov_iter_kvec(&to, ITER_DEST, &iov, 1, sizeof(*reply));
-	result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
+	result = __sock_xmit(nbd, sock, 0, &to, MSG_WAITALL, NULL);
 	if (result < 0) {
 		if (!nbd_disconnected(nbd->config))
 			dev_err(disk_to_dev(nbd->disk),
@@ -830,14 +838,14 @@  static void recv_work(struct work_struct *work)
 	struct nbd_device *nbd = args->nbd;
 	struct nbd_config *config = nbd->config;
 	struct request_queue *q = nbd->disk->queue;
-	struct nbd_sock *nsock;
+	struct nbd_sock *nsock = args->nsock;
 	struct nbd_cmd *cmd;
 	struct request *rq;
 
 	while (1) {
 		struct nbd_reply reply;
 
-		if (nbd_read_reply(nbd, args->index, &reply))
+		if (nbd_read_reply(nbd, nsock->sock, &reply))
 			break;
 
 		/*
@@ -872,7 +880,6 @@  static void recv_work(struct work_struct *work)
 		percpu_ref_put(&q->q_usage_counter);
 	}
 
-	nsock = config->socks[args->index];
 	mutex_lock(&nsock->tx_lock);
 	nbd_mark_nsock_dead(nbd, nsock, 1);
 	mutex_unlock(&nsock->tx_lock);
@@ -1216,6 +1223,7 @@  static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
 		INIT_WORK(&args->work, recv_work);
 		args->index = i;
 		args->nbd = nbd;
+		args->nsock = nsock;
 		nsock->cookie++;
 		mutex_unlock(&nsock->tx_lock);
 		sockfd_put(old);
@@ -1398,6 +1406,7 @@  static int nbd_start_device(struct nbd_device *nbd)
 		refcount_inc(&nbd->config_refs);
 		INIT_WORK(&args->work, recv_work);
 		args->nbd = nbd;
+		args->nsock = config->socks[i];
 		args->index = i;
 		queue_work(nbd->recv_workq, &args->work);
 	}