diff mbox series

nbd: Fix hang when connection reset by peer

Message ID 20230307151528.2532986-1-syoshida@redhat.com (mailing list archive)
State New, archived
Headers show
Series nbd: Fix hang when connection reset by peer | expand

Commit Message

Shigeru Yoshida March 7, 2023, 3:15 p.m. UTC
syzbot reported a hung task [1].  The following program is a simplified
version of the reproducer:

static int fd;

/*
 * Thread entry point: issue the ioctl request encoded in arg on the
 * shared descriptor fd.
 *
 * arg carries the ioctl request number passed through pthread_create()'s
 * void * argument; no third ioctl argument is supplied.  The ioctl result
 * is not checked.  Always returns NULL.
 */
static void *do_ioctl(void *arg)
{
	/*
	 * ioctl() takes an unsigned long request.  Casting the pointer to
	 * int would narrow it on LP64 and trigger a pointer-to-int-cast
	 * warning, so convert to the pointer-sized unsigned long instead.
	 */
	ioctl(fd, (unsigned long)arg);
	return NULL;
}

/*
 * Simplified reproducer: attach one end of a UNIX socketpair to
 * /dev/nbd0, start NBD_DO_IT in one thread and BLKRRPART in another,
 * then close the peer end of the socketpair.
 *
 * Returns 1 if any setup step fails, 0 otherwise.
 */
int main(void)
{
	int sv[2];
	pthread_t th[2];

	if ((fd = open("/dev/nbd0", 0)) < 0)
		return 1;
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
		return 1;
	if (ioctl(fd, NBD_SET_SIZE_BLOCKS, 0x1) < 0)
		return 1;
	if (ioctl(fd, NBD_SET_SOCK, sv[0]) < 0)
		return 1;
	/*
	 * pthread_create() returns 0 on success and a positive error number
	 * on failure (it does not return -1 or set errno), so failure must
	 * be detected with != 0 rather than < 0.
	 */
	if (pthread_create(&th[0], NULL, do_ioctl, (void *)NBD_DO_IT) != 0)
		return 1;
	/* Give the NBD_DO_IT thread time to start before re-reading partitions. */
	sleep(1);
	if (pthread_create(&th[1], NULL, do_ioctl, (void *)BLKRRPART) != 0)
		return 1;
	sleep(1);
	/* Close the peer end of the socketpair while both ioctls are in flight. */
	close(sv[1]);
	sleep(1);
	return 0;
}

When the connection is reset by the peer, nbd_read_reply() in recv_work()
returns -ECONNRESET and wakes up the task blocked in
nbd_start_device_ioctl().  wait_event_interruptible() returns 0 in
this case, so inflight IOs in the queue are not cleared.  This causes
the hung task.

This patch introduces a new runtime flag, NBD_RT_CONNECTION_RESET, to
signal that the connection was reset by the peer.
nbd_start_device_ioctl() checks this flag and clears inflight IOs
accordingly.

Fixes: 3fe1db626a56 ("nbd: partition nbd_read_stat() into nbd_read_reply() and nbd_handle_reply()")
Cc: Yu Kuai <yukuai3@huawei.com>
Link: https://syzkaller.appspot.com/bug?id=f55039d9ac10967440ca01ff24c38ccfa597efe1 [1]
Reported-by: syzbot+6229476844294775319e@syzkaller.appspotmail.com
Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>
---
 drivers/block/nbd.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 592cfa8b765a..fe861a2b063b 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -83,6 +83,7 @@  struct link_dead_args {
 #define NBD_RT_BOUND			5
 #define NBD_RT_DISCONNECT_ON_CLOSE	6
 #define NBD_RT_HAS_BACKEND_FILE		7
+#define NBD_RT_CONNECTION_RESET		8
 
 #define NBD_DESTROY_ON_DISCONNECT	0
 #define NBD_DISCONNECT_REQUESTED	1
@@ -833,9 +834,13 @@  static void recv_work(struct work_struct *work)
 
 	while (1) {
 		struct nbd_reply reply;
+		int err = nbd_read_reply(nbd, args->index, &reply);
 
-		if (nbd_read_reply(nbd, args->index, &reply))
+		if (err) {
+			if (err == -ECONNRESET)
+				set_bit(NBD_RT_CONNECTION_RESET, &config->runtime_flags);
 			break;
+		}
 
 		/*
 		 * Grab .q_usage_counter so request pool won't go away, then no
@@ -1412,7 +1417,7 @@  static int nbd_start_device_ioctl(struct nbd_device *nbd)
 	mutex_unlock(&nbd->config_lock);
 	ret = wait_event_interruptible(config->recv_wq,
 					 atomic_read(&config->recv_threads) == 0);
-	if (ret) {
+	if (ret || test_bit(NBD_RT_CONNECTION_RESET, &config->runtime_flags)) {
 		sock_shutdown(nbd);
 		nbd_clear_que(nbd);
 	}