diff mbox

Set the iothread's eventfd/pipe descriptors to non-blocking

Message ID 4A5FA752.6000502@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dor Laor July 16, 2009, 10:18 p.m. UTC
It replaces the previous fix of using select.
From ab5ae4bb69f8ab6c9a476f7823cb8d6729d31594 Mon Sep 17 00:00:00 2001
From: Dor Laor <dor@redhat.com>
Date: Wed, 15 Jul 2009 17:53:16 +0300
Subject: [PATCH] Set the iothread's eventfd/pipe descriptors to non-blocking.

It fixes migration issue when the destination is loaded.

If the migration socket is full, we get EAGAIN for the write.
The set_fd_handler2 defers the write for later on. The function
tries to wake up the iothread by qemu_kvm_notify_work.
Since this happens in a loop, multiple times, the pipe that emulates eventfd
becomes full and we get a deadlock.

Mark McLoughlin suggested to remove spurious wake-up of the migration code
when we get EAGAIN and wait for the socket to become writeable. (+1)

Nevertheless, the pipe descriptors shouldn't be blocking and the reader can
also read several chunks in a time.

Signed-off-by: Dor Laor <dor@redhat.com>
---
 qemu-kvm.c |   24 ++++++++++++++----------
 1 files changed, 14 insertions(+), 10 deletions(-)

Comments

Marcelo Tosatti July 17, 2009, 4:29 p.m. UTC | #1
On Fri, Jul 17, 2009 at 01:18:58AM +0300, Dor Laor wrote:
> It replaces the previous fix of using select.

> >From ab5ae4bb69f8ab6c9a476f7823cb8d6729d31594 Mon Sep 17 00:00:00 2001
> From: Dor Laor <dor@redhat.com>
> Date: Wed, 15 Jul 2009 17:53:16 +0300
> Subject: [PATCH] Set the iothread's eventfd/pipe descriptors to non-blocking.
> 
> It fixes migration issue when the destination is loaded.
> 
> If the migration socket is full, we get EAGAIN for the write.
> The set_fd_handler2 defers the write for later on. The function
> tries to wake up the iothread by qemu_kvm_notify_work.
> Since this happens in a loop, multiple times, the pipe that emulates eventfd
> becomes full and we get a deadlock.
> 
> Mark McLoughlin suggested to remove spurious wake-up of the migration code
> when we get EAGAIN and wait for the socket to become writeable. (+1)
> 
> Nevertheless, the pipe descriptors shouldn't be blocking and the reader can
> also read several chunks in a time.
> 
> Signed-off-by: Dor Laor <dor@redhat.com>
> ---
>  qemu-kvm.c |   24 ++++++++++++++----------
>  1 files changed, 14 insertions(+), 10 deletions(-)
> 
> diff --git a/qemu-kvm.c b/qemu-kvm.c
> index ed7e466..68a453c 100644
> --- a/qemu-kvm.c
> +++ b/qemu-kvm.c
> @@ -2107,14 +2107,17 @@ void qemu_kvm_notify_work(void)
>  	if (len == -1 && errno == EINTR)
>  	    continue;
>  
> -	if (len <= 0)
> +        /* In case we have a pipe, there is not reason to insist writing
> +         * 8 bytes
> +         */
> +	if (len == -1 && errno == EAGAIN)
>  	    break;
>  
> +        if (len <= 0)
> +            break;
> +
>  	offset += len;
>      }
> -
> -    if (offset != 8)
> -	fprintf(stderr, "failed to notify io thread\n");
>  }
>  
>  /* If we have signalfd, we mask out the signals we want to handle and then
> @@ -2153,20 +2156,18 @@ static void sigfd_handler(void *opaque)
>  static void io_thread_wakeup(void *opaque)
>  {
>      int fd = (unsigned long)opaque;
> -    char buffer[8];
> -    size_t offset = 0;
> +    char buffer[4096];
>  
> -    while (offset < 8) {
> +    /* Drain the pipe/(eventfd) */
> +    while (1) {
>  	ssize_t len;
>  
> -	len = read(fd, buffer + offset, 8 - offset);
> +	len = read(fd, buffer, sizeof(buffer));
>  	if (len == -1 && errno == EINTR)
>  	    continue;
>  
>  	if (len <= 0)
>  	    break;
> -
> -	offset += len;
>      }
>  }
>  
> @@ -2184,6 +2185,9 @@ int kvm_main_loop(void)
>  	return -errno;
>      }
>  
> +    fcntl(fds[0], F_SETFL, O_NONBLOCK);
> +    fcntl(fds[1], F_SETFL, O_NONBLOCK);
> +
>      qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
>  			 (void *)(unsigned long)fds[0]);
>  

Looks fine to me. Anthony, can you ACK?


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti July 19, 2009, 12:16 a.m. UTC | #2
On Fri, Jul 17, 2009 at 01:18:58AM +0300, Dor Laor wrote:
> It replaces the previous fix of using select.

> >From ab5ae4bb69f8ab6c9a476f7823cb8d6729d31594 Mon Sep 17 00:00:00 2001
> From: Dor Laor <dor@redhat.com>
> Date: Wed, 15 Jul 2009 17:53:16 +0300
> Subject: [PATCH] Set the iothread's eventfd/pipe descriptors to non-blocking.
> 
> It fixes migration issue when the destination is loaded.
> 
> If the migration socket is full, we get EAGAIN for the write.
> The set_fd_handler2 defers the write for later on. The function
> tries to wake up the iothread by qemu_kvm_notify_work.
> Since this happens in a loop, multiple times, the pipe that emulates eventfd
> becomes full and we get a deadlock.
> 
> Mark McLoughlin suggested to remove spurious wake-up of the migration code
> when we get EAGAIN and wait for the socket to become writeable. (+1)
> 
> Nevertheless, the pipe descriptors shouldn't be blocking and the reader can
> also read several chunks in a time.
> 
> Signed-off-by: Dor Laor <dor@redhat.com>
> ---
>  qemu-kvm.c |   24 ++++++++++++++----------
>  1 files changed, 14 insertions(+), 10 deletions(-)
> 
> diff --git a/qemu-kvm.c b/qemu-kvm.c
> index ed7e466..68a453c 100644
> --- a/qemu-kvm.c
> +++ b/qemu-kvm.c
> @@ -2107,14 +2107,17 @@ void qemu_kvm_notify_work(void)
>  	if (len == -1 && errno == EINTR)
>  	    continue;
>  
> -	if (len <= 0)
> +        /* In case we have a pipe, there is not reason to insist writing
> +         * 8 bytes
> +         */
> +	if (len == -1 && errno == EAGAIN)
>  	    break;
>  
> +        if (len <= 0)
> +            break;
> +
>  	offset += len;
>      }
> -
> -    if (offset != 8)
> -	fprintf(stderr, "failed to notify io thread\n");
>  }
>  
>  /* If we have signalfd, we mask out the signals we want to handle and then
> @@ -2153,20 +2156,18 @@ static void sigfd_handler(void *opaque)
>  static void io_thread_wakeup(void *opaque)
>  {
>      int fd = (unsigned long)opaque;
> -    char buffer[8];
> -    size_t offset = 0;
> +    char buffer[4096];
>  
> -    while (offset < 8) {
> +    /* Drain the pipe/(eventfd) */
> +    while (1) {
>  	ssize_t len;
>  
> -	len = read(fd, buffer + offset, 8 - offset);
> +	len = read(fd, buffer, sizeof(buffer));
>  	if (len == -1 && errno == EINTR)
>  	    continue;
>  
>  	if (len <= 0)
>  	    break;
> -
> -	offset += len;
>      }
>  }
>  
> @@ -2184,6 +2185,9 @@ int kvm_main_loop(void)
>  	return -errno;
>      }
>  
> +    fcntl(fds[0], F_SETFL, O_NONBLOCK);
> +    fcntl(fds[1], F_SETFL, O_NONBLOCK);
> +
>      qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
>  			 (void *)(unsigned long)fds[0]);

Applied, thanks.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/qemu-kvm.c b/qemu-kvm.c
index ed7e466..68a453c 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -2107,14 +2107,17 @@  void qemu_kvm_notify_work(void)
 	if (len == -1 && errno == EINTR)
 	    continue;
 
-	if (len <= 0)
+        /* In case we have a pipe, there is not reason to insist writing
+         * 8 bytes
+         */
+	if (len == -1 && errno == EAGAIN)
 	    break;
 
+        if (len <= 0)
+            break;
+
 	offset += len;
     }
-
-    if (offset != 8)
-	fprintf(stderr, "failed to notify io thread\n");
 }
 
 /* If we have signalfd, we mask out the signals we want to handle and then
@@ -2153,20 +2156,18 @@  static void sigfd_handler(void *opaque)
 static void io_thread_wakeup(void *opaque)
 {
     int fd = (unsigned long)opaque;
-    char buffer[8];
-    size_t offset = 0;
+    char buffer[4096];
 
-    while (offset < 8) {
+    /* Drain the pipe/(eventfd) */
+    while (1) {
 	ssize_t len;
 
-	len = read(fd, buffer + offset, 8 - offset);
+	len = read(fd, buffer, sizeof(buffer));
 	if (len == -1 && errno == EINTR)
 	    continue;
 
 	if (len <= 0)
 	    break;
-
-	offset += len;
     }
 }
 
@@ -2184,6 +2185,9 @@  int kvm_main_loop(void)
 	return -errno;
     }
 
+    fcntl(fds[0], F_SETFL, O_NONBLOCK);
+    fcntl(fds[1], F_SETFL, O_NONBLOCK);
+
     qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
 			 (void *)(unsigned long)fds[0]);