diff mbox

[V2,1/1] virtio:Allocate temporary VirtQueueElementOld on heap

Message ID 1457970015-3181-1-git-send-email-tiwari.jaya18@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jaya Tiwari March 14, 2016, 3:40 p.m. UTC
As per the list of functions in :
http://wiki.qemu.org/BiteSizedTasks#Large_frames,
qemu_get_virtqueue_element  and qemu_put_virtqueue_element
have large arrays on stack. Hence, moving them to heap
This reduced their stack size from something 49248
to fit into less than 200.

Signed-off-by: Jaya Tiwari <tiwari.jaya18@gmail.com>
---
 hw/virtio/virtio.c | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

Comments

Michael S. Tsirkin March 15, 2016, 7:02 a.m. UTC | #1
On Mon, Mar 14, 2016 at 09:10:15PM +0530, Jaya Tiwari wrote:
> As per the list of functions in :
> http://wiki.qemu.org/BiteSizedTasks#Large_frames,
> qemu_get_virtqueue_element  and qemu_put_virtqueue_element
> have large arrays on stack. Hence, moving them to heap
> This reduced their stack size from something 49248
> to fit into less than 200.
> 
> Signed-off-by: Jaya Tiwari <tiwari.jaya18@gmail.com>

This is not what that page suggests. It says:
	Make the stack array
	smaller and allocate on the heap in the rare case that the data does not
	fit in the small array:

This patch just uses heap unconditionally which is sure to hurt performance.


> ---
>  hw/virtio/virtio.c | 37 +++++++++++++++++++------------------
>  1 file changed, 19 insertions(+), 18 deletions(-)
> 
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 08275a9..027e7e2 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -636,67 +636,68 @@ typedef struct VirtQueueElementOld {
>  void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
>  {
>      VirtQueueElement *elem;
> -    VirtQueueElementOld data;
> +    VirtQueueElementOld *data = g_new(VirtQueueElementOld, 1);
>      int i;
>  
> -    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
> +    qemu_get_buffer(f, (uint8_t *)data, sizeof(VirtQueueElementOld));
>  
> -    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
> -    elem->index = data.index;
> +    elem = virtqueue_alloc_element(sz, data->out_num, data->in_num);
> +    elem->index = data->index;
>  
>      for (i = 0; i < elem->in_num; i++) {
> -        elem->in_addr[i] = data.in_addr[i];
> +        elem->in_addr[i] = data->in_addr[i];
>      }
>  
>      for (i = 0; i < elem->out_num; i++) {
> -        elem->out_addr[i] = data.out_addr[i];
> +        elem->out_addr[i] = data->out_addr[i];
>      }
>  
>      for (i = 0; i < elem->in_num; i++) {
>          /* Base is overwritten by virtqueue_map.  */
>          elem->in_sg[i].iov_base = 0;
> -        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
> +        elem->in_sg[i].iov_len = data->in_sg[i].iov_len;
>      }
>  
>      for (i = 0; i < elem->out_num; i++) {
>          /* Base is overwritten by virtqueue_map.  */
>          elem->out_sg[i].iov_base = 0;
> -        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
> +        elem->out_sg[i].iov_len = data->out_sg[i].iov_len;
>      }
>  
> +    g_free(data);
>      virtqueue_map(elem);
>      return elem;
>  }
>  
>  void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem)
>  {
> -    VirtQueueElementOld data;
> +    VirtQueueElementOld *data = g_new0(VirtQueueElementOld, 1);
>      int i;
>  
> -    memset(&data, 0, sizeof(data));
> -    data.index = elem->index;
> -    data.in_num = elem->in_num;
> -    data.out_num = elem->out_num;
> +    data->index = elem->index;
> +    data->in_num = elem->in_num;
> +    data->out_num = elem->out_num;
>  
>      for (i = 0; i < elem->in_num; i++) {
> -        data.in_addr[i] = elem->in_addr[i];
> +        data->in_addr[i] = elem->in_addr[i];
>      }
>  
>      for (i = 0; i < elem->out_num; i++) {
> -        data.out_addr[i] = elem->out_addr[i];
> +        data->out_addr[i] = elem->out_addr[i];
>      }
>  
>      for (i = 0; i < elem->in_num; i++) {
>          /* Base is overwritten by virtqueue_map when loading.  Do not
>           * save it, as it would leak the QEMU address space layout.  */
> -        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
> +        data->in_sg[i].iov_len = elem->in_sg[i].iov_len;
>      }
>  
>      for (i = 0; i < elem->out_num; i++) {
>          /* Do not save iov_base as above.  */
> -        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
> +        data->out_sg[i].iov_len = elem->out_sg[i].iov_len;
>      }
> -    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
> +    qemu_put_buffer(f, (uint8_t *)data, sizeof(VirtQueueElementOld));
> +    g_free(data);
>  }
>  
>  /* virtio device */
> -- 
> 1.8.1.2
Jaya Tiwari March 15, 2016, 7:36 a.m. UTC | #2
On Tue, Mar 15, 2016 at 12:32 PM, Michael S. Tsirkin <mst@redhat.com> wrote:

> On Mon, Mar 14, 2016 at 09:10:15PM +0530, Jaya Tiwari wrote:
> > As per the list of functions in :
> > http://wiki.qemu.org/BiteSizedTasks#Large_frames,
> > qemu_get_virtqueue_element  and qemu_put_virtqueue_element
> > have large arrays on stack. Hence, moving them to heap
> > This reduced their stack size from something 49248
> > to fit into less than 200.
> >
> > Signed-off-by: Jaya Tiwari <tiwari.jaya18@gmail.com>
>
> This is not what that page suggests. It says:
>         Make the stack array
>         smaller and allocate on the heap in the rare case that the data
> does not
>         fit in the small array:
>
> This patch just uses heap unconditionally which is sure to hurt
> performance.
>
> Yes Okay.
Thank you for pointing it out.
So I should be including a condition to check with a small stack size, and
if the array crosses it, only then
it should be placed in heap, otherwise it should not be using heap.
Am I correct in my understanding here?


> > ---
> >  hw/virtio/virtio.c | 37 +++++++++++++++++++------------------
> >  1 file changed, 19 insertions(+), 18 deletions(-)
> >
> > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > index 08275a9..027e7e2 100644
> > --- a/hw/virtio/virtio.c
> > +++ b/hw/virtio/virtio.c
> > @@ -636,67 +636,68 @@ typedef struct VirtQueueElementOld {
> >  void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
> >  {
> >      VirtQueueElement *elem;
> > -    VirtQueueElementOld data;
> > +    VirtQueueElementOld *data = g_new(VirtQueueElementOld, 1);
> >      int i;
> >
> > -    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
> > +    qemu_get_buffer(f, (uint8_t *)data, sizeof(VirtQueueElementOld));
> >
> > -    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
> > -    elem->index = data.index;
> > +    elem = virtqueue_alloc_element(sz, data->out_num, data->in_num);
> > +    elem->index = data->index;
> >
> >      for (i = 0; i < elem->in_num; i++) {
> > -        elem->in_addr[i] = data.in_addr[i];
> > +        elem->in_addr[i] = data->in_addr[i];
> >      }
> >
> >      for (i = 0; i < elem->out_num; i++) {
> > -        elem->out_addr[i] = data.out_addr[i];
> > +        elem->out_addr[i] = data->out_addr[i];
> >      }
> >
> >      for (i = 0; i < elem->in_num; i++) {
> >          /* Base is overwritten by virtqueue_map.  */
> >          elem->in_sg[i].iov_base = 0;
> > -        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
> > +        elem->in_sg[i].iov_len = data->in_sg[i].iov_len;
> >      }
> >
> >      for (i = 0; i < elem->out_num; i++) {
> >          /* Base is overwritten by virtqueue_map.  */
> >          elem->out_sg[i].iov_base = 0;
> > -        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
> > +        elem->out_sg[i].iov_len = data->out_sg[i].iov_len;
> >      }
> >
> > +    g_free(data);
> >      virtqueue_map(elem);
> >      return elem;
> >  }
> >
> >  void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem)
> >  {
> > -    VirtQueueElementOld data;
> > +    VirtQueueElementOld *data = g_new0(VirtQueueElementOld, 1);
> >      int i;
> >
> > -    memset(&data, 0, sizeof(data));
> > -    data.index = elem->index;
> > -    data.in_num = elem->in_num;
> > -    data.out_num = elem->out_num;
> > +    data->index = elem->index;
> > +    data->in_num = elem->in_num;
> > +    data->out_num = elem->out_num;
> >
> >      for (i = 0; i < elem->in_num; i++) {
> > -        data.in_addr[i] = elem->in_addr[i];
> > +        data->in_addr[i] = elem->in_addr[i];
> >      }
> >
> >      for (i = 0; i < elem->out_num; i++) {
> > -        data.out_addr[i] = elem->out_addr[i];
> > +        data->out_addr[i] = elem->out_addr[i];
> >      }
> >
> >      for (i = 0; i < elem->in_num; i++) {
> >          /* Base is overwritten by virtqueue_map when loading.  Do not
> >           * save it, as it would leak the QEMU address space layout.  */
> > -        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
> > +        data->in_sg[i].iov_len = elem->in_sg[i].iov_len;
> >      }
> >
> >      for (i = 0; i < elem->out_num; i++) {
> >          /* Do not save iov_base as above.  */
> > -        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
> > +        data->out_sg[i].iov_len = elem->out_sg[i].iov_len;
> >      }
> > -    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
> > +    qemu_put_buffer(f, (uint8_t *)data, sizeof(VirtQueueElementOld));
> > +    g_free(data);
> >  }
> >
> >  /* virtio device */
> > --
> > 1.8.1.2
>
Paolo Bonzini March 15, 2016, 9:16 a.m. UTC | #3
On 15/03/2016 08:36, Jaya Tiwari wrote:
> 
>     This is not what that page suggests. It says:
>     Make the stack array
>     smaller and allocate on the heap in the rare case that the
>     data does not fit in the small array:
> 
>     This patch just uses heap unconditionally which is sure to hurt
>     performance.

This is not a hot path.  It only happens when saving/loading data after
migration.  Surely the few microseconds wasted in allocating data on the
heap are beaten by zeroing the memory, by all the for loops in the
functions, and of course by the 3-500 *milli*seconds of downtime caused
by migration.

> Yes Okay.
> Thank you for pointing it out.
> So I should be including a condition to check with a small stack size,
> and if the array crosses it, only then
> it should be placed in heap, otherwise it should not be using heap.
> Am I correct in my understanding here? 

Jaya, this patch is okay.  What Michael said is true in other cases, but
not this one.

Paolo
Michael S. Tsirkin March 15, 2016, 9:40 a.m. UTC | #4
On Tue, Mar 15, 2016 at 10:16:00AM +0100, Paolo Bonzini wrote:
> 
> 
> On 15/03/2016 08:36, Jaya Tiwari wrote:
> > 
> >     This is not what that page suggests. It says:
> >     Make the stack array
> >     smaller and allocate on the heap in the rare case that the
> >     data does not fit in the small array:
> > 
> >     This patch just uses heap unconditionally which is sure to hurt
> >     performance.
> 
> This is not a hot path.  It only happens when saving/loading data after
> migration.  Surely the few microseconds wasted in allocating data on the
> heap are beaten by zeroing the memory, by all the for loops in the
> functions, and of course by the 3-500 *milli*seconds of downtime caused
> by migration.
> 
> > Yes Okay.
> > Thank you for pointing it out.
> > So I should be including a condition to check with a small stack size,
> > and if the array crosses it, only then
> > it should be placed in heap, otherwise it should not be using heap.
> > Am I correct in my understanding here? 
> 
> Jaya, this patch is okay.  What Michael said is true in other cases, but
> not this one.
> 
> Paolo

Hmm, I got confused. You are right.
I'll redo the review, sorry about the noise.
diff mbox

Patch

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 08275a9..027e7e2 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -636,67 +636,68 @@  typedef struct VirtQueueElementOld {
 void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
 {
     VirtQueueElement *elem;
-    VirtQueueElementOld data;
+    VirtQueueElementOld *data = g_new(VirtQueueElementOld, 1);
     int i;
 
-    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
+    qemu_get_buffer(f, (uint8_t *)data, sizeof(VirtQueueElementOld));
 
-    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
-    elem->index = data.index;
+    elem = virtqueue_alloc_element(sz, data->out_num, data->in_num);
+    elem->index = data->index;
 
     for (i = 0; i < elem->in_num; i++) {
-        elem->in_addr[i] = data.in_addr[i];
+        elem->in_addr[i] = data->in_addr[i];
     }
 
     for (i = 0; i < elem->out_num; i++) {
-        elem->out_addr[i] = data.out_addr[i];
+        elem->out_addr[i] = data->out_addr[i];
     }
 
     for (i = 0; i < elem->in_num; i++) {
         /* Base is overwritten by virtqueue_map.  */
         elem->in_sg[i].iov_base = 0;
-        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
+        elem->in_sg[i].iov_len = data->in_sg[i].iov_len;
     }
 
     for (i = 0; i < elem->out_num; i++) {
         /* Base is overwritten by virtqueue_map.  */
         elem->out_sg[i].iov_base = 0;
-        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
+        elem->out_sg[i].iov_len = data->out_sg[i].iov_len;
     }
 
+    g_free(data);
     virtqueue_map(elem);
     return elem;
 }
 
 void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem)
 {
-    VirtQueueElementOld data;
+    VirtQueueElementOld *data = g_new0(VirtQueueElementOld, 1);
     int i;
 
-    memset(&data, 0, sizeof(data));
-    data.index = elem->index;
-    data.in_num = elem->in_num;
-    data.out_num = elem->out_num;
+    data->index = elem->index;
+    data->in_num = elem->in_num;
+    data->out_num = elem->out_num;
 
     for (i = 0; i < elem->in_num; i++) {
-        data.in_addr[i] = elem->in_addr[i];
+        data->in_addr[i] = elem->in_addr[i];
     }
 
     for (i = 0; i < elem->out_num; i++) {
-        data.out_addr[i] = elem->out_addr[i];
+        data->out_addr[i] = elem->out_addr[i];
     }
 
     for (i = 0; i < elem->in_num; i++) {
         /* Base is overwritten by virtqueue_map when loading.  Do not
          * save it, as it would leak the QEMU address space layout.  */
-        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
+        data->in_sg[i].iov_len = elem->in_sg[i].iov_len;
     }
 
     for (i = 0; i < elem->out_num; i++) {
         /* Do not save iov_base as above.  */
-        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
+        data->out_sg[i].iov_len = elem->out_sg[i].iov_len;
     }
-    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
+    qemu_put_buffer(f, (uint8_t *)data, sizeof(VirtQueueElementOld));
+    g_free(data);
 }
 
 /* virtio device */