diff mbox

virtio-blk: notify guest directly

Message ID 1513690383-27269-1-git-send-email-sochin.jiang@huawei.com (mailing list archive)
State New, archived
Headers show

Commit Message

sochin jiang Dec. 19, 2017, 1:33 p.m. UTC
From: "sochin.jiang" <sochin.jiang@huawei.com>

 Till now, we've already notify guest as a batch mostly, an
 extra BH won't decrease guest interrupts much, but cause a
 significant notification loss. Generally, we could have 15%
 or so performance lost in single queue IO models, as I tested.

Signed-off-by: sochin.jiang <sochin.jiang@huawei.com>
---
 hw/block/dataplane/virtio-blk.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

Comments

Paolo Bonzini Dec. 20, 2017, 12:57 a.m. UTC | #1
On 19/12/2017 14:33, sochin.jiang wrote:
> From: "sochin.jiang" <sochin.jiang@huawei.com>
> 
>  Till now, we've already notify guest as a batch mostly, an
>  extra BH won't decrease guest interrupts much, but cause a
>  significant notification loss. Generally, we could have 15%
>  or so performance lost in single queue IO models, as I tested.

Interesting, this was indeed done to decrease interrupt overhead:

    commit 5b2ffbe4d99843fd8305c573a100047a8c962327
    Author: Ming Lei <tom.leiming@gmail.com>
    Date:   Sat Jul 12 12:08:53 2014 +0800

    virtio-blk: dataplane: notify guest as a batch

    Now requests are submitted as a batch, so it is natural
    to notify guest as a batch too.

    This may suppress interrupt notification to VM a lot:

            - in my test, decreased by ~13K/sec

    Signed-off-by: Ming Lei <ming.lei@canonical.com>
    Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>

Can you explain your benchmark setup?

Paolo


> Signed-off-by: sochin.jiang <sochin.jiang@huawei.com>
> ---
>  hw/block/dataplane/virtio-blk.c | 19 +++++++------------
>  1 file changed, 7 insertions(+), 12 deletions(-)
> 
> diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
> index 5556f0e..a261a1d 100644
> --- a/hw/block/dataplane/virtio-blk.c
> +++ b/hw/block/dataplane/virtio-blk.c
> @@ -32,7 +32,6 @@ struct VirtIOBlockDataPlane {
>  
>      VirtIOBlkConf *conf;
>      VirtIODevice *vdev;
> -    QEMUBH *bh;                     /* bh for guest notification */
>      unsigned long *batch_notify_vqs;
>  
>      /* Note that these EventNotifiers are assigned by value.  This is
> @@ -44,14 +43,7 @@ struct VirtIOBlockDataPlane {
>      AioContext *ctx;
>  };
>  
> -/* Raise an interrupt to signal guest, if necessary */
> -void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq)
> -{
> -    set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs);
> -    qemu_bh_schedule(s->bh);
> -}
> -
> -static void notify_guest_bh(void *opaque)
> +static void notify_guest(void *opaque)
>  {
>      VirtIOBlockDataPlane *s = opaque;
>      unsigned nvqs = s->conf->num_queues;
> @@ -75,7 +67,12 @@ static void notify_guest_bh(void *opaque)
>      }
>  }
>  
> -/* Context: QEMU global mutex held */
> +/* Raise an interrupt to signal guest, if necessary */
> +void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq)
> +{
> +    set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs);
> +    notify_guest(s);
> +}
>  void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
>                                    VirtIOBlockDataPlane **dataplane,
>                                    Error **errp)
> @@ -122,7 +119,6 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
>      } else {
>          s->ctx = qemu_get_aio_context();
>      }
> -    s->bh = aio_bh_new(s->ctx, notify_guest_bh, s);
>      s->batch_notify_vqs = bitmap_new(conf->num_queues);
>  
>      *dataplane = s;
> @@ -140,7 +136,6 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s)
>      vblk = VIRTIO_BLK(s->vdev);
>      assert(!vblk->dataplane_started);
>      g_free(s->batch_notify_vqs);
> -    qemu_bh_delete(s->bh);
>      if (s->iothread) {
>          object_unref(OBJECT(s->iothread));
>      }
>
sochin jiang Dec. 20, 2017, 2:53 a.m. UTC | #2
In fact, I firstly found a performance loss  before and after

commit 9ffe337 using fio tools in suse11-sp3 guest(vitio-blk), especially

when testing 4k single IO models(say, write, randwrite, read and

randread, with iodepth set to 1), the result is 15%-20% performance loss

since commit 9ffe337, difference is an extra notification bh in dataplane as

I posted.

Then, I tested more IO models. Indeed, an extra BH can decrease interrupts

in guest, but not much more like ~13K/sec now, and only when testing

large IO(bs set to 64k,128k, 1024k...) or large iodepths(64, 128...) models, ~5K/sec

decreased.

I did not test all IO models of course, and different models with different interrupts.

But on the whole, a performance loss is certain with an extra notification BH.


Sochin






On 2017/12/20 8:57, Paolo Bonzini wrote:
> On 19/12/2017 14:33, sochin.jiang wrote:
>> From: "sochin.jiang" <sochin.jiang@huawei.com>
>>
>>  Till now, we've already notify guest as a batch mostly, an
>>  extra BH won't decrease guest interrupts much, but cause a
>>  significant notification loss. Generally, we could have 15%
>>  or so performance lost in single queue IO models, as I tested.
> Interesting, this was indeed done to decrease interrupt overhead:
>
>     commit 5b2ffbe4d99843fd8305c573a100047a8c962327
>     Author: Ming Lei <tom.leiming@gmail.com>
>     Date:   Sat Jul 12 12:08:53 2014 +0800
>
>     virtio-blk: dataplane: notify guest as a batch
>
>     Now requests are submitted as a batch, so it is natural
>     to notify guest as a batch too.
>
>     This may suppress interrupt notification to VM a lot:
>
>             - in my test, decreased by ~13K/sec
>
>     Signed-off-by: Ming Lei <ming.lei@canonical.com>
>     Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
>
> Can you explain your benchmark setup?
>
> Paolo
>
>
>> Signed-off-by: sochin.jiang <sochin.jiang@huawei.com>
>> ---
>>  hw/block/dataplane/virtio-blk.c | 19 +++++++------------
>>  1 file changed, 7 insertions(+), 12 deletions(-)
>>
>> diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
>> index 5556f0e..a261a1d 100644
>> --- a/hw/block/dataplane/virtio-blk.c
>> +++ b/hw/block/dataplane/virtio-blk.c
>> @@ -32,7 +32,6 @@ struct VirtIOBlockDataPlane {
>>  
>>      VirtIOBlkConf *conf;
>>      VirtIODevice *vdev;
>> -    QEMUBH *bh;                     /* bh for guest notification */
>>      unsigned long *batch_notify_vqs;
>>  
>>      /* Note that these EventNotifiers are assigned by value.  This is
>> @@ -44,14 +43,7 @@ struct VirtIOBlockDataPlane {
>>      AioContext *ctx;
>>  };
>>  
>> -/* Raise an interrupt to signal guest, if necessary */
>> -void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq)
>> -{
>> -    set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs);
>> -    qemu_bh_schedule(s->bh);
>> -}
>> -
>> -static void notify_guest_bh(void *opaque)
>> +static void notify_guest(void *opaque)
>>  {
>>      VirtIOBlockDataPlane *s = opaque;
>>      unsigned nvqs = s->conf->num_queues;
>> @@ -75,7 +67,12 @@ static void notify_guest_bh(void *opaque)
>>      }
>>  }
>>  
>> -/* Context: QEMU global mutex held */
>> +/* Raise an interrupt to signal guest, if necessary */
>> +void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq)
>> +{
>> +    set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs);
>> +    notify_guest(s);
>> +}
>>  void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
>>                                    VirtIOBlockDataPlane **dataplane,
>>                                    Error **errp)
>> @@ -122,7 +119,6 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
>>      } else {
>>          s->ctx = qemu_get_aio_context();
>>      }
>> -    s->bh = aio_bh_new(s->ctx, notify_guest_bh, s);
>>      s->batch_notify_vqs = bitmap_new(conf->num_queues);
>>  
>>      *dataplane = s;
>> @@ -140,7 +136,6 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s)
>>      vblk = VIRTIO_BLK(s->vdev);
>>      assert(!vblk->dataplane_started);
>>      g_free(s->batch_notify_vqs);
>> -    qemu_bh_delete(s->bh);
>>      if (s->iothread) {
>>          object_unref(OBJECT(s->iothread));
>>      }
>>
>
> .
>
Stefan Hajnoczi Dec. 20, 2017, 4:20 p.m. UTC | #3
On Tue, Dec 19, 2017 at 09:33:03PM +0800, sochin.jiang wrote:
> From: "sochin.jiang" <sochin.jiang@huawei.com>
> 
>  Till now, we've already notify guest as a batch mostly, an
>  extra BH won't decrease guest interrupts much, but cause a
>  significant notification loss. Generally, we could have 15%
>  or so performance lost in single queue IO models, as I tested.

I have CCed Ming Lei, who originally implemented batched notifications
in commit 5b2ffbe4d99843fd8305c573a100047a8c962327 ("virtio-blk:
dataplane: notify guest as a batch").  The original commit mentions a 13K
interrupt/sec reduction, which is significant.

Which host storage device are you benchmarking and what is the benchmark
configuration?

How many interrupts/sec does the guest report (cat /proc/interrupts)
before and after this patch?

In the past I've noticed performance can vary significantly depending on
QEMUBH ordering in the AioContext->first_bh list.  Can you measure the
latency from virtio_blk_data_plane_notify() to aio_bh_poll() and compare
against the latency from virtio_blk_data_plane_notify() to
notify_guest_bh()?

> @@ -75,7 +67,12 @@ static void notify_guest_bh(void *opaque)
>      }
>  }
>  
> -/* Context: QEMU global mutex held */

Please keep this doc comment for virtio_blk_data_plane_create().
Stefan Hajnoczi March 2, 2018, 1:38 p.m. UTC | #4
On Tue, Dec 19, 2017 at 1:33 PM, sochin.jiang <sochin.jiang@huawei.com> wrote:
>  Till now, we've already notify guest as a batch mostly, an
>  extra BH won't decrease guest interrupts much, but cause a
>  significant notification loss. Generally, we could have 15%
>  or so performance lost in single queue IO models, as I tested.

Recent performance testing has shown that virtio-blk can underperform
virtio-scsi due to the extra latency added by the BH.

The virtqueue EVENT_IDX feature mitigates interrupts when the guest
interrupt handler has not had a chance to run yet.  Therefore, virtio
already offers one level of interrupt mitigation and the BH adds
additional latency on top.

The BH helps when the guest services interrupts very quickly
(defeating EVENT_IDX mitigation) and therefore the device raises
additional interrupts.  Network drivers in Linux solve this using
NAPI, where the driver disables interrupts during periods of high
completion rates.  We could do the same inside the guest driver and it
would not suffer from BH latency.

I now think that this patch would benefit most cases, although it may
harm the case mentioned above without further patches to the
virtio_blk.ko guest driver.

Please resend the patch with fio benchmark output in the commit
description and the issue I raised fixed:

> @@ -75,7 +67,12 @@ static void notify_guest_bh(void *opaque)
>      }
>  }
>
> -/* Context: QEMU global mutex held */

Please keep this doc comment for virtio_blk_data_plane_create().

Thanks,
Stefan
Sergio Lopez March 2, 2018, 4:20 p.m. UTC | #5
On Fri, Mar 02, 2018 at 01:38:52PM +0000, Stefan Hajnoczi wrote:
> On Tue, Dec 19, 2017 at 1:33 PM, sochin.jiang <sochin.jiang@huawei.com> wrote:
> >  Till now, we've already notify guest as a batch mostly, an
> >  extra BH won't decrease guest interrupts much, but cause a
> >  significant notification loss. Generally, we could have 15%
> >  or so performance lost in single queue IO models, as I tested.
> 
> Recent performance testing has shown that virtio-blk can underperform
> virtio-scsi due to the extra latency added by the BH.
> 
> The virtqueue EVENT_IDX feature mitigates interrupts when the guest
> interrupt handler has not had a chance to run yet.  Therefore, virtio
> already offers one level of interrupt mitigation and the BH adds
> additional latency on top.

FWIW, I'm writing a patch that disables the BH (notifying after each completion
instead) when EVENT_IDX is present. I'll send both the patch and some fio
numbers next week.

Sergio.
diff mbox

Patch

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 5556f0e..a261a1d 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -32,7 +32,6 @@  struct VirtIOBlockDataPlane {
 
     VirtIOBlkConf *conf;
     VirtIODevice *vdev;
-    QEMUBH *bh;                     /* bh for guest notification */
     unsigned long *batch_notify_vqs;
 
     /* Note that these EventNotifiers are assigned by value.  This is
@@ -44,14 +43,7 @@  struct VirtIOBlockDataPlane {
     AioContext *ctx;
 };
 
-/* Raise an interrupt to signal guest, if necessary */
-void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq)
-{
-    set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs);
-    qemu_bh_schedule(s->bh);
-}
-
-static void notify_guest_bh(void *opaque)
+static void notify_guest(void *opaque)
 {
     VirtIOBlockDataPlane *s = opaque;
     unsigned nvqs = s->conf->num_queues;
@@ -75,7 +67,12 @@  static void notify_guest_bh(void *opaque)
     }
 }
 
-/* Context: QEMU global mutex held */
+/* Raise an interrupt to signal guest, if necessary */
+void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq)
+{
+    set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs);
+    notify_guest(s);
+}
 void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
                                   VirtIOBlockDataPlane **dataplane,
                                   Error **errp)
@@ -122,7 +119,6 @@  void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
     } else {
         s->ctx = qemu_get_aio_context();
     }
-    s->bh = aio_bh_new(s->ctx, notify_guest_bh, s);
     s->batch_notify_vqs = bitmap_new(conf->num_queues);
 
     *dataplane = s;
@@ -140,7 +136,6 @@  void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s)
     vblk = VIRTIO_BLK(s->vdev);
     assert(!vblk->dataplane_started);
     g_free(s->batch_notify_vqs);
-    qemu_bh_delete(s->bh);
     if (s->iothread) {
         object_unref(OBJECT(s->iothread));
     }