diff mbox series

[v19,QEMU,1/4] virtio-balloon: Implement support for page poison tracking feature

Message ID 20200410034129.24738.36022.stgit@localhost.localdomain (mailing list archive)
State New, archived
Headers show
Series virtio-balloon: add support for free page reporting | expand

Commit Message

Alexander Duyck April 10, 2020, 3:41 a.m. UTC
From: Alexander Duyck <alexander.h.duyck@linux.intel.com>

We need to make certain to advertise support for page poison tracking if
we want to actually get data on if the guest will be poisoning pages. So
if free page hinting is active we should add page poisoning support and
let the guest disable it if it isn't using it.

Page poisoning will result in a page being dirtied on free. As such we
cannot really avoid having to copy the page at least one more time since
we will need to write the poison value to the destination. As such we can
just ignore free page hinting if page poisoning is enabled as it will
actually reduce the work we have to do.

Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
---
 hw/virtio/virtio-balloon.c         |   26 ++++++++++++++++++++++----
 include/hw/virtio/virtio-balloon.h |    1 +
 2 files changed, 23 insertions(+), 4 deletions(-)

Comments

David Hildenbrand April 15, 2020, 8:08 a.m. UTC | #1
On 10.04.20 05:41, Alexander Duyck wrote:
> From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> 
> We need to make certain to advertise support for page poison tracking if
> we want to actually get data on if the guest will be poisoning pages. So
> if free page hinting is active we should add page poisoning support and
> let the guest disable it if it isn't using it.
> 
> Page poisoning will result in a page being dirtied on free. As such we
> cannot really avoid having to copy the page at least one more time since
> we will need to write the poison value to the destination. As such we can
> just ignore free page hinting if page poisoning is enabled as it will
> actually reduce the work we have to do.
> 
> Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> ---
>  hw/virtio/virtio-balloon.c         |   26 ++++++++++++++++++++++----
>  include/hw/virtio/virtio-balloon.h |    1 +
>  2 files changed, 23 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
> index a4729f7fc930..1c6d36a29a04 100644
> --- a/hw/virtio/virtio-balloon.c
> +++ b/hw/virtio/virtio-balloon.c
> @@ -531,6 +531,15 @@ static void virtio_balloon_free_page_start(VirtIOBalloon *s)
>          return;
>      }
>  
> +    /*
> +     * If page poisoning is enabled then we probably shouldn't bother with
> +     * the hinting since the poisoning will dirty the page and invalidate
> +     * the work we are doing anyway.
> +     */
> +    if (virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {

Why not check for the poison value instead? (as you do in patch #3) ?

> +        return;
> +    }
> +
>      if (s->free_page_report_cmd_id == UINT_MAX) {
>          s->free_page_report_cmd_id =
>                         VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;

We should rename all "free_page_report" stuff we can to
"free_page_hint"/"free_page_hinting" to avoid confusion (e.g., on my
side :) ) before adding free page reporting .

(looking at the virtio-balloon linux header, it's also confusing but
we're stuck with that - maybe we should add better comments)


> @@ -618,12 +627,10 @@ static size_t virtio_balloon_config_size(VirtIOBalloon *s)
>      if (s->qemu_4_0_config_size) {
>          return sizeof(struct virtio_balloon_config);
>      }
> -    if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) {
> +    if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON) ||
> +        virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
>          return sizeof(struct virtio_balloon_config);
>      }
> -    if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
> -        return offsetof(struct virtio_balloon_config, poison_val);
> -    }

I am not sure this change is completely sane. Why is that necessary at all?

>      return offsetof(struct virtio_balloon_config, free_page_report_cmd_id);
>  }
>  
> @@ -634,6 +641,7 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
>  
>      config.num_pages = cpu_to_le32(dev->num_pages);
>      config.actual = cpu_to_le32(dev->actual);
> +    config.poison_val = cpu_to_le32(dev->poison_val);
>  
>      if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) {
>          config.free_page_report_cmd_id =
> @@ -697,6 +705,9 @@ static void virtio_balloon_set_config(VirtIODevice *vdev,
>          qapi_event_send_balloon_change(vm_ram_size -
>                          ((ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT));
>      }
> +    dev->poison_val = virtio_vdev_has_feature(vdev,
> +                                              VIRTIO_BALLOON_F_PAGE_POISON) ?
> +                      le32_to_cpu(config.poison_val) : 0;

Can we just do a


dev->poison_val = 0;
if (virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
	dev->poison_val = le32_to_cpu(config.poison_val);
}

instead?


>      trace_virtio_balloon_set_config(dev->actual, oldactual);
>  }
>  
> @@ -706,6 +717,9 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
>      VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
>      f |= dev->host_features;
>      virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);
> +    if (virtio_has_feature(f, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
> +        virtio_add_feature(&f, VIRTIO_BALLOON_F_PAGE_POISON);
> +    }
>  
>      return f;
>  }
> @@ -854,6 +868,8 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev)
>          g_free(s->stats_vq_elem);
>          s->stats_vq_elem = NULL;
>      }
> +
> +    s->poison_val = 0;
>  }
>  
>  static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
> @@ -916,6 +932,8 @@ static Property virtio_balloon_properties[] = {
>                      VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
>      DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features,
>                      VIRTIO_BALLOON_F_FREE_PAGE_HINT, false),
> +    DEFINE_PROP_BIT("x-page-poison", VirtIOBalloon, host_features,
> +                    VIRTIO_BALLOON_F_PAGE_POISON, false),

Just curious, why an x- feature?
Alexander Duyck April 15, 2020, 5:17 p.m. UTC | #2
On Wed, Apr 15, 2020 at 1:08 AM David Hildenbrand <david@redhat.com> wrote:
>
> On 10.04.20 05:41, Alexander Duyck wrote:
> > From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> >
> > We need to make certain to advertise support for page poison tracking if
> > we want to actually get data on if the guest will be poisoning pages. So
> > if free page hinting is active we should add page poisoning support and
> > let the guest disable it if it isn't using it.
> >
> > Page poisoning will result in a page being dirtied on free. As such we
> > cannot really avoid having to copy the page at least one more time since
> > we will need to write the poison value to the destination. As such we can
> > just ignore free page hinting if page poisoning is enabled as it will
> > actually reduce the work we have to do.
> >
> > Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> > ---
> >  hw/virtio/virtio-balloon.c         |   26 ++++++++++++++++++++++----
> >  include/hw/virtio/virtio-balloon.h |    1 +
> >  2 files changed, 23 insertions(+), 4 deletions(-)
> >
> > diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
> > index a4729f7fc930..1c6d36a29a04 100644
> > --- a/hw/virtio/virtio-balloon.c
> > +++ b/hw/virtio/virtio-balloon.c
> > @@ -531,6 +531,15 @@ static void virtio_balloon_free_page_start(VirtIOBalloon *s)
> >          return;
> >      }
> >
> > +    /*
> > +     * If page poisoning is enabled then we probably shouldn't bother with
> > +     * the hinting since the poisoning will dirty the page and invalidate
> > +     * the work we are doing anyway.
> > +     */
> > +    if (virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
>
> Why not check for the poison value instead? (as you do in patch #3) ?

So if I recall correctly the vdev has feature requires the host to
indicate that the feature is in use. If page poisoning is not enabled
on the host then it clears the flag on its end and we can proceed with
the feature.

The comment above explains the "why". Basically poisoning a page will
dirty it. So why hint a page as free when that will drop it back into
the guest and result in it being dirtied again. What you end up with
is all the pages that were temporarily placed in the balloon are dirty
after the hinting report is finished at which point you made things
worse instead of helping to improve them.

>
> > +        return;
> > +    }
> > +
> >      if (s->free_page_report_cmd_id == UINT_MAX) {
> >          s->free_page_report_cmd_id =
> >                         VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
>
> We should rename all "free_page_report" stuff we can to
> "free_page_hint"/"free_page_hinting" to avoid confusion (e.g., on my
> side :) ) before adding free page reporting .
>
> (looking at the virtio-balloon linux header, it's also confusing but
> we're stuck with that - maybe we should add better comments)

Are we stuck? Couldn't we just convert it to an anonymous union with
free_page_hint_cmd_id and then use that where needed?

> > @@ -618,12 +627,10 @@ static size_t virtio_balloon_config_size(VirtIOBalloon *s)
> >      if (s->qemu_4_0_config_size) {
> >          return sizeof(struct virtio_balloon_config);
> >      }
> > -    if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) {
> > +    if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON) ||
> > +        virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
> >          return sizeof(struct virtio_balloon_config);
> >      }
> > -    if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
> > -        return offsetof(struct virtio_balloon_config, poison_val);
> > -    }
>
> I am not sure this change is completely sane. Why is that necessary at all?

The poison_val is stored at the end of the structure and is required
in order to make free page hinting to work. What this change is doing
is forcing it so that we report the config size as the full size if
either poisoning or hinting are enabled since the poison val is the
last member of the config structure.

If the question is why bother reducing the size if free page hinting
is not present then I guess we could simplify this and just report
report the size of the config for all cases.

> >      return offsetof(struct virtio_balloon_config, free_page_report_cmd_id);
> >  }
> >
> > @@ -634,6 +641,7 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
> >
> >      config.num_pages = cpu_to_le32(dev->num_pages);
> >      config.actual = cpu_to_le32(dev->actual);
> > +    config.poison_val = cpu_to_le32(dev->poison_val);
> >
> >      if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) {
> >          config.free_page_report_cmd_id =
> > @@ -697,6 +705,9 @@ static void virtio_balloon_set_config(VirtIODevice *vdev,
> >          qapi_event_send_balloon_change(vm_ram_size -
> >                          ((ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT));
> >      }
> > +    dev->poison_val = virtio_vdev_has_feature(vdev,
> > +                                              VIRTIO_BALLOON_F_PAGE_POISON) ?
> > +                      le32_to_cpu(config.poison_val) : 0;
>
> Can we just do a
>
>
> dev->poison_val = 0;
> if (virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
>         dev->poison_val = le32_to_cpu(config.poison_val);
> }
>
> instead?

I can change it to that if that is what is preferred.

> >      trace_virtio_balloon_set_config(dev->actual, oldactual);
> >  }
> >
> > @@ -706,6 +717,9 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
> >      VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
> >      f |= dev->host_features;
> >      virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);
> > +    if (virtio_has_feature(f, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
> > +        virtio_add_feature(&f, VIRTIO_BALLOON_F_PAGE_POISON);
> > +    }
> >
> >      return f;
> >  }
> > @@ -854,6 +868,8 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev)
> >          g_free(s->stats_vq_elem);
> >          s->stats_vq_elem = NULL;
> >      }
> > +
> > +    s->poison_val = 0;
> >  }
> >
> >  static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
> > @@ -916,6 +932,8 @@ static Property virtio_balloon_properties[] = {
> >                      VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
> >      DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features,
> >                      VIRTIO_BALLOON_F_FREE_PAGE_HINT, false),
> > +    DEFINE_PROP_BIT("x-page-poison", VirtIOBalloon, host_features,
> > +                    VIRTIO_BALLOON_F_PAGE_POISON, false),
>
> Just curious, why an x- feature?

It was something I didn't expect the users to enable. It gets enabled
when either free page hinting or free page reporting is enabled. So if
you look you will see that in virtio_balloon_get_features the page
poison feature is added if free page hinting is present. The guest
will clear the feature bit if poisoning is not enabled in the guest.
That results in the vdev getting the bit cleared.

Part of it was also about making this work with the existing feature
code as it had been added to the upstream kernel.
David Hildenbrand April 15, 2020, 6:16 p.m. UTC | #3
> 
> The comment above explains the "why". Basically poisoning a page will
> dirty it. So why hint a page as free when that will drop it back into
> the guest and result in it being dirtied again. What you end up with
> is all the pages that were temporarily placed in the balloon are dirty
> after the hinting report is finished at which point you made things
> worse instead of helping to improve them.

Let me think this through. What happens on free page hinting is that

a) we tell the guest that migration starts
b) it allocates pages (alloc_pages()), sends them to us and adds them to
   a list
b) we exclude these pages on migration
c) we tell the guest that migration is over
d) the guest frees all allocated pages

The "issue" with VIRTIO_BALLOON_F_PAGE_POISON is, that in d), the guest
will fill all pages again with a pattern (or zero).

AFAIKs, it's either

1) Not performing free page hinting, migrating pages filled with a
poison value (or zero).
2) Performing free page hinting, not migrating pages filled with a
poison value (or zero), letting only the guest fill them again.

I have the feeling, that 2) is still better for migration, because we
don't migrate useless pages and let the guest reconstruct the content?
(having a poison value of zero might be debatable)

Can you tell me what I am missing? :)

> 
>>
>>> +        return;
>>> +    }
>>> +
>>>      if (s->free_page_report_cmd_id == UINT_MAX) {
>>>          s->free_page_report_cmd_id =
>>>                         VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
>>
>> We should rename all "free_page_report" stuff we can to
>> "free_page_hint"/"free_page_hinting" to avoid confusion (e.g., on my
>> side :) ) before adding free page reporting .
>>
>> (looking at the virtio-balloon linux header, it's also confusing but
>> we're stuck with that - maybe we should add better comments)
> 
> Are we stuck? Couldn't we just convert it to an anonymous union with
> free_page_hint_cmd_id and then use that where needed?

Saw your patch already :)

> 
>>> @@ -618,12 +627,10 @@ static size_t virtio_balloon_config_size(VirtIOBalloon *s)
>>>      if (s->qemu_4_0_config_size) {
>>>          return sizeof(struct virtio_balloon_config);
>>>      }
>>> -    if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) {
>>> +    if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON) ||
>>> +        virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
>>>          return sizeof(struct virtio_balloon_config);
>>>      }
>>> -    if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
>>> -        return offsetof(struct virtio_balloon_config, poison_val);
>>> -    }
>>
>> I am not sure this change is completely sane. Why is that necessary at all?
> 
> The poison_val is stored at the end of the structure and is required
> in order to make free page hinting to work. What this change is doing

Required to make it work? In the kernel,

commit 2e991629bcf55a43681aec1ee096eeb03cf81709
Author: Wei Wang <wei.w.wang@intel.com>
Date:   Mon Aug 27 09:32:19 2018 +0800

    virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON

was merged after

commit 86a559787e6f5cf662c081363f64a20cad654195
Author: Wei Wang <wei.w.wang@intel.com>
Date:   Mon Aug 27 09:32:17 2018 +0800

    virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT

So I assume it's perfectly fine to not have it.

I'd say it's the responsibility of the guest to *not* use
VIRTIO_BALLOON_F_FREE_PAGE_HINT in case it is using page poisoning
without VIRTIO_BALLOON_F_PAGE_POISON. Otherwise it will shoot itself
into the foot.

> is forcing it so that we report the config size as the full size if
> either poisoning or hinting are enabled since the poison val is the
> last member of the config structure.
> 
> If the question is why bother reducing the size if free page hinting
> is not present then I guess we could simplify this and just report
> report the size of the config for all cases.

I guess the idea is that if you migrate from one QEMU to another, your
config size will not suddenly change (fenced by a feature set that will
not change during migration, observable by a running guest).

My guess would be that we cannot simply change existing configurations
(defined via feature bits) as you do here - see e.g., qemu-4-0-config-size.

[...]

>>>
>>> @@ -706,6 +717,9 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
>>>      VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
>>>      f |= dev->host_features;
>>>      virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);
>>> +    if (virtio_has_feature(f, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
>>> +        virtio_add_feature(&f, VIRTIO_BALLOON_F_PAGE_POISON);
>>> +    }
>>>
>>>      return f;
>>>  }
>>> @@ -854,6 +868,8 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev)
>>>          g_free(s->stats_vq_elem);
>>>          s->stats_vq_elem = NULL;
>>>      }
>>> +
>>> +    s->poison_val = 0;
>>>  }
>>>
>>>  static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
>>> @@ -916,6 +932,8 @@ static Property virtio_balloon_properties[] = {
>>>                      VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
>>>      DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features,
>>>                      VIRTIO_BALLOON_F_FREE_PAGE_HINT, false),
>>> +    DEFINE_PROP_BIT("x-page-poison", VirtIOBalloon, host_features,
>>> +                    VIRTIO_BALLOON_F_PAGE_POISON, false),
>>
>> Just curious, why an x- feature?
> 
> It was something I didn't expect the users to enable. It gets enabled
> when either free page hinting or free page reporting is enabled. So if
> you look you will see that in virtio_balloon_get_features the page
> poison feature is added if free page hinting is present. The guest
> will clear the feature bit if poisoning is not enabled in the guest.
> That results in the vdev getting the bit cleared.

The weird thing is that setting it to "false" will still enable it
automatically, depending on other features. Not sure how helpful that is.

I'd prefer to simply always enable it, similar to
VIRTIO_BALLOON_F_STATS_VQ - but it's late and I am confused how
migration with compat machines will work. :)

So, I wonder if we need this feature bit here at all.
Alexander Duyck April 15, 2020, 7:28 p.m. UTC | #4
On Wed, Apr 15, 2020 at 11:17 AM David Hildenbrand <david@redhat.com> wrote:
>
> >
> > The comment above explains the "why". Basically poisoning a page will
> > dirty it. So why hint a page as free when that will drop it back into
> > the guest and result in it being dirtied again. What you end up with
> > is all the pages that were temporarily placed in the balloon are dirty
> > after the hinting report is finished at which point you made things
> > worse instead of helping to improve them.
>
> Let me think this through. What happens on free page hinting is that
>
> a) we tell the guest that migration starts
> b) it allocates pages (alloc_pages()), sends them to us and adds them to
>    a list
> b) we exclude these pages on migration
> c) we tell the guest that migration is over
> d) the guest frees all allocated pages
>
> The "issue" with VIRTIO_BALLOON_F_PAGE_POISON is, that in d), the guest
> will fill all pages again with a pattern (or zero).

They should have already been filled with the poison pattern before we
got to d). A bigger worry is that we at some point in the future
update the kernel so that d) doesn't trigger re-poisoning, in which
case the pages won't be marked as dirty, we will have skipped the
migration, and we have no idea what will be in the pages at the end.

> AFAIKs, it's either
>
> 1) Not performing free page hinting, migrating pages filled with a
> poison value (or zero).
> 2) Performing free page hinting, not migrating pages filled with a
> poison value (or zero), letting only the guest fill them again.
>
> I have the feeling, that 2) is still better for migration, because we
> don't migrate useless pages and let the guest reconstruct the content?
> (having a poison value of zero might be debatable)
>
> Can you tell me what I am missing? :)

The goal of the free page hinting was to reduce the number of pages
that have to be migrated, in the second case you point out is is
basically deferring it and will make the post-copy quite more
expensive since all of the free memory will be pushed to the post-copy
which I would think would be undesirable since it means the VM would
have to be down for a greater amount of time with the poisoning
enabled.

The worst case scenario would be one in which the VM was suspended for
migration while there were still pages in the balloon that needed to
be drained. In such a case you would have them in an indeterminate
state since the last page poisoning for them might have been ignored
since they were marked as "free", so they are just going to be
whatever value they were if they had been migrated at all.

> >
> >>
> >>> +        return;
> >>> +    }
> >>> +
> >>>      if (s->free_page_report_cmd_id == UINT_MAX) {
> >>>          s->free_page_report_cmd_id =
> >>>                         VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
> >>
> >> We should rename all "free_page_report" stuff we can to
> >> "free_page_hint"/"free_page_hinting" to avoid confusion (e.g., on my
> >> side :) ) before adding free page reporting .
> >>
> >> (looking at the virtio-balloon linux header, it's also confusing but
> >> we're stuck with that - maybe we should add better comments)
> >
> > Are we stuck? Couldn't we just convert it to an anonymous union with
> > free_page_hint_cmd_id and then use that where needed?
>
> Saw your patch already :)
>
> >
> >>> @@ -618,12 +627,10 @@ static size_t virtio_balloon_config_size(VirtIOBalloon *s)
> >>>      if (s->qemu_4_0_config_size) {
> >>>          return sizeof(struct virtio_balloon_config);
> >>>      }
> >>> -    if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) {
> >>> +    if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON) ||
> >>> +        virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
> >>>          return sizeof(struct virtio_balloon_config);
> >>>      }
> >>> -    if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
> >>> -        return offsetof(struct virtio_balloon_config, poison_val);
> >>> -    }
> >>
> >> I am not sure this change is completely sane. Why is that necessary at all?
> >
> > The poison_val is stored at the end of the structure and is required
> > in order to make free page hinting to work. What this change is doing
>
> Required to make it work? In the kernel,
>
> commit 2e991629bcf55a43681aec1ee096eeb03cf81709
> Author: Wei Wang <wei.w.wang@intel.com>
> Date:   Mon Aug 27 09:32:19 2018 +0800
>
>     virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON
>
> was merged after
>
> commit 86a559787e6f5cf662c081363f64a20cad654195
> Author: Wei Wang <wei.w.wang@intel.com>
> Date:   Mon Aug 27 09:32:17 2018 +0800
>
>     virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
>
> So I assume it's perfectly fine to not have it.
>
> I'd say it's the responsibility of the guest to *not* use
> VIRTIO_BALLOON_F_FREE_PAGE_HINT in case it is using page poisoning
> without VIRTIO_BALLOON_F_PAGE_POISON. Otherwise it will shoot itself
> into the foot.

Based on the timing I am guessing the page poisoning was in the same
patch set as the free page hinting since there is only 2 seconds
between when the two are merged. If I recall the page poisoning logic
was added after the issue was pointed out that it needed to address
it.

I can probably make some changes to virtballoon_validate in the kernel
driver to address the fact that if we are poisoning pages we need to
either have the PAGE_POISON feature or we need to disable page
reporting and page hinting.

> > is forcing it so that we report the config size as the full size if
> > either poisoning or hinting are enabled since the poison val is the
> > last member of the config structure.
> >
> > If the question is why bother reducing the size if free page hinting
> > is not present then I guess we could simplify this and just report
> > report the size of the config for all cases.
>
> I guess the idea is that if you migrate from one QEMU to another, your
> config size will not suddenly change (fenced by a feature set that will
> not change during migration, observable by a running guest).
>
> My guess would be that we cannot simply change existing configurations
> (defined via feature bits) as you do here - see e.g., qemu-4-0-config-size.

Okay, I guess I can revert that bit.

> [...]
>
> >>>
> >>> @@ -706,6 +717,9 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
> >>>      VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
> >>>      f |= dev->host_features;
> >>>      virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);
> >>> +    if (virtio_has_feature(f, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
> >>> +        virtio_add_feature(&f, VIRTIO_BALLOON_F_PAGE_POISON);
> >>> +    }
> >>>
> >>>      return f;
> >>>  }
> >>> @@ -854,6 +868,8 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev)
> >>>          g_free(s->stats_vq_elem);
> >>>          s->stats_vq_elem = NULL;
> >>>      }
> >>> +
> >>> +    s->poison_val = 0;
> >>>  }
> >>>
> >>>  static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
> >>> @@ -916,6 +932,8 @@ static Property virtio_balloon_properties[] = {
> >>>                      VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
> >>>      DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features,
> >>>                      VIRTIO_BALLOON_F_FREE_PAGE_HINT, false),
> >>> +    DEFINE_PROP_BIT("x-page-poison", VirtIOBalloon, host_features,
> >>> +                    VIRTIO_BALLOON_F_PAGE_POISON, false),
> >>
> >> Just curious, why an x- feature?
> >
> > It was something I didn't expect the users to enable. It gets enabled
> > when either free page hinting or free page reporting is enabled. So if
> > you look you will see that in virtio_balloon_get_features the page
> > poison feature is added if free page hinting is present. The guest
> > will clear the feature bit if poisoning is not enabled in the guest.
> > That results in the vdev getting the bit cleared.
>
> The weird thing is that setting it to "false" will still enable it
> automatically, depending on other features. Not sure how helpful that is.
>
> I'd prefer to simply always enable it, similar to
> VIRTIO_BALLOON_F_STATS_VQ - but it's late and I am confused how
> migration with compat machines will work. :)
>
> So, I wonder if we need this feature bit here at all.

I can drop it. I don't really recall why I added that piece.
David Hildenbrand April 15, 2020, 7:46 p.m. UTC | #5
> Am 15.04.2020 um 21:29 schrieb Alexander Duyck <alexander.duyck@gmail.com>:
> 
> On Wed, Apr 15, 2020 at 11:17 AM David Hildenbrand <david@redhat.com> wrote:
>> 
>>> 
>>> The comment above explains the "why". Basically poisoning a page will
>>> dirty it. So why hint a page as free when that will drop it back into
>>> the guest and result in it being dirtied again. What you end up with
>>> is all the pages that were temporarily placed in the balloon are dirty
>>> after the hinting report is finished at which point you made things
>>> worse instead of helping to improve them.
>> 
>> Let me think this through. What happens on free page hinting is that
>> 
>> a) we tell the guest that migration starts
>> b) it allocates pages (alloc_pages()), sends them to us and adds them to
>>   a list
>> b) we exclude these pages on migration
>> c) we tell the guest that migration is over
>> d) the guest frees all allocated pages
>> 
>> The "issue" with VIRTIO_BALLOON_F_PAGE_POISON is, that in d), the guest
>> will fill all pages again with a pattern (or zero).
> 
> They should have already been filled with the poison pattern before we
> got to d). A bigger worry is that we at some point in the future
> update the kernel so that d) doesn't trigger re-poisoning, in which
> case the pages won't be marked as dirty, we will have skipped the
> migration, and we have no idea what will be in the pages at the end.
> 
>> AFAIKs, it's either
>> 
>> 1) Not performing free page hinting, migrating pages filled with a
>> poison value (or zero).
>> 2) Performing free page hinting, not migrating pages filled with a
>> poison value (or zero), letting only the guest fill them again.
>> 
>> I have the feeling, that 2) is still better for migration, because we
>> don't migrate useless pages and let the guest reconstruct the content?
>> (having a poison value of zero might be debatable)
>> 
>> Can you tell me what I am missing? :)
> 
> The goal of the free page hinting was to reduce the number of pages
> that have to be migrated, in the second case you point out is is
> basically deferring it and will make the post-copy quite more
> expensive since all of the free memory will be pushed to the post-copy
> which I would think would be undesirable since it means the VM would
> have to be down for a greater amount of time with the poisoning
> enabled.

Postcopy is a very good point, bought!

But (what you wrote above) it sounds like that this is really what we *have to* do, not an optimization. I‘ll double check the spec tomorrow (hopefully it was documented). We should rephrase the comment then.

I could imagine that we also have to care about ordinary page inflation/deflation when handling page poisoning. (Iow, don‘t inflate/deflate if set for now).

> 
> The worst case scenario would be one in which the VM was suspended for
> migration while there were still pages in the balloon that needed to
> be drained. In such a case you would have them in an indeterminate
> state since the last page poisoning for them might have been ignored
> since they were marked as "free", so they are just going to be
> whatever value they were if they had been migrated at all.
> 
>>> 
>>>> 
>>>>> +        return;
>>>>> +    }
>>>>> +
>>>>>     if (s->free_page_report_cmd_id == UINT_MAX) {
>>>>>         s->free_page_report_cmd_id =
>>>>>                        VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
>>>> 
>>>> We should rename all "free_page_report" stuff we can to
>>>> "free_page_hint"/"free_page_hinting" to avoid confusion (e.g., on my
>>>> side :) ) before adding free page reporting .
>>>> 
>>>> (looking at the virtio-balloon linux header, it's also confusing but
>>>> we're stuck with that - maybe we should add better comments)
>>> 
>>> Are we stuck? Couldn't we just convert it to an anonymous union with
>>> free_page_hint_cmd_id and then use that where needed?
>> 
>> Saw your patch already :)
>> 
>>> 
>>>>> @@ -618,12 +627,10 @@ static size_t virtio_balloon_config_size(VirtIOBalloon *s)
>>>>>     if (s->qemu_4_0_config_size) {
>>>>>         return sizeof(struct virtio_balloon_config);
>>>>>     }
>>>>> -    if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) {
>>>>> +    if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON) ||
>>>>> +        virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
>>>>>         return sizeof(struct virtio_balloon_config);
>>>>>     }
>>>>> -    if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
>>>>> -        return offsetof(struct virtio_balloon_config, poison_val);
>>>>> -    }
>>>> 
>>>> I am not sure this change is completely sane. Why is that necessary at all?
>>> 
>>> The poison_val is stored at the end of the structure and is required
>>> in order to make free page hinting to work. What this change is doing
>> 
>> Required to make it work? In the kernel,
>> 
>> commit 2e991629bcf55a43681aec1ee096eeb03cf81709
>> Author: Wei Wang <wei.w.wang@intel.com>
>> Date:   Mon Aug 27 09:32:19 2018 +0800
>> 
>>    virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON
>> 
>> was merged after
>> 
>> commit 86a559787e6f5cf662c081363f64a20cad654195
>> Author: Wei Wang <wei.w.wang@intel.com>
>> Date:   Mon Aug 27 09:32:17 2018 +0800
>> 
>>    virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
>> 
>> So I assume it's perfectly fine to not have it.
>> 
>> I'd say it's the responsibility of the guest to *not* use
>> VIRTIO_BALLOON_F_FREE_PAGE_HINT in case it is using page poisoning
>> without VIRTIO_BALLOON_F_PAGE_POISON. Otherwise it will shoot itself
>> into the foot.
> 
> Based on the timing I am guessing the page poisoning was in the same
> patch set as the free page hinting since there is only 2 seconds
> between when the two are merged. If I recall the page poisoning logic
> was added after the issue was pointed out that it needed to address
> it.
> 

Yeah, but any other OS implementing this, not caring about page poisoning wouldn‘t need it. Maybe there is something in the spec.

Mental note: we‘ll have to migrate the poison value if not already done (writing on my mobile).
Alexander Duyck April 15, 2020, 9:16 p.m. UTC | #6
On Wed, Apr 15, 2020 at 12:46 PM David Hildenbrand <dhildenb@redhat.com> wrote:
>
>
>
> > Am 15.04.2020 um 21:29 schrieb Alexander Duyck <alexander.duyck@gmail.com>:
> >
> > On Wed, Apr 15, 2020 at 11:17 AM David Hildenbrand <david@redhat.com> wrote:
> >>
> >>>
> >>> The comment above explains the "why". Basically poisoning a page will
> >>> dirty it. So why hint a page as free when that will drop it back into
> >>> the guest and result in it being dirtied again. What you end up with
> >>> is all the pages that were temporarily placed in the balloon are dirty
> >>> after the hinting report is finished at which point you made things
> >>> worse instead of helping to improve them.
> >>
> >> Let me think this through. What happens on free page hinting is that
> >>
> >> a) we tell the guest that migration starts
> >> b) it allocates pages (alloc_pages()), sends them to us and adds them to
> >>   a list
> >> b) we exclude these pages on migration
> >> c) we tell the guest that migration is over
> >> d) the guest frees all allocated pages
> >>
> >> The "issue" with VIRTIO_BALLOON_F_PAGE_POISON is, that in d), the guest
> >> will fill all pages again with a pattern (or zero).
> >
> > They should have already been filled with the poison pattern before we
> > got to d). A bigger worry is that we at some point in the future
> > update the kernel so that d) doesn't trigger re-poisoning, in which
> > case the pages won't be marked as dirty, we will have skipped the
> > migration, and we have no idea what will be in the pages at the end.
> >
> >> AFAIKs, it's either
> >>
> >> 1) Not performing free page hinting, migrating pages filled with a
> >> poison value (or zero).
> >> 2) Performing free page hinting, not migrating pages filled with a
> >> poison value (or zero), letting only the guest fill them again.
> >>
> >> I have the feeling, that 2) is still better for migration, because we
> >> don't migrate useless pages and let the guest reconstruct the content?
> >> (having a poison value of zero might be debatable)
> >>
> >> Can you tell me what I am missing? :)
> >
> > The goal of the free page hinting was to reduce the number of pages
> > that have to be migrated, in the second case you point out is is
> > basically deferring it and will make the post-copy quite more
> > expensive since all of the free memory will be pushed to the post-copy
> > which I would think would be undesirable since it means the VM would
> > have to be down for a greater amount of time with the poisoning
> > enabled.
>
> Postcopy is a very good point, bought!
>
> But (what you wrote above) it sounds like that this is really what we *have to* do, not an optimization. I‘ll double check the spec tomorrow (hopefully it was documented). We should rephrase the comment then.

Do you have a link to the spec that I could look at? I am not hopeful
that this will be listed there since the poison side of QEMU was never
implemented. The flag is only here because it was copied over in the
kernel header.

> I could imagine that we also have to care about ordinary page inflation/deflation when handling page poisoning. (Iow, don‘t inflate/deflate if set for now).

The problem is this was broken from the start for that. The issue is
that the poison feature was wrapped up within the page hinting
feature. So as a result enabling it for a VM that doesn't recognize
the feature independently would likely leave the poison value
uninitialized and flagging as though a value of 0 is used. In addition
the original poison checking wasn't making sure that the page wasn't
init_on_free which has the same effect as page poisoning but isn't
page poisoning.

> >
> > The worst case scenario would be one in which the VM was suspended for
> > migration while there were still pages in the balloon that needed to
> > be drained. In such a case you would have them in an indeterminate
> > state since the last page poisoning for them might have been ignored
> > since they were marked as "free", so they are just going to be
> > whatever value they were if they had been migrated at all.
> >
> >>>
> >>>>
> >>>>> +        return;
> >>>>> +    }
> >>>>> +
> >>>>>     if (s->free_page_report_cmd_id == UINT_MAX) {
> >>>>>         s->free_page_report_cmd_id =
> >>>>>                        VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
> >>>>
> >>>> We should rename all "free_page_report" stuff we can to
> >>>> "free_page_hint"/"free_page_hinting" to avoid confusion (e.g., on my
> >>>> side :) ) before adding free page reporting .
> >>>>
> >>>> (looking at the virtio-balloon linux header, it's also confusing but
> >>>> we're stuck with that - maybe we should add better comments)
> >>>
> >>> Are we stuck? Couldn't we just convert it to an anonymous union with
> >>> free_page_hint_cmd_id and then use that where needed?
> >>
> >> Saw your patch already :)
> >>
> >>>
> >>>>> @@ -618,12 +627,10 @@ static size_t virtio_balloon_config_size(VirtIOBalloon *s)
> >>>>>     if (s->qemu_4_0_config_size) {
> >>>>>         return sizeof(struct virtio_balloon_config);
> >>>>>     }
> >>>>> -    if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) {
> >>>>> +    if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON) ||
> >>>>> +        virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
> >>>>>         return sizeof(struct virtio_balloon_config);
> >>>>>     }
> >>>>> -    if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
> >>>>> -        return offsetof(struct virtio_balloon_config, poison_val);
> >>>>> -    }
> >>>>
> >>>> I am not sure this change is completely sane. Why is that necessary at all?
> >>>
> >>> The poison_val is stored at the end of the structure and is required
> >>> in order to make free page hinting to work. What this change is doing
> >>
> >> Required to make it work? In the kernel,
> >>
> >> commit 2e991629bcf55a43681aec1ee096eeb03cf81709
> >> Author: Wei Wang <wei.w.wang@intel.com>
> >> Date:   Mon Aug 27 09:32:19 2018 +0800
> >>
> >>    virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON
> >>
> >> was merged after
> >>
> >> commit 86a559787e6f5cf662c081363f64a20cad654195
> >> Author: Wei Wang <wei.w.wang@intel.com>
> >> Date:   Mon Aug 27 09:32:17 2018 +0800
> >>
> >>    virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
> >>
> >> So I assume it's perfectly fine to not have it.
> >>
> >> I'd say it's the responsibility of the guest to *not* use
> >> VIRTIO_BALLOON_F_FREE_PAGE_HINT in case it is using page poisoning
> >> without VIRTIO_BALLOON_F_PAGE_POISON. Otherwise it will shoot itself
> >> into the foot.
> >
> > Based on the timing I am guessing the page poisoning was in the same
> > patch set as the free page hinting since there is only 2 seconds
> > between when the two are merged. If I recall the page poisoning logic
> > was added after the issue was pointed out that it needed to address
> > it.
> >
>
> Yeah, but any other OS implementing this, not caring about page poisoning wouldn‘t need it. Maybe there is something in the spec.
>
> Mental note: we‘ll have to migrate the poison value if not already done (writing on my mobile).

Right. We need to make sure any poison or init on free is migrated
over to the destination before we can say we are going to skip the
migration. If anything what I probably ought to look into would be if
we could change the code so that if we have a hint the page is unused,
poison is enabled, and the value is 0 we just ship over a 0 page
instead of giving up on hinting entirely.
David Hildenbrand April 16, 2020, 8:18 a.m. UTC | #7
>>
>> Postcopy is a very good point, bought!
>>
>> But (what you wrote above) it sounds like that this is really what we *have to* do, not an optimization. I‘ll double check the spec tomorrow (hopefully it was documented). We should rephrase the comment then.
> 
> Do you have a link to the spec that I could look at? I am not hopeful
> that this will be listed there since the poison side of QEMU was never
> implemented. The flag is only here because it was copied over in the
> kernel header.

Introducing a feature without

a) specification what it does
b) implementation (of both sides) showing what has to be done
c) any kind of documentation of what needs to be done

just horrible.

The latest-greatest spec lives in

https://github.com/oasis-tcs/virtio-spec.git

I can't spot any signs of free page hinting/page poisioning. :(

We should document our result of page poisoning, free page hinting, and
free page reporting there as well. I hope you'll have time for the latter.

-------------------------------------------------------------------------
Semantics of VIRTIO_BALLOON_F_PAGE_POISON
-------------------------------------------------------------------------

"The VIRTIO_BALLOON_F_PAGE_POISON feature bit is used to indicate if the
guest is using page poisoning. Guest writes to the poison_val config
field to tell host about the page poisoning value that is in use."
-> Very little information, no signs about what has to be done.

"Let the hypervisor know that we are expecting a specific value to be
written back in balloon pages."
-> Okay, that talks about "balloon pages", which would include right now
-- pages "inflated" and then "deflated" using free page hinting
-- pages "inflated" and then "deflated" using oridnary inflate/deflate
   queue
-- pages "inflated" using free page reporting and automatically
   "deflated" on access

So VIRTIO_BALLOON_F_PAGE_POISON really means "whenever the guest
deflates a page (either explicitly, or implicitly with free page
reporting), it is filled with "poison_val".

And I would add

"However, if the inflated page was not filled with "poison_val" when
inflating, it's not predictable if the original page or a page filled
with "poison_val" is returned."

Which would cover the "we did not discard the page in the hypervisor, so
the original page is still there".


We should also document what is expected to happen if "poison_val" is
suddenly changed by the guest at one point in time again. (e.g., not
supported, unexpected things can happen, etc.) Also, we might have to
document that a device reset resets the poison_val to 0. (not sure if
that's really necessary)

-------------------------------------------------------------------------
What we have to do in the guest/Linux:
-------------------------------------------------------------------------

A guest which relies on this (esp., free page reporting in Linux only,
right?), has to not use/advertise VIRTIO_BALLOON_F_REPORTING *in case
VIRTIO_BALLOON_F_PAGE_POISON is not provided* by the host. AFAIKS,
ordinary inflation/deflation and free page hinting does currently not
care, as we go via free_page(), so that is currently fine AFAIKs.

-------------------------------------------------------------------------
What we have to do in the hypervisor/QEMU:
-------------------------------------------------------------------------

With VIRTIO_BALLOON_F_PAGE_POISON, we could provide free page reporting
easily IFF "page_val"==0. However, as you said, it will currently be
expensive in case of postcopy, as the guest still zeroes out all pages.
Document that properly.

With VIRTIO_BALLOON_F_PAGE_POISON, don't inflate any pages right now
(not discarding anything), OR fill the pages with poison_val when
deflating. I guess the latter would be better - even if current Linux
does not need it, it's what we are expected to do AFAIKS.

With VIRTIO_BALLOON_F_PAGE_POISON and page_val != 0, discard all free
page reporting attempts. (== what your patch #3 does). Document that
properly.


Makes sense? See below for guest migration thingies.

> 
>> I could imagine that we also have to care about ordinary page inflation/deflation when handling page poisoning. (Iow, don‘t inflate/deflate if set for now).
> 
> The problem is this was broken from the start for that. The issue is
> that the poison feature was wrapped up within the page hinting
> feature. So as a result enabling it for a VM that doesn't recognize
> the feature independently would likely leave the poison value
> uninitialized and flagging as though a value of 0 is used. In addition
> the original poison checking wasn't making sure that the page wasn't
> init_on_free which has the same effect as page poisoning but isn't
> page poisoning.

If the guest agrees to have VIRTIO_BALLOON_F_PAGE_POISON but does not
initialize poison_val, then it's a guest bug, I wouldn't worry about
that for now.

> 
>>>
>>> The worst case scenario would be one in which the VM was suspended for
>>> migration while there were still pages in the balloon that needed to
>>> be drained. In such a case you would have them in an indeterminate
>>> state since the last page poisoning for them might have been ignored
>>> since they were marked as "free", so they are just going to be
>>> whatever value they were if they had been migrated at all.
>>>
>>>>>
>>>>>>
>>>>>>> +        return;
>>>>>>> +    }
>>>>>>> +
>>>>>>>     if (s->free_page_report_cmd_id == UINT_MAX) {
>>>>>>>         s->free_page_report_cmd_id =
>>>>>>>                        VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
>>>>>>
>>>>>> We should rename all "free_page_report" stuff we can to
>>>>>> "free_page_hint"/"free_page_hinting" to avoid confusion (e.g., on my
>>>>>> side :) ) before adding free page reporting .
>>>>>>
>>>>>> (looking at the virtio-balloon linux header, it's also confusing but
>>>>>> we're stuck with that - maybe we should add better comments)
>>>>>
>>>>> Are we stuck? Couldn't we just convert it to an anonymous union with
>>>>> free_page_hint_cmd_id and then use that where needed?
>>>>
>>>> Saw your patch already :)
>>>>
>>>>>
>>>>>>> @@ -618,12 +627,10 @@ static size_t virtio_balloon_config_size(VirtIOBalloon *s)
>>>>>>>     if (s->qemu_4_0_config_size) {
>>>>>>>         return sizeof(struct virtio_balloon_config);
>>>>>>>     }
>>>>>>> -    if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) {
>>>>>>> +    if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON) ||
>>>>>>> +        virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
>>>>>>>         return sizeof(struct virtio_balloon_config);
>>>>>>>     }
>>>>>>> -    if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
>>>>>>> -        return offsetof(struct virtio_balloon_config, poison_val);
>>>>>>> -    }
>>>>>>
>>>>>> I am not sure this change is completely sane. Why is that necessary at all?
>>>>>
>>>>> The poison_val is stored at the end of the structure and is required
>>>>> in order to make free page hinting to work. What this change is doing
>>>>
>>>> Required to make it work? In the kernel,
>>>>
>>>> commit 2e991629bcf55a43681aec1ee096eeb03cf81709
>>>> Author: Wei Wang <wei.w.wang@intel.com>
>>>> Date:   Mon Aug 27 09:32:19 2018 +0800
>>>>
>>>>    virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON
>>>>
>>>> was merged after
>>>>
>>>> commit 86a559787e6f5cf662c081363f64a20cad654195
>>>> Author: Wei Wang <wei.w.wang@intel.com>
>>>> Date:   Mon Aug 27 09:32:17 2018 +0800
>>>>
>>>>    virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
>>>>
>>>> So I assume it's perfectly fine to not have it.
>>>>
>>>> I'd say it's the responsibility of the guest to *not* use
>>>> VIRTIO_BALLOON_F_FREE_PAGE_HINT in case it is using page poisoning
>>>> without VIRTIO_BALLOON_F_PAGE_POISON. Otherwise it will shoot itself
>>>> into the foot.
>>>
>>> Based on the timing I am guessing the page poisoning was in the same
>>> patch set as the free page hinting since there is only 2 seconds
>>> between when the two are merged. If I recall the page poisoning logic
>>> was added after the issue was pointed out that it needed to address
>>> it.
>>>
>>
>> Yeah, but any other OS implementing this, not caring about page poisoning wouldn‘t need it. Maybe there is something in the spec.
>>
>> Mental note: we‘ll have to migrate the poison value if not already done (writing on my mobile).
> 
> Right. We need to make sure any poison or init on free is migrated
> over to the destination before we can say we are going to skip the
> migration. If anything what I probably ought to look into would be if
> we could change the code so that if we have a hint the page is unused,
> poison is enabled, and the value is 0 we just ship over a 0 page
> instead of giving up on hinting entirely.
> 

Yeah, we have to migrate poison_val from source to destination. Also, we
should worry about us losing the page poisoning feature when migrating
from source to destination.

Thinking about it, it might make sense to completely decouple page
poisoning here. Assign it a separate property (as you did), default
enable it, but disable it via QEMU compat machines.

Then, we won't lose the feature when migrating.
David Hildenbrand April 16, 2020, 8:36 a.m. UTC | #8
On 16.04.20 10:18, David Hildenbrand wrote:
>>>
>>> Postcopy is a very good point, bought!
>>>
>>> But (what you wrote above) it sounds like that this is really what we *have to* do, not an optimization. I‘ll double check the spec tomorrow (hopefully it was documented). We should rephrase the comment then.
>>
>> Do you have a link to the spec that I could look at? I am not hopeful
>> that this will be listed there since the poison side of QEMU was never
>> implemented. The flag is only here because it was copied over in the
>> kernel header.
> 
> Introducing a feature without
> 
> a) specification what it does
> b) implementation (of both sides) showing what has to be done
> c) any kind of documentation of what needs to be done
> 
> just horrible.
> 
> The latest-greatest spec lives in
> 
> https://github.com/oasis-tcs/virtio-spec.git
> 
> I can't spot any signs of free page hinting/page poisioning. :(
> 
> We should document our result of page poisoning, free page hinting, and
> free page reporting there as well. I hope you'll have time for the latter.
> 
> -------------------------------------------------------------------------
> Semantics of VIRTIO_BALLOON_F_PAGE_POISON
> -------------------------------------------------------------------------
> 
> "The VIRTIO_BALLOON_F_PAGE_POISON feature bit is used to indicate if the
> guest is using page poisoning. Guest writes to the poison_val config
> field to tell host about the page poisoning value that is in use."
> -> Very little information, no signs about what has to be done.
> 
> "Let the hypervisor know that we are expecting a specific value to be
> written back in balloon pages."
> -> Okay, that talks about "balloon pages", which would include right now
> -- pages "inflated" and then "deflated" using free page hinting
> -- pages "inflated" and then "deflated" using oridnary inflate/deflate
>    queue
> -- pages "inflated" using free page reporting and automatically
>    "deflated" on access
> 
> So VIRTIO_BALLOON_F_PAGE_POISON really means "whenever the guest
> deflates a page (either explicitly, or implicitly with free page
> reporting), it is filled with "poison_val".
> 
> And I would add
> 
> "However, if the inflated page was not filled with "poison_val" when
> inflating, it's not predictable if the original page or a page filled
> with "poison_val" is returned."
> 
> Which would cover the "we did not discard the page in the hypervisor, so
> the original page is still there".
> 
> 
> We should also document what is expected to happen if "poison_val" is
> suddenly changed by the guest at one point in time again. (e.g., not
> supported, unexpected things can happen, etc.) Also, we might have to
> document that a device reset resets the poison_val to 0. (not sure if
> that's really necessary)

Looking at the spec, we will only have to care about
"VIRTIO_BALLOON_F_MUST_TELL_HOST". Especially:

"If the VIRTIO_BALLOON_F_MUST_TELL_HOST feature is negotiated, the
driver MUST NOT use pages from the balloon until the device has
acknowledged the deflate request. Otherwise, if the
VIRTIO_BALLOON_F_MUST_TELL_HOST feature is not negotiated, the driver
MAY begin to re-use pages previously given to the balloon before the
device has acknowledged the deflate request."

So I guess, in QEMU, if
- VIRTIO_BALLOON_F_MUST_TELL_HOST is not set (== currently always)
- VIRTIO_BALLOON_F_PAGE_POISON is set
- poison_val is != 0
then, don't discard any pages, because we cannot fill the page reliably
during a deflation request (== guest could already be reusing the page
and expecting a certain value).

In QEMU, we should always set VIRTIO_BALLOON_F_MUST_TELL_HOST when
setting VIRTIO_BALLOON_F_PAGE_POISON.

In the spec, we should document that VIRTIO_BALLOON_F_PAGE_POISON should
only be used with VIRTIO_BALLOON_F_MUST_TELL_HOST or sth like that ...

confusing stuff. Let me know what you think.
Michael S. Tsirkin April 16, 2020, 2:33 p.m. UTC | #9
On Thu, Apr 16, 2020 at 10:18:49AM +0200, David Hildenbrand wrote:
> >>
> >> Postcopy is a very good point, bought!
> >>
> >> But (what you wrote above) it sounds like that this is really what we *have to* do, not an optimization. I‘ll double check the spec tomorrow (hopefully it was documented). We should rephrase the comment then.
> > 
> > Do you have a link to the spec that I could look at? I am not hopeful
> > that this will be listed there since the poison side of QEMU was never
> > implemented. The flag is only here because it was copied over in the
> > kernel header.
> 
> Introducing a feature without
> 
> a) specification what it does
> b) implementation (of both sides) showing what has to be done
> c) any kind of documentation of what needs to be done
> 
> just horrible.
> 
> The latest-greatest spec lives in
> 
> https://github.com/oasis-tcs/virtio-spec.git
> 
> I can't spot any signs of free page hinting/page poisioning. :(

Right. I merged the hinting support in Linux on the hope that
spec and qemu upstream will surface later. It seemed so
since IIRC there were some drafts (even though I don't
have any links to hand). Unfortunately neither happened.



> We should document our result of page poisoning, free page hinting, and
> free page reporting there as well. I hope you'll have time for the latter.
> 
> -------------------------------------------------------------------------
> Semantics of VIRTIO_BALLOON_F_PAGE_POISON
> -------------------------------------------------------------------------
> 
> "The VIRTIO_BALLOON_F_PAGE_POISON feature bit is used to indicate if the
> guest is using page poisoning. Guest writes to the poison_val config
> field to tell host about the page poisoning value that is in use."
> -> Very little information, no signs about what has to be done.

I think it's an informational field. Knowing that free pages
are full of a specific pattern can be handy for the hypervisor
for a variety of reasons. E.g. compression/deduplication?


> "Let the hypervisor know that we are expecting a specific value to be
> written back in balloon pages."



> -> Okay, that talks about "balloon pages", which would include right now
> -- pages "inflated" and then "deflated" using free page hinting
> -- pages "inflated" and then "deflated" using oridnary inflate/deflate
>    queue

ATM, in this case driver calls "free" and that fills page with the
poison value.

> -- pages "inflated" using free page reporting and automatically
>    "deflated" on access
> 
> So VIRTIO_BALLOON_F_PAGE_POISON really means "whenever the guest
> deflates a page (either explicitly, or implicitly with free page
> reporting), it is filled with "poison_val".

It might be a valid optimization to allow driver to skip
poisoning of freed pages in this case.

> And I would add
> 
> "However, if the inflated page was not filled with "poison_val" when
> inflating, it's not predictable if the original page or a page filled
> with "poison_val" is returned."
> 
> Which would cover the "we did not discard the page in the hypervisor, so
> the original page is still there".
> 
> 
> We should also document what is expected to happen if "poison_val" is
> suddenly changed by the guest at one point in time again. (e.g., not
> supported, unexpected things can happen, etc.)

Right. I think we should require that this can only be changed
before features have been negotiated.
That is the only point where hypervisor can still fail
gracefully (i.e. fail FEATURES_OK).


> Also, we might have to
> document that a device reset resets the poison_val to 0. (not sure if
> that's really necessary)

Probably yes, for things like kexec.

> -------------------------------------------------------------------------
> What we have to do in the guest/Linux:
> -------------------------------------------------------------------------
> 
> A guest which relies on this (esp., free page reporting in Linux only,
> right?), has to not use/advertise VIRTIO_BALLOON_F_REPORTING *in case
> VIRTIO_BALLOON_F_PAGE_POISON is not provided* by the host. AFAIKS,
> ordinary inflation/deflation and free page hinting does currently not
> care, as we go via free_page(), so that is currently fine AFAIKs.
> 
> -------------------------------------------------------------------------
> What we have to do in the hypervisor/QEMU:
> -------------------------------------------------------------------------
> 
> With VIRTIO_BALLOON_F_PAGE_POISON, we could provide free page reporting
> easily IFF "page_val"==0. However, as you said, it will currently be
> expensive in case of postcopy, as the guest still zeroes out all pages.
> Document that properly.
> 
> With VIRTIO_BALLOON_F_PAGE_POISON, don't inflate any pages right now
> (not discarding anything), OR fill the pages with poison_val when
> deflating. I guess the latter would be better - even if current Linux
> does not need it, it's what we are expected to do AFAIKS.
> 
> With VIRTIO_BALLOON_F_PAGE_POISON and page_val != 0, discard all free
> page reporting attempts. (== what your patch #3 does). Document that
> properly.
> 
> 
> Makes sense? See below for guest migration thingies.
> 
> > 
> >> I could imagine that we also have to care about ordinary page inflation/deflation when handling page poisoning. (Iow, don‘t inflate/deflate if set for now).
> > 
> > The problem is this was broken from the start for that. The issue is
> > that the poison feature was wrapped up within the page hinting
> > feature. So as a result enabling it for a VM that doesn't recognize
> > the feature independently would likely leave the poison value
> > uninitialized and flagging as though a value of 0 is used. In addition
> > the original poison checking wasn't making sure that the page wasn't
> > init_on_free which has the same effect as page poisoning but isn't
> > page poisoning.
> 
> If the guest agrees to have VIRTIO_BALLOON_F_PAGE_POISON but does not
> initialize poison_val, then it's a guest bug, I wouldn't worry about
> that for now.
> 
> > 
> >>>
> >>> The worst case scenario would be one in which the VM was suspended for
> >>> migration while there were still pages in the balloon that needed to
> >>> be drained. In such a case you would have them in an indeterminate
> >>> state since the last page poisoning for them might have been ignored
> >>> since they were marked as "free", so they are just going to be
> >>> whatever value they were if they had been migrated at all.
> >>>
> >>>>>
> >>>>>>
> >>>>>>> +        return;
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>>     if (s->free_page_report_cmd_id == UINT_MAX) {
> >>>>>>>         s->free_page_report_cmd_id =
> >>>>>>>                        VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
> >>>>>>
> >>>>>> We should rename all "free_page_report" stuff we can to
> >>>>>> "free_page_hint"/"free_page_hinting" to avoid confusion (e.g., on my
> >>>>>> side :) ) before adding free page reporting .
> >>>>>>
> >>>>>> (looking at the virtio-balloon linux header, it's also confusing but
> >>>>>> we're stuck with that - maybe we should add better comments)
> >>>>>
> >>>>> Are we stuck? Couldn't we just convert it to an anonymous union with
> >>>>> free_page_hint_cmd_id and then use that where needed?
> >>>>
> >>>> Saw your patch already :)
> >>>>
> >>>>>
> >>>>>>> @@ -618,12 +627,10 @@ static size_t virtio_balloon_config_size(VirtIOBalloon *s)
> >>>>>>>     if (s->qemu_4_0_config_size) {
> >>>>>>>         return sizeof(struct virtio_balloon_config);
> >>>>>>>     }
> >>>>>>> -    if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) {
> >>>>>>> +    if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON) ||
> >>>>>>> +        virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
> >>>>>>>         return sizeof(struct virtio_balloon_config);
> >>>>>>>     }
> >>>>>>> -    if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
> >>>>>>> -        return offsetof(struct virtio_balloon_config, poison_val);
> >>>>>>> -    }
> >>>>>>
> >>>>>> I am not sure this change is completely sane. Why is that necessary at all?
> >>>>>
> >>>>> The poison_val is stored at the end of the structure and is required
> >>>>> in order to make free page hinting to work. What this change is doing
> >>>>
> >>>> Required to make it work? In the kernel,
> >>>>
> >>>> commit 2e991629bcf55a43681aec1ee096eeb03cf81709
> >>>> Author: Wei Wang <wei.w.wang@intel.com>
> >>>> Date:   Mon Aug 27 09:32:19 2018 +0800
> >>>>
> >>>>    virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON
> >>>>
> >>>> was merged after
> >>>>
> >>>> commit 86a559787e6f5cf662c081363f64a20cad654195
> >>>> Author: Wei Wang <wei.w.wang@intel.com>
> >>>> Date:   Mon Aug 27 09:32:17 2018 +0800
> >>>>
> >>>>    virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
> >>>>
> >>>> So I assume it's perfectly fine to not have it.
> >>>>
> >>>> I'd say it's the responsibility of the guest to *not* use
> >>>> VIRTIO_BALLOON_F_FREE_PAGE_HINT in case it is using page poisoning
> >>>> without VIRTIO_BALLOON_F_PAGE_POISON. Otherwise it will shoot itself
> >>>> into the foot.
> >>>
> >>> Based on the timing I am guessing the page poisoning was in the same
> >>> patch set as the free page hinting since there is only 2 seconds
> >>> between when the two are merged. If I recall the page poisoning logic
> >>> was added after the issue was pointed out that it needed to address
> >>> it.
> >>>
> >>
> >> Yeah, but any other OS implementing this, not caring about page poisoning wouldn‘t need it. Maybe there is something in the spec.
> >>
> >> Mental note: we‘ll have to migrate the poison value if not already done (writing on my mobile).
> > 
> > Right. We need to make sure any poison or init on free is migrated
> > over to the destination before we can say we are going to skip the
> > migration. If anything what I probably ought to look into would be if
> > we could change the code so that if we have a hint the page is unused,
> > poison is enabled, and the value is 0 we just ship over a 0 page
> > instead of giving up on hinting entirely.
> > 
> 
> Yeah, we have to migrate poison_val from source to destination. Also, we
> should worry about us losing the page poisoning feature when migrating
> from source to destination.
> 
> Thinking about it, it might make sense to completely decouple page
> poisoning here. Assign it a separate property (as you did), default
> enable it, but disable it via QEMU compat machines.
> 
> Then, we won't lose the feature when migrating.
> 
> -- 
> Thanks,
> 
> David / dhildenb
David Hildenbrand April 16, 2020, 2:55 p.m. UTC | #10
>> We should document our result of page poisoning, free page hinting, and
>> free page reporting there as well. I hope you'll have time for the latter.
>>
>> -------------------------------------------------------------------------
>> Semantics of VIRTIO_BALLOON_F_PAGE_POISON
>> -------------------------------------------------------------------------
>>
>> "The VIRTIO_BALLOON_F_PAGE_POISON feature bit is used to indicate if the
>> guest is using page poisoning. Guest writes to the poison_val config
>> field to tell host about the page poisoning value that is in use."
>> -> Very little information, no signs about what has to be done.
> 
> I think it's an informational field. Knowing that free pages
> are full of a specific pattern can be handy for the hypervisor
> for a variety of reasons. E.g. compression/deduplication?

I was referring to the documentation of the feature and what we
(hypervisor) are expected to do (in regards to inflation/deflation).

Yes, it might be valuable to know that the guest is using poisoning. I
assume compression/deduplication (IOW KSM) will figure out themselves
that such pages are equal.

>> "Let the hypervisor know that we are expecting a specific value to be
>> written back in balloon pages."
> 
> 
> 
>> -> Okay, that talks about "balloon pages", which would include right now
>> -- pages "inflated" and then "deflated" using free page hinting
>> -- pages "inflated" and then "deflated" using oridnary inflate/deflate
>>    queue
> 
> ATM, in this case driver calls "free" and that fills page with the
> poison value.

Yes, that's what I mentioned somehwere, it's currently done by Linux and ...

> 
> It might be a valid optimization to allow driver to skip
> poisoning of freed pages in this case.

... we should prepare for that :)

> 
>> And I would add
>>
>> "However, if the inflated page was not filled with "poison_val" when
>> inflating, it's not predictable if the original page or a page filled
>> with "poison_val" is returned."
>>
>> Which would cover the "we did not discard the page in the hypervisor, so
>> the original page is still there".
>>
>>
>> We should also document what is expected to happen if "poison_val" is
>> suddenly changed by the guest at one point in time again. (e.g., not
>> supported, unexpected things can happen, etc.)
> 
> Right. I think we should require that this can only be changed
> before features have been negotiated.
> That is the only point where hypervisor can still fail
> gracefully (i.e. fail FEATURES_OK).

Agreed.

I can totally understand if Alex would want to stop working on
VIRTIO_BALLOON_F_PAGE_POISON at this point and only fix the guest to not
enable free page reporting in case we don't have
VIRTIO_BALLOON_F_PAGE_POISON (unless that's already done), lol. :)
Alexander Duyck April 16, 2020, 6:21 p.m. UTC | #11
On Thu, Apr 16, 2020 at 7:55 AM David Hildenbrand <david@redhat.com> wrote:
>
> >> We should document our result of page poisoning, free page hinting, and
> >> free page reporting there as well. I hope you'll have time for the latter.
> >>
> >> -------------------------------------------------------------------------
> >> Semantics of VIRTIO_BALLOON_F_PAGE_POISON
> >> -------------------------------------------------------------------------
> >>
> >> "The VIRTIO_BALLOON_F_PAGE_POISON feature bit is used to indicate if the
> >> guest is using page poisoning. Guest writes to the poison_val config
> >> field to tell host about the page poisoning value that is in use."
> >> -> Very little information, no signs about what has to be done.
> >
> > I think it's an informational field. Knowing that free pages
> > are full of a specific pattern can be handy for the hypervisor
> > for a variety of reasons. E.g. compression/deduplication?
>
> I was referring to the documentation of the feature and what we
> (hypervisor) are expected to do (in regards to inflation/deflation).
>
> Yes, it might be valuable to know that the guest is using poisoning. I
> assume compression/deduplication (IOW KSM) will figure out themselves
> that such pages are equal.

The other thing to keep in mind is that the poison value only really
comes into play with hinting/reporting. In the case of the standard
balloon the pages are considered allocated from the guest's
perspective until the balloon is deflated. Then any poison/init will
occur over again anyway so I don't think the standard balloon should
really care.

For hinting it somewhat depends. Currently the implementation is
inflating a balloon so having poisoning or init_on_free means it is
written to immediately after it is freed so it defeats the purpose of
the hinting. However that is a Linux implementation issue, not
necessarily an issue with the QEMU implementation. As such may be I
should fix that in the Linux driver since that has been ignored in
QEMU up until now anyway. The more interesting bit is what should the
behavior be from the hypervisor when a page is marked as being hinted.
I think right now the behavior is to just not migrate the page. I
wonder though if we shouldn't instead just consider the page a zero
page, and then maybe modify the zero page behavior for the case where
we know page poisoning is enabled.

For reporting it is a matter of tracking the contents. We don't want
to modify the contents in any way as we are attempting to essentially
do in-place tracking of the page. So if it is poisoned or initialized
it needs to stay in that state so we cannot invalidate the page if
doing so will cause it to lose state information.

> >> "Let the hypervisor know that we are expecting a specific value to be
> >> written back in balloon pages."
> >
> >
> >
> >> -> Okay, that talks about "balloon pages", which would include right now
> >> -- pages "inflated" and then "deflated" using free page hinting
> >> -- pages "inflated" and then "deflated" using oridnary inflate/deflate
> >>    queue
> >
> > ATM, in this case driver calls "free" and that fills page with the
> > poison value.
>
> Yes, that's what I mentioned somehwere, it's currently done by Linux and ...
>
> >
> > It might be a valid optimization to allow driver to skip
> > poisoning of freed pages in this case.
>
> ... we should prepare for that :)

Agreed.

> >
> >> And I would add
> >>
> >> "However, if the inflated page was not filled with "poison_val" when
> >> inflating, it's not predictable if the original page or a page filled
> >> with "poison_val" is returned."
> >>
> >> Which would cover the "we did not discard the page in the hypervisor, so
> >> the original page is still there".
> >>
> >>
> >> We should also document what is expected to happen if "poison_val" is
> >> suddenly changed by the guest at one point in time again. (e.g., not
> >> supported, unexpected things can happen, etc.)
> >
> > Right. I think we should require that this can only be changed
> > before features have been negotiated.
> > That is the only point where hypervisor can still fail
> > gracefully (i.e. fail FEATURES_OK).
>
> Agreed.

I believe that is the current behavior. Essentially if poisoning
enabled then the feature flag is left set. I think the one change I
will make in the driver is that if poisoning is enabled in the kernel,
but PAGE_POISON is not available as a feature, I am going to disable
both the reporting and hinting features in virtballoon_validate.

> I can totally understand if Alex would want to stop working on
> VIRTIO_BALLOON_F_PAGE_POISON at this point and only fix the guest to not
> enable free page reporting in case we don't have
> VIRTIO_BALLOON_F_PAGE_POISON (unless that's already done), lol. :)

I already have a patch for that.

The bigger issue is how to deal with the PAGE_POISON being enabled
with FREE_PAGE_HINTING. The legacy code at this point is just broken
and is plowing through with FREE_PAGE_HINTING while it is enabled.
That is safe for now because it is using a balloon, the side effect is
that it is going to defer migration. If it switches to a page
reporting type setup at some point in the future we would need to make
sure something is written to the other end to identify the poison/zero
pages.
David Hildenbrand April 16, 2020, 6:33 p.m. UTC | #12
> The other thing to keep in mind is that the poison value only really
> comes into play with hinting/reporting. In the case of the standard
> balloon the pages are considered allocated from the guest's

Currently just as free page hinting IMHO. They are temporarily
considered allocated.

> perspective until the balloon is deflated. Then any poison/init will
> occur over again anyway so I don't think the standard balloon should
> really care.

I think we should make this consistent. And as we discuss below, this
allows for a nice optimization in the guest even for ordinary
inflation/deflation (no need to zero out/poison again when giving the
pages back to the buddy).

> 
> For hinting it somewhat depends. Currently the implementation is
> inflating a balloon so having poisoning or init_on_free means it is
> written to immediately after it is freed so it defeats the purpose of
> the hinting. However that is a Linux implementation issue, not

Yeah, and as we discuss below, we can optimize that later in Linux. It's
sub-optimal, I agree.

> necessarily an issue with the QEMU implementation. As such may be I
> should fix that in the Linux driver since that has been ignored in
> QEMU up until now anyway. The more interesting bit is what should the
> behavior be from the hypervisor when a page is marked as being hinted.
> I think right now the behavior is to just not migrate the page. I
> wonder though if we shouldn't instead just consider the page a zero
> page, and then maybe modify the zero page behavior for the case where
> we know page poisoning is enabled.

I consider that maybe future work. Let's keep it simple for now (iow,
try to get page poisoning handling right first). The optimize the guest
handling on balloon deflation / end of free page hinting.

[...]

>> I can totally understand if Alex would want to stop working on
>> VIRTIO_BALLOON_F_PAGE_POISON at this point and only fix the guest to not
>> enable free page reporting in case we don't have
>> VIRTIO_BALLOON_F_PAGE_POISON (unless that's already done), lol. :)
> 
> I already have a patch for that.
> 
> The bigger issue is how to deal with the PAGE_POISON being enabled
> with FREE_PAGE_HINTING. The legacy code at this point is just broken
> and is plowing through with FREE_PAGE_HINTING while it is enabled.
> That is safe for now because it is using a balloon, the side effect is
> that it is going to defer migration. If it switches to a page
> reporting type setup at some point in the future we would need to make
> sure something is written to the other end to identify the poison/zero
> pages.


I think we don't have to worry about that for now. Might be sub-optimal,
but then, I don't think actual page poisoning isn't all that frequently
used in production setups.
diff mbox series

Patch

diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index a4729f7fc930..1c6d36a29a04 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -531,6 +531,15 @@  static void virtio_balloon_free_page_start(VirtIOBalloon *s)
         return;
     }
 
+    /*
+     * If page poisoning is enabled then we probably shouldn't bother with
+     * the hinting since the poisoning will dirty the page and invalidate
+     * the work we are doing anyway.
+     */
+    if (virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
+        return;
+    }
+
     if (s->free_page_report_cmd_id == UINT_MAX) {
         s->free_page_report_cmd_id =
                        VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
@@ -618,12 +627,10 @@  static size_t virtio_balloon_config_size(VirtIOBalloon *s)
     if (s->qemu_4_0_config_size) {
         return sizeof(struct virtio_balloon_config);
     }
-    if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) {
+    if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON) ||
+        virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
         return sizeof(struct virtio_balloon_config);
     }
-    if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
-        return offsetof(struct virtio_balloon_config, poison_val);
-    }
     return offsetof(struct virtio_balloon_config, free_page_report_cmd_id);
 }
 
@@ -634,6 +641,7 @@  static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
 
     config.num_pages = cpu_to_le32(dev->num_pages);
     config.actual = cpu_to_le32(dev->actual);
+    config.poison_val = cpu_to_le32(dev->poison_val);
 
     if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) {
         config.free_page_report_cmd_id =
@@ -697,6 +705,9 @@  static void virtio_balloon_set_config(VirtIODevice *vdev,
         qapi_event_send_balloon_change(vm_ram_size -
                         ((ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT));
     }
+    dev->poison_val = virtio_vdev_has_feature(vdev,
+                                              VIRTIO_BALLOON_F_PAGE_POISON) ?
+                      le32_to_cpu(config.poison_val) : 0;
     trace_virtio_balloon_set_config(dev->actual, oldactual);
 }
 
@@ -706,6 +717,9 @@  static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
     VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
     f |= dev->host_features;
     virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);
+    if (virtio_has_feature(f, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+        virtio_add_feature(&f, VIRTIO_BALLOON_F_PAGE_POISON);
+    }
 
     return f;
 }
@@ -854,6 +868,8 @@  static void virtio_balloon_device_reset(VirtIODevice *vdev)
         g_free(s->stats_vq_elem);
         s->stats_vq_elem = NULL;
     }
+
+    s->poison_val = 0;
 }
 
 static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
@@ -916,6 +932,8 @@  static Property virtio_balloon_properties[] = {
                     VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
     DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features,
                     VIRTIO_BALLOON_F_FREE_PAGE_HINT, false),
+    DEFINE_PROP_BIT("x-page-poison", VirtIOBalloon, host_features,
+                    VIRTIO_BALLOON_F_PAGE_POISON, false),
     /* QEMU 4.0 accidentally changed the config size even when free-page-hint
      * is disabled, resulting in QEMU 3.1 migration incompatibility.  This
      * property retains this quirk for QEMU 4.1 machine types.
diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
index d1c968d2376e..7fe78e5c14d7 100644
--- a/include/hw/virtio/virtio-balloon.h
+++ b/include/hw/virtio/virtio-balloon.h
@@ -70,6 +70,7 @@  typedef struct VirtIOBalloon {
     uint32_t host_features;
 
     bool qemu_4_0_config_size;
+    uint32_t poison_val;
 } VirtIOBalloon;
 
 #endif