diff mbox series

[3/3] x86/mem_sharing: make fork_reset more configurable

Message ID fb437a16517d343ba3432aa64b9e14b34630a750.1647970630.git.tamas.lengyel@intel.com (mailing list archive)
State New, archived
Headers show
Series [1/3] x86/mem_sharing: option to skip populating special pages during fork | expand

Commit Message

Tamas K Lengyel March 22, 2022, 5:41 p.m. UTC
Allow specify distinct parts of the fork VM to be reset. This is useful when a
fuzzing operation involves mapping in only a handful of pages that are known
ahead of time. Throwing these pages away just to be re-copied immediately is
expensive, thus allowing to specify partial resets can speed things up.

Also allow resetting to be initiated from vm_event responses as an
optiomization.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
---
 tools/include/xenctrl.h                |  3 ++-
 tools/libs/ctrl/xc_memshr.c            |  7 ++++++-
 xen/arch/x86/include/asm/mem_sharing.h |  9 +++++++++
 xen/arch/x86/mm/mem_sharing.c          | 22 +++++++++++++++++-----
 xen/common/vm_event.c                  | 14 ++++++++++++++
 xen/include/public/memory.h            |  4 +++-
 xen/include/public/vm_event.h          |  8 ++++++++
 7 files changed, 59 insertions(+), 8 deletions(-)

Comments

Roger Pau Monné March 24, 2022, 3:45 p.m. UTC | #1
On Tue, Mar 22, 2022 at 01:41:39PM -0400, Tamas K Lengyel wrote:
> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> index 208d8dcbd9..30ce23c5a7 100644
> --- a/xen/include/public/memory.h
> +++ b/xen/include/public/memory.h
> @@ -541,12 +541,14 @@ struct xen_mem_sharing_op {
>                  uint32_t gref;     /* IN: gref to debug         */
>              } u;
>          } debug;
> -        struct mem_sharing_op_fork {      /* OP_FORK */
> +        struct mem_sharing_op_fork {      /* OP_FORK/_RESET */
>              domid_t parent_domain;        /* IN: parent's domain id */
>  /* These flags only makes sense for short-lived forks */
>  #define XENMEM_FORK_WITH_IOMMU_ALLOWED (1u << 0)
>  #define XENMEM_FORK_BLOCK_INTERRUPTS   (1u << 1)
>  #define XENMEM_FORK_SKIP_SPECIAL_PAGES (1u << 2)
> +#define XENMEM_FORK_RESET_STATE        (1u << 3)
> +#define XENMEM_FORK_RESET_MEMORY       (1u << 4)
>              uint16_t flags;               /* IN: optional settings */
>              uint32_t pad;                 /* Must be set to 0 */
>          } fork;
> diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
> index bb003d21d0..81c2ee28cc 100644
> --- a/xen/include/public/vm_event.h
> +++ b/xen/include/public/vm_event.h
> @@ -127,6 +127,14 @@
>   * Reset the vmtrace buffer (if vmtrace is enabled)
>   */
>  #define VM_EVENT_FLAG_RESET_VMTRACE      (1 << 13)
> +/*
> + * Reset the VM state (if VM is fork)
> + */
> +#define VM_EVENT_FLAG_RESET_FORK_STATE   (1 << 14)
> +/*
> + * Remove unshared entried from physmap (if VM is fork)
> + */
> +#define VM_EVENT_FLAG_RESET_FORK_MEMORY  (1 << 15)

I'm confused about why two different interfaces are added to do this
kind of selective resets, one to vm_event and one to xenmem_fork?

I thin k the natural place for the option to live would be
XENMEM_FORK?

Thanks, Roger.
Tamas K Lengyel March 24, 2022, 3:52 p.m. UTC | #2
On Thu, Mar 24, 2022 at 11:46 AM Roger Pau Monné <roger.pau@citrix.com> wrote:
>
> On Tue, Mar 22, 2022 at 01:41:39PM -0400, Tamas K Lengyel wrote:
> > diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> > index 208d8dcbd9..30ce23c5a7 100644
> > --- a/xen/include/public/memory.h
> > +++ b/xen/include/public/memory.h
> > @@ -541,12 +541,14 @@ struct xen_mem_sharing_op {
> >                  uint32_t gref;     /* IN: gref to debug         */
> >              } u;
> >          } debug;
> > -        struct mem_sharing_op_fork {      /* OP_FORK */
> > +        struct mem_sharing_op_fork {      /* OP_FORK/_RESET */
> >              domid_t parent_domain;        /* IN: parent's domain id */
> >  /* These flags only makes sense for short-lived forks */
> >  #define XENMEM_FORK_WITH_IOMMU_ALLOWED (1u << 0)
> >  #define XENMEM_FORK_BLOCK_INTERRUPTS   (1u << 1)
> >  #define XENMEM_FORK_SKIP_SPECIAL_PAGES (1u << 2)
> > +#define XENMEM_FORK_RESET_STATE        (1u << 3)
> > +#define XENMEM_FORK_RESET_MEMORY       (1u << 4)
> >              uint16_t flags;               /* IN: optional settings */
> >              uint32_t pad;                 /* Must be set to 0 */
> >          } fork;
> > diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
> > index bb003d21d0..81c2ee28cc 100644
> > --- a/xen/include/public/vm_event.h
> > +++ b/xen/include/public/vm_event.h
> > @@ -127,6 +127,14 @@
> >   * Reset the vmtrace buffer (if vmtrace is enabled)
> >   */
> >  #define VM_EVENT_FLAG_RESET_VMTRACE      (1 << 13)
> > +/*
> > + * Reset the VM state (if VM is fork)
> > + */
> > +#define VM_EVENT_FLAG_RESET_FORK_STATE   (1 << 14)
> > +/*
> > + * Remove unshared entried from physmap (if VM is fork)
> > + */
> > +#define VM_EVENT_FLAG_RESET_FORK_MEMORY  (1 << 15)
>
> I'm confused about why two different interfaces are added to do this
> kind of selective resets, one to vm_event and one to xenmem_fork?
>
> I thin k the natural place for the option to live would be
> XENMEM_FORK?

Yes, that's the natural place for it. But we are adding it to both for
a reason. In our use-case the reset operation will happen after a
vm_event is received to which we already must send a reply. Setting
the flag on the vm_event reply saves us having to issue an extra memop
hypercall afterwards.

Tamas
Roger Pau Monné March 24, 2022, 4:03 p.m. UTC | #3
On Thu, Mar 24, 2022 at 11:52:38AM -0400, Tamas K Lengyel wrote:
> On Thu, Mar 24, 2022 at 11:46 AM Roger Pau Monné <roger.pau@citrix.com> wrote:
> >
> > On Tue, Mar 22, 2022 at 01:41:39PM -0400, Tamas K Lengyel wrote:
> > > diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> > > index 208d8dcbd9..30ce23c5a7 100644
> > > --- a/xen/include/public/memory.h
> > > +++ b/xen/include/public/memory.h
> > > @@ -541,12 +541,14 @@ struct xen_mem_sharing_op {
> > >                  uint32_t gref;     /* IN: gref to debug         */
> > >              } u;
> > >          } debug;
> > > -        struct mem_sharing_op_fork {      /* OP_FORK */
> > > +        struct mem_sharing_op_fork {      /* OP_FORK/_RESET */
> > >              domid_t parent_domain;        /* IN: parent's domain id */
> > >  /* These flags only makes sense for short-lived forks */
> > >  #define XENMEM_FORK_WITH_IOMMU_ALLOWED (1u << 0)
> > >  #define XENMEM_FORK_BLOCK_INTERRUPTS   (1u << 1)
> > >  #define XENMEM_FORK_SKIP_SPECIAL_PAGES (1u << 2)
> > > +#define XENMEM_FORK_RESET_STATE        (1u << 3)
> > > +#define XENMEM_FORK_RESET_MEMORY       (1u << 4)
> > >              uint16_t flags;               /* IN: optional settings */
> > >              uint32_t pad;                 /* Must be set to 0 */
> > >          } fork;
> > > diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
> > > index bb003d21d0..81c2ee28cc 100644
> > > --- a/xen/include/public/vm_event.h
> > > +++ b/xen/include/public/vm_event.h
> > > @@ -127,6 +127,14 @@
> > >   * Reset the vmtrace buffer (if vmtrace is enabled)
> > >   */
> > >  #define VM_EVENT_FLAG_RESET_VMTRACE      (1 << 13)
> > > +/*
> > > + * Reset the VM state (if VM is fork)
> > > + */
> > > +#define VM_EVENT_FLAG_RESET_FORK_STATE   (1 << 14)
> > > +/*
> > > + * Remove unshared entried from physmap (if VM is fork)
> > > + */
> > > +#define VM_EVENT_FLAG_RESET_FORK_MEMORY  (1 << 15)
> >
> > I'm confused about why two different interfaces are added to do this
> > kind of selective resets, one to vm_event and one to xenmem_fork?
> >
> > I thin k the natural place for the option to live would be
> > XENMEM_FORK?
> 
> Yes, that's the natural place for it. But we are adding it to both for
> a reason. In our use-case the reset operation will happen after a
> vm_event is received to which we already must send a reply. Setting
> the flag on the vm_event reply saves us having to issue an extra memop
> hypercall afterwards.

Can you do a multicall and batch both operations in a single
hypercall?

That would seem more natural than adding duplicated interfaces.

Thanks, Roger.
Tamas K Lengyel March 24, 2022, 4:22 p.m. UTC | #4
On Thu, Mar 24, 2022 at 12:04 PM Roger Pau Monné <roger.pau@citrix.com> wrote:
>
> On Thu, Mar 24, 2022 at 11:52:38AM -0400, Tamas K Lengyel wrote:
> > On Thu, Mar 24, 2022 at 11:46 AM Roger Pau Monné <roger.pau@citrix.com> wrote:
> > >
> > > On Tue, Mar 22, 2022 at 01:41:39PM -0400, Tamas K Lengyel wrote:
> > > > diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> > > > index 208d8dcbd9..30ce23c5a7 100644
> > > > --- a/xen/include/public/memory.h
> > > > +++ b/xen/include/public/memory.h
> > > > @@ -541,12 +541,14 @@ struct xen_mem_sharing_op {
> > > >                  uint32_t gref;     /* IN: gref to debug         */
> > > >              } u;
> > > >          } debug;
> > > > -        struct mem_sharing_op_fork {      /* OP_FORK */
> > > > +        struct mem_sharing_op_fork {      /* OP_FORK/_RESET */
> > > >              domid_t parent_domain;        /* IN: parent's domain id */
> > > >  /* These flags only makes sense for short-lived forks */
> > > >  #define XENMEM_FORK_WITH_IOMMU_ALLOWED (1u << 0)
> > > >  #define XENMEM_FORK_BLOCK_INTERRUPTS   (1u << 1)
> > > >  #define XENMEM_FORK_SKIP_SPECIAL_PAGES (1u << 2)
> > > > +#define XENMEM_FORK_RESET_STATE        (1u << 3)
> > > > +#define XENMEM_FORK_RESET_MEMORY       (1u << 4)
> > > >              uint16_t flags;               /* IN: optional settings */
> > > >              uint32_t pad;                 /* Must be set to 0 */
> > > >          } fork;
> > > > diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
> > > > index bb003d21d0..81c2ee28cc 100644
> > > > --- a/xen/include/public/vm_event.h
> > > > +++ b/xen/include/public/vm_event.h
> > > > @@ -127,6 +127,14 @@
> > > >   * Reset the vmtrace buffer (if vmtrace is enabled)
> > > >   */
> > > >  #define VM_EVENT_FLAG_RESET_VMTRACE      (1 << 13)
> > > > +/*
> > > > + * Reset the VM state (if VM is fork)
> > > > + */
> > > > +#define VM_EVENT_FLAG_RESET_FORK_STATE   (1 << 14)
> > > > +/*
> > > > + * Remove unshared entried from physmap (if VM is fork)
> > > > + */
> > > > +#define VM_EVENT_FLAG_RESET_FORK_MEMORY  (1 << 15)
> > >
> > > I'm confused about why two different interfaces are added to do this
> > > kind of selective resets, one to vm_event and one to xenmem_fork?
> > >
> > > I thin k the natural place for the option to live would be
> > > XENMEM_FORK?
> >
> > Yes, that's the natural place for it. But we are adding it to both for
> > a reason. In our use-case the reset operation will happen after a
> > vm_event is received to which we already must send a reply. Setting
> > the flag on the vm_event reply saves us having to issue an extra memop
> > hypercall afterwards.
>
> Can you do a multicall and batch both operations in a single
> hypercall?
>
> That would seem more natural than adding duplicated interfaces.

Not in a straight forward way, no. There is no exposed API in libxc to
do a multicall. Even if that was an option it is still easier for me
to just flip a bit in the response field than having to construct a
whole standalone hypercall structure to be sent as part of a
multicall.

Tamas
Roger Pau Monné March 24, 2022, 4:43 p.m. UTC | #5
On Thu, Mar 24, 2022 at 12:22:49PM -0400, Tamas K Lengyel wrote:
> On Thu, Mar 24, 2022 at 12:04 PM Roger Pau Monné <roger.pau@citrix.com> wrote:
> >
> > On Thu, Mar 24, 2022 at 11:52:38AM -0400, Tamas K Lengyel wrote:
> > > On Thu, Mar 24, 2022 at 11:46 AM Roger Pau Monné <roger.pau@citrix.com> wrote:
> > > >
> > > > On Tue, Mar 22, 2022 at 01:41:39PM -0400, Tamas K Lengyel wrote:
> > > > > diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> > > > > index 208d8dcbd9..30ce23c5a7 100644
> > > > > --- a/xen/include/public/memory.h
> > > > > +++ b/xen/include/public/memory.h
> > > > > @@ -541,12 +541,14 @@ struct xen_mem_sharing_op {
> > > > >                  uint32_t gref;     /* IN: gref to debug         */
> > > > >              } u;
> > > > >          } debug;
> > > > > -        struct mem_sharing_op_fork {      /* OP_FORK */
> > > > > +        struct mem_sharing_op_fork {      /* OP_FORK/_RESET */
> > > > >              domid_t parent_domain;        /* IN: parent's domain id */
> > > > >  /* These flags only makes sense for short-lived forks */
> > > > >  #define XENMEM_FORK_WITH_IOMMU_ALLOWED (1u << 0)
> > > > >  #define XENMEM_FORK_BLOCK_INTERRUPTS   (1u << 1)
> > > > >  #define XENMEM_FORK_SKIP_SPECIAL_PAGES (1u << 2)
> > > > > +#define XENMEM_FORK_RESET_STATE        (1u << 3)
> > > > > +#define XENMEM_FORK_RESET_MEMORY       (1u << 4)
> > > > >              uint16_t flags;               /* IN: optional settings */
> > > > >              uint32_t pad;                 /* Must be set to 0 */
> > > > >          } fork;
> > > > > diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
> > > > > index bb003d21d0..81c2ee28cc 100644
> > > > > --- a/xen/include/public/vm_event.h
> > > > > +++ b/xen/include/public/vm_event.h
> > > > > @@ -127,6 +127,14 @@
> > > > >   * Reset the vmtrace buffer (if vmtrace is enabled)
> > > > >   */
> > > > >  #define VM_EVENT_FLAG_RESET_VMTRACE      (1 << 13)
> > > > > +/*
> > > > > + * Reset the VM state (if VM is fork)
> > > > > + */
> > > > > +#define VM_EVENT_FLAG_RESET_FORK_STATE   (1 << 14)
> > > > > +/*
> > > > > + * Remove unshared entried from physmap (if VM is fork)
> > > > > + */
> > > > > +#define VM_EVENT_FLAG_RESET_FORK_MEMORY  (1 << 15)
> > > >
> > > > I'm confused about why two different interfaces are added to do this
> > > > kind of selective resets, one to vm_event and one to xenmem_fork?
> > > >
> > > > I thin k the natural place for the option to live would be
> > > > XENMEM_FORK?
> > >
> > > Yes, that's the natural place for it. But we are adding it to both for
> > > a reason. In our use-case the reset operation will happen after a
> > > vm_event is received to which we already must send a reply. Setting
> > > the flag on the vm_event reply saves us having to issue an extra memop
> > > hypercall afterwards.
> >
> > Can you do a multicall and batch both operations in a single
> > hypercall?
> >
> > That would seem more natural than adding duplicated interfaces.
> 
> Not in a straight forward way, no. There is no exposed API in libxc to
> do a multicall. Even if that was an option it is still easier for me
> to just flip a bit in the response field than having to construct a
> whole standalone hypercall structure to be sent as part of a
> multicall.

Right, I can see it being easier, but it seems like a bad choice from
an interface PoV. You are the maintainer of both subsystems, but it
would seem to me it's in your best interest to try to keep the
interfaces separated and clean.

Would it be possible for the reset XENMEM_FORK op to have the side
effect of performing what you would instead do with the vm_event
hypercall?

Thanks, Roger.
Tamas K Lengyel March 24, 2022, 5:02 p.m. UTC | #6
On Thu, Mar 24, 2022 at 12:44 PM Roger Pau Monné <roger.pau@citrix.com> wrote:
>
> On Thu, Mar 24, 2022 at 12:22:49PM -0400, Tamas K Lengyel wrote:
> > On Thu, Mar 24, 2022 at 12:04 PM Roger Pau Monné <roger.pau@citrix.com> wrote:
> > >
> > > On Thu, Mar 24, 2022 at 11:52:38AM -0400, Tamas K Lengyel wrote:
> > > > On Thu, Mar 24, 2022 at 11:46 AM Roger Pau Monné <roger.pau@citrix.com> wrote:
> > > > >
> > > > > On Tue, Mar 22, 2022 at 01:41:39PM -0400, Tamas K Lengyel wrote:
> > > > > > diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> > > > > > index 208d8dcbd9..30ce23c5a7 100644
> > > > > > --- a/xen/include/public/memory.h
> > > > > > +++ b/xen/include/public/memory.h
> > > > > > @@ -541,12 +541,14 @@ struct xen_mem_sharing_op {
> > > > > >                  uint32_t gref;     /* IN: gref to debug         */
> > > > > >              } u;
> > > > > >          } debug;
> > > > > > -        struct mem_sharing_op_fork {      /* OP_FORK */
> > > > > > +        struct mem_sharing_op_fork {      /* OP_FORK/_RESET */
> > > > > >              domid_t parent_domain;        /* IN: parent's domain id */
> > > > > >  /* These flags only makes sense for short-lived forks */
> > > > > >  #define XENMEM_FORK_WITH_IOMMU_ALLOWED (1u << 0)
> > > > > >  #define XENMEM_FORK_BLOCK_INTERRUPTS   (1u << 1)
> > > > > >  #define XENMEM_FORK_SKIP_SPECIAL_PAGES (1u << 2)
> > > > > > +#define XENMEM_FORK_RESET_STATE        (1u << 3)
> > > > > > +#define XENMEM_FORK_RESET_MEMORY       (1u << 4)
> > > > > >              uint16_t flags;               /* IN: optional settings */
> > > > > >              uint32_t pad;                 /* Must be set to 0 */
> > > > > >          } fork;
> > > > > > diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
> > > > > > index bb003d21d0..81c2ee28cc 100644
> > > > > > --- a/xen/include/public/vm_event.h
> > > > > > +++ b/xen/include/public/vm_event.h
> > > > > > @@ -127,6 +127,14 @@
> > > > > >   * Reset the vmtrace buffer (if vmtrace is enabled)
> > > > > >   */
> > > > > >  #define VM_EVENT_FLAG_RESET_VMTRACE      (1 << 13)
> > > > > > +/*
> > > > > > + * Reset the VM state (if VM is fork)
> > > > > > + */
> > > > > > +#define VM_EVENT_FLAG_RESET_FORK_STATE   (1 << 14)
> > > > > > +/*
> > > > > > + * Remove unshared entried from physmap (if VM is fork)
> > > > > > + */
> > > > > > +#define VM_EVENT_FLAG_RESET_FORK_MEMORY  (1 << 15)
> > > > >
> > > > > I'm confused about why two different interfaces are added to do this
> > > > > kind of selective resets, one to vm_event and one to xenmem_fork?
> > > > >
> > > > > I thin k the natural place for the option to live would be
> > > > > XENMEM_FORK?
> > > >
> > > > Yes, that's the natural place for it. But we are adding it to both for
> > > > a reason. In our use-case the reset operation will happen after a
> > > > vm_event is received to which we already must send a reply. Setting
> > > > the flag on the vm_event reply saves us having to issue an extra memop
> > > > hypercall afterwards.
> > >
> > > Can you do a multicall and batch both operations in a single
> > > hypercall?
> > >
> > > That would seem more natural than adding duplicated interfaces.
> >
> > Not in a straight forward way, no. There is no exposed API in libxc to
> > do a multicall. Even if that was an option it is still easier for me
> > to just flip a bit in the response field than having to construct a
> > whole standalone hypercall structure to be sent as part of a
> > multicall.
>
> Right, I can see it being easier, but it seems like a bad choice from
> an interface PoV. You are the maintainer of both subsystems, but it
> would seem to me it's in your best interest to try to keep the
> interfaces separated and clean.
>
> Would it be possible for the reset XENMEM_FORK op to have the side
> effect of performing what you would instead do with the vm_event
> hypercall?

Yes, the event response is really just an event channel signal to Xen,
so the memop hypercall could similarly encode the "now check the
vm_event response" as an optional field. But why is that any better
than the current event channel route processing the vm_response
encoding the "now do these ops on the fork"?

We already have a bunch of different operations you can encode in the
vm_event response field, so it reduces the complexity on the toolstack
side since I don't have to switch around which hypercall I need to
issue depending on what extra ops I want to put into a single
hypercall.

Tamas
Jan Beulich March 25, 2022, 9:04 a.m. UTC | #7
On 24.03.2022 18:02, Tamas K Lengyel wrote:
> On Thu, Mar 24, 2022 at 12:44 PM Roger Pau Monné <roger.pau@citrix.com> wrote:
>>
>> On Thu, Mar 24, 2022 at 12:22:49PM -0400, Tamas K Lengyel wrote:
>>> On Thu, Mar 24, 2022 at 12:04 PM Roger Pau Monné <roger.pau@citrix.com> wrote:
>>>>
>>>> On Thu, Mar 24, 2022 at 11:52:38AM -0400, Tamas K Lengyel wrote:
>>>>> On Thu, Mar 24, 2022 at 11:46 AM Roger Pau Monné <roger.pau@citrix.com> wrote:
>>>>>>
>>>>>> On Tue, Mar 22, 2022 at 01:41:39PM -0400, Tamas K Lengyel wrote:
>>>>>>> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
>>>>>>> index 208d8dcbd9..30ce23c5a7 100644
>>>>>>> --- a/xen/include/public/memory.h
>>>>>>> +++ b/xen/include/public/memory.h
>>>>>>> @@ -541,12 +541,14 @@ struct xen_mem_sharing_op {
>>>>>>>                  uint32_t gref;     /* IN: gref to debug         */
>>>>>>>              } u;
>>>>>>>          } debug;
>>>>>>> -        struct mem_sharing_op_fork {      /* OP_FORK */
>>>>>>> +        struct mem_sharing_op_fork {      /* OP_FORK/_RESET */
>>>>>>>              domid_t parent_domain;        /* IN: parent's domain id */
>>>>>>>  /* These flags only makes sense for short-lived forks */
>>>>>>>  #define XENMEM_FORK_WITH_IOMMU_ALLOWED (1u << 0)
>>>>>>>  #define XENMEM_FORK_BLOCK_INTERRUPTS   (1u << 1)
>>>>>>>  #define XENMEM_FORK_SKIP_SPECIAL_PAGES (1u << 2)
>>>>>>> +#define XENMEM_FORK_RESET_STATE        (1u << 3)
>>>>>>> +#define XENMEM_FORK_RESET_MEMORY       (1u << 4)
>>>>>>>              uint16_t flags;               /* IN: optional settings */
>>>>>>>              uint32_t pad;                 /* Must be set to 0 */
>>>>>>>          } fork;
>>>>>>> diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
>>>>>>> index bb003d21d0..81c2ee28cc 100644
>>>>>>> --- a/xen/include/public/vm_event.h
>>>>>>> +++ b/xen/include/public/vm_event.h
>>>>>>> @@ -127,6 +127,14 @@
>>>>>>>   * Reset the vmtrace buffer (if vmtrace is enabled)
>>>>>>>   */
>>>>>>>  #define VM_EVENT_FLAG_RESET_VMTRACE      (1 << 13)
>>>>>>> +/*
>>>>>>> + * Reset the VM state (if VM is fork)
>>>>>>> + */
>>>>>>> +#define VM_EVENT_FLAG_RESET_FORK_STATE   (1 << 14)
>>>>>>> +/*
>>>>>>> + * Remove unshared entried from physmap (if VM is fork)
>>>>>>> + */
>>>>>>> +#define VM_EVENT_FLAG_RESET_FORK_MEMORY  (1 << 15)
>>>>>>
>>>>>> I'm confused about why two different interfaces are added to do this
>>>>>> kind of selective resets, one to vm_event and one to xenmem_fork?
>>>>>>
>>>>>> I thin k the natural place for the option to live would be
>>>>>> XENMEM_FORK?
>>>>>
>>>>> Yes, that's the natural place for it. But we are adding it to both for
>>>>> a reason. In our use-case the reset operation will happen after a
>>>>> vm_event is received to which we already must send a reply. Setting
>>>>> the flag on the vm_event reply saves us having to issue an extra memop
>>>>> hypercall afterwards.
>>>>
>>>> Can you do a multicall and batch both operations in a single
>>>> hypercall?
>>>>
>>>> That would seem more natural than adding duplicated interfaces.
>>>
>>> Not in a straight forward way, no. There is no exposed API in libxc to
>>> do a multicall. Even if that was an option it is still easier for me
>>> to just flip a bit in the response field than having to construct a
>>> whole standalone hypercall structure to be sent as part of a
>>> multicall.
>>
>> Right, I can see it being easier, but it seems like a bad choice from
>> an interface PoV. You are the maintainer of both subsystems, but it
>> would seem to me it's in your best interest to try to keep the
>> interfaces separated and clean.
>>
>> Would it be possible for the reset XENMEM_FORK op to have the side
>> effect of performing what you would instead do with the vm_event
>> hypercall?
> 
> Yes, the event response is really just an event channel signal to Xen,
> so the memop hypercall could similarly encode the "now check the
> vm_event response" as an optional field. But why is that any better
> than the current event channel route processing the vm_response
> encoding the "now do these ops on the fork"?

Well, as Roger said: Less duplication in the interface.

> We already have a bunch of different operations you can encode in the
> vm_event response field, so it reduces the complexity on the toolstack
> side since I don't have to switch around which hypercall I need to
> issue depending on what extra ops I want to put into a single
> hypercall.

The two goals need to be weighed against one another; for the moment
I think I'm with Roger aiming at a clean interface.

Jan
Tamas K Lengyel March 25, 2022, 10:48 a.m. UTC | #8
On Fri, Mar 25, 2022, 5:04 AM Jan Beulich <jbeulich@suse.com> wrote:

> On 24.03.2022 18:02, Tamas K Lengyel wrote:
> > On Thu, Mar 24, 2022 at 12:44 PM Roger Pau Monné <roger.pau@citrix.com>
> wrote:
> >>
> >> On Thu, Mar 24, 2022 at 12:22:49PM -0400, Tamas K Lengyel wrote:
> >>> On Thu, Mar 24, 2022 at 12:04 PM Roger Pau Monné <roger.pau@citrix.com>
> wrote:
> >>>>
> >>>> On Thu, Mar 24, 2022 at 11:52:38AM -0400, Tamas K Lengyel wrote:
> >>>>> On Thu, Mar 24, 2022 at 11:46 AM Roger Pau Monné <
> roger.pau@citrix.com> wrote:
> >>>>>>
> >>>>>> On Tue, Mar 22, 2022 at 01:41:39PM -0400, Tamas K Lengyel wrote:
> >>>>>>> diff --git a/xen/include/public/memory.h
> b/xen/include/public/memory.h
> >>>>>>> index 208d8dcbd9..30ce23c5a7 100644
> >>>>>>> --- a/xen/include/public/memory.h
> >>>>>>> +++ b/xen/include/public/memory.h
> >>>>>>> @@ -541,12 +541,14 @@ struct xen_mem_sharing_op {
> >>>>>>>                  uint32_t gref;     /* IN: gref to debug         */
> >>>>>>>              } u;
> >>>>>>>          } debug;
> >>>>>>> -        struct mem_sharing_op_fork {      /* OP_FORK */
> >>>>>>> +        struct mem_sharing_op_fork {      /* OP_FORK/_RESET */
> >>>>>>>              domid_t parent_domain;        /* IN: parent's domain
> id */
> >>>>>>>  /* These flags only makes sense for short-lived forks */
> >>>>>>>  #define XENMEM_FORK_WITH_IOMMU_ALLOWED (1u << 0)
> >>>>>>>  #define XENMEM_FORK_BLOCK_INTERRUPTS   (1u << 1)
> >>>>>>>  #define XENMEM_FORK_SKIP_SPECIAL_PAGES (1u << 2)
> >>>>>>> +#define XENMEM_FORK_RESET_STATE        (1u << 3)
> >>>>>>> +#define XENMEM_FORK_RESET_MEMORY       (1u << 4)
> >>>>>>>              uint16_t flags;               /* IN: optional
> settings */
> >>>>>>>              uint32_t pad;                 /* Must be set to 0 */
> >>>>>>>          } fork;
> >>>>>>> diff --git a/xen/include/public/vm_event.h
> b/xen/include/public/vm_event.h
> >>>>>>> index bb003d21d0..81c2ee28cc 100644
> >>>>>>> --- a/xen/include/public/vm_event.h
> >>>>>>> +++ b/xen/include/public/vm_event.h
> >>>>>>> @@ -127,6 +127,14 @@
> >>>>>>>   * Reset the vmtrace buffer (if vmtrace is enabled)
> >>>>>>>   */
> >>>>>>>  #define VM_EVENT_FLAG_RESET_VMTRACE      (1 << 13)
> >>>>>>> +/*
> >>>>>>> + * Reset the VM state (if VM is fork)
> >>>>>>> + */
> >>>>>>> +#define VM_EVENT_FLAG_RESET_FORK_STATE   (1 << 14)
> >>>>>>> +/*
> >>>>>>> + * Remove unshared entried from physmap (if VM is fork)
> >>>>>>> + */
> >>>>>>> +#define VM_EVENT_FLAG_RESET_FORK_MEMORY  (1 << 15)
> >>>>>>
> >>>>>> I'm confused about why two different interfaces are added to do this
> >>>>>> kind of selective resets, one to vm_event and one to xenmem_fork?
> >>>>>>
> >>>>>> I thin k the natural place for the option to live would be
> >>>>>> XENMEM_FORK?
> >>>>>
> >>>>> Yes, that's the natural place for it. But we are adding it to both
> for
> >>>>> a reason. In our use-case the reset operation will happen after a
> >>>>> vm_event is received to which we already must send a reply. Setting
> >>>>> the flag on the vm_event reply saves us having to issue an extra
> memop
> >>>>> hypercall afterwards.
> >>>>
> >>>> Can you do a multicall and batch both operations in a single
> >>>> hypercall?
> >>>>
> >>>> That would seem more natural than adding duplicated interfaces.
> >>>
> >>> Not in a straight forward way, no. There is no exposed API in libxc to
> >>> do a multicall. Even if that was an option it is still easier for me
> >>> to just flip a bit in the response field than having to construct a
> >>> whole standalone hypercall structure to be sent as part of a
> >>> multicall.
> >>
> >> Right, I can see it being easier, but it seems like a bad choice from
> >> an interface PoV. You are the maintainer of both subsystems, but it
> >> would seem to me it's in your best interest to try to keep the
> >> interfaces separated and clean.
> >>
> >> Would it be possible for the reset XENMEM_FORK op to have the side
> >> effect of performing what you would instead do with the vm_event
> >> hypercall?
> >
> > Yes, the event response is really just an event channel signal to Xen,
> > so the memop hypercall could similarly encode the "now check the
> > vm_event response" as an optional field. But why is that any better
> > than the current event channel route processing the vm_response
> > encoding the "now do these ops on the fork"?
>
> Well, as Roger said: Less duplication in the interface.
>

No, you would just duplicate something else instead, ie. the event channel
hypercall.


> > We already have a bunch of different operations you can encode in the
> > vm_event response field, so it reduces the complexity on the toolstack
> > side since I don't have to switch around which hypercall I need to
> > issue depending on what extra ops I want to put into a single
> > hypercall.
>
> The two goals need to be weighed against one another; for the moment
> I think I'm with Roger aiming at a clean interface.
>

It may look like that from the Xen side but from the toolstack side this is
actually the cleanest way to achieve what we need. The vm_event interfaces
are already strongly integrated with both the mem_sharing and mem_paging
subsystems so nothing is gained by now for no reason trying to keep them
separate. So I strongly disagree with this suggestion and I'm going to keep
it as-is. I appreciate the feedback nevertheless.

Tamas

>
Roger Pau Monné March 25, 2022, 12:14 p.m. UTC | #9
On Fri, Mar 25, 2022 at 06:48:42AM -0400, Tamas K Lengyel wrote:
> On Fri, Mar 25, 2022, 5:04 AM Jan Beulich <jbeulich@suse.com> wrote:
> 
> > On 24.03.2022 18:02, Tamas K Lengyel wrote:
> > > On Thu, Mar 24, 2022 at 12:44 PM Roger Pau Monné <roger.pau@citrix.com>
> > wrote:
> > >>
> > >> On Thu, Mar 24, 2022 at 12:22:49PM -0400, Tamas K Lengyel wrote:
> > >>> On Thu, Mar 24, 2022 at 12:04 PM Roger Pau Monné <roger.pau@citrix.com>
> > wrote:
> > >>>>
> > >>>> On Thu, Mar 24, 2022 at 11:52:38AM -0400, Tamas K Lengyel wrote:
> > >>>>> On Thu, Mar 24, 2022 at 11:46 AM Roger Pau Monné <
> > roger.pau@citrix.com> wrote:
> > >>>>>>
> > >>>>>> On Tue, Mar 22, 2022 at 01:41:39PM -0400, Tamas K Lengyel wrote:
> > >>>>>>> diff --git a/xen/include/public/memory.h
> > b/xen/include/public/memory.h
> > >>>>>>> index 208d8dcbd9..30ce23c5a7 100644
> > >>>>>>> --- a/xen/include/public/memory.h
> > >>>>>>> +++ b/xen/include/public/memory.h
> > >>>>>>> @@ -541,12 +541,14 @@ struct xen_mem_sharing_op {
> > >>>>>>>                  uint32_t gref;     /* IN: gref to debug         */
> > >>>>>>>              } u;
> > >>>>>>>          } debug;
> > >>>>>>> -        struct mem_sharing_op_fork {      /* OP_FORK */
> > >>>>>>> +        struct mem_sharing_op_fork {      /* OP_FORK/_RESET */
> > >>>>>>>              domid_t parent_domain;        /* IN: parent's domain
> > id */
> > >>>>>>>  /* These flags only makes sense for short-lived forks */
> > >>>>>>>  #define XENMEM_FORK_WITH_IOMMU_ALLOWED (1u << 0)
> > >>>>>>>  #define XENMEM_FORK_BLOCK_INTERRUPTS   (1u << 1)
> > >>>>>>>  #define XENMEM_FORK_SKIP_SPECIAL_PAGES (1u << 2)
> > >>>>>>> +#define XENMEM_FORK_RESET_STATE        (1u << 3)
> > >>>>>>> +#define XENMEM_FORK_RESET_MEMORY       (1u << 4)
> > >>>>>>>              uint16_t flags;               /* IN: optional
> > settings */
> > >>>>>>>              uint32_t pad;                 /* Must be set to 0 */
> > >>>>>>>          } fork;
> > >>>>>>> diff --git a/xen/include/public/vm_event.h
> > b/xen/include/public/vm_event.h
> > >>>>>>> index bb003d21d0..81c2ee28cc 100644
> > >>>>>>> --- a/xen/include/public/vm_event.h
> > >>>>>>> +++ b/xen/include/public/vm_event.h
> > >>>>>>> @@ -127,6 +127,14 @@
> > >>>>>>>   * Reset the vmtrace buffer (if vmtrace is enabled)
> > >>>>>>>   */
> > >>>>>>>  #define VM_EVENT_FLAG_RESET_VMTRACE      (1 << 13)
> > >>>>>>> +/*
> > >>>>>>> + * Reset the VM state (if VM is fork)
> > >>>>>>> + */
> > >>>>>>> +#define VM_EVENT_FLAG_RESET_FORK_STATE   (1 << 14)
> > >>>>>>> +/*
> > >>>>>>> + * Remove unshared entried from physmap (if VM is fork)
> > >>>>>>> + */
> > >>>>>>> +#define VM_EVENT_FLAG_RESET_FORK_MEMORY  (1 << 15)
> > >>>>>>
> > >>>>>> I'm confused about why two different interfaces are added to do this
> > >>>>>> kind of selective resets, one to vm_event and one to xenmem_fork?
> > >>>>>>
> > >>>>>> I thin k the natural place for the option to live would be
> > >>>>>> XENMEM_FORK?
> > >>>>>
> > >>>>> Yes, that's the natural place for it. But we are adding it to both
> > for
> > >>>>> a reason. In our use-case the reset operation will happen after a
> > >>>>> vm_event is received to which we already must send a reply. Setting
> > >>>>> the flag on the vm_event reply saves us having to issue an extra
> > memop
> > >>>>> hypercall afterwards.
> > >>>>
> > >>>> Can you do a multicall and batch both operations in a single
> > >>>> hypercall?
> > >>>>
> > >>>> That would seem more natural than adding duplicated interfaces.
> > >>>
> > >>> Not in a straight forward way, no. There is no exposed API in libxc to
> > >>> do a multicall. Even if that was an option it is still easier for me
> > >>> to just flip a bit in the response field than having to construct a
> > >>> whole standalone hypercall structure to be sent as part of a
> > >>> multicall.
> > >>
> > >> Right, I can see it being easier, but it seems like a bad choice from
> > >> an interface PoV. You are the maintainer of both subsystems, but it
> > >> would seem to me it's in your best interest to try to keep the
> > >> interfaces separated and clean.
> > >>
> > >> Would it be possible for the reset XENMEM_FORK op to have the side
> > >> effect of performing what you would instead do with the vm_event
> > >> hypercall?
> > >
> > > Yes, the event response is really just an event channel signal to Xen,
> > > so the memop hypercall could similarly encode the "now check the
> > > vm_event response" as an optional field. But why is that any better
> > > than the current event channel route processing the vm_response
> > > encoding the "now do these ops on the fork"?
> >
> > Well, as Roger said: Less duplication in the interface.
> >
> 
> No, you would just duplicate something else instead, ie. the event channel
> hypercall.
> 
> 
> > > We already have a bunch of different operations you can encode in the
> > > vm_event response field, so it reduces the complexity on the toolstack
> > > side since I don't have to switch around which hypercall I need to
> > > issue depending on what extra ops I want to put into a single
> > > hypercall.
> >
> > The two goals need to be weighed against one another; for the moment
> > I think I'm with Roger aiming at a clean interface.
> >
> 
> It may look like that from the Xen side but from the toolstack side this is
> actually the cleanest way to achieve what we need. The vm_event interfaces
> are already strongly integrated with both the mem_sharing and mem_paging
> subsystems so nothing is gained by now for no reason trying to keep them
> separate. So I strongly disagree with this suggestion and I'm going to keep
> it as-is. I appreciate the feedback nevertheless.

I'm not opposed to it, I just would like to better understand why you
are proposing such interface.

Thanks, Roger.
Roger Pau Monné March 25, 2022, 1:42 p.m. UTC | #10
On Tue, Mar 22, 2022 at 01:41:39PM -0400, Tamas K Lengyel wrote:
> diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
> index a21c781452..bfa6082f13 100644
> --- a/xen/arch/x86/mm/mem_sharing.c
> +++ b/xen/arch/x86/mm/mem_sharing.c
> @@ -1892,15 +1892,19 @@ static int fork(struct domain *cd, struct domain *d, uint16_t flags)
>   * footprints the hypercall continuation should be implemented (or if this
>   * feature needs to be become "stable").
>   */
> -static int mem_sharing_fork_reset(struct domain *d)
> +int mem_sharing_fork_reset(struct domain *d, bool reset_state,
> +                           bool reset_memory)
>  {
> -    int rc;
> +    int rc = 0;
>      struct domain *pd = d->parent;
>      struct p2m_domain *p2m = p2m_get_hostp2m(d);
>      struct page_info *page, *tmp;
>  
>      domain_pause(d);

I would assert that at least one of reset_sate or reset_memory is set
here, as callers already do the checks.

>  
> +    if ( !reset_memory )
> +        goto state;

I don't like using labels and goto like this as I think it makes the
code harder to follow, and so more likely to introduce bugs. I would
rather place the memory reset parts inside of an if ( reset_memory ) {
... }, but that's my taste.

> +
>      /* need recursive lock because we will free pages */
>      spin_lock_recursive(&d->page_alloc_lock);
>      page_list_for_each_safe(page, tmp, &d->page_list)
> @@ -1933,7 +1937,9 @@ static int mem_sharing_fork_reset(struct domain *d)
>      }
>      spin_unlock_recursive(&d->page_alloc_lock);
>  
> -    rc = copy_settings(d, pd, d->arch.hvm.mem_sharing.skip_special_pages);
> + state:
> +    if ( reset_state )
> +        rc = copy_settings(d, pd, d->arch.hvm.mem_sharing.skip_special_pages);
>  
>      domain_unpause(d);
>  
> @@ -2239,15 +2245,21 @@ int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
>  
>      case XENMEM_sharing_op_fork_reset:
>      {
> +        bool reset_state = mso.u.fork.flags & XENMEM_FORK_RESET_STATE;
> +        bool reset_memory = mso.u.fork.flags & XENMEM_FORK_RESET_MEMORY;
> +
>          rc = -EINVAL;
> -        if ( mso.u.fork.pad || mso.u.fork.flags )
> +        if ( mso.u.fork.pad || (!reset_state && !reset_memory) )
> +            goto out;
> +        if ( mso.u.fork.flags &
> +             ~(XENMEM_FORK_RESET_STATE | XENMEM_FORK_RESET_MEMORY) )
>              goto out;
>  
>          rc = -ENOSYS;
>          if ( !d->parent )
>              goto out;
>  
> -        rc = mem_sharing_fork_reset(d);
> +        rc = mem_sharing_fork_reset(d, reset_state, reset_memory);
>          break;
>      }
>  
> diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
> index 84cf52636b..a7b192be0d 100644
> --- a/xen/common/vm_event.c
> +++ b/xen/common/vm_event.c
> @@ -28,6 +28,11 @@
>  #include <asm/p2m.h>
>  #include <asm/monitor.h>
>  #include <asm/vm_event.h>
> +
> +#ifdef CONFIG_MEM_SHARING
> +#include <asm/mem_sharing.h>
> +#endif
> +
>  #include <xsm/xsm.h>
>  #include <public/hvm/params.h>
>  
> @@ -394,6 +399,15 @@ static int vm_event_resume(struct domain *d, struct vm_event_domain *ved)
>              if ( rsp.reason == VM_EVENT_REASON_MEM_PAGING )
>                  p2m_mem_paging_resume(d, &rsp);
>  #endif
> +#ifdef CONFIG_MEM_SHARING
> +            do {
> +                bool reset_state = rsp.flags & VM_EVENT_FLAG_RESET_FORK_STATE;
> +                bool reset_mem = rsp.flags & VM_EVENT_FLAG_RESET_FORK_MEMORY;
> +
> +                if ( reset_state || reset_mem )
> +                    mem_sharing_fork_reset(d, reset_state, reset_mem);

You seem to drop the error code returned by mem_sharing_fork_reset.

> +            } while(0);
> +#endif

I think you can avoid the do {} while(0); just using the braces will
allow you to define local variables in the inner block.

>              /*
>               * Check emulation flags in the arch-specific handler only, as it
> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> index 208d8dcbd9..30ce23c5a7 100644
> --- a/xen/include/public/memory.h
> +++ b/xen/include/public/memory.h
> @@ -541,12 +541,14 @@ struct xen_mem_sharing_op {
>                  uint32_t gref;     /* IN: gref to debug         */
>              } u;
>          } debug;
> -        struct mem_sharing_op_fork {      /* OP_FORK */
> +        struct mem_sharing_op_fork {      /* OP_FORK/_RESET */
>              domid_t parent_domain;        /* IN: parent's domain id */
>  /* These flags only makes sense for short-lived forks */
>  #define XENMEM_FORK_WITH_IOMMU_ALLOWED (1u << 0)
>  #define XENMEM_FORK_BLOCK_INTERRUPTS   (1u << 1)
>  #define XENMEM_FORK_SKIP_SPECIAL_PAGES (1u << 2)
> +#define XENMEM_FORK_RESET_STATE        (1u << 3)
> +#define XENMEM_FORK_RESET_MEMORY       (1u << 4)

For backward compatibility purposes should the flags be added
backwards, ie:

#define XENMEM_FORK_KEEP_STATE        (1u << 3)
#define XENMEM_FORK_KEEP_MEMORY       (1u << 4)

So that existing callers of XENMEM_sharing_op_fork_reset will continue
working as expected?

Or we don't care about that as the interface is protected with
__XEN_TOOLS__?

Thanks, Roger.
Tamas K Lengyel March 25, 2022, 1:53 p.m. UTC | #11
On Fri, Mar 25, 2022 at 9:42 AM Roger Pau Monné <roger.pau@citrix.com> wrote:
>
> On Tue, Mar 22, 2022 at 01:41:39PM -0400, Tamas K Lengyel wrote:
> > diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
> > index a21c781452..bfa6082f13 100644
> > --- a/xen/arch/x86/mm/mem_sharing.c
> > +++ b/xen/arch/x86/mm/mem_sharing.c
> > @@ -1892,15 +1892,19 @@ static int fork(struct domain *cd, struct domain *d, uint16_t flags)
> >   * footprints the hypercall continuation should be implemented (or if this
> >   * feature needs to be become "stable").
> >   */
> > -static int mem_sharing_fork_reset(struct domain *d)
> > +int mem_sharing_fork_reset(struct domain *d, bool reset_state,
> > +                           bool reset_memory)
> >  {
> > -    int rc;
> > +    int rc = 0;
> >      struct domain *pd = d->parent;
> >      struct p2m_domain *p2m = p2m_get_hostp2m(d);
> >      struct page_info *page, *tmp;
> >
> >      domain_pause(d);
>
> I would assert that at least one of reset_sate or reset_memory is set
> here, as callers already do the checks.

Sure.

>
> >
> > +    if ( !reset_memory )
> > +        goto state;
>
> I don't like using labels and goto like this as I think it makes the
> code harder to follow, and so more likely to introduce bugs. I would
> rather place the memory reset parts inside of an if ( reset_memory ) {
> ... }, but that's my taste.

I did that first but because it requires shifting everything to the
right it requires odd line breaks.

>
> > +
> >      /* need recursive lock because we will free pages */
> >      spin_lock_recursive(&d->page_alloc_lock);
> >      page_list_for_each_safe(page, tmp, &d->page_list)
> > @@ -1933,7 +1937,9 @@ static int mem_sharing_fork_reset(struct domain *d)
> >      }
> >      spin_unlock_recursive(&d->page_alloc_lock);
> >
> > -    rc = copy_settings(d, pd, d->arch.hvm.mem_sharing.skip_special_pages);
> > + state:
> > +    if ( reset_state )
> > +        rc = copy_settings(d, pd, d->arch.hvm.mem_sharing.skip_special_pages);
> >
> >      domain_unpause(d);
> >
> > @@ -2239,15 +2245,21 @@ int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
> >
> >      case XENMEM_sharing_op_fork_reset:
> >      {
> > +        bool reset_state = mso.u.fork.flags & XENMEM_FORK_RESET_STATE;
> > +        bool reset_memory = mso.u.fork.flags & XENMEM_FORK_RESET_MEMORY;
> > +
> >          rc = -EINVAL;
> > -        if ( mso.u.fork.pad || mso.u.fork.flags )
> > +        if ( mso.u.fork.pad || (!reset_state && !reset_memory) )
> > +            goto out;
> > +        if ( mso.u.fork.flags &
> > +             ~(XENMEM_FORK_RESET_STATE | XENMEM_FORK_RESET_MEMORY) )
> >              goto out;
> >
> >          rc = -ENOSYS;
> >          if ( !d->parent )
> >              goto out;
> >
> > -        rc = mem_sharing_fork_reset(d);
> > +        rc = mem_sharing_fork_reset(d, reset_state, reset_memory);
> >          break;
> >      }
> >
> > diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
> > index 84cf52636b..a7b192be0d 100644
> > --- a/xen/common/vm_event.c
> > +++ b/xen/common/vm_event.c
> > @@ -28,6 +28,11 @@
> >  #include <asm/p2m.h>
> >  #include <asm/monitor.h>
> >  #include <asm/vm_event.h>
> > +
> > +#ifdef CONFIG_MEM_SHARING
> > +#include <asm/mem_sharing.h>
> > +#endif
> > +
> >  #include <xsm/xsm.h>
> >  #include <public/hvm/params.h>
> >
> > @@ -394,6 +399,15 @@ static int vm_event_resume(struct domain *d, struct vm_event_domain *ved)
> >              if ( rsp.reason == VM_EVENT_REASON_MEM_PAGING )
> >                  p2m_mem_paging_resume(d, &rsp);
> >  #endif
> > +#ifdef CONFIG_MEM_SHARING
> > +            do {
> > +                bool reset_state = rsp.flags & VM_EVENT_FLAG_RESET_FORK_STATE;
> > +                bool reset_mem = rsp.flags & VM_EVENT_FLAG_RESET_FORK_MEMORY;
> > +
> > +                if ( reset_state || reset_mem )
> > +                    mem_sharing_fork_reset(d, reset_state, reset_mem);
>
> You seem to drop the error code returned by mem_sharing_fork_reset.

Yes, there is no response that could be sent to the toolstack from
here. I could add an ASSERT that rc is 0 though. If the fork() op
successfully finished then fork_reset() couldn't reasonably end up in
a path where it fails.

>
> > +            } while(0);
> > +#endif
>
> I think you can avoid the do {} while(0); just using the braces will
> allow you to define local variables in the inner block.

Sure.

>
> >              /*
> >               * Check emulation flags in the arch-specific handler only, as it
> > diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> > index 208d8dcbd9..30ce23c5a7 100644
> > --- a/xen/include/public/memory.h
> > +++ b/xen/include/public/memory.h
> > @@ -541,12 +541,14 @@ struct xen_mem_sharing_op {
> >                  uint32_t gref;     /* IN: gref to debug         */
> >              } u;
> >          } debug;
> > -        struct mem_sharing_op_fork {      /* OP_FORK */
> > +        struct mem_sharing_op_fork {      /* OP_FORK/_RESET */
> >              domid_t parent_domain;        /* IN: parent's domain id */
> >  /* These flags only makes sense for short-lived forks */
> >  #define XENMEM_FORK_WITH_IOMMU_ALLOWED (1u << 0)
> >  #define XENMEM_FORK_BLOCK_INTERRUPTS   (1u << 1)
> >  #define XENMEM_FORK_SKIP_SPECIAL_PAGES (1u << 2)
> > +#define XENMEM_FORK_RESET_STATE        (1u << 3)
> > +#define XENMEM_FORK_RESET_MEMORY       (1u << 4)
>
> For backward compatibility purposes should the flags be added
> backwards, ie:
>
> #define XENMEM_FORK_KEEP_STATE        (1u << 3)
> #define XENMEM_FORK_KEEP_MEMORY       (1u << 4)
>
> So that existing callers of XENMEM_sharing_op_fork_reset will continue
> working as expected?
>
> Or we don't care about that as the interface is protected with
> __XEN_TOOLS__?

I would say don't care, we are updating the only toolstack that uses
this in lock-step with Xen.

Tamas
diff mbox series

Patch

diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
index 95bd5eca67..1b089a2c02 100644
--- a/tools/include/xenctrl.h
+++ b/tools/include/xenctrl.h
@@ -2290,7 +2290,8 @@  int xc_memshr_fork(xc_interface *xch,
  *
  * With VMs that have a lot of memory this call may block for a long time.
  */
-int xc_memshr_fork_reset(xc_interface *xch, uint32_t forked_domain);
+int xc_memshr_fork_reset(xc_interface *xch, uint32_t forked_domain,
+                         bool reset_state, bool reset_memory);
 
 /* Debug calls: return the number of pages referencing the shared frame backing
  * the input argument. Should be one or greater.
diff --git a/tools/libs/ctrl/xc_memshr.c b/tools/libs/ctrl/xc_memshr.c
index a6cfd7dccf..a0d0b894e2 100644
--- a/tools/libs/ctrl/xc_memshr.c
+++ b/tools/libs/ctrl/xc_memshr.c
@@ -257,12 +257,17 @@  int xc_memshr_fork(xc_interface *xch, uint32_t pdomid, uint32_t domid,
     return xc_memshr_memop(xch, domid, &mso);
 }
 
-int xc_memshr_fork_reset(xc_interface *xch, uint32_t domid)
+int xc_memshr_fork_reset(xc_interface *xch, uint32_t domid, bool reset_state,
+                         bool reset_memory)
 {
     xen_mem_sharing_op_t mso;
 
     memset(&mso, 0, sizeof(mso));
     mso.op = XENMEM_sharing_op_fork_reset;
+    if ( reset_state )
+        mso.u.fork.flags |= XENMEM_FORK_RESET_STATE;
+    if ( reset_memory )
+        mso.u.fork.flags |= XENMEM_FORK_RESET_MEMORY;
 
     return xc_memshr_memop(xch, domid, &mso);
 }
diff --git a/xen/arch/x86/include/asm/mem_sharing.h b/xen/arch/x86/include/asm/mem_sharing.h
index b4a8e8795a..fca5ec8aeb 100644
--- a/xen/arch/x86/include/asm/mem_sharing.h
+++ b/xen/arch/x86/include/asm/mem_sharing.h
@@ -85,6 +85,9 @@  static inline bool mem_sharing_is_fork(const struct domain *d)
 int mem_sharing_fork_page(struct domain *d, gfn_t gfn,
                           bool unsharing);
 
+int mem_sharing_fork_reset(struct domain *d, bool reset_state,
+                           bool reset_memory);
+
 /*
  * If called by a foreign domain, possible errors are
  *   -EBUSY -> ring full
@@ -148,6 +151,12 @@  static inline int mem_sharing_fork_page(struct domain *d, gfn_t gfn, bool lock)
     return -EOPNOTSUPP;
 }
 
+static inline int mem_sharing_fork_reset(struct domain *d, bool reset_state,
+                                         bool reset_memory)
+{
+    return -EOPNOTSUPP;
+}
+
 #endif
 
 #endif /* __MEM_SHARING_H__ */
diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index a21c781452..bfa6082f13 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -1892,15 +1892,19 @@  static int fork(struct domain *cd, struct domain *d, uint16_t flags)
  * footprints the hypercall continuation should be implemented (or if this
  * feature needs to be become "stable").
  */
-static int mem_sharing_fork_reset(struct domain *d)
+int mem_sharing_fork_reset(struct domain *d, bool reset_state,
+                           bool reset_memory)
 {
-    int rc;
+    int rc = 0;
     struct domain *pd = d->parent;
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
     struct page_info *page, *tmp;
 
     domain_pause(d);
 
+    if ( !reset_memory )
+        goto state;
+
     /* need recursive lock because we will free pages */
     spin_lock_recursive(&d->page_alloc_lock);
     page_list_for_each_safe(page, tmp, &d->page_list)
@@ -1933,7 +1937,9 @@  static int mem_sharing_fork_reset(struct domain *d)
     }
     spin_unlock_recursive(&d->page_alloc_lock);
 
-    rc = copy_settings(d, pd, d->arch.hvm.mem_sharing.skip_special_pages);
+ state:
+    if ( reset_state )
+        rc = copy_settings(d, pd, d->arch.hvm.mem_sharing.skip_special_pages);
 
     domain_unpause(d);
 
@@ -2239,15 +2245,21 @@  int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
 
     case XENMEM_sharing_op_fork_reset:
     {
+        bool reset_state = mso.u.fork.flags & XENMEM_FORK_RESET_STATE;
+        bool reset_memory = mso.u.fork.flags & XENMEM_FORK_RESET_MEMORY;
+
         rc = -EINVAL;
-        if ( mso.u.fork.pad || mso.u.fork.flags )
+        if ( mso.u.fork.pad || (!reset_state && !reset_memory) )
+            goto out;
+        if ( mso.u.fork.flags &
+             ~(XENMEM_FORK_RESET_STATE | XENMEM_FORK_RESET_MEMORY) )
             goto out;
 
         rc = -ENOSYS;
         if ( !d->parent )
             goto out;
 
-        rc = mem_sharing_fork_reset(d);
+        rc = mem_sharing_fork_reset(d, reset_state, reset_memory);
         break;
     }
 
diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
index 84cf52636b..a7b192be0d 100644
--- a/xen/common/vm_event.c
+++ b/xen/common/vm_event.c
@@ -28,6 +28,11 @@ 
 #include <asm/p2m.h>
 #include <asm/monitor.h>
 #include <asm/vm_event.h>
+
+#ifdef CONFIG_MEM_SHARING
+#include <asm/mem_sharing.h>
+#endif
+
 #include <xsm/xsm.h>
 #include <public/hvm/params.h>
 
@@ -394,6 +399,15 @@  static int vm_event_resume(struct domain *d, struct vm_event_domain *ved)
             if ( rsp.reason == VM_EVENT_REASON_MEM_PAGING )
                 p2m_mem_paging_resume(d, &rsp);
 #endif
+#ifdef CONFIG_MEM_SHARING
+            do {
+                bool reset_state = rsp.flags & VM_EVENT_FLAG_RESET_FORK_STATE;
+                bool reset_mem = rsp.flags & VM_EVENT_FLAG_RESET_FORK_MEMORY;
+
+                if ( reset_state || reset_mem )
+                    mem_sharing_fork_reset(d, reset_state, reset_mem);
+            } while(0);
+#endif
 
             /*
              * Check emulation flags in the arch-specific handler only, as it
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 208d8dcbd9..30ce23c5a7 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -541,12 +541,14 @@  struct xen_mem_sharing_op {
                 uint32_t gref;     /* IN: gref to debug         */
             } u;
         } debug;
-        struct mem_sharing_op_fork {      /* OP_FORK */
+        struct mem_sharing_op_fork {      /* OP_FORK/_RESET */
             domid_t parent_domain;        /* IN: parent's domain id */
 /* These flags only makes sense for short-lived forks */
 #define XENMEM_FORK_WITH_IOMMU_ALLOWED (1u << 0)
 #define XENMEM_FORK_BLOCK_INTERRUPTS   (1u << 1)
 #define XENMEM_FORK_SKIP_SPECIAL_PAGES (1u << 2)
+#define XENMEM_FORK_RESET_STATE        (1u << 3)
+#define XENMEM_FORK_RESET_MEMORY       (1u << 4)
             uint16_t flags;               /* IN: optional settings */
             uint32_t pad;                 /* Must be set to 0 */
         } fork;
diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
index bb003d21d0..81c2ee28cc 100644
--- a/xen/include/public/vm_event.h
+++ b/xen/include/public/vm_event.h
@@ -127,6 +127,14 @@ 
  * Reset the vmtrace buffer (if vmtrace is enabled)
  */
 #define VM_EVENT_FLAG_RESET_VMTRACE      (1 << 13)
+/*
+ * Reset the VM state (if VM is fork)
+ */
+#define VM_EVENT_FLAG_RESET_FORK_STATE   (1 << 14)
+/*
+ * Remove unshared entried from physmap (if VM is fork)
+ */
+#define VM_EVENT_FLAG_RESET_FORK_MEMORY  (1 << 15)
 
 /*
  * Reasons for the vm event request