diff mbox series

[v3,1/8] domain: GADDR based shared guest area registration alternative - teardown

Message ID 68cdb299-c41c-b6a5-c9ce-bd915508ecf2@suse.com (mailing list archive)
State New, archived
Headers show
Series runstate/time area registration by (guest) physical address | expand

Commit Message

Jan Beulich May 3, 2023, 3:54 p.m. UTC
In preparation of the introduction of new vCPU operations allowing to
register the respective areas (one of the two is x86-specific) by
guest-physical address, add the necessary domain cleanup hooks.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Julien Grall <jgrall@amazon.com>
---
RFC: Zapping the areas in pv_shim_shutdown() may not be strictly
     necessary: Aiui unmap_vcpu_info() is called only because the vCPU
     info area cannot be re-registered. Beyond that I guess the
     assumption is that the areas would only be re-registered as they
     were before. If that's not the case I wonder whether the guest
     handles for both areas shouldn't also be zapped.
---
v2: Add assertion in unmap_guest_area().

Comments

Roger Pau Monné Sept. 27, 2023, 8:51 a.m. UTC | #1
On Wed, May 03, 2023 at 05:54:47PM +0200, Jan Beulich wrote:
> In preparation of the introduction of new vCPU operations allowing to
> register the respective areas (one of the two is x86-specific) by
> guest-physical address, add the necessary domain cleanup hooks.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Reviewed-by: Julien Grall <jgrall@amazon.com>
> ---
> RFC: Zapping the areas in pv_shim_shutdown() may not be strictly
>      necessary: Aiui unmap_vcpu_info() is called only because the vCPU
>      info area cannot be re-registered. Beyond that I guess the
>      assumption is that the areas would only be re-registered as they
>      were before. If that's not the case I wonder whether the guest
>      handles for both areas shouldn't also be zapped.

IIRC the reason was to unset v->vcpu_info_mfn so that it could be set
again on resume from suspension, or else the hypercall will return an
error.

I guess we should also zap the runstate handlers, or else we might
corrupt guest state.

> ---
> v2: Add assertion in unmap_guest_area().
> 
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -1019,7 +1019,10 @@ int arch_domain_soft_reset(struct domain
>      }
>  
>      for_each_vcpu ( d, v )
> +    {
>          set_xen_guest_handle(v->arch.time_info_guest, NULL);
> +        unmap_guest_area(v, &v->arch.time_guest_area);
> +    }
>  
>   exit_put_gfn:
>      put_gfn(d, gfn_x(gfn));
> @@ -2356,6 +2359,8 @@ int domain_relinquish_resources(struct d
>              if ( ret )
>                  return ret;
>  
> +            unmap_guest_area(v, &v->arch.time_guest_area);
> +
>              vpmu_destroy(v);
>          }
>  
> --- a/xen/arch/x86/include/asm/domain.h
> +++ b/xen/arch/x86/include/asm/domain.h
> @@ -669,6 +669,7 @@ struct arch_vcpu
>  
>      /* A secondary copy of the vcpu time info. */
>      XEN_GUEST_HANDLE(vcpu_time_info_t) time_info_guest;
> +    struct guest_area time_guest_area;
>  
>      struct arch_vm_event *vm_event;
>  
> --- a/xen/arch/x86/pv/shim.c
> +++ b/xen/arch/x86/pv/shim.c
> @@ -382,8 +382,10 @@ int pv_shim_shutdown(uint8_t reason)
>  
>      for_each_vcpu ( d, v )
>      {
> -        /* Unmap guest vcpu_info pages. */
> +        /* Unmap guest vcpu_info page and runstate/time areas. */
>          unmap_vcpu_info(v);
> +        unmap_guest_area(v, &v->runstate_guest_area);
> +        unmap_guest_area(v, &v->arch.time_guest_area);
>  
>          /* Reset the periodic timer to the default value. */
>          vcpu_set_periodic_timer(v, MILLISECS(10));
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -963,7 +963,10 @@ int domain_kill(struct domain *d)
>          if ( cpupool_move_domain(d, cpupool0) )
>              return -ERESTART;
>          for_each_vcpu ( d, v )
> +        {
>              unmap_vcpu_info(v);
> +            unmap_guest_area(v, &v->runstate_guest_area);
> +        }
>          d->is_dying = DOMDYING_dead;
>          /* Mem event cleanup has to go here because the rings 
>           * have to be put before we call put_domain. */
> @@ -1417,6 +1420,7 @@ int domain_soft_reset(struct domain *d,
>      {
>          set_xen_guest_handle(runstate_guest(v), NULL);
>          unmap_vcpu_info(v);
> +        unmap_guest_area(v, &v->runstate_guest_area);
>      }
>  
>      rc = arch_domain_soft_reset(d);
> @@ -1568,6 +1572,19 @@ void unmap_vcpu_info(struct vcpu *v)
>      put_page_and_type(mfn_to_page(mfn));
>  }
>  
> +/*
> + * This is only intended to be used for domain cleanup (or more generally only
> + * with at least the respective vCPU, if it's not the current one, reliably
> + * paused).
> + */
> +void unmap_guest_area(struct vcpu *v, struct guest_area *area)

vcpu param could be const given the current code, but I assume this
will be changed by future patches.  Same could be said about
guest_area but I'm confident that will change.

> +{
> +    struct domain *d = v->domain;
> +
> +    if ( v != current )
> +        ASSERT(atomic_read(&v->pause_count) | atomic_read(&d->pause_count));

Isn't this racy?  What guarantees that the vcpu won't be kicked just
after the check has been performed?

Thanks, Roger.
Jan Beulich Sept. 27, 2023, 9:55 a.m. UTC | #2
On 27.09.2023 10:51, Roger Pau Monné wrote:
> On Wed, May 03, 2023 at 05:54:47PM +0200, Jan Beulich wrote:
>> In preparation of the introduction of new vCPU operations allowing to
>> register the respective areas (one of the two is x86-specific) by
>> guest-physical address, add the necessary domain cleanup hooks.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> Reviewed-by: Julien Grall <jgrall@amazon.com>
>> ---
>> RFC: Zapping the areas in pv_shim_shutdown() may not be strictly
>>      necessary: Aiui unmap_vcpu_info() is called only because the vCPU
>>      info area cannot be re-registered. Beyond that I guess the
>>      assumption is that the areas would only be re-registered as they
>>      were before. If that's not the case I wonder whether the guest
>>      handles for both areas shouldn't also be zapped.
> 
> IIRC the reason was to unset v->vcpu_info_mfn so that it could be set
> again on resume from suspension, or else the hypercall will return an
> error.

Right, that's the re-registration aspect mentioned.

> I guess we should also zap the runstate handlers, or else we might
> corrupt guest state.

So you think the guest might re-register a different area post resume?
I can certainly add another patch to zap the handles; I don't think it
should be done right here, not the least because if we see room for
such behavior, that change may even want backporting.

>> @@ -1568,6 +1572,19 @@ void unmap_vcpu_info(struct vcpu *v)
>>      put_page_and_type(mfn_to_page(mfn));
>>  }
>>  
>> +/*
>> + * This is only intended to be used for domain cleanup (or more generally only
>> + * with at least the respective vCPU, if it's not the current one, reliably
>> + * paused).
>> + */
>> +void unmap_guest_area(struct vcpu *v, struct guest_area *area)
> 
> vcpu param could be const given the current code, but I assume this
> will be changed by future patches.  Same could be said about
> guest_area but I'm confident that will change.

True for both.

>> +{
>> +    struct domain *d = v->domain;
>> +
>> +    if ( v != current )
>> +        ASSERT(atomic_read(&v->pause_count) | atomic_read(&d->pause_count));
> 
> Isn't this racy?

It is, yes.

>  What guarantees that the vcpu won't be kicked just
> after the check has been performed?

Nothing. This check isn't any better than assertions towards an ordinary
spinlock being held. I assume you realize that we've got a number of such
assertions elsewhere already.

Jan
Roger Pau Monné Sept. 27, 2023, 10:42 a.m. UTC | #3
On Wed, Sep 27, 2023 at 11:55:19AM +0200, Jan Beulich wrote:
> On 27.09.2023 10:51, Roger Pau Monné wrote:
> > On Wed, May 03, 2023 at 05:54:47PM +0200, Jan Beulich wrote:
> > I guess we should also zap the runstate handlers, or else we might
> > corrupt guest state.
> 
> So you think the guest might re-register a different area post resume?
> I can certainly add another patch to zap the handles; I don't think it
> should be done right here, not the least because if we see room for
> such behavior, that change may even want backporting.

For correctness it would be good to zap them, but there's no rush, as
I do think guests will set to the same address on resume.

> >> @@ -1568,6 +1572,19 @@ void unmap_vcpu_info(struct vcpu *v)
> >>      put_page_and_type(mfn_to_page(mfn));
> >>  }
> >>  
> >> +/*
> >> + * This is only intended to be used for domain cleanup (or more generally only
> >> + * with at least the respective vCPU, if it's not the current one, reliably
> >> + * paused).
> >> + */
> >> +void unmap_guest_area(struct vcpu *v, struct guest_area *area)
> > 
> > vcpu param could be const given the current code, but I assume this
> > will be changed by future patches.  Same could be said about
> > guest_area but I'm confident that will change.
> 
> True for both.
> 
> >> +{
> >> +    struct domain *d = v->domain;
> >> +
> >> +    if ( v != current )
> >> +        ASSERT(atomic_read(&v->pause_count) | atomic_read(&d->pause_count));
> > 
> > Isn't this racy?
> 
> It is, yes.
> 
> >  What guarantees that the vcpu won't be kicked just
> > after the check has been performed?
> 
> Nothing. This check isn't any better than assertions towards an ordinary
> spinlock being held. I assume you realize that we've got a number of such
> assertions elsewhere already.

Right, but different from spinlock assertions, the code here could be
made safe just by pausing the vCPU?

Thanks, Roger.
Jan Beulich Sept. 27, 2023, 10:46 a.m. UTC | #4
On 27.09.2023 12:42, Roger Pau Monné wrote:
> On Wed, Sep 27, 2023 at 11:55:19AM +0200, Jan Beulich wrote:
>> On 27.09.2023 10:51, Roger Pau Monné wrote:
>>> On Wed, May 03, 2023 at 05:54:47PM +0200, Jan Beulich wrote:
>>> I guess we should also zap the runstate handlers, or else we might
>>> corrupt guest state.
>>
>> So you think the guest might re-register a different area post resume?
>> I can certainly add another patch to zap the handles; I don't think it
>> should be done right here, not the least because if we see room for
>> such behavior, that change may even want backporting.
> 
> For correctness it would be good to zap them, but there's no rush, as
> I do think guests will set to the same address on resume.

Well, I've already added a new patch coming ahead of this one.

>>>> @@ -1568,6 +1572,19 @@ void unmap_vcpu_info(struct vcpu *v)
>>>>      put_page_and_type(mfn_to_page(mfn));
>>>>  }
>>>>  
>>>> +/*
>>>> + * This is only intended to be used for domain cleanup (or more generally only
>>>> + * with at least the respective vCPU, if it's not the current one, reliably
>>>> + * paused).
>>>> + */
>>>> +void unmap_guest_area(struct vcpu *v, struct guest_area *area)
>>>
>>> vcpu param could be const given the current code, but I assume this
>>> will be changed by future patches.  Same could be said about
>>> guest_area but I'm confident that will change.
>>
>> True for both.
>>
>>>> +{
>>>> +    struct domain *d = v->domain;
>>>> +
>>>> +    if ( v != current )
>>>> +        ASSERT(atomic_read(&v->pause_count) | atomic_read(&d->pause_count));
>>>
>>> Isn't this racy?
>>
>> It is, yes.
>>
>>>  What guarantees that the vcpu won't be kicked just
>>> after the check has been performed?
>>
>> Nothing. This check isn't any better than assertions towards an ordinary
>> spinlock being held. I assume you realize that we've got a number of such
>> assertions elsewhere already.
> 
> Right, but different from spinlock assertions, the code here could be
> made safe just by pausing the vCPU?

That's what the assertion is checking (see also the comment ahead of the
function). It's just that the assertions cannot be made more strict, at
least from all I can tell.

Jan
Roger Pau Monné Sept. 27, 2023, 10:50 a.m. UTC | #5
On Wed, Sep 27, 2023 at 12:46:07PM +0200, Jan Beulich wrote:
> On 27.09.2023 12:42, Roger Pau Monné wrote:
> > On Wed, Sep 27, 2023 at 11:55:19AM +0200, Jan Beulich wrote:
> >> On 27.09.2023 10:51, Roger Pau Monné wrote:
> >>> On Wed, May 03, 2023 at 05:54:47PM +0200, Jan Beulich wrote:
> >>>> +{
> >>>> +    struct domain *d = v->domain;
> >>>> +
> >>>> +    if ( v != current )
> >>>> +        ASSERT(atomic_read(&v->pause_count) | atomic_read(&d->pause_count));
> >>>
> >>> Isn't this racy?
> >>
> >> It is, yes.
> >>
> >>>  What guarantees that the vcpu won't be kicked just
> >>> after the check has been performed?
> >>
> >> Nothing. This check isn't any better than assertions towards an ordinary
> >> spinlock being held. I assume you realize that we've got a number of such
> >> assertions elsewhere already.
> > 
> > Right, but different from spinlock assertions, the code here could be
> > made safe just by pausing the vCPU?
> 
> That's what the assertion is checking (see also the comment ahead of the
> function). It's just that the assertions cannot be made more strict, at
> least from all I can tell.

But the assertion might no longer be true by the time the code
afterwards is executed.  Why not wrap the code in a pair of
vcpu_{,un}pause() calls?

Thanks, Roger.
Jan Beulich Sept. 27, 2023, 11:44 a.m. UTC | #6
On 27.09.2023 12:50, Roger Pau Monné wrote:
> On Wed, Sep 27, 2023 at 12:46:07PM +0200, Jan Beulich wrote:
>> On 27.09.2023 12:42, Roger Pau Monné wrote:
>>> On Wed, Sep 27, 2023 at 11:55:19AM +0200, Jan Beulich wrote:
>>>> On 27.09.2023 10:51, Roger Pau Monné wrote:
>>>>> On Wed, May 03, 2023 at 05:54:47PM +0200, Jan Beulich wrote:
>>>>>> +{
>>>>>> +    struct domain *d = v->domain;
>>>>>> +
>>>>>> +    if ( v != current )
>>>>>> +        ASSERT(atomic_read(&v->pause_count) | atomic_read(&d->pause_count));
>>>>>
>>>>> Isn't this racy?
>>>>
>>>> It is, yes.
>>>>
>>>>>  What guarantees that the vcpu won't be kicked just
>>>>> after the check has been performed?
>>>>
>>>> Nothing. This check isn't any better than assertions towards an ordinary
>>>> spinlock being held. I assume you realize that we've got a number of such
>>>> assertions elsewhere already.
>>>
>>> Right, but different from spinlock assertions, the code here could be
>>> made safe just by pausing the vCPU?
>>
>> That's what the assertion is checking (see also the comment ahead of the
>> function). It's just that the assertions cannot be made more strict, at
>> least from all I can tell.
> 
> But the assertion might no longer be true by the time the code
> afterwards is executed.  Why not wrap the code in a pair of
> vcpu_{,un}pause() calls?

Because it's not quite as simple (if I was to do so, I'd want to do it
correctly, and not just give the impression of universal usability). See
how map_guest_area() involves hypercall_deadlock_mutex. Hence I continue
to think it is okay the way I have it, with all callers satisfying the
requirement (afaict).

Jan
diff mbox series

Patch

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1019,7 +1019,10 @@  int arch_domain_soft_reset(struct domain
     }
 
     for_each_vcpu ( d, v )
+    {
         set_xen_guest_handle(v->arch.time_info_guest, NULL);
+        unmap_guest_area(v, &v->arch.time_guest_area);
+    }
 
  exit_put_gfn:
     put_gfn(d, gfn_x(gfn));
@@ -2356,6 +2359,8 @@  int domain_relinquish_resources(struct d
             if ( ret )
                 return ret;
 
+            unmap_guest_area(v, &v->arch.time_guest_area);
+
             vpmu_destroy(v);
         }
 
--- a/xen/arch/x86/include/asm/domain.h
+++ b/xen/arch/x86/include/asm/domain.h
@@ -669,6 +669,7 @@  struct arch_vcpu
 
     /* A secondary copy of the vcpu time info. */
     XEN_GUEST_HANDLE(vcpu_time_info_t) time_info_guest;
+    struct guest_area time_guest_area;
 
     struct arch_vm_event *vm_event;
 
--- a/xen/arch/x86/pv/shim.c
+++ b/xen/arch/x86/pv/shim.c
@@ -382,8 +382,10 @@  int pv_shim_shutdown(uint8_t reason)
 
     for_each_vcpu ( d, v )
     {
-        /* Unmap guest vcpu_info pages. */
+        /* Unmap guest vcpu_info page and runstate/time areas. */
         unmap_vcpu_info(v);
+        unmap_guest_area(v, &v->runstate_guest_area);
+        unmap_guest_area(v, &v->arch.time_guest_area);
 
         /* Reset the periodic timer to the default value. */
         vcpu_set_periodic_timer(v, MILLISECS(10));
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -963,7 +963,10 @@  int domain_kill(struct domain *d)
         if ( cpupool_move_domain(d, cpupool0) )
             return -ERESTART;
         for_each_vcpu ( d, v )
+        {
             unmap_vcpu_info(v);
+            unmap_guest_area(v, &v->runstate_guest_area);
+        }
         d->is_dying = DOMDYING_dead;
         /* Mem event cleanup has to go here because the rings 
          * have to be put before we call put_domain. */
@@ -1417,6 +1420,7 @@  int domain_soft_reset(struct domain *d,
     {
         set_xen_guest_handle(runstate_guest(v), NULL);
         unmap_vcpu_info(v);
+        unmap_guest_area(v, &v->runstate_guest_area);
     }
 
     rc = arch_domain_soft_reset(d);
@@ -1568,6 +1572,19 @@  void unmap_vcpu_info(struct vcpu *v)
     put_page_and_type(mfn_to_page(mfn));
 }
 
+/*
+ * This is only intended to be used for domain cleanup (or more generally only
+ * with at least the respective vCPU, if it's not the current one, reliably
+ * paused).
+ */
+void unmap_guest_area(struct vcpu *v, struct guest_area *area)
+{
+    struct domain *d = v->domain;
+
+    if ( v != current )
+        ASSERT(atomic_read(&v->pause_count) | atomic_read(&d->pause_count));
+}
+
 int default_initialise_vcpu(struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
 {
     struct vcpu_guest_context *ctxt;
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -5,6 +5,12 @@ 
 #include <xen/types.h>
 
 #include <public/xen.h>
+
+struct guest_area {
+    struct page_info *pg;
+    void *map;
+};
+
 #include <asm/domain.h>
 #include <asm/numa.h>
 
@@ -76,6 +82,11 @@  void arch_vcpu_destroy(struct vcpu *v);
 int map_vcpu_info(struct vcpu *v, unsigned long gfn, unsigned int offset);
 void unmap_vcpu_info(struct vcpu *v);
 
+int map_guest_area(struct vcpu *v, paddr_t gaddr, unsigned int size,
+                   struct guest_area *area,
+                   void (*populate)(void *dst, struct vcpu *v));
+void unmap_guest_area(struct vcpu *v, struct guest_area *area);
+
 int arch_domain_create(struct domain *d,
                        struct xen_domctl_createdomain *config,
                        unsigned int flags);
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -202,6 +202,7 @@  struct vcpu
         XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
     } runstate_guest; /* guest address */
 #endif
+    struct guest_area runstate_guest_area;
     unsigned int     new_state;
 
     /* Has the FPU been initialised? */