diff mbox series

xen/sched: don't let XEN_RUNSTATE_UPDATE leak into vcpu_runstate_get()

Message ID 20190924074202.4064-1-jgross@suse.com (mailing list archive)
State Superseded
Headers show
Series xen/sched: don't let XEN_RUNSTATE_UPDATE leak into vcpu_runstate_get() | expand

Commit Message

Jürgen Groß Sept. 24, 2019, 7:42 a.m. UTC
vcpu_runstate_get() should never return a state entry time with
XEN_RUNSTATE_UPDATE set. To avoid this let update_runstate_area()
operate on a local runstate copy.

This problem was introduced with commit 2529c850ea48f036 ("add update
indicator to vcpu_runstate_info").

Signed-off-by: Juergen Gross <jgross@suse.com>
---
 xen/arch/x86/domain.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

Comments

Jan Beulich Sept. 24, 2019, 8:39 a.m. UTC | #1
On 24.09.2019 09:42, Juergen Gross wrote:
> vcpu_runstate_get() should never return a state entry time with
> XEN_RUNSTATE_UPDATE set. To avoid this let update_runstate_area()
> operate on a local runstate copy.
> 
> This problem was introduced with commit 2529c850ea48f036 ("add update
> indicator to vcpu_runstate_info").
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>

Perhaps this also wants a Reported-by tag?

In principle the change is fine, but I wonder whether you're (a)
going a little too far and thus you are (b) missing some cleanup
potential:

> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -1600,21 +1600,24 @@ bool update_runstate_area(struct vcpu *v)
>      bool rc;
>      struct guest_memory_policy policy = { .nested_guest_mode = false };
>      void __user *guest_handle = NULL;
> +    struct vcpu_runstate_info runstate;

I don't think the full structure needs copying. You already use ...

>      if ( guest_handle_is_null(runstate_guest(v)) )
>          return true;
>  
>      update_guest_memory_policy(v, &policy);
>  
> +    memcpy(&runstate, &v->runstate, sizeof(runstate));
> +
>      if ( VM_ASSIST(v->domain, runstate_update_flag) )
>      {
>          guest_handle = has_32bit_shinfo(v->domain)
>              ? &v->runstate_guest.compat.p->state_entry_time + 1
>              : &v->runstate_guest.native.p->state_entry_time + 1;
>          guest_handle--;

... trickery to get at just the state_entry_time field. I think
you would get away with making a local copy of just that one,
thus ...

> -        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
> +        runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
>          __raw_copy_to_guest(guest_handle,
> -                            (void *)(&v->runstate.state_entry_time + 1) - 1, 1);
> +                            (void *)(&runstate.state_entry_time + 1) - 1, 1);

... reducing the complexity of this at least a little, while ...

> @@ -1622,20 +1625,20 @@ bool update_runstate_area(struct vcpu *v)
>      {
>          struct compat_vcpu_runstate_info info;
>  
> -        XLAT_vcpu_runstate_info(&info, &v->runstate);
> +        XLAT_vcpu_runstate_info(&info, &runstate);
>          __copy_to_guest(v->runstate_guest.compat, &info, 1);
>          rc = true;
>      }
>      else
> -        rc = __copy_to_guest(runstate_guest(v), &v->runstate, 1) !=
> -             sizeof(v->runstate);
> +        rc = __copy_to_guest(runstate_guest(v), &runstate, 1) !=
> +             sizeof(runstate);

... taking the opportunity to make this use __copy_to_guest_field()
(storing state_entry_time last), in turn allowing ...

>      if ( guest_handle )
>      {
> -        v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
> +        runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
>          smp_wmb();
>          __raw_copy_to_guest(guest_handle,
> -                            (void *)(&v->runstate.state_entry_time + 1) - 1, 1);
> +                            (void *)(&runstate.state_entry_time + 1) - 1, 1);
>      }

... to drop this altogether.

Thoughts?

Jan
Jürgen Groß Sept. 24, 2019, 9:03 a.m. UTC | #2
On 24.09.19 10:39, Jan Beulich wrote:
> On 24.09.2019 09:42, Juergen Gross wrote:
>> vcpu_runstate_get() should never return a state entry time with
>> XEN_RUNSTATE_UPDATE set. To avoid this let update_runstate_area()
>> operate on a local runstate copy.
>>
>> This problem was introduced with commit 2529c850ea48f036 ("add update
>> indicator to vcpu_runstate_info").
>>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
> 
> Perhaps this also wants a Reported-by tag?

Yes. That was Andrew, right?

> 
> In principle the change is fine, but I wonder whether you're (a)
> going a little too far and thus you are (b) missing some cleanup
> potential:
> 
>> --- a/xen/arch/x86/domain.c
>> +++ b/xen/arch/x86/domain.c
>> @@ -1600,21 +1600,24 @@ bool update_runstate_area(struct vcpu *v)
>>       bool rc;
>>       struct guest_memory_policy policy = { .nested_guest_mode = false };
>>       void __user *guest_handle = NULL;
>> +    struct vcpu_runstate_info runstate;
> 
> I don't think the full structure needs copying. You already use ...
> 
>>       if ( guest_handle_is_null(runstate_guest(v)) )
>>           return true;
>>   
>>       update_guest_memory_policy(v, &policy);
>>   
>> +    memcpy(&runstate, &v->runstate, sizeof(runstate));
>> +
>>       if ( VM_ASSIST(v->domain, runstate_update_flag) )
>>       {
>>           guest_handle = has_32bit_shinfo(v->domain)
>>               ? &v->runstate_guest.compat.p->state_entry_time + 1
>>               : &v->runstate_guest.native.p->state_entry_time + 1;
>>           guest_handle--;
> 
> ... trickery to get at just the state_entry_time field. I think
> you would get away with making a local copy of just that one,
> thus ...
> 
>> -        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
>> +        runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
>>           __raw_copy_to_guest(guest_handle,
>> -                            (void *)(&v->runstate.state_entry_time + 1) - 1, 1);
>> +                            (void *)(&runstate.state_entry_time + 1) - 1, 1);
> 
> ... reducing the complexity of this at least a little, while ...
> 
>> @@ -1622,20 +1625,20 @@ bool update_runstate_area(struct vcpu *v)
>>       {
>>           struct compat_vcpu_runstate_info info;
>>   
>> -        XLAT_vcpu_runstate_info(&info, &v->runstate);
>> +        XLAT_vcpu_runstate_info(&info, &runstate);
>>           __copy_to_guest(v->runstate_guest.compat, &info, 1);
>>           rc = true;
>>       }
>>       else
>> -        rc = __copy_to_guest(runstate_guest(v), &v->runstate, 1) !=
>> -             sizeof(v->runstate);
>> +        rc = __copy_to_guest(runstate_guest(v), &runstate, 1) !=
>> +             sizeof(runstate);
> 
> ... taking the opportunity to make this use __copy_to_guest_field()
> (storing state_entry_time last), in turn allowing ...
> 
>>       if ( guest_handle )
>>       {
>> -        v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
>> +        runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
>>           smp_wmb();
>>           __raw_copy_to_guest(guest_handle,
>> -                            (void *)(&v->runstate.state_entry_time + 1) - 1, 1);
>> +                            (void *)(&runstate.state_entry_time + 1) - 1, 1);
>>       }
> 
> ... to drop this altogether.
> 
> Thoughts?

Hmm, I'm not sure this will make things easier.

The requested sequence is:

- copy the byte with the XEN_RUNSTATE_UPDATE bit set first
- copy all other bytes (not clearing the XEN_RUNSTATE_UPDATE bit)
- copy the byte with the now cleared XEN_RUNSTATE_UPDATE bit last

And this has to work for 64- and 32-bit variants of the structure.

So dropping the last hunk is wrong already, and I don't think having
a local copy of state_entry_time only will make things easier, as
you'd need to:

- copy the byte with the XEN_RUNSTATE_UPDATE bit set first
- copy v->runstate.state
- copy local runstate.state_entry_time
- copy v->runstate.time[]
- copy the byte with the now cleared XEN_RUNSTATE_UPDATE bit last

And you'd need to special case the compat case.


Juergen
Jan Beulich Sept. 24, 2019, 9:26 a.m. UTC | #3
On 24.09.2019 11:03, Jürgen Groß wrote:
> On 24.09.19 10:39, Jan Beulich wrote:
>> On 24.09.2019 09:42, Juergen Gross wrote:
>>> vcpu_runstate_get() should never return a state entry time with
>>> XEN_RUNSTATE_UPDATE set. To avoid this let update_runstate_area()
>>> operate on a local runstate copy.
>>>
>>> This problem was introduced with commit 2529c850ea48f036 ("add update
>>> indicator to vcpu_runstate_info").
>>>
>>> Signed-off-by: Juergen Gross <jgross@suse.com>
>>
>> Perhaps this also wants a Reported-by tag?
> 
> Yes. That was Andrew, right?

Oh, sorry, I had actually meant to ask at the same time: The mail
describing the issue came from him, but it saying "After a
complicated investigation", and based on prior instances I wouldn't
be certain it wasn't one of his colleagues Cc-ed there who actually
isolated it. Andrew, could you clarify?

>> In principle the change is fine, but I wonder whether you're (a)
>> going a little too far and thus you are (b) missing some cleanup
>> potential:
>>
>>> --- a/xen/arch/x86/domain.c
>>> +++ b/xen/arch/x86/domain.c
>>> @@ -1600,21 +1600,24 @@ bool update_runstate_area(struct vcpu *v)
>>>       bool rc;
>>>       struct guest_memory_policy policy = { .nested_guest_mode = false };
>>>       void __user *guest_handle = NULL;
>>> +    struct vcpu_runstate_info runstate;
>>
>> I don't think the full structure needs copying. You already use ...
>>
>>>       if ( guest_handle_is_null(runstate_guest(v)) )
>>>           return true;
>>>   
>>>       update_guest_memory_policy(v, &policy);
>>>   
>>> +    memcpy(&runstate, &v->runstate, sizeof(runstate));
>>> +
>>>       if ( VM_ASSIST(v->domain, runstate_update_flag) )
>>>       {
>>>           guest_handle = has_32bit_shinfo(v->domain)
>>>               ? &v->runstate_guest.compat.p->state_entry_time + 1
>>>               : &v->runstate_guest.native.p->state_entry_time + 1;
>>>           guest_handle--;
>>
>> ... trickery to get at just the state_entry_time field. I think
>> you would get away with making a local copy of just that one,
>> thus ...
>>
>>> -        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
>>> +        runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
>>>           __raw_copy_to_guest(guest_handle,
>>> -                            (void *)(&v->runstate.state_entry_time + 1) - 1, 1);
>>> +                            (void *)(&runstate.state_entry_time + 1) - 1, 1);
>>
>> ... reducing the complexity of this at least a little, while ...
>>
>>> @@ -1622,20 +1625,20 @@ bool update_runstate_area(struct vcpu *v)
>>>       {
>>>           struct compat_vcpu_runstate_info info;
>>>   
>>> -        XLAT_vcpu_runstate_info(&info, &v->runstate);
>>> +        XLAT_vcpu_runstate_info(&info, &runstate);
>>>           __copy_to_guest(v->runstate_guest.compat, &info, 1);
>>>           rc = true;
>>>       }
>>>       else
>>> -        rc = __copy_to_guest(runstate_guest(v), &v->runstate, 1) !=
>>> -             sizeof(v->runstate);
>>> +        rc = __copy_to_guest(runstate_guest(v), &runstate, 1) !=
>>> +             sizeof(runstate);
>>
>> ... taking the opportunity to make this use __copy_to_guest_field()
>> (storing state_entry_time last), in turn allowing ...
>>
>>>       if ( guest_handle )
>>>       {
>>> -        v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
>>> +        runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
>>>           smp_wmb();
>>>           __raw_copy_to_guest(guest_handle,
>>> -                            (void *)(&v->runstate.state_entry_time + 1) - 1, 1);
>>> +                            (void *)(&runstate.state_entry_time + 1) - 1, 1);
>>>       }
>>
>> ... to drop this altogether.
>>
>> Thoughts?
> 
> Hmm, I'm not sure this will make things easier.
> 
> The requested sequence is:
> 
> - copy the byte with the XEN_RUNSTATE_UPDATE bit set first
> - copy all other bytes (not clearing the XEN_RUNSTATE_UPDATE bit)
> - copy the byte with the now cleared XEN_RUNSTATE_UPDATE bit last
> 
> And this has to work for 64- and 32-bit variants of the structure.
> 
> So dropping the last hunk is wrong already,

For big-endian - yes. For little endian, if there's no 64-bit
store available, I agree as well. But Arm32 e.g. does have a
suitable store insn, which iirc is even atomic when used with
a sufficiently aligned memory address.

> and I don't think having
> a local copy of state_entry_time only will make things easier, as
> you'd need to:
> 
> - copy the byte with the XEN_RUNSTATE_UPDATE bit set first
> - copy v->runstate.state
> - copy local runstate.state_entry_time
> - copy v->runstate.time[]
> - copy the byte with the now cleared XEN_RUNSTATE_UPDATE bit last

The plan was to simply copy the entire state_entry_time last.
But yes, I can see how we might make too many assumptions on
how the guest-copying functions work, or demand too much of
extra special casing there by an architecture.

I'm not overly happy with the current model, but based on the
above
Reviewed-by: Jan Beulich <jbeulich@suse.com>

> And you'd need to special case the compat case.

There's already a suitable if() / else covering this.

Jan
Jan Beulich Sept. 24, 2019, 3:16 p.m. UTC | #4
On 24.09.2019 09:42, Juergen Gross wrote:
> vcpu_runstate_get() should never return a state entry time with
> XEN_RUNSTATE_UPDATE set. To avoid this let update_runstate_area()
> operate on a local runstate copy.
> 
> This problem was introduced with commit 2529c850ea48f036 ("add update
> indicator to vcpu_runstate_info").
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>
> ---
>  xen/arch/x86/domain.c | 17 ++++++++++-------
>  1 file changed, 10 insertions(+), 7 deletions(-)

I had this committed already and was about to push, but this
definitely wants a similar change for Arm.

Jan
Jürgen Groß Sept. 24, 2019, 3:22 p.m. UTC | #5
On 24.09.19 17:16, Jan Beulich wrote:
> On 24.09.2019 09:42, Juergen Gross wrote:
>> vcpu_runstate_get() should never return a state entry time with
>> XEN_RUNSTATE_UPDATE set. To avoid this let update_runstate_area()
>> operate on a local runstate copy.
>>
>> This problem was introduced with commit 2529c850ea48f036 ("add update
>> indicator to vcpu_runstate_info").
>>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
>> ---
>>   xen/arch/x86/domain.c | 17 ++++++++++-------
>>   1 file changed, 10 insertions(+), 7 deletions(-)
> 
> I had this committed already and was about to push, but this
> definitely wants a similar change for Arm.

Oh, I was fooled by cscope again, which gets fed an arch specific
file list.

Sending V2 soon.


Juergen
diff mbox series

Patch

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index dbdf6b1bc2..c4eceaab3f 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1600,21 +1600,24 @@  bool update_runstate_area(struct vcpu *v)
     bool rc;
     struct guest_memory_policy policy = { .nested_guest_mode = false };
     void __user *guest_handle = NULL;
+    struct vcpu_runstate_info runstate;
 
     if ( guest_handle_is_null(runstate_guest(v)) )
         return true;
 
     update_guest_memory_policy(v, &policy);
 
+    memcpy(&runstate, &v->runstate, sizeof(runstate));
+
     if ( VM_ASSIST(v->domain, runstate_update_flag) )
     {
         guest_handle = has_32bit_shinfo(v->domain)
             ? &v->runstate_guest.compat.p->state_entry_time + 1
             : &v->runstate_guest.native.p->state_entry_time + 1;
         guest_handle--;
-        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
+        runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
         __raw_copy_to_guest(guest_handle,
-                            (void *)(&v->runstate.state_entry_time + 1) - 1, 1);
+                            (void *)(&runstate.state_entry_time + 1) - 1, 1);
         smp_wmb();
     }
 
@@ -1622,20 +1625,20 @@  bool update_runstate_area(struct vcpu *v)
     {
         struct compat_vcpu_runstate_info info;
 
-        XLAT_vcpu_runstate_info(&info, &v->runstate);
+        XLAT_vcpu_runstate_info(&info, &runstate);
         __copy_to_guest(v->runstate_guest.compat, &info, 1);
         rc = true;
     }
     else
-        rc = __copy_to_guest(runstate_guest(v), &v->runstate, 1) !=
-             sizeof(v->runstate);
+        rc = __copy_to_guest(runstate_guest(v), &runstate, 1) !=
+             sizeof(runstate);
 
     if ( guest_handle )
     {
-        v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+        runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
         smp_wmb();
         __raw_copy_to_guest(guest_handle,
-                            (void *)(&v->runstate.state_entry_time + 1) - 1, 1);
+                            (void *)(&runstate.state_entry_time + 1) - 1, 1);
     }
 
     update_guest_memory_policy(v, &policy);