diff mbox series

[V6,2/4] x86/altp2m: Add hypercall to set a range of sve bits

Message ID 20191223140409.32449-2-aisaila@bitdefender.com (mailing list archive)
State Superseded
Headers show
Series [V6,1/4] x86/mm: Add array_index_nospec to guest provided index values | expand

Commit Message

Alexandru Stefan ISAILA Dec. 23, 2019, 2:04 p.m. UTC
By default the sve bits are not set.
This patch adds a new hypercall, xc_altp2m_set_supress_ve_multi(),
to set a range of sve bits.
The core function, p2m_set_suppress_ve_multi(), does not brake in case
of a error and it is doing a best effort for setting the bits in the
given range. A check for continuation is made in order to have
preemption on big ranges.
The gfn of the first error is stored in
xen_hvm_altp2m_suppress_ve_multi.first_error and the error code is
stored in xen_hvm_altp2m_suppress_ve_multi.first_error_code.
If no error occurred the values will be 0.

Signed-off-by: Alexandru Isaila <aisaila@bitdefender.com>
Acked-by: Jan Beulich <jbeulich@suse.com>

---
CC: Ian Jackson <ian.jackson@eu.citrix.com>
CC: Wei Liu <wl@xen.org>
CC: Andrew Cooper <andrew.cooper3@citrix.com>
CC: George Dunlap <George.Dunlap@eu.citrix.com>
CC: Jan Beulich <jbeulich@suse.com>
CC: Julien Grall <julien@xen.org>
CC: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
CC: Stefano Stabellini <sstabellini@kernel.org>
CC: "Roger Pau Monné" <roger.pau@citrix.com>
CC: George Dunlap <george.dunlap@eu.citrix.com>
CC: Razvan Cojocaru <rcojocaru@bitdefender.com>
CC: Tamas K Lengyel <tamas@tklengyel.com>
CC: Petre Pircalabu <ppircalabu@bitdefender.com>
---
Changes since V5:
	- Change first_error_code to first_error and first_error to first_error_gfn
	- Update the requested comments.
---
 tools/libxc/include/xenctrl.h   |  4 +++
 tools/libxc/xc_altp2m.c         | 33 +++++++++++++++++
 xen/arch/x86/hvm/hvm.c          | 20 +++++++++++
 xen/arch/x86/mm/p2m.c           | 64 +++++++++++++++++++++++++++++++++
 xen/include/public/hvm/hvm_op.h | 13 +++++++
 xen/include/xen/mem_access.h    |  3 ++
 6 files changed, 137 insertions(+)

Comments

Tamas K Lengyel Dec. 23, 2019, 4:31 p.m. UTC | #1
> diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
> index 4fc919a9c5..de832dcc6d 100644
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -3070,6 +3070,70 @@ out:
>      return rc;
>  }
>
> +/*
> + * Set/clear the #VE suppress bit for multiple pages.  Only available on VMX.
> + */

I have to say I find it a bit odd why this function is in p2m.c but
it's declaration...

> +int p2m_set_suppress_ve_multi(struct domain *d,
> +                              struct xen_hvm_altp2m_suppress_ve_multi *sve)
> +{

...

> diff --git a/xen/include/xen/mem_access.h b/xen/include/xen/mem_access.h
> index e4d24502e0..00e594a0ad 100644
> --- a/xen/include/xen/mem_access.h
> +++ b/xen/include/xen/mem_access.h
> @@ -75,6 +75,9 @@ long p2m_set_mem_access_multi(struct domain *d,
>  int p2m_set_suppress_ve(struct domain *d, gfn_t gfn, bool suppress_ve,
>                          unsigned int altp2m_idx);
>

.. in mem_access.h?

> +int p2m_set_suppress_ve_multi(struct domain *d,
> +                              struct xen_hvm_altp2m_suppress_ve_multi *suppress_ve);
> +

I mean, even altp2m.h would make sore sense for this. So what's the
rational behind that?

Tamas
George Dunlap Dec. 24, 2019, 8:30 a.m. UTC | #2
On 12/23/19 2:04 PM, Alexandru Stefan ISAILA wrote:
> By default the sve bits are not set.
> This patch adds a new hypercall, xc_altp2m_set_supress_ve_multi(),
> to set a range of sve bits.
> The core function, p2m_set_suppress_ve_multi(), does not brake in case

*break

> of a error and it is doing a best effort for setting the bits in the
> given range. A check for continuation is made in order to have
> preemption on big ranges.

Weird English quirk: this should be "large".  ("Big" and "large" are
both adjectives, and "ranges" is a noun, so theoretically it should be
OK; but if you ask almost any native English speaker they'll say that
"big" sounds wrong in this case.  No real idea why.)

Both of these could be fixed on check-in.

> diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
> index 4fc919a9c5..de832dcc6d 100644
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -3070,6 +3070,70 @@ out:
>      return rc;
>  }
>  
> +/*
> + * Set/clear the #VE suppress bit for multiple pages.  Only available on VMX.
> + */
> +int p2m_set_suppress_ve_multi(struct domain *d,
> +                              struct xen_hvm_altp2m_suppress_ve_multi *sve)
> +{
> +    struct p2m_domain *host_p2m = p2m_get_hostp2m(d);
> +    struct p2m_domain *ap2m = NULL;
> +    struct p2m_domain *p2m = host_p2m;
> +    uint64_t start = sve->first_gfn;
> +    int rc = 0;
> +
> +    if ( sve->view > 0 )
> +    {
> +        if ( sve->view >= MAX_ALTP2M ||
> +             d->arch.altp2m_eptp[array_index_nospec(sve->view, MAX_ALTP2M)] ==
> +             mfn_x(INVALID_MFN) )
> +            return -EINVAL;
> +
> +        p2m = ap2m = d->arch.altp2m_p2m[array_index_nospec(sve->view,
> +                                                           MAX_ALTP2M)];
> +    }
> +
> +    p2m_lock(host_p2m);
> +
> +    if ( ap2m )
> +        p2m_lock(ap2m);
> +
> +    while ( sve->last_gfn >= start )
> +    {
> +        p2m_access_t a;
> +        p2m_type_t t;
> +        mfn_t mfn;
> +        int err = 0;
> +
> +        if ( altp2m_get_effective_entry(p2m, _gfn(start), &mfn, &t, &a, AP2MGET_query) )
> +            a = p2m->default_access;

So in the single-entry version, if altp2m_get_effective_entry() returns
an error, you pass that error up the stack; but in the multiple-entry
version, you ignore the error and simply set the access to
default_access?  I don't think that can be right.  If it is right, then
it definitely needs a comment.

This points out another issue: implementing this functionality twice
risks having this sort of drift between the single-entry version and the
multiple-entry version.  Would it make sense instead to implement the
single-entry version hypercall using p2m_set_suppress_ve_multi?

 -George
Alexandru Stefan ISAILA Dec. 24, 2019, 8:48 a.m. UTC | #3
On 24.12.2019 10:30, George Dunlap wrote:
> On 12/23/19 2:04 PM, Alexandru Stefan ISAILA wrote:
>> By default the sve bits are not set.
>> This patch adds a new hypercall, xc_altp2m_set_supress_ve_multi(),
>> to set a range of sve bits.
>> The core function, p2m_set_suppress_ve_multi(), does not brake in case
> 
> *break

Sorry for the typo.

> 
>> of a error and it is doing a best effort for setting the bits in the
>> given range. A check for continuation is made in order to have
>> preemption on big ranges.
> 
> Weird English quirk: this should be "large".  ("Big" and "large" are
> both adjectives, and "ranges" is a noun, so theoretically it should be
> OK; but if you ask almost any native English speaker they'll say that
> "big" sounds wrong in this case.  No real idea why.)
> 
> Both of these could be fixed on check-in.
> 
>> diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
>> index 4fc919a9c5..de832dcc6d 100644
>> --- a/xen/arch/x86/mm/p2m.c
>> +++ b/xen/arch/x86/mm/p2m.c
>> @@ -3070,6 +3070,70 @@ out:
>>       return rc;
>>   }
>>   
>> +/*
>> + * Set/clear the #VE suppress bit for multiple pages.  Only available on VMX.
>> + */
>> +int p2m_set_suppress_ve_multi(struct domain *d,
>> +                              struct xen_hvm_altp2m_suppress_ve_multi *sve)
>> +{
>> +    struct p2m_domain *host_p2m = p2m_get_hostp2m(d);
>> +    struct p2m_domain *ap2m = NULL;
>> +    struct p2m_domain *p2m = host_p2m;
>> +    uint64_t start = sve->first_gfn;
>> +    int rc = 0;
>> +
>> +    if ( sve->view > 0 )
>> +    {
>> +        if ( sve->view >= MAX_ALTP2M ||
>> +             d->arch.altp2m_eptp[array_index_nospec(sve->view, MAX_ALTP2M)] ==
>> +             mfn_x(INVALID_MFN) )
>> +            return -EINVAL;
>> +
>> +        p2m = ap2m = d->arch.altp2m_p2m[array_index_nospec(sve->view,
>> +                                                           MAX_ALTP2M)];
>> +    }
>> +
>> +    p2m_lock(host_p2m);
>> +
>> +    if ( ap2m )
>> +        p2m_lock(ap2m);
>> +
>> +    while ( sve->last_gfn >= start )
>> +    {
>> +        p2m_access_t a;
>> +        p2m_type_t t;
>> +        mfn_t mfn;
>> +        int err = 0;
>> +
>> +        if ( altp2m_get_effective_entry(p2m, _gfn(start), &mfn, &t, &a, AP2MGET_query) )
>> +            a = p2m->default_access;
> 
> So in the single-entry version, if altp2m_get_effective_entry() returns
> an error, you pass that error up the stack; but in the multiple-entry
> version, you ignore the error and simply set the access to
> default_access?  I don't think that can be right.  If it is right, then
> it definitely needs a comment.
> 

The idea behind this was to have a best effort try and signal the first 
error. If the get_entry fails then the best way to go is with 
default_access but this is open for debate.

Another way to solve this is to update the first_error_gfn/first_error 
and then continue. I think this ca be used to make p2m_set_suppress_ve() 
call p2m_set_suppress_ve_multi.

> This points out another issue: implementing this functionality twice
> risks having this sort of drift between the single-entry version and the
> multiple-entry version.  Would it make sense instead to implement the
> single-entry version hypercall using p2m_set_suppress_ve_multi?
> 

In the single version there is no best-effort idea because the user can 
make use of every single error.

Alex
George Dunlap Dec. 24, 2019, 8:58 a.m. UTC | #4
On 12/24/19 8:48 AM, Alexandru Stefan ISAILA wrote:
> 
> 
> On 24.12.2019 10:30, George Dunlap wrote:
>> On 12/23/19 2:04 PM, Alexandru Stefan ISAILA wrote:
>>> By default the sve bits are not set.
>>> This patch adds a new hypercall, xc_altp2m_set_supress_ve_multi(),
>>> to set a range of sve bits.
>>> The core function, p2m_set_suppress_ve_multi(), does not brake in case
>>
>> *break
> 
> Sorry for the typo.
> 
>>
>>> of a error and it is doing a best effort for setting the bits in the
>>> given range. A check for continuation is made in order to have
>>> preemption on big ranges.
>>
>> Weird English quirk: this should be "large".  ("Big" and "large" are
>> both adjectives, and "ranges" is a noun, so theoretically it should be
>> OK; but if you ask almost any native English speaker they'll say that
>> "big" sounds wrong in this case.  No real idea why.)
>>
>> Both of these could be fixed on check-in.
>>
>>> diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
>>> index 4fc919a9c5..de832dcc6d 100644
>>> --- a/xen/arch/x86/mm/p2m.c
>>> +++ b/xen/arch/x86/mm/p2m.c
>>> @@ -3070,6 +3070,70 @@ out:
>>>       return rc;
>>>   }
>>>   
>>> +/*
>>> + * Set/clear the #VE suppress bit for multiple pages.  Only available on VMX.
>>> + */
>>> +int p2m_set_suppress_ve_multi(struct domain *d,
>>> +                              struct xen_hvm_altp2m_suppress_ve_multi *sve)
>>> +{
>>> +    struct p2m_domain *host_p2m = p2m_get_hostp2m(d);
>>> +    struct p2m_domain *ap2m = NULL;
>>> +    struct p2m_domain *p2m = host_p2m;
>>> +    uint64_t start = sve->first_gfn;
>>> +    int rc = 0;
>>> +
>>> +    if ( sve->view > 0 )
>>> +    {
>>> +        if ( sve->view >= MAX_ALTP2M ||
>>> +             d->arch.altp2m_eptp[array_index_nospec(sve->view, MAX_ALTP2M)] ==
>>> +             mfn_x(INVALID_MFN) )
>>> +            return -EINVAL;
>>> +
>>> +        p2m = ap2m = d->arch.altp2m_p2m[array_index_nospec(sve->view,
>>> +                                                           MAX_ALTP2M)];
>>> +    }
>>> +
>>> +    p2m_lock(host_p2m);
>>> +
>>> +    if ( ap2m )
>>> +        p2m_lock(ap2m);
>>> +
>>> +    while ( sve->last_gfn >= start )
>>> +    {
>>> +        p2m_access_t a;
>>> +        p2m_type_t t;
>>> +        mfn_t mfn;
>>> +        int err = 0;
>>> +
>>> +        if ( altp2m_get_effective_entry(p2m, _gfn(start), &mfn, &t, &a, AP2MGET_query) )
>>> +            a = p2m->default_access;
>>
>> So in the single-entry version, if altp2m_get_effective_entry() returns
>> an error, you pass that error up the stack; but in the multiple-entry
>> version, you ignore the error and simply set the access to
>> default_access?  I don't think that can be right.  If it is right, then
>> it definitely needs a comment.
>>
> 
> The idea behind this was to have a best effort try and signal the first 
> error. If the get_entry fails then the best way to go is with 
> default_access but this is open for debate.

I don't see how it's a good idea at all. If get_effective_entry fails,
then mfn and t may both be uninitialized.  If an attacker can arrange
for those to have the values she wants, she could use this to take over
the system.

> Another way to solve this is to update the first_error_gfn/first_error 
> and then continue. I think this ca be used to make p2m_set_suppress_ve() 
> call p2m_set_suppress_ve_multi.

Isn't that exactly the semantics you want -- try gfn N, if that fails,
record it and move on to the next one?  Why would "write an entry with
random values for mfn and type, but with the default access" be a better
response?

 -George
Alexandru Stefan ISAILA Dec. 24, 2019, 9:04 a.m. UTC | #5
>>>> +/*
>>>> + * Set/clear the #VE suppress bit for multiple pages.  Only available on VMX.
>>>> + */
>>>> +int p2m_set_suppress_ve_multi(struct domain *d,
>>>> +                              struct xen_hvm_altp2m_suppress_ve_multi *sve)
>>>> +{
>>>> +    struct p2m_domain *host_p2m = p2m_get_hostp2m(d);
>>>> +    struct p2m_domain *ap2m = NULL;
>>>> +    struct p2m_domain *p2m = host_p2m;
>>>> +    uint64_t start = sve->first_gfn;
>>>> +    int rc = 0;
>>>> +
>>>> +    if ( sve->view > 0 )
>>>> +    {
>>>> +        if ( sve->view >= MAX_ALTP2M ||
>>>> +             d->arch.altp2m_eptp[array_index_nospec(sve->view, MAX_ALTP2M)] ==
>>>> +             mfn_x(INVALID_MFN) )
>>>> +            return -EINVAL;
>>>> +
>>>> +        p2m = ap2m = d->arch.altp2m_p2m[array_index_nospec(sve->view,
>>>> +                                                           MAX_ALTP2M)];
>>>> +    }
>>>> +
>>>> +    p2m_lock(host_p2m);
>>>> +
>>>> +    if ( ap2m )
>>>> +        p2m_lock(ap2m);
>>>> +
>>>> +    while ( sve->last_gfn >= start )
>>>> +    {
>>>> +        p2m_access_t a;
>>>> +        p2m_type_t t;
>>>> +        mfn_t mfn;
>>>> +        int err = 0;
>>>> +
>>>> +        if ( altp2m_get_effective_entry(p2m, _gfn(start), &mfn, &t, &a, AP2MGET_query) )
>>>> +            a = p2m->default_access;
>>>
>>> So in the single-entry version, if altp2m_get_effective_entry() returns
>>> an error, you pass that error up the stack; but in the multiple-entry
>>> version, you ignore the error and simply set the access to
>>> default_access?  I don't think that can be right.  If it is right, then
>>> it definitely needs a comment.
>>>
>>
>> The idea behind this was to have a best effort try and signal the first
>> error. If the get_entry fails then the best way to go is with
>> default_access but this is open for debate.
> 
> I don't see how it's a good idea at all. If get_effective_entry fails,
> then mfn and t may both be uninitialized.  If an attacker can arrange
> for those to have the values she wants, she could use this to take over
> the system.
> 
>> Another way to solve this is to update the first_error_gfn/first_error
>> and then continue. I think this ca be used to make p2m_set_suppress_ve()
>> call p2m_set_suppress_ve_multi.
> 
> Isn't that exactly the semantics you want -- try gfn N, if that fails,
> record it and move on to the next one?  Why would "write an entry with
> random values for mfn and type, but with the default access" be a better
> response?
> 

That is right, I'll go with this for the next version. Should I have the 
single version call the _multi version after this change?

Alex
George Dunlap Dec. 24, 2019, 9:25 a.m. UTC | #6
On 12/24/19 9:04 AM, Alexandru Stefan ISAILA wrote:
>>>>> +/*
>>>>> + * Set/clear the #VE suppress bit for multiple pages.  Only available on VMX.
>>>>> + */
>>>>> +int p2m_set_suppress_ve_multi(struct domain *d,
>>>>> +                              struct xen_hvm_altp2m_suppress_ve_multi *sve)
>>>>> +{
>>>>> +    struct p2m_domain *host_p2m = p2m_get_hostp2m(d);
>>>>> +    struct p2m_domain *ap2m = NULL;
>>>>> +    struct p2m_domain *p2m = host_p2m;
>>>>> +    uint64_t start = sve->first_gfn;
>>>>> +    int rc = 0;
>>>>> +
>>>>> +    if ( sve->view > 0 )
>>>>> +    {
>>>>> +        if ( sve->view >= MAX_ALTP2M ||
>>>>> +             d->arch.altp2m_eptp[array_index_nospec(sve->view, MAX_ALTP2M)] ==
>>>>> +             mfn_x(INVALID_MFN) )
>>>>> +            return -EINVAL;
>>>>> +
>>>>> +        p2m = ap2m = d->arch.altp2m_p2m[array_index_nospec(sve->view,
>>>>> +                                                           MAX_ALTP2M)];
>>>>> +    }
>>>>> +
>>>>> +    p2m_lock(host_p2m);
>>>>> +
>>>>> +    if ( ap2m )
>>>>> +        p2m_lock(ap2m);
>>>>> +
>>>>> +    while ( sve->last_gfn >= start )
>>>>> +    {
>>>>> +        p2m_access_t a;
>>>>> +        p2m_type_t t;
>>>>> +        mfn_t mfn;
>>>>> +        int err = 0;
>>>>> +
>>>>> +        if ( altp2m_get_effective_entry(p2m, _gfn(start), &mfn, &t, &a, AP2MGET_query) )
>>>>> +            a = p2m->default_access;
>>>>
>>>> So in the single-entry version, if altp2m_get_effective_entry() returns
>>>> an error, you pass that error up the stack; but in the multiple-entry
>>>> version, you ignore the error and simply set the access to
>>>> default_access?  I don't think that can be right.  If it is right, then
>>>> it definitely needs a comment.
>>>>
>>>
>>> The idea behind this was to have a best effort try and signal the first
>>> error. If the get_entry fails then the best way to go is with
>>> default_access but this is open for debate.
>>
>> I don't see how it's a good idea at all. If get_effective_entry fails,
>> then mfn and t may both be uninitialized.  If an attacker can arrange
>> for those to have the values she wants, she could use this to take over
>> the system.
>>
>>> Another way to solve this is to update the first_error_gfn/first_error
>>> and then continue. I think this ca be used to make p2m_set_suppress_ve()
>>> call p2m_set_suppress_ve_multi.
>>
>> Isn't that exactly the semantics you want -- try gfn N, if that fails,
>> record it and move on to the next one?  Why would "write an entry with
>> random values for mfn and type, but with the default access" be a better
>> response?
>>
> 
> That is right, I'll go with this for the next version. 

So, one potential behavior you might want.  Consider the following case:

gfn 'A' isn't mapped in the hostp2m yet.
1. Create altp2m X
2. Tools set the sve gfn A
3. Host adds mapping for A
4. Guest accesses A, faulting the mapping over to the altp2m

At the moment, for the single-entry call, #2 will fail, and #4 will get
the default sve value.  It might be nice for #2 to succeed, and #4 to
copy over the mfn, type, &c, but use the sve value specified in #2.

But at the moment, altp2m_get_or_propagate() won't end up copying sve
over if the altp2m entry is invalid (AFAICT).  So I think for now,
skipping that entry and leaving it an error is the best thing to do.

> Should I have the 
> single version call the _multi version after this change?

That seems like a good thing to try.  Thanks.

 -George
Jan Beulich Dec. 27, 2019, 8:01 a.m. UTC | #7
(re-sending, as I still don't see the mail having appeared on the list)

On 23.12.2019 15:04, Alexandru Stefan ISAILA wrote:
> --- a/xen/include/public/hvm/hvm_op.h
> +++ b/xen/include/public/hvm/hvm_op.h
> @@ -46,6 +46,16 @@ struct xen_hvm_altp2m_suppress_ve {
>      uint64_t gfn;
>  };
>  
> +struct xen_hvm_altp2m_suppress_ve_multi {
> +    uint16_t view;
> +    uint8_t suppress_ve; /* Boolean type. */
> +    uint8_t pad1;
> +    int32_t first_error; /* Should be set to 0 . */

Stray blank before full stop.

> +    uint64_t first_gfn; /* Value may be updated */
> +    uint64_t last_gfn;
> +    uint64_t first_error_gfn; /* Gfn of the first error. */
> +};

Please be consistent about your comment style here: The full
stop isn't mandatory, but at least adjacent comments (all
added at the same time) should have identical style.

Please may I ask that you apply a little more care when doing
updates, rather than relying on others to spend their time on
catching issues?

Jan
Alexandru Stefan ISAILA Jan. 6, 2020, 9:21 a.m. UTC | #8
On 23.12.2019 18:31, Tamas K Lengyel wrote:
>> diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
>> index 4fc919a9c5..de832dcc6d 100644
>> --- a/xen/arch/x86/mm/p2m.c
>> +++ b/xen/arch/x86/mm/p2m.c
>> @@ -3070,6 +3070,70 @@ out:
>>       return rc;
>>   }
>>
>> +/*
>> + * Set/clear the #VE suppress bit for multiple pages.  Only available on VMX.
>> + */
> 
> I have to say I find it a bit odd why this function is in p2m.c but
> it's declaration...
> 
>> +int p2m_set_suppress_ve_multi(struct domain *d,
>> +                              struct xen_hvm_altp2m_suppress_ve_multi *sve)
>> +{
> 
> ...
> 
>> diff --git a/xen/include/xen/mem_access.h b/xen/include/xen/mem_access.h
>> index e4d24502e0..00e594a0ad 100644
>> --- a/xen/include/xen/mem_access.h
>> +++ b/xen/include/xen/mem_access.h
>> @@ -75,6 +75,9 @@ long p2m_set_mem_access_multi(struct domain *d,
>>   int p2m_set_suppress_ve(struct domain *d, gfn_t gfn, bool suppress_ve,
>>                           unsigned int altp2m_idx);
>>
> 
> .. in mem_access.h?
> 
>> +int p2m_set_suppress_ve_multi(struct domain *d,
>> +                              struct xen_hvm_altp2m_suppress_ve_multi *suppress_ve);
>> +
> 
> I mean, even altp2m.h would make sore sense for this. So what's the
> rational behind that?
> 

Indeed it's odd but p2m_set_suppress_ve() is declared above this. I 
don't now how it got there in the first place but I just followed that 
pattern.

Alex
diff mbox series

Patch

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 75f191ae3a..cc4eb1e3d3 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -1923,6 +1923,10 @@  int xc_altp2m_switch_to_view(xc_interface *handle, uint32_t domid,
                              uint16_t view_id);
 int xc_altp2m_set_suppress_ve(xc_interface *handle, uint32_t domid,
                               uint16_t view_id, xen_pfn_t gfn, bool sve);
+int xc_altp2m_set_supress_ve_multi(xc_interface *handle, uint32_t domid,
+                                   uint16_t view_id, xen_pfn_t first_gfn,
+                                   xen_pfn_t last_gfn, bool sve,
+                                   xen_pfn_t *error_gfn, int32_t *error_code);
 int xc_altp2m_get_suppress_ve(xc_interface *handle, uint32_t domid,
                               uint16_t view_id, xen_pfn_t gfn, bool *sve);
 int xc_altp2m_set_mem_access(xc_interface *handle, uint32_t domid,
diff --git a/tools/libxc/xc_altp2m.c b/tools/libxc/xc_altp2m.c
index 09dad0355e..46fb725806 100644
--- a/tools/libxc/xc_altp2m.c
+++ b/tools/libxc/xc_altp2m.c
@@ -234,6 +234,39 @@  int xc_altp2m_set_suppress_ve(xc_interface *handle, uint32_t domid,
     return rc;
 }
 
+int xc_altp2m_set_supress_ve_multi(xc_interface *handle, uint32_t domid,
+                                   uint16_t view_id, xen_pfn_t first_gfn,
+                                   xen_pfn_t last_gfn, bool sve,
+                                   xen_pfn_t *error_gfn, int32_t *error_code)
+{
+    int rc;
+    DECLARE_HYPERCALL_BUFFER(xen_hvm_altp2m_op_t, arg);
+
+    arg = xc_hypercall_buffer_alloc(handle, arg, sizeof(*arg));
+    if ( arg == NULL )
+        return -1;
+
+    arg->version = HVMOP_ALTP2M_INTERFACE_VERSION;
+    arg->cmd = HVMOP_altp2m_set_suppress_ve_multi;
+    arg->domain = domid;
+    arg->u.suppress_ve_multi.view = view_id;
+    arg->u.suppress_ve_multi.first_gfn = first_gfn;
+    arg->u.suppress_ve_multi.last_gfn = last_gfn;
+    arg->u.suppress_ve_multi.suppress_ve = sve;
+
+    rc = xencall2(handle->xcall, __HYPERVISOR_hvm_op, HVMOP_altp2m,
+                  HYPERCALL_BUFFER_AS_ARG(arg));
+
+    if ( arg->u.suppress_ve_multi.first_error )
+    {
+        *error_gfn = arg->u.suppress_ve_multi.first_error_gfn;
+        *error_code = arg->u.suppress_ve_multi.first_error;
+    }
+
+    xc_hypercall_buffer_free(handle, arg);
+    return rc;
+}
+
 int xc_altp2m_set_mem_access(xc_interface *handle, uint32_t domid,
                              uint16_t view_id, xen_pfn_t gfn,
                              xenmem_access_t access)
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 4dfaf35566..4db15768d4 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4526,6 +4526,7 @@  static int do_altp2m_op(
     case HVMOP_altp2m_destroy_p2m:
     case HVMOP_altp2m_switch_p2m:
     case HVMOP_altp2m_set_suppress_ve:
+    case HVMOP_altp2m_set_suppress_ve_multi:
     case HVMOP_altp2m_get_suppress_ve:
     case HVMOP_altp2m_set_mem_access:
     case HVMOP_altp2m_set_mem_access_multi:
@@ -4684,6 +4685,25 @@  static int do_altp2m_op(
         }
         break;
 
+    case HVMOP_altp2m_set_suppress_ve_multi:
+    {
+        uint64_t max_phys_addr = (1UL << d->arch.cpuid->extd.maxphysaddr) - 1;
+
+        a.u.suppress_ve_multi.last_gfn = min(a.u.suppress_ve_multi.last_gfn,
+                                             max_phys_addr);
+
+        if ( a.u.suppress_ve_multi.pad1 ||
+             a.u.suppress_ve_multi.first_gfn > a.u.suppress_ve_multi.last_gfn )
+            rc = -EINVAL;
+        else
+        {
+            rc = p2m_set_suppress_ve_multi(d, &a.u.suppress_ve_multi);
+            if ( (!rc || rc == -ERESTART) && __copy_to_guest(arg, &a, 1) )
+                rc = -EFAULT;
+        }
+        break;
+    }
+
     case HVMOP_altp2m_get_suppress_ve:
         if ( a.u.suppress_ve.pad1 || a.u.suppress_ve.pad2 )
             rc = -EINVAL;
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 4fc919a9c5..de832dcc6d 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -3070,6 +3070,70 @@  out:
     return rc;
 }
 
+/*
+ * Set/clear the #VE suppress bit for multiple pages.  Only available on VMX.
+ */
+int p2m_set_suppress_ve_multi(struct domain *d,
+                              struct xen_hvm_altp2m_suppress_ve_multi *sve)
+{
+    struct p2m_domain *host_p2m = p2m_get_hostp2m(d);
+    struct p2m_domain *ap2m = NULL;
+    struct p2m_domain *p2m = host_p2m;
+    uint64_t start = sve->first_gfn;
+    int rc = 0;
+
+    if ( sve->view > 0 )
+    {
+        if ( sve->view >= MAX_ALTP2M ||
+             d->arch.altp2m_eptp[array_index_nospec(sve->view, MAX_ALTP2M)] ==
+             mfn_x(INVALID_MFN) )
+            return -EINVAL;
+
+        p2m = ap2m = d->arch.altp2m_p2m[array_index_nospec(sve->view,
+                                                           MAX_ALTP2M)];
+    }
+
+    p2m_lock(host_p2m);
+
+    if ( ap2m )
+        p2m_lock(ap2m);
+
+    while ( sve->last_gfn >= start )
+    {
+        p2m_access_t a;
+        p2m_type_t t;
+        mfn_t mfn;
+        int err = 0;
+
+        if ( altp2m_get_effective_entry(p2m, _gfn(start), &mfn, &t, &a, AP2MGET_query) )
+            a = p2m->default_access;
+
+        if ( (err = p2m->set_entry(p2m, _gfn(start), mfn, PAGE_ORDER_4K, t, a,
+                                   sve->suppress_ve)) &&
+             !sve->first_error)
+        {
+            sve->first_error_gfn = start; /* Save the gfn of the first error */
+            sve->first_error = err; /* Save the first error code */
+        }
+
+        /* Check for continuation if it's not the last iteration. */
+        if ( sve->last_gfn >= ++start && hypercall_preempt_check() )
+        {
+            rc = -ERESTART;
+            break;
+        }
+    }
+
+    sve->first_gfn = start;
+
+    if ( ap2m )
+        p2m_unlock(ap2m);
+
+    p2m_unlock(host_p2m);
+
+    return rc;
+}
+
 int p2m_get_suppress_ve(struct domain *d, gfn_t gfn, bool *suppress_ve,
                         unsigned int altp2m_idx)
 {
diff --git a/xen/include/public/hvm/hvm_op.h b/xen/include/public/hvm/hvm_op.h
index 353f8034d9..1f049cfa2e 100644
--- a/xen/include/public/hvm/hvm_op.h
+++ b/xen/include/public/hvm/hvm_op.h
@@ -46,6 +46,16 @@  struct xen_hvm_altp2m_suppress_ve {
     uint64_t gfn;
 };
 
+struct xen_hvm_altp2m_suppress_ve_multi {
+    uint16_t view;
+    uint8_t suppress_ve; /* Boolean type. */
+    uint8_t pad1;
+    int32_t first_error; /* Should be set to 0 . */
+    uint64_t first_gfn; /* Value may be updated */
+    uint64_t last_gfn;
+    uint64_t first_error_gfn; /* Gfn of the first error. */
+};
+
 #if __XEN_INTERFACE_VERSION__ < 0x00040900
 
 /* Set the logical level of one of a domain's PCI INTx wires. */
@@ -339,6 +349,8 @@  struct xen_hvm_altp2m_op {
 #define HVMOP_altp2m_vcpu_disable_notify  13
 /* Get the active vcpu p2m index */
 #define HVMOP_altp2m_get_p2m_idx          14
+/* Set the "Supress #VE" bit for a range of pages */
+#define HVMOP_altp2m_set_suppress_ve_multi 15
     domid_t domain;
     uint16_t pad1;
     uint32_t pad2;
@@ -353,6 +365,7 @@  struct xen_hvm_altp2m_op {
         struct xen_hvm_altp2m_change_gfn           change_gfn;
         struct xen_hvm_altp2m_set_mem_access_multi set_mem_access_multi;
         struct xen_hvm_altp2m_suppress_ve          suppress_ve;
+        struct xen_hvm_altp2m_suppress_ve_multi    suppress_ve_multi;
         struct xen_hvm_altp2m_vcpu_disable_notify  disable_notify;
         struct xen_hvm_altp2m_get_vcpu_p2m_idx     get_vcpu_p2m_idx;
         uint8_t pad[64];
diff --git a/xen/include/xen/mem_access.h b/xen/include/xen/mem_access.h
index e4d24502e0..00e594a0ad 100644
--- a/xen/include/xen/mem_access.h
+++ b/xen/include/xen/mem_access.h
@@ -75,6 +75,9 @@  long p2m_set_mem_access_multi(struct domain *d,
 int p2m_set_suppress_ve(struct domain *d, gfn_t gfn, bool suppress_ve,
                         unsigned int altp2m_idx);
 
+int p2m_set_suppress_ve_multi(struct domain *d,
+                              struct xen_hvm_altp2m_suppress_ve_multi *suppress_ve);
+
 int p2m_get_suppress_ve(struct domain *d, gfn_t gfn, bool *suppress_ve,
                         unsigned int altp2m_idx);