diff mbox series

[V3,1/2] x86/altp2m: Add hypercall to set a range of sve bits

Message ID 20191121150124.15865-1-aisaila@bitdefender.com (mailing list archive)
State New, archived
Headers show
Series [V3,1/2] x86/altp2m: Add hypercall to set a range of sve bits | expand

Commit Message

Alexandru Stefan ISAILA Nov. 21, 2019, 3:02 p.m. UTC
By default the sve bits are not set.
This patch adds a new hypercall, xc_altp2m_set_supress_ve_multi(),
to set a range of sve bits.
The core function, p2m_set_suppress_ve_multi(), does not brake in case
of a error and it is doing a best effort for setting the bits in the
given range. A check for continuation is made in order to have
preemption on big ranges.

Signed-off-by: Alexandru Isaila <aisaila@bitdefender.com>
---
CC: Ian Jackson <ian.jackson@eu.citrix.com>
CC: Wei Liu <wl@xen.org>
CC: Andrew Cooper <andrew.cooper3@citrix.com>
CC: George Dunlap <George.Dunlap@eu.citrix.com>
CC: Jan Beulich <jbeulich@suse.com>
CC: Julien Grall <julien@xen.org>
CC: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
CC: Stefano Stabellini <sstabellini@kernel.org>
CC: "Roger Pau Monné" <roger.pau@citrix.com>
CC: George Dunlap <george.dunlap@eu.citrix.com>
CC: Razvan Cojocaru <rcojocaru@bitdefender.com>
CC: Tamas K Lengyel <tamas@tklengyel.com>
CC: Petre Pircalabu <ppircalabu@bitdefender.com>
---
Changes since V2:
	- Add a new structure "xen_hvm_altp2m_suppress_ve_multi"
	- Copy the gfn of the first error to the caller
	- Revert xen_hvm_altp2m_suppress_ve
	- Add a mechanism to save the first error.
---
 tools/libxc/include/xenctrl.h   |  4 +++
 tools/libxc/xc_altp2m.c         | 30 +++++++++++++++++
 xen/arch/x86/hvm/hvm.c          | 13 +++++++
 xen/arch/x86/mm/p2m.c           | 60 +++++++++++++++++++++++++++++++++
 xen/include/public/hvm/hvm_op.h | 14 ++++++++
 xen/include/xen/mem_access.h    |  3 ++
 6 files changed, 124 insertions(+)

Comments

Jan Beulich Nov. 29, 2019, 11:31 a.m. UTC | #1
On 21.11.2019 16:02, Alexandru Stefan ISAILA wrote:
> Changes since V2:
> 	- Add a new structure "xen_hvm_altp2m_suppress_ve_multi"
> 	- Copy the gfn of the first error to the caller
> 	- Revert xen_hvm_altp2m_suppress_ve
> 	- Add a mechanism to save the first error.

And I guess you want to adjust the commit message to cover this
fact.

> @@ -4711,6 +4712,18 @@ static int do_altp2m_op(
>          }
>          break;
>  
> +    case HVMOP_altp2m_set_suppress_ve_multi:
> +        if ( a.u.suppress_ve_multi.pad1 || !a.u.suppress_ve_multi.pad2 )
> +            rc = -EINVAL;
> +        else
> +        {
> +            rc = p2m_set_suppress_ve_multi(d, &a.u.suppress_ve_multi);
> +
> +            if ( __copy_to_guest(arg, &a, 1) )
> +                rc = -EFAULT;

Do you really want to replace a possible prior error here?

> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -3059,6 +3059,66 @@ out:
>      return rc;
>  }
>  
> +/*
> + * Set/clear the #VE suppress bit for multiple pages.  Only available on VMX.
> + */
> +int p2m_set_suppress_ve_multi(struct domain *d,
> +                              struct xen_hvm_altp2m_suppress_ve_multi *sve)
> +{
> +    struct p2m_domain *host_p2m = p2m_get_hostp2m(d);
> +    struct p2m_domain *ap2m = NULL;
> +    struct p2m_domain *p2m;
> +    uint64_t start = sve->opaque ?: sve->first_gfn;
> +    int rc = 0;
> +
> +    if ( sve->view > 0 )
> +    {
> +        if ( sve->view >= MAX_ALTP2M ||
> +             d->arch.altp2m_eptp[sve->view] == mfn_x(INVALID_MFN) )
> +            return -EINVAL;
> +
> +        p2m = ap2m = d->arch.altp2m_p2m[sve->view];

These want array_index_nospec() or alike used (and the pre-existing
similar uses taken care of in a separate patch).

> +    }
> +    else
> +        p2m = host_p2m;

Each time I see yet another instance of this pattern appear, I
wonder why this is. Use (or not) of initializers should be
consistent at least within individual functions. I.e. either
you initialize both ap2m and p2m in their declaration, or you
do so for neither of them.

> +    p2m_lock(host_p2m);
> +
> +    if ( ap2m )
> +        p2m_lock(ap2m);
> +
> +

Please no two blank lines next to one another.

> +    while ( sve->last_gfn >= start )

There are no checks on ->last_gfn, ->first_gfn, or ->opaque.
At the very least a bogus ->opaque should result in an error.
I wonder though why you don't simply update ->first_gfn,
omitting the need for ->opaque. All this would need is a
comment in the public header clarifying that callers should
expect the values to change.

Furthermore I think it would be helpful to bail on entirely
out of range ->first_gfn. This being a 64-bit field, only
40 of the bits are actually usable from an architecture pov
(in reality it may be even less). Otherwise you potentially
invoke p2m_ept_set_entry() perhaps trillions of times just
for it to return -EINVAL from its first if().

> +    {
> +        p2m_access_t a;
> +        p2m_type_t t;
> +        mfn_t mfn;
> +
> +        if ( altp2m_get_effective_entry(p2m, _gfn(start), &mfn, &t, &a, AP2MGET_query) )
> +            a = p2m->default_access;
> +
> +        if ( p2m->set_entry(p2m, _gfn(start), mfn, PAGE_ORDER_4K, t, a,
> +                            sve->suppress_ve) && !sve->first_error )
> +            sve->first_error = start; /* Save the gfn from of the first error */

Drop either "from" or "of"?

> --- a/xen/include/public/hvm/hvm_op.h
> +++ b/xen/include/public/hvm/hvm_op.h
> @@ -46,6 +46,17 @@ struct xen_hvm_altp2m_suppress_ve {
>      uint64_t gfn;
>  };
>  
> +struct xen_hvm_altp2m_suppress_ve_multi {
> +    uint16_t view;
> +    uint8_t suppress_ve; /* Boolean type. */
> +    uint8_t pad1;
> +    uint32_t pad2;

Perhaps use this field to report the error code of the first
error encountered?

> +    uint64_t first_gfn;
> +    uint64_t last_gfn;
> +    uint64_t opaque;

Afaics there's a requirement that the caller put zero in here
for the initial invocation. This should be noted in a comment.

> +    uint64_t first_error; /* Gfn of the first error. */

Actually the same appears to apply to this one.

Jan
Alexandru Stefan ISAILA Dec. 2, 2019, 2:40 p.m. UTC | #2
On 29.11.2019 13:31, Jan Beulich wrote:
> On 21.11.2019 16:02, Alexandru Stefan ISAILA wrote:
>> Changes since V2:
>> 	- Add a new structure "xen_hvm_altp2m_suppress_ve_multi"
>> 	- Copy the gfn of the first error to the caller
>> 	- Revert xen_hvm_altp2m_suppress_ve
>> 	- Add a mechanism to save the first error.
> 
> And I guess you want to adjust the commit message to cover this
> fact.

I will update the commit message.

> 
>> @@ -4711,6 +4712,18 @@ static int do_altp2m_op(
>>           }
>>           break;
>>   
>> +    case HVMOP_altp2m_set_suppress_ve_multi:
>> +        if ( a.u.suppress_ve_multi.pad1 || !a.u.suppress_ve_multi.pad2 )
>> +            rc = -EINVAL;
>> +        else
>> +        {
>> +            rc = p2m_set_suppress_ve_multi(d, &a.u.suppress_ve_multi);
>> +
>> +            if ( __copy_to_guest(arg, &a, 1) )
>> +                rc = -EFAULT;
> 
> Do you really want to replace a possible prior error here?

I thought about this and the only error that can be replaced here is 
EINVAL. A error on __copy_to_guest has a grater importance if this fails.

> 
>> --- a/xen/arch/x86/mm/p2m.c
>> +++ b/xen/arch/x86/mm/p2m.c
>> @@ -3059,6 +3059,66 @@ out:
>>       return rc;
>>   }
>>   
>> +/*
>> + * Set/clear the #VE suppress bit for multiple pages.  Only available on VMX.
>> + */
>> +int p2m_set_suppress_ve_multi(struct domain *d,
>> +                              struct xen_hvm_altp2m_suppress_ve_multi *sve)
>> +{
>> +    struct p2m_domain *host_p2m = p2m_get_hostp2m(d);
>> +    struct p2m_domain *ap2m = NULL;
>> +    struct p2m_domain *p2m;
>> +    uint64_t start = sve->opaque ?: sve->first_gfn;
>> +    int rc = 0;
>> +
>> +    if ( sve->view > 0 )
>> +    {
>> +        if ( sve->view >= MAX_ALTP2M ||
>> +             d->arch.altp2m_eptp[sve->view] == mfn_x(INVALID_MFN) )
>> +            return -EINVAL;
>> +
>> +        p2m = ap2m = d->arch.altp2m_p2m[sve->view];
> 
> These want array_index_nospec() or alike used (and the pre-existing
> similar uses taken care of in a separate patch).

Sure, this can change to p2m = ap2m = 
d->arch.altp2m_p2m[array_index_nospec(sve->view, MAX_ALTP2M).

But what preexisting uses are you talking about? All the places where 
d->arch.altp2m_p2m[idx] is used? If so, there will be a handful of 
changes in that new patch.

> 
>> +    }
>> +    else
>> +        p2m = host_p2m;
> 
> Each time I see yet another instance of this pattern appear, I
> wonder why this is. Use (or not) of initializers should be
> consistent at least within individual functions. I.e. either
> you initialize both ap2m and p2m in their declaration, or you
> do so for neither of them.

The only reason I can see for this pattern is that p2m will be assigned 
a value but ap2m can never get a value. But I agree with you and I will 
have them both initialized with NULL.

> 
>> +    p2m_lock(host_p2m);
>> +
>> +    if ( ap2m )
>> +        p2m_lock(ap2m);
>> +
>> +
> 
> Please no two blank lines next to one another.
> 
>> +    while ( sve->last_gfn >= start )
> 
> There are no checks on ->last_gfn, ->first_gfn, or ->opaque.
> At the very least a bogus ->opaque should result in an error.
> I wonder though why you don't simply update ->first_gfn,
> omitting the need for ->opaque. All this would need is a
> comment in the public header clarifying that callers should
> expect the values to change.

I was following the pattern from range_share() after Tamas requested the 
opaque field. I agree that it would be simpler to have ->first_gfn 
update and I can change to that in the next version.

> 
> Furthermore I think it would be helpful to bail on entirely
> out of range ->first_gfn. This being a 64-bit field, only
> 40 of the bits are actually usable from an architecture pov
> (in reality it may be even less). Otherwise you potentially
> invoke p2m_ept_set_entry() perhaps trillions of times just
> for it to return -EINVAL from its first if().

Do you mean to check ->first_gfn(that will be updated in the next 
version) against domain_get_maximum_gpfn() and bail after that range?

> 
>> +    {
>> +        p2m_access_t a;
>> +        p2m_type_t t;
>> +        mfn_t mfn;
>> +
>> +        if ( altp2m_get_effective_entry(p2m, _gfn(start), &mfn, &t, &a, AP2MGET_query) )
>> +            a = p2m->default_access;
>> +
>> +        if ( p2m->set_entry(p2m, _gfn(start), mfn, PAGE_ORDER_4K, t, a,
>> +                            sve->suppress_ve) && !sve->first_error )
>> +            sve->first_error = start; /* Save the gfn from of the first error */
> 
> Drop either "from" or "of"?
> 
>> --- a/xen/include/public/hvm/hvm_op.h
>> +++ b/xen/include/public/hvm/hvm_op.h
>> @@ -46,6 +46,17 @@ struct xen_hvm_altp2m_suppress_ve {
>>       uint64_t gfn;
>>   };
>>   
>> +struct xen_hvm_altp2m_suppress_ve_multi {
>> +    uint16_t view;
>> +    uint8_t suppress_ve; /* Boolean type. */
>> +    uint8_t pad1;
>> +    uint32_t pad2;
> 
> Perhaps use this field to report the error code of the first
> error encountered?

That sound good.

> 
>> +    uint64_t first_gfn;
>> +    uint64_t last_gfn;
>> +    uint64_t opaque;
> 
> Afaics there's a requirement that the caller put zero in here
> for the initial invocation. This should be noted in a comment.
> 
>> +    uint64_t first_error; /* Gfn of the first error. */
> 
> Actually the same appears to apply to this one.

I will update the comments.

Thanks,
Alex
Jan Beulich Dec. 3, 2019, 8:14 a.m. UTC | #3
On 02.12.2019 15:40, Alexandru Stefan ISAILA wrote:
> On 29.11.2019 13:31, Jan Beulich wrote:
>> On 21.11.2019 16:02, Alexandru Stefan ISAILA wrote:
>>> @@ -4711,6 +4712,18 @@ static int do_altp2m_op(
>>>           }
>>>           break;
>>>   
>>> +    case HVMOP_altp2m_set_suppress_ve_multi:
>>> +        if ( a.u.suppress_ve_multi.pad1 || !a.u.suppress_ve_multi.pad2 )
>>> +            rc = -EINVAL;
>>> +        else
>>> +        {
>>> +            rc = p2m_set_suppress_ve_multi(d, &a.u.suppress_ve_multi);
>>> +
>>> +            if ( __copy_to_guest(arg, &a, 1) )
>>> +                rc = -EFAULT;
>>
>> Do you really want to replace a possible prior error here?
> 
> I thought about this and the only error that can be replaced here is 
> EINVAL. A error on __copy_to_guest has a grater importance if this fails.

I'm afraid I don't understand the reference to EINVAL.

As to "greater importance" - I'm not sure I follow. Please take a
look at e.g. do_event_channel_op(), but there are numerous other
examples throughout the tree. The pattern there is a common on,
and what you do here doesn't match that.

>>> --- a/xen/arch/x86/mm/p2m.c
>>> +++ b/xen/arch/x86/mm/p2m.c
>>> @@ -3059,6 +3059,66 @@ out:
>>>       return rc;
>>>   }
>>>   
>>> +/*
>>> + * Set/clear the #VE suppress bit for multiple pages.  Only available on VMX.
>>> + */
>>> +int p2m_set_suppress_ve_multi(struct domain *d,
>>> +                              struct xen_hvm_altp2m_suppress_ve_multi *sve)
>>> +{
>>> +    struct p2m_domain *host_p2m = p2m_get_hostp2m(d);
>>> +    struct p2m_domain *ap2m = NULL;
>>> +    struct p2m_domain *p2m;
>>> +    uint64_t start = sve->opaque ?: sve->first_gfn;
>>> +    int rc = 0;
>>> +
>>> +    if ( sve->view > 0 )
>>> +    {
>>> +        if ( sve->view >= MAX_ALTP2M ||
>>> +             d->arch.altp2m_eptp[sve->view] == mfn_x(INVALID_MFN) )
>>> +            return -EINVAL;
>>> +
>>> +        p2m = ap2m = d->arch.altp2m_p2m[sve->view];
>>
>> These want array_index_nospec() or alike used (and the pre-existing
>> similar uses taken care of in a separate patch).
> 
> Sure, this can change to p2m = ap2m = 
> d->arch.altp2m_p2m[array_index_nospec(sve->view, MAX_ALTP2M).
> 
> But what preexisting uses are you talking about? All the places where 
> d->arch.altp2m_p2m[idx] is used? If so, there will be a handful of 
> changes in that new patch.

Indeed, all the places where a caller (i.e. potentially guest)
provided value gets used as array index.

>>
>>> +    }
>>> +    else
>>> +        p2m = host_p2m;
>>
>> Each time I see yet another instance of this pattern appear, I
>> wonder why this is. Use (or not) of initializers should be
>> consistent at least within individual functions. I.e. either
>> you initialize both ap2m and p2m in their declaration, or you
>> do so for neither of them.
> 
> The only reason I can see for this pattern is that p2m will be assigned 
> a value but ap2m can never get a value. But I agree with you and I will 
> have them both initialized with NULL.

Why NULL? This isn't what I had in mind. Quite clearly you would
initialize p2m from host_p2m, eliminating the need for the "else"
altogether.

>>> +    while ( sve->last_gfn >= start )
>>
>> There are no checks on ->last_gfn, ->first_gfn, or ->opaque.
>> At the very least a bogus ->opaque should result in an error.
>> I wonder though why you don't simply update ->first_gfn,
>> omitting the need for ->opaque. All this would need is a
>> comment in the public header clarifying that callers should
>> expect the values to change.
> 
> I was following the pattern from range_share() after Tamas requested the 
> opaque field. I agree that it would be simpler to have ->first_gfn 
> update and I can change to that in the next version.
> 
>> Furthermore I think it would be helpful to bail on entirely
>> out of range ->first_gfn. This being a 64-bit field, only
>> 40 of the bits are actually usable from an architecture pov
>> (in reality it may be even less). Otherwise you potentially
>> invoke p2m_ept_set_entry() perhaps trillions of times just
>> for it to return -EINVAL from its first if().
> 
> Do you mean to check ->first_gfn(that will be updated in the next 
> version) against domain_get_maximum_gpfn() and bail after that range?

This may be one possibility (depending on what the inteneded
behavior for GFNs above this value is). Another would be to
simply judge from the guest's CPUID setting for the number of
physical address bits.

Jan
George Dunlap Dec. 6, 2019, 3:29 p.m. UTC | #4
On 11/21/19 3:02 PM, Alexandru Stefan ISAILA wrote:
> By default the sve bits are not set.
> This patch adds a new hypercall, xc_altp2m_set_supress_ve_multi(),
> to set a range of sve bits.
> The core function, p2m_set_suppress_ve_multi(), does not brake in case
> of a error and it is doing a best effort for setting the bits in the
> given range. A check for continuation is made in order to have
> preemption on big ranges.
> 
> Signed-off-by: Alexandru Isaila <aisaila@bitdefender.com>

There's something strangely deformed in your mail that makes it hard for
me to apply the patches to my tree, and I'm not sure why.

It seems the core mail is base64-encrypted; and that *inside* that
base64 encryption is a bunch of Windows-style linefeeds.  The result is
that when I try to download your series and apply it with git-am, I get
loads of rejected hunks with "^M" at the end of them.

Sometimes I've been able to work around this by going on patchew.org/Xen
and getting an mbox from there; but it doesn't seem to have your series
(perhaps because it doesn't have a cover letter).

Looking at the headers, it seems this is coming from git itself.  Do you
perhaps have "transferEncoding" set to "base64"?  If so, chance you
could try setting it to 'auto', and setting 'assume8bitEncoding = true"?

Not sure why this is such a pain; it seems like this should have been
sorted out somehow by now.

 -George
Alexandru Stefan ISAILA Dec. 12, 2019, 9:37 a.m. UTC | #5
On 06.12.2019 17:29, George Dunlap wrote:
> On 11/21/19 3:02 PM, Alexandru Stefan ISAILA wrote:
>> By default the sve bits are not set.
>> This patch adds a new hypercall, xc_altp2m_set_supress_ve_multi(),
>> to set a range of sve bits.
>> The core function, p2m_set_suppress_ve_multi(), does not brake in case
>> of a error and it is doing a best effort for setting the bits in the
>> given range. A check for continuation is made in order to have
>> preemption on big ranges.
>>
>> Signed-off-by: Alexandru Isaila <aisaila@bitdefender.com>
> 
> There's something strangely deformed in your mail that makes it hard for
> me to apply the patches to my tree, and I'm not sure why.
> 
> It seems the core mail is base64-encrypted; and that *inside* that
> base64 encryption is a bunch of Windows-style linefeeds.  The result is
> that when I try to download your series and apply it with git-am, I get
> loads of rejected hunks with "^M" at the end of them.
> 
> Sometimes I've been able to work around this by going on patchew.org/Xen
> and getting an mbox from there; but it doesn't seem to have your series
> (perhaps because it doesn't have a cover letter).
> 
> Looking at the headers, it seems this is coming from git itself.  Do you
> perhaps have "transferEncoding" set to "base64"?  If so, chance you
> could try setting it to 'auto', and setting 'assume8bitEncoding = true"?

I didn't have anything set for transferEncoding in .gitconfig but I can set
         assume8bitEncoding = yes
         transferEncoding = 8bit

for the future.

Sorry for the inconvenience.

Alex
George Dunlap Dec. 12, 2019, 11:26 a.m. UTC | #6
On 12/12/19 9:37 AM, Alexandru Stefan ISAILA wrote:
> 
> 
> On 06.12.2019 17:29, George Dunlap wrote:
>> On 11/21/19 3:02 PM, Alexandru Stefan ISAILA wrote:
>>> By default the sve bits are not set.
>>> This patch adds a new hypercall, xc_altp2m_set_supress_ve_multi(),
>>> to set a range of sve bits.
>>> The core function, p2m_set_suppress_ve_multi(), does not brake in case
>>> of a error and it is doing a best effort for setting the bits in the
>>> given range. A check for continuation is made in order to have
>>> preemption on big ranges.
>>>
>>> Signed-off-by: Alexandru Isaila <aisaila@bitdefender.com>
>>
>> There's something strangely deformed in your mail that makes it hard for
>> me to apply the patches to my tree, and I'm not sure why.
>>
>> It seems the core mail is base64-encrypted; and that *inside* that
>> base64 encryption is a bunch of Windows-style linefeeds.  The result is
>> that when I try to download your series and apply it with git-am, I get
>> loads of rejected hunks with "^M" at the end of them.
>>
>> Sometimes I've been able to work around this by going on patchew.org/Xen
>> and getting an mbox from there; but it doesn't seem to have your series
>> (perhaps because it doesn't have a cover letter).
>>
>> Looking at the headers, it seems this is coming from git itself.  Do you
>> perhaps have "transferEncoding" set to "base64"?  If so, chance you
>> could try setting it to 'auto', and setting 'assume8bitEncoding = true"?
> 
> I didn't have anything set for transferEncoding in .gitconfig but I can set
>          assume8bitEncoding = yes
>          transferEncoding = 8bit
> 
> for the future.
> 
> Sorry for the inconvenience.

Well, I'm also sorry that I'm having trouble on my end.  :-)  You'd
think that you doing "git send-email" and me doing "git am" would Just
Work(tm), and it's frustrating that it doesn't.  *Hopefully* those
changes will make it work; otherwise we'll have to figure out something
else.

 -George
Alexandru Stefan ISAILA Dec. 12, 2019, 12:54 p.m. UTC | #7
On 12.12.2019 13:26, George Dunlap wrote:
> On 12/12/19 9:37 AM, Alexandru Stefan ISAILA wrote:
>>
>>
>> On 06.12.2019 17:29, George Dunlap wrote:
>>> On 11/21/19 3:02 PM, Alexandru Stefan ISAILA wrote:
>>>> By default the sve bits are not set.
>>>> This patch adds a new hypercall, xc_altp2m_set_supress_ve_multi(),
>>>> to set a range of sve bits.
>>>> The core function, p2m_set_suppress_ve_multi(), does not brake in case
>>>> of a error and it is doing a best effort for setting the bits in the
>>>> given range. A check for continuation is made in order to have
>>>> preemption on big ranges.
>>>>
>>>> Signed-off-by: Alexandru Isaila <aisaila@bitdefender.com>
>>>
>>> There's something strangely deformed in your mail that makes it hard for
>>> me to apply the patches to my tree, and I'm not sure why.
>>>
>>> It seems the core mail is base64-encrypted; and that *inside* that
>>> base64 encryption is a bunch of Windows-style linefeeds.  The result is
>>> that when I try to download your series and apply it with git-am, I get
>>> loads of rejected hunks with "^M" at the end of them.
>>>
>>> Sometimes I've been able to work around this by going on patchew.org/Xen
>>> and getting an mbox from there; but it doesn't seem to have your series
>>> (perhaps because it doesn't have a cover letter).
>>>
>>> Looking at the headers, it seems this is coming from git itself.  Do you
>>> perhaps have "transferEncoding" set to "base64"?  If so, chance you
>>> could try setting it to 'auto', and setting 'assume8bitEncoding = true"?
>>
>> I didn't have anything set for transferEncoding in .gitconfig but I can set
>>           assume8bitEncoding = yes
>>           transferEncoding = 8bit
>>
>> for the future.
>>
>> Sorry for the inconvenience.
> 
> Well, I'm also sorry that I'm having trouble on my end.  :-)  You'd
> think that you doing "git send-email" and me doing "git am" would Just
> Work(tm), and it's frustrating that it doesn't.  *Hopefully* those
> changes will make it work; otherwise we'll have to figure out something
> else.
> 

We have to solve this somehow so on the next ver. please let me know if 
everything is ok.

Alex
Alexandru Stefan ISAILA Dec. 13, 2019, 10:08 a.m. UTC | #8
On 03.12.2019 10:14, Jan Beulich wrote:
> On 02.12.2019 15:40, Alexandru Stefan ISAILA wrote:
>> On 29.11.2019 13:31, Jan Beulich wrote:
>>> On 21.11.2019 16:02, Alexandru Stefan ISAILA wrote:
>>>> @@ -4711,6 +4712,18 @@ static int do_altp2m_op(
>>>>            }
>>>>            break;
>>>>    
>>>> +    case HVMOP_altp2m_set_suppress_ve_multi:
>>>> +        if ( a.u.suppress_ve_multi.pad1 || !a.u.suppress_ve_multi.pad2 )
>>>> +            rc = -EINVAL;
>>>> +        else
>>>> +        {
>>>> +            rc = p2m_set_suppress_ve_multi(d, &a.u.suppress_ve_multi);
>>>> +
>>>> +            if ( __copy_to_guest(arg, &a, 1) )
>>>> +                rc = -EFAULT;
>>>
>>> Do you really want to replace a possible prior error here?
>>
>> I thought about this and the only error that can be replaced here is
>> EINVAL. A error on __copy_to_guest has a grater importance if this fails.
> 
> I'm afraid I don't understand the reference to EINVAL.
> 
> As to "greater importance" - I'm not sure I follow. Please take a
> look at e.g. do_event_channel_op(), but there are numerous other
> examples throughout the tree. The pattern there is a common on,
> and what you do here doesn't match that.

I will follow that pattern.


> 
>>>> +    while ( sve->last_gfn >= start )
>>>
>>> There are no checks on ->last_gfn, ->first_gfn, or ->opaque.
>>> At the very least a bogus ->opaque should result in an error.
>>> I wonder though why you don't simply update ->first_gfn,
>>> omitting the need for ->opaque. All this would need is a
>>> comment in the public header clarifying that callers should
>>> expect the values to change.
>>
>> I was following the pattern from range_share() after Tamas requested the
>> opaque field. I agree that it would be simpler to have ->first_gfn
>> update and I can change to that in the next version.
>>
>>> Furthermore I think it would be helpful to bail on entirely
>>> out of range ->first_gfn. This being a 64-bit field, only
>>> 40 of the bits are actually usable from an architecture pov
>>> (in reality it may be even less). Otherwise you potentially
>>> invoke p2m_ept_set_entry() perhaps trillions of times just
>>> for it to return -EINVAL from its first if().
>>
>> Do you mean to check ->first_gfn(that will be updated in the next
>> version) against domain_get_maximum_gpfn() and bail after that range?
> 
> This may be one possibility (depending on what the inteneded
> behavior for GFNs above this value is). Another would be to
> simply judge from the guest's CPUID setting for the number of
> physical address bits.
> 

I will check ->first_gfn against d->arch.cpuid->extd.maxphysaddr and 
bail out on out of range.

Thanks,
Alex
Alexandru Stefan ISAILA Dec. 13, 2019, 10:08 a.m. UTC | #9
On 03.12.2019 10:14, Jan Beulich wrote:
> On 02.12.2019 15:40, Alexandru Stefan ISAILA wrote:
>> On 29.11.2019 13:31, Jan Beulich wrote:
>>> On 21.11.2019 16:02, Alexandru Stefan ISAILA wrote:
>>>> @@ -4711,6 +4712,18 @@ static int do_altp2m_op(
>>>>            }
>>>>            break;
>>>>    
>>>> +    case HVMOP_altp2m_set_suppress_ve_multi:
>>>> +        if ( a.u.suppress_ve_multi.pad1 || !a.u.suppress_ve_multi.pad2 )
>>>> +            rc = -EINVAL;
>>>> +        else
>>>> +        {
>>>> +            rc = p2m_set_suppress_ve_multi(d, &a.u.suppress_ve_multi);
>>>> +
>>>> +            if ( __copy_to_guest(arg, &a, 1) )
>>>> +                rc = -EFAULT;
>>>
>>> Do you really want to replace a possible prior error here?
>>
>> I thought about this and the only error that can be replaced here is
>> EINVAL. A error on __copy_to_guest has a grater importance if this fails.
> 
> I'm afraid I don't understand the reference to EINVAL.
> 
> As to "greater importance" - I'm not sure I follow. Please take a
> look at e.g. do_event_channel_op(), but there are numerous other
> examples throughout the tree. The pattern there is a common on,
> and what you do here doesn't match that.

I will follow that pattern.


> 
>>>> +    while ( sve->last_gfn >= start )
>>>
>>> There are no checks on ->last_gfn, ->first_gfn, or ->opaque.
>>> At the very least a bogus ->opaque should result in an error.
>>> I wonder though why you don't simply update ->first_gfn,
>>> omitting the need for ->opaque. All this would need is a
>>> comment in the public header clarifying that callers should
>>> expect the values to change.
>>
>> I was following the pattern from range_share() after Tamas requested the
>> opaque field. I agree that it would be simpler to have ->first_gfn
>> update and I can change to that in the next version.
>>
>>> Furthermore I think it would be helpful to bail on entirely
>>> out of range ->first_gfn. This being a 64-bit field, only
>>> 40 of the bits are actually usable from an architecture pov
>>> (in reality it may be even less). Otherwise you potentially
>>> invoke p2m_ept_set_entry() perhaps trillions of times just
>>> for it to return -EINVAL from its first if().
>>
>> Do you mean to check ->first_gfn(that will be updated in the next
>> version) against domain_get_maximum_gpfn() and bail after that range?
> 
> This may be one possibility (depending on what the inteneded
> behavior for GFNs above this value is). Another would be to
> simply judge from the guest's CPUID setting for the number of
> physical address bits.
> 

I will check ->first_gfn against d->arch.cpuid->extd.maxphysaddr and 
bail out on out of range.

Thanks,
Alex
diff mbox series

Patch

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index f4431687b3..356533e391 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -1923,6 +1923,10 @@  int xc_altp2m_switch_to_view(xc_interface *handle, uint32_t domid,
                              uint16_t view_id);
 int xc_altp2m_set_suppress_ve(xc_interface *handle, uint32_t domid,
                               uint16_t view_id, xen_pfn_t gfn, bool sve);
+int xc_altp2m_set_supress_ve_multi(xc_interface *handle, uint32_t domid,
+                                   uint16_t view_id, xen_pfn_t first_gfn,
+                                   xen_pfn_t last_gfn, bool sve,
+                                   xen_pfn_t *error);
 int xc_altp2m_get_suppress_ve(xc_interface *handle, uint32_t domid,
                               uint16_t view_id, xen_pfn_t gfn, bool *sve);
 int xc_altp2m_set_mem_access(xc_interface *handle, uint32_t domid,
diff --git a/tools/libxc/xc_altp2m.c b/tools/libxc/xc_altp2m.c
index 09dad0355e..03bd651a61 100644
--- a/tools/libxc/xc_altp2m.c
+++ b/tools/libxc/xc_altp2m.c
@@ -234,6 +234,36 @@  int xc_altp2m_set_suppress_ve(xc_interface *handle, uint32_t domid,
     return rc;
 }
 
+int xc_altp2m_set_supress_ve_multi(xc_interface *handle, uint32_t domid,
+                                   uint16_t view_id, xen_pfn_t first_gfn,
+                                   xen_pfn_t last_gfn, bool sve,
+                                   xen_pfn_t *error)
+{
+    int rc;
+    DECLARE_HYPERCALL_BUFFER(xen_hvm_altp2m_op_t, arg);
+
+    arg = xc_hypercall_buffer_alloc(handle, arg, sizeof(*arg));
+    if ( arg == NULL )
+        return -1;
+
+    arg->version = HVMOP_ALTP2M_INTERFACE_VERSION;
+    arg->cmd = HVMOP_altp2m_set_suppress_ve_multi;
+    arg->domain = domid;
+    arg->u.suppress_ve_multi.view = view_id;
+    arg->u.suppress_ve_multi.first_gfn = first_gfn;
+    arg->u.suppress_ve_multi.last_gfn = last_gfn;
+    arg->u.suppress_ve_multi.suppress_ve = sve;
+
+    rc = xencall2(handle->xcall, __HYPERVISOR_hvm_op, HVMOP_altp2m,
+                  HYPERCALL_BUFFER_AS_ARG(arg));
+
+    if ( arg->u.suppress_ve_multi.first_error )
+        *error = arg->u.suppress_ve_multi.first_error;
+
+    xc_hypercall_buffer_free(handle, arg);
+    return rc;
+}
+
 int xc_altp2m_set_mem_access(xc_interface *handle, uint32_t domid,
                              uint16_t view_id, xen_pfn_t gfn,
                              xenmem_access_t access)
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 818e705fd1..8a2d4325f9 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4553,6 +4553,7 @@  static int do_altp2m_op(
     case HVMOP_altp2m_destroy_p2m:
     case HVMOP_altp2m_switch_p2m:
     case HVMOP_altp2m_set_suppress_ve:
+    case HVMOP_altp2m_set_suppress_ve_multi:
     case HVMOP_altp2m_get_suppress_ve:
     case HVMOP_altp2m_set_mem_access:
     case HVMOP_altp2m_set_mem_access_multi:
@@ -4711,6 +4712,18 @@  static int do_altp2m_op(
         }
         break;
 
+    case HVMOP_altp2m_set_suppress_ve_multi:
+        if ( a.u.suppress_ve_multi.pad1 || !a.u.suppress_ve_multi.pad2 )
+            rc = -EINVAL;
+        else
+        {
+            rc = p2m_set_suppress_ve_multi(d, &a.u.suppress_ve_multi);
+
+            if ( __copy_to_guest(arg, &a, 1) )
+                rc = -EFAULT;
+        }
+        break;
+
     case HVMOP_altp2m_get_suppress_ve:
         if ( a.u.suppress_ve.pad1 || a.u.suppress_ve.pad2 )
             rc = -EINVAL;
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index ba126f790a..2b51a7f50f 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -3059,6 +3059,66 @@  out:
     return rc;
 }
 
+/*
+ * Set/clear the #VE suppress bit for multiple pages.  Only available on VMX.
+ */
+int p2m_set_suppress_ve_multi(struct domain *d,
+                              struct xen_hvm_altp2m_suppress_ve_multi *sve)
+{
+    struct p2m_domain *host_p2m = p2m_get_hostp2m(d);
+    struct p2m_domain *ap2m = NULL;
+    struct p2m_domain *p2m;
+    uint64_t start = sve->opaque ?: sve->first_gfn;
+    int rc = 0;
+
+    if ( sve->view > 0 )
+    {
+        if ( sve->view >= MAX_ALTP2M ||
+             d->arch.altp2m_eptp[sve->view] == mfn_x(INVALID_MFN) )
+            return -EINVAL;
+
+        p2m = ap2m = d->arch.altp2m_p2m[sve->view];
+    }
+    else
+        p2m = host_p2m;
+
+    p2m_lock(host_p2m);
+
+    if ( ap2m )
+        p2m_lock(ap2m);
+
+
+    while ( sve->last_gfn >= start )
+    {
+        p2m_access_t a;
+        p2m_type_t t;
+        mfn_t mfn;
+
+        if ( altp2m_get_effective_entry(p2m, _gfn(start), &mfn, &t, &a, AP2MGET_query) )
+            a = p2m->default_access;
+
+        if ( p2m->set_entry(p2m, _gfn(start), mfn, PAGE_ORDER_4K, t, a,
+                            sve->suppress_ve) && !sve->first_error )
+            sve->first_error = start; /* Save the gfn from of the first error */
+
+        /* Check for continuation if it's not the last iteration. */
+        if ( sve->last_gfn >= ++start && hypercall_preempt_check() )
+        {
+            rc = -ERESTART;
+            break;
+        }
+    }
+
+    sve->opaque = start;
+
+    if ( ap2m )
+        p2m_unlock(ap2m);
+
+    p2m_unlock(host_p2m);
+
+    return rc;
+}
+
 int p2m_get_suppress_ve(struct domain *d, gfn_t gfn, bool *suppress_ve,
                         unsigned int altp2m_idx)
 {
diff --git a/xen/include/public/hvm/hvm_op.h b/xen/include/public/hvm/hvm_op.h
index 353f8034d9..10ba0149f1 100644
--- a/xen/include/public/hvm/hvm_op.h
+++ b/xen/include/public/hvm/hvm_op.h
@@ -46,6 +46,17 @@  struct xen_hvm_altp2m_suppress_ve {
     uint64_t gfn;
 };
 
+struct xen_hvm_altp2m_suppress_ve_multi {
+    uint16_t view;
+    uint8_t suppress_ve; /* Boolean type. */
+    uint8_t pad1;
+    uint32_t pad2;
+    uint64_t first_gfn;
+    uint64_t last_gfn;
+    uint64_t opaque;
+    uint64_t first_error; /* Gfn of the first error. */
+};
+
 #if __XEN_INTERFACE_VERSION__ < 0x00040900
 
 /* Set the logical level of one of a domain's PCI INTx wires. */
@@ -339,6 +350,8 @@  struct xen_hvm_altp2m_op {
 #define HVMOP_altp2m_vcpu_disable_notify  13
 /* Get the active vcpu p2m index */
 #define HVMOP_altp2m_get_p2m_idx          14
+/* Set the "Supress #VE" bit for a range of pages */
+#define HVMOP_altp2m_set_suppress_ve_multi 15
     domid_t domain;
     uint16_t pad1;
     uint32_t pad2;
@@ -353,6 +366,7 @@  struct xen_hvm_altp2m_op {
         struct xen_hvm_altp2m_change_gfn           change_gfn;
         struct xen_hvm_altp2m_set_mem_access_multi set_mem_access_multi;
         struct xen_hvm_altp2m_suppress_ve          suppress_ve;
+        struct xen_hvm_altp2m_suppress_ve_multi    suppress_ve_multi;
         struct xen_hvm_altp2m_vcpu_disable_notify  disable_notify;
         struct xen_hvm_altp2m_get_vcpu_p2m_idx     get_vcpu_p2m_idx;
         uint8_t pad[64];
diff --git a/xen/include/xen/mem_access.h b/xen/include/xen/mem_access.h
index e4d24502e0..00e594a0ad 100644
--- a/xen/include/xen/mem_access.h
+++ b/xen/include/xen/mem_access.h
@@ -75,6 +75,9 @@  long p2m_set_mem_access_multi(struct domain *d,
 int p2m_set_suppress_ve(struct domain *d, gfn_t gfn, bool suppress_ve,
                         unsigned int altp2m_idx);
 
+int p2m_set_suppress_ve_multi(struct domain *d,
+                              struct xen_hvm_altp2m_suppress_ve_multi *suppress_ve);
+
 int p2m_get_suppress_ve(struct domain *d, gfn_t gfn, bool *suppress_ve,
                         unsigned int altp2m_idx);