diff mbox series

[1/3] x86/pv: Options to disable and/or compile out 32bit PV support

Message ID 20200417155004.16806-2-andrew.cooper3@citrix.com (mailing list archive)
State New, archived
Headers show
Series x86/pv: Start to trim 32bit support | expand

Commit Message

Andrew Cooper April 17, 2020, 3:50 p.m. UTC
This is the start of some performance and security-hardening improvements,
based on the fact that 32bit PV guests are few and far between these days.

Ring1 is full or architectural corner cases, such as counting as supervisor
from a paging point of view.  This accounts for a substantial performance hit
on processors from the last 8 years (adjusting SMEP/SMAP on every privilege
transition), and the gap is only going to get bigger with new hardware
features.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Wei Liu <wl@xen.org>
CC: Roger Pau Monné <roger.pau@citrix.com>

There is a series I can't quite post yet which wants to conditionally turn
opt_pv32 off, which is why I've put it straight in in an int8_t form rather
than a straight boolean form.
---
 docs/misc/xen-command-line.pandoc | 12 +++++++++++-
 xen/arch/x86/Kconfig              | 16 ++++++++++++++++
 xen/arch/x86/pv/domain.c          | 35 +++++++++++++++++++++++++++++++++++
 xen/arch/x86/setup.c              |  9 +++++++--
 xen/include/asm-x86/pv/domain.h   |  6 ++++++
 5 files changed, 75 insertions(+), 3 deletions(-)

Comments

Roger Pau Monne April 20, 2020, 1:47 p.m. UTC | #1
On Fri, Apr 17, 2020 at 04:50:02PM +0100, Andrew Cooper wrote:
> This is the start of some performance and security-hardening improvements,
> based on the fact that 32bit PV guests are few and far between these days.
> 
> Ring1 is full or architectural corner cases, such as counting as supervisor
                ^ of
> from a paging point of view.  This accounts for a substantial performance hit
> on processors from the last 8 years (adjusting SMEP/SMAP on every privilege
> transition), and the gap is only going to get bigger with new hardware
> features.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> CC: Jan Beulich <JBeulich@suse.com>
> CC: Wei Liu <wl@xen.org>
> CC: Roger Pau Monné <roger.pau@citrix.com>
> 
> There is a series I can't quite post yet which wants to conditionally turn
> opt_pv32 off, which is why I've put it straight in in an int8_t form rather

s/in in/in/

> than a straight boolean form.
> ---
>  docs/misc/xen-command-line.pandoc | 12 +++++++++++-
>  xen/arch/x86/Kconfig              | 16 ++++++++++++++++
>  xen/arch/x86/pv/domain.c          | 35 +++++++++++++++++++++++++++++++++++
>  xen/arch/x86/setup.c              |  9 +++++++--
>  xen/include/asm-x86/pv/domain.h   |  6 ++++++
>  5 files changed, 75 insertions(+), 3 deletions(-)
> 
> diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
> index acd0b3d994..ee12b0f53f 100644
> --- a/docs/misc/xen-command-line.pandoc
> +++ b/docs/misc/xen-command-line.pandoc
> @@ -1694,7 +1694,17 @@ The following resources are available:
>      CDP, one COS will corespond two CBMs other than one with CAT, due to the
>      sum of CBMs is fixed, that means actual `cos_max` in use will automatically
>      reduce to half when CDP is enabled.
> -	
> +
> +### pv
> +    = List of [ 32=<bool> ]
> +
> +    Applicability: x86
> +
> +Controls for aspects of PV guest support.
> +
> +*   The `32` boolean controls whether 32bit PV guests can be created.  It
> +    defaults to `true`, and is ignored when `CONFIG_PV32` is compiled out.
> +
>  ### pv-linear-pt (x86)
>  > `= <boolean>`
>  
> diff --git a/xen/arch/x86/Kconfig b/xen/arch/x86/Kconfig
> index 8149362bde..4c52197de3 100644
> --- a/xen/arch/x86/Kconfig
> +++ b/xen/arch/x86/Kconfig
> @@ -49,6 +49,22 @@ config PV
>  
>  	  If unsure, say Y.
>  
> +config PV32
> +	bool "Support for 32bit PV guests"
> +	depends on PV
> +	default y
> +	---help---
> +	  The 32bit PV ABI uses Ring1, an area of the x86 architecture which
> +	  was deprecated and mostly removed in the AMD64 spec.  As a result,
> +	  it occasionally conflicts with newer x86 hardware features, causing
> +	  overheads for Xen to maintain backwards compatibility.
> +
> +	  People may wish to disable 32bit PV guests for attack surface
> +	  reduction, or performance reasons.  Backwards compatibility can be
> +	  provided via the PV Shim mechanism.
> +
> +	  If unsure, say Y.
> +
>  config PV_LINEAR_PT
>         bool "Support for PV linear pagetables"
>         depends on PV
> diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
> index 70fae43965..47a0db082f 100644
> --- a/xen/arch/x86/pv/domain.c
> +++ b/xen/arch/x86/pv/domain.c
> @@ -16,6 +16,39 @@
>  #include <asm/pv/domain.h>
>  #include <asm/shadow.h>
>  
> +#ifdef CONFIG_PV32
> +int8_t __read_mostly opt_pv32 = -1;
> +#endif
> +
> +static int parse_pv(const char *s)

__init

With that:

Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

Thanks, Roger.
Jan Beulich April 20, 2020, 2:05 p.m. UTC | #2
On 17.04.2020 17:50, Andrew Cooper wrote:
> This is the start of some performance and security-hardening improvements,
> based on the fact that 32bit PV guests are few and far between these days.
> 
> Ring1 is full or architectural corner cases, such as counting as supervisor

... full of ...

> --- a/docs/misc/xen-command-line.pandoc
> +++ b/docs/misc/xen-command-line.pandoc
> @@ -1694,7 +1694,17 @@ The following resources are available:
>      CDP, one COS will corespond two CBMs other than one with CAT, due to the
>      sum of CBMs is fixed, that means actual `cos_max` in use will automatically
>      reduce to half when CDP is enabled.
> -	
> +
> +### pv
> +    = List of [ 32=<bool> ]
> +
> +    Applicability: x86
> +
> +Controls for aspects of PV guest support.
> +
> +*   The `32` boolean controls whether 32bit PV guests can be created.  It
> +    defaults to `true`, and is ignored when `CONFIG_PV32` is compiled out.

Rather than ignoring in the way you do, how about ...

> --- a/xen/arch/x86/pv/domain.c
> +++ b/xen/arch/x86/pv/domain.c
> @@ -16,6 +16,39 @@
>  #include <asm/pv/domain.h>
>  #include <asm/shadow.h>
>  
> +#ifdef CONFIG_PV32
> +int8_t __read_mostly opt_pv32 = -1;
> +#endif
> +
> +static int parse_pv(const char *s)
> +{
> +    const char *ss;
> +    int val, rc = 0;
> +
> +    do {
> +        ss = strchr(s, ',');
> +        if ( !ss )
> +            ss = strchr(s, '\0');
> +
> +        if ( (val = parse_boolean("32", s, ss)) >= 0 )
> +        {
> +#ifdef CONFIG_PV32
> +            opt_pv32 = val;
> +#else
> +            printk(XENLOG_INFO
> +                   "CONFIG_PV32 disabled - ignoring 'pv=32' setting\n");
> +#endif
> +        }

... moving the #ifdef ahead of the if(), and the #endif here (may
want converting to "else if" with a placeholder if(0) for this
purpose), with the separate printk() dropped? I'm in particular
concerned that we may gain a large number of such printk()s over
time, if we added them in such cases. See parse_iommu_param() for
how I'd prefer things to look like in the long run.

Jan
Jan Beulich April 20, 2020, 2:15 p.m. UTC | #3
On 17.04.2020 17:50, Andrew Cooper wrote:
> --- a/xen/arch/x86/Kconfig
> +++ b/xen/arch/x86/Kconfig
> @@ -49,6 +49,22 @@ config PV
>  
>  	  If unsure, say Y.
>  
> +config PV32
> +	bool "Support for 32bit PV guests"
> +	depends on PV
> +	default y

I guess I can see why you don't want an EXPERT dependency here, but
I guess we really need to settle on our (as a community) position
on the growth of varying configs people can build and expect to be
supported.

Jan
Andrew Cooper April 20, 2020, 5:31 p.m. UTC | #4
On 20/04/2020 14:47, Roger Pau Monné wrote:
> On Fri, Apr 17, 2020 at 04:50:02PM +0100, Andrew Cooper wrote:
>> This is the start of some performance and security-hardening improvements,
>> based on the fact that 32bit PV guests are few and far between these days.
>>
>> Ring1 is full or architectural corner cases, such as counting as supervisor
>                 ^ of

Already fixed (I spotted it 30s after posting).

>> from a paging point of view.  This accounts for a substantial performance hit
>> on processors from the last 8 years (adjusting SMEP/SMAP on every privilege
>> transition), and the gap is only going to get bigger with new hardware
>> features.
>>
>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
>> ---
>> CC: Jan Beulich <JBeulich@suse.com>
>> CC: Wei Liu <wl@xen.org>
>> CC: Roger Pau Monné <roger.pau@citrix.com>
>>
>> There is a series I can't quite post yet which wants to conditionally turn
>> opt_pv32 off, which is why I've put it straight in in an int8_t form rather
> s/in in/in/

"in in" is legitimate in some cases, despite it looking awkard.   In
this case, the structure is "straight in", separate from "in an int8_t
form".

If this sentence were for inclusion in the commit message, I'd have
probably spent more effort trying to phrase it differently.

>
>> than a straight boolean form.
>> ---
>>  docs/misc/xen-command-line.pandoc | 12 +++++++++++-
>>  xen/arch/x86/Kconfig              | 16 ++++++++++++++++
>>  xen/arch/x86/pv/domain.c          | 35 +++++++++++++++++++++++++++++++++++
>>  xen/arch/x86/setup.c              |  9 +++++++--
>>  xen/include/asm-x86/pv/domain.h   |  6 ++++++
>>  5 files changed, 75 insertions(+), 3 deletions(-)
>>
>> diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
>> index acd0b3d994..ee12b0f53f 100644
>> --- a/docs/misc/xen-command-line.pandoc
>> +++ b/docs/misc/xen-command-line.pandoc
>> @@ -1694,7 +1694,17 @@ The following resources are available:
>>      CDP, one COS will corespond two CBMs other than one with CAT, due to the
>>      sum of CBMs is fixed, that means actual `cos_max` in use will automatically
>>      reduce to half when CDP is enabled.
>> -	
>> +
>> +### pv
>> +    = List of [ 32=<bool> ]
>> +
>> +    Applicability: x86
>> +
>> +Controls for aspects of PV guest support.
>> +
>> +*   The `32` boolean controls whether 32bit PV guests can be created.  It
>> +    defaults to `true`, and is ignored when `CONFIG_PV32` is compiled out.
>> +
>>  ### pv-linear-pt (x86)
>>  > `= <boolean>`
>>  
>> diff --git a/xen/arch/x86/Kconfig b/xen/arch/x86/Kconfig
>> index 8149362bde..4c52197de3 100644
>> --- a/xen/arch/x86/Kconfig
>> +++ b/xen/arch/x86/Kconfig
>> @@ -49,6 +49,22 @@ config PV
>>  
>>  	  If unsure, say Y.
>>  
>> +config PV32
>> +	bool "Support for 32bit PV guests"
>> +	depends on PV
>> +	default y
>> +	---help---
>> +	  The 32bit PV ABI uses Ring1, an area of the x86 architecture which
>> +	  was deprecated and mostly removed in the AMD64 spec.  As a result,
>> +	  it occasionally conflicts with newer x86 hardware features, causing
>> +	  overheads for Xen to maintain backwards compatibility.
>> +
>> +	  People may wish to disable 32bit PV guests for attack surface
>> +	  reduction, or performance reasons.  Backwards compatibility can be
>> +	  provided via the PV Shim mechanism.
>> +
>> +	  If unsure, say Y.
>> +
>>  config PV_LINEAR_PT
>>         bool "Support for PV linear pagetables"
>>         depends on PV
>> diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
>> index 70fae43965..47a0db082f 100644
>> --- a/xen/arch/x86/pv/domain.c
>> +++ b/xen/arch/x86/pv/domain.c
>> @@ -16,6 +16,39 @@
>>  #include <asm/pv/domain.h>
>>  #include <asm/shadow.h>
>>  
>> +#ifdef CONFIG_PV32
>> +int8_t __read_mostly opt_pv32 = -1;
>> +#endif
>> +
>> +static int parse_pv(const char *s)
> __init
>
> With that:
>
> Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

Thanks,

~Andrew
Andrew Cooper April 20, 2020, 6:05 p.m. UTC | #5
On 20/04/2020 15:05, Jan Beulich wrote:
> On 17.04.2020 17:50, Andrew Cooper wrote:
>> This is the start of some performance and security-hardening improvements,
>> based on the fact that 32bit PV guests are few and far between these days.
>>
>> Ring1 is full or architectural corner cases, such as counting as supervisor
> ... full of ...
>
>> --- a/docs/misc/xen-command-line.pandoc
>> +++ b/docs/misc/xen-command-line.pandoc
>> @@ -1694,7 +1694,17 @@ The following resources are available:
>>      CDP, one COS will corespond two CBMs other than one with CAT, due to the
>>      sum of CBMs is fixed, that means actual `cos_max` in use will automatically
>>      reduce to half when CDP is enabled.
>> -	
>> +
>> +### pv
>> +    = List of [ 32=<bool> ]
>> +
>> +    Applicability: x86
>> +
>> +Controls for aspects of PV guest support.
>> +
>> +*   The `32` boolean controls whether 32bit PV guests can be created.  It
>> +    defaults to `true`, and is ignored when `CONFIG_PV32` is compiled out.
> Rather than ignoring in the way you do, how about ...
>
>> --- a/xen/arch/x86/pv/domain.c
>> +++ b/xen/arch/x86/pv/domain.c
>> @@ -16,6 +16,39 @@
>>  #include <asm/pv/domain.h>
>>  #include <asm/shadow.h>
>>  
>> +#ifdef CONFIG_PV32
>> +int8_t __read_mostly opt_pv32 = -1;
>> +#endif
>> +
>> +static int parse_pv(const char *s)
>> +{
>> +    const char *ss;
>> +    int val, rc = 0;
>> +
>> +    do {
>> +        ss = strchr(s, ',');
>> +        if ( !ss )
>> +            ss = strchr(s, '\0');
>> +
>> +        if ( (val = parse_boolean("32", s, ss)) >= 0 )
>> +        {
>> +#ifdef CONFIG_PV32
>> +            opt_pv32 = val;
>> +#else
>> +            printk(XENLOG_INFO
>> +                   "CONFIG_PV32 disabled - ignoring 'pv=32' setting\n");
>> +#endif
>> +        }
> ... moving the #ifdef ahead of the if(), and the #endif here (may
> want converting to "else if" with a placeholder if(0) for this
> purpose), with the separate printk() dropped?

In this specific case, it would be even more awkward as there is no use
of val outside of the ifdef.

> I'm in particular
> concerned that we may gain a large number of such printk()s over
> time, if we added them in such cases.

The printk() was a bit of an afterthought, but deliberately avoiding the
-EINVAL path was specifically not.

In the case that the user tries to use `pv=no-32` without CONFIG_PV32,
they should see something other than

(XEN) parameter "pv=no-32" unknown!

I don't think overloading the return value is a clever move, because
then every parse function has to take care of ensuring that -EOPNOTSUPP
(or ENODEV?) never clobbers -EINVAL.

We could have a generic helper which looks like:

static inline void ignored_param(const char *cfg, const char *name,
const char *s, const char *ss)
{
    printk(XENLOG_INFO "%s disabled - ignoring '%s=%*.s' setting\n",
cfg, name, s, (int)(ss - s));
}

which at least would keep all the users consistent.

> See parse_iommu_param() for
> how I'd prefer things to look like in the long run.

I'm aware of that, just as you are aware of my specific dislike for the
ifndefs, which make the logic opaque and hard to follow.

~Andrew
Jan Beulich April 21, 2020, 6:02 a.m. UTC | #6
On 20.04.2020 20:05, Andrew Cooper wrote:
> On 20/04/2020 15:05, Jan Beulich wrote:
>> On 17.04.2020 17:50, Andrew Cooper wrote:
>>> This is the start of some performance and security-hardening improvements,
>>> based on the fact that 32bit PV guests are few and far between these days.
>>>
>>> Ring1 is full or architectural corner cases, such as counting as supervisor
>> ... full of ...
>>
>>> --- a/docs/misc/xen-command-line.pandoc
>>> +++ b/docs/misc/xen-command-line.pandoc
>>> @@ -1694,7 +1694,17 @@ The following resources are available:
>>>      CDP, one COS will corespond two CBMs other than one with CAT, due to the
>>>      sum of CBMs is fixed, that means actual `cos_max` in use will automatically
>>>      reduce to half when CDP is enabled.
>>> -	
>>> +
>>> +### pv
>>> +    = List of [ 32=<bool> ]
>>> +
>>> +    Applicability: x86
>>> +
>>> +Controls for aspects of PV guest support.
>>> +
>>> +*   The `32` boolean controls whether 32bit PV guests can be created.  It
>>> +    defaults to `true`, and is ignored when `CONFIG_PV32` is compiled out.
>> Rather than ignoring in the way you do, how about ...
>>
>>> --- a/xen/arch/x86/pv/domain.c
>>> +++ b/xen/arch/x86/pv/domain.c
>>> @@ -16,6 +16,39 @@
>>>  #include <asm/pv/domain.h>
>>>  #include <asm/shadow.h>
>>>  
>>> +#ifdef CONFIG_PV32
>>> +int8_t __read_mostly opt_pv32 = -1;
>>> +#endif
>>> +
>>> +static int parse_pv(const char *s)
>>> +{
>>> +    const char *ss;
>>> +    int val, rc = 0;
>>> +
>>> +    do {
>>> +        ss = strchr(s, ',');
>>> +        if ( !ss )
>>> +            ss = strchr(s, '\0');
>>> +
>>> +        if ( (val = parse_boolean("32", s, ss)) >= 0 )
>>> +        {
>>> +#ifdef CONFIG_PV32
>>> +            opt_pv32 = val;
>>> +#else
>>> +            printk(XENLOG_INFO
>>> +                   "CONFIG_PV32 disabled - ignoring 'pv=32' setting\n");
>>> +#endif
>>> +        }
>> ... moving the #ifdef ahead of the if(), and the #endif here (may
>> want converting to "else if" with a placeholder if(0) for this
>> purpose), with the separate printk() dropped?
> 
> In this specific case, it would be even more awkward as there is no use
> of val outside of the ifdef.

True, but can be dealt with in the body of the suggested placeholder
if(0).

>> I'm in particular
>> concerned that we may gain a large number of such printk()s over
>> time, if we added them in such cases.
> 
> The printk() was a bit of an afterthought, but deliberately avoiding the
> -EINVAL path was specifically not.
> 
> In the case that the user tries to use `pv=no-32` without CONFIG_PV32,
> they should see something other than
> 
> (XEN) parameter "pv=no-32" unknown!

Why - to this specific build of Xen the parameter is unknown.

> I don't think overloading the return value is a clever move, because
> then every parse function has to take care of ensuring that -EOPNOTSUPP
> (or ENODEV?) never clobbers -EINVAL.

I didn't suggest overloading the return value. Instead I
specifically want this to go the -EINVAL path.

> We could have a generic helper which looks like:
> 
> static inline void ignored_param(const char *cfg, const char *name,
> const char *s, const char *ss)
> {
>     printk(XENLOG_INFO "%s disabled - ignoring '%s=%*.s' setting\n",
> cfg, name, s, (int)(ss - s));
> }
> 
> which at least would keep all the users consistent.

Further bloating the binary with (almost) useless string literals.
I'd specifically like to avoid this.

>> See parse_iommu_param() for
>> how I'd prefer things to look like in the long run.
> 
> I'm aware of that, just as you are aware of my specific dislike for the
> ifndefs, which make the logic opaque and hard to follow.

A matter of taste and/or perception, I guess, but yes, I'm aware.
I don't recall a suggestion (from you or anyone else) as to a good
alternative, though. What I'd like to achieve is that command line
options valid only in certain configurations get similar treatment
no matter by whom they were added. It doesn't need to be my way of
handling, but I'm pretty convinced that consistency especially in
such "user interface" aspects is very desirable goal.

Jan
Andrew Cooper April 23, 2020, 5:35 p.m. UTC | #7
On 21/04/2020 07:02, Jan Beulich wrote:
> On 20.04.2020 20:05, Andrew Cooper wrote:
>> On 20/04/2020 15:05, Jan Beulich wrote:
>>> I'm in particular
>>> concerned that we may gain a large number of such printk()s over
>>> time, if we added them in such cases.
>> The printk() was a bit of an afterthought, but deliberately avoiding the
>> -EINVAL path was specifically not.
>>
>> In the case that the user tries to use `pv=no-32` without CONFIG_PV32,
>> they should see something other than
>>
>> (XEN) parameter "pv=no-32" unknown!
> Why - to this specific build of Xen the parameter is unknown.

Because it is unnecessarily problematic and borderline obnoxious to
users, as well as occasional Xen developers.

"you've not got the correct CONFIG_$X for that to be meaningful" is
specifically useful to separate from "I've got no idea".

>> I don't think overloading the return value is a clever move, because
>> then every parse function has to take care of ensuring that -EOPNOTSUPP
>> (or ENODEV?) never clobbers -EINVAL.
> I didn't suggest overloading the return value. Instead I
> specifically want this to go the -EINVAL path.
>
>> We could have a generic helper which looks like:
>>
>> static inline void ignored_param(const char *cfg, const char *name,
>> const char *s, const char *ss)
>> {
>>     printk(XENLOG_INFO "%s disabled - ignoring '%s=%*.s' setting\n",
>> cfg, name, s, (int)(ss - s));
>> }
>>
>> which at least would keep all the users consistent.
> Further bloating the binary with (almost) useless string literals.
> I'd specifically like to avoid this.

I don't accept that as a valid argument.

We're talking about literally tens of bytes (which will merge anyway, so
0 in practice), and a resulting UI which helps people get out of
problems rather than penalises them for having gotten into a problem to
begin with.

I will absolutely prioritise a more helpful UI over a handful of bytes.

~Andrew
Jürgen Groß April 24, 2020, 5:28 a.m. UTC | #8
On 23.04.20 19:35, Andrew Cooper wrote:
> On 21/04/2020 07:02, Jan Beulich wrote:
>> On 20.04.2020 20:05, Andrew Cooper wrote:
>>> On 20/04/2020 15:05, Jan Beulich wrote:
>>>> I'm in particular
>>>> concerned that we may gain a large number of such printk()s over
>>>> time, if we added them in such cases.
>>> The printk() was a bit of an afterthought, but deliberately avoiding the
>>> -EINVAL path was specifically not.
>>>
>>> In the case that the user tries to use `pv=no-32` without CONFIG_PV32,
>>> they should see something other than
>>>
>>> (XEN) parameter "pv=no-32" unknown!
>> Why - to this specific build of Xen the parameter is unknown.
> 
> Because it is unnecessarily problematic and borderline obnoxious to
> users, as well as occasional Xen developers.
> 
> "you've not got the correct CONFIG_$X for that to be meaningful" is
> specifically useful to separate from "I've got no idea".
> 
>>> I don't think overloading the return value is a clever move, because
>>> then every parse function has to take care of ensuring that -EOPNOTSUPP
>>> (or ENODEV?) never clobbers -EINVAL.
>> I didn't suggest overloading the return value. Instead I
>> specifically want this to go the -EINVAL path.
>>
>>> We could have a generic helper which looks like:
>>>
>>> static inline void ignored_param(const char *cfg, const char *name,
>>> const char *s, const char *ss)
>>> {
>>>      printk(XENLOG_INFO "%s disabled - ignoring '%s=%*.s' setting\n",
>>> cfg, name, s, (int)(ss - s));
>>> }
>>>
>>> which at least would keep all the users consistent.
>> Further bloating the binary with (almost) useless string literals.
>> I'd specifically like to avoid this.
> 
> I don't accept that as a valid argument.
> 
> We're talking about literally tens of bytes (which will merge anyway, so
> 0 in practice), and a resulting UI which helps people get out of
> problems rather than penalises them for having gotten into a problem to
> begin with.
> 
> I will absolutely prioritise a more helpful UI over a handful of bytes.

What about a kconfig option (defaulting to "yes") enabling this feature?

That way an embedded variant can be made smaller while a server one is
more user friendly.


Juergen
Jan Beulich April 24, 2020, 6:11 a.m. UTC | #9
On 23.04.2020 19:35, Andrew Cooper wrote:
> On 21/04/2020 07:02, Jan Beulich wrote:
>> On 20.04.2020 20:05, Andrew Cooper wrote:
>>> On 20/04/2020 15:05, Jan Beulich wrote:
>>>> I'm in particular
>>>> concerned that we may gain a large number of such printk()s over
>>>> time, if we added them in such cases.
>>> The printk() was a bit of an afterthought, but deliberately avoiding the
>>> -EINVAL path was specifically not.
>>>
>>> In the case that the user tries to use `pv=no-32` without CONFIG_PV32,
>>> they should see something other than
>>>
>>> (XEN) parameter "pv=no-32" unknown!
>> Why - to this specific build of Xen the parameter is unknown.
> 
> Because it is unnecessarily problematic and borderline obnoxious to
> users, as well as occasional Xen developers.
> 
> "you've not got the correct CONFIG_$X for that to be meaningful" is
> specifically useful to separate from "I've got no idea".
> 
>>> I don't think overloading the return value is a clever move, because
>>> then every parse function has to take care of ensuring that -EOPNOTSUPP
>>> (or ENODEV?) never clobbers -EINVAL.
>> I didn't suggest overloading the return value. Instead I
>> specifically want this to go the -EINVAL path.
>>
>>> We could have a generic helper which looks like:
>>>
>>> static inline void ignored_param(const char *cfg, const char *name,
>>> const char *s, const char *ss)
>>> {
>>>     printk(XENLOG_INFO "%s disabled - ignoring '%s=%*.s' setting\n",
>>> cfg, name, s, (int)(ss - s));
>>> }
>>>
>>> which at least would keep all the users consistent.
>> Further bloating the binary with (almost) useless string literals.
>> I'd specifically like to avoid this.
> 
> I don't accept that as a valid argument.
> 
> We're talking about literally tens of bytes (which will merge anyway, so
> 0 in practice), and a resulting UI which helps people get out of
> problems rather than penalises them for having gotten into a problem to
> begin with.

How will they merge? The different instances of the format string
above may/should, yes, but the different "CONFIG_xyz" literals the
callers pass won't, for example.

> I will absolutely prioritise a more helpful UI over a handful of bytes.

This "a handful of bytes doesn't matter" attitude has lead to
imo unacceptable growth of various software packages over the
years.

Anyway - I don't want to block this change over this argument,
so I'm willing to ack a patch with the helper introduced (and
preferably the "CONFIG_" part of the cfg arguments moved into
the helper's format string), as long as we plan to then make
consistent use of the helper everywhere. That said, I don't
immediately see what would be passed for "cfg" in some of
parse_iommu_param()'s cases.

Jan
Andrew Cooper April 27, 2020, 8:02 p.m. UTC | #10
On 24/04/2020 06:28, Jürgen Groß wrote:
> On 23.04.20 19:35, Andrew Cooper wrote:
>> On 21/04/2020 07:02, Jan Beulich wrote:
>>> On 20.04.2020 20:05, Andrew Cooper wrote:
>>>> On 20/04/2020 15:05, Jan Beulich wrote:
>>>>> I'm in particular
>>>>> concerned that we may gain a large number of such printk()s over
>>>>> time, if we added them in such cases.
>>>> The printk() was a bit of an afterthought, but deliberately
>>>> avoiding the
>>>> -EINVAL path was specifically not.
>>>>
>>>> In the case that the user tries to use `pv=no-32` without CONFIG_PV32,
>>>> they should see something other than
>>>>
>>>> (XEN) parameter "pv=no-32" unknown!
>>> Why - to this specific build of Xen the parameter is unknown.
>>
>> Because it is unnecessarily problematic and borderline obnoxious to
>> users, as well as occasional Xen developers.
>>
>> "you've not got the correct CONFIG_$X for that to be meaningful" is
>> specifically useful to separate from "I've got no idea".
>>
>>>> I don't think overloading the return value is a clever move, because
>>>> then every parse function has to take care of ensuring that
>>>> -EOPNOTSUPP
>>>> (or ENODEV?) never clobbers -EINVAL.
>>> I didn't suggest overloading the return value. Instead I
>>> specifically want this to go the -EINVAL path.
>>>
>>>> We could have a generic helper which looks like:
>>>>
>>>> static inline void ignored_param(const char *cfg, const char *name,
>>>> const char *s, const char *ss)
>>>> {
>>>>      printk(XENLOG_INFO "%s disabled - ignoring '%s=%*.s' setting\n",
>>>> cfg, name, s, (int)(ss - s));
>>>> }
>>>>
>>>> which at least would keep all the users consistent.
>>> Further bloating the binary with (almost) useless string literals.
>>> I'd specifically like to avoid this.
>>
>> I don't accept that as a valid argument.
>>
>> We're talking about literally tens of bytes (which will merge anyway, so
>> 0 in practice), and a resulting UI which helps people get out of
>> problems rather than penalises them for having gotten into a problem to
>> begin with.
>>
>> I will absolutely prioritise a more helpful UI over a handful of bytes.
>
> What about a kconfig option (defaulting to "yes") enabling this feature?

IMO, that's literally not worth the bytes taken to implement.

> That way an embedded variant can be made smaller while a server one is
> more user friendly.

There is far lower hanging fruit for an embedded usecase, and its not at
all a foregone conclusion that such a usecase would want the less user
friendly version.  (Embedded very definitely doesn't mean that it won't
have users interacting with the command line).

I would certainly recommend against attempting to speculatively fix
something which isn't a problem, based on guesswork about what a
hypothetical group of people might want while totally ignoring the far
larger savings to be gained by e.g. making CONFIG_INTEL/AMD work.

~Andrew
diff mbox series

Patch

diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
index acd0b3d994..ee12b0f53f 100644
--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -1694,7 +1694,17 @@  The following resources are available:
     CDP, one COS will corespond two CBMs other than one with CAT, due to the
     sum of CBMs is fixed, that means actual `cos_max` in use will automatically
     reduce to half when CDP is enabled.
-	
+
+### pv
+    = List of [ 32=<bool> ]
+
+    Applicability: x86
+
+Controls for aspects of PV guest support.
+
+*   The `32` boolean controls whether 32bit PV guests can be created.  It
+    defaults to `true`, and is ignored when `CONFIG_PV32` is compiled out.
+
 ### pv-linear-pt (x86)
 > `= <boolean>`
 
diff --git a/xen/arch/x86/Kconfig b/xen/arch/x86/Kconfig
index 8149362bde..4c52197de3 100644
--- a/xen/arch/x86/Kconfig
+++ b/xen/arch/x86/Kconfig
@@ -49,6 +49,22 @@  config PV
 
 	  If unsure, say Y.
 
+config PV32
+	bool "Support for 32bit PV guests"
+	depends on PV
+	default y
+	---help---
+	  The 32bit PV ABI uses Ring1, an area of the x86 architecture which
+	  was deprecated and mostly removed in the AMD64 spec.  As a result,
+	  it occasionally conflicts with newer x86 hardware features, causing
+	  overheads for Xen to maintain backwards compatibility.
+
+	  People may wish to disable 32bit PV guests for attack surface
+	  reduction, or performance reasons.  Backwards compatibility can be
+	  provided via the PV Shim mechanism.
+
+	  If unsure, say Y.
+
 config PV_LINEAR_PT
        bool "Support for PV linear pagetables"
        depends on PV
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 70fae43965..47a0db082f 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -16,6 +16,39 @@ 
 #include <asm/pv/domain.h>
 #include <asm/shadow.h>
 
+#ifdef CONFIG_PV32
+int8_t __read_mostly opt_pv32 = -1;
+#endif
+
+static int parse_pv(const char *s)
+{
+    const char *ss;
+    int val, rc = 0;
+
+    do {
+        ss = strchr(s, ',');
+        if ( !ss )
+            ss = strchr(s, '\0');
+
+        if ( (val = parse_boolean("32", s, ss)) >= 0 )
+        {
+#ifdef CONFIG_PV32
+            opt_pv32 = val;
+#else
+            printk(XENLOG_INFO
+                   "CONFIG_PV32 disabled - ignoring 'pv=32' setting\n");
+#endif
+        }
+        else
+            rc = -EINVAL;
+
+        s = ss + 1;
+    } while ( *ss );
+
+    return rc;
+}
+custom_param("pv", parse_pv);
+
 static __read_mostly enum {
     PCID_OFF,
     PCID_ALL,
@@ -174,6 +207,8 @@  int switch_compat(struct domain *d)
 
     BUILD_BUG_ON(offsetof(struct shared_info, vcpu_info) != 0);
 
+    if ( !opt_pv32 )
+        return -EOPNOTSUPP;
     if ( is_hvm_domain(d) || domain_tot_pages(d) != 0 )
         return -EACCES;
     if ( is_pv_32bit_domain(d) )
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 885919d5c3..c50aefb2de 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -53,6 +53,7 @@ 
 #include <asm/spec_ctrl.h>
 #include <asm/guest.h>
 #include <asm/microcode.h>
+#include <asm/pv/domain.h>
 
 /* opt_nosmp: If true, secondary processors are ignored. */
 static bool __initdata opt_nosmp;
@@ -1875,8 +1876,12 @@  void arch_get_xen_caps(xen_capabilities_info_t *info)
     {
         snprintf(s, sizeof(s), "xen-%d.%d-x86_64 ", major, minor);
         safe_strcat(*info, s);
-        snprintf(s, sizeof(s), "xen-%d.%d-x86_32p ", major, minor);
-        safe_strcat(*info, s);
+
+        if ( opt_pv32 )
+        {
+            snprintf(s, sizeof(s), "xen-%d.%d-x86_32p ", major, minor);
+            safe_strcat(*info, s);
+        }
     }
     if ( hvm_enabled )
     {
diff --git a/xen/include/asm-x86/pv/domain.h b/xen/include/asm-x86/pv/domain.h
index 7a69bfb303..df9716ff26 100644
--- a/xen/include/asm-x86/pv/domain.h
+++ b/xen/include/asm-x86/pv/domain.h
@@ -23,6 +23,12 @@ 
 
 #include <xen/sched.h>
 
+#ifdef CONFIG_PV32
+extern int8_t opt_pv32;
+#else
+# define opt_pv32 false
+#endif
+
 /*
  * PCID values for the address spaces of 64-bit pv domains:
  *