diff mbox series

[v4,02/10] x86/vmx: add IPT cpu feature

Message ID 7302dbfcd07dfaad9e50bb772673e588fcc4de67.1593519420.git.michal.leszczynski@cert.pl (mailing list archive)
State Superseded
Headers show
Series Implement support for external IPT monitoring | expand

Commit Message

Michał Leszczyński June 30, 2020, 12:33 p.m. UTC
From: Michal Leszczynski <michal.leszczynski@cert.pl>

Check if Intel Processor Trace feature is supported by current
processor. Define vmtrace_supported global variable.

Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
---
 xen/arch/x86/hvm/vmx/vmcs.c                 | 7 ++++++-
 xen/common/domain.c                         | 2 ++
 xen/include/asm-x86/cpufeature.h            | 1 +
 xen/include/asm-x86/hvm/vmx/vmcs.h          | 1 +
 xen/include/public/arch-x86/cpufeatureset.h | 1 +
 xen/include/xen/domain.h                    | 2 ++
 6 files changed, 13 insertions(+), 1 deletion(-)

Comments

Roger Pau Monné July 1, 2020, 9:49 a.m. UTC | #1
On Tue, Jun 30, 2020 at 02:33:45PM +0200, Michał Leszczyński wrote:
> From: Michal Leszczynski <michal.leszczynski@cert.pl>
> 
> Check if Intel Processor Trace feature is supported by current
> processor. Define vmtrace_supported global variable.
> 
> Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
> ---
>  xen/arch/x86/hvm/vmx/vmcs.c                 | 7 ++++++-
>  xen/common/domain.c                         | 2 ++
>  xen/include/asm-x86/cpufeature.h            | 1 +
>  xen/include/asm-x86/hvm/vmx/vmcs.h          | 1 +
>  xen/include/public/arch-x86/cpufeatureset.h | 1 +
>  xen/include/xen/domain.h                    | 2 ++
>  6 files changed, 13 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
> index ca94c2bedc..b73d824357 100644
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -291,6 +291,12 @@ static int vmx_init_vmcs_config(void)
>          _vmx_cpu_based_exec_control &=
>              ~(CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING);
>  
> +    rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
> +
> +    /* Check whether IPT is supported in VMX operation. */
> +    vmtrace_supported = cpu_has_ipt &&
> +                        (_vmx_misc_cap & VMX_MISC_PT_SUPPORTED);

This function gets called for every CPU that's brought up, so you need
to set it on the BSP, and then check that the APs also support the
feature or else fail to bring them up AFAICT. If not you could end up
with a non working system.

I agree it's very unlikely to boot on a system with such differences
between CPUs, but better be safe than sorry.

> +
>      if ( _vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
>      {
>          min = 0;
> @@ -305,7 +311,6 @@ static int vmx_init_vmcs_config(void)
>                 SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS |
>                 SECONDARY_EXEC_XSAVES |
>                 SECONDARY_EXEC_TSC_SCALING);
> -        rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
>          if ( _vmx_misc_cap & VMX_MISC_VMWRITE_ALL )
>              opt |= SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
>          if ( opt_vpid_enabled )
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 7cc9526139..0a33e0dfd6 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -82,6 +82,8 @@ struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;
>  
>  vcpu_info_t dummy_vcpu_info;
>  
> +bool_t vmtrace_supported;

Plain bool, and I think it wants to be __read_mostly.

I'm also unsure whether this is the best place to put such variable,
since there are no users introduced on this patch it's hard to tell.

Thanks, Roger.
Julien Grall July 1, 2020, 3:12 p.m. UTC | #2
On 30/06/2020 13:33, Michał Leszczyński wrote:
> @@ -305,7 +311,6 @@ static int vmx_init_vmcs_config(void)
>                  SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS |
>                  SECONDARY_EXEC_XSAVES |
>                  SECONDARY_EXEC_TSC_SCALING);
> -        rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
>           if ( _vmx_misc_cap & VMX_MISC_VMWRITE_ALL )
>               opt |= SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
>           if ( opt_vpid_enabled )
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 7cc9526139..0a33e0dfd6 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -82,6 +82,8 @@ struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;
>   
>   vcpu_info_t dummy_vcpu_info;
>   
> +bool_t vmtrace_supported;

All the code looks x86 specific. So may I ask why this was implemented 
in common code?

Cheers,
Andrew Cooper July 1, 2020, 4:06 p.m. UTC | #3
On 01/07/2020 16:12, Julien Grall wrote:
> On 30/06/2020 13:33, Michał Leszczyński wrote:
>> @@ -305,7 +311,6 @@ static int vmx_init_vmcs_config(void)
>>                  SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS |
>>                  SECONDARY_EXEC_XSAVES |
>>                  SECONDARY_EXEC_TSC_SCALING);
>> -        rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
>>           if ( _vmx_misc_cap & VMX_MISC_VMWRITE_ALL )
>>               opt |= SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
>>           if ( opt_vpid_enabled )
>> diff --git a/xen/common/domain.c b/xen/common/domain.c
>> index 7cc9526139..0a33e0dfd6 100644
>> --- a/xen/common/domain.c
>> +++ b/xen/common/domain.c
>> @@ -82,6 +82,8 @@ struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;
>>     vcpu_info_t dummy_vcpu_info;
>>   +bool_t vmtrace_supported;
>
> All the code looks x86 specific. So may I ask why this was implemented
> in common code?

There were some questions directed specifically at the ARM maintainers
about CoreSight, which have gone unanswered.

~Andrew
Julien Grall July 1, 2020, 4:17 p.m. UTC | #4
On 01/07/2020 17:06, Andrew Cooper wrote:
> On 01/07/2020 16:12, Julien Grall wrote:
>> On 30/06/2020 13:33, Michał Leszczyński wrote:
>>> @@ -305,7 +311,6 @@ static int vmx_init_vmcs_config(void)
>>>                   SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS |
>>>                   SECONDARY_EXEC_XSAVES |
>>>                   SECONDARY_EXEC_TSC_SCALING);
>>> -        rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
>>>            if ( _vmx_misc_cap & VMX_MISC_VMWRITE_ALL )
>>>                opt |= SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
>>>            if ( opt_vpid_enabled )
>>> diff --git a/xen/common/domain.c b/xen/common/domain.c
>>> index 7cc9526139..0a33e0dfd6 100644
>>> --- a/xen/common/domain.c
>>> +++ b/xen/common/domain.c
>>> @@ -82,6 +82,8 @@ struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;
>>>      vcpu_info_t dummy_vcpu_info;
>>>    +bool_t vmtrace_supported;
>>
>> All the code looks x86 specific. So may I ask why this was implemented
>> in common code?
> 
> There were some questions directed specifically at the ARM maintainers
> about CoreSight, which have gone unanswered.

I can only find one question related to the size. Is there any other?

I don't know what the interface will look like given that AFAICT the 
buffer may be embedded in the HW. We would need to investigate how to 
differentiate between two domUs in this case without impacting the 
performance in the common code.

So I think it is a little premature to implement this in common code and 
always compiled in for Arm. It would be best if this stay in x86 code.

Cheers,
Julien Grall July 1, 2020, 4:18 p.m. UTC | #5
On 01/07/2020 17:17, Julien Grall wrote:
> 
> 
> On 01/07/2020 17:06, Andrew Cooper wrote:
>> On 01/07/2020 16:12, Julien Grall wrote:
>>> On 30/06/2020 13:33, Michał Leszczyński wrote:
>>>> @@ -305,7 +311,6 @@ static int vmx_init_vmcs_config(void)
>>>>                   SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS |
>>>>                   SECONDARY_EXEC_XSAVES |
>>>>                   SECONDARY_EXEC_TSC_SCALING);
>>>> -        rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
>>>>            if ( _vmx_misc_cap & VMX_MISC_VMWRITE_ALL )
>>>>                opt |= SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
>>>>            if ( opt_vpid_enabled )
>>>> diff --git a/xen/common/domain.c b/xen/common/domain.c
>>>> index 7cc9526139..0a33e0dfd6 100644
>>>> --- a/xen/common/domain.c
>>>> +++ b/xen/common/domain.c
>>>> @@ -82,6 +82,8 @@ struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;
>>>>      vcpu_info_t dummy_vcpu_info;
>>>>    +bool_t vmtrace_supported;
>>>
>>> All the code looks x86 specific. So may I ask why this was implemented
>>> in common code?
>>
>> There were some questions directed specifically at the ARM maintainers
>> about CoreSight, which have gone unanswered.
> 
> I can only find one question related to the size. Is there any other?
> 
> I don't know how the interface will look like given that AFAICT the 
> buffer may be embedded in the HW. We would need to investigate how to 
> differentiate between two domUs in this case without impacting the 
> performance in the common code.

s/in the common code/during the context switch/

> So I think it is a little premature to implement this in common code and 
> always compiled in for Arm. It would be best if this stay in x86 code.
> 
> Cheers,
>
Andrew Cooper July 1, 2020, 5:26 p.m. UTC | #6
On 01/07/2020 17:18, Julien Grall wrote:
>
>
> On 01/07/2020 17:17, Julien Grall wrote:
>>
>>
>> On 01/07/2020 17:06, Andrew Cooper wrote:
>>> On 01/07/2020 16:12, Julien Grall wrote:
>>>> On 30/06/2020 13:33, Michał Leszczyński wrote:
>>>>> @@ -305,7 +311,6 @@ static int vmx_init_vmcs_config(void)
>>>>>                   SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS |
>>>>>                   SECONDARY_EXEC_XSAVES |
>>>>>                   SECONDARY_EXEC_TSC_SCALING);
>>>>> -        rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
>>>>>            if ( _vmx_misc_cap & VMX_MISC_VMWRITE_ALL )
>>>>>                opt |= SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
>>>>>            if ( opt_vpid_enabled )
>>>>> diff --git a/xen/common/domain.c b/xen/common/domain.c
>>>>> index 7cc9526139..0a33e0dfd6 100644
>>>>> --- a/xen/common/domain.c
>>>>> +++ b/xen/common/domain.c
>>>>> @@ -82,6 +82,8 @@ struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;
>>>>>      vcpu_info_t dummy_vcpu_info;
>>>>>    +bool_t vmtrace_supported;
>>>>
>>>> All the code looks x86 specific. So may I ask why this was implemented
>>>> in common code?
>>>
>>> There were some questions directed specifically at the ARM maintainers
>>> about CoreSight, which have gone unanswered.
>>
>> I can only find one question related to the size. Is there any other?
>>
>> I don't know how the interface will look like given that AFAICT the
>> buffer may be embedded in the HW. We would need to investigate how to
>> differentiate between two domUs in this case without impacting the
>> performance in the common code.
>
> s/in the common code/during the context switch/
>
>> So I think it is a little premature to implement this in common code
>> and always compiled in for Arm. It would be best if this stay in x86
>> code.

I've just checked with a colleague.  CoreSight can dump to a memory
buffer - there's even a decode library for the packet stream
https://github.com/Linaro/OpenCSD, although ultimately it is platform
specific as to whether the feature is supported.

Furthermore, the choice isn't "x86 vs ARM", now that RISCv support is
on-list, and Power9 is floating on the horizon.

For the sake of what is literally just one byte in common code, I stand
by my original suggestion of this being a common interface.  It is not
something which should be x86 specific.

~Andrew
Julien Grall July 1, 2020, 6:02 p.m. UTC | #7
Hi,

On 01/07/2020 18:26, Andrew Cooper wrote:
> On 01/07/2020 17:18, Julien Grall wrote:
>>
>>
>> On 01/07/2020 17:17, Julien Grall wrote:
>>>
>>>
>>> On 01/07/2020 17:06, Andrew Cooper wrote:
>>>> On 01/07/2020 16:12, Julien Grall wrote:
>>>>> On 30/06/2020 13:33, Michał Leszczyński wrote:
>>>>>> @@ -305,7 +311,6 @@ static int vmx_init_vmcs_config(void)
>>>>>>                    SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS |
>>>>>>                    SECONDARY_EXEC_XSAVES |
>>>>>>                    SECONDARY_EXEC_TSC_SCALING);
>>>>>> -        rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
>>>>>>             if ( _vmx_misc_cap & VMX_MISC_VMWRITE_ALL )
>>>>>>                 opt |= SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
>>>>>>             if ( opt_vpid_enabled )
>>>>>> diff --git a/xen/common/domain.c b/xen/common/domain.c
>>>>>> index 7cc9526139..0a33e0dfd6 100644
>>>>>> --- a/xen/common/domain.c
>>>>>> +++ b/xen/common/domain.c
>>>>>> @@ -82,6 +82,8 @@ struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;
>>>>>>       vcpu_info_t dummy_vcpu_info;
>>>>>>     +bool_t vmtrace_supported;
>>>>>
>>>>> All the code looks x86 specific. So may I ask why this was implemented
>>>>> in common code?
>>>>
>>>> There were some questions directed specifically at the ARM maintainers
>>>> about CoreSight, which have gone unanswered.
>>>
>>> I can only find one question related to the size. Is there any other?
>>>
>>> I don't know how the interface will look like given that AFAICT the
>>> buffer may be embedded in the HW. We would need to investigate how to
>>> differentiate between two domUs in this case without impacting the
>>> performance in the common code.
>>
>> s/in the common code/during the context switch/
>>
>>> So I think it is a little premature to implement this in common code
>>> and always compiled in for Arm. It would be best if this stay in x86
>>> code.
> 
> I've just checked with a colleague.  CoreSight can dump to a memory
> buffer - there's even a decode library for the packet stream
> https://github.com/Linaro/OpenCSD, although ultimately it is platform
> specific as to whether the feature is supported.
> 
> Furthermore, the choice isn't "x86 vs ARM", now that RISCv support is
> on-list, and Power9 is floating on the horizon.
> 
> For the sake of what is literally just one byte in common code, I stand
> my original suggestion of this being a common interface.  It is not
> something which should be x86 specific.

This argument can also be used against putting it in common code. What I 
am most concerned about is that we are trying to guess what the interface 
will look like for another architecture. Your suggested interface may work, 
but this may also end up being a complete mess.

So I think we want to wait for a new architecture to use vmtrace before 
moving to common code. This is not going to be a massive effort to move 
that bit in common if needed.

Cheers,
Andrew Cooper July 1, 2020, 6:06 p.m. UTC | #8
On 01/07/2020 19:02, Julien Grall wrote:
> Hi,
>
> On 01/07/2020 18:26, Andrew Cooper wrote:
>> On 01/07/2020 17:18, Julien Grall wrote:
>>>
>>>
>>> On 01/07/2020 17:17, Julien Grall wrote:
>>>>
>>>>
>>>> On 01/07/2020 17:06, Andrew Cooper wrote:
>>>>> On 01/07/2020 16:12, Julien Grall wrote:
>>>>>> On 30/06/2020 13:33, Michał Leszczyński wrote:
>>>>>>> @@ -305,7 +311,6 @@ static int vmx_init_vmcs_config(void)
>>>>>>>                    SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS |
>>>>>>>                    SECONDARY_EXEC_XSAVES |
>>>>>>>                    SECONDARY_EXEC_TSC_SCALING);
>>>>>>> -        rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
>>>>>>>             if ( _vmx_misc_cap & VMX_MISC_VMWRITE_ALL )
>>>>>>>                 opt |= SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
>>>>>>>             if ( opt_vpid_enabled )
>>>>>>> diff --git a/xen/common/domain.c b/xen/common/domain.c
>>>>>>> index 7cc9526139..0a33e0dfd6 100644
>>>>>>> --- a/xen/common/domain.c
>>>>>>> +++ b/xen/common/domain.c
>>>>>>> @@ -82,6 +82,8 @@ struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;
>>>>>>>       vcpu_info_t dummy_vcpu_info;
>>>>>>>     +bool_t vmtrace_supported;
>>>>>>
>>>>>> All the code looks x86 specific. So may I ask why this was
>>>>>> implemented
>>>>>> in common code?
>>>>>
>>>>> There were some questions directed specifically at the ARM
>>>>> maintainers
>>>>> about CoreSight, which have gone unanswered.
>>>>
>>>> I can only find one question related to the size. Is there any other?
>>>>
>>>> I don't know how the interface will look like given that AFAICT the
>>>> buffer may be embedded in the HW. We would need to investigate how to
>>>> differentiate between two domUs in this case without impacting the
>>>> performance in the common code.
>>>
>>> s/in the common code/during the context switch/
>>>
>>>> So I think it is a little premature to implement this in common code
>>>> and always compiled in for Arm. It would be best if this stay in x86
>>>> code.
>>
>> I've just checked with a colleague.  CoreSight can dump to a memory
>> buffer - there's even a decode library for the packet stream
>> https://github.com/Linaro/OpenCSD, although ultimately it is platform
>> specific as to whether the feature is supported.
>>
>> Furthermore, the choice isn't "x86 vs ARM", now that RISCv support is
>> on-list, and Power9 is floating on the horizon.
>>
>> For the sake of what is literally just one byte in common code, I stand
>> my original suggestion of this being a common interface.  It is not
>> something which should be x86 specific.
>
> This argument can also be used against putting in common code. What I
> am the most concern of is we are trying to guess how the interface
> will look like for another architecture. Your suggested interface may
> work, but this also may end up to be a complete mess.
>
> So I think we want to wait for a new architecture to use vmtrace
> before moving to common code. This is not going to be a massive effort
> to move that bit in common if needed.

I suggest you read the series.

The only thing in common code is the bit of the interface saying "I'd
like buffers this big please".

~Andrew
Julien Grall July 1, 2020, 6:09 p.m. UTC | #9
On 01/07/2020 19:06, Andrew Cooper wrote:
> On 01/07/2020 19:02, Julien Grall wrote:
>> Hi,
>>
>> On 01/07/2020 18:26, Andrew Cooper wrote:
>>> On 01/07/2020 17:18, Julien Grall wrote:
>>>>
>>>>
>>>> On 01/07/2020 17:17, Julien Grall wrote:
>>>>>
>>>>>
>>>>> On 01/07/2020 17:06, Andrew Cooper wrote:
>>>>>> On 01/07/2020 16:12, Julien Grall wrote:
>>>>>>> On 30/06/2020 13:33, Michał Leszczyński wrote:
>>>>>>>> @@ -305,7 +311,6 @@ static int vmx_init_vmcs_config(void)
>>>>>>>>                     SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS |
>>>>>>>>                     SECONDARY_EXEC_XSAVES |
>>>>>>>>                     SECONDARY_EXEC_TSC_SCALING);
>>>>>>>> -        rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
>>>>>>>>              if ( _vmx_misc_cap & VMX_MISC_VMWRITE_ALL )
>>>>>>>>                  opt |= SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
>>>>>>>>              if ( opt_vpid_enabled )
>>>>>>>> diff --git a/xen/common/domain.c b/xen/common/domain.c
>>>>>>>> index 7cc9526139..0a33e0dfd6 100644
>>>>>>>> --- a/xen/common/domain.c
>>>>>>>> +++ b/xen/common/domain.c
>>>>>>>> @@ -82,6 +82,8 @@ struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;
>>>>>>>>        vcpu_info_t dummy_vcpu_info;
>>>>>>>>      +bool_t vmtrace_supported;
>>>>>>>
>>>>>>> All the code looks x86 specific. So may I ask why this was
>>>>>>> implemented
>>>>>>> in common code?
>>>>>>
>>>>>> There were some questions directed specifically at the ARM
>>>>>> maintainers
>>>>>> about CoreSight, which have gone unanswered.
>>>>>
>>>>> I can only find one question related to the size. Is there any other?
>>>>>
>>>>> I don't know how the interface will look like given that AFAICT the
>>>>> buffer may be embedded in the HW. We would need to investigate how to
>>>>> differentiate between two domUs in this case without impacting the
>>>>> performance in the common code.
>>>>
>>>> s/in the common code/during the context switch/
>>>>
>>>>> So I think it is a little premature to implement this in common code
>>>>> and always compiled in for Arm. It would be best if this stay in x86
>>>>> code.
>>>
>>> I've just checked with a colleague.  CoreSight can dump to a memory
>>> buffer - there's even a decode library for the packet stream
>>> https://github.com/Linaro/OpenCSD, although ultimately it is platform
>>> specific as to whether the feature is supported.
>>>
>>> Furthermore, the choice isn't "x86 vs ARM", now that RISCv support is
>>> on-list, and Power9 is floating on the horizon.
>>>
>>> For the sake of what is literally just one byte in common code, I stand
>>> my original suggestion of this being a common interface.  It is not
>>> something which should be x86 specific.
>>
>> This argument can also be used against putting in common code. What I
>> am the most concern of is we are trying to guess how the interface
>> will look like for another architecture. Your suggested interface may
>> work, but this also may end up to be a complete mess.
>>
>> So I think we want to wait for a new architecture to use vmtrace
>> before moving to common code. This is not going to be a massive effort
>> to move that bit in common if needed.
> 
> I suggest you read the series.

Already went through the series and ...

> 
> The only thing in common code is the bit of the interface saying "I'd
> like buffers this big please".

... I stand by my point. There is no need to have this code in common 
code until someone else needs it. This code can be easily implemented in 
arch_domain_create().

Cheers,
Andrew Cooper July 1, 2020, 9:42 p.m. UTC | #10
On 30/06/2020 13:33, Michał Leszczyński wrote:
> diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
> index ca94c2bedc..b73d824357 100644
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -291,6 +291,12 @@ static int vmx_init_vmcs_config(void)
>          _vmx_cpu_based_exec_control &=
>              ~(CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING);
>  
> +    rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
> +
> +    /* Check whether IPT is supported in VMX operation. */
> +    vmtrace_supported = cpu_has_ipt &&
> +                        (_vmx_misc_cap & VMX_MISC_PT_SUPPORTED);

There is a subtle corner case here.  vmx_init_vmcs_config() is called on
all CPUs, and is supposed to level things down safely if we find any
asymmetry.

If instead you go with something like this:

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index b73d824357..6960109183 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -294,8 +294,8 @@ static int vmx_init_vmcs_config(void)
     rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
 
     /* Check whether IPT is supported in VMX operation. */
-    vmtrace_supported = cpu_has_ipt &&
-                        (_vmx_misc_cap & VMX_MISC_PT_SUPPORTED);
+    if ( !(_vmx_misc_cap & VMX_MISC_PT_SUPPORTED) )
+        vmtrace_supported = false;
 
     if ( _vmx_cpu_based_exec_control &
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
     {
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index c9b6af826d..9d7822e006 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1092,6 +1092,9 @@ void __init noreturn __start_xen(unsigned long mbi_p)
 #endif
     }
 
+    /* Set a default for VMTrace before HVM setup occurs. */
+    vmtrace_supported = cpu_has_ipt;
+
     /* Sanitise the raw E820 map to produce a final clean version. */
     max_page = raw_max_page = init_e820(memmap_type, &e820_raw);
 

Then you'll also get a vmtrace_supported=true which works correctly in
the Broadwell and no-VT-x case as well.


> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 7cc9526139..0a33e0dfd6 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -82,6 +82,8 @@ struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;
>  
>  vcpu_info_t dummy_vcpu_info;
>  
> +bool_t vmtrace_supported;

bool please.  We're in the process of converting over to C99 bools, and
objection was taken to a tree-wide cleanup.

> +
>  static void __domain_finalise_shutdown(struct domain *d)
>  {
>      struct vcpu *v;
> diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
> index f790d5c1f8..8d7955dd87 100644
> --- a/xen/include/asm-x86/cpufeature.h
> +++ b/xen/include/asm-x86/cpufeature.h
> @@ -104,6 +104,7 @@
>  #define cpu_has_clwb            boot_cpu_has(X86_FEATURE_CLWB)
>  #define cpu_has_avx512er        boot_cpu_has(X86_FEATURE_AVX512ER)
>  #define cpu_has_avx512cd        boot_cpu_has(X86_FEATURE_AVX512CD)
> +#define cpu_has_ipt             boot_cpu_has(X86_FEATURE_IPT)
>  #define cpu_has_sha             boot_cpu_has(X86_FEATURE_SHA)
>  #define cpu_has_avx512bw        boot_cpu_has(X86_FEATURE_AVX512BW)
>  #define cpu_has_avx512vl        boot_cpu_has(X86_FEATURE_AVX512VL)
> diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
> index 906810592f..0e9a0b8de6 100644
> --- a/xen/include/asm-x86/hvm/vmx/vmcs.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
> @@ -283,6 +283,7 @@ extern u32 vmx_secondary_exec_control;
>  #define VMX_VPID_INVVPID_SINGLE_CONTEXT_RETAINING_GLOBAL 0x80000000000ULL
>  extern u64 vmx_ept_vpid_cap;
>  
> +#define VMX_MISC_PT_SUPPORTED                   0x00004000

VMX_MISC_PROC_TRACE, and ...

>  #define VMX_MISC_CR3_TARGET                     0x01ff0000
>  #define VMX_MISC_VMWRITE_ALL                    0x20000000
>  
> diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
> index 5ca35d9d97..0d3f15f628 100644
> --- a/xen/include/public/arch-x86/cpufeatureset.h
> +++ b/xen/include/public/arch-x86/cpufeatureset.h
> @@ -217,6 +217,7 @@ XEN_CPUFEATURE(SMAP,          5*32+20) /*S  Supervisor Mode Access Prevention */
>  XEN_CPUFEATURE(AVX512_IFMA,   5*32+21) /*A  AVX-512 Integer Fused Multiply Add */
>  XEN_CPUFEATURE(CLFLUSHOPT,    5*32+23) /*A  CLFLUSHOPT instruction */
>  XEN_CPUFEATURE(CLWB,          5*32+24) /*A  CLWB instruction */
> +XEN_CPUFEATURE(IPT,           5*32+25) /*   Intel Processor Trace */

.. any chance we can spell this out as PROC_TRACE?  The "Intel" part
won't be true if any of the other vendors choose to implement this
interface to the spec.

Otherwise, LGTM.

~Andrew
Roger Pau Monné July 2, 2020, 8:10 a.m. UTC | #11
On Wed, Jul 01, 2020 at 10:42:55PM +0100, Andrew Cooper wrote:
> On 30/06/2020 13:33, Michał Leszczyński wrote:
> > diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
> > index ca94c2bedc..b73d824357 100644
> > --- a/xen/arch/x86/hvm/vmx/vmcs.c
> > +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> > @@ -291,6 +291,12 @@ static int vmx_init_vmcs_config(void)
> >          _vmx_cpu_based_exec_control &=
> >              ~(CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING);
> >  
> > +    rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
> > +
> > +    /* Check whether IPT is supported in VMX operation. */
> > +    vmtrace_supported = cpu_has_ipt &&
> > +                        (_vmx_misc_cap & VMX_MISC_PT_SUPPORTED);
> 
> There is a subtle corner case here.  vmx_init_vmcs_config() is called on
> all CPUs, and is supposed to level things down safely if we find any
> asymmetry.
> 
> If instead you go with something like this:
> 
> diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
> index b73d824357..6960109183 100644
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -294,8 +294,8 @@ static int vmx_init_vmcs_config(void)
>      rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
>  
>      /* Check whether IPT is supported in VMX operation. */
> -    vmtrace_supported = cpu_has_ipt &&
> -                        (_vmx_misc_cap & VMX_MISC_PT_SUPPORTED);
> +    if ( !(_vmx_misc_cap & VMX_MISC_PT_SUPPORTED) )
> +        vmtrace_supported = false;

This is also used during hotplug, so I'm not sure it's safe to turn
vmtrace_supported off during runtime, where VMs might be already using
it. IMO it would be easier to just set it on the BSP, and then refuse
to bring up any AP that doesn't have the feature. TBH I don't think we
are likely to find any system with such configuration, but seems more
robust than changing vmtrace_supported at runtime.

Roger.
Jan Beulich July 2, 2020, 8:29 a.m. UTC | #12
On 01.07.2020 20:09, Julien Grall wrote:
> On 01/07/2020 19:06, Andrew Cooper wrote:
>> On 01/07/2020 19:02, Julien Grall wrote:
>>> On 01/07/2020 18:26, Andrew Cooper wrote:
>>>> For the sake of what is literally just one byte in common code, I stand
>>>> my original suggestion of this being a common interface.  It is not
>>>> something which should be x86 specific.
>>>
>>> This argument can also be used against putting in common code. What I
>>> am the most concern of is we are trying to guess how the interface
>>> will look like for another architecture. Your suggested interface may
>>> work, but this also may end up to be a complete mess.
>>>
>>> So I think we want to wait for a new architecture to use vmtrace
>>> before moving to common code. This is not going to be a massive effort
>>> to move that bit in common if needed.
>>
>> I suggest you read the series.
> 
> Already went through the series and ...
> 
>>
>> The only thing in common code is the bit of the interface saying "I'd
>> like buffers this big please".
> 
> ... I stand by my point. There is no need to have this code in common 
> code until someone else need it. This code can be easily implemented in 
> arch_domain_create().

I'm with Andrew here, fwiw, as long as the little bit of code that
is actually put in common/ or include/xen/ doesn't imply arbitrary
restrictions on acceptable values. For example, unless there is
proof that for all architectures of interest currently or in the
not too distant future an order value is fine (as opposed to a
size one), then an order field would be fine to live in common
code imo. Otherwise it would need to be a size one, with per-arch
enforcement of further imposed restrictions (like needing to be a
power of 2).

Jan
Jan Beulich July 2, 2020, 8:34 a.m. UTC | #13
On 02.07.2020 10:10, Roger Pau Monné wrote:
> On Wed, Jul 01, 2020 at 10:42:55PM +0100, Andrew Cooper wrote:
>> On 30/06/2020 13:33, Michał Leszczyński wrote:
>>> diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
>>> index ca94c2bedc..b73d824357 100644
>>> --- a/xen/arch/x86/hvm/vmx/vmcs.c
>>> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
>>> @@ -291,6 +291,12 @@ static int vmx_init_vmcs_config(void)
>>>          _vmx_cpu_based_exec_control &=
>>>              ~(CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING);
>>>  
>>> +    rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
>>> +
>>> +    /* Check whether IPT is supported in VMX operation. */
>>> +    vmtrace_supported = cpu_has_ipt &&
>>> +                        (_vmx_misc_cap & VMX_MISC_PT_SUPPORTED);
>>
>> There is a subtle corner case here.  vmx_init_vmcs_config() is called on
>> all CPUs, and is supposed to level things down safely if we find any
>> asymmetry.
>>
>> If instead you go with something like this:
>>
>> diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
>> index b73d824357..6960109183 100644
>> --- a/xen/arch/x86/hvm/vmx/vmcs.c
>> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
>> @@ -294,8 +294,8 @@ static int vmx_init_vmcs_config(void)
>>      rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
>>  
>>      /* Check whether IPT is supported in VMX operation. */
>> -    vmtrace_supported = cpu_has_ipt &&
>> -                        (_vmx_misc_cap & VMX_MISC_PT_SUPPORTED);
>> +    if ( !(_vmx_misc_cap & VMX_MISC_PT_SUPPORTED) )
>> +        vmtrace_supported = false;
> 
> This is also used during hotplug, so I'm not sure it's safe to turn
> vmtrace_supported off during runtime, where VMs might be already using
> it. IMO it would be easier to just set it on the BSP, and then refuse
> to bring up any AP that doesn't have the feature.

+1

IOW I also don't think that "vmx_init_vmcs_config() ... is supposed to
level things down safely". Instead I think the expectation is for
CPU onlining to fail if a CPU lacks features compared to the BSP. As
can be implied from what Roger says, doing like what you suggest may
be fine during boot, but past that only at times where we know there's
no user of a certain feature, and where discarding the feature flag
won't lead to other inconsistencies (which may very well mean "never").

Jan
Julien Grall July 2, 2020, 8:42 a.m. UTC | #14
Hi Jan,

On 02/07/2020 09:29, Jan Beulich wrote:
> On 01.07.2020 20:09, Julien Grall wrote:
>> On 01/07/2020 19:06, Andrew Cooper wrote:
>>> On 01/07/2020 19:02, Julien Grall wrote:
>>>> On 01/07/2020 18:26, Andrew Cooper wrote:
>>>>> For the sake of what is literally just one byte in common code, I stand
>>>>> my original suggestion of this being a common interface.  It is not
>>>>> something which should be x86 specific.
>>>>
>>>> This argument can also be used against putting in common code. What I
>>>> am the most concern of is we are trying to guess how the interface
>>>> will look like for another architecture. Your suggested interface may
>>>> work, but this also may end up to be a complete mess.
>>>>
>>>> So I think we want to wait for a new architecture to use vmtrace
>>>> before moving to common code. This is not going to be a massive effort
>>>> to move that bit in common if needed.
>>>
>>> I suggest you read the series.
>>
>> Already went through the series and ...
>>
>>>
>>> The only thing in common code is the bit of the interface saying "I'd
>>> like buffers this big please".
>>
>> ... I stand by my point. There is no need to have this code in common
>> code until someone else need it. This code can be easily implemented in
>> arch_domain_create().
> 
> I'm with Andrew here, fwiw, as long as the little bit of code that
> is actually put in common/ or include/xen/ doesn't imply arbitrary
> restrictions on acceptable values.
Well yes the code is simple. However, the code as it is wouldn't be 
usable on other architectures without additional work (aside from arch 
specific code). For instance, there is no way to map the buffer outside 
of Xen as it is all x86 specific.

If you want the allocation to be in the common code, then the 
infrastructure to map/unmap the buffer should also be in common code. 
Otherwise, there is no point to allocate it in common.

Cheers,
Jan Beulich July 2, 2020, 8:50 a.m. UTC | #15
On 02.07.2020 10:42, Julien Grall wrote:
> On 02/07/2020 09:29, Jan Beulich wrote:
>> I'm with Andrew here, fwiw, as long as the little bit of code that
>> is actually put in common/ or include/xen/ doesn't imply arbitrary
>> restrictions on acceptable values.
> Well yes the code is simple. However, the code as it is wouldn't be 
> usuable on other architecture without additional work (aside arch 
> specific code). For instance, there is no way to map the buffer outside 
> of Xen as it is all x86 specific.
> 
> If you want the allocation to be in the common code, then the 
> infrastructure to map/unmap the buffer should also be in common code. 
> Otherwise, there is no point to allocate it in common.

I don't think I agree here - I see nothing wrong with exposing of
the memory being arch specific, when allocation is generic. This
is no different from, in just x86, allocation logic being common
to PV and HVM, but exposing being different for both.

Jan
Julien Grall July 2, 2020, 8:54 a.m. UTC | #16
On 02/07/2020 09:50, Jan Beulich wrote:
> On 02.07.2020 10:42, Julien Grall wrote:
>> On 02/07/2020 09:29, Jan Beulich wrote:
>>> I'm with Andrew here, fwiw, as long as the little bit of code that
>>> is actually put in common/ or include/xen/ doesn't imply arbitrary
>>> restrictions on acceptable values.
>> Well yes the code is simple. However, the code as it is wouldn't be
>> usuable on other architecture without additional work (aside arch
>> specific code). For instance, there is no way to map the buffer outside
>> of Xen as it is all x86 specific.
>>
>> If you want the allocation to be in the common code, then the
>> infrastructure to map/unmap the buffer should also be in common code.
>> Otherwise, there is no point to allocate it in common.
> 
> I don't think I agree here - I see nothing wrong with exposing of
> the memory being arch specific, when allocation is generic. This
> is no different from, in just x86, allocation logic being common
> to PV and HVM, but exposing being different for both.

Are you suggesting that the way it would be exposed may be different for 
other architectures?

If so, this is one more reason to not impose a way for allocating the 
buffer in the common code until another arch adds support for vmtrace.

Cheers,
Jan Beulich July 2, 2020, 9:18 a.m. UTC | #17
On 02.07.2020 10:54, Julien Grall wrote:
> 
> 
> On 02/07/2020 09:50, Jan Beulich wrote:
>> On 02.07.2020 10:42, Julien Grall wrote:
>>> On 02/07/2020 09:29, Jan Beulich wrote:
>>>> I'm with Andrew here, fwiw, as long as the little bit of code that
>>>> is actually put in common/ or include/xen/ doesn't imply arbitrary
>>>> restrictions on acceptable values.
>>> Well yes the code is simple. However, the code as it is wouldn't be
>>> usuable on other architecture without additional work (aside arch
>>> specific code). For instance, there is no way to map the buffer outside
>>> of Xen as it is all x86 specific.
>>>
>>> If you want the allocation to be in the common code, then the
>>> infrastructure to map/unmap the buffer should also be in common code.
>>> Otherwise, there is no point to allocate it in common.
>>
>> I don't think I agree here - I see nothing wrong with exposing of
>> the memory being arch specific, when allocation is generic. This
>> is no different from, in just x86, allocation logic being common
>> to PV and HVM, but exposing being different for both.
> 
> Are you suggesting that the way it would be exposed may be different for 
> other architecture?

Why not? To take a possibly extreme example - consider an arch
where (for bare metal) the buffer is specified to appear at a
fixed range of addresses. This would then want to be this way
in the virtualized case as well. There'd be no point in using
any common logic mapping the buffer at a guest requested
address. Instead it would simply appear at the arch mandated
one, without the guest needing to take any action.

> If so, this is one more reason to not impose a way for allocating the 
> buffer in the common code until another arch add support for vmtrace.

I'm still not seeing why allocation and exposure need to be done
at the same place.

Jan
Julien Grall July 2, 2020, 9:57 a.m. UTC | #18
Hi,

On 02/07/2020 10:18, Jan Beulich wrote:
> On 02.07.2020 10:54, Julien Grall wrote:
>>
>>
>> On 02/07/2020 09:50, Jan Beulich wrote:
>>> On 02.07.2020 10:42, Julien Grall wrote:
>>>> On 02/07/2020 09:29, Jan Beulich wrote:
>>>>> I'm with Andrew here, fwiw, as long as the little bit of code that
>>>>> is actually put in common/ or include/xen/ doesn't imply arbitrary
>>>>> restrictions on acceptable values.
>>>> Well yes the code is simple. However, the code as it is wouldn't be
>>>> usuable on other architecture without additional work (aside arch
>>>> specific code). For instance, there is no way to map the buffer outside
>>>> of Xen as it is all x86 specific.
>>>>
>>>> If you want the allocation to be in the common code, then the
>>>> infrastructure to map/unmap the buffer should also be in common code.
>>>> Otherwise, there is no point to allocate it in common.
>>>
>>> I don't think I agree here - I see nothing wrong with exposing of
>>> the memory being arch specific, when allocation is generic. This
>>> is no different from, in just x86, allocation logic being common
>>> to PV and HVM, but exposing being different for both.
>>
>> Are you suggesting that the way it would be exposed may be different for
>> other architecture?
> 
> Why not? To take a possibly extreme example - consider an arch
> where (for bare metal) the buffer is specified to appear at a
> fixed range of addresses.

I am probably missing something here... The current goal is the buffer 
will be mapped in the dom0. Most likely the way to map it will be using 
the acquire hypercall (unless you invent a brand new one...).

For a guest, you could possibly reserve a fixed range and then map it 
when creating the vCPU in Xen. But then, you will likely want a fixed 
size... So why would you bother to ask the user to define the size?

Another way to do it, would be the toolstack to do the mapping. At which 
point, you still need an hypercall to do the mapping (probably the 
hypercall acquire).

> 
>> If so, this is one more reason to not impose a way for allocating the
>> buffer in the common code until another arch add support for vmtrace.
> 
> I'm still not seeing why allocation and exposure need to be done
> at the same place.

If I were going to add support for CoreSight on Arm, then the acquire 
hypercall is likely going to be the way to go for mapping the resource 
in the tools. At which point this will need to be common.

I am still not entirely happy to define the interface yet, but this 
would still be better than trying to make the allocation in common code 
and the leaving the mapping aside. After all, this is a "little bit" 
more code.

Cheers,
Jan Beulich July 2, 2020, 1:30 p.m. UTC | #19
On 02.07.2020 11:57, Julien Grall wrote:
> Hi,
> 
> On 02/07/2020 10:18, Jan Beulich wrote:
>> On 02.07.2020 10:54, Julien Grall wrote:
>>>
>>>
>>> On 02/07/2020 09:50, Jan Beulich wrote:
>>>> On 02.07.2020 10:42, Julien Grall wrote:
>>>>> On 02/07/2020 09:29, Jan Beulich wrote:
>>>>>> I'm with Andrew here, fwiw, as long as the little bit of code that
>>>>>> is actually put in common/ or include/xen/ doesn't imply arbitrary
>>>>>> restrictions on acceptable values.
>>>>> Well yes the code is simple. However, the code as it is wouldn't be
>>>>> usuable on other architecture without additional work (aside arch
>>>>> specific code). For instance, there is no way to map the buffer outside
>>>>> of Xen as it is all x86 specific.
>>>>>
>>>>> If you want the allocation to be in the common code, then the
>>>>> infrastructure to map/unmap the buffer should also be in common code.
>>>>> Otherwise, there is no point to allocate it in common.
>>>>
>>>> I don't think I agree here - I see nothing wrong with exposing of
>>>> the memory being arch specific, when allocation is generic. This
>>>> is no different from, in just x86, allocation logic being common
>>>> to PV and HVM, but exposing being different for both.
>>>
>>> Are you suggesting that the way it would be exposed may be different for
>>> other architecture?
>>
>> Why not? To take a possibly extreme example - consider an arch
>> where (for bare metal) the buffer is specified to appear at a
>> fixed range of addresses.
> 
> I am probably missing something here... The current goal is the buffer 
> will be mapped in the dom0. Most likely the way to map it will be using 
> the acquire hypercall (unless you invent a brand new one...).
> 
> For a guest, you could possibly reserve a fixed range and then map it 
> when creating the vCPU in Xen. But then, you will likely want a fixed 
> size... So why would you bother to ask the user to define the size?

Because there may be the option to only populate part of the fixed
range?

> Another way to do it, would be the toolstack to do the mapping. At which 
> point, you still need an hypercall to do the mapping (probably the 
> hypercall acquire).

There may not be any mapping to do in such a contrived, fixed-range
environment. This scenario was specifically to demonstrate that the
way the mapping gets done may be arch-specific (here: a no-op)
despite the allocation not being so.

Jan
Julien Grall July 2, 2020, 2:14 p.m. UTC | #20
Hi,

On 02/07/2020 14:30, Jan Beulich wrote:
> On 02.07.2020 11:57, Julien Grall wrote:
>> Hi,
>>
>> On 02/07/2020 10:18, Jan Beulich wrote:
>>> On 02.07.2020 10:54, Julien Grall wrote:
>>>>
>>>>
>>>> On 02/07/2020 09:50, Jan Beulich wrote:
>>>>> On 02.07.2020 10:42, Julien Grall wrote:
>>>>>> On 02/07/2020 09:29, Jan Beulich wrote:
>>>>>>> I'm with Andrew here, fwiw, as long as the little bit of code that
>>>>>>> is actually put in common/ or include/xen/ doesn't imply arbitrary
>>>>>>> restrictions on acceptable values.
>>>>>> Well yes the code is simple. However, the code as it is wouldn't be
>>>>>> usuable on other architecture without additional work (aside arch
>>>>>> specific code). For instance, there is no way to map the buffer outside
>>>>>> of Xen as it is all x86 specific.
>>>>>>
>>>>>> If you want the allocation to be in the common code, then the
>>>>>> infrastructure to map/unmap the buffer should also be in common code.
>>>>>> Otherwise, there is no point to allocate it in common.
>>>>>
>>>>> I don't think I agree here - I see nothing wrong with exposing of
>>>>> the memory being arch specific, when allocation is generic. This
>>>>> is no different from, in just x86, allocation logic being common
>>>>> to PV and HVM, but exposing being different for both.
>>>>
>>>> Are you suggesting that the way it would be exposed may be different for
>>>> other architecture?
>>>
>>> Why not? To take a possibly extreme example - consider an arch
>>> where (for bare metal) the buffer is specified to appear at a
>>> fixed range of addresses.
>>
>> I am probably missing something here... The current goal is the buffer
>> will be mapped in the dom0. Most likely the way to map it will be using
>> the acquire hypercall (unless you invent a brand new one...).
>>
>> For a guest, you could possibly reserve a fixed range and then map it
>> when creating the vCPU in Xen. But then, you will likely want a fixed
>> size... So why would you bother to ask the user to define the size?
> 
> Because there may be the option to only populate part of the fixed
> range?

It was yet another extreme case ;).

> 
>> Another way to do it, would be the toolstack to do the mapping. At which
>> point, you still need an hypercall to do the mapping (probably the
>> hypercall acquire).
> 
> There may not be any mapping to do in such a contrived, fixed-range
> environment. This scenario was specifically to demonstrate that the
> way the mapping gets done may be arch-specific (here: a no-op)
> despite the allocation not being so.
You are arguing on extreme cases which I don't think is really helpful 
here. Yes if you want to map at a fixed address in a guest you may not 
need the acquire hypercall. But in most of the other cases (see has for 
the tools) you will need it.

So what's the problem with requesting to have the acquire hypercall 
implemented in common code?

Cheers,
Jan Beulich July 2, 2020, 2:17 p.m. UTC | #21
On 02.07.2020 16:14, Julien Grall wrote:
> Hi,
> 
> On 02/07/2020 14:30, Jan Beulich wrote:
>> On 02.07.2020 11:57, Julien Grall wrote:
>>> Hi,
>>>
>>> On 02/07/2020 10:18, Jan Beulich wrote:
>>>> On 02.07.2020 10:54, Julien Grall wrote:
>>>>>
>>>>>
>>>>> On 02/07/2020 09:50, Jan Beulich wrote:
>>>>>> On 02.07.2020 10:42, Julien Grall wrote:
>>>>>>> On 02/07/2020 09:29, Jan Beulich wrote:
>>>>>>>> I'm with Andrew here, fwiw, as long as the little bit of code that
>>>>>>>> is actually put in common/ or include/xen/ doesn't imply arbitrary
>>>>>>>> restrictions on acceptable values.
>>>>>>> Well yes the code is simple. However, the code as it is wouldn't be
>>>>>>> usuable on other architecture without additional work (aside arch
>>>>>>> specific code). For instance, there is no way to map the buffer outside
>>>>>>> of Xen as it is all x86 specific.
>>>>>>>
>>>>>>> If you want the allocation to be in the common code, then the
>>>>>>> infrastructure to map/unmap the buffer should also be in common code.
>>>>>>> Otherwise, there is no point to allocate it in common.
>>>>>>
>>>>>> I don't think I agree here - I see nothing wrong with exposing of
>>>>>> the memory being arch specific, when allocation is generic. This
>>>>>> is no different from, in just x86, allocation logic being common
>>>>>> to PV and HVM, but exposing being different for both.
>>>>>
>>>>> Are you suggesting that the way it would be exposed may be different for
>>>>> other architecture?
>>>>
>>>> Why not? To take a possibly extreme example - consider an arch
>>>> where (for bare metal) the buffer is specified to appear at a
>>>> fixed range of addresses.
>>>
>>> I am probably missing something here... The current goal is the buffer
>>> will be mapped in the dom0. Most likely the way to map it will be using
>>> the acquire hypercall (unless you invent a brand new one...).
>>>
>>> For a guest, you could possibly reserve a fixed range and then map it
>>> when creating the vCPU in Xen. But then, you will likely want a fixed
>>> size... So why would you bother to ask the user to define the size?
>>
>> Because there may be the option to only populate part of the fixed
>> range?
> 
> It was yet another extreme case ;).

Yes, sure - just to demonstrate my point.

>>> Another way to do it, would be the toolstack to do the mapping. At which
>>> point, you still need an hypercall to do the mapping (probably the
>>> hypercall acquire).
>>
>> There may not be any mapping to do in such a contrived, fixed-range
>> environment. This scenario was specifically to demonstrate that the
>> way the mapping gets done may be arch-specific (here: a no-op)
>> despite the allocation not being so.
> You are arguing on extreme cases which I don't think is really helpful 
> here. Yes if you want to map at a fixed address in a guest you may not 
> need the acquire hypercall. But in most of the other cases (see has for 
> the tools) you will need it.
> 
> So what's the problem with requesting to have the acquire hypercall 
> implemented in common code?

Didn't we start out by you asking that there be as little common code
as possible for the time being? I have no issue with putting the
acquire implementation there ...

Jan
Julien Grall July 2, 2020, 2:31 p.m. UTC | #22
On 02/07/2020 15:17, Jan Beulich wrote:
> On 02.07.2020 16:14, Julien Grall wrote:
>> On 02/07/2020 14:30, Jan Beulich wrote:
>>> On 02.07.2020 11:57, Julien Grall wrote:
>>>> On 02/07/2020 10:18, Jan Beulich wrote:
>>>>> On 02.07.2020 10:54, Julien Grall wrote:
>>>>>> On 02/07/2020 09:50, Jan Beulich wrote:
>>>>>>> On 02.07.2020 10:42, Julien Grall wrote:
>>>>>>>> On 02/07/2020 09:29, Jan Beulich wrote:
>>>> Another way to do it, would be the toolstack to do the mapping. At which
>>>> point, you still need an hypercall to do the mapping (probably the
>>>> hypercall acquire).
>>>
>>> There may not be any mapping to do in such a contrived, fixed-range
>>> environment. This scenario was specifically to demonstrate that the
>>> way the mapping gets done may be arch-specific (here: a no-op)
>>> despite the allocation not being so.
>> You are arguing on extreme cases which I don't think is really helpful
>> here. Yes if you want to map at a fixed address in a guest you may not
>> need the acquire hypercall. But in most of the other cases (see has for
>> the tools) you will need it.
>>
>> So what's the problem with requesting to have the acquire hypercall
>> implemented in common code?
> 
> Didn't we start out by you asking that there be as little common code
> as possible for the time being?

Well as I said I am not in favor of having the allocation in common 
code, but if you want to keep it then you also want to implement 
map/unmap in the common code ([1], [2]).

> I have no issue with putting the
> acquire implementation there ...
This was definitely not clear given how you argued with extreme cases...

Cheers,

[1] <9a3f4d58-e5ad-c7a1-6c5f-42aa92101ca1@xen.org>
[2] <cf41855b-9e5e-13f2-9ab0-04b98f8b3cdd@xen.org>
Michał Leszczyński July 2, 2020, 8:28 p.m. UTC | #23
----- 2 lip 2020 o 16:31, Julien Grall julien@xen.org napisał(a):

> On 02/07/2020 15:17, Jan Beulich wrote:
>> On 02.07.2020 16:14, Julien Grall wrote:
>>> On 02/07/2020 14:30, Jan Beulich wrote:
>>>> On 02.07.2020 11:57, Julien Grall wrote:
>>>>> On 02/07/2020 10:18, Jan Beulich wrote:
>>>>>> On 02.07.2020 10:54, Julien Grall wrote:
>>>>>>> On 02/07/2020 09:50, Jan Beulich wrote:
>>>>>>>> On 02.07.2020 10:42, Julien Grall wrote:
>>>>>>>>> On 02/07/2020 09:29, Jan Beulich wrote:
>>>>> Another way to do it, would be the toolstack to do the mapping. At which
>>>>> point, you still need an hypercall to do the mapping (probably the
>>>>> hypercall acquire).
>>>>
>>>> There may not be any mapping to do in such a contrived, fixed-range
>>>> environment. This scenario was specifically to demonstrate that the
>>>> way the mapping gets done may be arch-specific (here: a no-op)
>>>> despite the allocation not being so.
>>> You are arguing on extreme cases which I don't think is really helpful
>>> here. Yes if you want to map at a fixed address in a guest you may not
>>> need the acquire hypercall. But in most of the other cases (see has for
>>> the tools) you will need it.
>>>
>>> So what's the problem with requesting to have the acquire hypercall
>>> implemented in common code?
>> 
>> Didn't we start out by you asking that there be as little common code
>> as possible for the time being?
> 
> Well as I said I am not in favor of having the allocation in common
> code, but if you want to keep it then you also want to implement
> map/unmap in the common code ([1], [2]).
> 
>> I have no issue with putting the
>> acquire implementation there ...
> This was definitely not clear given how you argued with extreme cases...
> 
> Cheers,
> 
> [1] <9a3f4d58-e5ad-c7a1-6c5f-42aa92101ca1@xen.org>
> [2] <cf41855b-9e5e-13f2-9ab0-04b98f8b3cdd@xen.org>
> 
> --
> Julien Grall


Guys,

could you express your final decision on this topic?

While I understand the discussion and the arguments you've raised,
I would like to know what particular elements should be moved where.

So are we going the abstract way, or the non-abstract, x86-only way?

Best regards,
Michał Leszczyński
CERT Polska
Michał Leszczyński July 2, 2020, 8:29 p.m. UTC | #24
----- 2 lip 2020 o 10:34, Jan Beulich jbeulich@suse.com napisał(a):

> On 02.07.2020 10:10, Roger Pau Monné wrote:
>> On Wed, Jul 01, 2020 at 10:42:55PM +0100, Andrew Cooper wrote:
>>> On 30/06/2020 13:33, Michał Leszczyński wrote:
>>>> diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
>>>> index ca94c2bedc..b73d824357 100644
>>>> --- a/xen/arch/x86/hvm/vmx/vmcs.c
>>>> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
>>>> @@ -291,6 +291,12 @@ static int vmx_init_vmcs_config(void)
>>>>          _vmx_cpu_based_exec_control &=
>>>>              ~(CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING);
>>>>  
>>>> +    rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
>>>> +
>>>> +    /* Check whether IPT is supported in VMX operation. */
>>>> +    vmtrace_supported = cpu_has_ipt &&
>>>> +                        (_vmx_misc_cap & VMX_MISC_PT_SUPPORTED);
>>>
>>> There is a subtle corner case here.  vmx_init_vmcs_config() is called on
>>> all CPUs, and is supposed to level things down safely if we find any
>>> asymmetry.
>>>
>>> If instead you go with something like this:
>>>
>>> diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
>>> index b73d824357..6960109183 100644
>>> --- a/xen/arch/x86/hvm/vmx/vmcs.c
>>> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
>>> @@ -294,8 +294,8 @@ static int vmx_init_vmcs_config(void)
>>>      rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
>>>  
>>>      /* Check whether IPT is supported in VMX operation. */
>>> -    vmtrace_supported = cpu_has_ipt &&
>>> -                        (_vmx_misc_cap & VMX_MISC_PT_SUPPORTED);
>>> +    if ( !(_vmx_misc_cap & VMX_MISC_PT_SUPPORTED) )
>>> +        vmtrace_supported = false;
>> 
>> This is also used during hotplug, so I'm not sure it's safe to turn
>> vmtrace_supported off during runtime, where VMs might be already using
>> it. IMO it would be easier to just set it on the BSP, and then refuse
>> to bring up any AP that doesn't have the feature.
> 
> +1
> 
> IOW I also don't think that "vmx_init_vmcs_config() ... is supposed to
> level things down safely". Instead I think the expectation is for
> CPU onlining to fail if a CPU lacks features compared to the BSP. As
> can be implied from what Roger says, doing like what you suggest may
> be fine during boot, but past that only at times where we know there's
> no user of a certain feature, and where discarding the feature flag
> won't lead to other inconsistencies (which may very well mean "never").
> 
> Jan


Ok, I will modify it in a way Roger suggested for the previous patch
version. CPU onlining will fail if there is an inconsistency.

Best regards,
Michał Leszczyński
CERT Polska
Julien Grall July 3, 2020, 7:58 a.m. UTC | #25
Hi,

On 02/07/2020 21:28, Michał Leszczyński wrote:
> ----- 2 lip 2020 o 16:31, Julien Grall julien@xen.org napisał(a):
> 
>> On 02/07/2020 15:17, Jan Beulich wrote:
>>> On 02.07.2020 16:14, Julien Grall wrote:
>>>> On 02/07/2020 14:30, Jan Beulich wrote:
>>>>> On 02.07.2020 11:57, Julien Grall wrote:
>>>>>> On 02/07/2020 10:18, Jan Beulich wrote:
>>>>>>> On 02.07.2020 10:54, Julien Grall wrote:
>>>>>>>> On 02/07/2020 09:50, Jan Beulich wrote:
>>>>>>>>> On 02.07.2020 10:42, Julien Grall wrote:
>>>>>>>>>> On 02/07/2020 09:29, Jan Beulich wrote:
>>>>>> Another way to do it, would be the toolstack to do the mapping. At which
>>>>>> point, you still need an hypercall to do the mapping (probably the
>>>>>> hypercall acquire).
>>>>>
>>>>> There may not be any mapping to do in such a contrived, fixed-range
>>>>> environment. This scenario was specifically to demonstrate that the
>>>>> way the mapping gets done may be arch-specific (here: a no-op)
>>>>> despite the allocation not being so.
>>>> You are arguing on extreme cases which I don't think is really helpful
>>>> here. Yes if you want to map at a fixed address in a guest you may not
>>>> need the acquire hypercall. But in most of the other cases (see has for
>>>> the tools) you will need it.
>>>>
>>>> So what's the problem with requesting to have the acquire hypercall
>>>> implemented in common code?
>>>
>>> Didn't we start out by you asking that there be as little common code
>>> as possible for the time being?
>>
>> Well as I said I am not in favor of having the allocation in common
>> code, but if you want to keep it then you also want to implement
>> map/unmap in the common code ([1], [2]).
>>
>>> I have no issue with putting the
>>> acquire implementation there ...
>> This was definitely not clear given how you argued with extreme cases...
>>
>> Cheers,
>>
>> [1] <9a3f4d58-e5ad-c7a1-6c5f-42aa92101ca1@xen.org>
>> [2] <cf41855b-9e5e-13f2-9ab0-04b98f8b3cdd@xen.org>
>>
>> --
>> Julien Grall
> 
> 
> Guys,
> 
> could you express your final decision on this topic?

Can you move the acquire implementation from x86 to common code?

Cheers,
Michał Leszczyński July 4, 2020, 7:16 p.m. UTC | #26
----- 3 lip 2020 o 9:58, Julien Grall julien@xen.org napisał(a):

> Hi,
> 
> On 02/07/2020 21:28, Michał Leszczyński wrote:
>> ----- 2 lip 2020 o 16:31, Julien Grall julien@xen.org napisał(a):
>> 
>>> On 02/07/2020 15:17, Jan Beulich wrote:
>>>> On 02.07.2020 16:14, Julien Grall wrote:
>>>>> On 02/07/2020 14:30, Jan Beulich wrote:
>>>>>> On 02.07.2020 11:57, Julien Grall wrote:
>>>>>>> On 02/07/2020 10:18, Jan Beulich wrote:
>>>>>>>> On 02.07.2020 10:54, Julien Grall wrote:
>>>>>>>>> On 02/07/2020 09:50, Jan Beulich wrote:
>>>>>>>>>> On 02.07.2020 10:42, Julien Grall wrote:
>>>>>>>>>>> On 02/07/2020 09:29, Jan Beulich wrote:
>>>>>>> Another way to do it, would be the toolstack to do the mapping. At which
>>>>>>> point, you still need an hypercall to do the mapping (probably the
>>>>>>> hypercall acquire).
>>>>>>
>>>>>> There may not be any mapping to do in such a contrived, fixed-range
>>>>>> environment. This scenario was specifically to demonstrate that the
>>>>>> way the mapping gets done may be arch-specific (here: a no-op)
>>>>>> despite the allocation not being so.
>>>>> You are arguing on extreme cases which I don't think is really helpful
>>>>> here. Yes if you want to map at a fixed address in a guest you may not
>>>>> need the acquire hypercall. But in most of the other cases (see has for
>>>>> the tools) you will need it.
>>>>>
>>>>> So what's the problem with requesting to have the acquire hypercall
>>>>> implemented in common code?
>>>>
>>>> Didn't we start out by you asking that there be as little common code
>>>> as possible for the time being?
>>>
>>> Well as I said I am not in favor of having the allocation in common
>>> code, but if you want to keep it then you also want to implement
>>> map/unmap in the common code ([1], [2]).
>>>
>>>> I have no issue with putting the
>>>> acquire implementation there ...
>>> This was definitely not clear given how you argued with extreme cases...
>>>
>>> Cheers,
>>>
>>> [1] <9a3f4d58-e5ad-c7a1-6c5f-42aa92101ca1@xen.org>
>>> [2] <cf41855b-9e5e-13f2-9ab0-04b98f8b3cdd@xen.org>
>>>
>>> --
>>> Julien Grall
>> 
>> 
>> Guys,
>> 
>> could you express your final decision on this topic?
> 
> Can you move the acquire implementation from x86 to common code?
> 
> Cheers,
> 
> --
> Julien Grall


Ok, sure. This will be done within the patch v5.

Best regards,
Michał Leszczyński
CERT Polska
diff mbox series

Patch

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index ca94c2bedc..b73d824357 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -291,6 +291,12 @@  static int vmx_init_vmcs_config(void)
         _vmx_cpu_based_exec_control &=
             ~(CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING);
 
+    rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
+
+    /* Check whether IPT is supported in VMX operation. */
+    vmtrace_supported = cpu_has_ipt &&
+                        (_vmx_misc_cap & VMX_MISC_PT_SUPPORTED);
+
     if ( _vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
     {
         min = 0;
@@ -305,7 +311,6 @@  static int vmx_init_vmcs_config(void)
                SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS |
                SECONDARY_EXEC_XSAVES |
                SECONDARY_EXEC_TSC_SCALING);
-        rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
         if ( _vmx_misc_cap & VMX_MISC_VMWRITE_ALL )
             opt |= SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
         if ( opt_vpid_enabled )
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 7cc9526139..0a33e0dfd6 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -82,6 +82,8 @@  struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;
 
 vcpu_info_t dummy_vcpu_info;
 
+bool_t vmtrace_supported;
+
 static void __domain_finalise_shutdown(struct domain *d)
 {
     struct vcpu *v;
diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index f790d5c1f8..8d7955dd87 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -104,6 +104,7 @@ 
 #define cpu_has_clwb            boot_cpu_has(X86_FEATURE_CLWB)
 #define cpu_has_avx512er        boot_cpu_has(X86_FEATURE_AVX512ER)
 #define cpu_has_avx512cd        boot_cpu_has(X86_FEATURE_AVX512CD)
+#define cpu_has_ipt             boot_cpu_has(X86_FEATURE_IPT)
 #define cpu_has_sha             boot_cpu_has(X86_FEATURE_SHA)
 #define cpu_has_avx512bw        boot_cpu_has(X86_FEATURE_AVX512BW)
 #define cpu_has_avx512vl        boot_cpu_has(X86_FEATURE_AVX512VL)
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 906810592f..0e9a0b8de6 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -283,6 +283,7 @@  extern u32 vmx_secondary_exec_control;
 #define VMX_VPID_INVVPID_SINGLE_CONTEXT_RETAINING_GLOBAL 0x80000000000ULL
 extern u64 vmx_ept_vpid_cap;
 
+#define VMX_MISC_PT_SUPPORTED                   0x00004000
 #define VMX_MISC_CR3_TARGET                     0x01ff0000
 #define VMX_MISC_VMWRITE_ALL                    0x20000000
 
diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
index 5ca35d9d97..0d3f15f628 100644
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -217,6 +217,7 @@  XEN_CPUFEATURE(SMAP,          5*32+20) /*S  Supervisor Mode Access Prevention */
 XEN_CPUFEATURE(AVX512_IFMA,   5*32+21) /*A  AVX-512 Integer Fused Multiply Add */
 XEN_CPUFEATURE(CLFLUSHOPT,    5*32+23) /*A  CLFLUSHOPT instruction */
 XEN_CPUFEATURE(CLWB,          5*32+24) /*A  CLWB instruction */
+XEN_CPUFEATURE(IPT,           5*32+25) /*   Intel Processor Trace */
 XEN_CPUFEATURE(AVX512PF,      5*32+26) /*A  AVX-512 Prefetch Instructions */
 XEN_CPUFEATURE(AVX512ER,      5*32+27) /*A  AVX-512 Exponent & Reciprocal Instrs */
 XEN_CPUFEATURE(AVX512CD,      5*32+28) /*A  AVX-512 Conflict Detection Instrs */
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index 7e51d361de..6c786a56c2 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -130,4 +130,6 @@  struct vnuma_info {
 
 void vnuma_destroy(struct vnuma_info *vnuma);
 
+extern bool_t vmtrace_supported;
+
 #endif /* __XEN_DOMAIN_H__ */