diff mbox series

[v15,09/13] tsc: Use the GUEST_TSC_FREQ MSR for discovering TSC frequency

Message ID 20241203090045.942078-10-nikunj@amd.com (mailing list archive)
State New, archived
Headers show
Series Add Secure TSC support for SNP guests | expand

Commit Message

Nikunj A Dadhania Dec. 3, 2024, 9 a.m. UTC
Calibrating the TSC frequency using the kvmclock is not correct for
SecureTSC enabled guests. Use the platform provided TSC frequency via the
GUEST_TSC_FREQ MSR (C001_0134h).

Signed-off-by: Nikunj A Dadhania <nikunj@amd.com>
---
 arch/x86/include/asm/sev.h |  2 ++
 arch/x86/coco/sev/core.c   | 16 ++++++++++++++++
 arch/x86/kernel/tsc.c      |  5 +++++
 3 files changed, 23 insertions(+)

Comments

Tom Lendacky Dec. 16, 2024, 4:31 p.m. UTC | #1
On 12/3/24 03:00, Nikunj A Dadhania wrote:
> Calibrating the TSC frequency using the kvmclock is not correct for
> SecureTSC enabled guests. Use the platform provided TSC frequency via the
> GUEST_TSC_FREQ MSR (C001_0134h).
> 
> Signed-off-by: Nikunj A Dadhania <nikunj@amd.com>
> ---
>  arch/x86/include/asm/sev.h |  2 ++
>  arch/x86/coco/sev/core.c   | 16 ++++++++++++++++
>  arch/x86/kernel/tsc.c      |  5 +++++
>  3 files changed, 23 insertions(+)
> 
> diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
> index 9fd02efef08e..c4dca06b3b01 100644
> --- a/arch/x86/include/asm/sev.h
> +++ b/arch/x86/include/asm/sev.h
> @@ -493,6 +493,7 @@ int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req
>  			   struct snp_guest_request_ioctl *rio);
>  
>  void __init snp_secure_tsc_prepare(void);
> +void __init snp_secure_tsc_init(void);
>  
>  #else	/* !CONFIG_AMD_MEM_ENCRYPT */
>  
> @@ -536,6 +537,7 @@ static inline void snp_msg_free(struct snp_msg_desc *mdesc) { }
>  static inline int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req,
>  					 struct snp_guest_request_ioctl *rio) { return -ENODEV; }
>  static inline void __init snp_secure_tsc_prepare(void) { }
> +static inline void __init snp_secure_tsc_init(void) { }
>  
>  #endif	/* CONFIG_AMD_MEM_ENCRYPT */
>  
> diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
> index 59c5e716fdd1..1bc668883058 100644
> --- a/arch/x86/coco/sev/core.c
> +++ b/arch/x86/coco/sev/core.c
> @@ -3279,3 +3279,19 @@ void __init snp_secure_tsc_prepare(void)
>  
>  	pr_debug("SecureTSC enabled");
>  }
> +
> +static unsigned long securetsc_get_tsc_khz(void)
> +{
> +	unsigned long long tsc_freq_mhz;
> +
> +	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
> +	rdmsrl(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);

This should never change, right? Can this be put in snp_secure_tsc_init()
and just return a saved value that is already in khz form? No reason to
perform the MSR access and multiplication every time.

Thanks,
Tom

> +
> +	return (unsigned long)(tsc_freq_mhz * 1000);
> +}
> +
> +void __init snp_secure_tsc_init(void)
> +{
> +	x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
> +	x86_platform.calibrate_tsc = securetsc_get_tsc_khz;
> +}
> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
> index 67aeaba4ba9c..c0eef924b84e 100644
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -30,6 +30,7 @@
>  #include <asm/i8259.h>
>  #include <asm/topology.h>
>  #include <asm/uv/uv.h>
> +#include <asm/sev.h>
>  
>  unsigned int __read_mostly cpu_khz;	/* TSC clocks / usec, not used here */
>  EXPORT_SYMBOL(cpu_khz);
> @@ -1515,6 +1516,10 @@ void __init tsc_early_init(void)
>  	/* Don't change UV TSC multi-chassis synchronization */
>  	if (is_early_uv_system())
>  		return;
> +
> +	if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
> +		snp_secure_tsc_init();
> +
>  	if (!determine_cpu_tsc_frequencies(true))
>  		return;
>  	tsc_enable_sched_clock();
Nikunj A Dadhania Dec. 17, 2024, 6:27 a.m. UTC | #2
On 12/16/2024 10:01 PM, Tom Lendacky wrote:
> On 12/3/24 03:00, Nikunj A Dadhania wrote:
>> Calibrating the TSC frequency using the kvmclock is not correct for
>> SecureTSC enabled guests. Use the platform provided TSC frequency via the
>> GUEST_TSC_FREQ MSR (C001_0134h).
>>
>> Signed-off-by: Nikunj A Dadhania <nikunj@amd.com>
>> ---
>>  arch/x86/include/asm/sev.h |  2 ++
>>  arch/x86/coco/sev/core.c   | 16 ++++++++++++++++
>>  arch/x86/kernel/tsc.c      |  5 +++++
>>  3 files changed, 23 insertions(+)
>>
>> diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
>> index 9fd02efef08e..c4dca06b3b01 100644
>> --- a/arch/x86/include/asm/sev.h
>> +++ b/arch/x86/include/asm/sev.h
>> @@ -493,6 +493,7 @@ int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req
>>  			   struct snp_guest_request_ioctl *rio);
>>  
>>  void __init snp_secure_tsc_prepare(void);
>> +void __init snp_secure_tsc_init(void);
>>  
>>  #else	/* !CONFIG_AMD_MEM_ENCRYPT */
>>  
>> @@ -536,6 +537,7 @@ static inline void snp_msg_free(struct snp_msg_desc *mdesc) { }
>>  static inline int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req,
>>  					 struct snp_guest_request_ioctl *rio) { return -ENODEV; }
>>  static inline void __init snp_secure_tsc_prepare(void) { }
>> +static inline void __init snp_secure_tsc_init(void) { }
>>  
>>  #endif	/* CONFIG_AMD_MEM_ENCRYPT */
>>  
>> diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
>> index 59c5e716fdd1..1bc668883058 100644
>> --- a/arch/x86/coco/sev/core.c
>> +++ b/arch/x86/coco/sev/core.c
>> @@ -3279,3 +3279,19 @@ void __init snp_secure_tsc_prepare(void)
>>  
>>  	pr_debug("SecureTSC enabled");
>>  }
>> +
>> +static unsigned long securetsc_get_tsc_khz(void)
>> +{
>> +	unsigned long long tsc_freq_mhz;
>> +
>> +	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
>> +	rdmsrl(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);
> 
> This should never change, right? Can this be put in snp_secure_tsc_init()
> and just return a saved value that is already in khz form? No reason to
> perform the MSR access and multiplication every time.

This happens a couple of times during the boot, so I think this does not
have much overhead. Something like below ?

diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index c7870294a957..69b65c4c850c 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -103,6 +103,7 @@ static u64 secrets_pa __ro_after_init;
  */
 static u64 snp_tsc_scale __ro_after_init;
 static u64 snp_tsc_offset __ro_after_init;
+static u64 snp_tsc_freq_khz __ro_after_init;
 
 /* #VC handler runtime per-CPU data */
 struct sev_es_runtime_data {
@@ -3282,16 +3283,18 @@ void __init snp_secure_tsc_prepare(void)
 
 static unsigned long securetsc_get_tsc_khz(void)
 {
-	unsigned long long tsc_freq_mhz;
-
 	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
-	rdmsrl(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);
 
-	return (unsigned long)(tsc_freq_mhz * 1000);
+	return snp_tsc_freq_khz;
 }
 
 void __init snp_secure_tsc_init(void)
 {
+	unsigned long long tsc_freq_mhz;
+
+	rdmsrl(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);
+	snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000);
+
 	x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
 	x86_platform.calibrate_tsc = securetsc_get_tsc_khz;
 }


---

Regards
Nikunj
Tom Lendacky Dec. 17, 2024, 7:05 a.m. UTC | #3
On 12/17/24 00:27, Nikunj A Dadhania wrote:
> On 12/16/2024 10:01 PM, Tom Lendacky wrote:
>> On 12/3/24 03:00, Nikunj A Dadhania wrote:
>>> Calibrating the TSC frequency using the kvmclock is not correct for
>>> SecureTSC enabled guests. Use the platform provided TSC frequency via the
>>> GUEST_TSC_FREQ MSR (C001_0134h).
>>>
>>> Signed-off-by: Nikunj A Dadhania <nikunj@amd.com>
>>> ---
>>>  arch/x86/include/asm/sev.h |  2 ++
>>>  arch/x86/coco/sev/core.c   | 16 ++++++++++++++++
>>>  arch/x86/kernel/tsc.c      |  5 +++++
>>>  3 files changed, 23 insertions(+)
>>>

> @@ -3282,16 +3283,18 @@ void __init snp_secure_tsc_prepare(void)
>  
>  static unsigned long securetsc_get_tsc_khz(void)
>  {
> -	unsigned long long tsc_freq_mhz;
> -
>  	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);

I was thinking even this can be moved.

Thanks,
Tom

> -	rdmsrl(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);
>  
> -	return (unsigned long)(tsc_freq_mhz * 1000);
> +	return snp_tsc_freq_khz;
>  }
>  
>  void __init snp_secure_tsc_init(void)
>  {
> +	unsigned long long tsc_freq_mhz;
> +
> +	rdmsrl(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);
> +	snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000);
> +
>  	x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
>  	x86_platform.calibrate_tsc = securetsc_get_tsc_khz;
>  }
> 
> 
> ---
> 
> Regards
> Nikunj
Nikunj A Dadhania Dec. 17, 2024, 7:57 a.m. UTC | #4
On 12/17/2024 12:35 PM, Tom Lendacky wrote:
> On 12/17/24 00:27, Nikunj A Dadhania wrote:
>> On 12/16/2024 10:01 PM, Tom Lendacky wrote:
>>> On 12/3/24 03:00, Nikunj A Dadhania wrote:
>>>> Calibrating the TSC frequency using the kvmclock is not correct for
>>>> SecureTSC enabled guests. Use the platform provided TSC frequency via the
>>>> GUEST_TSC_FREQ MSR (C001_0134h).
>>>>
>>>> Signed-off-by: Nikunj A Dadhania <nikunj@amd.com>
>>>> ---
>>>>  arch/x86/include/asm/sev.h |  2 ++
>>>>  arch/x86/coco/sev/core.c   | 16 ++++++++++++++++
>>>>  arch/x86/kernel/tsc.c      |  5 +++++
>>>>  3 files changed, 23 insertions(+)
>>>>
> 
>> @@ -3282,16 +3283,18 @@ void __init snp_secure_tsc_prepare(void)
>>  
>>  static unsigned long securetsc_get_tsc_khz(void)
>>  {
>> -	unsigned long long tsc_freq_mhz;
>> -
>>  	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
> 
> I was thinking even this can be moved.

Yes, I am testing that out. If there is no dependency, I will move it.

Regards,
Nikunj
Borislav Petkov Dec. 30, 2024, 11:29 a.m. UTC | #5
On Tue, Dec 03, 2024 at 02:30:41PM +0530, Nikunj A Dadhania wrote:
> Subject: Re: [PATCH v15 09/13] tsc: Use the GUEST_TSC_FREQ MSR for...

The tip tree preferred format for patch subject prefixes is
'subsys/component:', e.g. 'x86/apic:', 'x86/mm/fault:', 'sched/fair:',
'genirq/core:'. Please do not use file names or complete file paths as
prefix. 'git log path/to/file' should give you a reasonable hint in most
cases.

Audit your whole set pls.

> +void __init snp_secure_tsc_init(void)
> +{
> +	x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
> +	x86_platform.calibrate_tsc = securetsc_get_tsc_khz;

The fact that you assign the same function to two different function ptrs
already hints at some sort of improper functionality split.

> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
> index 67aeaba4ba9c..c0eef924b84e 100644
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -30,6 +30,7 @@
>  #include <asm/i8259.h>
>  #include <asm/topology.h>
>  #include <asm/uv/uv.h>
> +#include <asm/sev.h>
>  
>  unsigned int __read_mostly cpu_khz;	/* TSC clocks / usec, not used here */
>  EXPORT_SYMBOL(cpu_khz);
> @@ -1515,6 +1516,10 @@ void __init tsc_early_init(void)
>  	/* Don't change UV TSC multi-chassis synchronization */
>  	if (is_early_uv_system())
>  		return;
> +
> +	if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
> +		snp_secure_tsc_init();

diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index aac066d798ef..24e7c6cf3e29 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -3287,6 +3287,9 @@ static unsigned long securetsc_get_tsc_khz(void)
 
 void __init snp_secure_tsc_init(void)
 {
+	if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
+		return;
+
 	x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
 	x86_platform.calibrate_tsc = securetsc_get_tsc_khz;
 }
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index c0eef924b84e..0864b314c26a 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1517,8 +1517,7 @@ void __init tsc_early_init(void)
 	if (is_early_uv_system())
 		return;
 
-	if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
-		snp_secure_tsc_init();
+	snp_secure_tsc_init();
 
 	if (!determine_cpu_tsc_frequencies(true))
 		return;
Nikunj A Dadhania Jan. 1, 2025, 8:56 a.m. UTC | #6
On 12/30/2024 4:59 PM, Borislav Petkov wrote:
> On Tue, Dec 03, 2024 at 02:30:41PM +0530, Nikunj A Dadhania wrote:
>> Subject: Re: [PATCH v15 09/13] tsc: Use the GUEST_TSC_FREQ MSR for...
> 
> The tip tree preferred format for patch subject prefixes is
> 'subsys/component:', e.g. 'x86/apic:', 'x86/mm/fault:', 'sched/fair:',
> 'genirq/core:'. Please do not use file names or complete file paths as
> prefix. 'git log path/to/file' should give you a reasonable hint in most
> cases.
> 
> Audit your whole set pls.
> 

Sure, will update

>> +void __init snp_secure_tsc_init(void)
>> +{
>> +	x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
>> +	x86_platform.calibrate_tsc = securetsc_get_tsc_khz;
> 
> The fact that you assign the same function to two different function ptrs
> already hints at some sort of improper functionality split.

As kvm-clock would have set the callbacks, I need to point them to securetsc_get_tsc_khz().

arch/x86/kernel/kvmclock.c:     x86_platform.calibrate_tsc = kvm_get_tsc_khz;
arch/x86/kernel/kvmclock.c:     x86_platform.calibrate_cpu = kvm_get_tsc_khz;

For virtualized environments, I see that all of them are assigning the same functions to different function ptrs.

arch/x86/kernel/cpu/mshyperv.c:         x86_platform.calibrate_tsc = hv_get_tsc_khz;
arch/x86/kernel/cpu/mshyperv.c:         x86_platform.calibrate_cpu = hv_get_tsc_khz;
arch/x86/kernel/cpu/vmware.c:           x86_platform.calibrate_tsc = vmware_get_tsc_khz;
arch/x86/kernel/cpu/vmware.c:           x86_platform.calibrate_cpu = vmware_get_tsc_khz;
arch/x86/kernel/jailhouse.c:    x86_platform.calibrate_cpu              = jailhouse_get_tsc;
arch/x86/kernel/jailhouse.c:    x86_platform.calibrate_tsc              = jailhouse_get_tsc;

> 
>> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
>> index 67aeaba4ba9c..c0eef924b84e 100644
>> --- a/arch/x86/kernel/tsc.c
>> +++ b/arch/x86/kernel/tsc.c
>> @@ -30,6 +30,7 @@
>>  #include <asm/i8259.h>
>>  #include <asm/topology.h>
>>  #include <asm/uv/uv.h>
>> +#include <asm/sev.h>
>>  
>>  unsigned int __read_mostly cpu_khz;	/* TSC clocks / usec, not used here */
>>  EXPORT_SYMBOL(cpu_khz);
>> @@ -1515,6 +1516,10 @@ void __init tsc_early_init(void)
>>  	/* Don't change UV TSC multi-chassis synchronization */
>>  	if (is_early_uv_system())
>>  		return;
>> +
>> +	if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
>> +		snp_secure_tsc_init();
> 
> diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
> index aac066d798ef..24e7c6cf3e29 100644
> --- a/arch/x86/coco/sev/core.c
> +++ b/arch/x86/coco/sev/core.c
> @@ -3287,6 +3287,9 @@ static unsigned long securetsc_get_tsc_khz(void)
>  
>  void __init snp_secure_tsc_init(void)
>  {
> +	if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
> +		return;
> +
>  	x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
>  	x86_platform.calibrate_tsc = securetsc_get_tsc_khz;
>  }
> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
> index c0eef924b84e..0864b314c26a 100644
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -1517,8 +1517,7 @@ void __init tsc_early_init(void)
>  	if (is_early_uv_system())
>  		return;
>  
> -	if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
> -		snp_secure_tsc_init();
> +	snp_secure_tsc_init();
>  
>  	if (!determine_cpu_tsc_frequencies(true))
>  		return;
> 

Sure will update.

Regards,
Nikunj
Borislav Petkov Jan. 1, 2025, 4:15 p.m. UTC | #7
On Wed, Jan 01, 2025 at 02:26:04PM +0530, Nikunj A. Dadhania wrote:
> As kvm-clock would have set the callbacks, I need to point them to securetsc_get_tsc_khz().
> 
> arch/x86/kernel/kvmclock.c:     x86_platform.calibrate_tsc = kvm_get_tsc_khz;
> arch/x86/kernel/kvmclock.c:     x86_platform.calibrate_cpu = kvm_get_tsc_khz;
> 
> For virtualized environments, I see that all of them are assigning the same functions to different function ptrs.

So just because the others do it, you should do it too without even figuring
out *why*?

How about you do some git archeology and get to the actual reason why there
are two different function pointers and then analyze whether a secure TSC
guest can use the same function for both?
Nikunj A Dadhania Jan. 2, 2025, 5:10 a.m. UTC | #8
On 1/1/2025 9:45 PM, Borislav Petkov wrote:
> On Wed, Jan 01, 2025 at 02:26:04PM +0530, Nikunj A. Dadhania wrote:
>> As kvm-clock would have set the callbacks, I need to point them to securetsc_get_tsc_khz().
>>
>> arch/x86/kernel/kvmclock.c:     x86_platform.calibrate_tsc = kvm_get_tsc_khz;
>> arch/x86/kernel/kvmclock.c:     x86_platform.calibrate_cpu = kvm_get_tsc_khz;
>>
>> For virtualized environments, I see that all of them are assigning the same functions to different function ptrs.
> 
> So just because the others do it, you should do it too without even figuring
> out *why*?

Again: As kvm-clock has over-ridden both the callbacks, SecureTSC needs to override them with its own.

Regards,
Nikunj
Borislav Petkov Jan. 2, 2025, 9:17 a.m. UTC | #9
On Thu, Jan 02, 2025 at 10:40:05AM +0530, Nikunj A. Dadhania wrote:
> Again: As kvm-clock has over-ridden both the callbacks, SecureTSC needs to
> override them with its own.

Again?

Where do you state this fact?

Because I don't see it in the commit message:

"Calibrating the TSC frequency using the kvmclock is not correct for
SecureTSC enabled guests. Use the platform provided TSC frequency via the
GUEST_TSC_FREQ MSR (C001_0134h)."

Yes, you had this in your reply but that's not good enough.

So again: you need to explain exactly *why* you're doing stuff in patches
because I don't have a crystal ball and I don't have special capabilities of
reading people's minds. If I had those, I wouldn't be doing this.

And if you had read my reply properly you would've realized that this is not
really what I'm asking. I'm asking why you have to assign the *same* function
to both function pointers.

And if you had done some git archeology, you would've found this:

aa297292d708 ("x86/tsc: Enumerate SKL cpu_khz and tsc_khz via CPUID")

and then you would've been able to state that you can assign the same function
to both function ptrs because the difference between CPU base and TSC
frequency does not apply in this case.

But that's too much to ask, right? :-(
Nikunj A Dadhania Jan. 2, 2025, 10:01 a.m. UTC | #10
On 1/2/2025 2:47 PM, Borislav Petkov wrote:
> On Thu, Jan 02, 2025 at 10:40:05AM +0530, Nikunj A. Dadhania wrote:
>> Again: As kvm-clock has over-ridden both the callbacks, SecureTSC needs to
>> override them with its own.
> 
> Again?
> 
> Where do you state this fact?
> 
> Because I don't see it in the commit message:
> 
> "Calibrating the TSC frequency using the kvmclock is not correct for
> SecureTSC enabled guests. Use the platform provided TSC frequency via the
> GUEST_TSC_FREQ MSR (C001_0134h)."
> 
> Yes, you had this in your reply but that's not good enough.

Sure, how about the below:

For SecureTSC enabled guests, TSC frequency should be obtained from the platform 
provided GUEST_TSC_FREQ MSR (C001_0134h). As paravirt clock(kvm-clock) has over-ridden 
x86_platform.calibrate_cpu() and x86_platform.calibrate_tsc() callbacks, 
SecureTSC needs to override them with its own callbacks.


Regards
Nikunj
Borislav Petkov Jan. 2, 2025, 10:45 a.m. UTC | #11
On Thu, Jan 02, 2025 at 03:31:49PM +0530, Nikunj A. Dadhania wrote:
> Sure, how about the below:
> 
> For SecureTSC enabled guests, TSC frequency should be obtained from the platform 
> provided GUEST_TSC_FREQ MSR (C001_0134h). As paravirt clock(kvm-clock) has over-ridden 
> x86_platform.calibrate_cpu() and x86_platform.calibrate_tsc() callbacks, 
> SecureTSC needs to override them with its own callbacks.

Not really.

It's like you're in a contest of "how to write a commit message which is the
shortest and has as less information as possible."

Go back, read the subthread and summarize, please, what has been discussed
here and for patch 12.

I'm missing the big picture about the relationship between kvmclock and TSC
in STSC guests. 

After asking so many questions, it sounds to me like this patch and patch 12
should be merged into one and there it should be explained what the strategy
is when a STSC guest loads and how kvmclock is supposed to be handled, what is
allowed, what is warned about, when the guest terminates what is tainted,
yadda yadda. 

This all belongs in a single patch logically.
Nikunj A Dadhania Jan. 2, 2025, 1:10 p.m. UTC | #12
On 1/2/2025 4:15 PM, Borislav Petkov wrote:
> On Thu, Jan 02, 2025 at 03:31:49PM +0530, Nikunj A. Dadhania wrote:
>> Sure, how about the below:
>>
>> For SecureTSC enabled guests, TSC frequency should be obtained from the platform 
>> provided GUEST_TSC_FREQ MSR (C001_0134h). As paravirt clock(kvm-clock) has over-ridden 
>> x86_platform.calibrate_cpu() and x86_platform.calibrate_tsc() callbacks, 
>> SecureTSC needs to override them with its own callbacks.
> 
> Not really.
> 
> It's like you're in a contest of "how to write a commit message which is the
> shortest and has as less information as possible."

That is not the goal :-)

Patch 9

---------------------------------------------------------------------------
x86/tsc: Use the GUEST_TSC_FREQ MSR for discovering TSC frequency

Although the kernel switches over to stable TSC clocksource instead of
kvm-clock, TSC frequency calibration still keeps on using the kvm-clock
based frequency calibration. This is due to kvmclock_init() updating the
x86_platform's CPU and TSC callbacks unconditionally.

For SecureTSC enabled guests, use the GUEST_TSC_FREQ MSR to discover the
TSC frequency instead of relying on kvm-clock based frequency calibration.
Override both CPU and TSC frequency calibration callbacks with
securetsc_get_tsc_khz(). As difference between CPU base and TSC frequency
does not apply in this case same callback is being used.
---------------------------------------------------------------------------


> 
> Go back, read the subthread and summarize, please, what has been discussed
> here and for patch 12.

Here is the updated commit log for patch 12:

---------------------------------------------------------------------------
x86/kvmclock: Warn when kvmclock is selected for SecureTSC enabled guests

Warn users when kvmclock is selected as the clock source for SecureTSC enabled
guests. Users can change the clock source to kvm-clock using the sysfs interface
while running on a Secure TSC enabled guest. Switching to the hypervisor-controlled
kvmclock defeats the purpose of using SecureTSC.

Taint the kernel and issue a warning to the user when the clock source switches
to kvmclock, ensuring they are aware of the change and its implications.

---------------------------------------------------------------------------

 
> I'm missing the big picture about the relationship between kvmclock and TSC
> in STSC guests. 

kvmclock_init() always runs before tsc_early_init(). kvm-clock registers the
following during the initialization:

1) TSC calibration callbacks (addressed by patch 9)
2) sched clock (addressed by patch 11)
3) kvm-clock as the clocksource (addressed by patch 10)
4) wall clock callbacks (we don't have a solution for this yet)

I had a summary about this here [1], below snippet with slight modifications after review:

---
To summarise this thread with respect to TSC vs KVM clock, there three key questions:

1) When should kvmclock init be done?
2) How should the TSC frequency be discovered?
3) What should be the sched clock source and how should it be selected in a generic way?

○ Legacy CPU/VMs: VMs running on platforms without non-stop/constant TSC 
  + kvm-clock should be registered before tsc-early/tsc
  + Need to calibrate TSC frequency
  + Use kvmclock wallclock
  + Use kvmclock for sched clock

○ Modern CPU/VMs: VMs running on platforms supporting constant, non-stop and reliable TSC
  + kvm-clock should be registered before tsc-early/tsc
  + TSC Frequency:
      For SecureTSC: GUEST_TSC_FREQ MSR (C001_0134h) provides the TSC frequency, other 
      AMD guests need to calibrate the TSC frequency.
  + Use kvmclock wallclock
  + Use TSC for sched clock

----

> 
> After asking so many questions, it sounds to me like this patch and patch 12
> should be merged into one and there it should be explained what the strategy
> is when a STSC guest loads and how kvmclock is supposed to be handled, what is
> allowed, what is warned about, when the guest terminates what is tainted,
> yadda yadda. 
> > This all belongs in a single patch logically.



Regards
Nikunj
 
1: https://lore.kernel.org/lkml/64813123-e1e2-17e2-19e8-bd5c852b6a32@amd.com/
Borislav Petkov Jan. 3, 2025, 12:04 p.m. UTC | #13
On Thu, Jan 02, 2025 at 06:40:38PM +0530, Nikunj A. Dadhania wrote:
> ---------------------------------------------------------------------------
> x86/tsc: Use the GUEST_TSC_FREQ MSR for discovering TSC frequency
> 
> Although the kernel switches over to stable TSC clocksource instead of
> kvm-clock, TSC frequency calibration still keeps on using the kvm-clock
> based frequency calibration. This is due to kvmclock_init() updating the
> x86_platform's CPU and TSC callbacks unconditionally.
> 
> For SecureTSC enabled guests, use the GUEST_TSC_FREQ MSR to discover the
> TSC frequency instead of relying on kvm-clock based frequency calibration.
> Override both CPU and TSC frequency calibration callbacks with
> securetsc_get_tsc_khz(). As difference between CPU base and TSC frequency
> does not apply in this case same callback is being used.
> ---------------------------------------------------------------------------

Ok.

> ---------------------------------------------------------------------------
> x86/kvmclock: Warn when kvmclock is selected for SecureTSC enabled guests
> 
> Warn users when kvmclock is selected as the clock source for SecureTSC enabled
> guests. Users can change the clock source to kvm-clock using the sysfs interface
> while running on a Secure TSC enabled guest. Switching to the hypervisor-controlled
> kvmclock defeats the purpose of using SecureTSC.
> 
> Taint the kernel and issue a warning to the user when the clock source switches
> to kvmclock, ensuring they are aware of the change and its implications.
> 
> ---------------------------------------------------------------------------

Ok.

> ○ Modern CPU/VMs: VMs running on platforms supporting constant, non-stop and reliable TSC

Modern?

What guarantees do you have on "modern" setups that the HV has no control over
the TSC MSRs? None.

The only guarantee you have is when the TSC MSRs are not intercepted - IOW,
you're a STSC guest.

So none of that modern stuff means anything - your only case is a STSC guest
where you can somewhat reliably know in the guest that the host is not lying
to you.

So the only configuration is a STSC guest - everything else should use
kvm-clock.

AFAIU.

> > After asking so many questions, it sounds to me like this patch and patch 12
> > should be merged into one and there it should be explained what the strategy
> > is when a STSC guest loads and how kvmclock is supposed to be handled, what is
> > allowed, what is warned about, when the guest terminates what is tainted,
> > yadda yadda. 
> > > This all belongs in a single patch logically.

Now, why aren't you merging patch 9 and 12 into one and calling it:

"Switch Secure TSC guests away from kvm-clock"

or so, where you switch a STSC guest to use the TSC MSRs and
warn/taint/terminate the guest if the user chooses otherwise?
Nikunj A Dadhania Jan. 3, 2025, 1:59 p.m. UTC | #14
On 1/3/2025 5:34 PM, Borislav Petkov wrote:
> On Thu, Jan 02, 2025 at 06:40:38PM +0530, Nikunj A. Dadhania wrote:

>> ○ Modern CPU/VMs: VMs running on platforms supporting constant, non-stop and reliable TSC
> 
> Modern?

Meaning platforms/CPU that support SNP/TDX, they have constant, non-stop and invariant TSC.

> What guarantees do you have on "modern" setups that the HV has no control over
> the TSC MSRs? None.

None.

But, non-secure guests uses the regular TSC when the guest is booted with 
TscInvariant bit set, although it doesn't switch the sched clock and
tsc calibration. The guest initially picks up kvm-clock instead 
of tsc-early as it was registered earlier and both the clocks have the same 
clock rating(299). But at a later point in time clocksource switches to 
regular TSC(clock rating 300) from kvm-clock

[    0.000000] kvm-clock: Using msrs 4b564d01 and 4b564d00
[    0.000001] kvm-clock: using sched offset of 1799357702246960 cycles
[    0.001493] clocksource: kvm-clock: mask: 0xffffffffffffffff max_cycles: 0x1cd42e4dffb, max_idle_ns: 881590591483 ns
[    0.006289] tsc: Detected 1996.249 MHz processor
[    0.305123] clocksource: tsc-early: mask: 0xffffffffffffffff max_cycles: 0x398cadd9d93, max_idle_ns: 881590552906 ns
[    1.045759] clocksource: Switched to clocksource kvm-clock
[    1.141326] clocksource: tsc: mask: 0xffffffffffffffff max_cycles: 0x398cadd9d93, max_idle_ns: 881590552906 ns
[    1.144634] clocksource: Switched to clocksource tsc

 
> The only guarantee you have is when the TSC MSRs are not intercepted - IOW,
> you're a STSC guest.
> 
> So none of that modern stuff means anything - your only case is a STSC guest
> where you can somewhat reliably know in the guest that the host is not lying
> to you.

That was my understanding and implementation in v11, where Sean suggested that
VMs running on CPUs supporting stable and always running TSC, should switch over
to TSC properly[1] and [2], in a generic way.

> 
> So the only configuration is a STSC guest - everything else should use
> kvm-clock.

That is not right, if non-secure guest is booted with TscInvariant bit set, guest
will start using TSC as the clocksource, unfortunately sched clock keeps on using
kvm-clock :(

> 
> AFAIU.
> 
>>> After asking so many questions, it sounds to me like this patch and patch 12
>>> should be merged into one and there it should be explained what the strategy
>>> is when a STSC guest loads and how kvmclock is supposed to be handled, what is
>>> allowed, what is warned about, when the guest terminates what is tainted,
>>> yadda yadda. 
>>>> This all belongs in a single patch logically.
> 
> Now, why aren't you merging patch 9 and 12 into one and calling it:
>
> "Switch Secure TSC guests away from kvm-clock"
> 
> or so, where you switch a STSC guest to use the TSC MSRs and
> warn/taint/terminate the guest if the user chooses otherwise?

I am going to merge both the patches, but wanted your input on the commit
wordings.

Thanks,
Nikunj

1. https://lore.kernel.org/kvm/ZuR2t1QrBpPc1Sz2@google.com/
2. https://lore.kernel.org/kvm/ZurCbP7MesWXQbqZ@google.com/
Borislav Petkov Jan. 4, 2025, 10:28 a.m. UTC | #15
On Fri, Jan 03, 2025 at 07:29:10PM +0530, Nikunj A. Dadhania wrote:
> That was my understanding and implementation in v11, where Sean suggested that
> VMs running on CPUs supporting stable and always running TSC, should switch over
> to TSC properly[1] and [2], in a generic way.

Sure, you can do that. But that doesn't get rid of the fact that until the HV
is safely blocked from touching the TSC MSRs, you cannot trust it fully. IOW, only
Secure TSC can give you that, I don't know what provisions TDX has about that.

And then, even if you can get the HV out of the picture, the underlying hw is
not guaranteed either. That's why I keep returning to TSC watchdog disable
logic in check_system_tsc_reliable() - only then you can somewhat trust the
TSC hardware.

> That is not right, if non-secure guest is booted with TscInvariant bit set,
> guest will start using TSC as the clocksource, unfortunately sched clock
> keeps on using kvm-clock :(

Again: you can switch to the TSC as much as you want to but until the
hypervisor is out of the picture, you can't really trust it.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 9fd02efef08e..c4dca06b3b01 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -493,6 +493,7 @@  int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req
 			   struct snp_guest_request_ioctl *rio);
 
 void __init snp_secure_tsc_prepare(void);
+void __init snp_secure_tsc_init(void);
 
 #else	/* !CONFIG_AMD_MEM_ENCRYPT */
 
@@ -536,6 +537,7 @@  static inline void snp_msg_free(struct snp_msg_desc *mdesc) { }
 static inline int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req,
 					 struct snp_guest_request_ioctl *rio) { return -ENODEV; }
 static inline void __init snp_secure_tsc_prepare(void) { }
+static inline void __init snp_secure_tsc_init(void) { }
 
 #endif	/* CONFIG_AMD_MEM_ENCRYPT */
 
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index 59c5e716fdd1..1bc668883058 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -3279,3 +3279,19 @@  void __init snp_secure_tsc_prepare(void)
 
 	pr_debug("SecureTSC enabled");
 }
+
+static unsigned long securetsc_get_tsc_khz(void)
+{
+	unsigned long long tsc_freq_mhz;
+
+	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+	rdmsrl(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);
+
+	return (unsigned long)(tsc_freq_mhz * 1000);
+}
+
+void __init snp_secure_tsc_init(void)
+{
+	x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
+	x86_platform.calibrate_tsc = securetsc_get_tsc_khz;
+}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 67aeaba4ba9c..c0eef924b84e 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -30,6 +30,7 @@ 
 #include <asm/i8259.h>
 #include <asm/topology.h>
 #include <asm/uv/uv.h>
+#include <asm/sev.h>
 
 unsigned int __read_mostly cpu_khz;	/* TSC clocks / usec, not used here */
 EXPORT_SYMBOL(cpu_khz);
@@ -1515,6 +1516,10 @@  void __init tsc_early_init(void)
 	/* Don't change UV TSC multi-chassis synchronization */
 	if (is_early_uv_system())
 		return;
+
+	if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
+		snp_secure_tsc_init();
+
 	if (!determine_cpu_tsc_frequencies(true))
 		return;
 	tsc_enable_sched_clock();