[Part2,v5,05/45] x86/sev: Add helper functions for RMPUPDATE and PSMASH instruction

Message ID 20210820155918.7518-6-brijesh.singh@amd.com (mailing list archive)
State Not Applicable
Delegated to: Herbert Xu
Series Add AMD Secure Nested Paging (SEV-SNP) Hypervisor Support

Commit Message

Brijesh Singh Aug. 20, 2021, 3:58 p.m. UTC
The RMPUPDATE instruction writes a new RMP entry in the RMP Table. The
hypervisor will use the instruction to add pages to the RMP table. See
APM3 for details on the instruction operations.

The PSMASH instruction expands a 2MB RMP entry into a corresponding set of
contiguous 4KB-Page RMP entries. The hypervisor will use this instruction
to adjust the RMP entry without invalidating the previous RMP entry.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
 arch/x86/include/asm/sev.h | 11 ++++++
 arch/x86/kernel/sev.c      | 72 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)
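
For context, a minimal sketch of how a hypervisor might combine these
helpers -- assigning a 2MB page to a guest and later splitting its RMP entry
when 4KB granularity is needed. The flow and error handling are illustrative
assumptions, not part of the patch:

	int ret;

	/* Assign a 2MB page at the given GPA to the guest's ASID. */
	ret = rmp_make_private(pfn, gpa, PG_LEVEL_2M, asid, false);
	if (ret)
		return ret;

	/*
	 * Later, e.g. on a 4KB page state change request from the guest,
	 * split the 2MB RMP entry into 512 contiguous 4KB RMP entries.
	 */
	ret = psmash(pfn);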

Comments

Borislav Petkov Sept. 24, 2021, 2:04 p.m. UTC | #1
On Fri, Aug 20, 2021 at 10:58:38AM -0500, Brijesh Singh wrote:
> The RMPUPDATE instruction writes a new RMP entry in the RMP Table. The
> hypervisor will use the instruction to add pages to the RMP table. See
> APM3 for details on the instruction operations.
> 
> The PSMASH instruction expands a 2MB RMP entry into a corresponding set of
> contiguous 4KB-Page RMP entries. The hypervisor will use this instruction
> to adjust the RMP entry without invalidating the previous RMP entry.
> 
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
>  arch/x86/include/asm/sev.h | 11 ++++++
>  arch/x86/kernel/sev.c      | 72 ++++++++++++++++++++++++++++++++++++++
>  2 files changed, 83 insertions(+)
> 
> diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
> index 5b1a6a075c47..92ced9626e95 100644
> --- a/arch/x86/include/asm/sev.h
> +++ b/arch/x86/include/asm/sev.h
> @@ -78,7 +78,9 @@ extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
>  
>  /* RMP page size */
>  #define RMP_PG_SIZE_4K			0
> +#define RMP_PG_SIZE_2M			1
>  #define RMP_TO_X86_PG_LEVEL(level)	(((level) == RMP_PG_SIZE_4K) ? PG_LEVEL_4K : PG_LEVEL_2M)
> +#define X86_TO_RMP_PG_LEVEL(level)	(((level) == PG_LEVEL_4K) ? RMP_PG_SIZE_4K : RMP_PG_SIZE_2M)
>  
>  /*
>   * The RMP entry format is not architectural. The format is defined in PPR
> @@ -107,6 +109,15 @@ struct __packed rmpentry {
>  
>  #define RMPADJUST_VMSA_PAGE_BIT		BIT(16)
>  
> +struct rmpupdate {

Function is called the same way - maybe this should be called
rmpupdate_desc or so.

> +	u64 gpa;
> +	u8 assigned;
> +	u8 pagesize;
> +	u8 immutable;
> +	u8 rsvd;
> +	u32 asid;
> +} __packed;
> +
>  #ifdef CONFIG_AMD_MEM_ENCRYPT
>  extern struct static_key_false sev_es_enable_key;
>  extern void __sev_es_ist_enter(struct pt_regs *regs);
> diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
> index f383d2a89263..8627c49666c9 100644
> --- a/arch/x86/kernel/sev.c
> +++ b/arch/x86/kernel/sev.c
> @@ -2419,3 +2419,75 @@ int snp_lookup_rmpentry(u64 pfn, int *level)
>  	return !!rmpentry_assigned(e);
>  }
>  EXPORT_SYMBOL_GPL(snp_lookup_rmpentry);
> +

<--- kernel-doc comment.
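
For example, a kernel-doc comment along these lines (a sketch; the exact
wording is illustrative):

/**
 * psmash() - Split a 2MB RMP entry into 512 contiguous 4KB RMP entries.
 * @pfn: PFN of the 2MB-aligned page whose RMP entry is to be split.
 *
 * Return: 0 on success, a negative errno on pre-checks, or the failure
 * code returned by the PSMASH instruction.
 */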

> +int psmash(u64 pfn)
> +{
> +	unsigned long paddr = pfn << PAGE_SHIFT;
> +	int ret;
> +
> +	if (!pfn_valid(pfn))
> +		return -EINVAL;
> +
> +	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
> +		return -ENXIO;

Make that the first check pls.

> +
> +	/* Binutils version 2.36 supports the PSMASH mnemonic. */
> +	asm volatile(".byte 0xF3, 0x0F, 0x01, 0xFF"
> +		      : "=a"(ret)
> +		      : "a"(paddr)
> +		      : "memory", "cc");
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(psmash);

That's for kvm?

> +static int rmpupdate(u64 pfn, struct rmpupdate *val)
> +{
> +	unsigned long paddr = pfn << PAGE_SHIFT;
> +	int ret;
> +
> +	if (!pfn_valid(pfn))
> +		return -EINVAL;
> +
> +	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
> +		return -ENXIO;

Also first check.

> +
> +	/* Binutils version 2.36 supports the RMPUPDATE mnemonic. */
> +	asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFE"
> +		     : "=a"(ret)
> +		     : "a"(paddr), "c"((unsigned long)val)
> +		     : "memory", "cc");
> +	return ret;
> +}
> +
> +int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, int asid, bool immutable)
> +{
> +	struct rmpupdate val;
> +
> +	if (!pfn_valid(pfn))
> +		return -EINVAL;

rmpupdate() does that check too so choose one place please and kill the
other.

> +	memset(&val, 0, sizeof(val));
> +	val.assigned = 1;
> +	val.asid = asid;
> +	val.immutable = immutable;
> +	val.gpa = gpa;
> +	val.pagesize = X86_TO_RMP_PG_LEVEL(level);
> +
> +	return rmpupdate(pfn, &val);
> +}
> +EXPORT_SYMBOL_GPL(rmp_make_private);
> +
> +int rmp_make_shared(u64 pfn, enum pg_level level)
> +{
> +	struct rmpupdate val;
> +
> +	if (!pfn_valid(pfn))
> +		return -EINVAL;
> +
> +	memset(&val, 0, sizeof(val));
> +	val.pagesize = X86_TO_RMP_PG_LEVEL(level);
> +
> +	return rmpupdate(pfn, &val);
> +}
> +EXPORT_SYMBOL_GPL(rmp_make_shared);

Both exports for kvm I assume?

Thx.
Brijesh Singh Sept. 27, 2021, 4:06 p.m. UTC | #2
On 9/24/21 9:04 AM, Borislav Petkov wrote:
> On Fri, Aug 20, 2021 at 10:58:38AM -0500, Brijesh Singh wrote:
>> The RMPUPDATE instruction writes a new RMP entry in the RMP Table. The
>> hypervisor will use the instruction to add pages to the RMP table. See
>> APM3 for details on the instruction operations.
>>
>> The PSMASH instruction expands a 2MB RMP entry into a corresponding set of
>> contiguous 4KB-Page RMP entries. The hypervisor will use this instruction
>> to adjust the RMP entry without invalidating the previous RMP entry.
>>
>> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
>> ---
>>   arch/x86/include/asm/sev.h | 11 ++++++
>>   arch/x86/kernel/sev.c      | 72 ++++++++++++++++++++++++++++++++++++++
>>   2 files changed, 83 insertions(+)
>>
>> diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
>> index 5b1a6a075c47..92ced9626e95 100644
>> --- a/arch/x86/include/asm/sev.h
>> +++ b/arch/x86/include/asm/sev.h
>> @@ -78,7 +78,9 @@ extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
>>   
>>   /* RMP page size */
>>   #define RMP_PG_SIZE_4K			0
>> +#define RMP_PG_SIZE_2M			1
>>   #define RMP_TO_X86_PG_LEVEL(level)	(((level) == RMP_PG_SIZE_4K) ? PG_LEVEL_4K : PG_LEVEL_2M)
>> +#define X86_TO_RMP_PG_LEVEL(level)	(((level) == PG_LEVEL_4K) ? RMP_PG_SIZE_4K : RMP_PG_SIZE_2M)
>>   
>>   /*
>>    * The RMP entry format is not architectural. The format is defined in PPR
>> @@ -107,6 +109,15 @@ struct __packed rmpentry {
>>   
>>   #define RMPADJUST_VMSA_PAGE_BIT		BIT(16)
>>   
>> +struct rmpupdate {
> 
> Function is called the same way - maybe this should be called
> rmpupdate_desc or so.

Noted.


>> +
>> +	return ret;
>> +}
>> +EXPORT_SYMBOL_GPL(psmash);
> 
> That's for kvm?

Yes.

...

>> +EXPORT_SYMBOL_GPL(rmp_make_shared);
> 
> Both exports for kvm I assume?

Yes, both KVM and the CCP driver need them.

thanks
Sean Christopherson Oct. 15, 2021, 6:05 p.m. UTC | #3
On Fri, Aug 20, 2021, Brijesh Singh wrote:
> diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
> index f383d2a89263..8627c49666c9 100644
> --- a/arch/x86/kernel/sev.c
> +++ b/arch/x86/kernel/sev.c
> @@ -2419,3 +2419,75 @@ int snp_lookup_rmpentry(u64 pfn, int *level)
>  	return !!rmpentry_assigned(e);
>  }
>  EXPORT_SYMBOL_GPL(snp_lookup_rmpentry);
> +
> +int psmash(u64 pfn)
> +{
> +	unsigned long paddr = pfn << PAGE_SHIFT;

Probably better to use __pfn_to_phys()?
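
That is, something like:

	unsigned long paddr = __pfn_to_phys(pfn);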

> +	int ret;
> +
> +	if (!pfn_valid(pfn))
> +		return -EINVAL;
> +
> +	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))

Shouldn't this be a WARN_ON_ONCE()?

> +		return -ENXIO;
> +
> +	/* Binutils version 2.36 supports the PSMASH mnemonic. */
> +	asm volatile(".byte 0xF3, 0x0F, 0x01, 0xFF"
> +		      : "=a"(ret)
> +		      : "a"(paddr)
> +		      : "memory", "cc");
> +
> +	return ret;

I don't like returning the raw result from hardware; it mostly works because
hardware also uses '0' for success, but it will cause confusion should hardware
ever set bit 31.  There are also failures that likely should never happen unless
there's a kernel bug, e.g. I suspect we can do:

	if (WARN_ON_ONCE(ret == FAIL_INPUT))
		return -EINVAL;
	if (WARN_ON_ONCE(ret == FAIL_PERMISSION))
		return -EPERM;
	
	....

or if all errors are "impossible"

	if (WARN_ON_ONCE(ret))
		return snp_error_code_to_errno(ret);

FAIL_INUSE and FAIL_OVERLAP also need further discussion.  It's not clear to me
that two well-behaved callers can't encounter collisions due to the 2MB <=> 4KB
interactions.  If collisions between well-behaved callers are possible, then this
helper likely needs some form of serialization.  Either way, the concurrency rules
for RMP access need explicit and lengthy documentation.
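
A sketch of what such a translation helper could look like -- the failure-code
values follow the RMPUPDATE/PSMASH descriptions in APM Vol. 3, while the macro
names and the exact errno mapping are illustrative assumptions:

/* RMPUPDATE/PSMASH failure codes per APM Vol. 3 (macro names illustrative). */
#define RMPUPDATE_FAIL_INPUT		1
#define RMPUPDATE_FAIL_PERMISSION	2
#define RMPUPDATE_FAIL_INUSE		3
#define RMPUPDATE_FAIL_OVERLAP		4

static int snp_error_code_to_errno(int ret)
{
	switch (ret) {
	case 0:
		return 0;
	case RMPUPDATE_FAIL_INPUT:
		return -EINVAL;
	case RMPUPDATE_FAIL_PERMISSION:
		return -EPERM;
	case RMPUPDATE_FAIL_INUSE:
		return -EBUSY;
	case RMPUPDATE_FAIL_OVERLAP:
		return -EEXIST;
	default:
		return -EIO;	/* unknown/unexpected failure code */
	}
}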
Brijesh Singh Oct. 15, 2021, 8:18 p.m. UTC | #4
On 10/15/21 1:05 PM, Sean Christopherson wrote:
> On Fri, Aug 20, 2021, Brijesh Singh wrote:
>> diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
>> index f383d2a89263..8627c49666c9 100644
>> --- a/arch/x86/kernel/sev.c
>> +++ b/arch/x86/kernel/sev.c
>> @@ -2419,3 +2419,75 @@ int snp_lookup_rmpentry(u64 pfn, int *level)
>>  	return !!rmpentry_assigned(e);
>>  }
>>  EXPORT_SYMBOL_GPL(snp_lookup_rmpentry);
>> +
>> +int psmash(u64 pfn)
>> +{
>> +	unsigned long paddr = pfn << PAGE_SHIFT;
> Probably better to use __pfn_to_phys()?

Sure, I can use that instead of direct shift.


>
>> +	int ret;
>> +
>> +	if (!pfn_valid(pfn))
>> +		return -EINVAL;
>> +
>> +	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
> Shouldn't this be a WARN_ON_ONCE()?

Since some of these functions are called while handling PSC requests, I
tried to avoid using WARN -- mainly because if panic_on_warn=1 is set on
the host, it will result in a kernel panic.

>
>> +		return -ENXIO;
>> +
>> +	/* Binutils version 2.36 supports the PSMASH mnemonic. */
>> +	asm volatile(".byte 0xF3, 0x0F, 0x01, 0xFF"
>> +		      : "=a"(ret)
>> +		      : "a"(paddr)
>> +		      : "memory", "cc");
>> +
>> +	return ret;
> I don't like returning the raw result from hardware; it mostly works because
> hardware also uses '0' for success, but it will cause confusion should hardware
> ever set bit 31.  There are also failures that likely should never happen unless
> there's a kernel bug, e.g. I suspect we can do:
>
> 	if (WARN_ON_ONCE(ret == FAIL_INPUT))
> 		return -EINVAL;
> 	if (WARN_ON_ONCE(ret == FAIL_PERMISSION))
> 		return -EPERM;
> 	
> 	....
>
> or if all errors are "impossible"
>
> 	if (WARN_ON_ONCE(ret))
> 		return snp_error_code_to_errno(ret);
>
> FAIL_INUSE and FAIL_OVERLAP also need further discussion.  It's not clear to me
> that two well-behaved callers can't encounter collisions due to the 2MB <=> 4KB
> interactions.  If collisions between well-behaved callers are possible, then this
> helper likely needs some form of serialization.  Either way, the concurrency rules
> for RMP access need explicit and lengthy documentation.

I don't think we need to serialize the calls. The hardware acquires a
lock before changing the RMP table, and if another processor tries to
access the same RMP table entry, the hardware will return FAIL_INUSE or
#NPF. The FAIL_INUSE will happen on RMPUPDATE, whereas the #NPF will
occur if the guest attempts to modify the RMP entry (such as via
RMPADJUST).

As far as FAIL_OVERLAP is concerned, it's the case where the guest is
asking to create an invalid situation and the hardware detects it. In
other words, it is a guest bug, e.g. the guest issued a PSC to add a page
as 2MB and then asked to add the same page (or one of its subpages) as 4K.
Hardware sees the overlap condition and fails to change the state on the
second request.


thanks
Sean Christopherson Oct. 15, 2021, 8:27 p.m. UTC | #5
On Fri, Oct 15, 2021, Brijesh Singh wrote:
> On 10/15/21 1:05 PM, Sean Christopherson wrote:
> > On Fri, Aug 20, 2021, Brijesh Singh wrote:
> >> +	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
> > Shouldn't this be a WARN_ON_ONCE()?
> 
> Since some of these functions are called while handling PSC requests, I
> tried to avoid using WARN -- mainly because if panic_on_warn=1 is set on
> the host, it will result in a kernel panic.

But why would KVM be handling PSC requests if SNP is disabled?
Brijesh Singh Oct. 15, 2021, 8:36 p.m. UTC | #6
On 10/15/21 3:27 PM, Sean Christopherson wrote:
> On Fri, Oct 15, 2021, Brijesh Singh wrote:
>> On 10/15/21 1:05 PM, Sean Christopherson wrote:
>>> On Fri, Aug 20, 2021, Brijesh Singh wrote:
>>>> +	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
>>> Shouldn't this be a WARN_ON_ONCE()?
>> Since some of these functions are called while handling PSC requests, I
>> tried to avoid using WARN -- mainly because if panic_on_warn=1 is set on
>> the host, it will result in a kernel panic.
> But why would KVM be handling PSC requests if SNP is disabled?

RMPUPDATE is also used by the CCP driver to change the page state during
initialization. You are right that neither KVM nor CCP should be using
these functions when SNP is not enabled. In your pseudocode you used
WARN_ON_ONCE() for both the cpu_feature_enabled() and the return code
check. I was more concerned about the return code WARN_ON_ONCE() because
that path is hit during PSC handling. I am okay with using
WARN_ON_ONCE() for the cpu_feature_enabled() check.

thanks
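
Taken together, the consensus above would presumably shape the checks like
this (a sketch of the agreed direction, not final code):

	/* Calling this without SNP enabled indicates a KVM/CCP caller bug. */
	if (WARN_ON_ONCE(!cpu_feature_enabled(X86_FEATURE_SEV_SNP)))
		return -ENXIO;

	...

	/*
	 * No WARN on the instruction's return code: failures here can occur
	 * during legitimate PSC handling and are reported to the caller.
	 */
	return ret;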

Patch

diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 5b1a6a075c47..92ced9626e95 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -78,7 +78,9 @@  extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
 
 /* RMP page size */
 #define RMP_PG_SIZE_4K			0
+#define RMP_PG_SIZE_2M			1
 #define RMP_TO_X86_PG_LEVEL(level)	(((level) == RMP_PG_SIZE_4K) ? PG_LEVEL_4K : PG_LEVEL_2M)
+#define X86_TO_RMP_PG_LEVEL(level)	(((level) == PG_LEVEL_4K) ? RMP_PG_SIZE_4K : RMP_PG_SIZE_2M)
 
 /*
  * The RMP entry format is not architectural. The format is defined in PPR
@@ -107,6 +109,15 @@  struct __packed rmpentry {
 
 #define RMPADJUST_VMSA_PAGE_BIT		BIT(16)
 
+struct rmpupdate {
+	u64 gpa;
+	u8 assigned;
+	u8 pagesize;
+	u8 immutable;
+	u8 rsvd;
+	u32 asid;
+} __packed;
+
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 extern struct static_key_false sev_es_enable_key;
 extern void __sev_es_ist_enter(struct pt_regs *regs);
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index f383d2a89263..8627c49666c9 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -2419,3 +2419,75 @@  int snp_lookup_rmpentry(u64 pfn, int *level)
 	return !!rmpentry_assigned(e);
 }
 EXPORT_SYMBOL_GPL(snp_lookup_rmpentry);
+
+int psmash(u64 pfn)
+{
+	unsigned long paddr = pfn << PAGE_SHIFT;
+	int ret;
+
+	if (!pfn_valid(pfn))
+		return -EINVAL;
+
+	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+		return -ENXIO;
+
+	/* Binutils version 2.36 supports the PSMASH mnemonic. */
+	asm volatile(".byte 0xF3, 0x0F, 0x01, 0xFF"
+		      : "=a"(ret)
+		      : "a"(paddr)
+		      : "memory", "cc");
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(psmash);
+
+static int rmpupdate(u64 pfn, struct rmpupdate *val)
+{
+	unsigned long paddr = pfn << PAGE_SHIFT;
+	int ret;
+
+	if (!pfn_valid(pfn))
+		return -EINVAL;
+
+	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+		return -ENXIO;
+
+	/* Binutils version 2.36 supports the RMPUPDATE mnemonic. */
+	asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFE"
+		     : "=a"(ret)
+		     : "a"(paddr), "c"((unsigned long)val)
+		     : "memory", "cc");
+	return ret;
+}
+
+int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, int asid, bool immutable)
+{
+	struct rmpupdate val;
+
+	if (!pfn_valid(pfn))
+		return -EINVAL;
+
+	memset(&val, 0, sizeof(val));
+	val.assigned = 1;
+	val.asid = asid;
+	val.immutable = immutable;
+	val.gpa = gpa;
+	val.pagesize = X86_TO_RMP_PG_LEVEL(level);
+
+	return rmpupdate(pfn, &val);
+}
+EXPORT_SYMBOL_GPL(rmp_make_private);
+
+int rmp_make_shared(u64 pfn, enum pg_level level)
+{
+	struct rmpupdate val;
+
+	if (!pfn_valid(pfn))
+		return -EINVAL;
+
+	memset(&val, 0, sizeof(val));
+	val.pagesize = X86_TO_RMP_PG_LEVEL(level);
+
+	return rmpupdate(pfn, &val);
+}
+EXPORT_SYMBOL_GPL(rmp_make_shared);
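
For reference, a hypothetical caller-side flow using the exported helpers;
the surrounding names and error handling are illustrative assumptions, not
taken from KVM or the CCP driver:

	int ret;

	/* Transition a 4KB page to guest-private before mapping it in. */
	ret = rmp_make_private(pfn, gpa, PG_LEVEL_4K, asid, false);
	if (ret)
		return ret;

	/* ... the page now belongs to the guest's ASID ... */

	/* Return the page to the shared/hypervisor state when done. */
	ret = rmp_make_shared(pfn, PG_LEVEL_4K);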