diff mbox series

[Part2,RFC,v4,04/40] x86/sev: Add the host SEV-SNP initialization support

Message ID 20210707183616.5620-5-brijesh.singh@amd.com (mailing list archive)
State Not Applicable
Delegated to: Herbert Xu
Headers show
Series Add AMD Secure Nested Paging (SEV-SNP) Hypervisor Support | expand

Commit Message

Brijesh Singh July 7, 2021, 6:35 p.m. UTC
The memory integrity guarantees of SEV-SNP are enforced through a new
structure called the Reverse Map Table (RMP). The RMP is a single data
structure shared across the system that contains one entry for every 4K
page of DRAM that may be used by SEV-SNP VMs. The goal of RMP is to
track the owner of each page of memory. Pages of memory can be owned by
the hypervisor, owned by a specific VM or owned by the AMD-SP. See APM2
section 15.36.3 for more detail on RMP.

The RMP table is used to enforce access control to memory. The table itself
is not directly writable by the software. New CPU instructions (RMPUPDATE,
PVALIDATE, RMPADJUST) are used to manipulate the RMP entries.

Based on the platform configuration, the BIOS reserves the memory used
for the RMP table. The start and end address of the RMP table must be
queried by reading the RMP_BASE and RMP_END MSRs. If the RMP_BASE and
RMP_END are not set then disable the SEV-SNP feature.

The SEV-SNP feature is enabled only after the RMP table is successfully
initialized.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
 arch/x86/include/asm/disabled-features.h |   8 +-
 arch/x86/include/asm/msr-index.h         |   6 +
 arch/x86/kernel/sev.c                    | 143 +++++++++++++++++++++++
 3 files changed, 156 insertions(+), 1 deletion(-)

Comments

Sean Christopherson July 14, 2021, 9:07 p.m. UTC | #1
On Wed, Jul 07, 2021, Brijesh Singh wrote:
> diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
> index aa7e37631447..f9d813d498fa 100644
> --- a/arch/x86/kernel/sev.c
> +++ b/arch/x86/kernel/sev.c
> @@ -24,6 +24,8 @@
>  #include <linux/sev-guest.h>
>  #include <linux/platform_device.h>
>  #include <linux/io.h>
> +#include <linux/io.h>
> +#include <linux/iommu.h>
>  
>  #include <asm/cpu_entry_area.h>
>  #include <asm/stacktrace.h>
> @@ -40,11 +42,14 @@
>  #include <asm/efi.h>
>  #include <asm/cpuid-indexed.h>
>  #include <asm/setup.h>
> +#include <asm/iommu.h>
>  
>  #include "sev-internal.h"
>  
>  #define DR7_RESET_VALUE        0x400
>  
> +#define RMPTABLE_ENTRIES_OFFSET        0x4000

A comment and/or blurb in the changelog describing this magic number would be
quite helpful.  And maybe call out that this is for the bookkeeping, e.g.

  #define RMPTABLE_CPU_BOOKKEEPING_SIZE	0x4000

Also, the APM doesn't actually state the exact location of the bookkeeping
region, it only states that it's somewhere between RMP_BASE and RMP_END.  This
seems to imply that the bookkeeping region is always at RMP_BASE?

  The region of memory between RMP_BASE and RMP_END contains a 16KB region used
  for processor bookkeeping followed by the RMP entries, which are each 16B in
  size. The size of the RMP determines the range of physical memory that the
  hypervisor can assign to SNP-active virtual machines at runtime. The RMP covers
  the system physical address space from address 0h to the address calculated by:

  ((RMP_END + 1 – RMP_BASE – 16KB) / 16B) x 4KB

>  /* For early boot hypervisor communication in SEV-ES enabled guests */
>  static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
>  
> @@ -56,6 +61,9 @@ static struct ghcb __initdata *boot_ghcb;
>  
>  static u64 snp_secrets_phys;
>  
> +static unsigned long rmptable_start __ro_after_init;
> +static unsigned long rmptable_end __ro_after_init;
> +
>  /* #VC handler runtime per-CPU data */
>  struct sev_es_runtime_data {
>  	struct ghcb ghcb_page;
> @@ -2176,3 +2184,138 @@ static int __init add_snp_guest_request(void)
>  	return 0;
>  }
>  device_initcall(add_snp_guest_request);
> +
> +#undef pr_fmt
> +#define pr_fmt(fmt)	"SEV-SNP: " fmt
> +
> +static int __snp_enable(unsigned int cpu)
> +{
> +	u64 val;
> +
> +	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
> +		return 0;
> +
> +	rdmsrl(MSR_AMD64_SYSCFG, val);
> +
> +	val |= MSR_AMD64_SYSCFG_SNP_EN;
> +	val |= MSR_AMD64_SYSCFG_SNP_VMPL_EN;

Is VMPL required?  Do we plan on using VMPL out of the gate?

> +
> +	wrmsrl(MSR_AMD64_SYSCFG, val);
> +
> +	return 0;
> +}
> +
> +static __init void snp_enable(void *arg)
> +{
> +	__snp_enable(smp_processor_id());
> +}
> +
> +static bool get_rmptable_info(u64 *start, u64 *len)
> +{
> +	u64 calc_rmp_sz, rmp_sz, rmp_base, rmp_end, nr_pages;
> +
> +	rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
> +	rdmsrl(MSR_AMD64_RMP_END, rmp_end);
> +
> +	if (!rmp_base || !rmp_end) {

Can BIOS put the RMP at PA=0?

Also, why is it a BIOS decision?  AFAICT, the MSRs aren't locked until SNP_EN
is set in SYSCFG, and that appears to be a kernel decision (ignoring kexec),
i.e. nothing would prevent the kernel from configuring its own RMP.

> +		pr_info("Memory for the RMP table has not been reserved by BIOS\n");
> +		return false;
> +	}
> +
> +	rmp_sz = rmp_end - rmp_base + 1;
> +
> +	/*
> +	 * Calculate the amount the memory that must be reserved by the BIOS to
> +	 * address the full system RAM. The reserved memory should also cover the
> +	 * RMP table itself.
> +	 *
> +	 * See PPR section 2.1.5.2 for more information on memory requirement.
> +	 */
> +	nr_pages = totalram_pages();
> +	calc_rmp_sz = (((rmp_sz >> PAGE_SHIFT) + nr_pages) << 4) + RMPTABLE_ENTRIES_OFFSET;
> +
> +	if (calc_rmp_sz > rmp_sz) {
> +		pr_info("Memory reserved for the RMP table does not cover the full system "
> +			"RAM (expected 0x%llx got 0x%llx)\n", calc_rmp_sz, rmp_sz);

Is BIOS expected to provide exact coverage, e.g. should this be s/expected/need?

Should the kernel also sanity check other requirements, e.g. the 8kb alignment,
or does the CPU enforce those things at WRMSR?

> +		return false;
> +	}
> +
> +	*start = rmp_base;
> +	*len = rmp_sz;
> +
> +	pr_info("RMP table physical address 0x%016llx - 0x%016llx\n", rmp_base, rmp_end);
> +
> +	return true;
> +}
> +
> +static __init int __snp_rmptable_init(void)
> +{
> +	u64 rmp_base, sz;
> +	void *start;
> +	u64 val;
> +
> +	if (!get_rmptable_info(&rmp_base, &sz))
> +		return 1;
> +
> +	start = memremap(rmp_base, sz, MEMREMAP_WB);
> +	if (!start) {
> +		pr_err("Failed to map RMP table 0x%llx+0x%llx\n", rmp_base, sz);
> +		return 1;
> +	}
> +
> +	/*
> +	 * Check if SEV-SNP is already enabled, this can happen if we are coming from
> +	 * kexec boot.
> +	 */
> +	rdmsrl(MSR_AMD64_SYSCFG, val);
> +	if (val & MSR_AMD64_SYSCFG_SNP_EN)

Hmm, it kinda feels like there should be a sanity check for the case where SNP is
already enabled but get_rmptable_info() fails, e.g. due to insufficient RMP size.

> +		goto skip_enable;
> +
> +	/* Initialize the RMP table to zero */
> +	memset(start, 0, sz);
> +
> +	/* Flush the caches to ensure that data is written before SNP is enabled. */
> +	wbinvd_on_all_cpus();
> +
> +	/* Enable SNP on all CPUs. */
> +	on_each_cpu(snp_enable, NULL, 1);
> +
> +skip_enable:
> +	rmptable_start = (unsigned long)start;

Mostly out of curiosity, why store start/end as unsigned longs?  This is all 64-bit
only so it doesn't actually affect the code generation, but it feels odd to store
things that absolutely have to be 64-bit values as unsigned long.

Similar question for why asm/sev-common.h casts to unsigned long instead of u64.
E.g. the below in particular looks wrong because we're shifting an unsigned long
by 32 bits, i.e. the value _must_ be a 64-bit value, why obfuscate that?

	#define GHCB_CPUID_REQ(fn, reg)		\
		(GHCB_MSR_CPUID_REQ | \
		(((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \
		(((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS))

> +	rmptable_end = rmptable_start + sz;
> +
> +	return 0;
> +}
> +
> +static int __init snp_rmptable_init(void)
> +{
> +	if (!boot_cpu_has(X86_FEATURE_SEV_SNP))
> +		return 0;
> +
> +	/*
> +	 * The SEV-SNP support requires that IOMMU must be enabled, and is not
> +	 * configured in the passthrough mode.
> +	 */
> +	if (no_iommu || iommu_default_passthrough()) {

Similar comment regarding the sanity check, kexec'ing into a kernel with SNP
already enabled should probably fail explicitly if the new kernel is booted with
incompatible params.

> +		setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
> +		pr_err("IOMMU is either disabled or configured in passthrough mode.\n");
> +		return 0;
> +	}
> +
> +	if (__snp_rmptable_init()) {
> +		setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
> +		return 1;
> +	}
> +
> +	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL);
> +
> +	return 0;
> +}
> +
> +/*
> + * This must be called after the PCI subsystem. This is because before enabling
> + * the SNP feature we need to ensure that IOMMU is not configured in the
> + * passthrough mode. The iommu_default_passthrough() is used for checking the
> + * passthough state, and it is available after subsys_initcall().
> + */
> +fs_initcall(snp_rmptable_init);
> -- 
> 2.17.1
>
Brijesh Singh July 14, 2021, 10:02 p.m. UTC | #2
On 7/14/21 4:07 PM, Sean Christopherson wrote:
>>   
>> +#define RMPTABLE_ENTRIES_OFFSET        0x4000
> 
> A comment and/or blurb in the changelog describing this magic number would be
> quite helpful.  And maybe call out that this is for the bookkeeping, e.g.
> 
>    #define RMPTABLE_CPU_BOOKKEEPING_SIZE	0x4000

Noted.

> 
> Also, the APM doesn't actually state the exact location of the bookkeeping
> region, it only states that it's somewhere between RMP_BASE and RMP_END.  This
> seems to imply that the bookkeeping region is always at RMP_BASE?
> 
>    The region of memory between RMP_BASE and RMP_END contains a 16KB region used
>    for processor bookkeeping followed by the RMP entries, which are each 16B in
>    size. The size of the RMP determines the range of physical memory that the
>    hypervisor can assign to SNP-active virtual machines at runtime. The RMP covers
>    the system physical address space from address 0h to the address calculated by:
> 
>    ((RMP_END + 1 – RMP_BASE – 16KB) / 16B) x 4KB
> 

The bookkeeping region is located at RMP_BASE, i.e. at the very start of the
RMP region. The PPR provides the formula that should be used to compute the
location of an RMP entry, and that formula adds the bookkeeping size to RMP_BASE:

       RMP Entry Address = RMP_BASE + 0x4000 + (x >> 8)
       (x is the page's system physical address: one 16B entry per 4KB page)


>> +
>> +	val |= MSR_AMD64_SYSCFG_SNP_EN;
>> +	val |= MSR_AMD64_SYSCFG_SNP_VMPL_EN;
> 
> Is VMPL required?  Do we plan on using VMPL out of the gate?
> 

The SEV-SNP firmware requires that VMPL must be enabled otherwise it 
will fail to initialize. However, the current SEV-SNP support is limited 
to the VMPL0.

> 
> Can BIOS put the RMP at PA=0?

No, it should not. As per the PPR, 0h is the reset value (meaning the
MSR has not been programmed).

> 
> Also, why is it a BIOS decision?  AFAICT, the MSRs aren't locked until SNP_EN
> is set in SYSCFG, and that appears to be a kernel decision (ignoring kexec),
> i.e. nothing would prevent the kernel from configuring it's own RMP.

In the current patch set, we assume that the user configures the BIOS to
reserve memory for the RMP table. From the hardware's point of view, it does
not matter who reserves the memory (BIOS or kernel). In the future, we could
look into reserving the memory from the kernel early in boot, through
memblock etc.

> 
>> +		pr_info("Memory for the RMP table has not been reserved by BIOS\n");
>> +		return false;
>> +	}
>> +
>> +	rmp_sz = rmp_end - rmp_base + 1;
>> +
>> +	/*
>> +	 * Calculate the amount the memory that must be reserved by the BIOS to
>> +	 * address the full system RAM. The reserved memory should also cover the
>> +	 * RMP table itself.
>> +	 *
>> +	 * See PPR section 2.1.5.2 for more information on memory requirement.
>> +	 */
>> +	nr_pages = totalram_pages();
>> +	calc_rmp_sz = (((rmp_sz >> PAGE_SHIFT) + nr_pages) << 4) + RMPTABLE_ENTRIES_OFFSET;
>> +
>> +	if (calc_rmp_sz > rmp_sz) {
>> +		pr_info("Memory reserved for the RMP table does not cover the full system "
>> +			"RAM (expected 0x%llx got 0x%llx)\n", calc_rmp_sz, rmp_sz);
> 
> Is BIOS expected to provide exact coverage, e.g. should this be s/expected/need?
> 

The BIOS provides an option to reserve the required memory. If it doesn't
cover the entire system RAM then it's a BIOS bug.

Yes, I will fix the wording s/expected/need.

To make things interesting, the BIOS also has an option where the user can
specify the amount of memory to be reserved. If the user does not cover the
full system RAM then we need to warn and not enable SNP. We cannot work with
partially reserved RMP table memory.


> Should the kernel also sanity check other requirements, e.g. the 8kb alignment,
> or does the CPU enforce those things at WRMSR?
> 

The SNP firmware enforces those requirement. It is documented in the SNP 
firmware specification (SNP_INIT).



>> +
>> +	/*
>> +	 * Check if SEV-SNP is already enabled, this can happen if we are coming from
>> +	 * kexec boot.
>> +	 */
>> +	rdmsrl(MSR_AMD64_SYSCFG, val);
>> +	if (val & MSR_AMD64_SYSCFG_SNP_EN)
> 
> Hmm, it kinda feels like there should be a sanity check for the case where SNP is
> already enabled but get_rmptable_info() fails, e.g. due to insufficient RMP size.
> 

Hmm, I am not sure if we need to do this. We enable SNP only after
all the sanity checks are completed, so get_rmptable_info() will not
fail after SNP is enabled. The RMP MSRs are locked after SNP is
enabled so we should not see a different size.


>> +		goto skip_enable;
>> +
>> +	/* Initialize the RMP table to zero */
>> +	memset(start, 0, sz);
>> +
>> +	/* Flush the caches to ensure that data is written before SNP is enabled. */
>> +	wbinvd_on_all_cpus();
>> +
>> +	/* Enable SNP on all CPUs. */
>> +	on_each_cpu(snp_enable, NULL, 1);
>> +
>> +skip_enable:
>> +	rmptable_start = (unsigned long)start;
> 
> Mostly out of curiosity, why store start/end as unsigned longs?  This is all 64-bit
> only so it doesn't actually affect the code generation, but it feels odd to store
> things that absolutely have to be 64-bit values as unsigned long.
> 

The AMD memory encryption support is only compiled when 64-bit is enabled in
Kconfig; having said that, I am okay with using u64.


> Similar question for why asm/sev-common.h cases to unsigned long instead of u64.
> E.g. the below in particular looks wrong because we're shifting an unsigned long
> b y32 bits, i.e. the value _must_ be a 64-bit value, why obfuscate that?
> 
> 	#define GHCB_CPUID_REQ(fn, reg)		\
> 		(GHCB_MSR_CPUID_REQ | \
> 		(((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \
> 		(((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS))
> 
>> +	rmptable_end = rmptable_start + sz;
>> +
>> +	return 0;
>> +}
>> +
>> +static int __init snp_rmptable_init(void)
>> +{
>> +	if (!boot_cpu_has(X86_FEATURE_SEV_SNP))
>> +		return 0;
>> +
>> +	/*
>> +	 * The SEV-SNP support requires that IOMMU must be enabled, and is not
>> +	 * configured in the passthrough mode.
>> +	 */
>> +	if (no_iommu || iommu_default_passthrough()) {
> 
> Similar comment regarding the sanity check, kexec'ing into a kernel with SNP
> already enabled should probably fail explicitly if the new kernel is booted with
> incompatible params.

Good point on the kexec, I'll look to cover it.

thanks
Sean Christopherson July 14, 2021, 10:06 p.m. UTC | #3
On Wed, Jul 14, 2021, Brijesh Singh wrote:
> The bookkeeping region is at the start of the RMP_BASE. If we look at the
> PPR then it provides a formula which we should use to read the RMP entry

What's the PPR?  I get the feeling I'm missing a spec :-)
Brijesh Singh July 14, 2021, 10:11 p.m. UTC | #4
On 7/14/21 5:06 PM, Sean Christopherson wrote:
> What's the PPR?  I get the feeling I'm missing a spec:-)

My bad, I should have provided the link in my previous response

Processor Programming Reference (PPR) for AMD Family 19h Model 01h, 
Revision B1 Processors

https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip

look for the PPR_B1_PUB_1.pdf for RMP entry details.

SEV-SNP firmware spec is at developer.amd.com/sev
https://www.amd.com/system/files/TechDocs/56860.pdf

thanks
Jarkko Sakkinen June 2, 2022, 11:47 a.m. UTC | #5
On Wed, Jul 07, 2021 at 01:35:40PM -0500, Brijesh Singh wrote:
> The memory integrity guarantees of SEV-SNP are enforced through a new
> structure called the Reverse Map Table (RMP). The RMP is a single data
> structure shared across the system that contains one entry for every 4K
> page of DRAM that may be used by SEV-SNP VMs. The goal of RMP is to
> track the owner of each page of memory. Pages of memory can be owned by
> the hypervisor, owned by a specific VM or owned by the AMD-SP. See APM2
> section 15.36.3 for more detail on RMP.
> 
> The RMP table is used to enforce access control to memory. The table itself
> is not directly writable by the software. New CPU instructions (RMPUPDATE,
> PVALIDATE, RMPADJUST) are used to manipulate the RMP entries.

What's the point of throwing out a set of opcodes, if there's
no explanation what they do?

BR, Jarkko
Dr. David Alan Gilbert June 6, 2022, 11:42 a.m. UTC | #6
* Jarkko Sakkinen (jarkko.sakkinen@iki.fi) wrote:
> On Wed, Jul 07, 2021 at 01:35:40PM -0500, Brijesh Singh wrote:
> > The memory integrity guarantees of SEV-SNP are enforced through a new
> > structure called the Reverse Map Table (RMP). The RMP is a single data
> > structure shared across the system that contains one entry for every 4K
> > page of DRAM that may be used by SEV-SNP VMs. The goal of RMP is to
> > track the owner of each page of memory. Pages of memory can be owned by
> > the hypervisor, owned by a specific VM or owned by the AMD-SP. See APM2
> > section 15.36.3 for more detail on RMP.
> > 
> > The RMP table is used to enforce access control to memory. The table itself
> > is not directly writable by the software. New CPU instructions (RMPUPDATE,
> > PVALIDATE, RMPADJUST) are used to manipulate the RMP entries.
> 
> What's the point of throwing out a set of opcodes, if there's
> no explanation what they do?

TBF, they are described in the public document section referenced in the
previous paragraph.

Dave
> BR, Jarkko
>
diff mbox series

Patch

diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index b7dd944dc867..0d5c8d08185c 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -68,6 +68,12 @@ 
 # define DISABLE_SGX	(1 << (X86_FEATURE_SGX & 31))
 #endif
 
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+# define DISABLE_SEV_SNP	0
+#else
+# define DISABLE_SEV_SNP	(1 << (X86_FEATURE_SEV_SNP & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -91,7 +97,7 @@ 
 			 DISABLE_ENQCMD)
 #define DISABLED_MASK17	0
 #define DISABLED_MASK18	0
-#define DISABLED_MASK19	0
+#define DISABLED_MASK19	(DISABLE_SEV_SNP)
 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20)
 
 #endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 69ce50fa3565..e8d45929010a 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -485,6 +485,8 @@ 
 #define MSR_AMD64_SEV_ENABLED		BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
 #define MSR_AMD64_SEV_ES_ENABLED	BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
 #define MSR_AMD64_SEV_SNP_ENABLED	BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
+#define MSR_AMD64_RMP_BASE		0xc0010132
+#define MSR_AMD64_RMP_END		0xc0010133
 
 #define MSR_AMD64_VIRT_SPEC_CTRL	0xc001011f
 
@@ -542,6 +544,10 @@ 
 #define MSR_AMD64_SYSCFG		0xc0010010
 #define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT	23
 #define MSR_AMD64_SYSCFG_MEM_ENCRYPT	BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_AMD64_SYSCFG_SNP_EN_BIT		24
+#define MSR_AMD64_SYSCFG_SNP_EN		BIT_ULL(MSR_AMD64_SYSCFG_SNP_EN_BIT)
+#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT	25
+#define MSR_AMD64_SYSCFG_SNP_VMPL_EN	BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT)
 #define MSR_K8_INT_PENDING_MSG		0xc0010055
 /* C1E active bits in int pending message */
 #define K8_INTP_C1E_ACTIVE_MASK		0x18000000
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index aa7e37631447..f9d813d498fa 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -24,6 +24,8 @@ 
 #include <linux/sev-guest.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
+#include <linux/io.h>
+#include <linux/iommu.h>
 
 #include <asm/cpu_entry_area.h>
 #include <asm/stacktrace.h>
@@ -40,11 +42,14 @@ 
 #include <asm/efi.h>
 #include <asm/cpuid-indexed.h>
 #include <asm/setup.h>
+#include <asm/iommu.h>
 
 #include "sev-internal.h"
 
 #define DR7_RESET_VALUE        0x400
 
+#define RMPTABLE_ENTRIES_OFFSET        0x4000
+
 /* For early boot hypervisor communication in SEV-ES enabled guests */
 static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
 
@@ -56,6 +61,9 @@  static struct ghcb __initdata *boot_ghcb;
 
 static u64 snp_secrets_phys;
 
+static unsigned long rmptable_start __ro_after_init;
+static unsigned long rmptable_end __ro_after_init;
+
 /* #VC handler runtime per-CPU data */
 struct sev_es_runtime_data {
 	struct ghcb ghcb_page;
@@ -2176,3 +2184,138 @@  static int __init add_snp_guest_request(void)
 	return 0;
 }
 device_initcall(add_snp_guest_request);
+
+#undef pr_fmt
+#define pr_fmt(fmt)	"SEV-SNP: " fmt
+
+static int __snp_enable(unsigned int cpu)
+{
+	u64 val;
+
+	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+		return 0;
+
+	rdmsrl(MSR_AMD64_SYSCFG, val);
+
+	val |= MSR_AMD64_SYSCFG_SNP_EN;
+	val |= MSR_AMD64_SYSCFG_SNP_VMPL_EN;
+
+	wrmsrl(MSR_AMD64_SYSCFG, val);
+
+	return 0;
+}
+
+static __init void snp_enable(void *arg)
+{
+	__snp_enable(smp_processor_id());
+}
+
+static bool get_rmptable_info(u64 *start, u64 *len)
+{
+	u64 calc_rmp_sz, rmp_sz, rmp_base, rmp_end, nr_pages;
+
+	rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
+	rdmsrl(MSR_AMD64_RMP_END, rmp_end);
+
+	if (!rmp_base || !rmp_end) {
+		pr_info("Memory for the RMP table has not been reserved by BIOS\n");
+		return false;
+	}
+
+	rmp_sz = rmp_end - rmp_base + 1;
+
+	/*
+	 * Calculate the amount of memory that must be reserved by the BIOS to
+	 * address the full system RAM. The reserved memory should also cover the
+	 * RMP table itself.
+	 *
+	 * See PPR section 2.1.5.2 for more information on memory requirement.
+	 */
+	nr_pages = totalram_pages();
+	calc_rmp_sz = (((rmp_sz >> PAGE_SHIFT) + nr_pages) << 4) + RMPTABLE_ENTRIES_OFFSET;
+
+	if (calc_rmp_sz > rmp_sz) {
+		pr_info("Memory reserved for the RMP table does not cover the full system "
+			"RAM (expected 0x%llx got 0x%llx)\n", calc_rmp_sz, rmp_sz);
+		return false;
+	}
+
+	*start = rmp_base;
+	*len = rmp_sz;
+
+	pr_info("RMP table physical address 0x%016llx - 0x%016llx\n", rmp_base, rmp_end);
+
+	return true;
+}
+
+static __init int __snp_rmptable_init(void)
+{
+	u64 rmp_base, sz;
+	void *start;
+	u64 val;
+
+	if (!get_rmptable_info(&rmp_base, &sz))
+		return 1;
+
+	start = memremap(rmp_base, sz, MEMREMAP_WB);
+	if (!start) {
+		pr_err("Failed to map RMP table 0x%llx+0x%llx\n", rmp_base, sz);
+		return 1;
+	}
+
+	/*
+	 * Check if SEV-SNP is already enabled, this can happen if we are coming from
+	 * kexec boot.
+	 */
+	rdmsrl(MSR_AMD64_SYSCFG, val);
+	if (val & MSR_AMD64_SYSCFG_SNP_EN)
+		goto skip_enable;
+
+	/* Initialize the RMP table to zero */
+	memset(start, 0, sz);
+
+	/* Flush the caches to ensure that data is written before SNP is enabled. */
+	wbinvd_on_all_cpus();
+
+	/* Enable SNP on all CPUs. */
+	on_each_cpu(snp_enable, NULL, 1);
+
+skip_enable:
+	rmptable_start = (unsigned long)start;
+	rmptable_end = rmptable_start + sz;
+
+	return 0;
+}
+
+static int __init snp_rmptable_init(void)
+{
+	if (!boot_cpu_has(X86_FEATURE_SEV_SNP))
+		return 0;
+
+	/*
+	 * The SEV-SNP support requires that IOMMU must be enabled, and is not
+	 * configured in the passthrough mode.
+	 */
+	if (no_iommu || iommu_default_passthrough()) {
+		setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+		pr_err("IOMMU is either disabled or configured in passthrough mode.\n");
+		return 0;
+	}
+
+	if (__snp_rmptable_init()) {
+		setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+		return 1;
+	}
+
+	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL);
+
+	return 0;
+}
+
+/*
+ * This must be called after the PCI subsystem. This is because before enabling
+ * the SNP feature we need to ensure that IOMMU is not configured in the
+ * passthrough mode. The iommu_default_passthrough() is used for checking the
+ * passthrough state, and it is available after subsys_initcall().
+ */
+fs_initcall(snp_rmptable_init);