diff mbox series

[v1,04/26] x86/sev: Add the host SEV-SNP initialization support

Message ID 20231230161954.569267-5-michael.roth@amd.com (mailing list archive)
State Not Applicable
Delegated to: Herbert Xu
Headers show
Series Add AMD Secure Nested Paging (SEV-SNP) Initialization Support | expand

Commit Message

Michael Roth Dec. 30, 2023, 4:19 p.m. UTC
From: Brijesh Singh <brijesh.singh@amd.com>

The memory integrity guarantees of SEV-SNP are enforced through a new
structure called the Reverse Map Table (RMP). The RMP is a single data
structure shared across the system that contains one entry for every 4K
page of DRAM that may be used by SEV-SNP VMs. APM2 section 15.36 details
a number of steps needed to detect/enable SEV-SNP and RMP table support
on the host:

 - Detect SEV-SNP support based on CPUID bit
 - Initialize the RMP table memory reported by the RMP base/end MSR
   registers and configure IOMMU to be compatible with RMP access
   restrictions
 - Set the MtrrFixDramModEn bit in SYSCFG MSR
 - Set the SecureNestedPagingEn and VMPLEn bits in the SYSCFG MSR
 - Configure IOMMU

RMP table entry format is non-architectural and it can vary by
processor. It is defined by the PPR. Restrict SNP support to CPU
models/families which are compatible with the current RMP table entry
format to guard against any undefined behavior when running on other
system types. Future models/support will handle this through an
architectural mechanism to allow for broader compatibility.

SNP host code depends on CONFIG_KVM_AMD_SEV config flag, which may be
enabled even when CONFIG_AMD_MEM_ENCRYPT isn't set, so update the
SNP-specific IOMMU helpers used here to rely on CONFIG_KVM_AMD_SEV
instead of CONFIG_AMD_MEM_ENCRYPT.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Co-developed-by: Ashish Kalra <ashish.kalra@amd.com>
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
Co-developed-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Co-developed-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
---
 arch/x86/Kbuild                  |   2 +
 arch/x86/include/asm/msr-index.h |  11 +-
 arch/x86/include/asm/sev.h       |   6 +
 arch/x86/kernel/cpu/amd.c        |  15 +++
 arch/x86/virt/svm/Makefile       |   3 +
 arch/x86/virt/svm/sev.c          | 219 +++++++++++++++++++++++++++++++
 6 files changed, 255 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/virt/svm/Makefile
 create mode 100644 arch/x86/virt/svm/sev.c

Comments

Jeremi Piotrowski Jan. 4, 2024, 11:05 a.m. UTC | #1
On 30/12/2023 17:19, Michael Roth wrote:
> From: Brijesh Singh <brijesh.singh@amd.com>
> 
> The memory integrity guarantees of SEV-SNP are enforced through a new
> structure called the Reverse Map Table (RMP). The RMP is a single data
> structure shared across the system that contains one entry for every 4K
> page of DRAM that may be used by SEV-SNP VMs. APM2 section 15.36 details
> a number of steps needed to detect/enable SEV-SNP and RMP table support
> on the host:
> 
>  - Detect SEV-SNP support based on CPUID bit
>  - Initialize the RMP table memory reported by the RMP base/end MSR
>    registers and configure IOMMU to be compatible with RMP access
>    restrictions
>  - Set the MtrrFixDramModEn bit in SYSCFG MSR
>  - Set the SecureNestedPagingEn and VMPLEn bits in the SYSCFG MSR
>  - Configure IOMMU
> 
> RMP table entry format is non-architectural and it can vary by
> processor. It is defined by the PPR. Restrict SNP support to CPU
> models/families which are compatible with the current RMP table entry
> format to guard against any undefined behavior when running on other
> system types. Future models/support will handle this through an
> architectural mechanism to allow for broader compatibility.
> 
> SNP host code depends on CONFIG_KVM_AMD_SEV config flag, which may be
> enabled even when CONFIG_AMD_MEM_ENCRYPT isn't set, so update the
> SNP-specific IOMMU helpers used here to rely on CONFIG_KVM_AMD_SEV
> instead of CONFIG_AMD_MEM_ENCRYPT.
> 
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> Co-developed-by: Ashish Kalra <ashish.kalra@amd.com>
> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> Co-developed-by: Tom Lendacky <thomas.lendacky@amd.com>
> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
> Co-developed-by: Michael Roth <michael.roth@amd.com>
> Signed-off-by: Michael Roth <michael.roth@amd.com>
> ---
>  arch/x86/Kbuild                  |   2 +
>  arch/x86/include/asm/msr-index.h |  11 +-
>  arch/x86/include/asm/sev.h       |   6 +
>  arch/x86/kernel/cpu/amd.c        |  15 +++
>  arch/x86/virt/svm/Makefile       |   3 +
>  arch/x86/virt/svm/sev.c          | 219 +++++++++++++++++++++++++++++++
>  6 files changed, 255 insertions(+), 1 deletion(-)
>  create mode 100644 arch/x86/virt/svm/Makefile
>  create mode 100644 arch/x86/virt/svm/sev.c
> 
> diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
> index 5a83da703e87..6a1f36df6a18 100644
> --- a/arch/x86/Kbuild
> +++ b/arch/x86/Kbuild
> @@ -28,5 +28,7 @@ obj-y += net/
>  
>  obj-$(CONFIG_KEXEC_FILE) += purgatory/
>  
> +obj-y += virt/svm/
> +
>  # for cleaning
>  subdir- += boot tools
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index f1bd7b91b3c6..15ce1269f270 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -599,6 +599,8 @@
>  #define MSR_AMD64_SEV_ENABLED		BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
>  #define MSR_AMD64_SEV_ES_ENABLED	BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
>  #define MSR_AMD64_SEV_SNP_ENABLED	BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
> +#define MSR_AMD64_RMP_BASE		0xc0010132
> +#define MSR_AMD64_RMP_END		0xc0010133
>  
>  /* SNP feature bits enabled by the hypervisor */
>  #define MSR_AMD64_SNP_VTOM			BIT_ULL(3)
> @@ -709,7 +711,14 @@
>  #define MSR_K8_TOP_MEM2			0xc001001d
>  #define MSR_AMD64_SYSCFG		0xc0010010
>  #define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT	23
> -#define MSR_AMD64_SYSCFG_MEM_ENCRYPT	BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
> +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT		BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
> +#define MSR_AMD64_SYSCFG_SNP_EN_BIT		24
> +#define MSR_AMD64_SYSCFG_SNP_EN		BIT_ULL(MSR_AMD64_SYSCFG_SNP_EN_BIT)
> +#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT	25
> +#define MSR_AMD64_SYSCFG_SNP_VMPL_EN		BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT)
> +#define MSR_AMD64_SYSCFG_MFDM_BIT		19
> +#define MSR_AMD64_SYSCFG_MFDM			BIT_ULL(MSR_AMD64_SYSCFG_MFDM_BIT)
> +
>  #define MSR_K8_INT_PENDING_MSG		0xc0010055
>  /* C1E active bits in int pending message */
>  #define K8_INTP_C1E_ACTIVE_MASK		0x18000000
> diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
> index 5b4a1ce3d368..1f59d8ba9776 100644
> --- a/arch/x86/include/asm/sev.h
> +++ b/arch/x86/include/asm/sev.h
> @@ -243,4 +243,10 @@ static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
>  static inline u64 sev_get_status(void) { return 0; }
>  #endif
>  
> +#ifdef CONFIG_KVM_AMD_SEV
> +bool snp_probe_rmptable_info(void);
> +#else
> +static inline bool snp_probe_rmptable_info(void) { return false; }
> +#endif
> +
>  #endif
> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
> index 9a17165dfe84..0f0d425f0440 100644
> --- a/arch/x86/kernel/cpu/amd.c
> +++ b/arch/x86/kernel/cpu/amd.c
> @@ -20,6 +20,7 @@
>  #include <asm/delay.h>
>  #include <asm/debugreg.h>
>  #include <asm/resctrl.h>
> +#include <asm/sev.h>
>  
>  #ifdef CONFIG_X86_64
>  # include <asm/mmconfig.h>
> @@ -574,6 +575,20 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
>  		break;
>  	}
>  
> +	if (cpu_has(c, X86_FEATURE_SEV_SNP)) {
> +		/*
> +		 * RMP table entry format is not architectural and it can vary by processor
> +		 * and is defined by the per-processor PPR. Restrict SNP support on the
> +		 * known CPU model and family for which the RMP table entry format is
> +		 * currently defined for.
> +		 */
> +		if (!(c->x86 == 0x19 && c->x86_model <= 0xaf) &&
> +		    !(c->x86 == 0x1a && c->x86_model <= 0xf))
> +			setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
> +		else if (!snp_probe_rmptable_info())
> +			setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);

Is there a really good reason to perform the snp_probe_smptable_info() check at this
point (instead of in snp_rmptable_init). snp_rmptable_init will also clear the cap
on failure, and bsp_init_amd() runs too early to allow for the kernel to allocate the
rmptable itself. I pointed out in the previous review that kernel allocation of rmptable
is necessary in SNP-host capable VMs in Azure.

> +	}
> +
>  	return;
>  
>  warn:
> diff --git a/arch/x86/virt/svm/Makefile b/arch/x86/virt/svm/Makefile
> new file mode 100644
> index 000000000000..ef2a31bdcc70
> --- /dev/null
> +++ b/arch/x86/virt/svm/Makefile
> @@ -0,0 +1,3 @@
> +# SPDX-License-Identifier: GPL-2.0
> +
> +obj-$(CONFIG_KVM_AMD_SEV) += sev.o
> diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
> new file mode 100644
> index 000000000000..ce7ede9065ed
> --- /dev/null
> +++ b/arch/x86/virt/svm/sev.c
> @@ -0,0 +1,219 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * AMD SVM-SEV Host Support.
> + *
> + * Copyright (C) 2023 Advanced Micro Devices, Inc.
> + *
> + * Author: Ashish Kalra <ashish.kalra@amd.com>
> + *
> + */
> +
> +#include <linux/cc_platform.h>
> +#include <linux/printk.h>
> +#include <linux/mm_types.h>
> +#include <linux/set_memory.h>
> +#include <linux/memblock.h>
> +#include <linux/kernel.h>
> +#include <linux/mm.h>
> +#include <linux/cpumask.h>
> +#include <linux/iommu.h>
> +#include <linux/amd-iommu.h>
> +
> +#include <asm/sev.h>
> +#include <asm/processor.h>
> +#include <asm/setup.h>
> +#include <asm/svm.h>
> +#include <asm/smp.h>
> +#include <asm/cpu.h>
> +#include <asm/apic.h>
> +#include <asm/cpuid.h>
> +#include <asm/cmdline.h>
> +#include <asm/iommu.h>
> +
> +/*
> + * The RMP entry format is not architectural. The format is defined in PPR
> + * Family 19h Model 01h, Rev B1 processor.
> + */
> +struct rmpentry {
> +	u64	assigned	: 1,
> +		pagesize	: 1,
> +		immutable	: 1,
> +		rsvd1		: 9,
> +		gpa		: 39,
> +		asid		: 10,
> +		vmsa		: 1,
> +		validated	: 1,
> +		rsvd2		: 1;
> +	u64 rsvd3;
> +} __packed;
> +
> +/*
> + * The first 16KB from the RMP_BASE is used by the processor for the
> + * bookkeeping, the range needs to be added during the RMP entry lookup.
> + */
> +#define RMPTABLE_CPU_BOOKKEEPING_SZ	0x4000
> +
> +static u64 probed_rmp_base, probed_rmp_size;
> +static struct rmpentry *rmptable __ro_after_init;
> +static u64 rmptable_max_pfn __ro_after_init;
> +
> +#undef pr_fmt
> +#define pr_fmt(fmt)	"SEV-SNP: " fmt
> +
> +static int __mfd_enable(unsigned int cpu)
> +{
> +	u64 val;
> +
> +	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
> +		return 0;
> +
> +	rdmsrl(MSR_AMD64_SYSCFG, val);
> +
> +	val |= MSR_AMD64_SYSCFG_MFDM;
> +
> +	wrmsrl(MSR_AMD64_SYSCFG, val);
> +
> +	return 0;
> +}
> +
> +static __init void mfd_enable(void *arg)
> +{
> +	__mfd_enable(smp_processor_id());
> +}
> +
> +static int __snp_enable(unsigned int cpu)
> +{
> +	u64 val;
> +
> +	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
> +		return 0;
> +
> +	rdmsrl(MSR_AMD64_SYSCFG, val);
> +
> +	val |= MSR_AMD64_SYSCFG_SNP_EN;
> +	val |= MSR_AMD64_SYSCFG_SNP_VMPL_EN;
> +
> +	wrmsrl(MSR_AMD64_SYSCFG, val);
> +
> +	return 0;
> +}
> +
> +static __init void snp_enable(void *arg)
> +{
> +	__snp_enable(smp_processor_id());
> +}
> +
> +#define RMP_ADDR_MASK GENMASK_ULL(51, 13)
> +
> +bool snp_probe_rmptable_info(void)
> +{
> +	u64 max_rmp_pfn, calc_rmp_sz, rmp_sz, rmp_base, rmp_end;
> +
> +	rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
> +	rdmsrl(MSR_AMD64_RMP_END, rmp_end);
> +
> +	if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) {
> +		pr_err("Memory for the RMP table has not been reserved by BIOS\n");
> +		return false;
> +	}
> +
> +	if (rmp_base > rmp_end) {
> +		pr_err("RMP configuration not valid: base=%#llx, end=%#llx\n", rmp_base, rmp_end);
> +		return false;
> +	}
> +
> +	rmp_sz = rmp_end - rmp_base + 1;
> +
> +	/*
> +	 * Calculate the amount the memory that must be reserved by the BIOS to
> +	 * address the whole RAM, including the bookkeeping area. The RMP itself
> +	 * must also be covered.
> +	 */
> +	max_rmp_pfn = max_pfn;
> +	if (PHYS_PFN(rmp_end) > max_pfn)
> +		max_rmp_pfn = PHYS_PFN(rmp_end);
> +
> +	calc_rmp_sz = (max_rmp_pfn << 4) + RMPTABLE_CPU_BOOKKEEPING_SZ;
> +
> +	if (calc_rmp_sz > rmp_sz) {
> +		pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n",
> +		       calc_rmp_sz, rmp_sz);
> +		return false;
> +	}
> +
> +	probed_rmp_base = rmp_base;
> +	probed_rmp_size = rmp_sz;
> +
> +	pr_info("RMP table physical range [0x%016llx - 0x%016llx]\n",
> +		probed_rmp_base, probed_rmp_base + probed_rmp_size - 1);
> +
> +	return true;
> +}
> +
> +static int __init __snp_rmptable_init(void)
> +{
> +	u64 rmptable_size;
> +	void *rmptable_start;
> +	u64 val;
> +
> +	if (!probed_rmp_size)
> +		return 1;
> +
> +	rmptable_start = memremap(probed_rmp_base, probed_rmp_size, MEMREMAP_WB);
> +	if (!rmptable_start) {
> +		pr_err("Failed to map RMP table\n");
> +		return 1;
> +	}
> +
> +	/*
> +	 * Check if SEV-SNP is already enabled, this can happen in case of
> +	 * kexec boot.
> +	 */
> +	rdmsrl(MSR_AMD64_SYSCFG, val);
> +	if (val & MSR_AMD64_SYSCFG_SNP_EN)
> +		goto skip_enable;
> +
> +	memset(rmptable_start, 0, probed_rmp_size);
> +
> +	/* Flush the caches to ensure that data is written before SNP is enabled. */
> +	wbinvd_on_all_cpus();
> +
> +	/* MtrrFixDramModEn must be enabled on all the CPUs prior to enabling SNP. */
> +	on_each_cpu(mfd_enable, NULL, 1);
> +
> +	on_each_cpu(snp_enable, NULL, 1);
> +
> +skip_enable:
> +	rmptable_start += RMPTABLE_CPU_BOOKKEEPING_SZ;
> +	rmptable_size = probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ;
> +
> +	rmptable = (struct rmpentry *)rmptable_start;
> +	rmptable_max_pfn = rmptable_size / sizeof(struct rmpentry) - 1;
> +
> +	return 0;
> +}
> +
> +static int __init snp_rmptable_init(void)
> +{
> +	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
> +		return 0;
> +
> +	if (!amd_iommu_snp_en)
> +		return 0;

Looks better - do you think it'll be OK to add a X86_FEATURE_HYPERVISOR check at this point
later to account for SNP-host capable VMs with no access to an iommu?

Jeremi

> +
> +	if (__snp_rmptable_init())
> +		goto nosnp;
> +
> +	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL);
> +
> +	return 0;
> +
> +nosnp:
> +	setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
> +	return -ENOSYS;
> +}
> +
> +/*
> + * This must be called after the IOMMU has been initialized.
> + */
> +device_initcall(snp_rmptable_init);
Borislav Petkov Jan. 4, 2024, 11:16 a.m. UTC | #2
On Sat, Dec 30, 2023 at 10:19:32AM -0600, Michael Roth wrote:
> From: Brijesh Singh <brijesh.singh@amd.com>
> 
> The memory integrity guarantees of SEV-SNP are enforced through a new
> structure called the Reverse Map Table (RMP). The RMP is a single data
> structure shared across the system that contains one entry for every 4K
> page of DRAM that may be used by SEV-SNP VMs. APM2 section 15.36 details
> a number of steps needed to detect/enable SEV-SNP and RMP table support
> on the host:
> 
>  - Detect SEV-SNP support based on CPUID bit
>  - Initialize the RMP table memory reported by the RMP base/end MSR
>    registers and configure IOMMU to be compatible with RMP access
>    restrictions
>  - Set the MtrrFixDramModEn bit in SYSCFG MSR
>  - Set the SecureNestedPagingEn and VMPLEn bits in the SYSCFG MSR
>  - Configure IOMMU
> 
> RMP table entry format is non-architectural and it can vary by
> processor. It is defined by the PPR. Restrict SNP support to CPU
> models/families which are compatible with the current RMP table entry
> format to guard against any undefined behavior when running on other
> system types. Future models/support will handle this through an
> architectural mechanism to allow for broader compatibility.
> 
> SNP host code depends on CONFIG_KVM_AMD_SEV config flag, which may be
> enabled even when CONFIG_AMD_MEM_ENCRYPT isn't set, so update the
> SNP-specific IOMMU helpers used here to rely on CONFIG_KVM_AMD_SEV
> instead of CONFIG_AMD_MEM_ENCRYPT.

Small fixups to the commit message:

    The memory integrity guarantees of SEV-SNP are enforced through a new
    structure called the Reverse Map Table (RMP). The RMP is a single data
    structure shared across the system that contains one entry for every 4K
    page of DRAM that may be used by SEV-SNP VMs. The APM v2 section on
    Secure Nested Paging (SEV-SNP) details a number of steps needed to
    detect/enable SEV-SNP and RMP table support on the host:
    
     - Detect SEV-SNP support based on CPUID bit
     - Initialize the RMP table memory reported by the RMP base/end MSR
       registers and configure IOMMU to be compatible with RMP access
       restrictions
     - Set the MtrrFixDramModEn bit in SYSCFG MSR
     - Set the SecureNestedPagingEn and VMPLEn bits in the SYSCFG MSR
     - Configure IOMMU
    
    The RMP table entry format is non-architectural and it can vary by
    processor. It is defined by the PPR document for each respective CPU
    family. Restrict SNP support to CPU models/families which are compatible
    with the current RMP table entry format to guard against any undefined
    behavior when running on other system types. Future models/support will
    handle this through an architectural mechanism to allow for broader
    compatibility.
    
    The SNP host code depends on CONFIG_KVM_AMD_SEV config flag which may
    be enabled even when CONFIG_AMD_MEM_ENCRYPT isn't set, so update the
    SNP-specific IOMMU helpers used here to rely on CONFIG_KVM_AMD_SEV
    instead of CONFIG_AMD_MEM_ENCRYPT.

> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index f1bd7b91b3c6..15ce1269f270 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -599,6 +599,8 @@
>  #define MSR_AMD64_SEV_ENABLED		BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
>  #define MSR_AMD64_SEV_ES_ENABLED	BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
>  #define MSR_AMD64_SEV_SNP_ENABLED	BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
> +#define MSR_AMD64_RMP_BASE		0xc0010132
> +#define MSR_AMD64_RMP_END		0xc0010133
>  
>  /* SNP feature bits enabled by the hypervisor */
>  #define MSR_AMD64_SNP_VTOM			BIT_ULL(3)
> @@ -709,7 +711,14 @@
>  #define MSR_K8_TOP_MEM2			0xc001001d
>  #define MSR_AMD64_SYSCFG		0xc0010010
>  #define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT	23
> -#define MSR_AMD64_SYSCFG_MEM_ENCRYPT	BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
> +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT		BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
> +#define MSR_AMD64_SYSCFG_SNP_EN_BIT		24
> +#define MSR_AMD64_SYSCFG_SNP_EN		BIT_ULL(MSR_AMD64_SYSCFG_SNP_EN_BIT)
> +#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT	25
> +#define MSR_AMD64_SYSCFG_SNP_VMPL_EN		BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT)
> +#define MSR_AMD64_SYSCFG_MFDM_BIT		19
> +#define MSR_AMD64_SYSCFG_MFDM			BIT_ULL(MSR_AMD64_SYSCFG_MFDM_BIT)
> +

Fix the vertical alignment:

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 15ce1269f270..f482bc6a5ae7 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -710,14 +710,14 @@
 #define MSR_K8_TOP_MEM1			0xc001001a
 #define MSR_K8_TOP_MEM2			0xc001001d
 #define MSR_AMD64_SYSCFG		0xc0010010
-#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT	23
-#define MSR_AMD64_SYSCFG_MEM_ENCRYPT		BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
-#define MSR_AMD64_SYSCFG_SNP_EN_BIT		24
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT	BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_AMD64_SYSCFG_SNP_EN_BIT	24
 #define MSR_AMD64_SYSCFG_SNP_EN		BIT_ULL(MSR_AMD64_SYSCFG_SNP_EN_BIT)
-#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT	25
-#define MSR_AMD64_SYSCFG_SNP_VMPL_EN		BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT)
-#define MSR_AMD64_SYSCFG_MFDM_BIT		19
-#define MSR_AMD64_SYSCFG_MFDM			BIT_ULL(MSR_AMD64_SYSCFG_MFDM_BIT)
+#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT 25
+#define MSR_AMD64_SYSCFG_SNP_VMPL_EN	BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT)
+#define MSR_AMD64_SYSCFG_MFDM_BIT	19
+#define MSR_AMD64_SYSCFG_MFDM		BIT_ULL(MSR_AMD64_SYSCFG_MFDM_BIT)
 
 #define MSR_K8_INT_PENDING_MSG		0xc0010055
 /* C1E active bits in int pending message */
Borislav Petkov Jan. 4, 2024, 2:42 p.m. UTC | #3
On Sat, Dec 30, 2023 at 10:19:32AM -0600, Michael Roth wrote:
> +	if (cpu_has(c, X86_FEATURE_SEV_SNP)) {
> +		/*
> +		 * RMP table entry format is not architectural and it can vary by processor
> +		 * and is defined by the per-processor PPR. Restrict SNP support on the
> +		 * known CPU model and family for which the RMP table entry format is
> +		 * currently defined for.
> +		 */
> +		if (!(c->x86 == 0x19 && c->x86_model <= 0xaf) &&
> +		    !(c->x86 == 0x1a && c->x86_model <= 0xf))
> +			setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
> +		else if (!snp_probe_rmptable_info())
> +			setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
> +	}

IOW, this below.

Lemme send the ZEN5 thing as a separate patch.

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 9492dcad560d..0fa702673e73 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -81,10 +81,8 @@
 #define X86_FEATURE_K6_MTRR		( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
 #define X86_FEATURE_CYRIX_ARR		( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
 #define X86_FEATURE_CENTAUR_MCR		( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-
-/* CPU types for specific tunings: */
 #define X86_FEATURE_K8			( 3*32+ 4) /* "" Opteron, Athlon64 */
-/* FREE, was #define X86_FEATURE_K7			( 3*32+ 5) "" Athlon */
+#define X86_FEATURE_ZEN5		( 3*32+ 5) /* "" CPU based on Zen5 microarchitecture */
 #define X86_FEATURE_P3			( 3*32+ 6) /* "" P3 */
 #define X86_FEATURE_P4			( 3*32+ 7) /* "" P4 */
 #define X86_FEATURE_CONSTANT_TSC	( 3*32+ 8) /* TSC ticks at a constant rate */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 0f0d425f0440..46335c2df083 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -539,7 +539,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
 
 	/* Figure out Zen generations: */
 	switch (c->x86) {
-	case 0x17: {
+	case 0x17:
 		switch (c->x86_model) {
 		case 0x00 ... 0x2f:
 		case 0x50 ... 0x5f:
@@ -555,8 +555,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
 			goto warn;
 		}
 		break;
-	}
-	case 0x19: {
+
+	case 0x19:
 		switch (c->x86_model) {
 		case 0x00 ... 0x0f:
 		case 0x20 ... 0x5f:
@@ -570,20 +570,31 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
 			goto warn;
 		}
 		break;
-	}
+
+	case 0x1a:
+		switch (c->x86_model) {
+		case 0x00 ... 0x0f:
+			setup_force_cpu_cap(X86_FEATURE_ZEN5);
+			break;
+		default:
+			goto warn;
+		}
+		break;
+
 	default:
 		break;
 	}
 
 	if (cpu_has(c, X86_FEATURE_SEV_SNP)) {
 		/*
-		 * RMP table entry format is not architectural and it can vary by processor
+		 * RMP table entry format is not architectural, can vary by processor
 		 * and is defined by the per-processor PPR. Restrict SNP support on the
 		 * known CPU model and family for which the RMP table entry format is
 		 * currently defined for.
 		 */
-		if (!(c->x86 == 0x19 && c->x86_model <= 0xaf) &&
-		    !(c->x86 == 0x1a && c->x86_model <= 0xf))
+		if (!boot_cpu_has(X86_FEATURE_ZEN3) &&
+		    !boot_cpu_has(X86_FEATURE_ZEN4) &&
+		    !boot_cpu_has(X86_FEATURE_ZEN5))
 			setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
 		else if (!snp_probe_rmptable_info())
 			setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
@@ -1055,6 +1066,11 @@ static void init_amd_zen4(struct cpuinfo_x86 *c)
 		msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT);
 }
 
+static void init_amd_zen5(struct cpuinfo_x86 *c)
+{
+	init_amd_zen_common();
+}
+
 static void init_amd(struct cpuinfo_x86 *c)
 {
 	u64 vm_cr;
@@ -1100,6 +1116,8 @@ static void init_amd(struct cpuinfo_x86 *c)
 		init_amd_zen3(c);
 	else if (boot_cpu_has(X86_FEATURE_ZEN4))
 		init_amd_zen4(c);
+	else if (boot_cpu_has(X86_FEATURE_ZEN5))
+		init_amd_zen5(c);
 
 	/*
 	 * Enable workaround for FXSAVE leak on CPUs
Borislav Petkov Jan. 5, 2024, 4:09 p.m. UTC | #4
On Thu, Jan 04, 2024 at 12:05:27PM +0100, Jeremi Piotrowski wrote:
> Is there a really good reason to perform the snp_probe_smptable_info() check at this
> point (instead of in snp_rmptable_init). snp_rmptable_init will also clear the cap
> on failure, and bsp_init_amd() runs too early to allow for the kernel to allocate the
> rmptable itself. I pointed out in the previous review that kernel allocation of rmptable
> is necessary in SNP-host capable VMs in Azure.

What does that even mean?

That function is doing some calculations after reading two MSRs. What
can possibly go wrong?!
Borislav Petkov Jan. 5, 2024, 4:21 p.m. UTC | #5
On Fri, Jan 05, 2024 at 05:09:16PM +0100, Borislav Petkov wrote:
> On Thu, Jan 04, 2024 at 12:05:27PM +0100, Jeremi Piotrowski wrote:
> > Is there a really good reason to perform the snp_probe_smptable_info() check at this
> > point (instead of in snp_rmptable_init). snp_rmptable_init will also clear the cap
> > on failure, and bsp_init_amd() runs too early to allow for the kernel to allocate the
> > rmptable itself. I pointed out in the previous review that kernel allocation of rmptable
> > is necessary in SNP-host capable VMs in Azure.
> 
> What does that even mean?
> 
> That function is doing some calculations after reading two MSRs. What
> can possibly go wrong?!

That could be one reason perhaps:

"It needs to be called early enough to allow for AutoIBRS to not be disabled
just because SNP is supported. By calling it where it is currently called, the
SNP feature can be cleared if, even though supported, SNP can't be used,
allowing AutoIBRS to be used as a more performant Spectre mitigation."

https://lore.kernel.org/r/8ec38db1-5ccf-4684-bc0d-d48579ebf0d0@amd.com
Borislav Petkov Jan. 5, 2024, 7:19 p.m. UTC | #6
On Sat, Dec 30, 2023 at 10:19:32AM -0600, Michael Roth wrote:
> +static int __init __snp_rmptable_init(void)
> +{
> +	u64 rmptable_size;
> +	void *rmptable_start;
> +	u64 val;

...

Ontop:

diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index ce7ede9065ed..566bb6f39665 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -150,6 +150,11 @@ bool snp_probe_rmptable_info(void)
 	return true;
 }
 
+/*
+ * Do the necessary preparations which are verified by the firmware as
+ * described in the SNP_INIT_EX firmware command description in the SNP
+ * firmware ABI spec.
+ */
 static int __init __snp_rmptable_init(void)
 {
 	u64 rmptable_size;
Borislav Petkov Jan. 5, 2024, 9:27 p.m. UTC | #7
On Sat, Dec 30, 2023 at 10:19:32AM -0600, Michael Roth wrote:
> +static int __init __snp_rmptable_init(void)

I already asked a year ago:

https://lore.kernel.org/all/Y9ubi0i4Z750gdMm@zn.tnic/

why is the __ version - __snp_rmptable_init - carved out but crickets.
It simply gets ignored. :-\

So let me do it myself, diff below.

Please add to the next version:

Co-developed-by: Borislav Petkov (AMD) <bp@alien8.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>

after incorporating all the changes.

Thx.

---
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 566bb6f39665..feed65f80776 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -155,19 +155,25 @@ bool snp_probe_rmptable_info(void)
  * described in the SNP_INIT_EX firmware command description in the SNP
  * firmware ABI spec.
  */
-static int __init __snp_rmptable_init(void)
+static int __init snp_rmptable_init(void)
 {
-	u64 rmptable_size;
 	void *rmptable_start;
+	u64 rmptable_size;
 	u64 val;
 
+	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+		return 0;
+
+	if (!amd_iommu_snp_en)
+		return 0;
+
 	if (!probed_rmp_size)
-		return 1;
+		goto nosnp;
 
 	rmptable_start = memremap(probed_rmp_base, probed_rmp_size, MEMREMAP_WB);
 	if (!rmptable_start) {
 		pr_err("Failed to map RMP table\n");
-		return 1;
+		goto nosnp;
 	}
 
 	/*
@@ -195,20 +201,6 @@ static int __init __snp_rmptable_init(void)
 	rmptable = (struct rmpentry *)rmptable_start;
 	rmptable_max_pfn = rmptable_size / sizeof(struct rmpentry) - 1;
 
-	return 0;
-}
-
-static int __init snp_rmptable_init(void)
-{
-	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
-		return 0;
-
-	if (!amd_iommu_snp_en)
-		return 0;
-
-	if (__snp_rmptable_init())
-		goto nosnp;
-
 	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL);
 
 	return 0;
Jeremi Piotrowski Jan. 8, 2024, 4:49 p.m. UTC | #8
On 05/01/2024 17:21, Borislav Petkov wrote:
> On Fri, Jan 05, 2024 at 05:09:16PM +0100, Borislav Petkov wrote:
>> On Thu, Jan 04, 2024 at 12:05:27PM +0100, Jeremi Piotrowski wrote:
>>> Is there a really good reason to perform the snp_probe_smptable_info() check at this
>>> point (instead of in snp_rmptable_init). snp_rmptable_init will also clear the cap
>>> on failure, and bsp_init_amd() runs too early to allow for the kernel to allocate the
>>> rmptable itself. I pointed out in the previous review that kernel allocation of rmptable
>>> is necessary in SNP-host capable VMs in Azure.
>>
>> What does that even mean?>>
>> That function is doing some calculations after reading two MSRs. What
>> can possibly go wrong?!
> 

What I wrote: "allow for the kernel to allocate the rmptable". Until the kernel allocates a
rmptable the two MSRs are not initialized in a VM. This is specific to SNP-host VMs because
they don't have access to the system-wide rmptable (or a virtualized version of it), and the
rmptable is only useful for kernel internal tracking in this case. So we don't strictly need
one and could save the overhead but not having one would complicate the KVM SNP code so I'd
rather allocate one for now.
 
It makes most sense to perform the rmptable allocation later in kernel init, after platform
detection and e820 setup. It isn't really used until device_initcall.

https://lore.kernel.org/lkml/20230213103402.1189285-2-jpiotrowski@linux.microsoft.com/
(I'll be posting updated patches soon).


> That could be one reason perhaps:
> 
> "It needs to be called early enough to allow for AutoIBRS to not be disabled
> just because SNP is supported. By calling it where it is currently called, the
> SNP feature can be cleared if, even though supported, SNP can't be used,
> allowing AutoIBRS to be used as a more performant Spectre mitigation."
> 
> https://lore.kernel.org/r/8ec38db1-5ccf-4684-bc0d-d48579ebf0d0@amd.com
> 

This logic seems twisted. Why use firmware rmptable allocation as a proxy for SEV-SNP
enablement if BIOS provides an explicit flag to enable/disable SEV-SNP support. That
would be a better signal to use to control AutoIBRS enablement.
Borislav Petkov Jan. 8, 2024, 5:04 p.m. UTC | #9
On Mon, Jan 08, 2024 at 05:49:01PM +0100, Jeremi Piotrowski wrote:
> What I wrote: "allow for the kernel to allocate the rmptable".

What?!

"15.36.5 Hypervisor RMP Management

...

Because the RMP is initialized by the AMD-SP to prevent direct access to
the RMP, the hypervisor must use the RMPUPDATE instruction to alter the
entries of the RMP. RMPUPDATE allows the hypervisor to alter the
Guest_Physical_Address, Assigned, Page_Size, Immutable, and ASID fields
of an RMP entry."

What you want is something that you should keep far and away from the
upstream kernel.
Jeremi Piotrowski Jan. 9, 2024, 11:56 a.m. UTC | #10
On 08/01/2024 18:04, Borislav Petkov wrote:
> On Mon, Jan 08, 2024 at 05:49:01PM +0100, Jeremi Piotrowski wrote:
>> What I wrote: "allow for the kernel to allocate the rmptable".
> 
> What?!
> 
> "15.36.5 Hypervisor RMP Management
> 
> ...
> 
> Because the RMP is initialized by the AMD-SP to prevent direct access to
> the RMP, the hypervisor must use the RMPUPDATE instruction to alter the
> entries of the RMP. RMPUPDATE allows the hypervisor to alter the
> Guest_Physical_Address, Assigned, Page_Size, Immutable, and ASID fields
> of an RMP entry."
>> What you want is something that you should keep far and away from the
> upstream kernel.
>

Can we please not assume I am acting in bad faith. I am explicitly trying to
integrate nicely with AMD's KVM SNP host patches to cover an additional usecase
and get something upstreamable.

Let's separate RMP allocation from who (and how) maintains the entries.

"""
15.36.4 Initializing the RMP
...
Software must program RMP_BASE and RMP_END identically for each core in the
system and before enabling SEV-SNP globally.
"""

KVM expects UEFI to do this, Hyper-V does the allocation itself (on bare-metal).
Both are valid. Afaik it is the SNP_INIT command that hands over control of the
RMP from software to AMD-SP.

When it comes to "who and how maintains the rmp" - that is of course the AMD-SP
and hypervisor issues RMPUPDATE instructions. The paragraph you cite talks about
the physical RMP and AMD-SP - not virtualized SNP (aka "SNP-host VM"/nested SNP).
AMD specified an MSR-based RMPUPDATE for us for that usecase (15.36.19 SEV-SNP
Instruction Virtualization). The RMP inside the SNP-host VM is not related to
the physical RMP and is an entirely software based construct.

The RMP in nested SNP is only used for kernel bookkeeping and so its allocation
is optional. KVM could do without reading the RMP directly altogether (by tracking
the assigned bit somewhere) but that would be a design change and I'd rather see
the KVM SNP host patches merged in their current shape. Which is why the patch
I linked allocates a (shadow) RMP from the kernel.

I would very much appreciate if we would not prevent that usecase from working -
that's why I've been reviewing and testing multiple revisions of these patches
and providing feedback all along.
Borislav Petkov Jan. 9, 2024, 12:29 p.m. UTC | #11
On Tue, Jan 09, 2024 at 12:56:17PM +0100, Jeremi Piotrowski wrote:
> Can we please not assume I am acting in bad faith.

No you're not acting with bad faith.

What you're doing, in my experience so far is, you come with some weird
HV + guest models which has been invented somewhere, behind some closed
doors, then you come with some desire that the upstream kernel should
support it and you're not even documenting it properly and I'm left with
asking questions all the time, what is this, what's the use case,
blabla.

Don't take this personally - I guess this is all due to NDAs,
development schedules, and whatever else and yes, I've heard it all.

But just because you want this, we're not going to jump on it and
support it unconditionally. It needs to integrate properly with the rest
of the kernel and if it doesn't, it is not going upstream. That simple.

> I am explicitly trying to integrate nicely with AMD's KVM SNP host
> patches to cover an additional usecase and get something upstreamable.

And yet I still have no clue what your use case is. I always have to go
ask behind the scenes and get some half-answers about *maybe* this is
what they support.

Looking at the patch you pointed at I see there a proper explanation of
your nested SNP stuff. Finally!

From now on, please make sure your use case is properly explained
before you come with patches.

> The RMP in nested SNP is only used for kernel bookkeeping and so its
> allocation is optional. KVM could do without reading the RMP directly
> altogether (by tracking the assigned bit somewhere) but that would be
> a design change and I'd rather see the KVM SNP host patches merged in
> their current shape. Which is why the patch I linked allocates
> a (shadow) RMP from the kernel.

At least three issues I see with that:

- the allocation can fail so it is a lot more convenient when the
  firmware prepares it

- the RMP_BASE and RMP_END writes need to be verified they actially did
  set up the RMP range because if they haven't, you might as well
  throw SNP security out of the window. In general, letting the kernel
  do the RMP allocation needs to be verified very very thoroughly.

- a future feature might make this more complicated

> I would very much appreciate if we would not prevent that usecase from
> working - that's why I've been reviewing and testing multiple
> revisions of these patches and providing feedback all along.

I very much appreciate the help but we need to get the main SNP host
stuff in first and then we can talk about modifications.
Borislav Petkov Jan. 9, 2024, 12:44 p.m. UTC | #12
On Tue, Jan 09, 2024 at 01:29:06PM +0100, Borislav Petkov wrote:
> At least three issues I see with that:
> 
> - the allocation can fail so it is a lot more convenient when the
>   firmware prepares it
> 
> - the RMP_BASE and RMP_END writes need to be verified they actially did
>   set up the RMP range because if they haven't, you might as well
>   throw SNP security out of the window. In general, letting the kernel
>   do the RMP allocation needs to be verified very very thoroughly.
> 
> - a future feature might make this more complicated

- What do you do if you boot on a system which has the RMP already
  allocated in the BIOS?

- How do you detect that it is the L1 kernel that must allocate the RMP?

- Why can't you use the BIOS allocated RMP in your scenario too instead
  of the L1 kernel allocating it?

- ...

I might think of more.
Jeremi Piotrowski Feb. 14, 2024, 4:56 p.m. UTC | #13
On 09/01/2024 13:44, Borislav Petkov wrote:
> On Tue, Jan 09, 2024 at 01:29:06PM +0100, Borislav Petkov wrote:
>> At least three issues I see with that:
>>
>> - the allocation can fail so it is a lot more convenient when the
>>   firmware prepares it
>>
>> - the RMP_BASE and RMP_END writes need to be verified they actially did
>>   set up the RMP range because if they haven't, you might as well
>>   throw SNP security out of the window. In general, letting the kernel
>>   do the RMP allocation needs to be verified very very thoroughly.
>>
>> - a future feature might make this more complicated
> 
> - What do you do if you boot on a system which has the RMP already
>   allocated in the BIOS?
> 
> - How do you detect that it is the L1 kernel that must allocate the RMP?
> 
> - Why can't you use the BIOS allocated RMP in your scenario too instead
>   of the L1 kernel allocating it?
> 
> - ...
> 
> I might think of more.
> 

Sorry for not replying back sooner.

I agree, lets get the base SNP stuff in and then talk about extensions.

I want to sync up with Michael to make sure he's onboard with what I'm
proposing. I'll add more design/documentation/usecase descriptions with the
next submission and will make sure to address all the issues you brought up.

Jeremi
diff mbox series

Patch

diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 5a83da703e87..6a1f36df6a18 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -28,5 +28,7 @@  obj-y += net/
 
 obj-$(CONFIG_KEXEC_FILE) += purgatory/
 
+obj-y += virt/svm/
+
 # for cleaning
 subdir- += boot tools
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index f1bd7b91b3c6..15ce1269f270 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -599,6 +599,8 @@ 
 #define MSR_AMD64_SEV_ENABLED		BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
 #define MSR_AMD64_SEV_ES_ENABLED	BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
 #define MSR_AMD64_SEV_SNP_ENABLED	BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
+#define MSR_AMD64_RMP_BASE		0xc0010132
+#define MSR_AMD64_RMP_END		0xc0010133
 
 /* SNP feature bits enabled by the hypervisor */
 #define MSR_AMD64_SNP_VTOM			BIT_ULL(3)
@@ -709,7 +711,14 @@ 
 #define MSR_K8_TOP_MEM2			0xc001001d
 #define MSR_AMD64_SYSCFG		0xc0010010
 #define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT	23
-#define MSR_AMD64_SYSCFG_MEM_ENCRYPT	BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT		BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_AMD64_SYSCFG_SNP_EN_BIT		24
+#define MSR_AMD64_SYSCFG_SNP_EN		BIT_ULL(MSR_AMD64_SYSCFG_SNP_EN_BIT)
+#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT	25
+#define MSR_AMD64_SYSCFG_SNP_VMPL_EN		BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT)
+#define MSR_AMD64_SYSCFG_MFDM_BIT		19
+#define MSR_AMD64_SYSCFG_MFDM			BIT_ULL(MSR_AMD64_SYSCFG_MFDM_BIT)
+
 #define MSR_K8_INT_PENDING_MSG		0xc0010055
 /* C1E active bits in int pending message */
 #define K8_INTP_C1E_ACTIVE_MASK		0x18000000
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 5b4a1ce3d368..1f59d8ba9776 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -243,4 +243,10 @@  static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
 static inline u64 sev_get_status(void) { return 0; }
 #endif
 
+#ifdef CONFIG_KVM_AMD_SEV
+bool snp_probe_rmptable_info(void);
+#else
+static inline bool snp_probe_rmptable_info(void) { return false; }
+#endif
+
 #endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9a17165dfe84..0f0d425f0440 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -20,6 +20,7 @@ 
 #include <asm/delay.h>
 #include <asm/debugreg.h>
 #include <asm/resctrl.h>
+#include <asm/sev.h>
 
 #ifdef CONFIG_X86_64
 # include <asm/mmconfig.h>
@@ -574,6 +575,20 @@  static void bsp_init_amd(struct cpuinfo_x86 *c)
 		break;
 	}
 
+	if (cpu_has(c, X86_FEATURE_SEV_SNP)) {
+		/*
+		 * RMP table entry format is not architectural and it can vary by processor
+		 * and is defined by the per-processor PPR. Restrict SNP support on the
+		 * known CPU model and family for which the RMP table entry format is
+		 * currently defined for.
+		 */
+		if (!(c->x86 == 0x19 && c->x86_model <= 0xaf) &&
+		    !(c->x86 == 0x1a && c->x86_model <= 0xf))
+			setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+		else if (!snp_probe_rmptable_info())
+			setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+	}
+
 	return;
 
 warn:
diff --git a/arch/x86/virt/svm/Makefile b/arch/x86/virt/svm/Makefile
new file mode 100644
index 000000000000..ef2a31bdcc70
--- /dev/null
+++ b/arch/x86/virt/svm/Makefile
@@ -0,0 +1,3 @@ 
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_KVM_AMD_SEV) += sev.o
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
new file mode 100644
index 000000000000..ce7ede9065ed
--- /dev/null
+++ b/arch/x86/virt/svm/sev.c
@@ -0,0 +1,219 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD SVM-SEV Host Support.
+ *
+ * Copyright (C) 2023 Advanced Micro Devices, Inc.
+ *
+ * Author: Ashish Kalra <ashish.kalra@amd.com>
+ *
+ */
+
+#include <linux/cc_platform.h>
+#include <linux/printk.h>
+#include <linux/mm_types.h>
+#include <linux/set_memory.h>
+#include <linux/memblock.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/cpumask.h>
+#include <linux/iommu.h>
+#include <linux/amd-iommu.h>
+
+#include <asm/sev.h>
+#include <asm/processor.h>
+#include <asm/setup.h>
+#include <asm/svm.h>
+#include <asm/smp.h>
+#include <asm/cpu.h>
+#include <asm/apic.h>
+#include <asm/cpuid.h>
+#include <asm/cmdline.h>
+#include <asm/iommu.h>
+
+/*
+ * The RMP entry format is not architectural. The format is defined in PPR
+ * Family 19h Model 01h, Rev B1 processor.
+ */
+struct rmpentry {
+	u64	assigned	: 1,
+		pagesize	: 1,
+		immutable	: 1,
+		rsvd1		: 9,
+		gpa		: 39,
+		asid		: 10,
+		vmsa		: 1,
+		validated	: 1,
+		rsvd2		: 1;
+	u64 rsvd3;
+} __packed;
+
+/*
+ * The first 16KB from the RMP_BASE is used by the processor for the
+ * bookkeeping, the range needs to be added during the RMP entry lookup.
+ */
+#define RMPTABLE_CPU_BOOKKEEPING_SZ	0x4000
+
+static u64 probed_rmp_base, probed_rmp_size;
+static struct rmpentry *rmptable __ro_after_init;
+static u64 rmptable_max_pfn __ro_after_init;
+
+#undef pr_fmt
+#define pr_fmt(fmt)	"SEV-SNP: " fmt
+
+static int __mfd_enable(unsigned int cpu)
+{
+	u64 val;
+
+	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+		return 0;
+
+	rdmsrl(MSR_AMD64_SYSCFG, val);
+
+	val |= MSR_AMD64_SYSCFG_MFDM;
+
+	wrmsrl(MSR_AMD64_SYSCFG, val);
+
+	return 0;
+}
+
+static __init void mfd_enable(void *arg)
+{
+	__mfd_enable(smp_processor_id());
+}
+
+static int __snp_enable(unsigned int cpu)
+{
+	u64 val;
+
+	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+		return 0;
+
+	rdmsrl(MSR_AMD64_SYSCFG, val);
+
+	val |= MSR_AMD64_SYSCFG_SNP_EN;
+	val |= MSR_AMD64_SYSCFG_SNP_VMPL_EN;
+
+	wrmsrl(MSR_AMD64_SYSCFG, val);
+
+	return 0;
+}
+
+static __init void snp_enable(void *arg)
+{
+	__snp_enable(smp_processor_id());
+}
+
+#define RMP_ADDR_MASK GENMASK_ULL(51, 13)
+
+bool snp_probe_rmptable_info(void)
+{
+	u64 max_rmp_pfn, calc_rmp_sz, rmp_sz, rmp_base, rmp_end;
+
+	rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
+	rdmsrl(MSR_AMD64_RMP_END, rmp_end);
+
+	if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) {
+		pr_err("Memory for the RMP table has not been reserved by BIOS\n");
+		return false;
+	}
+
+	if (rmp_base > rmp_end) {
+		pr_err("RMP configuration not valid: base=%#llx, end=%#llx\n", rmp_base, rmp_end);
+		return false;
+	}
+
+	rmp_sz = rmp_end - rmp_base + 1;
+
+	/*
+	 * Calculate the amount the memory that must be reserved by the BIOS to
+	 * address the whole RAM, including the bookkeeping area. The RMP itself
+	 * must also be covered.
+	 */
+	max_rmp_pfn = max_pfn;
+	if (PHYS_PFN(rmp_end) > max_pfn)
+		max_rmp_pfn = PHYS_PFN(rmp_end);
+
+	calc_rmp_sz = (max_rmp_pfn << 4) + RMPTABLE_CPU_BOOKKEEPING_SZ;
+
+	if (calc_rmp_sz > rmp_sz) {
+		pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n",
+		       calc_rmp_sz, rmp_sz);
+		return false;
+	}
+
+	probed_rmp_base = rmp_base;
+	probed_rmp_size = rmp_sz;
+
+	pr_info("RMP table physical range [0x%016llx - 0x%016llx]\n",
+		probed_rmp_base, probed_rmp_base + probed_rmp_size - 1);
+
+	return true;
+}
+
+static int __init __snp_rmptable_init(void)
+{
+	u64 rmptable_size;
+	void *rmptable_start;
+	u64 val;
+
+	if (!probed_rmp_size)
+		return 1;
+
+	rmptable_start = memremap(probed_rmp_base, probed_rmp_size, MEMREMAP_WB);
+	if (!rmptable_start) {
+		pr_err("Failed to map RMP table\n");
+		return 1;
+	}
+
+	/*
+	 * Check if SEV-SNP is already enabled, this can happen in case of
+	 * kexec boot.
+	 */
+	rdmsrl(MSR_AMD64_SYSCFG, val);
+	if (val & MSR_AMD64_SYSCFG_SNP_EN)
+		goto skip_enable;
+
+	memset(rmptable_start, 0, probed_rmp_size);
+
+	/* Flush the caches to ensure that data is written before SNP is enabled. */
+	wbinvd_on_all_cpus();
+
+	/* MtrrFixDramModEn must be enabled on all the CPUs prior to enabling SNP. */
+	on_each_cpu(mfd_enable, NULL, 1);
+
+	on_each_cpu(snp_enable, NULL, 1);
+
+skip_enable:
+	rmptable_start += RMPTABLE_CPU_BOOKKEEPING_SZ;
+	rmptable_size = probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ;
+
+	rmptable = (struct rmpentry *)rmptable_start;
+	rmptable_max_pfn = rmptable_size / sizeof(struct rmpentry) - 1;
+
+	return 0;
+}
+
+static int __init snp_rmptable_init(void)
+{
+	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+		return 0;
+
+	if (!amd_iommu_snp_en)
+		return 0;
+
+	if (__snp_rmptable_init())
+		goto nosnp;
+
+	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL);
+
+	return 0;
+
+nosnp:
+	setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+	return -ENOSYS;
+}
+
+/*
+ * This must be called after the IOMMU has been initialized.
+ */
+device_initcall(snp_rmptable_init);