
[v2,08/10] KVM: x86: Add a hook in kvm_arch_vcpu_map_memory()

Message ID 7194bb75ac25fa98875b7891d7929655ab245205.1712785629.git.isaku.yamahata@intel.com (mailing list archive)
State New, archived
Series: KVM: Guest Memory Pre-Population API

Commit Message

Isaku Yamahata April 10, 2024, 10:07 p.m. UTC
From: Isaku Yamahata <isaku.yamahata@intel.com>

Add a hook in kvm_arch_vcpu_map_memory() for KVM_MAP_MEMORY, called before
kvm_tdp_map_page(), to adjust the error code for the page fault.  The hook
can hold vendor-specific logic to make those adjustments and enforce the
vendor's restrictions.  SEV and TDX KVM will use the hook.

SEV and TDX need to adjust the KVM page fault error code or refuse the
operation due to their restrictions.  TDX requires that guest memory be
populated before the VM is finalized.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
v2:
- Make pre_mmu_map_page() take error_code.
- Drop post_mmu_map_page().
- Drop struct kvm_memory_map.source check.
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h    |  3 +++
 arch/x86/kvm/x86.c                 | 28 ++++++++++++++++++++++++++++
 3 files changed, 32 insertions(+)
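
For orientation, a minimal sketch of what a vendor implementation of the new
hook might look like.  It is not part of this patch: tdx_is_finalized() and
the exact finalization rule are assumed placeholders, while
kvm_mem_is_private(), gpa_to_gfn() and PFERR_PRIVATE_ACCESS are existing KVM
symbols.

	static int tdx_pre_mmu_map_page(struct kvm_vcpu *vcpu,
					struct kvm_memory_mapping *mapping,
					u64 *error_code)
	{
		/* Assumed rule: TDX only allows pre-population before finalization. */
		if (tdx_is_finalized(vcpu->kvm))
			return -EINVAL;

		/* Private GPAs must be faulted in as private. */
		if (kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(mapping->base_address)))
			*error_code |= PFERR_PRIVATE_ACCESS;

		return 0;
	}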

Comments

Edgecombe, Rick P April 16, 2024, 2:57 p.m. UTC | #1
On Wed, 2024-04-10 at 15:07 -0700, isaku.yamahata@intel.com wrote:
> +static int kvm_pre_mmu_map_page(struct kvm_vcpu *vcpu,
> +                               struct kvm_memory_mapping *mapping,
> +                               u64 *error_code)
> +{
> +       int r = 0;
> +
> +       if (vcpu->kvm->arch.vm_type == KVM_X86_DEFAULT_VM) {
> +               /* nothing */

On the Intel side, vt_pre_mmu_map_page will handle doing nothing. Is there a
reason the AMD side can't do the same thing?

> +       } else if (vcpu->kvm->arch.vm_type == KVM_X86_SW_PROTECTED_VM) {
> +               if (kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(mapping->base_address)))
> +                       *error_code |= PFERR_PRIVATE_ACCESS;

Not suggesting we do anything about it for this series, but there seems to be a
growing number of manual KVM_X86_SW_PROTECTED_VM checks. I guess the problem
with giving it its own x86_ops is figuring out which arch calls to use. Hmm.

> +       } else if (kvm_x86_ops.pre_mmu_map_page)
> +               r = static_call(kvm_x86_pre_mmu_map_page)(vcpu, mapping,
> +                                                         error_code);
> +       else
> +               r = -EOPNOTSUPP;

Do we actually need this last check?

> +
> +       return r;
> +}
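
One possible simplification behind those questions (a sketch of the idea, not
something this patch does): if the op were declared with
KVM_X86_OP_OPTIONAL_RET0, the static call would return 0 whenever a vendor
module leaves it unset, so both the "/* nothing */" arm and the -EOPNOTSUPP
fallback could collapse into an unconditional call:

	/* arch/x86/include/asm/kvm-x86-ops.h */
	KVM_X86_OP_OPTIONAL_RET0(pre_mmu_map_page)

	/* arch/x86/kvm/x86.c */
	static int kvm_pre_mmu_map_page(struct kvm_vcpu *vcpu,
					struct kvm_memory_mapping *mapping,
					u64 *error_code)
	{
		if (vcpu->kvm->arch.vm_type == KVM_X86_SW_PROTECTED_VM &&
		    kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(mapping->base_address)))
			*error_code |= PFERR_PRIVATE_ACCESS;

		/* Returns 0 for vendor modules that do not implement the op. */
		return static_call(kvm_x86_pre_mmu_map_page)(vcpu, mapping, error_code);
	}

Whether it is safe to silently succeed for VM types that actually require
vendor handling is exactly what the -EOPNOTSUPP question is about.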
Paolo Bonzini April 17, 2024, 12:26 p.m. UTC | #2
On Thu, Apr 11, 2024 at 12:08 AM <isaku.yamahata@intel.com> wrote:
>
> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
> Add a hook in kvm_arch_vcpu_map_memory() for KVM_MAP_MEMORY, called before
> kvm_tdp_map_page(), to adjust the error code for the page fault.  The hook
> can hold vendor-specific logic to make those adjustments and enforce the
> vendor's restrictions.  SEV and TDX KVM will use the hook.
>
> SEV and TDX need to adjust the KVM page fault error code or refuse the
> operation due to their restrictions.  TDX requires that guest memory be
> populated before the VM is finalized.
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
> v2:
> - Make pre_mmu_map_page() take error_code.
> - Drop post_mmu_map_page().
> - Drop struct kvm_memory_map.source check.
> ---
>  arch/x86/include/asm/kvm-x86-ops.h |  1 +
>  arch/x86/include/asm/kvm_host.h    |  3 +++
>  arch/x86/kvm/x86.c                 | 28 ++++++++++++++++++++++++++++
>  3 files changed, 32 insertions(+)
>
> diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
> index 5187fcf4b610..a5d4f4d5265d 100644
> --- a/arch/x86/include/asm/kvm-x86-ops.h
> +++ b/arch/x86/include/asm/kvm-x86-ops.h
> @@ -139,6 +139,7 @@ KVM_X86_OP(vcpu_deliver_sipi_vector)
>  KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
>  KVM_X86_OP_OPTIONAL(get_untagged_addr)
>  KVM_X86_OP_OPTIONAL(alloc_apic_backing_page)
> +KVM_X86_OP_OPTIONAL(pre_mmu_map_page);
>
>  #undef KVM_X86_OP
>  #undef KVM_X86_OP_OPTIONAL
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 3ce244ad44e5..2bf7f97f889b 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1812,6 +1812,9 @@ struct kvm_x86_ops {
>
>         gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
>         void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
> +       int (*pre_mmu_map_page)(struct kvm_vcpu *vcpu,
> +                               struct kvm_memory_mapping *mapping,
> +                               u64 *error_code);
>  };
>
>  struct kvm_x86_nested_ops {
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 8ba9c1720ac9..b76d854701d5 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5868,6 +5868,26 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
>         }
>  }
>
> +static int kvm_pre_mmu_map_page(struct kvm_vcpu *vcpu,
> +                               struct kvm_memory_mapping *mapping,
> +                               u64 *error_code)
> +{
> +       int r = 0;
> +
> +       if (vcpu->kvm->arch.vm_type == KVM_X86_DEFAULT_VM) {
> +               /* nothing */
> +       } else if (vcpu->kvm->arch.vm_type == KVM_X86_SW_PROTECTED_VM) {
> +               if (kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(mapping->base_address)))
> +                       *error_code |= PFERR_PRIVATE_ACCESS;

This can probably be done for all VM types, not just KVM_X86_SW_PROTECTED_VM.

For now I am going to squash

        if (kvm_arch_has_private_mem(vcpu->kvm) &&
            kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(mapping->base_address)))
                *error_code |= PFERR_GUEST_ENC_MASK;

in the previous patch. If TDX or SEV need to adjust, they can
introduce the hook where we know if/how it is used.

Paolo
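
For context, the squash described above would presumably sit in
kvm_arch_vcpu_map_memory() of the previous patch, just before the existing
kvm_tdp_map_page() call.  A sketch of that direction, not the final code; all
symbols exist in the series, only the placement is assumed:

	/* In kvm_arch_vcpu_map_memory(), after kvm_mmu_reload(): */
	if (kvm_arch_has_private_mem(vcpu->kvm) &&
	    kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(mapping->base_address)))
		error_code |= PFERR_GUEST_ENC_MASK;

	r = kvm_tdp_map_page(vcpu, mapping->base_address, error_code, &level);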

Patch

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 5187fcf4b610..a5d4f4d5265d 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -139,6 +139,7 @@  KVM_X86_OP(vcpu_deliver_sipi_vector)
 KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
 KVM_X86_OP_OPTIONAL(get_untagged_addr)
 KVM_X86_OP_OPTIONAL(alloc_apic_backing_page)
+KVM_X86_OP_OPTIONAL(pre_mmu_map_page);
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_OPTIONAL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3ce244ad44e5..2bf7f97f889b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1812,6 +1812,9 @@  struct kvm_x86_ops {
 
 	gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
 	void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
+	int (*pre_mmu_map_page)(struct kvm_vcpu *vcpu,
+				struct kvm_memory_mapping *mapping,
+				u64 *error_code);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8ba9c1720ac9..b76d854701d5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5868,6 +5868,26 @@  static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 	}
 }
 
+static int kvm_pre_mmu_map_page(struct kvm_vcpu *vcpu,
+				struct kvm_memory_mapping *mapping,
+				u64 *error_code)
+{
+	int r = 0;
+
+	if (vcpu->kvm->arch.vm_type == KVM_X86_DEFAULT_VM) {
+		/* nothing */
+	} else if (vcpu->kvm->arch.vm_type == KVM_X86_SW_PROTECTED_VM) {
+		if (kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(mapping->base_address)))
+			*error_code |= PFERR_PRIVATE_ACCESS;
+	} else if (kvm_x86_ops.pre_mmu_map_page)
+		r = static_call(kvm_x86_pre_mmu_map_page)(vcpu, mapping,
+							  error_code);
+	else
+		r = -EOPNOTSUPP;
+
+	return r;
+}
+
 int kvm_arch_vcpu_map_memory(struct kvm_vcpu *vcpu,
 			     struct kvm_memory_mapping *mapping)
 {
@@ -5900,6 +5920,14 @@  int kvm_arch_vcpu_map_memory(struct kvm_vcpu *vcpu,
 	/* reload is optimized for repeated call. */
 	kvm_mmu_reload(vcpu);
 
+	/*
+	 * Adjust error_code for VM-type. max_level is adjusted by
+	 * gmem_max_level() callback.
+	 */
+	r = kvm_pre_mmu_map_page(vcpu, mapping, &error_code);
+	if (r)
+		goto out;
+
 	r = kvm_tdp_map_page(vcpu, mapping->base_address, error_code, &level);
 	if (r)
 		goto out;