
[6/7] KVM: x86: Implement kvm_arch_vcpu_map_memory()

Message ID 20240417153450.3608097-7-pbonzini@redhat.com (mailing list archive)
State New
Series KVM: Guest Memory Pre-Population API

Commit Message

Paolo Bonzini April 17, 2024, 3:34 p.m. UTC
From: Isaku Yamahata <isaku.yamahata@intel.com>

Wire the KVM_MAP_MEMORY ioctl to kvm_tdp_map_page() to populate guest
memory.  When KVM_CREATE_VCPU creates a vCPU, it initializes the x86
KVM MMU with kvm_mmu_create() and kvm_init_mmu(), so the vCPU is
already able to invoke the KVM page fault handler.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Message-ID: <7138a3bc00ea8d3cbe0e59df15f8c22027005b59.1712785629.git.isaku.yamahata@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/Kconfig |  1 +
 arch/x86/kvm/x86.c   | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)
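
For reference, a userspace caller would drive this interface roughly as
follows.  This is a hypothetical sketch: the struct layout and the ioctl
number come from the series' uapi changes, which are not part of this
patch, so they are assumptions here; the retry loop follows from the arch
code below, which advances base_address and shrinks size as it makes
progress.

#include <errno.h>
#include <stdint.h>
#include <linux/ioctl.h>
#include <sys/ioctl.h>

/* Assumed to match this series' <linux/kvm.h> additions (illustration only). */
struct kvm_map_memory {
	uint64_t base_address;	/* GPA to populate; advanced by KVM */
	uint64_t size;		/* bytes remaining; shrunk by KVM */
	uint64_t flags;		/* assumed reserved, must be zero */
};
#define KVMIO		0xAE
#define KVM_MAP_MEMORY	_IOWR(KVMIO, 0xd5, struct kvm_map_memory)	/* number assumed */

static int prepopulate(int vcpu_fd, uint64_t gpa, uint64_t size)
{
	struct kvm_map_memory mapping = {
		.base_address = gpa,
		.size = size,
	};

	/*
	 * KVM updates base_address/size as pages are populated and may
	 * return before the whole range is mapped, so retry on
	 * EINTR/EAGAIN until size reaches zero.
	 */
	while (mapping.size) {
		if (ioctl(vcpu_fd, KVM_MAP_MEMORY, &mapping) < 0 &&
		    errno != EINTR && errno != EAGAIN)
			return -1;
	}
	return 0;
}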

Comments

Isaku Yamahata April 17, 2024, 7:28 p.m. UTC | #1
On Wed, Apr 17, 2024 at 11:34:49AM -0400,
Paolo Bonzini <pbonzini@redhat.com> wrote:

> From: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> Wire the KVM_MAP_MEMORY ioctl to kvm_tdp_map_page() to populate guest
> memory.  When KVM_CREATE_VCPU creates a vCPU, it initializes the x86
> KVM MMU with kvm_mmu_create() and kvm_init_mmu(), so the vCPU is
> already able to invoke the KVM page fault handler.


As a record of the past discussion, and to address Rick's comment at
https://lore.kernel.org/all/75b213fd73fcb5872703f89a9c6bb67ea91e3bd7.camel@intel.com/

  The current implementation supports TDP only, because populating via GVA
  is moot based on the thread [1].  If necessary, this restriction can be
  relaxed in the future.
  
  [1] https://lore.kernel.org/all/116179545fafbf39ed01e1f0f5ac76e0467fc09a.camel@intel.com/


> 
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Message-ID: <7138a3bc00ea8d3cbe0e59df15f8c22027005b59.1712785629.git.isaku.yamahata@intel.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/kvm/Kconfig |  1 +
>  arch/x86/kvm/x86.c   | 43 +++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 44 insertions(+)
> 
> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
> index 7632fe6e4db9..e58360d368ec 100644
> --- a/arch/x86/kvm/Kconfig
> +++ b/arch/x86/kvm/Kconfig
> @@ -44,6 +44,7 @@ config KVM
>  	select KVM_VFIO
>  	select HAVE_KVM_PM_NOTIFIER if PM
>  	select KVM_GENERIC_HARDWARE_ENABLING
> +	select KVM_GENERIC_MAP_MEMORY
>  	help
>  	  Support hosting fully virtualized guest machines using hardware
>  	  virtualization extensions.  You will need a fairly recent
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 83b8260443a3..f84c75c2a47f 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4715,6 +4715,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  	case KVM_CAP_MEMORY_FAULT_INFO:
>  		r = 1;
>  		break;
> +	case KVM_CAP_MAP_MEMORY:
> +		r = tdp_enabled;
> +		break;
>  	case KVM_CAP_EXIT_HYPERCALL:
>  		r = KVM_EXIT_HYPERCALL_VALID_MASK;
>  		break;
> @@ -5867,6 +5870,46 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
>  	}
>  }
>  
> +int kvm_arch_vcpu_map_memory(struct kvm_vcpu *vcpu,
> +			     struct kvm_map_memory *mapping)
> +{
> +	u64 mapped, end, error_code = 0;
> +	u8 level = PG_LEVEL_4K;
> +	int r;
> +
> +	/*
> +	 * Shadow paging uses GVA for KVM page faults, so the first
> +	 * implementation supports GPA only to avoid confusion.
> +	 */
> +	if (!tdp_enabled)
> +		return -EOPNOTSUPP;
> +
> +	/*
> +	 * kvm_mmu_reload() is cheap when the MMU is already loaded, so it
> +	 * can safely be called on every iteration.
> +	 */
> +	kvm_mmu_reload(vcpu);
> +
> +	if (kvm_arch_has_private_mem(vcpu->kvm) &&
> +	    kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(mapping->base_address)))
> +		error_code |= PFERR_PRIVATE_ACCESS;
> +
> +	r = kvm_tdp_map_page(vcpu, mapping->base_address, error_code, &level);
> +	if (r)
> +		return r;
> +
> +	/*
> +	 * level can exceed the alignment of mapping->base_address when KVM
> +	 * maps the range with a huge page.
> +	 */
> +	end = (mapping->base_address & KVM_HPAGE_MASK(level)) +
> +		KVM_HPAGE_SIZE(level);

end = ALIGN(mapping->base_address, KVM_HPAGE_SIZE(level));

ALIGN() simplifies this, as Chao pointed out:
https://lore.kernel.org/all/Zh94V8ochIXEkO17@chao-email/
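
For concreteness, the arithmetic both forms compute can be checked with a
standalone sketch; the 2 MiB constants below stand in for x86's
KVM_HPAGE_SIZE/KVM_HPAGE_MASK at the 2M level, and this is illustration
only, not kernel code:

#include <stdint.h>
#include <stdio.h>

#define HPAGE_SIZE	(2ULL << 20)	/* 2 MiB */
#define HPAGE_MASK	(~(HPAGE_SIZE - 1))
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	/* A GPA inside, but not at the start of, a 2 MiB page. */
	uint64_t base = 0x40123000;

	/* Masked form: end of the huge page that covers base. */
	uint64_t end_mask = (base & HPAGE_MASK) + HPAGE_SIZE;
	/* ALIGN() form: base rounded up to the next 2 MiB boundary. */
	uint64_t end_align = ALIGN(base, HPAGE_SIZE);

	/*
	 * Both print 0x40200000 here.  Note the forms differ when base is
	 * already 2 MiB aligned: ALIGN() returns base itself, while the
	 * masked form returns base + 2 MiB.
	 */
	printf("%#llx %#llx\n", (unsigned long long)end_mask,
	       (unsigned long long)end_align);
	return 0;
}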


> +	mapped = min(mapping->size, end - mapping->base_address);
> +	mapping->size -= mapped;
> +	mapping->base_address += mapped;
> +	return r;
> +}
> +
>  long kvm_arch_vcpu_ioctl(struct file *filp,
>  			 unsigned int ioctl, unsigned long arg)
>  {
> -- 
> 2.43.0
Sean Christopherson April 17, 2024, 9:37 p.m. UTC | #2
On Wed, Apr 17, 2024, Paolo Bonzini wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> Wire the KVM_MAP_MEMORY ioctl to kvm_tdp_map_page() to populate guest
> memory.  When KVM_CREATE_VCPU creates a vCPU, it initializes the x86
> KVM MMU with kvm_mmu_create() and kvm_init_mmu(), so the vCPU is
> already able to invoke the KVM page fault handler.
> 
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Message-ID: <7138a3bc00ea8d3cbe0e59df15f8c22027005b59.1712785629.git.isaku.yamahata@intel.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/kvm/Kconfig |  1 +
>  arch/x86/kvm/x86.c   | 43 +++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 44 insertions(+)
> 
> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
> index 7632fe6e4db9..e58360d368ec 100644
> --- a/arch/x86/kvm/Kconfig
> +++ b/arch/x86/kvm/Kconfig
> @@ -44,6 +44,7 @@ config KVM
>  	select KVM_VFIO
>  	select HAVE_KVM_PM_NOTIFIER if PM
>  	select KVM_GENERIC_HARDWARE_ENABLING
> +	select KVM_GENERIC_MAP_MEMORY
>  	help
>  	  Support hosting fully virtualized guest machines using hardware
>  	  virtualization extensions.  You will need a fairly recent
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 83b8260443a3..f84c75c2a47f 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4715,6 +4715,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  	case KVM_CAP_MEMORY_FAULT_INFO:
>  		r = 1;
>  		break;
> +	case KVM_CAP_MAP_MEMORY:
> +		r = tdp_enabled;
> +		break;
>  	case KVM_CAP_EXIT_HYPERCALL:
>  		r = KVM_EXIT_HYPERCALL_VALID_MASK;
>  		break;
> @@ -5867,6 +5870,46 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
>  	}
>  }
>  
> +int kvm_arch_vcpu_map_memory(struct kvm_vcpu *vcpu,
> +			     struct kvm_map_memory *mapping)
> +{
> +	u64 mapped, end, error_code = 0;

Maybe add PFERR_GUEST_FINAL_MASK to the error code?  KVM doesn't currently consume
that except in svm_check_emulate_instruction(), which isn't reachable, but it
seems logical?

> +	u8 level = PG_LEVEL_4K;
> +	int r;
> +
> +	/*
> +	 * Shadow paging uses GVA for KVM page faults, so the first
> +	 * implementation supports GPA only to avoid confusion.
> +	 */
> +	if (!tdp_enabled)

Eh, I'd omit this explicit check since kvm_tdp_map_page() has a more complete
check.

Actually, why is this a separate function and a separate patch?  Just implement
kvm_arch_vcpu_map_memory() in mmu.c, in a single patch, e.g.

int kvm_arch_vcpu_map_memory(struct kvm_vcpu *vcpu,
			     struct kvm_map_memory *mapping)
{
	u64 end, error_code = 0;
	u8 level = PG_LEVEL_4K;
	int r;

	if (vcpu->arch.mmu->page_fault != kvm_tdp_page_fault)
		return -EOPNOTSUPP;

	kvm_mmu_reload(vcpu);

	if (kvm_arch_has_private_mem(vcpu->kvm) &&
	    kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(mapping->base_address)))
		error_code |= PFERR_PRIVATE_ACCESS;

	r = __kvm_mmu_do_page_fault(vcpu, mapping->base_address, error_code,
				    true, NULL, &level);
	if (r < 0)
		return r;

	switch (r) {
	case RET_PF_RETRY:
		return -EAGAIN;
	case RET_PF_FIXED:
	case RET_PF_SPURIOUS:
		break;
	case RET_PF_EMULATE:
		return -EBUSY;
	case RET_PF_CONTINUE:
	case RET_PF_INVALID:
	default:
		WARN_ON_ONCE(r);
		return -EIO;
	}

	/*
	 * Adjust the GPA down when accounting for the page size, as KVM could
	 * have created a hugepage that covers the GPA, but doesn't start at it.
	 */
	end = (mapping->base_address & KVM_HPAGE_MASK(level)) +
	      KVM_HPAGE_SIZE(level);
	return min(mapping->size, end - mapping->base_address);
}
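
With this return convention the arch hook no longer updates the struct
itself; a positive return is the number of bytes mapped, which the
arch-neutral loop (patch 5/7 of this series, not quoted here) would
consume.  A hypothetical sketch of that caller, not the series' actual
code:

#include <linux/kvm_host.h>
#include <linux/sched/signal.h>

static int kvm_vcpu_map_memory(struct kvm_vcpu *vcpu,
			       struct kvm_map_memory *mapping)
{
	int r;

	while (mapping->size) {
		if (signal_pending(current))
			return -EINTR;

		r = kvm_arch_vcpu_map_memory(vcpu, mapping);
		if (r < 0)
			return r;

		/* r bytes were populated; move past them. */
		mapping->size -= r;
		mapping->base_address += r;
	}

	return 0;
}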

Patch

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 7632fe6e4db9..e58360d368ec 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -44,6 +44,7 @@ config KVM
 	select KVM_VFIO
 	select HAVE_KVM_PM_NOTIFIER if PM
 	select KVM_GENERIC_HARDWARE_ENABLING
+	select KVM_GENERIC_MAP_MEMORY
 	help
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 83b8260443a3..f84c75c2a47f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4715,6 +4715,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_MEMORY_FAULT_INFO:
 		r = 1;
 		break;
+	case KVM_CAP_MAP_MEMORY:
+		r = tdp_enabled;
+		break;
 	case KVM_CAP_EXIT_HYPERCALL:
 		r = KVM_EXIT_HYPERCALL_VALID_MASK;
 		break;
@@ -5867,6 +5870,46 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 	}
 }
 
+int kvm_arch_vcpu_map_memory(struct kvm_vcpu *vcpu,
+			     struct kvm_map_memory *mapping)
+{
+	u64 mapped, end, error_code = 0;
+	u8 level = PG_LEVEL_4K;
+	int r;
+
+	/*
+	 * Shadow paging uses GVA for KVM page faults, so the first
+	 * implementation supports GPA only to avoid confusion.
+	 */
+	if (!tdp_enabled)
+		return -EOPNOTSUPP;
+
+	/*
+	 * kvm_mmu_reload() is cheap when the MMU is already loaded, so it
+	 * can safely be called on every iteration.
+	 */
+	kvm_mmu_reload(vcpu);
+
+	if (kvm_arch_has_private_mem(vcpu->kvm) &&
+	    kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(mapping->base_address)))
+		error_code |= PFERR_PRIVATE_ACCESS;
+
+	r = kvm_tdp_map_page(vcpu, mapping->base_address, error_code, &level);
+	if (r)
+		return r;
+
+	/*
+	 * level can exceed the alignment of mapping->base_address when KVM
+	 * maps the range with a huge page.
+	 */
+	end = (mapping->base_address & KVM_HPAGE_MASK(level)) +
+		KVM_HPAGE_SIZE(level);
+	mapped = min(mapping->size, end - mapping->base_address);
+	mapping->size -= mapped;
+	mapping->base_address += mapped;
+	return r;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 {