
[v7,2/4] KVM: async_pf: Add L1 guest async_pf #PF vmexit handler

Message ID 1498705321-3927-3-git-send-email-wanpeng.li@hotmail.com (mailing list archive)
State New, archived

Commit Message

Wanpeng Li June 29, 2017, 3:01 a.m. UTC
From: Wanpeng Li <wanpeng.li@hotmail.com>

This patch adds the L1 guest async page fault #PF vmexit handler: such a
#PF is converted into a vmexit from L2 to L1, which L1 then handles like
an ordinary async page fault. (A simplified sketch of the resulting
dispatch follows the diffstat below.)

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/mmu.c              | 33 +++++++++++++++++++++++++++++++++
 arch/x86/kvm/mmu.h              |  2 ++
 arch/x86/kvm/svm.c              | 36 +++++-------------------------------
 arch/x86/kvm/vmx.c              | 12 +++++-------
 5 files changed, 46 insertions(+), 38 deletions(-)
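
For orientation before the discussion and the full diff: the following is a
minimal, self-contained userspace sketch of the dispatch that the patch
centralizes in kvm_handle_page_fault(). It is not kernel code. The enum
values mirror the KVM_PV_REASON_* constants, and handle_ordinary_fault(),
wait_for_page() and wake_waiting_task() are hypothetical stand-ins for
kvm_mmu_page_fault(), kvm_async_pf_task_wait() and kvm_async_pf_task_wake();
the real handler also latches the reason via kvm_read_and_reset_pf_reason()
right after VM exit and disables interrupts around the wait/wake paths.

  #include <stdint.h>
  #include <stdio.h>

  /* Mirrors the three values KVM writes into the guest's apf reason slot. */
  enum pv_reason {
  	PV_REASON_NONE             = 0,
  	PV_REASON_PAGE_NOT_PRESENT = 1,
  	PV_REASON_PAGE_READY       = 2,
  };

  /* Hypothetical stand-ins for the real MMU and async-PF task helpers. */
  static void handle_ordinary_fault(uint64_t addr, uint64_t ec)
  {
  	printf("mmu fault at %#llx, error code %#llx\n",
  	       (unsigned long long)addr, (unsigned long long)ec);
  }

  static void wait_for_page(uint64_t token)
  {
  	printf("wait on async-PF token %#llx\n", (unsigned long long)token);
  }

  static void wake_waiting_task(uint64_t token)
  {
  	printf("wake waiter for token %#llx\n", (unsigned long long)token);
  }

  /*
   * Model of the new handler: the reason saved right after VM exit decides
   * whether this #PF is a real guest fault or an async-PF notification.
   */
  static void handle_page_fault(enum pv_reason *saved_reason,
  			      uint64_t fault_address, uint64_t error_code)
  {
  	switch (*saved_reason) {
  	case PV_REASON_PAGE_NOT_PRESENT:
  		/* Host says the page is being paged in: park the task. */
  		*saved_reason = PV_REASON_NONE;
  		wait_for_page(fault_address);
  		break;
  	case PV_REASON_PAGE_READY:
  		/* The page arrived: wake whoever was parked on this token. */
  		*saved_reason = PV_REASON_NONE;
  		wake_waiting_task(fault_address);
  		break;
  	default:
  		/* Not an async-PF notification: an ordinary page fault. */
  		handle_ordinary_fault(fault_address, error_code);
  		break;
  	}
  }

  int main(void)
  {
  	enum pv_reason reason = PV_REASON_PAGE_NOT_PRESENT;

  	handle_page_fault(&reason, 0x1234, 0);	/* async "page not present" */
  	handle_page_fault(&reason, 0xdead, 2);	/* ordinary write fault */
  	return 0;
  }

The point of the refactor is that SVM's pf_interception() and VMX's
handle_exception() both funnel into this single dispatch instead of each
open-coding the switch on the async-PF reason.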

Comments

Radim Krčmář July 12, 2017, 9:44 p.m. UTC | #1
2017-06-28 20:01-0700, Wanpeng Li:
> From: Wanpeng Li <wanpeng.li@hotmail.com>
> 
> This patch adds the L1 guest async page fault #PF vmexit handler: such a
> #PF is converted into a vmexit from L2 to L1, which L1 then handles like
> an ordinary async page fault.
> 
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
> ---

This patch breaks SVM, so I've taken the series off kvm/queue for now;
I'll look into it tomorrow.  The error is:

 BUG: unable to handle kernel paging request at ffffffffc0735ad2
 IP: report_bug+0x94/0x120
 PGD 43e14067 
 P4D 43e14067 
 PUD 43e16067 
 PMD 2164bf067 
 PTE 80000002181fc161

 Oops: 0003 [#1] SMP
 Modules linked in: kvm_amd(OE) kvm(OE) irqbypass(E) xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack libcrc32c tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables sunrpc snd_hda_codec_realtek snd_hda_codec_generic snd_hda_codec_hdmi snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_seq snd_seq_device snd_pcm ppdev joydev parport_serial parport_pc snd_timer parport k10temp sky2 snd shpchp sp5100_tco acpi_cpufreq wmi soundcore i2c_piix4 amdkfd amd_iommu_v2 radeon i2c_algo_bit drm_kms_helper uas serio_raw usb_storage ttm pata_atiixp drm ata_generic pata_acpi pata_jmicron [last unloaded: irqbypass]
 CPU: 3 PID: 1868 Comm: CPU 0/KVM Tainted: G           OE   4.12.0+ #1
 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS 080014  03/07/2008
 task: ffff8bcbe3f1b140 task.stack: ffffabb481970000
 RIP: 0010:report_bug+0x94/0x120
 RSP: 0018:ffffabb481973a70 EFLAGS: 00010202
 RAX: 0000000000000907 RBX: ffffabb481973bd8 RCX: ffffffffc0735ac8
 RDX: 0000000000000001 RSI: 0000000000000ed0 RDI: 0000000000000001
 RBP: ffffabb481973a90 R08: 0000000000000001 R09: 7f9f279200000000
 R10: ffffabb4819739d0 R11: 0000000000000000 R12: ffffffffc07023d0
 R13: ffffffffc0733078 R14: 0000000000000004 R15: ffffabb481973bd8
 FS:  0000000000000000(0000) GS:ffff8bcbe7400000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: ffffffffc0735ad2 CR3: 00000002189d7000 CR4: 00000000000006e0
 Call Trace:
  ? kvm_handle_page_fault+0x1f0/0x200 [kvm]
  fixup_bug+0x2e/0x50
  do_trap+0x119/0x150
  do_error_trap+0xa3/0x160
  ? kvm_handle_page_fault+0x1f0/0x200 [kvm]
  ? trace_hardirqs_off_thunk+0x1a/0x1c
  do_invalid_op+0x20/0x30
  invalid_op+0x1e/0x30
 RIP: 0010:kvm_handle_page_fault+0x1f0/0x200 [kvm]
 RSP: 0018:ffffabb481973c80 EFLAGS: 00010202
 RAX: 0000000000000000 RBX: ffff8bcbd7550000 RCX: 0000000000000000
 RDX: 00000000fffffff0 RSI: 0000000000000014 RDI: ffff8bcbd7550000
 RBP: ffffabb481973ca0 R08: 0000000000000001 R09: 27624b3d00000000
 R10: ffffabb481973ca8 R11: ffff8bcbe3fb25f0 R12: 00000000fffffff0
 R13: 0000000000000014 R14: ffff8bcbd7550000 R15: ffff8bcbd7550000
  pf_interception+0x20/0x30 [kvm_amd]
  handle_exit+0x213/0xbb0 [kvm_amd]
  kvm_arch_vcpu_ioctl_run+0x7f1/0x1ae0 [kvm]
  kvm_vcpu_ioctl+0x2ac/0x6f0 [kvm]
  ? kvm_vcpu_ioctl+0x2ac/0x6f0 [kvm]
  ? sched_clock+0x9/0x10
  ? debug_lockdep_rcu_enabled+0x1d/0x30
  do_vfs_ioctl+0xa6/0x6c0
  SyS_ioctl+0x79/0x90
  entry_SYSCALL_64_fastpath+0x1f/0xbe
 RIP: 0033:0x7fabf6d815c7
 RSP: 002b:00007fabe87e77c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
 RAX: ffffffffffffffda RBX: 0000000000010000 RCX: 00007fabf6d815c7
 RDX: 0000000000000000 RSI: 000000000000ae80 RDI: 0000000000000010
 RBP: 000055a7cb502fe0 R08: 000055a7cb51e410 R09: 000055a7cb509390
 R10: 000055a7cdb01000 R11: 0000000000000246 R12: 000055a7cdace0a6
 R13: 0000000000000000 R14: 00007fac00621000 R15: 000055a7cdace000
 Code: 74 59 0f b7 41 0a 4c 63 69 04 0f b7 71 08 89 c7 49 01 cd 83 e7 01 a8 02 74 15 66 85 ff 74 10 a8 04 ba 01 00 00 00 75 26 83 c8 04 <66> 89 41 0a 66 85 ff 74 49 0f b6 49 0b 4c 89 e2 45 31 c9 49 89 
 RIP: report_bug+0x94/0x120 RSP: ffffabb481973a70
 CR2: ffffffffc0735ad2
 ---[ end trace aec3a1f15664a4af ]---
 BUG: sleeping function called from invalid context at ./include/linux/percpu-rwsem.h:33
 in_atomic(): 0, irqs_disabled(): 1, pid: 1868, name: CPU 0/KVM
 INFO: lockdep is turned off.
 irq event stamp: 1868
 hardirqs last  enabled at (1867): [<ffffffffa398eaab>] restore_regs_and_iret+0x0/0x1d
 hardirqs last disabled at (1868): [<ffffffffa398f7dc>] error_entry+0x7c/0xd0
 softirqs last  enabled at (1834): [<ffffffffa3992f62>] __do_softirq+0x382/0x4ed
 softirqs last disabled at (1817): [<ffffffffa30b9a2f>] irq_exit+0x10f/0x120
 CPU: 3 PID: 1868 Comm: CPU 0/KVM Tainted: G      D    OE   4.12.0+ #1
 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS 080014  03/07/2008
 Call Trace:
  dump_stack+0x8e/0xcd
  ___might_sleep+0x164/0x250
  __might_sleep+0x4a/0x80
  exit_signals+0x33/0x240
  do_exit+0xb4/0xd20
  ? SyS_ioctl+0x79/0x90
  rewind_stack_do_exit+0x17/0x20
 RIP: 0033:0x7fabf6d815c7
 RSP: 002b:00007fabe87e77c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
 RAX: ffffffffffffffda RBX: 0000000000010000 RCX: 00007fabf6d815c7
 RDX: 0000000000000000 RSI: 000000000000ae80 RDI: 0000000000000010
 RBP: 000055a7cb502fe0 R08: 000055a7cb51e410 R09: 000055a7cb509390
 R10: 000055a7cdb01000 R11: 0000000000000246 R12: 000055a7cdace0a6
 R13: 0000000000000000 R14: 00007fac00621000 R15: 000055a7cdace000
Wanpeng Li July 13, 2017, 1:34 a.m. UTC | #2
2017-07-13 5:44 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
> 2017-06-28 20:01-0700, Wanpeng Li:
>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>
>> This patch adds the L1 guest async page fault #PF vmexit handler: such a
>> #PF is converted into a vmexit from L2 to L1, which L1 then handles like
>> an ordinary async page fault.
>>
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Cc: Radim Krčmář <rkrcmar@redhat.com>
>> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
>> ---
>
> This patch breaks SVM, so I've taken the series off kvm/queue for now;
> I'll look into it tomorrow.

Thanks for the help. :)

Regards,
Wanpeng Li

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1f01bfb..e20d8a8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -645,6 +645,7 @@  struct kvm_vcpu_arch {
 		u64 msr_val;
 		u32 id;
 		bool send_user_only;
+		u32 host_apf_reason;
 	} apf;
 
 	/* OSVW MSRs (AMD only) */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index cb82259..4a7dc00 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -46,6 +46,7 @@ 
 #include <asm/io.h>
 #include <asm/vmx.h>
 #include <asm/kvm_page_track.h>
+#include "trace.h"
 
 /*
  * When setting this variable to true it enables Two-Dimensional-Paging
@@ -3736,6 +3737,38 @@  static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 	return false;
 }
 
+int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
+				u64 fault_address)
+{
+	int r = 1;
+
+	switch (vcpu->arch.apf.host_apf_reason) {
+	default:
+		/* TDP won't cause page fault directly */
+		WARN_ON_ONCE(tdp_enabled);
+		trace_kvm_page_fault(fault_address, error_code);
+
+		if (kvm_event_needs_reinjection(vcpu))
+			kvm_mmu_unprotect_page_virt(vcpu, fault_address);
+		r = kvm_mmu_page_fault(vcpu, fault_address, error_code, NULL, 0);
+		break;
+	case KVM_PV_REASON_PAGE_NOT_PRESENT:
+		vcpu->arch.apf.host_apf_reason = 0;
+		local_irq_disable();
+		kvm_async_pf_task_wait(fault_address);
+		local_irq_enable();
+		break;
+	case KVM_PV_REASON_PAGE_READY:
+		vcpu->arch.apf.host_apf_reason = 0;
+		local_irq_disable();
+		kvm_async_pf_task_wake(fault_address);
+		local_irq_enable();
+		break;
+	}
+	return r;
+}
+EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
+
 static bool
 check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
 {
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 330bf3a..2ae88f0 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -77,6 +77,8 @@  void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 			     bool accessed_dirty);
 bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
+int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
+				u64 fault_address);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e1f8e89..8f263bf 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -192,7 +192,6 @@  struct vcpu_svm {
 
 	unsigned int3_injected;
 	unsigned long int3_rip;
-	u32 apf_reason;
 
 	/* cached guest cpuid flags for faster access */
 	bool nrips_enabled	: 1;
@@ -2071,34 +2070,9 @@  static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
 static int pf_interception(struct vcpu_svm *svm)
 {
 	u64 fault_address = svm->vmcb->control.exit_info_2;
-	u64 error_code;
-	int r = 1;
+	u64 error_code = svm->vmcb->control.exit_info_1;
 
-	switch (svm->apf_reason) {
-	default:
-		error_code = svm->vmcb->control.exit_info_1;
-
-		trace_kvm_page_fault(fault_address, error_code);
-		if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
-			kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
-		r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
-			svm->vmcb->control.insn_bytes,
-			svm->vmcb->control.insn_len);
-		break;
-	case KVM_PV_REASON_PAGE_NOT_PRESENT:
-		svm->apf_reason = 0;
-		local_irq_disable();
-		kvm_async_pf_task_wait(fault_address);
-		local_irq_enable();
-		break;
-	case KVM_PV_REASON_PAGE_READY:
-		svm->apf_reason = 0;
-		local_irq_disable();
-		kvm_async_pf_task_wake(fault_address);
-		local_irq_enable();
-		break;
-	}
-	return r;
+	return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address);
 }
 
 static int db_interception(struct vcpu_svm *svm)
@@ -2551,7 +2525,7 @@  static int nested_svm_exit_special(struct vcpu_svm *svm)
 		break;
 	case SVM_EXIT_EXCP_BASE + PF_VECTOR:
 		/* When we're shadowing, trap PFs, but not async PF */
-		if (!npt_enabled && svm->apf_reason == 0)
+		if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
 			return NESTED_EXIT_HOST;
 		break;
 	default:
@@ -2594,7 +2568,7 @@  static int nested_svm_intercept(struct vcpu_svm *svm)
 			vmexit = NESTED_EXIT_DONE;
 		/* async page fault always cause vmexit */
 		else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
-			 svm->apf_reason != 0)
+			 svm->vcpu.arch.apf.host_apf_reason != 0)
 			vmexit = NESTED_EXIT_DONE;
 		break;
 	}
@@ -4891,7 +4865,7 @@  static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
 	/* if exit due to PF check for async PF */
 	if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
-		svm->apf_reason = kvm_read_and_reset_pf_reason();
+		svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
 
 	if (npt_enabled) {
 		vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index df825bb..d20f794 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5648,14 +5648,8 @@  static int handle_exception(struct kvm_vcpu *vcpu)
 	}
 
 	if (is_page_fault(intr_info)) {
-		/* EPT won't cause page fault directly */
-		BUG_ON(enable_ept);
 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
-		trace_kvm_page_fault(cr2, error_code);
-
-		if (kvm_event_needs_reinjection(vcpu))
-			kvm_mmu_unprotect_page_virt(vcpu, cr2);
-		return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0);
+		return kvm_handle_page_fault(vcpu, error_code, cr2);
 	}
 
 	ex_no = intr_info & INTR_INFO_VECTOR_MASK;
@@ -8602,6 +8596,10 @@  static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 	vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 	exit_intr_info = vmx->exit_intr_info;
 
+	/* if exit due to PF check for async PF */
+	if (is_page_fault(exit_intr_info))
+		vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
+
 	/* Handle machine checks before interrupts are enabled */
 	if (is_machine_check(exit_intr_info))
 		kvm_machine_check();