@@ -104,14 +104,14 @@ unsigned int kvm_arch_para_hints(void);
void kvm_async_pf_task_wait_schedule(u32 token);
void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_apf_flags(void);
-bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token);
+bool __kvm_handle_async_user_pf(struct pt_regs *regs, u32 token);
DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
-static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
+static inline bool kvm_handle_async_user_pf(struct pt_regs *regs, u32 token)
{
if (static_branch_unlikely(&kvm_async_pf_enabled))
- return __kvm_handle_async_pf(regs, token);
+ return __kvm_handle_async_user_pf(regs, token);
else
return false;
}
@@ -148,7 +148,7 @@ static inline u32 kvm_read_and_reset_apf_flags(void)
return 0;
}
-static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
+static inline bool kvm_handle_async_user_pf(struct pt_regs *regs, u32 token)
{
return false;
}
@@ -240,17 +240,13 @@ noinstr u32 kvm_read_and_reset_apf_flags(void)
}
EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);
-noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
+bool __kvm_handle_async_user_pf(struct pt_regs *regs, u32 token)
{
u32 flags = kvm_read_and_reset_apf_flags();
- irqentry_state_t state;
if (!flags)
return false;
- state = irqentry_enter(regs);
- instrumentation_begin();
-
/*
* If the host managed to inject an async #PF into an interrupt
* disabled region, then die hard as this is not going to end well
@@ -260,16 +256,12 @@ noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
panic("Host injected async #PF in interrupt disabled region\n");
if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) {
- if (unlikely(!(user_mode(regs))))
- panic("Host injected async #PF in kernel mode\n");
/* Page is swapped out by the host. */
kvm_async_pf_task_wait_schedule(token);
} else {
WARN_ONCE(1, "Unexpected async PF flags: %x\n", flags);
}
- instrumentation_end();
- irqentry_exit(regs, state);
return true;
}
@@ -1501,30 +1501,6 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
	prefetchw(&current->mm->mmap_lock);
- /*
- * KVM uses #PF vector to deliver 'page not present' events to guests
- * (asynchronous page fault mechanism). The event happens when a
- * userspace task is trying to access some valid (from guest's point of
- * view) memory which is not currently mapped by the host (e.g. the
- * memory is swapped out). Note, the corresponding "page ready" event
- * which is injected when the memory becomes available, is delivered via
- * an interrupt mechanism and not a #PF exception
- * (see arch/x86/kernel/kvm.c: sysvec_kvm_asyncpf_interrupt()).
- *
- * We are relying on the interrupted context being sane (valid RSP,
- * relevant locks not held, etc.), which is fine as long as the
- * interrupted context had IF=1. We are also relying on the KVM
- * async pf type field and CR2 being read consistently instead of
- * getting values from real and async page faults mixed up.
- *
- * Fingers crossed.
- *
- * The async #PF handling code takes care of idtentry handling
- * itself.
- */
- if (kvm_handle_async_pf(regs, (u32)address))
- return;
-
/*
* Entry handling for valid #PF from kernel mode is slightly
* different: RCU is already watching and rcu_irq_enter() must not
@@ -1538,7 +1514,20 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
state = irqentry_enter(regs);
instrumentation_begin();
- handle_page_fault(regs, error_code, address);
+
+ /*
+ * KVM uses #PF vector to deliver 'page not present' events to guests
+ * (asynchronous page fault mechanism). The event happens when a
+ * userspace task is trying to access some valid (from guest's point of
+ * view) memory which is not currently mapped by the host (e.g. the
+ * memory is swapped out). Note, the corresponding "page ready" event
+ * which is injected when the memory becomes available, is delivered via
+ * an interrupt mechanism and not a #PF exception
+ * (see arch/x86/kernel/kvm.c: sysvec_kvm_asyncpf_interrupt()).
+ */
+ if (!user_mode(regs) || !kvm_handle_async_user_pf(regs, (u32)address))
+ handle_page_fault(regs, error_code, address);
+
instrumentation_end();
irqentry_exit(regs, state);