[v5,12/12] KVM: x86: First attempt at converting nested virtual APIC page to gpc

Message ID 20211121125451.9489-13-dwmw2@infradead.org (mailing list archive)
State New, archived
Series KVM: x86/xen: Add in-kernel Xen event channel delivery

Commit Message

David Woodhouse Nov. 21, 2021, 12:54 p.m. UTC
From: David Woodhouse <dwmw@amazon.co.uk>

This is what evolved during the discussion at
https://lore.kernel.org/kvm/960E233F-EC0B-4FB5-BA2E-C8D2CCB38B12@infradead.org/T/#m11d75fcfe2da357ec1dabba0d0e3abb91fd13665

As discussed, an alternative approach might be to augment
kvm_arch_memslots_updated() to raise KVM_REQ_GET_NESTED_STATE_PAGES on
each vCPU (and make that request a no-op on any vCPU that isn't
actually in L2 guest mode).

That would mean the reload gets actively triggered on memslot changes
too, rather than only from MMU notifiers as is the case now. It could
*potentially* mean we can drop the new 'check_guest_maps' function.
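
A rough sketch of that alternative, purely for illustration: the
existing kvm_arch_memslots_updated() body is trimmed down to the MMIO
SPTE invalidation here, and the real vmx_get_nested_state_pages() also
has to deal with the enlightened VMCS page, which is elided.

	void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
	{
		/* Existing behaviour: MMIO SPTE generation handling. */
		kvm_mmu_invalidate_mmio_sptes(kvm, gen);

		/* New: revalidate cached L1 pages before the next vmentry. */
		kvm_make_all_cpus_request(kvm, KVM_REQ_GET_NESTED_STATE_PAGES);
	}

	static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
	{
		/* Only do anything if this vCPU is actually in L2. */
		if (!is_guest_mode(vcpu))
			return true;

		return nested_get_vmcs12_pages(vcpu);
	}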

The 'check_guest_maps' function could be a lot simpler than it is,
though. It only really needs to get kvm->memslots->generation, then
check each gpc->generation against that, and each gpc->valid.
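
Something like this, perhaps (sketch only: it relies on the
'generation' and 'valid' fields the gfn_to_pfn_cache already carries,
ignores locking, and only covers the virtual APIC cache):

	static void nested_vmx_check_guest_maps(struct kvm_vcpu *vcpu)
	{
		struct vcpu_vmx *vmx = to_vmx(vcpu);
		struct gfn_to_pfn_cache *gpc = &vmx->nested.virtual_apic_cache;
		u64 gen = kvm_memslots(vcpu->kvm)->generation;

		/* Revalidate on the way into L2 if anything went stale. */
		if (!gpc->valid || gpc->generation != gen)
			kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
	}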

Also I suspect we *shouldn't* destroy the virtual_apic_cache in
nested_vmx_vmexit(). We can just leave it there for next time the
vCPU enters guest mode. If it happens to get invalidated in the
meantime, that's fine and we'll refresh it on the way back in.
We probably *would* want to actively do something on memslot changes
in that case though, to ensure that even if the vCPU isn't in guest
mode any more, we *release* the cached page.
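
Roughly like this, perhaps (again only a sketch; the memslot-change
notification that would invoke it is hypothetical and not part of this
patch):

	static void nested_vmx_release_cached_pages(struct kvm_vcpu *vcpu)
	{
		struct vcpu_vmx *vmx = to_vmx(vcpu);

		/* vCPU left L2: drop the pinned page rather than keep it. */
		if (!is_guest_mode(vcpu))
			kvm_gfn_to_pfn_cache_unmap(vcpu->kvm,
						   &vmx->nested.virtual_apic_cache);
	}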

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/vmx/nested.c       | 50 ++++++++++++++++++++++++++++-----
 arch/x86/kvm/vmx/vmx.c          | 12 +++++---
 arch/x86/kvm/vmx/vmx.h          |  2 +-
 arch/x86/kvm/x86.c              | 10 +++++++
 5 files changed, 63 insertions(+), 12 deletions(-)

Comments

kernel test robot Nov. 26, 2021, 8:14 p.m. UTC | #1
Hi David,

I love your patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v5.16-rc2]
[cannot apply to kvm/queue kvms390/next powerpc/topic/ppc-kvm kvmarm/next mst-vhost/linux-next next-20211126]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patches, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/David-Woodhouse/KVM-x86-xen-Add-in-kernel-Xen-event-channel-delivery/20211121-205657
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 923dcc5eb0c111eccd51cc7ce1658537e3c38b25
config: x86_64-randconfig-r024-20211126 (https://download.01.org/0day-ci/archive/20211127/202111270405.fmQ2HTy9-lkp@intel.com/config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/21f06a7b7c4145cff195635eee12d43625610fb4
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review David-Woodhouse/KVM-x86-xen-Add-in-kernel-Xen-event-channel-delivery/20211121-205657
        git checkout 21f06a7b7c4145cff195635eee12d43625610fb4
        # save the config file to linux build tree
        mkdir build_dir
        make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash arch/x86/kvm/

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   arch/x86/kvm/x86.c: In function 'vcpu_enter_guest':
>> arch/x86/kvm/x86.c:9743:4: warning: suggest braces around empty body in an 'if' statement [-Wempty-body]
    9743 |    ; /* Nothing to do. It just wanted to wake us */
         |    ^


vim +/if +9743 arch/x86/kvm/x86.c

  9593	
  9594	/*
  9595	 * Returns 1 to let vcpu_run() continue the guest execution loop without
  9596	 * exiting to the userspace.  Otherwise, the value will be returned to the
  9597	 * userspace.
  9598	 */
  9599	static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
  9600	{
  9601		int r;
  9602		bool req_int_win =
  9603			dm_request_for_irq_injection(vcpu) &&
  9604			kvm_cpu_accept_dm_intr(vcpu);
  9605		fastpath_t exit_fastpath;
  9606	
  9607		bool req_immediate_exit = false;
  9608	
  9609		/* Forbid vmenter if vcpu dirty ring is soft-full */
  9610		if (unlikely(vcpu->kvm->dirty_ring_size &&
  9611			     kvm_dirty_ring_soft_full(&vcpu->dirty_ring))) {
  9612			vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL;
  9613			trace_kvm_dirty_ring_exit(vcpu);
  9614			r = 0;
  9615			goto out;
  9616		}
  9617	
  9618		if (kvm_request_pending(vcpu)) {
  9619			if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) {
  9620				r = -EIO;
  9621				goto out;
  9622			}
  9623			if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
  9624				if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
  9625					r = 0;
  9626					goto out;
  9627				}
  9628			}
  9629			if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
  9630				kvm_mmu_unload(vcpu);
  9631			if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
  9632				__kvm_migrate_timers(vcpu);
  9633			if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
  9634				kvm_update_masterclock(vcpu->kvm);
  9635			if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
  9636				kvm_gen_kvmclock_update(vcpu);
  9637			if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
  9638				r = kvm_guest_time_update(vcpu);
  9639				if (unlikely(r))
  9640					goto out;
  9641			}
  9642			if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
  9643				kvm_mmu_sync_roots(vcpu);
  9644			if (kvm_check_request(KVM_REQ_LOAD_MMU_PGD, vcpu))
  9645				kvm_mmu_load_pgd(vcpu);
  9646			if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
  9647				kvm_vcpu_flush_tlb_all(vcpu);
  9648	
  9649				/* Flushing all ASIDs flushes the current ASID... */
  9650				kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
  9651			}
  9652			if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
  9653				kvm_vcpu_flush_tlb_current(vcpu);
  9654			if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
  9655				kvm_vcpu_flush_tlb_guest(vcpu);
  9656	
  9657			if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
  9658				vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
  9659				r = 0;
  9660				goto out;
  9661			}
  9662			if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
  9663				if (is_guest_mode(vcpu)) {
  9664					kvm_x86_ops.nested_ops->triple_fault(vcpu);
  9665				} else {
  9666					vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
  9667					vcpu->mmio_needed = 0;
  9668					r = 0;
  9669					goto out;
  9670				}
  9671			}
  9672			if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
  9673				/* Page is swapped out. Do synthetic halt */
  9674				vcpu->arch.apf.halted = true;
  9675				r = 1;
  9676				goto out;
  9677			}
  9678			if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
  9679				record_steal_time(vcpu);
  9680			if (kvm_check_request(KVM_REQ_SMI, vcpu))
  9681				process_smi(vcpu);
  9682			if (kvm_check_request(KVM_REQ_NMI, vcpu))
  9683				process_nmi(vcpu);
  9684			if (kvm_check_request(KVM_REQ_PMU, vcpu))
  9685				kvm_pmu_handle_event(vcpu);
  9686			if (kvm_check_request(KVM_REQ_PMI, vcpu))
  9687				kvm_pmu_deliver_pmi(vcpu);
  9688			if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
  9689				BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
  9690				if (test_bit(vcpu->arch.pending_ioapic_eoi,
  9691					     vcpu->arch.ioapic_handled_vectors)) {
  9692					vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
  9693					vcpu->run->eoi.vector =
  9694							vcpu->arch.pending_ioapic_eoi;
  9695					r = 0;
  9696					goto out;
  9697				}
  9698			}
  9699			if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
  9700				vcpu_scan_ioapic(vcpu);
  9701			if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu))
  9702				vcpu_load_eoi_exitmap(vcpu);
  9703			if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
  9704				kvm_vcpu_reload_apic_access_page(vcpu);
  9705			if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
  9706				vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
  9707				vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
  9708				r = 0;
  9709				goto out;
  9710			}
  9711			if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
  9712				vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
  9713				vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
  9714				r = 0;
  9715				goto out;
  9716			}
  9717			if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) {
  9718				struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
  9719	
  9720				vcpu->run->exit_reason = KVM_EXIT_HYPERV;
  9721				vcpu->run->hyperv = hv_vcpu->exit;
  9722				r = 0;
  9723				goto out;
  9724			}
  9725	
  9726			/*
  9727			 * KVM_REQ_HV_STIMER has to be processed after
  9728			 * KVM_REQ_CLOCK_UPDATE, because Hyper-V SynIC timers
  9729			 * depend on the guest clock being up-to-date
  9730			 */
  9731			if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
  9732				kvm_hv_process_stimers(vcpu);
  9733			if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
  9734				kvm_vcpu_update_apicv(vcpu);
  9735			if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
  9736				kvm_check_async_pf_completion(vcpu);
  9737			if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
  9738				static_call(kvm_x86_msr_filter_changed)(vcpu);
  9739	
  9740			if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
  9741				static_call(kvm_x86_update_cpu_dirty_logging)(vcpu);
  9742			if (kvm_check_request(KVM_REQ_GPC_INVALIDATE, vcpu))
> 9743				; /* Nothing to do. It just wanted to wake us */
  9744		}
  9745	
  9746		if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
  9747		    kvm_xen_has_interrupt(vcpu)) {
  9748			++vcpu->stat.req_event;
  9749			r = kvm_apic_accept_events(vcpu);
  9750			if (r < 0) {
  9751				r = 0;
  9752				goto out;
  9753			}
  9754			if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
  9755				r = 1;
  9756				goto out;
  9757			}
  9758	
  9759			r = inject_pending_event(vcpu, &req_immediate_exit);
  9760			if (r < 0) {
  9761				r = 0;
  9762				goto out;
  9763			}
  9764			if (req_int_win)
  9765				static_call(kvm_x86_enable_irq_window)(vcpu);
  9766	
  9767			if (kvm_lapic_enabled(vcpu)) {
  9768				update_cr8_intercept(vcpu);
  9769				kvm_lapic_sync_to_vapic(vcpu);
  9770			}
  9771		}
  9772	
  9773		r = kvm_mmu_reload(vcpu);
  9774		if (unlikely(r)) {
  9775			goto cancel_injection;
  9776		}
  9777	
  9778		preempt_disable();
  9779	
  9780		static_call(kvm_x86_prepare_guest_switch)(vcpu);
  9781	
  9782		/*
  9783		 * Disable IRQs before setting IN_GUEST_MODE.  Posted interrupt
  9784		 * IPI are then delayed after guest entry, which ensures that they
  9785		 * result in virtual interrupt delivery.
  9786		 */
  9787		local_irq_disable();
  9788		vcpu->mode = IN_GUEST_MODE;
  9789	
  9790		/*
  9791		 * If the guest requires direct access to mapped L1 pages, check
  9792		 * the caches are valid. Will raise KVM_REQ_GET_NESTED_STATE_PAGES
  9793		 * to go and revalidate them, if necessary.
  9794		 */
  9795		if (is_guest_mode(vcpu) && kvm_x86_ops.nested_ops->check_guest_maps)
  9796			kvm_x86_ops.nested_ops->check_guest_maps(vcpu);
  9797	
  9798		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
  9799	
  9800		/*
  9801		 * 1) We should set ->mode before checking ->requests.  Please see
  9802		 * the comment in kvm_vcpu_exiting_guest_mode().
  9803		 *
  9804		 * 2) For APICv, we should set ->mode before checking PID.ON. This
  9805		 * pairs with the memory barrier implicit in pi_test_and_set_on
  9806		 * (see vmx_deliver_posted_interrupt).
  9807		 *
  9808		 * 3) This also orders the write to mode from any reads to the page
  9809		 * tables done while the VCPU is running.  Please see the comment
  9810		 * in kvm_flush_remote_tlbs.
  9811		 */
  9812		smp_mb__after_srcu_read_unlock();
  9813	
  9814		/*
  9815		 * This handles the case where a posted interrupt was
  9816		 * notified with kvm_vcpu_kick.
  9817		 */
  9818		if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
  9819			static_call(kvm_x86_sync_pir_to_irr)(vcpu);
  9820	
  9821		if (kvm_vcpu_exit_request(vcpu)) {
  9822			vcpu->mode = OUTSIDE_GUEST_MODE;
  9823			smp_wmb();
  9824			local_irq_enable();
  9825			preempt_enable();
  9826			vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
  9827			r = 1;
  9828			goto cancel_injection;
  9829		}
  9830	
  9831		if (req_immediate_exit) {
  9832			kvm_make_request(KVM_REQ_EVENT, vcpu);
  9833			static_call(kvm_x86_request_immediate_exit)(vcpu);
  9834		}
  9835	
  9836		fpregs_assert_state_consistent();
  9837		if (test_thread_flag(TIF_NEED_FPU_LOAD))
  9838			switch_fpu_return();
  9839	
  9840		if (unlikely(vcpu->arch.switch_db_regs)) {
  9841			set_debugreg(0, 7);
  9842			set_debugreg(vcpu->arch.eff_db[0], 0);
  9843			set_debugreg(vcpu->arch.eff_db[1], 1);
  9844			set_debugreg(vcpu->arch.eff_db[2], 2);
  9845			set_debugreg(vcpu->arch.eff_db[3], 3);
  9846		} else if (unlikely(hw_breakpoint_active())) {
  9847			set_debugreg(0, 7);
  9848		}
  9849	
  9850		for (;;) {
  9851			/*
  9852			 * Assert that vCPU vs. VM APICv state is consistent.  An APICv
  9853			 * update must kick and wait for all vCPUs before toggling the
  9854			 * per-VM state, and responding vCPUs must wait for the update
  9855			 * to complete before servicing KVM_REQ_APICV_UPDATE.
  9856			 */
  9857			WARN_ON_ONCE(kvm_apicv_activated(vcpu->kvm) != kvm_vcpu_apicv_active(vcpu));
  9858	
  9859			exit_fastpath = static_call(kvm_x86_run)(vcpu);
  9860			if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
  9861				break;
  9862	
  9863			if (vcpu->arch.apicv_active)
  9864				static_call(kvm_x86_sync_pir_to_irr)(vcpu);
  9865	
  9866			if (unlikely(kvm_vcpu_exit_request(vcpu))) {
  9867				exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
  9868				break;
  9869			}
  9870		}
  9871	
  9872		/*
  9873		 * Do this here before restoring debug registers on the host.  And
  9874		 * since we do this before handling the vmexit, a DR access vmexit
  9875		 * can (a) read the correct value of the debug registers, (b) set
  9876		 * KVM_DEBUGREG_WONT_EXIT again.
  9877		 */
  9878		if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
  9879			WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
  9880			static_call(kvm_x86_sync_dirty_debug_regs)(vcpu);
  9881			kvm_update_dr0123(vcpu);
  9882			kvm_update_dr7(vcpu);
  9883		}
  9884	
  9885		/*
  9886		 * If the guest has used debug registers, at least dr7
  9887		 * will be disabled while returning to the host.
  9888		 * If we don't have active breakpoints in the host, we don't
  9889		 * care about the messed up debug address registers. But if
  9890		 * we have some of them active, restore the old state.
  9891		 */
  9892		if (hw_breakpoint_active())
  9893			hw_breakpoint_restore();
  9894	
  9895		vcpu->arch.last_vmentry_cpu = vcpu->cpu;
  9896		vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
  9897	
  9898		vcpu->mode = OUTSIDE_GUEST_MODE;
  9899		smp_wmb();
  9900	
  9901		static_call(kvm_x86_handle_exit_irqoff)(vcpu);
  9902	
  9903		/*
  9904		 * Consume any pending interrupts, including the possible source of
  9905		 * VM-Exit on SVM and any ticks that occur between VM-Exit and now.
  9906		 * An instruction is required after local_irq_enable() to fully unblock
  9907		 * interrupts on processors that implement an interrupt shadow, the
  9908		 * stat.exits increment will do nicely.
  9909		 */
  9910		kvm_before_interrupt(vcpu);
  9911		local_irq_enable();
  9912		++vcpu->stat.exits;
  9913		local_irq_disable();
  9914		kvm_after_interrupt(vcpu);
  9915	
  9916		/*
  9917		 * Wait until after servicing IRQs to account guest time so that any
  9918		 * ticks that occurred while running the guest are properly accounted
  9919		 * to the guest.  Waiting until IRQs are enabled degrades the accuracy
  9920		 * of accounting via context tracking, but the loss of accuracy is
  9921		 * acceptable for all known use cases.
  9922		 */
  9923		vtime_account_guest_exit();
  9924	
  9925		if (lapic_in_kernel(vcpu)) {
  9926			s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
  9927			if (delta != S64_MIN) {
  9928				trace_kvm_wait_lapic_expire(vcpu->vcpu_id, delta);
  9929				vcpu->arch.apic->lapic_timer.advance_expire_delta = S64_MIN;
  9930			}
  9931		}
  9932	
  9933		local_irq_enable();
  9934		preempt_enable();
  9935	
  9936		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
  9937	
  9938		/*
  9939		 * Profile KVM exit RIPs:
  9940		 */
  9941		if (unlikely(prof_on == KVM_PROFILING)) {
  9942			unsigned long rip = kvm_rip_read(vcpu);
  9943			profile_hit(KVM_PROFILING, (void *)rip);
  9944		}
  9945	
  9946		if (unlikely(vcpu->arch.tsc_always_catchup))
  9947			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
  9948	
  9949		if (vcpu->arch.apic_attention)
  9950			kvm_lapic_sync_from_vapic(vcpu);
  9951	
  9952		r = static_call(kvm_x86_handle_exit)(vcpu, exit_fastpath);
  9953		return r;
  9954	
  9955	cancel_injection:
  9956		if (req_immediate_exit)
  9957			kvm_make_request(KVM_REQ_EVENT, vcpu);
  9958		static_call(kvm_x86_cancel_injection)(vcpu);
  9959		if (unlikely(vcpu->arch.apic_attention))
  9960			kvm_lapic_sync_from_vapic(vcpu);
  9961	out:
  9962		return r;
  9963	}
  9964	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6ea2446ab851..24f6f3e2de47 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1511,6 +1511,7 @@  struct kvm_x86_nested_ops {
 	int (*enable_evmcs)(struct kvm_vcpu *vcpu,
 			    uint16_t *vmcs_version);
 	uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu);
+	void (*check_guest_maps)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_x86_init_ops {
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 1e2f66951566..01bfabcfbbce 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -309,7 +309,7 @@  static void free_nested(struct kvm_vcpu *vcpu)
 		kvm_release_page_clean(vmx->nested.apic_access_page);
 		vmx->nested.apic_access_page = NULL;
 	}
-	kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
+	kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vmx->nested.virtual_apic_cache);
 	kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
 	vmx->nested.pi_desc = NULL;
 
@@ -3179,10 +3179,12 @@  static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
 	}
 
 	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
-		map = &vmx->nested.virtual_apic_map;
+		struct gfn_to_pfn_cache *gpc = &vmx->nested.virtual_apic_cache;
 
-		if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) {
-			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn));
+		if (!kvm_gfn_to_pfn_cache_init(vcpu->kvm, gpc, vcpu, true, true,
+					       vmcs12->virtual_apic_page_addr,
+					       PAGE_SIZE, true)) {
+			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(gpc->pfn));
 		} else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
 		           nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) &&
 			   !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
@@ -3207,6 +3209,9 @@  static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
 	if (nested_cpu_has_posted_intr(vmcs12)) {
 		map = &vmx->nested.pi_desc_map;
 
+		if (kvm_vcpu_mapped(map))
+			kvm_vcpu_unmap(vcpu, map, true);
+
 		if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) {
 			vmx->nested.pi_desc =
 				(struct pi_desc *)(((void *)map->hva) +
@@ -3251,6 +3256,29 @@  static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
 	return true;
 }
 
+static void nested_vmx_check_guest_maps(struct kvm_vcpu *vcpu)
+{
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct gfn_to_pfn_cache *gpc;
+
+	int valid;
+
+	if (nested_cpu_has_posted_intr(vmcs12)) {
+		gpc = &vmx->nested.virtual_apic_cache;
+
+		read_lock(&gpc->lock);
+		valid = kvm_gfn_to_pfn_cache_check(vcpu->kvm, gpc,
+						   vmcs12->virtual_apic_page_addr,
+						   PAGE_SIZE);
+		read_unlock(&gpc->lock);
+		if (!valid) {
+			kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+			return;
+		}
+	}
+}
+
 static int nested_vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa)
 {
 	struct vmcs12 *vmcs12;
@@ -3749,9 +3777,15 @@  static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
 
 	max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
 	if (max_irr != 256) {
-		vapic_page = vmx->nested.virtual_apic_map.hva;
-		if (!vapic_page)
+		struct gfn_to_pfn_cache *gpc = &vmx->nested.virtual_apic_cache;
+
+		read_lock(&gpc->lock);
+		if (!kvm_gfn_to_pfn_cache_check(vcpu->kvm, gpc, gpc->gpa, PAGE_SIZE)) {
+			read_unlock(&gpc->lock);
 			goto mmio_needed;
+		}
+
+		vapic_page = gpc->khva;
 
 		__kvm_apic_update_irr(vmx->nested.pi_desc->pir,
 			vapic_page, &max_irr);
@@ -3761,6 +3795,7 @@  static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
 			status |= (u8)max_irr;
 			vmcs_write16(GUEST_INTR_STATUS, status);
 		}
+		read_unlock(&gpc->lock);
 	}
 
 	nested_mark_vmcs12_pages_dirty(vcpu);
@@ -4581,7 +4616,7 @@  void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 		kvm_release_page_clean(vmx->nested.apic_access_page);
 		vmx->nested.apic_access_page = NULL;
 	}
-	kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
+	kvm_gfn_to_pfn_cache_unmap(vcpu->kvm, &vmx->nested.virtual_apic_cache);
 	kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
 	vmx->nested.pi_desc = NULL;
 
@@ -6756,4 +6791,5 @@  struct kvm_x86_nested_ops vmx_nested_ops = {
 	.write_log_dirty = nested_vmx_write_pml_buffer,
 	.enable_evmcs = nested_enable_evmcs,
 	.get_evmcs_version = nested_get_evmcs_version,
+	.check_guest_maps = nested_vmx_check_guest_maps,
 };
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index ba66c171d951..6c61faef86d3 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3839,19 +3839,23 @@  void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu)
 static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	void *vapic_page;
+	struct gfn_to_pfn_cache *gpc = &vmx->nested.virtual_apic_cache;
 	u32 vppr;
 	int rvi;
 
 	if (WARN_ON_ONCE(!is_guest_mode(vcpu)) ||
 		!nested_cpu_has_vid(get_vmcs12(vcpu)) ||
-		WARN_ON_ONCE(!vmx->nested.virtual_apic_map.gfn))
+		WARN_ON_ONCE(gpc->gpa == GPA_INVALID))
 		return false;
 
 	rvi = vmx_get_rvi();
 
-	vapic_page = vmx->nested.virtual_apic_map.hva;
-	vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
+	read_lock(&gpc->lock);
+	if (kvm_gfn_to_pfn_cache_check(vcpu->kvm, gpc, gpc->gpa, PAGE_SIZE))
+		vppr = *((u32 *)(gpc->khva + APIC_PROCPRI));
+	else
+		vppr = 0xff;
+	read_unlock(&gpc->lock);
 
 	return ((rvi & 0xf0) > (vppr & 0xf0));
 }
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 4df2ac24ffc1..8364e7fc92a0 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -195,7 +195,7 @@  struct nested_vmx {
 	 * pointers, so we must keep them pinned while L2 runs.
 	 */
 	struct page *apic_access_page;
-	struct kvm_host_map virtual_apic_map;
+	struct gfn_to_pfn_cache virtual_apic_cache;
 	struct kvm_host_map pi_desc_map;
 
 	struct kvm_host_map msr_bitmap_map;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6cb022d0afab..d8f1d2169b45 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9739,6 +9739,8 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 		if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
 			static_call(kvm_x86_update_cpu_dirty_logging)(vcpu);
+		if (kvm_check_request(KVM_REQ_GPC_INVALIDATE, vcpu))
+			; /* Nothing to do. It just wanted to wake us */
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
@@ -9785,6 +9787,14 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	local_irq_disable();
 	vcpu->mode = IN_GUEST_MODE;
 
+	/*
+	 * If the guest requires direct access to mapped L1 pages, check
+	 * the caches are valid. Will raise KVM_REQ_GET_NESTED_STATE_PAGES
+	 * to go and revalidate them, if necessary.
+	 */
+	if (is_guest_mode(vcpu) && kvm_x86_ops.nested_ops->check_guest_maps)
+		kvm_x86_ops.nested_ops->check_guest_maps(vcpu);
+
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 
 	/*