
[v2,5/6] KVM: x86/xen: Maintain valid mapping of Xen shared_info page

Message ID 20211101190314.17954-6-dwmw2@infradead.org (mailing list archive)
State New, archived
Series KVM: x86/xen: Add in-kernel Xen event channel delivery

Commit Message

David Woodhouse Nov. 1, 2021, 7:03 p.m. UTC
From: David Woodhouse <dwmw@amazon.co.uk>

In order to allow for event channel delivery, we would like to have a
kernel mapping of the shared_info page which can be accessed in atomic
context in the common case.

The gfn_to_pfn_cache only automatically handles invalidation when the
KVM memslots change; it doesn't handle changes in the userspace
HVA-to-host-PFN mappings. So hook into the MMU notifiers to invalidate
the shared_info pointer on demand.

The shared_info page can be accessed while holding the shinfo_lock,
with a slow path that takes the kvm->lock mutex to refresh the mapping.
I'd like to use RCU for the invalidation, but I don't think we can
always sleep in the invalidate_range notifier. Having a true kernel
mapping of the page means that our access to it can be atomic anyway,
so holding a spinlock is OK.
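
As a minimal sketch of the fast path this enables (not part of the
patch; the function name and the evtchn_pending access are
illustrative assumptions), event channel delivery under this scheme
would look something like:

static int kvm_xen_set_evtchn_fast(struct kvm *kvm, int port)
{
	struct shared_info *shinfo;
	int ret = -EAGAIN;

	read_lock(&kvm->arch.xen.shinfo_lock);

	shinfo = kvm->arch.xen.shared_info;
	if (shinfo && shinfo != KVM_UNMAPPED_PAGE) {
		/* True kernel mapping: safe to touch in atomic context */
		set_bit(port, (unsigned long *)shinfo->evtchn_pending);
		ret = 0;
	}

	read_unlock(&kvm->arch.xen.shinfo_lock);

	/*
	 * On -EAGAIN the caller would take kvm->lock and refresh the
	 * mapping via kvm_xen_shared_info_init() before retrying.
	 */
	return ret;
}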

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 arch/x86/include/asm/kvm_host.h |  4 ++
 arch/x86/kvm/mmu/mmu.c          | 23 ++++++++++++
 arch/x86/kvm/xen.c              | 65 +++++++++++++++++++++++++++------
 include/linux/kvm_host.h        | 26 -------------
 include/linux/kvm_types.h       | 27 ++++++++++++++
 5 files changed, 107 insertions(+), 38 deletions(-)

Comments

kernel test robot Nov. 2, 2021, 1:23 a.m. UTC | #1
Hi David,

I love your patch! Yet something to improve:

[auto build test ERROR on mst-vhost/linux-next]
[also build test ERROR on linus/master v5.15 next-20211101]
[cannot apply to kvm/queue]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/David-Woodhouse/KVM-x86-xen-Add-in-kernel-Xen-event-channel-delivery/20211102-035038
base:   https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next
config: i386-debian-10.3 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/bba9531e42e9dd7a2ab056057a94d56f43643e24
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review David-Woodhouse/KVM-x86-xen-Add-in-kernel-Xen-event-channel-delivery/20211102-035038
        git checkout bba9531e42e9dd7a2ab056057a94d56f43643e24
        # save the attached .config to linux build tree
        mkdir build_dir
        make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash arch/x86/kvm/

If you fix the issue, kindly add the following tag as appropriate:
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from include/linux/dynamic_debug.h:6,
                    from include/linux/printk.h:555,
                    from include/linux/kernel.h:19,
                    from include/linux/cpumask.h:10,
                    from include/linux/mm_types_task.h:14,
                    from include/linux/mm_types.h:5,
                    from arch/x86/kvm/irq.h:13,
                    from arch/x86/kvm/mmu/mmu.c:18:
   arch/x86/kvm/mmu/mmu.c: In function 'kvm_unmap_gfn_range':
>> arch/x86/kvm/mmu/mmu.c:1592:30: error: 'kvm_xen_enabled' undeclared (first use in this function); did you mean 'kvm_xen_msr_enabled'?
    1592 |  if (static_branch_unlikely(&kvm_xen_enabled.key)) {
         |                              ^~~~~~~~~~~~~~~
   include/linux/jump_label.h:496:43: note: in definition of macro 'static_branch_unlikely'
     496 |  if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \
         |                                           ^
   arch/x86/kvm/mmu/mmu.c:1592:30: note: each undeclared identifier is reported only once for each function it appears in
    1592 |  if (static_branch_unlikely(&kvm_xen_enabled.key)) {
         |                              ^~~~~~~~~~~~~~~
   include/linux/jump_label.h:496:43: note: in definition of macro 'static_branch_unlikely'
     496 |  if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \
         |                                           ^


vim +1592 arch/x86/kvm/mmu/mmu.c

  1587	
  1588	bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
  1589	{
  1590		bool flush = false;
  1591	
> 1592		if (static_branch_unlikely(&kvm_xen_enabled.key)) {
  1593			write_lock(&kvm->arch.xen.shinfo_lock);
  1594	
  1595			if (kvm->arch.xen.shared_info &&
  1596			    kvm->arch.xen.shinfo_gfn >= range->start &&
  1597			    kvm->arch.xen.shinfo_cache.gfn < range->end) {
  1598				/*
  1599				 * If kvm_xen_shared_info_init() had *finished* mapping the
  1600				 * page and assigned the pointer for real, then mark the page
  1601				 * dirty now instead of via the eventual cache teardown.
  1602				 */
  1603				if (kvm->arch.xen.shared_info != KVM_UNMAPPED_PAGE) {
  1604					kvm_set_pfn_dirty(kvm->arch.xen.shinfo_cache.pfn);
  1605					kvm->arch.xen.shinfo_cache.dirty = false;
  1606				}
  1607	
  1608				kvm->arch.xen.shared_info = NULL;
  1609			}
  1610	
  1611			write_unlock(&kvm->arch.xen.shinfo_lock);
  1612		}
  1613	
  1614		if (kvm_memslots_have_rmaps(kvm))
  1615			flush = kvm_handle_gfn_range(kvm, range, kvm_unmap_rmapp);
  1616	
  1617		if (is_tdp_mmu_enabled(kvm))
  1618			flush |= kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
  1619	
  1620		return flush;
  1621	}
  1622	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
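
A plausible shape for a fix (an assumption, not taken from this
thread) is to keep generic MMU code from referencing the Xen static
key at all when CONFIG_KVM_XEN is not set, e.g. by declaring the key
and a range-invalidation helper in xen.h with a stub for the
!CONFIG_KVM_XEN case; kvm_xen_unmap_gfn_range() here is a hypothetical
name:

#ifdef CONFIG_KVM_XEN
extern struct static_key_false_deferred kvm_xen_enabled;

/* Called from kvm_unmap_gfn_range() to drop the shinfo mapping */
void kvm_xen_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
#else
static inline void kvm_xen_unmap_gfn_range(struct kvm *kvm,
					   struct kvm_gfn_range *range)
{
}
#endif

mmu.c would then call the helper unconditionally, and the failing i386
build above, which has CONFIG_KVM_XEN disabled, would compile the
empty stub.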
David Woodhouse Nov. 4, 2021, 7:05 p.m. UTC | #2
On Mon, 2021-11-01 at 19:03 +0000, David Woodhouse wrote:
> @@ -1588,6 +1589,28 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
>  {
>         bool flush = false;
>  
> +       if (static_branch_unlikely(&kvm_xen_enabled.key)) {
> +               write_lock(&kvm->arch.xen.shinfo_lock);
> +
> +               if (kvm->arch.xen.shared_info &&
> +                   kvm->arch.xen.shinfo_gfn >= range->start &&
> +                   kvm->arch.xen.shinfo_cache.gfn < range->end) {
> +                       /*
> +                        * If kvm_xen_shared_info_init() had *finished* mapping the
> +                        * page and assigned the pointer for real, then mark the page
> +                        * dirty now instead of via the eventual cache teardown.
> +                        */
> +                       if (kvm->arch.xen.shared_info != KVM_UNMAPPED_PAGE) {
> +                               kvm_set_pfn_dirty(kvm->arch.xen.shinfo_cache.pfn);
> +                               kvm->arch.xen.shinfo_cache.dirty = false;
> +                       }
> +
> +                       kvm->arch.xen.shared_info = NULL;
> +               }
> +
> +               write_unlock(&kvm->arch.xen.shinfo_lock);
> +       }
> +
>         if (kvm_memslots_have_rmaps(kvm))
>                 flush = kvm_handle_gfn_range(kvm, range, kvm_unmap_rmapp);

If I could find a way to ditch that rwlock and use RCU for this, then
I'd be largely OK with making it a generic facility and using it for
other things like nesting and maybe even steal time.

But I don't think we *can* always sleep in the MMU notifier, so we
can't call synchronize_srcu(), and I can't see how to ditch that
rwlock.

Which means I'm slightly less happy about offering it as a generic
facility, and I think it needs to be a special case for Xen, which
really *does* need to deliver interrupts to the guest shinfo page from
IRQ context without current->mm == kvm->mm.
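
For reference, the RCU variant being ruled out would have to look
something like this (a sketch, not from the thread, reusing the field
names from the patch); it is the grace-period wait that the notifier
cannot afford:

static void kvm_xen_invalidate_shinfo_rcu(struct kvm *kvm)
{
	/* Readers: rcu_read_lock() + rcu_dereference(shared_info) */
	rcu_assign_pointer(kvm->arch.xen.shared_info, NULL);

	/*
	 * May sleep, so it cannot run from a non-blockable MMU
	 * notifier; and call_rcu() would only defer the teardown,
	 * while the notifier must guarantee the old mapping is no
	 * longer in use before it returns.
	 */
	synchronize_rcu();
}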

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 750f74da9793..ec58e41a69c2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1017,6 +1017,10 @@  struct kvm_xen {
 	bool long_mode;
 	u8 upcall_vector;
 	gfn_t shinfo_gfn;
+	rwlock_t shinfo_lock;
+	void *shared_info;
+	struct kvm_host_map shinfo_map;
+	struct gfn_to_pfn_cache shinfo_cache;
 };
 
 enum kvm_irqchip_mode {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 0cc58901bf7a..429a4860d67a 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -25,6 +25,7 @@ 
 #include "kvm_emulate.h"
 #include "cpuid.h"
 #include "spte.h"
+#include "xen.h"
 
 #include <linux/kvm_host.h>
 #include <linux/types.h>
@@ -1588,6 +1589,28 @@  bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	bool flush = false;
 
+	if (static_branch_unlikely(&kvm_xen_enabled.key)) {
+		write_lock(&kvm->arch.xen.shinfo_lock);
+
+		if (kvm->arch.xen.shared_info &&
+		    kvm->arch.xen.shinfo_gfn >= range->start &&
+		    kvm->arch.xen.shinfo_cache.gfn < range->end) {
+			/*
+			 * If kvm_xen_shared_info_init() had *finished* mapping the
+			 * page and assigned the pointer for real, then mark the page
+			 * dirty now instead of via the eventual cache teardown.
+			 */
+			if (kvm->arch.xen.shared_info != KVM_UNMAPPED_PAGE) {
+				kvm_set_pfn_dirty(kvm->arch.xen.shinfo_cache.pfn);
+				kvm->arch.xen.shinfo_cache.dirty = false;
+			}
+
+			kvm->arch.xen.shared_info = NULL;
+		}
+
+		write_unlock(&kvm->arch.xen.shinfo_lock);
+	}
+
 	if (kvm_memslots_have_rmaps(kvm))
 		flush = kvm_handle_gfn_range(kvm, range, kvm_unmap_rmapp);
 
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 565da9c3853b..9d143bc7d769 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -21,18 +21,59 @@ 
 
 DEFINE_STATIC_KEY_DEFERRED_FALSE(kvm_xen_enabled, HZ);
 
-static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
+static void kvm_xen_shared_info_unmap(struct kvm *kvm)
+{
+	bool was_valid = false;
+
+	write_lock(&kvm->arch.xen.shinfo_lock);
+	if (kvm->arch.xen.shared_info)
+		was_valid = true;
+	kvm->arch.xen.shared_info = NULL;
+	kvm->arch.xen.shinfo_gfn = GPA_INVALID;
+	write_unlock(&kvm->arch.xen.shinfo_lock);
+
+	if (kvm_vcpu_mapped(&kvm->arch.xen.shinfo_map)) {
+		kvm_unmap_gfn(kvm, &kvm->arch.xen.shinfo_map,
+			      &kvm->arch.xen.shinfo_cache, was_valid, false);
+
+		/* If the MMU notifier invalidated it, the gfn_to_pfn_cache
+		 * may be invalid. Force it to notice */
+		if (!was_valid)
+			kvm->arch.xen.shinfo_cache.generation = -1;
+	}
+}
+
+static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn, bool update_clock)
 {
 	gpa_t gpa = gfn_to_gpa(gfn);
 	int wc_ofs, sec_hi_ofs;
 	int ret = 0;
 	int idx = srcu_read_lock(&kvm->srcu);
 
-	if (kvm_is_error_hva(gfn_to_hva(kvm, gfn))) {
-		ret = -EFAULT;
+	kvm_xen_shared_info_unmap(kvm);
+
+	if (gfn == GPA_INVALID)
 		goto out;
-	}
+
+	/* Let the MMU notifier know that we are in the process of mapping it */
+	write_lock(&kvm->arch.xen.shinfo_lock);
+	kvm->arch.xen.shared_info = KVM_UNMAPPED_PAGE;
 	kvm->arch.xen.shinfo_gfn = gfn;
+	write_unlock(&kvm->arch.xen.shinfo_lock);
+
+	ret = kvm_map_gfn(kvm, gfn, &kvm->arch.xen.shinfo_map,
+			  &kvm->arch.xen.shinfo_cache, false);
+	if (ret)
+		goto out;
+
+	write_lock(&kvm->arch.xen.shinfo_lock);
+	/* Unless the MMU notifier already invalidated it */
+	if (kvm->arch.xen.shared_info == KVM_UNMAPPED_PAGE)
+		kvm->arch.xen.shared_info = kvm->arch.xen.shinfo_map.hva;
+	write_unlock(&kvm->arch.xen.shinfo_lock);
+
+	if (!update_clock)
+		goto out;
 
 	/* Paranoia checks on the 32-bit struct layout */
 	BUILD_BUG_ON(offsetof(struct compat_shared_info, wc) != 0x900);
@@ -260,15 +301,9 @@  int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 		break;
 
 	case KVM_XEN_ATTR_TYPE_SHARED_INFO:
-		if (data->u.shared_info.gfn == GPA_INVALID) {
-			kvm->arch.xen.shinfo_gfn = GPA_INVALID;
-			r = 0;
-			break;
-		}
-		r = kvm_xen_shared_info_init(kvm, data->u.shared_info.gfn);
+		r = kvm_xen_shared_info_init(kvm, data->u.shared_info.gfn, true);
 		break;
 
-
 	case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
 		if (data->u.vector && data->u.vector < 0x10)
 			r = -EINVAL;
@@ -661,11 +696,17 @@  int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
 
 void kvm_xen_init_vm(struct kvm *kvm)
 {
-	kvm->arch.xen.shinfo_gfn = GPA_INVALID;
+	rwlock_init(&kvm->arch.xen.shinfo_lock);
 }
 
 void kvm_xen_destroy_vm(struct kvm *kvm)
 {
+	struct gfn_to_pfn_cache *cache = &kvm->arch.xen.shinfo_cache;
+
+	kvm_xen_shared_info_unmap(kvm);
+
+	kvm_release_pfn(cache->pfn, cache->dirty, cache);
+
 	if (kvm->arch.xen_hvm_config.msr)
 		static_branch_slow_dec_deferred(&kvm_xen_enabled);
 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 749cdc77fc4e..f0012d128aa5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -251,32 +251,6 @@  enum {
 	READING_SHADOW_PAGE_TABLES,
 };
 
-#define KVM_UNMAPPED_PAGE	((void *) 0x500 + POISON_POINTER_DELTA)
-
-struct kvm_host_map {
-	/*
-	 * Only valid if the 'pfn' is managed by the host kernel (i.e. There is
-	 * a 'struct page' for it. When using mem= kernel parameter some memory
-	 * can be used as guest memory but they are not managed by host
-	 * kernel).
-	 * If 'pfn' is not managed by the host kernel, this field is
-	 * initialized to KVM_UNMAPPED_PAGE.
-	 */
-	struct page *page;
-	void *hva;
-	kvm_pfn_t pfn;
-	kvm_pfn_t gfn;
-};
-
-/*
- * Used to check if the mapping is valid or not. Never use 'kvm_host_map'
- * directly to check for that.
- */
-static inline bool kvm_vcpu_mapped(struct kvm_host_map *map)
-{
-	return !!map->hva;
-}
-
 static inline bool kvm_vcpu_can_poll(ktime_t cur, ktime_t stop)
 {
 	return single_task_running() && !need_resched() && ktime_before(cur, stop);
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 2237abb93ccd..2092f4ca156b 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -60,6 +60,33 @@  struct gfn_to_pfn_cache {
 	bool dirty;
 };
 
+#define KVM_UNMAPPED_PAGE	((void *) 0x500 + POISON_POINTER_DELTA)
+
+struct kvm_host_map {
+	/*
+	 * Only valid if the 'pfn' is managed by the host kernel (i.e. There is
+	 * a 'struct page' for it. When using mem= kernel parameter some memory
+	 * can be used as guest memory but they are not managed by host
+	 * kernel).
+	 * If 'pfn' is not managed by the host kernel, this field is
+	 * initialized to KVM_UNMAPPED_PAGE.
+	 */
+	struct page *page;
+	void *hva;
+	kvm_pfn_t pfn;
+	kvm_pfn_t gfn;
+};
+
+/*
+ * Used to check if the mapping is valid or not. Never use 'kvm_host_map'
+ * directly to check for that.
+ */
+static inline bool kvm_vcpu_mapped(struct kvm_host_map *map)
+{
+	return !!map->hva;
+}
+
+
 #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
 /*
  * Memory caches are used to preallocate memory ahead of various MMU flows,