diff mbox series

[v3,8/8] KVM: x86/mmu: Lazily allocate memslot rmaps

Message ID 20210506184241.618958-9-bgardon@google.com (mailing list archive)
State New, archived
Headers show
Series Lazily allocate memslot rmaps | expand

Commit Message

Ben Gardon May 6, 2021, 6:42 p.m. UTC
If the TDP MMU is in use, wait to allocate the rmaps until the shadow
MMU is actually used (i.e. when a nested VM is launched). This saves memory
equal to 0.2% of guest memory in cases where the TDP MMU is used and
there are no nested guests involved.

Signed-off-by: Ben Gardon <bgardon@google.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/mmu/mmu.c          | 14 ++++++++++---
 arch/x86/kvm/mmu/tdp_mmu.c      |  6 ++++--
 arch/x86/kvm/mmu/tdp_mmu.h      |  4 ++--
 arch/x86/kvm/x86.c              | 37 ++++++++++++++++++++++++++++++++-
 5 files changed, 54 insertions(+), 8 deletions(-)

Comments

kernel test robot May 7, 2021, 1:10 a.m. UTC | #1
Hi Ben,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on kvm/queue]
[also build test WARNING on tip/master linus/master next-20210506]
[cannot apply to linux/master v5.12]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Ben-Gardon/Lazily-allocate-memslot-rmaps/20210507-024533
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue
config: x86_64-randconfig-s021-20210506 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.3-341-g8af24329-dirty
        # https://github.com/0day-ci/linux/commit/43798461d3f4a38ef487d9c626dd0fb13e403328
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Ben-Gardon/Lazily-allocate-memslot-rmaps/20210507-024533
        git checkout 43798461d3f4a38ef487d9c626dd0fb13e403328
        # save the attached .config to linux build tree
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)
   arch/x86/kvm/x86.c:10926:17: sparse: sparse: incompatible types in comparison expression (different address spaces):
   arch/x86/kvm/x86.c:10926:17: sparse:    struct kvm_rmap_head [noderef] __rcu *
   arch/x86/kvm/x86.c:10926:17: sparse:    struct kvm_rmap_head *
   arch/x86/kvm/x86.c: note: in included file (through include/linux/notifier.h, include/linux/memory_hotplug.h, include/linux/mmzone.h, ...):
   include/linux/srcu.h:182:9: sparse: sparse: context imbalance in 'vcpu_enter_guest' - unexpected unlock
>> arch/x86/kvm/x86.c:10971:29: sparse: sparse: marked inline, but without a definition

vim +10971 arch/x86/kvm/x86.c

db3fe4eb45f355 Takuya Yoshikawa 2012-02-08  10913  
11bb59d1c3e83b Ben Gardon       2021-05-06  10914  static int alloc_memslot_rmap(struct kvm_memory_slot *slot,
11bb59d1c3e83b Ben Gardon       2021-05-06  10915  			      unsigned long npages)
11bb59d1c3e83b Ben Gardon       2021-05-06  10916  {
11bb59d1c3e83b Ben Gardon       2021-05-06  10917  	int i;
11bb59d1c3e83b Ben Gardon       2021-05-06  10918  
11bb59d1c3e83b Ben Gardon       2021-05-06  10919  	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
11bb59d1c3e83b Ben Gardon       2021-05-06  10920  		int lpages;
11bb59d1c3e83b Ben Gardon       2021-05-06  10921  		int level = i + 1;
11bb59d1c3e83b Ben Gardon       2021-05-06  10922  
11bb59d1c3e83b Ben Gardon       2021-05-06  10923  		lpages = gfn_to_index(slot->base_gfn + npages - 1,
11bb59d1c3e83b Ben Gardon       2021-05-06  10924  				      slot->base_gfn, level) + 1;
11bb59d1c3e83b Ben Gardon       2021-05-06  10925  
dd56af97c1d2b2 Ben Gardon       2021-05-06 @10926  		rcu_assign_pointer(slot->arch.rmap[i],
11bb59d1c3e83b Ben Gardon       2021-05-06  10927  				   kvcalloc(lpages, sizeof(*slot->arch.rmap[i]),
dd56af97c1d2b2 Ben Gardon       2021-05-06  10928  					    GFP_KERNEL_ACCOUNT));
11bb59d1c3e83b Ben Gardon       2021-05-06  10929  		if (!slot->arch.rmap[i])
11bb59d1c3e83b Ben Gardon       2021-05-06  10930  			goto out_free;
11bb59d1c3e83b Ben Gardon       2021-05-06  10931  	}
11bb59d1c3e83b Ben Gardon       2021-05-06  10932  
11bb59d1c3e83b Ben Gardon       2021-05-06  10933  	return 0;
11bb59d1c3e83b Ben Gardon       2021-05-06  10934  
11bb59d1c3e83b Ben Gardon       2021-05-06  10935  out_free:
11bb59d1c3e83b Ben Gardon       2021-05-06  10936  	free_memslot_rmap(slot);
11bb59d1c3e83b Ben Gardon       2021-05-06  10937  	return -ENOMEM;
11bb59d1c3e83b Ben Gardon       2021-05-06  10938  }
11bb59d1c3e83b Ben Gardon       2021-05-06  10939  
43798461d3f4a3 Ben Gardon       2021-05-06  10940  int alloc_all_memslots_rmaps(struct kvm *kvm)
43798461d3f4a3 Ben Gardon       2021-05-06  10941  {
43798461d3f4a3 Ben Gardon       2021-05-06  10942  	struct kvm_memslots *slots;
43798461d3f4a3 Ben Gardon       2021-05-06  10943  	struct kvm_memory_slot *slot;
43798461d3f4a3 Ben Gardon       2021-05-06  10944  	int r = 0;
43798461d3f4a3 Ben Gardon       2021-05-06  10945  	int i;
43798461d3f4a3 Ben Gardon       2021-05-06  10946  
43798461d3f4a3 Ben Gardon       2021-05-06  10947  	if (kvm_memslots_have_rmaps(kvm))
43798461d3f4a3 Ben Gardon       2021-05-06  10948  		return 0;
43798461d3f4a3 Ben Gardon       2021-05-06  10949  
43798461d3f4a3 Ben Gardon       2021-05-06  10950  	mutex_lock(&kvm->slots_arch_lock);
43798461d3f4a3 Ben Gardon       2021-05-06  10951  	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
43798461d3f4a3 Ben Gardon       2021-05-06  10952  		slots = __kvm_memslots(kvm, i);
43798461d3f4a3 Ben Gardon       2021-05-06  10953  		kvm_for_each_memslot(slot, slots) {
43798461d3f4a3 Ben Gardon       2021-05-06  10954  			r = alloc_memslot_rmap(slot, slot->npages);
43798461d3f4a3 Ben Gardon       2021-05-06  10955  			if (r) {
43798461d3f4a3 Ben Gardon       2021-05-06  10956  				mutex_unlock(&kvm->slots_arch_lock);
43798461d3f4a3 Ben Gardon       2021-05-06  10957  				return r;
43798461d3f4a3 Ben Gardon       2021-05-06  10958  			}
43798461d3f4a3 Ben Gardon       2021-05-06  10959  		}
43798461d3f4a3 Ben Gardon       2021-05-06  10960  	}
43798461d3f4a3 Ben Gardon       2021-05-06  10961  
43798461d3f4a3 Ben Gardon       2021-05-06  10962  	/*
43798461d3f4a3 Ben Gardon       2021-05-06  10963  	 * memslots_have_rmaps is set and read in different lock contexts,
43798461d3f4a3 Ben Gardon       2021-05-06  10964  	 * so protect it with smp_load/store.
43798461d3f4a3 Ben Gardon       2021-05-06  10965  	 */
43798461d3f4a3 Ben Gardon       2021-05-06  10966  	smp_store_release(&kvm->arch.memslots_have_rmaps, true);
43798461d3f4a3 Ben Gardon       2021-05-06  10967  	mutex_unlock(&kvm->slots_arch_lock);
43798461d3f4a3 Ben Gardon       2021-05-06  10968  	return 0;
43798461d3f4a3 Ben Gardon       2021-05-06  10969  }
43798461d3f4a3 Ben Gardon       2021-05-06  10970  
fac0d516d29b67 Ben Gardon       2021-05-06 @10971  bool kvm_memslots_have_rmaps(struct kvm *kvm)
fac0d516d29b67 Ben Gardon       2021-05-06  10972  {
43798461d3f4a3 Ben Gardon       2021-05-06  10973  	/*
43798461d3f4a3 Ben Gardon       2021-05-06  10974  	 * memslots_have_rmaps is set and read in different lock contexts,
43798461d3f4a3 Ben Gardon       2021-05-06  10975  	 * so protect it with smp_load/store.
43798461d3f4a3 Ben Gardon       2021-05-06  10976  	 */
43798461d3f4a3 Ben Gardon       2021-05-06  10977  	return smp_load_acquire(&kvm->arch.memslots_have_rmaps);
fac0d516d29b67 Ben Gardon       2021-05-06  10978  }
fac0d516d29b67 Ben Gardon       2021-05-06  10979  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Paolo Bonzini May 7, 2021, 8:28 a.m. UTC | #2
On 06/05/21 20:42, Ben Gardon wrote:
> +	/*
> +	 * memslots_have_rmaps is set and read in different lock contexts,
> +	 * so protect it with smp_load/store.
> +	 */
> +	smp_store_release(&kvm->arch.memslots_have_rmaps, true);

Shorter and better: /* write rmap pointers before memslots_have_rmaps */

> +	mutex_unlock(&kvm->slots_arch_lock);
> +	return 0;
> +}
> +
>   bool kvm_memslots_have_rmaps(struct kvm *kvm)
>   {
> -	return kvm->arch.memslots_have_rmaps;
> +	/*
> +	 * memslots_have_rmaps is set and read in different lock contexts,
> +	 * so protect it with smp_load/store.
> +	 */
> +	return smp_load_acquire(&kvm->arch.memslots_have_rmaps);
>   }
>   

Likewise, /* read memslots_have_rmaps before the rmaps themselves */

Paolo
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 00065f9bbc5e..7b8e1532fb55 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1860,5 +1860,6 @@  static inline int kvm_cpu_get_apicid(int mps_cpu)
 int kvm_cpu_dirty_log_size(void);
 
 inline bool kvm_memslots_have_rmaps(struct kvm *kvm);
+int alloc_all_memslots_rmaps(struct kvm *kvm);
 
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 48067c572c02..e3a3b65829c5 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3306,6 +3306,10 @@  static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		}
 	}
 
+	r = alloc_all_memslots_rmaps(vcpu->kvm);
+	if (r)
+		return r;
+
 	write_lock(&vcpu->kvm->mmu_lock);
 	r = make_mmu_pages_available(vcpu);
 	if (r < 0)
@@ -5494,9 +5498,13 @@  void kvm_mmu_init_vm(struct kvm *kvm)
 {
 	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
 
-	kvm_mmu_init_tdp_mmu(kvm);
-
-	kvm->arch.memslots_have_rmaps = true;
+	if (!kvm_mmu_init_tdp_mmu(kvm))
+		/*
+		 * No smp_load/store wrappers needed here as we are in
+		 * VM init and there cannot be any memslots / other threads
+		 * accessing this struct kvm yet.
+		 */
+		kvm->arch.memslots_have_rmaps = true;
 
 	node->track_write = kvm_mmu_pte_write;
 	node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 83cbdbe5de5a..5342aca2c8e0 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -14,10 +14,10 @@  static bool __read_mostly tdp_mmu_enabled = false;
 module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644);
 
 /* Initializes the TDP MMU for the VM, if enabled. */
-void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
+bool kvm_mmu_init_tdp_mmu(struct kvm *kvm)
 {
 	if (!tdp_enabled || !READ_ONCE(tdp_mmu_enabled))
-		return;
+		return false;
 
 	/* This should not be changed for the lifetime of the VM. */
 	kvm->arch.tdp_mmu_enabled = true;
@@ -25,6 +25,8 @@  void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
 	INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
 	spin_lock_init(&kvm->arch.tdp_mmu_pages_lock);
 	INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages);
+
+	return true;
 }
 
 static __always_inline void kvm_lockdep_assert_mmu_lock_held(struct kvm *kvm,
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 5fdf63090451..b046ab5137a1 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -80,12 +80,12 @@  int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
 			 int *root_level);
 
 #ifdef CONFIG_X86_64
-void kvm_mmu_init_tdp_mmu(struct kvm *kvm);
+bool kvm_mmu_init_tdp_mmu(struct kvm *kvm);
 void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
 static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return kvm->arch.tdp_mmu_enabled; }
 static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return sp->tdp_mmu_page; }
 #else
-static inline void kvm_mmu_init_tdp_mmu(struct kvm *kvm) {}
+static inline bool kvm_mmu_init_tdp_mmu(struct kvm *kvm) { return false; }
 static inline void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) {}
 static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return false; }
 static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return false; }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1098ab73a704..95e74fb9fc20 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10868,9 +10868,44 @@  static int alloc_memslot_rmap(struct kvm_memory_slot *slot,
 	return -ENOMEM;
 }
 
+int alloc_all_memslots_rmaps(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *slot;
+	int r = 0;
+	int i;
+
+	if (kvm_memslots_have_rmaps(kvm))
+		return 0;
+
+	mutex_lock(&kvm->slots_arch_lock);
+	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+		slots = __kvm_memslots(kvm, i);
+		kvm_for_each_memslot(slot, slots) {
+			r = alloc_memslot_rmap(slot, slot->npages);
+			if (r) {
+				mutex_unlock(&kvm->slots_arch_lock);
+				return r;
+			}
+		}
+	}
+
+	/*
+	 * memslots_have_rmaps is set and read in different lock contexts,
+	 * so protect it with smp_load/store.
+	 */
+	smp_store_release(&kvm->arch.memslots_have_rmaps, true);
+	mutex_unlock(&kvm->slots_arch_lock);
+	return 0;
+}
+
 bool kvm_memslots_have_rmaps(struct kvm *kvm)
 {
-	return kvm->arch.memslots_have_rmaps;
+	/*
+	 * memslots_have_rmaps is set and read in different lock contexts,
+	 * so protect it with smp_load/store.
+	 */
+	return smp_load_acquire(&kvm->arch.memslots_have_rmaps);
 }
 
 static int kvm_alloc_memslot_metadata(struct kvm *kvm,