
KVM: x86/mmu: Register MMU shrinker only when necessary

Message ID 20240814082302.50032-1-liangchen.linux@gmail.com (mailing list archive)
State New, archived
Series KVM: x86/mmu: Register MMU shrinker only when necessary

Commit Message

Liang Chen Aug. 14, 2024, 8:23 a.m. UTC
When the TDP MMU is enabled, the MMU shrinker is useful only for nested
VMs, yet it is allocated and registered unconditionally, and its
'count_objects' callback is invoked every time the reclaim path tries to
shrink slab caches. Allocate and register the shrinker only when it is
actually needed.

Signed-off-by: Liang Chen <liangchen.linux@gmail.com>
---
 arch/x86/kvm/mmu/mmu.c | 49 ++++++++++++++++++++++++++++++------------
 1 file changed, 35 insertions(+), 14 deletions(-)
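
For context on the 'count_objects' point in the commit message: the reclaim
path polls every registered shrinker's ->count_objects() callback on each
pass, and KVM's callback is a bare per-CPU counter read.  The snippet below
is condensed from mm/vmscan.c and arch/x86/kvm/mmu/mmu.c; memcg handling,
batching, and error paths are omitted.

/* Condensed from mm/vmscan.c; not a complete implementation. */
static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
				    struct shrinker *shrinker, int priority)
{
	unsigned long freeable;

	/* Every registered shrinker is polled, KVM's included. */
	freeable = shrinker->count_objects(shrinker, shrinkctl);
	if (freeable == 0 || freeable == SHRINK_EMPTY)
		return freeable;

	/*
	 * ... otherwise nr_to_scan is derived from 'freeable' and
	 * ->scan_objects() is invoked to do the actual freeing ...
	 */
	return freeable;
}

/* KVM's count callback (arch/x86/kvm/mmu/mmu.c): a single counter read. */
static unsigned long mmu_shrink_count(struct shrinker *shrink,
				      struct shrink_control *sc)
{
	return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
}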

Comments

Sean Christopherson Aug. 14, 2024, 2:41 p.m. UTC | #1
+Vipin and David

On Wed, Aug 14, 2024, Liang Chen wrote:
> When the TDP MMU is enabled, the MMU shrinker is useful only for nested
> VMs, yet it is allocated and registered unconditionally, and its
> 'count_objects' callback is invoked every time the reclaim path tries to
> shrink slab caches. Allocate and register the shrinker only when it is
> actually needed.

This is definitely not worth the complexity.  In its current form, KVM's shrinker
is quite useless[1], and there were plans to repurpose the shrinker to free pages
from the so-called "mmu caches"[2], i.e. free pages that are guaranteed not to be
in use.

Vipin/David, what happened to that series?  Are we still working on it?

[1] https://lore.kernel.org/lkml/Y45dldZnI6OIf+a5@google.com
[2] https://lore.kernel.org/all/20221222023457.1764-2-vipinsh@google.com
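
To make the idea in [2] concrete: the plan was for the scan callback to
release pages that vCPUs have pre-allocated into their MMU memory caches but
not yet consumed.  Below is a rough, hypothetical sketch, not the code from
that series; it deliberately omits the synchronization against vCPUs topping
up their caches (kvm_mmu_topup_memory_cache()) that a real implementation
needs, and it only touches the shadow-page cache for brevity.

/*
 * Hypothetical sketch only, not the code from [2].  Frees objects that
 * vCPUs pre-allocated into their shadow-page caches but have not used.
 * A real implementation must synchronize against vCPUs topping up the
 * caches; that locking is deliberately omitted here.
 */
static unsigned long mmu_shrink_scan(struct shrinker *shrink,
				     struct shrink_control *sc)
{
	struct kvm_vcpu *vcpu;
	unsigned long freed = 0;
	struct kvm *kvm;
	unsigned long i;

	mutex_lock(&kvm_lock);
	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			struct kvm_mmu_memory_cache *cache =
				&vcpu->arch.mmu_shadow_page_cache;

			freed += kvm_mmu_memory_cache_nr_free_objects(cache);
			kvm_mmu_free_memory_cache(cache);
		}
		if (freed >= sc->nr_to_scan)
			break;
	}
	mutex_unlock(&kvm_lock);

	return freed ? freed : SHRINK_STOP;
}
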
Vipin Sharma Aug. 19, 2024, 9:40 p.m. UTC | #2
On 2024-08-14 07:41:19, Sean Christopherson wrote:
> +Vipin and David
> 
> On Wed, Aug 14, 2024, Liang Chen wrote:
> > When the TDP MMU is enabled, the MMU shrinker is useful only for nested
> > VMs, yet it is allocated and registered unconditionally, and its
> > 'count_objects' callback is invoked every time the reclaim path tries to
> > shrink slab caches. Allocate and register the shrinker only when it is
> > actually needed.
> 
> This is definitely not worth the complexity.  In its current form, KVM's shrinker
> is quite useless[1], and there were plans to repurpose the shrinker to free pages
> from the so-called "mmu caches"[2], i.e. free pages that are guaranteed not to be
> in use.
> 
> Vipin/David, what happened to that series?  Are we still working on it?
> 
> [1] https://lore.kernel.org/lkml/Y45dldZnI6OIf+a5@google.com
> [2] https://lore.kernel.org/all/20221222023457.1764-2-vipinsh@google.com

The NUMA-aware page table series got deprioritized, so the MMU shrinker
changes also moved to the back burner.

I will extract patch [2] above and send it as a separate series that
just changes the shrinker behavior, since it is independent of the
NUMA-aware page table effort.

Patch

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 928cf84778b0..d43d7548d801 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -69,11 +69,17 @@  static uint __read_mostly nx_huge_pages_recovery_ratio = 0;
 #else
 static uint __read_mostly nx_huge_pages_recovery_ratio = 60;
 #endif
+static struct shrinker *mmu_shrinker;
 
 static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp);
 static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
 static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp);
 
+static unsigned long mmu_shrink_count(struct shrinker *shrink,
+				      struct shrink_control *sc);
+static unsigned long mmu_shrink_scan(struct shrinker *shrink,
+				     struct shrink_control *sc);
+
 static const struct kernel_param_ops nx_huge_pages_ops = {
 	.set = set_nx_huge_pages,
 	.get = get_nx_huge_pages,
@@ -5666,6 +5672,28 @@  static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu,
 	reset_guest_paging_metadata(vcpu, g_context);
 }
 
+static void kvm_mmu_shrinker_init(void)
+{
+	struct shrinker *shrinker = shrinker_alloc(0, "x86-mmu");
+
+	if (!shrinker) {
+		pr_warn_once("could not allocate shrinker\n");
+		return;
+	}
+
+	/* Ensure mmu_shrinker is assigned only once. */
+	if (cmpxchg(&mmu_shrinker, NULL, shrinker)) {
+		shrinker_free(shrinker);
+		return;
+	}
+
+	mmu_shrinker->count_objects = mmu_shrink_count;
+	mmu_shrinker->scan_objects = mmu_shrink_scan;
+	mmu_shrinker->seeks = DEFAULT_SEEKS * 10;
+
+	shrinker_register(mmu_shrinker);
+}
+
 void kvm_init_mmu(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu_role_regs regs = vcpu_to_role_regs(vcpu);
@@ -5677,6 +5705,13 @@  void kvm_init_mmu(struct kvm_vcpu *vcpu)
 		init_kvm_tdp_mmu(vcpu, cpu_role);
 	else
 		init_kvm_softmmu(vcpu, cpu_role);
+
+	/*
+	 * Register MMU shrinker only if TDP MMU is disabled or
+	 * in nested VM scenarios.
+	 */
+	if (unlikely(!mmu_shrinker) && (!tdp_mmu_enabled || mmu_is_nested(vcpu)))
+		kvm_mmu_shrinker_init();
 }
 EXPORT_SYMBOL_GPL(kvm_init_mmu);
 
@@ -7092,8 +7127,6 @@  static unsigned long mmu_shrink_count(struct shrinker *shrink,
 	return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
 }
 
-static struct shrinker *mmu_shrinker;
-
 static void mmu_destroy_caches(void)
 {
 	kmem_cache_destroy(pte_list_desc_cache);
@@ -7223,20 +7256,8 @@  int kvm_mmu_vendor_module_init(void)
 	if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL))
 		goto out;
 
-	mmu_shrinker = shrinker_alloc(0, "x86-mmu");
-	if (!mmu_shrinker)
-		goto out_shrinker;
-
-	mmu_shrinker->count_objects = mmu_shrink_count;
-	mmu_shrinker->scan_objects = mmu_shrink_scan;
-	mmu_shrinker->seeks = DEFAULT_SEEKS * 10;
-
-	shrinker_register(mmu_shrinker);
-
 	return 0;
 
-out_shrinker:
-	percpu_counter_destroy(&kvm_total_used_mmu_pages);
 out:
 	mmu_destroy_caches();
 	return ret;