diff mbox series

[v2,12/13] KVM: s390: pv: add OOM notifier for lazy destroy

Message ID 20210728142631.41860-13-imbrenda@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series KVM: s390: pv: implement lazy destroy | expand

Commit Message

Claudio Imbrenda July 28, 2021, 2:26 p.m. UTC
Add a per-VM OOM notifier for lazy destroy.

When a protected VM is undergoing deferred teardown, register an OOM
notifier. This allows an OOM situation to be handled by just waiting a
little.

The background cleanup deferred destroy process will now keep a running
tally of the number of pages freed. The asynchronous OOM notifier will
check the number of pages freed before and after waiting. The OOM
notifier will wait 20ms, and then report the number of pages freed by
the deferred destroy mechanism during that time.

If more than 1024 pages have already been freed in the current OOM
situation, no action is taken by the OOM notifier and no wait is
performed. This avoids excessive waiting times in case many VMs are
being destroyed at the same time, once enough memory has been freed.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
---
 arch/s390/kvm/pv.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
diff mbox series

Patch

diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 088b94512af3..390b57307f24 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -15,8 +15,12 @@ 
 #include <linux/pagewalk.h>
 #include <linux/sched/mm.h>
 #include <linux/kthread.h>
+#include <linux/delay.h>
+#include <linux/oom.h>
 #include "kvm-s390.h"
 
+#define KVM_S390_PV_LAZY_DESTROY_OOM_NOTIFY_PRIORITY	70
+
 struct deferred_priv {
 	struct mm_struct *mm;
 	bool has_mm;
@@ -24,6 +28,8 @@  struct deferred_priv {
 	u64 handle;
 	void *stor_var;
 	unsigned long stor_base;
+	unsigned long n_pages_freed;
+	struct notifier_block oom_nb;
 };
 
 static int lazy_destroy = 1;
@@ -249,6 +255,24 @@  static int kvm_s390_pv_deinit_vm_now(struct kvm *kvm, u16 *rc, u16 *rrc)
 	return cc ? -EIO : 0;
 }
 
+static int kvm_s390_pv_oom_notify(struct notifier_block *nb,
+				  unsigned long dummy, void *parm)
+{
+	unsigned long *freed = parm;
+	unsigned long free_before;
+	struct deferred_priv *p;
+
+	if (*freed > 1024)
+		return NOTIFY_OK;
+
+	p = container_of(nb, struct deferred_priv, oom_nb);
+	free_before = READ_ONCE(p->n_pages_freed);
+	msleep(20);
+	*freed += READ_ONCE(p->n_pages_freed) - free_before;
+
+	return NOTIFY_OK;
+}
+
 static int kvm_s390_pv_destroy_vm_thread(void *priv)
 {
 	struct destroy_page_lazy *lazy, *next;
@@ -256,12 +280,20 @@  static int kvm_s390_pv_destroy_vm_thread(void *priv)
 	u16 rc, rrc;
 	int r;
 
+	p->oom_nb.priority = KVM_S390_PV_LAZY_DESTROY_OOM_NOTIFY_PRIORITY;
+	p->oom_nb.notifier_call = kvm_s390_pv_oom_notify;
+	r = register_oom_notifier(&p->oom_nb);
+
 	list_for_each_entry_safe(lazy, next, &p->mm->context.deferred_list, list) {
 		list_del(&lazy->list);
 		s390_uv_destroy_pfns(lazy->count, lazy->pfns);
+		WRITE_ONCE(p->n_pages_freed, p->n_pages_freed + lazy->count + 1);
 		free_page(__pa(lazy));
 	}
 
+	if (!r)
+		unregister_oom_notifier(&p->oom_nb);
+
 	if (p->has_mm) {
 		/* Clear all the pages as long as we are not the only users of the mm */
 		s390_uv_destroy_range(p->mm, 1, 0, TASK_SIZE_MAX);