
[RFC,v2,3/4] mm/ksm: introduce force_madvise knob

Message ID 20190514131654.25463-4-oleksandr@redhat.com (mailing list archive)
State New, archived
Series mm/ksm: add option to automerge VMAs

Commit Message

Oleksandr Natalenko May 14, 2019, 1:16 p.m. UTC
Present a new sysfs knob to mark a task's anonymous memory as mergeable.

To force merging of a task's VMAs, its PID is echoed into a write-only file:

   # echo PID > /sys/kernel/mm/ksm/force_madvise

Force unmerging is done similarly, but with a minus sign:

   # echo -PID > /sys/kernel/mm/ksm/force_madvise

"0" or "-0" can be used to control the current task.

To achieve this, the previously introduced ksm_enter()/ksm_leave()
helpers are used in the "store" handler.

Signed-off-by: Oleksandr Natalenko <oleksandr@redhat.com>
---
 mm/ksm.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
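
For reference, this knob applies from the outside what a task can already
request for itself with madvise(2). A minimal userspace sketch of that
in-process opt-in follows; the mapping size and the duplicate-data fill are
illustrative only, and MADV_MERGEABLE/MADV_UNMERGEABLE require a kernel
built with CONFIG_KSM:

#define _DEFAULT_SOURCE		/* exposes MADV_MERGEABLE and MAP_ANONYMOUS */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 64UL << 20;	/* 64 MiB; arbitrary example size */
	void *area = mmap(NULL, len, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (area == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}

	/* Opt this range in for KSM scanning; MADV_UNMERGEABLE undoes it. */
	if (madvise(area, len, MADV_MERGEABLE)) {
		perror("madvise(MADV_MERGEABLE)");
		return EXIT_FAILURE;
	}

	/* Identical page contents give ksmd something to merge. */
	memset(area, 0x5a, len);

	pause();	/* keep the mapping alive while ksmd scans it */
	return EXIT_SUCCESS;
}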

Comments

Aaron Tomlin May 14, 2019, 1:22 p.m. UTC | #1
On Tue 2019-05-14 15:16 +0200, Oleksandr Natalenko wrote:
> Present a new sysfs knob to mark a task's anonymous memory as mergeable.
> 
> To force merging of a task's VMAs, its PID is echoed into a write-only file:
> 
>    # echo PID > /sys/kernel/mm/ksm/force_madvise
> 
> Force unmerging is done similarly, but with a minus sign:
> 
>    # echo -PID > /sys/kernel/mm/ksm/force_madvise
> 
> "0" or "-0" can be used to control the current task.
> 
> To achieve this, the previously introduced ksm_enter()/ksm_leave()
> helpers are used in the "store" handler.
> 
> Signed-off-by: Oleksandr Natalenko <oleksandr@redhat.com>
> ---
>  mm/ksm.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 68 insertions(+)
> 
> diff --git a/mm/ksm.c b/mm/ksm.c
> index e9f3901168bb..22c59fb03d3a 100644
> --- a/mm/ksm.c
> +++ b/mm/ksm.c
> @@ -2879,10 +2879,77 @@ static void wait_while_offlining(void)
>  
>  #define KSM_ATTR_RO(_name) \
>  	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
> +#define KSM_ATTR_WO(_name) \
> +	static struct kobj_attribute _name##_attr = __ATTR_WO(_name)
>  #define KSM_ATTR(_name) \
>  	static struct kobj_attribute _name##_attr = \
>  		__ATTR(_name, 0644, _name##_show, _name##_store)
>  
> +static ssize_t force_madvise_store(struct kobject *kobj,
> +				     struct kobj_attribute *attr,
> +				     const char *buf, size_t count)
> +{
> +	int err;
> +	pid_t pid;
> +	bool merge = true;
> +	struct task_struct *tsk;
> +	struct mm_struct *mm;
> +	struct vm_area_struct *vma;
> +
> +	err = kstrtoint(buf, 10, &pid);
> +	if (err)
> +		return -EINVAL;
> +
> +	if (pid < 0) {
> +		pid = abs(pid);
> +		merge = false;
> +	}
> +
> +	if (!pid && *buf == '-')
> +		merge = false;
> +
> +	rcu_read_lock();
> +	if (pid) {
> +		tsk = find_task_by_vpid(pid);
> +		if (!tsk) {
> +			err = -ESRCH;
> +			rcu_read_unlock();
> +			goto out;
> +		}
> +	} else {
> +		tsk = current;
> +	}
> +
> +	tsk = tsk->group_leader;
> +
> +	get_task_struct(tsk);
> +	rcu_read_unlock();
> +
> +	mm = get_task_mm(tsk);
> +	if (!mm) {
> +		err = -EINVAL;
> +		goto out_put_task_struct;
> +	}
> +	down_write(&mm->mmap_sem);
> +	vma = mm->mmap;
> +	while (vma) {
> +		if (merge)
> +			ksm_enter(vma->vm_mm, vma, &vma->vm_flags);
> +		else
> +			ksm_leave(vma, vma->vm_start, vma->vm_end, &vma->vm_flags);
> +		vma = vma->vm_next;
> +	}
> +	up_write(&mm->mmap_sem);
> +	mmput(mm);
> +
> +out_put_task_struct:
> +	put_task_struct(tsk);
> +
> +out:
> +	return err ? err : count;
> +}
> +KSM_ATTR_WO(force_madvise);
> +
>  static ssize_t sleep_millisecs_show(struct kobject *kobj,
>  				    struct kobj_attribute *attr, char *buf)
>  {
> @@ -3185,6 +3252,7 @@ static ssize_t full_scans_show(struct kobject *kobj,
>  KSM_ATTR_RO(full_scans);
>  
>  static struct attribute *ksm_attrs[] = {
> +	&force_madvise_attr.attr,
>  	&sleep_millisecs_attr.attr,
>  	&pages_to_scan_attr.attr,
>  	&run_attr.attr,

Looks fine to me.

Reviewed-by: Aaron Tomlin <atomlin@redhat.com>
Timofey Titovets May 15, 2019, 12:48 a.m. UTC | #2
LGTM

Reviewed-by: Timofey Titovets <nefelim4ag@gmail.com>

On Tue, May 14, 2019 at 4:22 PM, Aaron Tomlin <atomlin@redhat.com> wrote:
>
> Looks fine to me.
>
> Reviewed-by: Aaron Tomlin <atomlin@redhat.com>

Patch

diff --git a/mm/ksm.c b/mm/ksm.c
index e9f3901168bb..22c59fb03d3a 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2879,10 +2879,77 @@ static void wait_while_offlining(void)
 
 #define KSM_ATTR_RO(_name) \
 	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+#define KSM_ATTR_WO(_name) \
+	static struct kobj_attribute _name##_attr = __ATTR_WO(_name)
 #define KSM_ATTR(_name) \
 	static struct kobj_attribute _name##_attr = \
 		__ATTR(_name, 0644, _name##_show, _name##_store)
 
+static ssize_t force_madvise_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t count)
+{
+	int err;
+	pid_t pid;
+	bool merge = true;
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
+
+	err = kstrtoint(buf, 10, &pid);
+	if (err)
+		return -EINVAL;
+
+	if (pid < 0) {
+		pid = abs(pid);
+		merge = false;
+	}
+
+	if (!pid && *buf == '-')
+		merge = false;
+
+	rcu_read_lock();
+	if (pid) {
+		tsk = find_task_by_vpid(pid);
+		if (!tsk) {
+			err = -ESRCH;
+			rcu_read_unlock();
+			goto out;
+		}
+	} else {
+		tsk = current;
+	}
+
+	tsk = tsk->group_leader;
+
+	get_task_struct(tsk);
+	rcu_read_unlock();
+
+	mm = get_task_mm(tsk);
+	if (!mm) {
+		err = -EINVAL;
+		goto out_put_task_struct;
+	}
+	down_write(&mm->mmap_sem);
+	vma = mm->mmap;
+	while (vma) {
+		if (merge)
+			ksm_enter(vma->vm_mm, vma, &vma->vm_flags);
+		else
+			ksm_leave(vma, vma->vm_start, vma->vm_end, &vma->vm_flags);
+		vma = vma->vm_next;
+	}
+	up_write(&mm->mmap_sem);
+	mmput(mm);
+
+out_put_task_struct:
+	put_task_struct(tsk);
+
+out:
+	return err ? err : count;
+}
+KSM_ATTR_WO(force_madvise);
+
 static ssize_t sleep_millisecs_show(struct kobject *kobj,
 				    struct kobj_attribute *attr, char *buf)
 {
@@ -3185,6 +3252,7 @@ static ssize_t full_scans_show(struct kobject *kobj,
 KSM_ATTR_RO(full_scans);
 
 static struct attribute *ksm_attrs[] = {
+	&force_madvise_attr.attr,
 	&sleep_millisecs_attr.attr,
 	&pages_to_scan_attr.attr,
 	&run_attr.attr,
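
For completeness, the echo examples in the commit message map onto a single
write of the decimal PID to the new sysfs file. A minimal sketch, assuming
this RFC is applied and the caller may write to /sys/kernel/mm/ksm (typically
root); the force_madvise() helper name here is purely illustrative:

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

/* Illustrative helper: merge (or, with a leading '-', unmerge) a task's VMAs. */
static int force_madvise(pid_t pid, int merge)
{
	FILE *f = fopen("/sys/kernel/mm/ksm/force_madvise", "w");

	if (!f) {
		perror("fopen");
		return -1;
	}
	/* "0" and "-0" act on the writing task itself, as described above. */
	if (fprintf(f, "%s%d\n", merge ? "" : "-", (int)pid) < 0) {
		perror("write");
		fclose(f);
		return -1;
	}
	if (fclose(f)) {
		perror("close");
		return -1;
	}
	return 0;
}

int main(int argc, char **argv)
{
	pid_t pid = argc > 1 ? (pid_t)atoi(argv[1]) : 0;	/* 0 acts on this program itself */

	return force_madvise(pid, 1) ? EXIT_FAILURE : EXIT_SUCCESS;
}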