@@ -32,7 +32,7 @@ are swapped back in: ksmd must rediscover their identity and merge again).
Controlling KSM with madvise
============================
-KSM only operates on those areas of address space which an application
+KSM can operates on those areas of address space which an application
has advised to be likely candidates for merging, by using the madvise(2)
system call::
@@ -70,6 +70,23 @@ Applications should be considerate in their use of MADV_MERGEABLE,
restricting its use to areas likely to benefit. KSM's scans may use a lot
of processing power: some installations will disable KSM for that reason.
+Controlling KSM with procfs
+===========================
+
+KSM can also operate on anonymous areas of address space of those processes's
+knob ``/proc/<pid>/ksm_force`` is on, even if app codes doesn't call madvise()
+explicitly to advise specific areas as MADV_MERGEABLE.
+
+You can set ksm_force to 1 to force all anonymous and qualified VMAs of
+this process to be involved in KSM scanning.
+ e.g. ``echo 1 > /proc/<pid>/ksm_force``
+
+You can also set ksm_force to 0 to cancel that force feature of this process
+and unmerge those merged pages which belongs to those VMAs not marked as
+MADV_MERGEABLE of this process. But that still leave those pages belonging to
+VMAs marked as MADV_MERGEABLE merged (fallback to the default state).
+ e.g. ``echo 0 > /proc/<pid>/ksm_force``
+
.. _ksm_sysfs:
KSM daemon sysfs interface
@@ -47,6 +47,7 @@ fixes/update part 1.1 Stefani Seibold <stefani@seibold.net> June 9 2009
3.10 /proc/<pid>/timerslack_ns - Task timerslack value
3.11 /proc/<pid>/patch_state - Livepatch patch operation state
3.12 /proc/<pid>/arch_status - Task architecture specific information
+ 3.13 /proc/<pid>/ksm_force - Setting of mandatory involvement in KSM
4 Configuring procfs
4.1 Mount options
@@ -2176,6 +2177,22 @@ AVX512_elapsed_ms
the task is unlikely an AVX512 user, but depends on the workload and the
scheduling scenario, it also could be a false negative mentioned above.
+3.13 /proc/<pid>/ksm_force - Setting of mandatory involvement in KSM
+-----------------------------------------------------------------------
+When CONFIG_KSM is enabled, this file can be used to specify if this
+process's anonymous memory can be involved in KSM scanning without app codes
+explicitly calling madvise to mark memory address as MADV_MERGEABLE.
+
+If writing 1 to this file, the kernel will force all anonymous and qualified
+memory to be involved in KSM scanning without explicitly calling madvise to
+mark memory address as MADV_MERGEABLE. But that is effective only when the
+klob of '/sys/kernel/mm/ksm/run' is set as 1.
+
+If writing 0 to this file, the mandatory KSM feature of this process's will
+be cancelled and unmerge those merged pages which belongs to those areas not
+marked as MADV_MERGEABLE of this process, but leave those pages belonging to
+areas marked as MADV_MERGEABLE merged (fallback to the default state).
+
Chapter 4: Configuring procfs
=============================
@@ -96,6 +96,7 @@
#include <linux/time_namespace.h>
#include <linux/resctrl.h>
#include <linux/cn_proc.h>
+#include <linux/ksm.h>
#include <trace/events/oom.h>
#include "internal.h"
#include "fd.h"
@@ -3168,6 +3169,100 @@ static int proc_pid_ksm_merging_pages(struct seq_file *m, struct pid_namespace *
return 0;
}
+
+static ssize_t ksm_force_read(struct file *file, char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ struct task_struct *task;
+ struct mm_struct *mm;
+ char buffer[PROC_NUMBUF];
+ ssize_t len;
+ int ret;
+
+ task = get_proc_task(file_inode(file));
+ if (!task)
+ return -ESRCH;
+
+ mm = get_task_mm(task);
+ ret = 0;
+ if (mm) {
+ len = snprintf(buffer, sizeof(buffer), "%d\n", mm->ksm_force);
+ ret = simple_read_from_buffer(buf, count, ppos, buffer, len);
+ mmput(mm);
+ }
+
+ return ret;
+}
+
+static ssize_t ksm_force_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct task_struct *task;
+ struct mm_struct *mm;
+ char buffer[PROC_NUMBUF];
+ int force;
+ int err = 0;
+ int str_len;
+
+ if (count > sizeof(buffer) - 1) {
+ count = sizeof(buffer) - 1;
+ }
+
+ str_len = strncpy_from_user(buffer, buf, count);
+ if (str_len < 0)
+ return -EFAULT;
+ buffer[str_len] = '\0';
+
+ err = kstrtoint(strstrip(buffer), 0, &force);
+ if (err)
+ return err;
+
+ if (force != 0 && force != 1)
+ return -EINVAL;
+
+ task = get_proc_task(file_inode(file));
+ if (!task)
+ return -ESRCH;
+
+ mm = get_task_mm(task);
+ if (!mm)
+ goto out_put_task;
+
+ if (mm->ksm_force != force) {
+ if (mmap_write_lock_killable(mm)) {
+ err = -EINTR;
+ goto out_mmput;
+ }
+
+ if (force == 0)
+ mm->ksm_force = force;
+ else {
+ /*
+ * Force anonymous pages of this mm to be involved in KSM merging
+ * without explicitly calling madvise.
+ */
+ if (!test_bit(MMF_VM_MERGEABLE, &mm->flags))
+ err = __ksm_enter(mm);
+ if (!err)
+ mm->ksm_force = force;
+ }
+
+ mmap_write_unlock(mm);
+ }
+
+out_mmput:
+ mmput(mm);
+out_put_task:
+ put_task_struct(task);
+
+ return err < 0 ? err : count;
+}
+
+static const struct file_operations proc_pid_ksm_force_operations = {
+ .read = ksm_force_read,
+ .write = ksm_force_write,
+ .llseek = generic_file_llseek,
+};
#endif /* CONFIG_KSM */
#ifdef CONFIG_STACKLEAK_METRICS
@@ -3303,6 +3398,7 @@ static const struct pid_entry tgid_base_stuff[] = {
#endif
#ifdef CONFIG_KSM
ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages),
+ REG("ksm_force", S_IRUSR|S_IWUSR, proc_pid_ksm_force_operations),
#endif
};
@@ -3639,6 +3735,7 @@ static const struct pid_entry tid_base_stuff[] = {
#endif
#ifdef CONFIG_KSM
ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages),
+ REG("ksm_force", S_IRUSR|S_IWUSR, proc_pid_ksm_force_operations),
#endif
};
@@ -661,6 +661,15 @@ struct mm_struct {
* merging.
*/
unsigned long ksm_merging_pages;
+ /*
+ * If true, force anonymous pages of this mm to be involved in KSM
+ * merging without explicitly calling madvise. It is effctive only
+ * when the klob of '/sys/kernel/mm/ksm/run' is set as 1. If false,
+ * cancel the feature of ksm_force of this process and unmerge
+ * those merged pages which is not madvised as MERGEABLE of this
+ * process, but leave MERGEABLE areas merged.
+ */
+ bool ksm_force;
#endif
} __randomize_layout;
@@ -334,6 +334,34 @@ static void __init ksm_slab_free(void)
mm_slot_cache = NULL;
}
+/* Check if vma is qualified for ksmd scanning */
+static bool ksm_vma_check(struct vm_area_struct *vma)
+{
+ unsigned long vm_flags = vma->vm_flags;
+
+ if (!(vma->vm_flags & VM_MERGEABLE) && !(vma->vm_mm->ksm_force))
+ return false;
+
+ if (vm_flags & (VM_SHARED | VM_MAYSHARE |
+ VM_PFNMAP | VM_IO | VM_DONTEXPAND |
+ VM_HUGETLB | VM_MIXEDMAP))
+ return false; /* just ignore this vma*/
+
+ if (vma_is_dax(vma))
+ return false;
+
+#ifdef VM_SAO
+ if (vm_flags & VM_SAO)
+ return false;
+#endif
+#ifdef VM_SPARC_ADI
+ if (vm_flags & VM_SPARC_ADI)
+ return false;
+#endif
+
+ return true;
+}
+
static __always_inline bool is_stable_node_chain(struct stable_node *chain)
{
return chain->rmap_hlist_len == STABLE_NODE_CHAIN;
@@ -523,7 +551,7 @@ static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
if (ksm_test_exit(mm))
return NULL;
vma = vma_lookup(mm, addr);
- if (!vma || !(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
+ if (!vma || !ksm_vma_check(vma) || !vma->anon_vma)
return NULL;
return vma;
}
@@ -2297,7 +2325,7 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page)
vma = find_vma(mm, ksm_scan.address);
for (; vma; vma = vma->vm_next) {
- if (!(vma->vm_flags & VM_MERGEABLE))
+ if (!ksm_vma_check(vma))
continue;
if (ksm_scan.address < vma->vm_start)
ksm_scan.address = vma->vm_start;