@@ -390,7 +390,8 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
-
+467 common ksm_open sys_ksm_open
+468 common ksm_merge sys_ksm_merge
#
# Due to a historical design error, certain syscalls are numbered differently
# in x32 as compared to native x86_64. These syscalls have numbers 512-547.
@@ -14,6 +14,10 @@
#include <linux/rmap.h>
#include <linux/sched.h>
+#include <linux/anon_inodes.h>
+#include <linux/ptrace.h>
+#include <linux/syscalls.h>
+#define MAX_KSM_NAME_LEN 128
+
#ifdef CONFIG_KSM
int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
unsigned long end, int advice, unsigned long *vm_flags);
@@ -147,7 +147,8 @@ struct ksm_scan {
static struct kobject *ksm_base_kobj;
struct partition_kobj {
- struct kobject *kobj;
+ struct kobject *kobj; /* Not required for the syscall interface */
+ char name[MAX_KSM_NAME_LEN];
struct list_head list;
struct rb_root *root_stable_tree;
struct rb_root *root_unstable_tree;
@@ -166,6 +167,106 @@ static struct partition_kobj *find_partition_by_kobj(struct kobject *kobj)
return NULL;
}
+/**
+ * find_ksm_partition - look up a KSM partition by name
+ * @partition_name: NUL-terminated name to search for
+ *
+ * Walks partition_list and compares each entry's name with strcmp().
+ * No lock is taken here, so the caller must keep the list stable for the
+ * duration of the walk.
+ *
+ * Return: the first partition whose name matches, or NULL if none does.
+ */
+static struct partition_kobj *find_ksm_partition(const char *partition_name)
+{
+	struct partition_kobj *partition;
+
+	list_for_each_entry(partition, &partition_list, list) {
+		if (!strcmp(partition->name, partition_name))
+			return partition;
+	}
+
+	return NULL;
+}
+
+/*
+ * Held by ksm_release() while it unlinks a partition from its list and by
+ * ksm_open() while it creates a new partition.
+ */
+static DEFINE_MUTEX(ksm_partition_lock);
+
+/**
+ * ksm_release - ->release handler for a KSM partition file
+ * @inode: inode backing the file (unused)
+ * @file: file being released; private_data is its struct partition_kobj
+ *
+ * Unlinks the partition from the partition list under ksm_partition_lock
+ * and frees it together with its rb-tree root array.
+ *
+ * NOTE(review): ksm_open() can hand out several files that share one
+ * partition pointer and nothing here counts those users, so the first
+ * release frees memory the remaining files still reference. This needs a
+ * reference count in struct partition_kobj - TODO confirm and fix.
+ *
+ * NOTE(review): nodes that may still be linked into the trees are not torn
+ * down here; presumably that has to happen before the roots are freed.
+ *
+ * Return: always 0.
+ */
+static int ksm_release(struct inode *inode, struct file *file)
+{
+	struct partition_kobj *ksm = file->private_data;
+
+	mutex_lock(&ksm_partition_lock);
+	list_del(&ksm->list);
+	mutex_unlock(&ksm_partition_lock);
+
+	/*
+	 * root_stable_tree is the base of the single kcalloc() array made by
+	 * ksm_create_partition(); root_unstable_tree points into that same
+	 * array, so one kfree() releases both sets of roots.
+	 */
+	kfree(ksm->root_stable_tree);
+	kfree(ksm);
+	return 0;
+}
+
+/*
+ * File operations attached to the anonymous-inode file that represents a
+ * KSM partition. Only ->release is provided; it drops the partition when
+ * the file goes away.
+ */
+static const struct file_operations ksm_fops = {
+ .release = ksm_release,
+};
+
+/**
+ * ksm_create_partition - allocate a named partition and add it to the list
+ * @ksm_name: NUL-terminated name for the new partition
+ *
+ * Allocates the partition and one array of 2 * nr_node_ids rb-tree roots:
+ * the first nr_node_ids entries are the stable-tree roots, the rest are the
+ * unstable-tree roots. The new partition is then linked into
+ * partition_list.
+ *
+ * No lock is taken here; the caller is expected to serialize against other
+ * users of partition_list.
+ *
+ * Return: the new partition, or NULL if an allocation failed.
+ */
+static struct partition_kobj *ksm_create_partition(const char *ksm_name)
+{
+	struct partition_kobj *partition;
+	struct rb_root *tree_root;
+
+	partition = kzalloc(sizeof(*partition), GFP_KERNEL);
+	if (!partition)
+		return NULL;
+
+	tree_root = kcalloc(nr_node_ids + nr_node_ids, sizeof(*tree_root),
+			    GFP_KERNEL);
+	if (!tree_root) {
+		/* Do not leak the partition when the root array cannot be had. */
+		kfree(partition);
+		return NULL;
+	}
+
+	partition->root_stable_tree = tree_root;
+	partition->root_unstable_tree = tree_root + nr_node_ids;
+
+	/* strscpy() always NUL-terminates, unlike strncpy() with the full size. */
+	strscpy(partition->name, ksm_name, sizeof(partition->name));
+
+	list_add(&partition->list, &partition_list);
+
+	return partition;
+}
+
+/**
+ * ksm_partition_fd - create a file descriptor that refers to a partition
+ * @partition: partition to store in the new file's private_data
+ *
+ * anon_inode_getfd() reserves the descriptor before it creates the file and
+ * only installs the file once both steps succeeded. On failure no file ever
+ * existed, so ksm_release() is not invoked for @partition.
+ *
+ * Return: the new file descriptor, or a negative errno.
+ */
+static int ksm_partition_fd(struct partition_kobj *partition)
+{
+	return anon_inode_getfd("ksm_partition", &ksm_fops, partition, O_RDWR);
+}
+
+/*
+ * ksm_open - open, and optionally create, a named KSM partition
+ * @ksm_name: user pointer to the NUL-terminated partition name; it must be
+ *            non-empty and shorter than MAX_KSM_NAME_LEN bytes
+ * @flags:    O_CREAT creates the partition if it does not exist; O_EXCL
+ *            fails if it already exists. Any other bit is rejected.
+ *
+ * Return: a file descriptor for the partition, or
+ *   -EINVAL        unknown flag bits or an empty name
+ *   -EFAULT        the name could not be read from user space
+ *   -ENAMETOOLONG  the name does not fit in MAX_KSM_NAME_LEN bytes
+ *   -EEXIST        O_EXCL was given and the partition exists
+ *   -ENOENT        the partition does not exist and O_CREAT was not given
+ *   -ENOMEM        the partition could not be allocated
+ *   or the error returned while creating the file descriptor.
+ *
+ * NOTE(review): opening an existing partition hands out another file that
+ * shares the same partition pointer, while ksm_release() frees the
+ * partition on the first release. This needs a reference count in
+ * struct partition_kobj - TODO confirm and fix.
+ */
+SYSCALL_DEFINE2(ksm_open, const char __user *, ksm_name, int, flags)
+{
+	char name[MAX_KSM_NAME_LEN];
+	struct partition_kobj *partition;
+	bool created = false;
+	long len;
+	int ret;
+
+	if (flags & ~(O_CREAT | O_EXCL))
+		return -EINVAL;
+
+	len = strncpy_from_user(name, ksm_name, sizeof(name));
+	if (len < 0)
+		return -EFAULT;
+	/*
+	 * A return value equal to the buffer size means no NUL was found
+	 * within the buffer, so name[] is not terminated and must not be
+	 * handed to strcmp().
+	 */
+	if (len == sizeof(name))
+		return -ENAMETOOLONG;
+	if (len == 0)
+		return -EINVAL;
+
+	/*
+	 * Lookup and creation happen under one lock hold so that two callers
+	 * racing on O_CREAT cannot both miss the lookup and create
+	 * duplicates. The descriptor is created under the lock too, so a
+	 * partition that is about to be rolled back is never found by
+	 * another opener.
+	 */
+	mutex_lock(&ksm_partition_lock);
+
+	partition = find_ksm_partition(name);
+	if (partition && (flags & O_EXCL)) {
+		/* Partition already exists, return error */
+		ret = -EEXIST;
+		goto out_unlock;
+	}
+
+	if (!partition) {
+		if (!(flags & O_CREAT)) {
+			ret = -ENOENT;
+			goto out_unlock;
+		}
+
+		/* Partition does not exist, but we are allowed to create one */
+		partition = ksm_create_partition(name);
+		if (!partition) {
+			ret = -ENOMEM;
+			goto out_unlock;
+		}
+		created = true;
+	}
+
+	ret = ksm_partition_fd(partition);
+	if (ret < 0 && created) {
+		/*
+		 * No file was created, so nothing else will ever release the
+		 * partition we just added: undo the creation here.
+		 * root_stable_tree is the base of the root array allocated by
+		 * ksm_create_partition().
+		 */
+		list_del(&partition->list);
+		kfree(partition->root_stable_tree);
+		kfree(partition);
+	}
+
+out_unlock:
+	mutex_unlock(&ksm_partition_lock);
+	return ret;
+}
+
/**
* struct ksm_stable_node - node of the stable rbtree
* @node: rb node of this ksm page in the stable tree
@@ -4324,6 +4425,59 @@ static int __init ksm_thread_sysfs_init(void)
}
#endif /* CONFIG_SELECTIVE_KSM */
+/*
+ * ksm_merge - synchronously merge a range of a process into a partition
+ * @ksm_fd: file descriptor obtained from ksm_open()
+ * @pid:    target process, looked up in the caller's pid namespace
+ * @start:  first address of the range in the target's address space
+ * @size:   length of the range in bytes; must be non-zero
+ *
+ * Calls ksm_sync_merge() on [@start, @start + @size) of the target's mm
+ * while holding ksm_thread_mutex.
+ *
+ * Return: 0 on success, or
+ *   -EBADF   @ksm_fd is not an open file descriptor
+ *   -EINVAL  @ksm_fd is not a KSM partition file, the range is empty or
+ *            wraps around, or the target has no mm
+ *   -ESRCH   no task with @pid was found
+ *   or the error from mm_access() when the caller may not access the
+ *   target.
+ *
+ * NOTE(review): the outcome of ksm_sync_merge() is not propagated; whether
+ * it reports one is not visible here - TODO confirm.
+ */
+SYSCALL_DEFINE4(ksm_merge, int, ksm_fd, pid_t, pid, unsigned long, start,
+		size_t, size)
+{
+	unsigned long end = start + size;
+	struct partition_kobj *partition;
+	struct task_struct *task;
+	struct mm_struct *mm;
+	struct file *file;
+	int ret;
+
+	file = fget(ksm_fd);
+	if (!file)
+		return -EBADF;
+
+	/*
+	 * private_data is a struct partition_kobj only for files created
+	 * with ksm_fops. Any other descriptor must be refused before its
+	 * private_data is interpreted.
+	 */
+	if (file->f_op != &ksm_fops) {
+		ret = -EINVAL;
+		goto out_fput;
+	}
+
+	partition = file->private_data;
+	if (!partition) {
+		ret = -EINVAL;
+		goto out_fput;
+	}
+
+	/* Rejects size == 0 as well as start + size wrapping around. */
+	if (start >= end) {
+		ret = -EINVAL;
+		goto out_fput;
+	}
+
+	/* Pin the task under RCU so it can be used after the lookup. */
+	rcu_read_lock();
+	task = find_task_by_vpid(pid);
+	if (task)
+		get_task_struct(task);
+	rcu_read_unlock();
+
+	if (!task) {
+		ret = -ESRCH;
+		goto out_fput;
+	}
+
+	/*
+	 * Merging changes how the target's pages are mapped, so require the
+	 * same access a ptrace attach would need instead of taking the mm of
+	 * an arbitrary process unchecked.
+	 */
+	mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS);
+	put_task_struct(task);
+	if (IS_ERR_OR_NULL(mm)) {
+		ret = mm ? PTR_ERR(mm) : -EINVAL;
+		goto out_fput;
+	}
+
+	mutex_lock(&ksm_thread_mutex);
+	wait_while_offlining();
+	ksm_sync_merge(mm, start, end, partition);
+	mutex_unlock(&ksm_thread_mutex);
+
+	mmput(mm);
+	ret = 0;
+
+out_fput:
+	fput(file);
+	return ret;
+}
+
static int __init ksm_init(void)
{
int err;
A partition can be created or opened using:

    int ksm_fd = ksm_open(ksm_name, flags);

ksm_name specifies the KSM partition to be created or opened.

flags:
    O_CREAT  Create the KSM partition object if it does not exist.
    O_EXCL   If O_CREAT was also specified, and a KSM partition object
             with the given name already exists, return an error.

Trigger the merge using:

    ksm_merge(ksm_fd, pid, start_addr, size);

Limitation: only the x86-64 syscall table (syscall_64.tbl) is wired up.

Signed-off-by: Sourav Panda <souravpanda@google.com>
---
 arch/x86/entry/syscalls/syscall_64.tbl |   3 +-
 include/linux/ksm.h                    |   4 +
 mm/ksm.c                               | 156 ++++++++++++++++++++++++-
 3 files changed, 161 insertions(+), 2 deletions(-)