@@ -468,3 +468,4 @@
460 i386 lsm_set_self_attr sys_lsm_set_self_attr
461 i386 lsm_list_modules sys_lsm_list_modules
462 i386 mseal sys_mseal
+463 i386 fbind sys_fbind
@@ -386,6 +386,7 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common fbind sys_fbind
#
# Due to a historical design error, certain syscalls are numbered differently
@@ -2058,6 +2058,9 @@ struct file_operations {
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags);
int (*fadvise)(struct file *, loff_t, loff_t, int);
+#ifdef CONFIG_NUMA
+ int (*set_policy)(struct file *, struct mempolicy *);
+#endif
int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *,
unsigned int poll_flags);
@@ -299,4 +299,7 @@ static inline bool mpol_is_preferred_many(struct mempolicy *pol)
}
#endif /* CONFIG_NUMA */
+struct mempolicy *create_mpol_from_args(unsigned char mode,
+ const unsigned long __user *nmask,
+ unsigned short maxnode);
#endif
@@ -502,6 +502,9 @@ asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *bu
asmlinkage long sys_newfstatat(int dfd, const char __user *filename,
struct stat __user *statbuf, int flag);
asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf);
+asmlinkage long sys_fbind(unsigned int fd, unsigned long mode,
+ const unsigned long __user *nmask,
+ unsigned long maxnode, unsigned int flags);
#if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64)
asmlinkage long sys_fstat64(unsigned long fd, struct stat64 __user *statbuf);
asmlinkage long sys_fstatat64(int dfd, const char __user *filename,
@@ -841,8 +841,11 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
#define __NR_mseal 462
__SYSCALL(__NR_mseal, sys_mseal)
+#define __NR_fbind 463
+__SYSCALL(__NR_fbind, sys_fbind)
+
#undef __NR_syscalls
-#define __NR_syscalls 463
+#define __NR_syscalls 464
/*
* 32 bit systems traditionally used different
@@ -195,6 +195,7 @@ COND_SYSCALL(move_pages);
COND_SYSCALL(set_mempolicy_home_node);
COND_SYSCALL(cachestat);
COND_SYSCALL(mseal);
+COND_SYSCALL(fbind);
COND_SYSCALL(perf_event_open);
COND_SYSCALL(accept4);
@@ -79,7 +79,7 @@ obj-$(CONFIG_ZSWAP) += zswap.o
obj-$(CONFIG_HAS_DMA) += dmapool.o
obj-$(CONFIG_HUGETLBFS) += hugetlb.o
obj-$(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP) += hugetlb_vmemmap.o
-obj-$(CONFIG_NUMA) += mempolicy.o
+obj-$(CONFIG_NUMA) += mempolicy.o fbind.o
obj-$(CONFIG_SPARSEMEM) += sparse.o
obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
new file mode 100644
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Implement fbind() syscall.
+ *
+ * Copyright (c) 2024 AMD
+ *
+ * Author: Shivank Garg <shivankg@amd.com>
+ */
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/mempolicy.h>
+#include <linux/syscalls.h>
+
+static long do_fbind(unsigned int fd, unsigned long mode,
+ const unsigned long __user *nmask,
+ unsigned long maxnode, unsigned int flags)
+{
+ struct mempolicy *mpol;
+ struct fd f;
+ int ret;
+
+ f = fdget(fd);
+ if (!f.file)
+ return -EBADF;
+
+ mpol = create_mpol_from_args(mode, nmask, maxnode);
+ if (IS_ERR_OR_NULL(mpol)) {
+ ret = PTR_ERR(mpol);
+ goto out_putf;
+ }
+
+ if (f.file->f_op->set_policy)
+ ret = f.file->f_op->set_policy(f.file, mpol);
+ else
+ ret = -EOPNOTSUPP;
+
+ mpol_put(mpol);
+out_putf:
+ fdput(f);
+ return ret;
+}
+
+SYSCALL_DEFINE5(fbind, unsigned int, fd, unsigned long, mode,
+ const unsigned long __user *, nmask,
+ unsigned long, maxnode, unsigned int, flags)
+{
+ return do_fbind(fd, mode, nmask, maxnode, flags);
+}
@@ -3557,3 +3557,58 @@ static int __init mempolicy_sysfs_init(void)
late_initcall(mempolicy_sysfs_init);
#endif /* CONFIG_SYSFS */
+
+/**
+ * create_mpol_from_args - create a mempolicy structure from args
+ * @mode: NUMA memory policy mode
+ * @nmask: bitmask of NUMA nodes
+ * @maxnode: number of bits in the nodes bitmask
+ *
+ * Create a mempolicy from given nodemask and memory policy such as
+ * default, preferred, interleave or bind.
+ *
+ * Return: error encoded in a pointer or memory policy on success.
+ */
+struct mempolicy *create_mpol_from_args(unsigned char mode,
+ const unsigned long __user *nmask,
+ unsigned short maxnode)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned short mode_flags;
+ struct mempolicy *mpol;
+ nodemask_t nodes;
+ int lmode = mode;
+ int err = -ENOMEM;
+
+ err = sanitize_mpol_flags(&lmode, &mode_flags);
+ if (err)
+ return ERR_PTR(err);
+
+ err = get_nodes(&nodes, nmask, maxnode);
+ if (err)
+ return ERR_PTR(err);
+
+ mpol = mpol_new(mode, mode_flags, &nodes);
+ if (IS_ERR_OR_NULL(mpol))
+ return mpol;
+
+ NODEMASK_SCRATCH(scratch);
+ if (!scratch) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ mmap_write_lock(mm);
+ err = mpol_set_nodemask(mpol, &nodes, scratch);
+ mmap_write_unlock(mm);
+ NODEMASK_SCRATCH_FREE(scratch);
+
+ if (err)
+ goto err_out;
+
+ return mpol;
+
+err_out:
+ mpol_put(mpol);
+ return ERR_PTR(err);
+}