@@ -136,3 +136,4 @@ obj-$(CONFIG_EFIVAR_FS) += efivarfs/
obj-$(CONFIG_EROFS_FS) += erofs/
obj-$(CONFIG_VBOXSF_FS) += vboxsf/
obj-$(CONFIG_ZONEFS_FS) += zonefs/
+obj-$(CONFIG_TRAMPFD) += trampfd/
new file mode 100644
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_TRAMPFD) += trampfd.o
+
+trampfd-y += trampfd_data.o trampfd_fops.o trampfd_map.o trampfd_pcs.o
+trampfd-y += trampfd_regs.o trampfd_stack.o trampfd_stubs.o trampfd_syscall.o
new file mode 100644
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Trampoline File Descriptor - Trampoline type-specific code.
+ *
+ * Author: Madhavan T. Venkataraman (madvenka@microsoft.com)
+ *
+ * Copyright (C) 2020 Microsoft Corporation.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/mman.h>
+#include <linux/trampfd.h>
+
+int trampfd_create_data(struct trampfd *trampfd, const void __user *tramp_data)
+{
+	struct trampfd_user *udata;
+	int err = 0;
+
+	/* Only TRAMPFD_USER has type-specific data and mmap() parameters. */
+	if (trampfd->type != TRAMPFD_USER)
+		return 0;
+
+	udata = kmalloc(sizeof(*udata), GFP_KERNEL);
+	if (!udata)
+		return -ENOMEM;
+	if (copy_from_user(udata, tramp_data, sizeof(*udata)))
+		err = -EFAULT;
+	else if (udata->flags || udata->reserved)
+		err = -EINVAL;	/* nonzero flags/reserved are rejected */
+	if (err) {
+		kfree(udata);
+		return err;
+	}
+	trampfd->data = udata;
+	trampfd->map.size = PAGE_SIZE;
+	trampfd->map.prot = PROT_NONE;
+	trampfd->map.flags = MAP_PRIVATE;
+	trampfd->map.offset = 0;
+	trampfd->map.ioffset = 0;
+	return 0;
+}
new file mode 100644
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Trampoline File Descriptor - File operations.
+ *
+ * Author: Madhavan T. Venkataraman (madvenka@microsoft.com)
+ *
+ * Copyright (C) 2020 Microsoft Corporation.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/seq_file.h>
+#include <linux/trampfd.h>
+
+#ifdef CONFIG_PROC_FS
+static const char * const trampfd_type_names[TRAMPFD_NUM_TYPES] = {
+	[TRAMPFD_USER] = "TRAMPFD_USER",	/* indexed by enum trampfd_type */
+};
+
+static void trampfd_show_fdinfo(struct seq_file *sfile, struct file *file)
+{
+	const struct trampfd *tramp = file->private_data;
+
+	seq_printf(sfile, "type: %s\n", trampfd_type_names[tramp->type]);
+}
+#endif
+
+static loff_t trampfd_llseek(struct file *file, loff_t offset, int whence)
+{
+	struct trampfd *tramp = file->private_data;
+	bool in_range = offset >= 0 && offset < TRAMPFD_NUM_OFFSETS;
+
+	/* Only SEEK_SET to one of the symbolic context offsets is accepted. */
+	if (whence != SEEK_SET || !in_range)
+		return -EINVAL;
+
+	mutex_lock(&tramp->lock);
+	if (file->f_pos != offset) {
+		file->f_version = 0;
+		file->f_pos = offset;
+	}
+	mutex_unlock(&tramp->lock);
+
+	return offset;
+}
+
+static ssize_t trampfd_read(struct file *file, char __user *arg,
+			    size_t count, loff_t *ppos)
+{
+	int err;
+
+	if (!count || !arg)
+		return -EINVAL;
+
+	/* The file position selects which context is read. */
+	switch (*ppos) {
+	case TRAMPFD_MAP_OFFSET:
+		err = trampfd_get_map(file, arg, count);
+		break;
+	case TRAMPFD_REGS_OFFSET:
+		err = trampfd_get_regs(file, arg, count);
+		break;
+	case TRAMPFD_STACK_OFFSET:
+		err = trampfd_get_stack(file, arg, count);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* On success the full requested count is reported. */
+	if (err)
+		return err;
+	return count;
+}
+
+static ssize_t trampfd_write(struct file *file, const char __user *arg,
+			     size_t count, loff_t *ppos)
+{
+	int err;
+
+	if (!count || !arg)
+		return -EINVAL;
+
+	/* The file position selects which context is written. */
+	switch (*ppos) {
+	case TRAMPFD_REGS_OFFSET:
+		err = trampfd_set_regs(file, arg, count);
+		break;
+	case TRAMPFD_STACK_OFFSET:
+		err = trampfd_set_stack(file, arg, count);
+		break;
+	case TRAMPFD_ALLOWED_PCS_OFFSET:
+		err = trampfd_set_allowed_pcs(file, arg, count);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* On success the full requested count is reported. */
+	if (err)
+		return err;
+	return count;
+}
+
+static int trampfd_release(struct inode *inode, struct file *file)
+{
+	struct trampfd *tramp = file->private_data;
+
+	if (tramp->type == TRAMPFD_USER) {	/* context buffers, if any */
+		kfree(tramp->allowed_pcs);
+		kfree(tramp->stack);
+		kfree(tramp->regs);
+	}
+	kfree(tramp->data);
+	mutex_destroy(&tramp->lock);
+	kmem_cache_free(trampfd_cache, tramp);
+	return 0;
+}
+
+const struct file_operations trampfd_fops = {
+	.llseek			= trampfd_llseek,	/* selects the context */
+	.read			= trampfd_read,
+	.write			= trampfd_write,
+	.mmap			= trampfd_mmap,
+	.get_unmapped_area	= trampfd_get_unmapped_area,
+	.release		= trampfd_release,
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo		= trampfd_show_fdinfo,
+#endif
+};
new file mode 100644
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Trampoline File Descriptor - Memory mapping.
+ *
+ * Author: Madhavan T. Venkataraman (madvenka@microsoft.com)
+ *
+ * Copyright (C) 2020 Microsoft Corporation.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/mman.h>
+#include <linux/security.h>
+#include <linux/trampfd.h>
+
+int trampfd_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct trampfd *tramp = file->private_data;
+	const unsigned long access = VM_READ | VM_WRITE | VM_EXEC;
+
+	if (tramp->type == TRAMPFD_USER) {
+		/*
+		 * No physical page backs a trampoline mapping, so there is
+		 * nothing to read or write: reject any access permission.
+		 * Invoking the trampoline must raise an execute fault so the
+		 * kernel can intercept it and set up the user context. The
+		 * mapping is special and must not be merged or inherited.
+		 */
+		if (vma->vm_flags & access)
+			return -EINVAL;
+		vma->vm_flags = VM_SPECIAL | VM_DONTCOPY | VM_DONTDUMP;
+	}
+	vma->vm_private_data = tramp;
+	return 0;
+}
+
+unsigned long
+trampfd_get_unmapped_area(struct file *file, unsigned long orig_addr,
+			  unsigned long len, unsigned long pgoff,
+			  unsigned long flags)
+{
+	const struct trampfd *tramp = file->private_data;
+	const struct trampfd_map *want = &tramp->map;
+	struct mm_struct *mm = current->mm;
+
+	/* The request must match the parameters stored in trampfd->map. */
+	if (len != want->size || flags != want->flags)
+		return -EINVAL;
+	if (pgoff != want->offset >> PAGE_SHIFT)
+		return -EINVAL;
+
+	return mm->get_unmapped_area(file, orig_addr, len, pgoff, flags);
+}
+
+/*
+ * Copy the trampoline's mmap() parameters (struct trampfd_map) to user space.
+ * @count must be exactly sizeof(struct trampfd_map).
+ * Returns 0, -EINVAL on a size mismatch, -EFAULT if the copy to user fails.
+ */
+int trampfd_get_map(struct file *file, char __user *arg, size_t count)
+{
+	const struct trampfd *tramp = file->private_data;
+
+	if (count != sizeof(tramp->map))
+		return -EINVAL;
+	return copy_to_user(arg, &tramp->map, count) ? -EFAULT : 0;
+}
+
+/* True if @vma maps a trampfd file, identified by its file operations. */
+bool is_trampfd_vma(struct vm_area_struct *vma)
+{
+	struct file *file = vma->vm_file;
+
+	/* A dentry name match is no proof: any file can be named alike. */
+	return file && file->f_op == &trampfd_fops;
+}
+EXPORT_SYMBOL_GPL(is_trampfd_vma);
new file mode 100644
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Trampoline File Descriptor - Allowed PCs context.
+ *
+ * Author: Madhavan T. Venkataraman (madvenka@microsoft.com)
+ *
+ * Copyright (C) 2020 Microsoft Corporation.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/trampfd.h>
+
+/*
+ * Fetch a list of allowed PCs (struct trampfd_values) from user space into
+ * @allowed_pcs, which must have room for @count bytes, and validate it.
+ * Returns 0 if the list is well-formed, -EFAULT or -EINVAL otherwise.
+ */
+static int trampfd_copy_allowed_pcs(struct trampfd_values *allowed_pcs,
+				    const void __user *arg, size_t count)
+{
+	size_t expected;
+	u32 npcs, idx;
+
+	if (copy_from_user(allowed_pcs, arg, count))
+		return -EFAULT;
+
+	if (allowed_pcs->reserved)
+		return -EINVAL;
+
+	npcs = allowed_pcs->nvalues;
+	if (npcs > TRAMPFD_MAX_PCS)
+		return -EINVAL;
+
+	/* @count must be the header plus exactly npcs values. */
+	expected = sizeof(*allowed_pcs) + npcs * sizeof(u64);
+	if (expected != count)
+		return -EINVAL;
+
+	/* A PC value of 0 is not accepted. */
+	for (idx = 0; idx < npcs; idx++) {
+		if (!allowed_pcs->values[idx])
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Replace the trampoline's list of allowed PCs with the one written by the
+ * user. A list with zero values removes the current list. A register context
+ * that is already set is not re-checked against the new list.
+ * Returns 0 or a negative errno.
+ */
+int trampfd_set_allowed_pcs(struct file *file, const char __user *arg,
+			    size_t count)
+{
+	struct trampfd *tramp = file->private_data;
+	struct trampfd_values *new_pcs, *old_pcs;
+	int err;
+
+	if (count > TRAMPFD_MAX_PCS_SIZE || count < sizeof(*new_pcs))
+		return -EINVAL;
+
+	new_pcs = kmalloc(count, GFP_KERNEL);
+	if (!new_pcs)
+		return -ENOMEM;
+
+	err = trampfd_copy_allowed_pcs(new_pcs, arg, count);
+	if (err) {
+		kfree(new_pcs);
+		return err;
+	}
+
+	/* An empty list clears the current one: NULL is installed. */
+	if (new_pcs->nvalues == 0) {
+		kfree(new_pcs);
+		new_pcs = NULL;
+	}
+
+	/*
+	 * Exchange the lists under the lock; the previous list, if any,
+	 * is freed once the lock has been dropped.
+	 */
+	mutex_lock(&tramp->lock);
+
+	old_pcs = tramp->allowed_pcs;
+	tramp->allowed_pcs = new_pcs;
+
+	mutex_unlock(&tramp->lock);
+
+	kfree(old_pcs);
+	return 0;
+}
new file mode 100644
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Trampoline File Descriptor - Register context.
+ *
+ * Author: Madhavan T. Venkataraman (madvenka@microsoft.com)
+ *
+ * Copyright (C) 2020 Microsoft Corporation.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/trampfd.h>
+
+/*
+ * Fetch a register context from user space into @regs, which must have room
+ * for @count bytes, and validate it.
+ */
+static int trampfd_copy_regs(struct trampfd_regs *regs, const void __user *arg,
+			     size_t count)
+{
+	size_t expected;
+
+	if (copy_from_user(regs, arg, count))
+		return -EFAULT;
+
+	if (regs->reserved || regs->nregs > TRAMPFD_MAX_REGS)
+		return -EINVAL;
+
+	/* @count must be the header plus exactly nregs register entries. */
+	expected = sizeof(*regs) + regs->nregs * sizeof(struct trampfd_reg);
+	if (expected != count)
+		return -EINVAL;
+
+	/* An empty context is valid; otherwise the arch code decides. */
+	if (!regs->nregs)
+		return 0;
+
+	if (!trampfd_valid_regs(regs))
+		return -EINVAL;
+	return 0;
+}
+
+/*
+ * Replace the trampoline's register context with the one written by the
+ * user. A context with zero registers removes the current one.
+ * Returns 0 or a negative errno.
+ */
+int trampfd_set_regs(struct file *file, const char __user *arg, size_t count)
+{
+	struct trampfd *tramp = file->private_data;
+	struct trampfd_regs *new_regs;
+	int err;
+
+	if (count > TRAMPFD_MAX_REGS_SIZE || count < sizeof(*new_regs))
+		return -EINVAL;
+
+	new_regs = kmalloc(count, GFP_KERNEL);
+	if (!new_regs)
+		return -ENOMEM;
+
+	err = trampfd_copy_regs(new_regs, arg, count);
+	if (err)
+		goto free_regs;
+
+	/* An empty context clears the current one: NULL is installed. */
+	if (new_regs->nregs == 0) {
+		kfree(new_regs);
+		new_regs = NULL;
+	}
+
+	mutex_lock(&tramp->lock);
+
+	/*
+	 * A non-empty context is installed only if trampfd_allowed_pc()
+	 * accepts it. On success @new_regs ends up holding the previous
+	 * context, so the exit path below frees whichever of the two is
+	 * no longer needed.
+	 */
+	if (new_regs && !trampfd_allowed_pc(tramp, new_regs)) {
+		err = -EINVAL;
+	} else {
+		struct trampfd_regs *old_regs = tramp->regs;
+
+		tramp->regs = new_regs;
+		new_regs = old_regs;
+	}
+
+	mutex_unlock(&tramp->lock);
+free_regs:
+	kfree(new_regs);
+	return err;
+}
+
+/*
+ * Copy the trampoline's register context to user space. At most @count bytes
+ * are copied. If no context is set, a zeroed header (nregs == 0) is returned.
+ * Returns 0 or a negative errno.
+ */
+int trampfd_get_regs(struct file *file, char __user *arg, size_t count)
+{
+	struct trampfd *tramp = file->private_data;
+	struct trampfd_regs *snapshot;
+	size_t len = sizeof(*snapshot);
+	int err = 0;
+
+	if (count > TRAMPFD_MAX_REGS_SIZE || count < sizeof(*snapshot))
+		return -EINVAL;
+
+	snapshot = kmalloc(count, GFP_KERNEL);
+	if (!snapshot)
+		return -ENOMEM;
+
+	/*
+	 * Take a private copy under the lock so that the copy to user
+	 * space can be done after the lock has been dropped.
+	 */
+	mutex_lock(&tramp->lock);
+
+	if (!tramp->regs) {
+		memset(snapshot, 0, len);
+	} else {
+		len += tramp->regs->nregs * sizeof(struct trampfd_reg);
+		/* Truncate to the size of the caller's buffer. */
+		if (len > count)
+			len = count;
+		memcpy(snapshot, tramp->regs, len);
+	}
+
+	mutex_unlock(&tramp->lock);
+
+	if (copy_to_user(arg, snapshot, len))
+		err = -EFAULT;
+
+	kfree(snapshot);
+	return err;
+}
new file mode 100644
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Trampoline File Descriptor - Stack context.
+ *
+ * Author: Madhavan T. Venkataraman (madvenka@microsoft.com)
+ *
+ * Copyright (C) 2020 Microsoft Corporation.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/trampfd.h>
+
+/*
+ * Fetch a stack context from user space into @stack, which must have room
+ * for @count bytes, and validate it.
+ */
+static int trampfd_copy_stack(struct trampfd_stack *stack,
+			      const void __user *arg, size_t count)
+{
+	if (copy_from_user(stack, arg, count))
+		return -EFAULT;
+
+	if (stack->reserved)
+		return -EINVAL;
+
+	if (stack->size > TRAMPFD_MAX_DATA_SIZE)
+		return -EINVAL;
+
+	/* @count must be the header plus exactly stack->size data bytes. */
+	if (sizeof(*stack) + stack->size != count)
+		return -EINVAL;
+
+	/* flags and offset are only checked when there is data to push. */
+	if (!stack->size)
+		return 0;
+
+	if (stack->flags & ~TRAMPFD_SFLAGS)
+		return -EINVAL;
+	if (stack->offset > TRAMPFD_MAX_STACK_OFFSET)
+		return -EINVAL;
+	return 0;
+}
+
+/*
+ * Replace the trampoline's stack context with the one written by the user.
+ * A context with no data (size 0) removes the current one.
+ * Returns 0 or a negative errno.
+ */
+int trampfd_set_stack(struct file *file, const char __user *arg, size_t count)
+{
+	struct trampfd *tramp = file->private_data;
+	struct trampfd_stack *new_stack, *old_stack;
+	int err;
+
+	if (count > TRAMPFD_MAX_STACK_SIZE || count < sizeof(*new_stack))
+		return -EINVAL;
+
+	new_stack = kmalloc(count, GFP_KERNEL);
+	if (!new_stack)
+		return -ENOMEM;
+
+	err = trampfd_copy_stack(new_stack, arg, count);
+	if (err) {
+		kfree(new_stack);
+		return err;
+	}
+
+	/* An empty context clears the current one: NULL is installed. */
+	if (new_stack->size == 0) {
+		kfree(new_stack);
+		new_stack = NULL;
+	}
+
+	/*
+	 * Exchange the contexts under the lock; free the old one after.
+	 */
+	mutex_lock(&tramp->lock);
+
+	old_stack = tramp->stack;
+	tramp->stack = new_stack;
+
+	mutex_unlock(&tramp->lock);
+
+	kfree(old_stack);
+	return 0;
+}
+
+/*
+ * Copy the trampoline's stack context to user space. At most @count bytes
+ * are copied. If no context is set, a zeroed header (size == 0) is returned.
+ * Returns 0 or a negative errno.
+ */
+int trampfd_get_stack(struct file *file, char __user *arg, size_t count)
+{
+	struct trampfd *tramp = file->private_data;
+	struct trampfd_stack *snapshot;
+	size_t len = sizeof(*snapshot);
+	int err = 0;
+
+	if (count > TRAMPFD_MAX_STACK_SIZE || count < sizeof(*snapshot))
+		return -EINVAL;
+
+	snapshot = kmalloc(count, GFP_KERNEL);
+	if (!snapshot)
+		return -ENOMEM;
+
+	/*
+	 * Take a private copy under the lock so that the copy to user
+	 * space can be done after the lock has been dropped.
+	 */
+	mutex_lock(&tramp->lock);
+
+	if (!tramp->stack) {
+		memset(snapshot, 0, len);
+	} else {
+		len += tramp->stack->size;
+		if (len > count)
+			len = count;	/* truncate to the caller's buffer */
+		memcpy(snapshot, tramp->stack, len);
+	}
+
+	mutex_unlock(&tramp->lock);
+
+	if (copy_to_user(arg, snapshot, len))
+		err = -EFAULT;
+
+	kfree(snapshot);
+	return err;
+}
new file mode 100644
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Trampoline File Descriptor - Stub functions.
+ *
+ * Author: Madhavan T. Venkataraman (madvenka@microsoft.com)
+ *
+ * Copyright (C) 2020 Microsoft Corporation.
+ */
+
+#include <linux/trampfd.h>
+
+/*
+ * Weak default for the arch hook that reports whether a trampoline type is
+ * supported: types needing architecture support fail, generic ones succeed.
+ */
+int __attribute__((weak)) trampfd_check_arch(struct trampfd *trampfd)
+{
+	/* TRAMPFD_USER is the only type that needs architecture support. */
+	if (trampfd->type != TRAMPFD_USER)
+		return 0;
+	return -EINVAL;
+}
+
+/*
+ * Weak stub for the arch function that validates a register context. This
+ * default rejects every context it is asked about by returning false.
+ */
+bool __attribute__((weak)) trampfd_valid_regs(struct trampfd_regs *regs)
+{
+ return false;
+}
+
+/*
+ * Weak stub for the arch function that decides whether the PC register in a
+ * specified register context is allowed. This default never allows it.
+ */
+bool __attribute__((weak)) trampfd_allowed_pc(struct trampfd *trampfd,
+ struct trampfd_regs *regs)
+{
+ return false;
+}
new file mode 100644
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Trampoline File Descriptor - System call.
+ *
+ * Author: Madhavan T. Venkataraman (madvenka@microsoft.com)
+ *
+ * Copyright (C) 2020 Microsoft Corporation.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/mman.h>
+#include <linux/syscalls.h>
+#include <linux/anon_inodes.h>
+#include <linux/trampfd.h>
+
+char *trampfd_name = "[trampfd]"; /* name passed to anon_inode_getfile() */
+
+struct kmem_cache *trampfd_cache; /* struct trampfd cache; set by trampfd_init() */
+
+SYSCALL_DEFINE3(trampfd_create,
+		int, tramp_type,
+		const void __user *, tramp_data,
+		unsigned int, flags)
+{
+	struct trampfd *trampfd;
+	struct file *file;
+	int fd, rc = 0;
+
+	if (!trampfd_cache)
+		return -ENOMEM;
+
+	/* No flags are defined yet; type-specific data is mandatory. */
+	if (flags || !tramp_data)
+		return -EINVAL;
+
+	if (tramp_type < 0 || tramp_type >= TRAMPFD_NUM_TYPES)
+		return -EINVAL;
+
+	trampfd = kmem_cache_zalloc(trampfd_cache, GFP_KERNEL);
+	if (!trampfd)
+		return -ENOMEM;
+
+	mutex_init(&trampfd->lock);
+	trampfd->type = tramp_type;
+
+	rc = trampfd_create_data(trampfd, tramp_data);
+	if (rc)
+		goto freetramp;
+
+	rc = trampfd_check_arch(trampfd);
+	if (rc)
+		goto freedata;
+
+	rc = get_unused_fd_flags(O_CLOEXEC);
+	if (rc < 0)
+		goto freedata;
+	fd = rc;
+
+	file = anon_inode_getfile(trampfd_name, &trampfd_fops, trampfd, O_RDWR);
+	if (IS_ERR(file)) {
+		rc = PTR_ERR(file);
+		goto freefd;
+	}
+	file->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
+
+	/* Once installed, the file owns trampfd; trampfd_release() frees it. */
+	fd_install(fd, file);
+	return fd;
+freefd:
+	put_unused_fd(fd);
+freedata:
+	kfree(trampfd->data);
+freetramp:
+	mutex_destroy(&trampfd->lock);
+	kmem_cache_free(trampfd_cache, trampfd);
+	return rc;
+}
+
+/* Boot-time setup: create the slab cache for struct trampfd objects. */
+int __init trampfd_init(void)
+{
+	trampfd_cache = kmem_cache_create("trampfd_cache",
+		sizeof(struct trampfd), 0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!trampfd_cache) {
+		pr_warn("%s: kmem_cache_create failed\n", __func__);
+		return -ENOMEM;
+	}
+	return 0;
+}
+core_initcall(trampfd_init);
@@ -1005,6 +1005,9 @@ asmlinkage long sys_pidfd_send_signal(int pidfd, int sig,
siginfo_t __user *info,
unsigned int flags);
asmlinkage long sys_pidfd_getfd(int pidfd, int fd, unsigned int flags);
+asmlinkage long sys_trampfd_create(int tramp_type,
+ const void __user *tramp_data,
+ unsigned int flags);
/*
* Architecture-specific system calls
new file mode 100644
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Trampoline File Descriptor - Internal structures and definitions.
+ *
+ * Author: Madhavan T. Venkataraman (madvenka@linux.microsoft.com)
+ *
+ * Copyright (c) 2020, Microsoft Corporation.
+ */
+#ifndef _LINUX_TRAMPFD_H
+#define _LINUX_TRAMPFD_H
+
+#include <uapi/linux/trampfd.h>
+
+#define TRAMPFD_MAX_REGS_SIZE \
+ (sizeof(struct trampfd_regs) + \
+ (sizeof(struct trampfd_reg) * TRAMPFD_MAX_REGS)) /* header + max registers */
+
+#define TRAMPFD_MAX_STACK_SIZE \
+ (sizeof(struct trampfd_stack) + TRAMPFD_MAX_DATA_SIZE) /* header + max data */
+
+#define TRAMPFD_MAX_PCS_SIZE \
+ (sizeof(struct trampfd_values) + sizeof(u64) * TRAMPFD_MAX_PCS) /* header + max PCs */
+
+/*
+ * Kernel-side state of one trampoline; stored in file->private_data.
+ */
+struct trampfd {
+ struct mutex lock; /* serializes regs, stack, allowed_pcs updates */
+ enum trampfd_type type; /* type of trampoline */
+ void *data; /* type specific data (kmalloc'ed, may be NULL) */
+ struct trampfd_map map; /* mmap() parameters reported to the user */
+ struct trampfd_regs *regs; /* register context, NULL if not set */
+ struct trampfd_stack *stack; /* stack context, NULL if not set */
+ struct trampfd_values *allowed_pcs; /* allowed PCs, NULL if not set */
+};
+
+#ifdef CONFIG_TRAMPFD
+
+/* Trampoline mapping */
+int trampfd_mmap(struct file *file, struct vm_area_struct *vma);
+unsigned long trampfd_get_unmapped_area(struct file *file,
+ unsigned long orig_addr,
+ unsigned long len,
+ unsigned long pgoff,
+ unsigned long flags);
+bool is_trampfd_vma(struct vm_area_struct *vma);
+
+/* Trampoline context */
+int trampfd_get_map(struct file *file, char __user *arg, size_t count);
+int trampfd_set_regs(struct file *file, const char __user *arg, size_t count);
+int trampfd_get_regs(struct file *file, char __user *arg, size_t count);
+int trampfd_set_stack(struct file *file, const char __user *arg, size_t count);
+int trampfd_get_stack(struct file *file, char __user *arg, size_t count);
+int trampfd_set_allowed_pcs(struct file *file, const char __user *arg,
+ size_t count);
+
+/* Arch functions */
+bool trampfd_fault(struct vm_area_struct *vma, struct pt_regs *pt_regs);
+bool trampfd_valid_regs(struct trampfd_regs *regs);
+bool trampfd_allowed_pc(struct trampfd *trampfd, struct trampfd_regs *regs);
+int trampfd_check_arch(struct trampfd *trampfd);
+
+/* Trampoline type-specific */
+int trampfd_create_data(struct trampfd *trampfd, const void __user *tramp_data);
+
+extern char *trampfd_name;
+extern struct kmem_cache *trampfd_cache;
+extern const struct file_operations trampfd_fops;
+
+#define USERPTR(ptr) ((void __user *)(uintptr_t)(ptr))
+
+#else
+
+static inline bool trampfd_fault(struct vm_area_struct *vma,
+ struct pt_regs *pt_regs)
+{
+ return false; /* stub used when CONFIG_TRAMPFD is not set */
+}
+
+#endif /* CONFIG_TRAMPFD */
+
+#endif /* _LINUX_TRAMPFD_H */
@@ -857,9 +857,11 @@ __SYSCALL(__NR_openat2, sys_openat2)
__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
#define __NR_faccessat2 439
__SYSCALL(__NR_faccessat2, sys_faccessat2)
+#define __NR_trampfd_create 440
+__SYSCALL(__NR_trampfd_create, sys_trampfd_create)
#undef __NR_syscalls
-#define __NR_syscalls 440
+#define __NR_syscalls 441
/*
* 32 bit systems traditionally used different
new file mode 100644
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Trampoline File Descriptor - API structures and definitions.
+ *
+ * Author: Madhavan T. Venkataraman (madvenka@linux.microsoft.com)
+ *
+ * Copyright (c) 2020, Microsoft Corporation.
+ */
+#ifndef _UAPI_LINUX_TRAMPFD_H
+#define _UAPI_LINUX_TRAMPFD_H
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+
+/*
+ * All structure fields are defined so that they are the same width and at the
+ * same structure offset on 32-bit and 64-bit to avoid compat code.
+ *
+ * All fields named "reserved" must be set to 0. They are there primarily for
+ * alignment. But they may be used in the future.
+ */
+
+/* ------------------------- Types of Trampolines ------------------------- */
+
+/*
+ * TRAMPFD_USER
+ * User programs use the kernel as a trampoline to setup a user context
+ * and jump to a user function. This trampoline type can be used to
+ * replace user trampoline code.
+ */
+enum trampfd_type {
+ TRAMPFD_USER, /* kernel sets up user context and jumps to user code */
+ TRAMPFD_NUM_TYPES, /* number of types; not itself a valid type */
+};
+
+/* ---------------------------- Context offsets ---------------------------- */
+
+/*
+ * A trampoline has different types of context associated with it. Each context
+ * type has a symbolic offset into trampfd. The context can be read from or
+ * written to at its symbolic offset in trampfd.
+ *
+ * TRAMPFD_MAP_OFFSET
+ * To read trampoline mapping parameters - struct trampfd_map.
+ *
+ * TRAMPFD_REGS_OFFSET
+ * To read/write trampoline register context - struct trampfd_regs.
+ *
+ * TRAMPFD_STACK_OFFSET
+ * To read/write trampoline stack context - struct trampfd_stack.
+ *
+ * TRAMPFD_ALLOWED_PCS_OFFSET
+ * To write a list of allowed PCs - struct trampfd_values.
+ */
+enum trampfd_offsets {
+ TRAMPFD_MAP_OFFSET, /* read only */
+ TRAMPFD_REGS_OFFSET, /* read and write */
+ TRAMPFD_STACK_OFFSET, /* read and write */
+ TRAMPFD_ALLOWED_PCS_OFFSET, /* write only */
+ TRAMPFD_NUM_OFFSETS, /* number of offsets; not a valid offset */
+};
+
+/* ------------------- Trampoline type specific data -------------------- */
+
+/*
+ * For TRAMPFD_USER.
+ */
+struct trampfd_user {
+ __u32 flags; /* for future enhancements; must be 0 for now */
+ __u32 reserved; /* must be 0 */
+};
+
+/* ------------------- Trampoline mapping parameters ---------------------- */
+
+/*
+ * Since the kernel implements the trampoline object, the kernel specifies
+ * how a trampoline should be mapped. User code must obtain these parameters
+ * and do an mmap() to map the trampoline. The first four parameters are used
+ * in the mmap() call. User code must add ioffset to the address returned by
+ * mmap() to get the actual invocation address for the trampoline.
+ */
+struct trampfd_map {
+ __u32 size; /* Size of the mapping */
+ __u32 prot; /* memory protection */
+ __u32 flags; /* map flags */
+ __u32 offset; /* file offset */
+ __u32 ioffset; /* invocation offset */
+ __u32 reserved; /* alignment / future use */
+};
+
+/* -------------------------- Register context -------------------------- */
+
+/*
+ * A register context may be specified for a trampoline, if applicable
+ * to the trampoline type. E.g., TRAMPFD_USER. The register context is
+ * an array of name-value pairs. When a trampoline is invoked, its user
+ * registers are loaded with the specified values. Register names are
+ * architecture specific and can be found in <linux/ptrace.h> for architectures
+ * that support trampolines. Enumerations reg_32_name and reg_64_name in
+ * <linux/ptrace.h> refer to 32-bit and 64-bit respectively.
+ */
+struct trampfd_reg {
+ __u32 name; /* Register name */
+ __u32 reserved; /* must be 0 */
+ __u64 value; /* Register value */
+};
+
+/*
+ * Register context. It is a variable sized structure sized by the number
+ * of registers.
+ */
+struct trampfd_regs {
+	__u32			nregs;		/* Number of registers */
+	__u32			reserved;	/* must be 0 */
+	struct trampfd_reg	regs[];		/* nregs entries follow */
+};
+
+#define TRAMPFD_MAX_REGS 40
+
+/* ---------------------------- Stack context ---------------------------- */
+
+/*
+ * A stack context may be specified for a trampoline, if applicable
+ * to the trampoline type. E.g., TRAMPFD_USER. The stack context contains
+ * a data buffer. When a trampoline is invoked, the specified data is pushed
+ * on the stack at a specified offset from the current stack pointer.
+ * Optionally, the stack pointer can be moved to the top of the data.
+ *
+ * This is a variable sized structure sized by the amount of data that is
+ * to be pushed on the user stack.
+ */
+struct trampfd_stack {
+	__u32	flags;		/* TRAMPFD_SFLAGS */
+	__u32	offset;		/* Offset from top of stack */
+	__u32	size;		/* Size of data to push */
+	__u32	reserved;	/* must be 0 */
+	__u8	data[];		/* size bytes to push on the stack */
+};
+
+#define TRAMPFD_MAX_DATA_SIZE 64
+#define TRAMPFD_MAX_STACK_OFFSET 256
+
+/*
+ * Stack context flags:
+ *
+ * TRAMPFD_SET_SP
+ * After pushing the data to user stack, move the stack pointer to the
+ * base of the data pushed. Note that the kernel will align the stack
+ * pointer based on the alignment requirements of the architecture.
+ */
+#define TRAMPFD_SET_SP 0x1
+#define TRAMPFD_SFLAGS (TRAMPFD_SET_SP)
+
+/* ---------------------------- Values context ---------------------------- */
+
+/*
+ * Some contexts may be just a list of values. For instance, the user can
+ * specify a list of allowed PCs for a trampoline. The following structure
+ * is used for those contexts.
+ */
+struct trampfd_values {
+	__u32	nvalues;	/* number of values */
+	__u32	reserved;	/* must be 0 */
+	__u64	values[];	/* nvalues entries follow */
+};
+
+#define TRAMPFD_MAX_PCS 16
+
+/* -------------------------------------------------------------------------- */
+
+#endif /* _UAPI_LINUX_TRAMPFD_H */
@@ -2313,3 +2313,11 @@ config ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
# <asm/syscall_wrapper.h>.
config ARCH_HAS_SYSCALL_WRAPPER
def_bool n
+
+config TRAMPFD
+ bool "Enable trampfd_create() system call"
+ depends on MMU
+ help
+ Enable the trampfd_create() system call that allows a process to
+ map trampolines within its address space that can be invoked
+ with the help of the kernel.
@@ -349,6 +349,9 @@ COND_SYSCALL(pkey_mprotect);
COND_SYSCALL(pkey_alloc);
COND_SYSCALL(pkey_free);
+/* Trampoline fd */
+COND_SYSCALL(trampfd_create);
+
/*
* Architecture specific weak syscall entries.