Message ID | 20240903232241.43995-6-anthony.yznaga@oracle.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Add support for shared PTEs across processes | expand |
On Wed, Sep 4, 2024 at 1:22 AM Anthony Yznaga <anthony.yznaga@oracle.com> wrote: > Reserve a range of ioctls for msharefs and add the first two ioctls > to get and set the start address and size of an mshare region. [...] > +static long > +msharefs_set_size(struct mm_struct *mm, struct mshare_data *m_data, > + struct mshare_info *minfo) > +{ > + unsigned long end = minfo->start + minfo->size; > + > + /* > + * Validate alignment for start address, and size > + */ > + if ((minfo->start | end) & (PGDIR_SIZE - 1)) { > + spin_unlock(&m_data->m_lock); > + return -EINVAL; > + } > + > + mm->mmap_base = minfo->start; > + mm->task_size = minfo->size; > + if (!mm->task_size) > + mm->task_size--; > + > + m_data->minfo.start = mm->mmap_base; > + m_data->minfo.size = mm->task_size; > + spin_unlock(&m_data->m_lock); > + > + return 0; > +} > + > +static long > +msharefs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) > +{ > + struct mshare_data *m_data = filp->private_data; > + struct mm_struct *mm = m_data->mm; > + struct mshare_info minfo; > + > + switch (cmd) { > + case MSHAREFS_GET_SIZE: > + spin_lock(&m_data->m_lock); > + minfo = m_data->minfo; > + spin_unlock(&m_data->m_lock); > + > + if (copy_to_user((void __user *)arg, &minfo, sizeof(minfo))) > + return -EFAULT; > + > + return 0; > + > + case MSHAREFS_SET_SIZE: > + if (copy_from_user(&minfo, (struct mshare_info __user *)arg, > + sizeof(minfo))) > + return -EFAULT; > + > + /* > + * If this mshare region has been set up once already, bail out > + */ > + spin_lock(&m_data->m_lock); > + if (m_data->minfo.start != 0) { Is there actually anything that prevents msharefs_set_size() from setting up m_data with ->minfo.start==0, so that a second MSHAREFS_SET_SIZE invocation will succeed? It would probably be more reliable to have a separate flag for "has this thing been set up yet". > + spin_unlock(&m_data->m_lock); > + return -EINVAL; > + } > + > + return msharefs_set_size(mm, m_data, &minfo); > + > + default: > + return -ENOTTY; > + } > +}
On 10/14/24 1:08 PM, Jann Horn wrote: > On Wed, Sep 4, 2024 at 1:22 AM Anthony Yznaga <anthony.yznaga@oracle.com> wrote: >> Reserve a range of ioctls for msharefs and add the first two ioctls >> to get and set the start address and size of an mshare region. > [...] >> +static long >> +msharefs_set_size(struct mm_struct *mm, struct mshare_data *m_data, >> + struct mshare_info *minfo) >> +{ >> + unsigned long end = minfo->start + minfo->size; >> + >> + /* >> + * Validate alignment for start address, and size >> + */ >> + if ((minfo->start | end) & (PGDIR_SIZE - 1)) { >> + spin_unlock(&m_data->m_lock); >> + return -EINVAL; >> + } >> + >> + mm->mmap_base = minfo->start; >> + mm->task_size = minfo->size; >> + if (!mm->task_size) >> + mm->task_size--; >> + >> + m_data->minfo.start = mm->mmap_base; >> + m_data->minfo.size = mm->task_size; >> + spin_unlock(&m_data->m_lock); >> + >> + return 0; >> +} >> + >> +static long >> +msharefs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) >> +{ >> + struct mshare_data *m_data = filp->private_data; >> + struct mm_struct *mm = m_data->mm; >> + struct mshare_info minfo; >> + >> + switch (cmd) { >> + case MSHAREFS_GET_SIZE: >> + spin_lock(&m_data->m_lock); >> + minfo = m_data->minfo; >> + spin_unlock(&m_data->m_lock); >> + >> + if (copy_to_user((void __user *)arg, &minfo, sizeof(minfo))) >> + return -EFAULT; >> + >> + return 0; >> + >> + case MSHAREFS_SET_SIZE: >> + if (copy_from_user(&minfo, (struct mshare_info __user *)arg, >> + sizeof(minfo))) >> + return -EFAULT; >> + >> + /* >> + * If this mshare region has been set up once already, bail out >> + */ >> + spin_lock(&m_data->m_lock); >> + if (m_data->minfo.start != 0) { > Is there actually anything that prevents msharefs_set_size() from > setting up m_data with ->minfo.start==0, so that a second > MSHAREFS_SET_SIZE invocation will succeed? It would probably be more > reliable to have a separate flag for "has this thing been set up yet". Thanks for pointing this out. Yes, this is problematic. A start address of 0 generally won't work because mmap() will fail unless there are sufficient privileges (cap_map_addr will return -EPERM). I already have changes to use the size to indicate initialization, but it may make sense to have flags. Anthony > > >> + spin_unlock(&m_data->m_lock); >> + return -EINVAL; >> + } >> + >> + return msharefs_set_size(mm, m_data, &minfo); >> + >> + default: >> + return -ENOTTY; >> + } >> +}
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index e91c0376ee59..d2fa6b117f66 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -301,6 +301,7 @@ Code Seq# Include File Comments 'v' 20-27 arch/powerpc/include/uapi/asm/vas-api.h VAS API 'v' C0-FF linux/meye.h conflict! 'w' all CERN SCI driver +'x' 00-1F linux/msharefs.h msharefs filesystem 'y' 00-1F packet based user level communications <mailto:zapman@interlan.net> 'z' 00-3F CAN bus card conflict! diff --git a/include/uapi/linux/msharefs.h b/include/uapi/linux/msharefs.h new file mode 100644 index 000000000000..c7b509c7e093 --- /dev/null +++ b/include/uapi/linux/msharefs.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * msharefs defines a memory region that is shared across processes. + * ioctl is used on files created under msharefs to set various + * attributes on these shared memory regions + * + * + * Copyright (C) 2024 Oracle Corp. All rights reserved. + * Author: Khalid Aziz <khalid@kernel.org> + */ + +#ifndef _UAPI_LINUX_MSHAREFS_H +#define _UAPI_LINUX_MSHAREFS_H + +#include <linux/ioctl.h> +#include <linux/types.h> + +/* + * msharefs specific ioctl commands + */ +#define MSHAREFS_GET_SIZE _IOR('x', 0, struct mshare_info) +#define MSHAREFS_SET_SIZE _IOW('x', 1, struct mshare_info) + +struct mshare_info { + __u64 start; + __u64 size; +}; + +#endif diff --git a/mm/mshare.c b/mm/mshare.c index a37849e724e1..af46eb76d2bc 100644 --- a/mm/mshare.c +++ b/mm/mshare.c @@ -10,15 +10,20 @@ * * Copyright (C) 2024 Oracle Corp. All rights reserved. * Author: Khalid Aziz <khalid@kernel.org> + * Author: Matthew Wilcox <willy@infradead.org> * */ #include <linux/fs.h> #include <linux/fs_context.h> +#include <linux/spinlock_types.h> #include <uapi/linux/magic.h> +#include <uapi/linux/msharefs.h> struct mshare_data { struct mm_struct *mm; + spinlock_t m_lock; + struct mshare_info minfo; }; struct msharefs_info { @@ -28,8 +33,74 @@ struct msharefs_info { static const struct inode_operations msharefs_dir_inode_ops; static const struct inode_operations msharefs_file_inode_ops; +static long +msharefs_set_size(struct mm_struct *mm, struct mshare_data *m_data, + struct mshare_info *minfo) +{ + unsigned long end = minfo->start + minfo->size; + + /* + * Validate alignment for start address, and size + */ + if ((minfo->start | end) & (PGDIR_SIZE - 1)) { + spin_unlock(&m_data->m_lock); + return -EINVAL; + } + + mm->mmap_base = minfo->start; + mm->task_size = minfo->size; + if (!mm->task_size) + mm->task_size--; + + m_data->minfo.start = mm->mmap_base; + m_data->minfo.size = mm->task_size; + spin_unlock(&m_data->m_lock); + + return 0; +} + +static long +msharefs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct mshare_data *m_data = filp->private_data; + struct mm_struct *mm = m_data->mm; + struct mshare_info minfo; + + switch (cmd) { + case MSHAREFS_GET_SIZE: + spin_lock(&m_data->m_lock); + minfo = m_data->minfo; + spin_unlock(&m_data->m_lock); + + if (copy_to_user((void __user *)arg, &minfo, sizeof(minfo))) + return -EFAULT; + + return 0; + + case MSHAREFS_SET_SIZE: + if (copy_from_user(&minfo, (struct mshare_info __user *)arg, + sizeof(minfo))) + return -EFAULT; + + /* + * If this mshare region has been set up once already, bail out + */ + spin_lock(&m_data->m_lock); + if (m_data->minfo.start != 0) { + spin_unlock(&m_data->m_lock); + return -EINVAL; + } + + return msharefs_set_size(mm, m_data, &minfo); + + default: + return -ENOTTY; + } +} + static const struct file_operations msharefs_file_operations = { .open = simple_open, + .unlocked_ioctl = msharefs_ioctl, .llseek = no_llseek, }; @@ -54,6 +125,7 @@ msharefs_fill_mm(struct inode *inode) goto err_free; } m_data->mm = mm; + spin_lock_init(&m_data->m_lock); inode->i_private = m_data; return 0;