diff mbox series

[RFC,v3,05/10] mm/mshare: Add ioctl support

Message ID 20240903232241.43995-6-anthony.yznaga@oracle.com (mailing list archive)
State New
Headers show
Series Add support for shared PTEs across processes | expand

Commit Message

Anthony Yznaga Sept. 3, 2024, 11:22 p.m. UTC
From: Khalid Aziz <khalid@kernel.org>

Reserve a range of ioctls for msharefs and add the first two ioctls
to get and set the start address and size of an mshare region.

Signed-off-by: Khalid Aziz <khalid@kernel.org>
Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
---
 .../userspace-api/ioctl/ioctl-number.rst      |  1 +
 include/uapi/linux/msharefs.h                 | 29 ++++++++
 mm/mshare.c                                   | 72 +++++++++++++++++++
 3 files changed, 102 insertions(+)
 create mode 100644 include/uapi/linux/msharefs.h

Comments

Jann Horn Oct. 14, 2024, 8:08 p.m. UTC | #1
On Wed, Sep 4, 2024 at 1:22 AM Anthony Yznaga <anthony.yznaga@oracle.com> wrote:
> Reserve a range of ioctls for msharefs and add the first two ioctls
> to get and set the start address and size of an mshare region.
[...]
> +static long
> +msharefs_set_size(struct mm_struct *mm, struct mshare_data *m_data,
> +                       struct mshare_info *minfo)
> +{
> +       unsigned long end = minfo->start + minfo->size;
> +
> +       /*
> +        * Validate alignment for start address, and size
> +        */
> +       if ((minfo->start | end) & (PGDIR_SIZE - 1)) {
> +               spin_unlock(&m_data->m_lock);
> +               return -EINVAL;
> +       }
> +
> +       mm->mmap_base = minfo->start;
> +       mm->task_size = minfo->size;
> +       if (!mm->task_size)
> +               mm->task_size--;
> +
> +       m_data->minfo.start = mm->mmap_base;
> +       m_data->minfo.size = mm->task_size;
> +       spin_unlock(&m_data->m_lock);
> +
> +       return 0;
> +}
> +
> +static long
> +msharefs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> +{
> +       struct mshare_data *m_data = filp->private_data;
> +       struct mm_struct *mm = m_data->mm;
> +       struct mshare_info minfo;
> +
> +       switch (cmd) {
> +       case MSHAREFS_GET_SIZE:
> +               spin_lock(&m_data->m_lock);
> +               minfo = m_data->minfo;
> +               spin_unlock(&m_data->m_lock);
> +
> +               if (copy_to_user((void __user *)arg, &minfo, sizeof(minfo)))
> +                       return -EFAULT;
> +
> +               return 0;
> +
> +       case MSHAREFS_SET_SIZE:
> +               if (copy_from_user(&minfo, (struct mshare_info __user *)arg,
> +                       sizeof(minfo)))
> +                       return -EFAULT;
> +
> +               /*
> +                * If this mshare region has been set up once already, bail out
> +                */
> +               spin_lock(&m_data->m_lock);
> +               if (m_data->minfo.start != 0) {

Is there actually anything that prevents msharefs_set_size() from
setting up m_data with ->minfo.start==0, so that a second
MSHAREFS_SET_SIZE invocation will succeed? It would probably be more
reliable to have a separate flag for "has this thing been set up yet".


> +                       spin_unlock(&m_data->m_lock);
> +                       return -EINVAL;
> +               }
> +
> +               return msharefs_set_size(mm, m_data, &minfo);
> +
> +       default:
> +               return -ENOTTY;
> +       }
> +}
Anthony Yznaga Oct. 16, 2024, 12:49 a.m. UTC | #2
On 10/14/24 1:08 PM, Jann Horn wrote:
> On Wed, Sep 4, 2024 at 1:22 AM Anthony Yznaga <anthony.yznaga@oracle.com> wrote:
>> Reserve a range of ioctls for msharefs and add the first two ioctls
>> to get and set the start address and size of an mshare region.
> [...]
>> +static long
>> +msharefs_set_size(struct mm_struct *mm, struct mshare_data *m_data,
>> +                       struct mshare_info *minfo)
>> +{
>> +       unsigned long end = minfo->start + minfo->size;
>> +
>> +       /*
>> +        * Validate alignment for start address, and size
>> +        */
>> +       if ((minfo->start | end) & (PGDIR_SIZE - 1)) {
>> +               spin_unlock(&m_data->m_lock);
>> +               return -EINVAL;
>> +       }
>> +
>> +       mm->mmap_base = minfo->start;
>> +       mm->task_size = minfo->size;
>> +       if (!mm->task_size)
>> +               mm->task_size--;
>> +
>> +       m_data->minfo.start = mm->mmap_base;
>> +       m_data->minfo.size = mm->task_size;
>> +       spin_unlock(&m_data->m_lock);
>> +
>> +       return 0;
>> +}
>> +
>> +static long
>> +msharefs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>> +{
>> +       struct mshare_data *m_data = filp->private_data;
>> +       struct mm_struct *mm = m_data->mm;
>> +       struct mshare_info minfo;
>> +
>> +       switch (cmd) {
>> +       case MSHAREFS_GET_SIZE:
>> +               spin_lock(&m_data->m_lock);
>> +               minfo = m_data->minfo;
>> +               spin_unlock(&m_data->m_lock);
>> +
>> +               if (copy_to_user((void __user *)arg, &minfo, sizeof(minfo)))
>> +                       return -EFAULT;
>> +
>> +               return 0;
>> +
>> +       case MSHAREFS_SET_SIZE:
>> +               if (copy_from_user(&minfo, (struct mshare_info __user *)arg,
>> +                       sizeof(minfo)))
>> +                       return -EFAULT;
>> +
>> +               /*
>> +                * If this mshare region has been set up once already, bail out
>> +                */
>> +               spin_lock(&m_data->m_lock);
>> +               if (m_data->minfo.start != 0) {
> Is there actually anything that prevents msharefs_set_size() from
> setting up m_data with ->minfo.start==0, so that a second
> MSHAREFS_SET_SIZE invocation will succeed? It would probably be more
> reliable to have a separate flag for "has this thing been set up yet".

Thanks for pointing this out. Yes, this is problematic. A start address 
of 0 generally won't work because mmap() will fail unless there are 
sufficient privileges (cap_map_addr will return -EPERM). I already have 
changes to use the size to indicate initialization, but it may make 
sense to have flags.


Anthony

>
>
>> +                       spin_unlock(&m_data->m_lock);
>> +                       return -EINVAL;
>> +               }
>> +
>> +               return msharefs_set_size(mm, m_data, &minfo);
>> +
>> +       default:
>> +               return -ENOTTY;
>> +       }
>> +}
diff mbox series

Patch

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index e91c0376ee59..d2fa6b117f66 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -301,6 +301,7 @@  Code  Seq#    Include File                                           Comments
 'v'   20-27  arch/powerpc/include/uapi/asm/vas-api.h		     VAS API
 'v'   C0-FF  linux/meye.h                                            conflict!
 'w'   all                                                            CERN SCI driver
+'x'   00-1F  linux/msharefs.h                                        msharefs filesystem
 'y'   00-1F                                                          packet based user level communications
                                                                      <mailto:zapman@interlan.net>
 'z'   00-3F                                                          CAN bus card conflict!
diff --git a/include/uapi/linux/msharefs.h b/include/uapi/linux/msharefs.h
new file mode 100644
index 000000000000..c7b509c7e093
--- /dev/null
+++ b/include/uapi/linux/msharefs.h
@@ -0,0 +1,29 @@ 
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * msharefs defines a memory region that is shared across processes.
+ * ioctl is used on files created under msharefs to set various
+ * attributes on these shared memory regions
+ *
+ *
+ * Copyright (C) 2024 Oracle Corp. All rights reserved.
+ * Author:	Khalid Aziz <khalid@kernel.org>
+ */
+
+#ifndef _UAPI_LINUX_MSHAREFS_H
+#define _UAPI_LINUX_MSHAREFS_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/*
+ * msharefs specific ioctl commands
+ */
+#define MSHAREFS_GET_SIZE	_IOR('x', 0,  struct mshare_info)
+#define MSHAREFS_SET_SIZE	_IOW('x', 1,  struct mshare_info)
+
+struct mshare_info {
+	__u64 start;
+	__u64 size;
+};
+
+#endif
diff --git a/mm/mshare.c b/mm/mshare.c
index a37849e724e1..af46eb76d2bc 100644
--- a/mm/mshare.c
+++ b/mm/mshare.c
@@ -10,15 +10,20 @@ 
  *
  * Copyright (C) 2024 Oracle Corp. All rights reserved.
  * Author:	Khalid Aziz <khalid@kernel.org>
+ * Author:	Matthew Wilcox <willy@infradead.org>
  *
  */
 
 #include <linux/fs.h>
 #include <linux/fs_context.h>
+#include <linux/spinlock_types.h>
 #include <uapi/linux/magic.h>
+#include <uapi/linux/msharefs.h>
 
 struct mshare_data {
 	struct mm_struct *mm;
+	spinlock_t m_lock;
+	struct mshare_info minfo;
 };
 
 struct msharefs_info {
@@ -28,8 +33,74 @@  struct msharefs_info {
 static const struct inode_operations msharefs_dir_inode_ops;
 static const struct inode_operations msharefs_file_inode_ops;
 
+static long
+msharefs_set_size(struct mm_struct *mm, struct mshare_data *m_data,
+			struct mshare_info *minfo)
+{
+	unsigned long end = minfo->start + minfo->size;
+
+	/*
+	 * Validate alignment for start address, and size
+	 */
+	if ((minfo->start | end) & (PGDIR_SIZE - 1)) {
+		spin_unlock(&m_data->m_lock);
+		return -EINVAL;
+	}
+
+	mm->mmap_base = minfo->start;
+	mm->task_size = minfo->size;
+	if (!mm->task_size)
+		mm->task_size--;
+
+	m_data->minfo.start = mm->mmap_base;
+	m_data->minfo.size = mm->task_size;
+	spin_unlock(&m_data->m_lock);
+
+	return 0;
+}
+
+static long
+msharefs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	struct mshare_data *m_data = filp->private_data;
+	struct mm_struct *mm = m_data->mm;
+	struct mshare_info minfo;
+
+	switch (cmd) {
+	case MSHAREFS_GET_SIZE:
+		spin_lock(&m_data->m_lock);
+		minfo = m_data->minfo;
+		spin_unlock(&m_data->m_lock);
+
+		if (copy_to_user((void __user *)arg, &minfo, sizeof(minfo)))
+			return -EFAULT;
+
+		return 0;
+
+	case MSHAREFS_SET_SIZE:
+		if (copy_from_user(&minfo, (struct mshare_info __user *)arg,
+			sizeof(minfo)))
+			return -EFAULT;
+
+		/*
+		 * If this mshare region has been set up once already, bail out
+		 */
+		spin_lock(&m_data->m_lock);
+		if (m_data->minfo.start != 0) {
+			spin_unlock(&m_data->m_lock);
+			return -EINVAL;
+		}
+
+		return msharefs_set_size(mm, m_data, &minfo);
+
+	default:
+		return -ENOTTY;
+	}
+}
+
 static const struct file_operations msharefs_file_operations = {
 	.open		= simple_open,
+	.unlocked_ioctl	= msharefs_ioctl,
 	.llseek		= no_llseek,
 };
 
@@ -54,6 +125,7 @@  msharefs_fill_mm(struct inode *inode)
 		goto err_free;
 	}
 	m_data->mm = mm;
+	spin_lock_init(&m_data->m_lock);
 	inode->i_private = m_data;
 
 	return 0;