diff mbox series

[v2,4/9] mm/mshare: Add a read operation for msharefs files

Message ID 05649b455e2191642e85cc5522ef39ad49fdeca3.1656531090.git.khalid.aziz@oracle.com (mailing list archive)
State New
Headers show
Series Add support for shared PTEs across processes | expand

Commit Message

Khalid Aziz June 29, 2022, 10:53 p.m. UTC
When a new file is created under msharefs, allocate a new mm_struct
that will hold the VMAs for mshare region. Also allocate structure
to defines the mshare region and add a read operation to the file
that returns this information about the mshare region. Currently
this information is returned as a struct:

struct mshare_info {
	unsigned long start;
	unsigned long size;
};

This gives the start address for mshare region and its size.

Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
---
 include/uapi/linux/mman.h |  5 +++
 mm/mshare.c               | 64 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 68 insertions(+), 1 deletion(-)

Comments

Darrick J. Wong June 30, 2022, 9:27 p.m. UTC | #1
On Wed, Jun 29, 2022 at 04:53:55PM -0600, Khalid Aziz wrote:
> When a new file is created under msharefs, allocate a new mm_struct
> that will hold the VMAs for mshare region. Also allocate structure
> to defines the mshare region and add a read operation to the file
> that returns this information about the mshare region. Currently
> this information is returned as a struct:
> 
> struct mshare_info {
> 	unsigned long start;
> 	unsigned long size;
> };
> 
> This gives the start address for mshare region and its size.
> 
> Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
> ---
>  include/uapi/linux/mman.h |  5 +++
>  mm/mshare.c               | 64 ++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 68 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
> index f55bc680b5b0..56fe446e24b1 100644
> --- a/include/uapi/linux/mman.h
> +++ b/include/uapi/linux/mman.h
> @@ -41,4 +41,9 @@
>  #define MAP_HUGE_2GB	HUGETLB_FLAG_ENCODE_2GB
>  #define MAP_HUGE_16GB	HUGETLB_FLAG_ENCODE_16GB
>  
> +struct mshare_info {
> +	unsigned long start;
> +	unsigned long size;

You might want to make these explicitly u64, since this is userspace
ABI and you never know when someone will want to do something crazy like
run 32-bit programs with mshare files.

Also you might want to add some padding fields for flags, future
expansion, etc.

> +};
> +
>  #endif /* _UAPI_LINUX_MMAN_H */
> diff --git a/mm/mshare.c b/mm/mshare.c
> index 2d5924d39221..d238b68b0576 100644
> --- a/mm/mshare.c
> +++ b/mm/mshare.c
> @@ -22,8 +22,14 @@
>  #include <uapi/linux/magic.h>
>  #include <uapi/linux/limits.h>
>  #include <uapi/linux/mman.h>
> +#include <linux/sched/mm.h>
>  
>  static struct super_block *msharefs_sb;
> +struct mshare_data {
> +	struct mm_struct *mm;
> +	refcount_t refcnt;
> +	struct mshare_info *minfo;
> +};
>  
>  static const struct inode_operations msharefs_dir_inode_ops;
>  static const struct inode_operations msharefs_file_inode_ops;
> @@ -34,8 +40,29 @@ msharefs_open(struct inode *inode, struct file *file)
>  	return simple_open(inode, file);
>  }
>  
> +static ssize_t
> +msharefs_read(struct kiocb *iocb, struct iov_iter *iov)
> +{
> +	struct mshare_data *info = iocb->ki_filp->private_data;
> +	size_t ret;
> +	struct mshare_info m_info;
> +
> +	if (info->minfo != NULL) {
> +		m_info.start = info->minfo->start;
> +		m_info.size = info->minfo->size;
> +	} else {
> +		m_info.start = 0;
> +		m_info.size = 0;

Hmmm, read()ing out the shared mapping information.  Heh.

When does this case happen?  Is it before anybody mmaps this file into
an address space?

> +	}
> +	ret = copy_to_iter(&m_info, sizeof(m_info), iov);
> +	if (!ret)
> +		return -EFAULT;
> +	return ret;
> +}
> +
>  static const struct file_operations msharefs_file_operations = {
>  	.open		= msharefs_open,
> +	.read_iter	= msharefs_read,
>  	.llseek		= no_llseek,
>  };
>  
> @@ -73,12 +100,43 @@ static struct dentry
>  	return ERR_PTR(-ENOMEM);
>  }
>  
> +static int
> +msharefs_fill_mm(struct inode *inode)
> +{
> +	struct mm_struct *mm;
> +	struct mshare_data *info = NULL;
> +	int retval = 0;
> +
> +	mm = mm_alloc();
> +	if (!mm) {
> +		retval = -ENOMEM;
> +		goto err_free;
> +	}
> +
> +	info = kzalloc(sizeof(*info), GFP_KERNEL);
> +	if (!info) {
> +		retval = -ENOMEM;
> +		goto err_free;
> +	}
> +	info->mm = mm;
> +	info->minfo = NULL;
> +	refcount_set(&info->refcnt, 1);
> +	inode->i_private = info;
> +
> +	return 0;
> +
> +err_free:
> +	if (mm)
> +		mmput(mm);
> +	kfree(info);
> +	return retval;
> +}
> +
>  static struct inode
>  *msharefs_get_inode(struct super_block *sb, const struct inode *dir,
>  			umode_t mode)
>  {
>  	struct inode *inode = new_inode(sb);
> -
>  	if (inode) {
>  		inode->i_ino = get_next_ino();
>  		inode_init_owner(&init_user_ns, inode, dir, mode);
> @@ -89,6 +147,10 @@ static struct inode
>  		case S_IFREG:
>  			inode->i_op = &msharefs_file_inode_ops;
>  			inode->i_fop = &msharefs_file_operations;
> +			if (msharefs_fill_mm(inode) != 0) {
> +				discard_new_inode(inode);
> +				inode = ERR_PTR(-ENOMEM);

Is it intentional to clobber the msharefs_fill_mm return value and
replace it with ENOMEM?

--D

> +			}
>  			break;
>  		case S_IFDIR:
>  			inode->i_op = &msharefs_dir_inode_ops;
> -- 
> 2.32.0
>
Khalid Aziz June 30, 2022, 10:27 p.m. UTC | #2
On 6/30/22 15:27, Darrick J. Wong wrote:
> On Wed, Jun 29, 2022 at 04:53:55PM -0600, Khalid Aziz wrote:
>> When a new file is created under msharefs, allocate a new mm_struct
>> that will hold the VMAs for mshare region. Also allocate structure
>> to defines the mshare region and add a read operation to the file
>> that returns this information about the mshare region. Currently
>> this information is returned as a struct:
>>
>> struct mshare_info {
>> 	unsigned long start;
>> 	unsigned long size;
>> };
>>
>> This gives the start address for mshare region and its size.
>>
>> Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
>> ---
>>   include/uapi/linux/mman.h |  5 +++
>>   mm/mshare.c               | 64 ++++++++++++++++++++++++++++++++++++++-
>>   2 files changed, 68 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
>> index f55bc680b5b0..56fe446e24b1 100644
>> --- a/include/uapi/linux/mman.h
>> +++ b/include/uapi/linux/mman.h
>> @@ -41,4 +41,9 @@
>>   #define MAP_HUGE_2GB	HUGETLB_FLAG_ENCODE_2GB
>>   #define MAP_HUGE_16GB	HUGETLB_FLAG_ENCODE_16GB
>>   
>> +struct mshare_info {
>> +	unsigned long start;
>> +	unsigned long size;
> 
> You might want to make these explicitly u64, since this is userspace
> ABI and you never know when someone will want to do something crazy like
> run 32-bit programs with mshare files.
> 
> Also you might want to add some padding fields for flags, future
> expansion, etc.

That sounds like a good idea. I will queue it up for next version of patch series.

> 
>> +};
>> +
>>   #endif /* _UAPI_LINUX_MMAN_H */
>> diff --git a/mm/mshare.c b/mm/mshare.c
>> index 2d5924d39221..d238b68b0576 100644
>> --- a/mm/mshare.c
>> +++ b/mm/mshare.c
>> @@ -22,8 +22,14 @@
>>   #include <uapi/linux/magic.h>
>>   #include <uapi/linux/limits.h>
>>   #include <uapi/linux/mman.h>
>> +#include <linux/sched/mm.h>
>>   
>>   static struct super_block *msharefs_sb;
>> +struct mshare_data {
>> +	struct mm_struct *mm;
>> +	refcount_t refcnt;
>> +	struct mshare_info *minfo;
>> +};
>>   
>>   static const struct inode_operations msharefs_dir_inode_ops;
>>   static const struct inode_operations msharefs_file_inode_ops;
>> @@ -34,8 +40,29 @@ msharefs_open(struct inode *inode, struct file *file)
>>   	return simple_open(inode, file);
>>   }
>>   
>> +static ssize_t
>> +msharefs_read(struct kiocb *iocb, struct iov_iter *iov)
>> +{
>> +	struct mshare_data *info = iocb->ki_filp->private_data;
>> +	size_t ret;
>> +	struct mshare_info m_info;
>> +
>> +	if (info->minfo != NULL) {
>> +		m_info.start = info->minfo->start;
>> +		m_info.size = info->minfo->size;
>> +	} else {
>> +		m_info.start = 0;
>> +		m_info.size = 0;
> 
> Hmmm, read()ing out the shared mapping information.  Heh.
> 
> When does this case happen?  Is it before anybody mmaps this file into
> an address space?
> 

It can happen before or after the first mmap which will establish the start address and size. Hence I have to account 
for both cases.

>> +	}
>> +	ret = copy_to_iter(&m_info, sizeof(m_info), iov);
>> +	if (!ret)
>> +		return -EFAULT;
>> +	return ret;
>> +}
>> +
>>   static const struct file_operations msharefs_file_operations = {
>>   	.open		= msharefs_open,
>> +	.read_iter	= msharefs_read,
>>   	.llseek		= no_llseek,
>>   };
>>   
>> @@ -73,12 +100,43 @@ static struct dentry
>>   	return ERR_PTR(-ENOMEM);
>>   }
>>   
>> +static int
>> +msharefs_fill_mm(struct inode *inode)
>> +{
>> +	struct mm_struct *mm;
>> +	struct mshare_data *info = NULL;
>> +	int retval = 0;
>> +
>> +	mm = mm_alloc();
>> +	if (!mm) {
>> +		retval = -ENOMEM;
>> +		goto err_free;
>> +	}
>> +
>> +	info = kzalloc(sizeof(*info), GFP_KERNEL);
>> +	if (!info) {
>> +		retval = -ENOMEM;
>> +		goto err_free;
>> +	}
>> +	info->mm = mm;
>> +	info->minfo = NULL;
>> +	refcount_set(&info->refcnt, 1);
>> +	inode->i_private = info;
>> +
>> +	return 0;
>> +
>> +err_free:
>> +	if (mm)
>> +		mmput(mm);
>> +	kfree(info);
>> +	return retval;
>> +}
>> +
>>   static struct inode
>>   *msharefs_get_inode(struct super_block *sb, const struct inode *dir,
>>   			umode_t mode)
>>   {
>>   	struct inode *inode = new_inode(sb);
>> -
>>   	if (inode) {
>>   		inode->i_ino = get_next_ino();
>>   		inode_init_owner(&init_user_ns, inode, dir, mode);
>> @@ -89,6 +147,10 @@ static struct inode
>>   		case S_IFREG:
>>   			inode->i_op = &msharefs_file_inode_ops;
>>   			inode->i_fop = &msharefs_file_operations;
>> +			if (msharefs_fill_mm(inode) != 0) {
>> +				discard_new_inode(inode);
>> +				inode = ERR_PTR(-ENOMEM);
> 
> Is it intentional to clobber the msharefs_fill_mm return value and
> replace it with ENOMEM?

ENOMEM sounded like the right value to return from msharefs_get_inode() in case of failure. On the other hand, there 
isn't much of a reason to not just return the return value from msharefs_fill_mm(). I can change that.

Thanks for the review.

--
Khalid
diff mbox series

Patch

diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
index f55bc680b5b0..56fe446e24b1 100644
--- a/include/uapi/linux/mman.h
+++ b/include/uapi/linux/mman.h
@@ -41,4 +41,9 @@ 
 #define MAP_HUGE_2GB	HUGETLB_FLAG_ENCODE_2GB
 #define MAP_HUGE_16GB	HUGETLB_FLAG_ENCODE_16GB
 
+struct mshare_info {
+	unsigned long start;
+	unsigned long size;
+};
+
 #endif /* _UAPI_LINUX_MMAN_H */
diff --git a/mm/mshare.c b/mm/mshare.c
index 2d5924d39221..d238b68b0576 100644
--- a/mm/mshare.c
+++ b/mm/mshare.c
@@ -22,8 +22,14 @@ 
 #include <uapi/linux/magic.h>
 #include <uapi/linux/limits.h>
 #include <uapi/linux/mman.h>
+#include <linux/sched/mm.h>
 
 static struct super_block *msharefs_sb;
+struct mshare_data {
+	struct mm_struct *mm;
+	refcount_t refcnt;
+	struct mshare_info *minfo;
+};
 
 static const struct inode_operations msharefs_dir_inode_ops;
 static const struct inode_operations msharefs_file_inode_ops;
@@ -34,8 +40,29 @@  msharefs_open(struct inode *inode, struct file *file)
 	return simple_open(inode, file);
 }
 
+static ssize_t
+msharefs_read(struct kiocb *iocb, struct iov_iter *iov)
+{
+	struct mshare_data *info = iocb->ki_filp->private_data;
+	size_t ret;
+	struct mshare_info m_info;
+
+	if (info->minfo != NULL) {
+		m_info.start = info->minfo->start;
+		m_info.size = info->minfo->size;
+	} else {
+		m_info.start = 0;
+		m_info.size = 0;
+	}
+	ret = copy_to_iter(&m_info, sizeof(m_info), iov);
+	if (!ret)
+		return -EFAULT;
+	return ret;
+}
+
 static const struct file_operations msharefs_file_operations = {
 	.open		= msharefs_open,
+	.read_iter	= msharefs_read,
 	.llseek		= no_llseek,
 };
 
@@ -73,12 +100,43 @@  static struct dentry
 	return ERR_PTR(-ENOMEM);
 }
 
+static int
+msharefs_fill_mm(struct inode *inode)
+{
+	struct mm_struct *mm;
+	struct mshare_data *info = NULL;
+	int retval = 0;
+
+	mm = mm_alloc();
+	if (!mm) {
+		retval = -ENOMEM;
+		goto err_free;
+	}
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		retval = -ENOMEM;
+		goto err_free;
+	}
+	info->mm = mm;
+	info->minfo = NULL;
+	refcount_set(&info->refcnt, 1);
+	inode->i_private = info;
+
+	return 0;
+
+err_free:
+	if (mm)
+		mmput(mm);
+	kfree(info);
+	return retval;
+}
+
 static struct inode
 *msharefs_get_inode(struct super_block *sb, const struct inode *dir,
 			umode_t mode)
 {
 	struct inode *inode = new_inode(sb);
-
 	if (inode) {
 		inode->i_ino = get_next_ino();
 		inode_init_owner(&init_user_ns, inode, dir, mode);
@@ -89,6 +147,10 @@  static struct inode
 		case S_IFREG:
 			inode->i_op = &msharefs_file_inode_ops;
 			inode->i_fop = &msharefs_file_operations;
+			if (msharefs_fill_mm(inode) != 0) {
+				discard_new_inode(inode);
+				inode = ERR_PTR(-ENOMEM);
+			}
 			break;
 		case S_IFDIR:
 			inode->i_op = &msharefs_dir_inode_ops;