diff mbox series

[v2,7/9] mm/mshare: Add unlink and munmap support

Message ID 1b0a0e8c9558766f13a64ae93092eb8c9ea7965d.1656531090.git.khalid.aziz@oracle.com (mailing list archive)
State New
Headers show
Series Add support for shared PTEs across processes | expand

Commit Message

Khalid Aziz June 29, 2022, 10:53 p.m. UTC
Number of mappings of an mshare region should be tracked so it can
be removed when there are no more references to it and associated
file has been deleted. This add code to support the unlink operation
for associated file, remove the mshare region on file deletion if
refcount goes to zero, add munmap operation to maintain refcount
to mshare region and remove it on last munmap if file has been
deleted.

Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
---
 mm/mshare.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 63 insertions(+), 1 deletion(-)

Comments

Darrick J. Wong June 30, 2022, 9:50 p.m. UTC | #1
On Wed, Jun 29, 2022 at 04:53:58PM -0600, Khalid Aziz wrote:
> Number of mappings of an mshare region should be tracked so it can
> be removed when there are no more references to it and associated
> file has been deleted. This add code to support the unlink operation
> for associated file, remove the mshare region on file deletion if
> refcount goes to zero, add munmap operation to maintain refcount
> to mshare region and remove it on last munmap if file has been
> deleted.
> 
> Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
> ---
>  mm/mshare.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 63 insertions(+), 1 deletion(-)
> 
> diff --git a/mm/mshare.c b/mm/mshare.c
> index 088a6cab1e93..90ce0564a138 100644
> --- a/mm/mshare.c
> +++ b/mm/mshare.c
> @@ -29,6 +29,7 @@ static struct super_block *msharefs_sb;
>  struct mshare_data {
>  	struct mm_struct *mm;
>  	refcount_t refcnt;
> +	int deleted;
>  	struct mshare_info *minfo;
>  };
>  
> @@ -48,6 +49,7 @@ msharefs_read(struct kiocb *iocb, struct iov_iter *iov)
>  	size_t ret;
>  	struct mshare_info m_info;
>  
> +	mmap_read_lock(info->mm);
>  	if (info->minfo != NULL) {
>  		m_info.start = info->minfo->start;
>  		m_info.size = info->minfo->size;
> @@ -55,18 +57,42 @@ msharefs_read(struct kiocb *iocb, struct iov_iter *iov)
>  		m_info.start = 0;
>  		m_info.size = 0;
>  	}
> +	mmap_read_unlock(info->mm);
>  	ret = copy_to_iter(&m_info, sizeof(m_info), iov);
>  	if (!ret)
>  		return -EFAULT;
>  	return ret;
>  }
>  
> +static void
> +msharefs_close(struct vm_area_struct *vma)
> +{
> +	struct mshare_data *info = vma->vm_private_data;
> +
> +	if (refcount_dec_and_test(&info->refcnt)) {
> +		mmap_read_lock(info->mm);
> +		if (info->deleted) {
> +			mmap_read_unlock(info->mm);
> +			mmput(info->mm);
> +			kfree(info->minfo);
> +			kfree(info);

Aren't filesystems supposed to take care of disposing of the file data
in destroy_inode?  IIRC struct inode doesn't go away until all fds are
closed, mappings are torn down, and there are no more references from
dentries.  I could be misremembering since it's been a few months since
I went looking at the (VFS) inode lifecycle.

> +		} else {
> +			mmap_read_unlock(info->mm);
> +		}
> +	}
> +}
> +
> +static const struct vm_operations_struct msharefs_vm_ops = {
> +	.close	= msharefs_close,
> +};
> +
>  static int
>  msharefs_mmap(struct file *file, struct vm_area_struct *vma)
>  {
>  	struct mshare_data *info = file->private_data;
>  	struct mm_struct *mm = info->mm;
>  
> +	mmap_write_lock(mm);
>  	/*
>  	 * If this mshare region has been set up once already, bail out
>  	 */
> @@ -80,10 +106,14 @@ msharefs_mmap(struct file *file, struct vm_area_struct *vma)
>  	mm->task_size = vma->vm_end - vma->vm_start;
>  	if (!mm->task_size)
>  		mm->task_size--;
> +	mmap_write_unlock(mm);
>  	info->minfo->start = mm->mmap_base;
>  	info->minfo->size = mm->task_size;
> +	info->deleted = 0;
> +	refcount_inc(&info->refcnt);
>  	vma->vm_flags |= VM_SHARED_PT;
>  	vma->vm_private_data = info;
> +	vma->vm_ops = &msharefs_vm_ops;
>  	return 0;
>  }
>  
> @@ -240,6 +270,38 @@ msharefs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
>  	return ret;
>  }
>  
> +static int
> +msharefs_unlink(struct inode *dir, struct dentry *dentry)
> +{
> +	struct inode *inode = d_inode(dentry);
> +	struct mshare_data *info = inode->i_private;
> +
> +	/*
> +	 * Unmap the mshare region if it is still mapped in
> +	 */
> +	vm_munmap(info->minfo->start, info->minfo->size);
> +
> +	/*
> +	 * Mark msharefs file for deletion so it can not be opened
> +	 * and used for mshare mappings any more
> +	 */
> +	simple_unlink(dir, dentry);
> +	mmap_write_lock(info->mm);
> +	info->deleted = 1;
> +	mmap_write_unlock(info->mm);

What if the file is hardlinked?

--D

> +
> +	/*
> +	 * Is this the last reference? If so, delete mshare region and
> +	 * remove the file
> +	 */
> +	if (!refcount_dec_and_test(&info->refcnt)) {
> +		mmput(info->mm);
> +		kfree(info->minfo);
> +		kfree(info);
> +	}
> +	return 0;
> +}
> +
>  static const struct inode_operations msharefs_file_inode_ops = {
>  	.setattr	= simple_setattr,
>  	.getattr	= simple_getattr,
> @@ -248,7 +310,7 @@ static const struct inode_operations msharefs_dir_inode_ops = {
>  	.create		= msharefs_create,
>  	.lookup		= simple_lookup,
>  	.link		= simple_link,
> -	.unlink		= simple_unlink,
> +	.unlink		= msharefs_unlink,
>  	.mkdir		= msharefs_mkdir,
>  	.rmdir		= simple_rmdir,
>  	.mknod		= msharefs_mknod,
> -- 
> 2.32.0
>
Khalid Aziz July 1, 2022, 3:58 p.m. UTC | #2
On 6/30/22 15:50, Darrick J. Wong wrote:
> On Wed, Jun 29, 2022 at 04:53:58PM -0600, Khalid Aziz wrote:
>> Number of mappings of an mshare region should be tracked so it can
>> be removed when there are no more references to it and associated
>> file has been deleted. This add code to support the unlink operation
>> for associated file, remove the mshare region on file deletion if
>> refcount goes to zero, add munmap operation to maintain refcount
>> to mshare region and remove it on last munmap if file has been
>> deleted.
>>
>> Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
>> ---
>>   mm/mshare.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>>   1 file changed, 63 insertions(+), 1 deletion(-)
>>
>> diff --git a/mm/mshare.c b/mm/mshare.c
>> index 088a6cab1e93..90ce0564a138 100644
>> --- a/mm/mshare.c
>> +++ b/mm/mshare.c
>> @@ -29,6 +29,7 @@ static struct super_block *msharefs_sb;
>>   struct mshare_data {
>>   	struct mm_struct *mm;
>>   	refcount_t refcnt;
>> +	int deleted;
>>   	struct mshare_info *minfo;
>>   };
>>   
>> @@ -48,6 +49,7 @@ msharefs_read(struct kiocb *iocb, struct iov_iter *iov)
>>   	size_t ret;
>>   	struct mshare_info m_info;
>>   
>> +	mmap_read_lock(info->mm);
>>   	if (info->minfo != NULL) {
>>   		m_info.start = info->minfo->start;
>>   		m_info.size = info->minfo->size;
>> @@ -55,18 +57,42 @@ msharefs_read(struct kiocb *iocb, struct iov_iter *iov)
>>   		m_info.start = 0;
>>   		m_info.size = 0;
>>   	}
>> +	mmap_read_unlock(info->mm);
>>   	ret = copy_to_iter(&m_info, sizeof(m_info), iov);
>>   	if (!ret)
>>   		return -EFAULT;
>>   	return ret;
>>   }
>>   
>> +static void
>> +msharefs_close(struct vm_area_struct *vma)
>> +{
>> +	struct mshare_data *info = vma->vm_private_data;
>> +
>> +	if (refcount_dec_and_test(&info->refcnt)) {
>> +		mmap_read_lock(info->mm);
>> +		if (info->deleted) {
>> +			mmap_read_unlock(info->mm);
>> +			mmput(info->mm);
>> +			kfree(info->minfo);
>> +			kfree(info);
> 
> Aren't filesystems supposed to take care of disposing of the file data
> in destroy_inode?  IIRC struct inode doesn't go away until all fds are
> closed, mappings are torn down, and there are no more references from
> dentries.  I could be misremembering since it's been a few months since
> I went looking at the (VFS) inode lifecycle.

Documentation (vfs.rst) says - "this method is called by destroy_inode() to release resources allocated for struct 
inode. It is only required if ->alloc_inode was defined and simply undoes anything done by ->alloc_inode.". I am not 
defining alloc_inode, so I assumed I do not need to define destroy_inode and the standard destroy_inode will do the 
right thing since standard alloc_inode is being used.

Are you suggesting per-region mshare_data should be freed in destroy_inode instead of in close?

> 
>> +		} else {
>> +			mmap_read_unlock(info->mm);
>> +		}
>> +	}
>> +}
>> +
>> +static const struct vm_operations_struct msharefs_vm_ops = {
>> +	.close	= msharefs_close,
>> +};
>> +
>>   static int
>>   msharefs_mmap(struct file *file, struct vm_area_struct *vma)
>>   {
>>   	struct mshare_data *info = file->private_data;
>>   	struct mm_struct *mm = info->mm;
>>   
>> +	mmap_write_lock(mm);
>>   	/*
>>   	 * If this mshare region has been set up once already, bail out
>>   	 */
>> @@ -80,10 +106,14 @@ msharefs_mmap(struct file *file, struct vm_area_struct *vma)
>>   	mm->task_size = vma->vm_end - vma->vm_start;
>>   	if (!mm->task_size)
>>   		mm->task_size--;
>> +	mmap_write_unlock(mm);
>>   	info->minfo->start = mm->mmap_base;
>>   	info->minfo->size = mm->task_size;
>> +	info->deleted = 0;
>> +	refcount_inc(&info->refcnt);
>>   	vma->vm_flags |= VM_SHARED_PT;
>>   	vma->vm_private_data = info;
>> +	vma->vm_ops = &msharefs_vm_ops;
>>   	return 0;
>>   }
>>   
>> @@ -240,6 +270,38 @@ msharefs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
>>   	return ret;
>>   }
>>   
>> +static int
>> +msharefs_unlink(struct inode *dir, struct dentry *dentry)
>> +{
>> +	struct inode *inode = d_inode(dentry);
>> +	struct mshare_data *info = inode->i_private;
>> +
>> +	/*
>> +	 * Unmap the mshare region if it is still mapped in
>> +	 */
>> +	vm_munmap(info->minfo->start, info->minfo->size);
>> +
>> +	/*
>> +	 * Mark msharefs file for deletion so it can not be opened
>> +	 * and used for mshare mappings any more
>> +	 */
>> +	simple_unlink(dir, dentry);
>> +	mmap_write_lock(info->mm);
>> +	info->deleted = 1;
>> +	mmap_write_unlock(info->mm);
> 
> What if the file is hardlinked?

It looks like that is a bug currently. I need to account for that. Thanks!

--
Khalid
diff mbox series

Patch

diff --git a/mm/mshare.c b/mm/mshare.c
index 088a6cab1e93..90ce0564a138 100644
--- a/mm/mshare.c
+++ b/mm/mshare.c
@@ -29,6 +29,7 @@  static struct super_block *msharefs_sb;
 struct mshare_data {
 	struct mm_struct *mm;
 	refcount_t refcnt;
+	int deleted;
 	struct mshare_info *minfo;
 };
 
@@ -48,6 +49,7 @@  msharefs_read(struct kiocb *iocb, struct iov_iter *iov)
 	size_t ret;
 	struct mshare_info m_info;
 
+	mmap_read_lock(info->mm);
 	if (info->minfo != NULL) {
 		m_info.start = info->minfo->start;
 		m_info.size = info->minfo->size;
@@ -55,18 +57,42 @@  msharefs_read(struct kiocb *iocb, struct iov_iter *iov)
 		m_info.start = 0;
 		m_info.size = 0;
 	}
+	mmap_read_unlock(info->mm);
 	ret = copy_to_iter(&m_info, sizeof(m_info), iov);
 	if (!ret)
 		return -EFAULT;
 	return ret;
 }
 
+static void
+msharefs_close(struct vm_area_struct *vma)
+{
+	struct mshare_data *info = vma->vm_private_data;
+
+	if (refcount_dec_and_test(&info->refcnt)) {
+		mmap_read_lock(info->mm);
+		if (info->deleted) {
+			mmap_read_unlock(info->mm);
+			mmput(info->mm);
+			kfree(info->minfo);
+			kfree(info);
+		} else {
+			mmap_read_unlock(info->mm);
+		}
+	}
+}
+
+static const struct vm_operations_struct msharefs_vm_ops = {
+	.close	= msharefs_close,
+};
+
 static int
 msharefs_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct mshare_data *info = file->private_data;
 	struct mm_struct *mm = info->mm;
 
+	mmap_write_lock(mm);
 	/*
 	 * If this mshare region has been set up once already, bail out
 	 */
@@ -80,10 +106,14 @@  msharefs_mmap(struct file *file, struct vm_area_struct *vma)
 	mm->task_size = vma->vm_end - vma->vm_start;
 	if (!mm->task_size)
 		mm->task_size--;
+	mmap_write_unlock(mm);
 	info->minfo->start = mm->mmap_base;
 	info->minfo->size = mm->task_size;
+	info->deleted = 0;
+	refcount_inc(&info->refcnt);
 	vma->vm_flags |= VM_SHARED_PT;
 	vma->vm_private_data = info;
+	vma->vm_ops = &msharefs_vm_ops;
 	return 0;
 }
 
@@ -240,6 +270,38 @@  msharefs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
 	return ret;
 }
 
+static int
+msharefs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = d_inode(dentry);
+	struct mshare_data *info = inode->i_private;
+
+	/*
+	 * Unmap the mshare region if it is still mapped in
+	 */
+	vm_munmap(info->minfo->start, info->minfo->size);
+
+	/*
+	 * Mark msharefs file for deletion so it can not be opened
+	 * and used for mshare mappings any more
+	 */
+	simple_unlink(dir, dentry);
+	mmap_write_lock(info->mm);
+	info->deleted = 1;
+	mmap_write_unlock(info->mm);
+
+	/*
+	 * Is this the last reference? If so, delete mshare region and
+	 * remove the file
+	 */
+	if (!refcount_dec_and_test(&info->refcnt)) {
+		mmput(info->mm);
+		kfree(info->minfo);
+		kfree(info);
+	}
+	return 0;
+}
+
 static const struct inode_operations msharefs_file_inode_ops = {
 	.setattr	= simple_setattr,
 	.getattr	= simple_getattr,
@@ -248,7 +310,7 @@  static const struct inode_operations msharefs_dir_inode_ops = {
 	.create		= msharefs_create,
 	.lookup		= simple_lookup,
 	.link		= simple_link,
-	.unlink		= simple_unlink,
+	.unlink		= msharefs_unlink,
 	.mkdir		= msharefs_mkdir,
 	.rmdir		= simple_rmdir,
 	.mknod		= msharefs_mknod,