diff mbox series

[06/12] zonefs: Convert to using invalidate_lock

Message ID 20210423173018.23133-6-jack@suse.cz (mailing list archive)
State New, archived
Headers show
Series fs: Hole punch vs page cache filling races | expand

Commit Message

Jan Kara April 23, 2021, 5:29 p.m. UTC
Use invalidate_lock instead of zonefs' private i_mmap_sem. The intended
purpose is exactly the same. By this conversion we also fix a race
between hole punching and read(2) / readahead(2) paths that can lead to
stale page cache contents.

CC: Damien Le Moal <damien.lemoal@wdc.com>
CC: Johannes Thumshirn <jth@kernel.org>
CC: <linux-fsdevel@vger.kernel.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/zonefs/super.c  | 23 +++++------------------
 fs/zonefs/zonefs.h |  7 +++----
 2 files changed, 8 insertions(+), 22 deletions(-)

Comments

Damien Le Moal April 26, 2021, 6:40 a.m. UTC | #1
On 2021/04/24 2:30, Jan Kara wrote:
> Use invalidate_lock instead of zonefs' private i_mmap_sem. The intended
> purpose is exactly the same. By this conversion we also fix a race
> between hole punching and read(2) / readahead(2) paths that can lead to
> stale page cache contents.

zonefs does not support hole punching since the blocks of a file are determined
by the device zone configuration and cannot change, ever. So I think you can
remove the second sentence above.

> 
> CC: Damien Le Moal <damien.lemoal@wdc.com>
> CC: Johannes Thumshirn <jth@kernel.org>
> CC: <linux-fsdevel@vger.kernel.org>
> Signed-off-by: Jan Kara <jack@suse.cz>
> ---
>  fs/zonefs/super.c  | 23 +++++------------------
>  fs/zonefs/zonefs.h |  7 +++----
>  2 files changed, 8 insertions(+), 22 deletions(-)
> 
> diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
> index 049e36c69ed7..60ac5587c880 100644
> --- a/fs/zonefs/super.c
> +++ b/fs/zonefs/super.c
> @@ -462,7 +462,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize)
>  	inode_dio_wait(inode);
>  
>  	/* Serialize against page faults */
> -	down_write(&zi->i_mmap_sem);
> +	down_write(&inode->i_mapping->invalidate_lock);
>  
>  	/* Serialize against zonefs_iomap_begin() */
>  	mutex_lock(&zi->i_truncate_mutex);
> @@ -500,7 +500,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize)
>  
>  unlock:
>  	mutex_unlock(&zi->i_truncate_mutex);
> -	up_write(&zi->i_mmap_sem);
> +	up_write(&inode->i_mapping->invalidate_lock);
>  
>  	return ret;
>  }
> @@ -575,18 +575,6 @@ static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end,
>  	return ret;
>  }
>  
> -static vm_fault_t zonefs_filemap_fault(struct vm_fault *vmf)
> -{
> -	struct zonefs_inode_info *zi = ZONEFS_I(file_inode(vmf->vma->vm_file));
> -	vm_fault_t ret;
> -
> -	down_read(&zi->i_mmap_sem);
> -	ret = filemap_fault(vmf);
> -	up_read(&zi->i_mmap_sem);
> -
> -	return ret;
> -}
> -
>  static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf)
>  {
>  	struct inode *inode = file_inode(vmf->vma->vm_file);
> @@ -607,16 +595,16 @@ static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf)
>  	file_update_time(vmf->vma->vm_file);
>  
>  	/* Serialize against truncates */
> -	down_read(&zi->i_mmap_sem);
> +	down_read(&inode->i_mapping->invalidate_lock);
>  	ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops);
> -	up_read(&zi->i_mmap_sem);
> +	up_read(&inode->i_mapping->invalidate_lock);
>  
>  	sb_end_pagefault(inode->i_sb);
>  	return ret;
>  }
>  
>  static const struct vm_operations_struct zonefs_file_vm_ops = {
> -	.fault		= zonefs_filemap_fault,
> +	.fault		= filemap_fault,
>  	.map_pages	= filemap_map_pages,
>  	.page_mkwrite	= zonefs_filemap_page_mkwrite,
>  };
> @@ -1158,7 +1146,6 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb)
>  
>  	inode_init_once(&zi->i_vnode);
>  	mutex_init(&zi->i_truncate_mutex);
> -	init_rwsem(&zi->i_mmap_sem);
>  	zi->i_wr_refcnt = 0;
>  
>  	return &zi->i_vnode;
> diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h
> index 51141907097c..7b147907c328 100644
> --- a/fs/zonefs/zonefs.h
> +++ b/fs/zonefs/zonefs.h
> @@ -70,12 +70,11 @@ struct zonefs_inode_info {
>  	 * and changes to the inode private data, and in particular changes to
>  	 * a sequential file size on completion of direct IO writes.
>  	 * Serialization of mmap read IOs with truncate and syscall IO
> -	 * operations is done with i_mmap_sem in addition to i_truncate_mutex.
> -	 * Only zonefs_seq_file_truncate() takes both lock (i_mmap_sem first,
> -	 * i_truncate_mutex second).
> +	 * operations is done with invalidate_lock in addition to
> +	 * i_truncate_mutex.  Only zonefs_seq_file_truncate() takes both lock
> +	 * (invalidate_lock first, i_truncate_mutex second).
>  	 */
>  	struct mutex		i_truncate_mutex;
> -	struct rw_semaphore	i_mmap_sem;
>  
>  	/* guarded by i_truncate_mutex */
>  	unsigned int		i_wr_refcnt;
>
Jan Kara April 26, 2021, 4:24 p.m. UTC | #2
On Mon 26-04-21 06:40:27, Damien Le Moal wrote:
> On 2021/04/24 2:30, Jan Kara wrote:
> > Use invalidate_lock instead of zonefs' private i_mmap_sem. The intended
> > purpose is exactly the same. By this conversion we also fix a race
> > between hole punching and read(2) / readahead(2) paths that can lead to
> > stale page cache contents.
> 
> zonefs does not support hole punching since the blocks of a file are determined
> by the device zone configuration and cannot change, ever. So I think you can
> remove the second sentence above.

Sure, thanks for the correction. Updated.

								Honza

> 
> > 
> > CC: Damien Le Moal <damien.lemoal@wdc.com>
> > CC: Johannes Thumshirn <jth@kernel.org>
> > CC: <linux-fsdevel@vger.kernel.org>
> > Signed-off-by: Jan Kara <jack@suse.cz>
> > ---
> >  fs/zonefs/super.c  | 23 +++++------------------
> >  fs/zonefs/zonefs.h |  7 +++----
> >  2 files changed, 8 insertions(+), 22 deletions(-)
> > 
> > diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
> > index 049e36c69ed7..60ac5587c880 100644
> > --- a/fs/zonefs/super.c
> > +++ b/fs/zonefs/super.c
> > @@ -462,7 +462,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize)
> >  	inode_dio_wait(inode);
> >  
> >  	/* Serialize against page faults */
> > -	down_write(&zi->i_mmap_sem);
> > +	down_write(&inode->i_mapping->invalidate_lock);
> >  
> >  	/* Serialize against zonefs_iomap_begin() */
> >  	mutex_lock(&zi->i_truncate_mutex);
> > @@ -500,7 +500,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize)
> >  
> >  unlock:
> >  	mutex_unlock(&zi->i_truncate_mutex);
> > -	up_write(&zi->i_mmap_sem);
> > +	up_write(&inode->i_mapping->invalidate_lock);
> >  
> >  	return ret;
> >  }
> > @@ -575,18 +575,6 @@ static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end,
> >  	return ret;
> >  }
> >  
> > -static vm_fault_t zonefs_filemap_fault(struct vm_fault *vmf)
> > -{
> > -	struct zonefs_inode_info *zi = ZONEFS_I(file_inode(vmf->vma->vm_file));
> > -	vm_fault_t ret;
> > -
> > -	down_read(&zi->i_mmap_sem);
> > -	ret = filemap_fault(vmf);
> > -	up_read(&zi->i_mmap_sem);
> > -
> > -	return ret;
> > -}
> > -
> >  static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf)
> >  {
> >  	struct inode *inode = file_inode(vmf->vma->vm_file);
> > @@ -607,16 +595,16 @@ static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf)
> >  	file_update_time(vmf->vma->vm_file);
> >  
> >  	/* Serialize against truncates */
> > -	down_read(&zi->i_mmap_sem);
> > +	down_read(&inode->i_mapping->invalidate_lock);
> >  	ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops);
> > -	up_read(&zi->i_mmap_sem);
> > +	up_read(&inode->i_mapping->invalidate_lock);
> >  
> >  	sb_end_pagefault(inode->i_sb);
> >  	return ret;
> >  }
> >  
> >  static const struct vm_operations_struct zonefs_file_vm_ops = {
> > -	.fault		= zonefs_filemap_fault,
> > +	.fault		= filemap_fault,
> >  	.map_pages	= filemap_map_pages,
> >  	.page_mkwrite	= zonefs_filemap_page_mkwrite,
> >  };
> > @@ -1158,7 +1146,6 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb)
> >  
> >  	inode_init_once(&zi->i_vnode);
> >  	mutex_init(&zi->i_truncate_mutex);
> > -	init_rwsem(&zi->i_mmap_sem);
> >  	zi->i_wr_refcnt = 0;
> >  
> >  	return &zi->i_vnode;
> > diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h
> > index 51141907097c..7b147907c328 100644
> > --- a/fs/zonefs/zonefs.h
> > +++ b/fs/zonefs/zonefs.h
> > @@ -70,12 +70,11 @@ struct zonefs_inode_info {
> >  	 * and changes to the inode private data, and in particular changes to
> >  	 * a sequential file size on completion of direct IO writes.
> >  	 * Serialization of mmap read IOs with truncate and syscall IO
> > -	 * operations is done with i_mmap_sem in addition to i_truncate_mutex.
> > -	 * Only zonefs_seq_file_truncate() takes both lock (i_mmap_sem first,
> > -	 * i_truncate_mutex second).
> > +	 * operations is done with invalidate_lock in addition to
> > +	 * i_truncate_mutex.  Only zonefs_seq_file_truncate() takes both lock
> > +	 * (invalidate_lock first, i_truncate_mutex second).
> >  	 */
> >  	struct mutex		i_truncate_mutex;
> > -	struct rw_semaphore	i_mmap_sem;
> >  
> >  	/* guarded by i_truncate_mutex */
> >  	unsigned int		i_wr_refcnt;
> > 
> 
> 
> -- 
> Damien Le Moal
> Western Digital Research
diff mbox series

Patch

diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 049e36c69ed7..60ac5587c880 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -462,7 +462,7 @@  static int zonefs_file_truncate(struct inode *inode, loff_t isize)
 	inode_dio_wait(inode);
 
 	/* Serialize against page faults */
-	down_write(&zi->i_mmap_sem);
+	down_write(&inode->i_mapping->invalidate_lock);
 
 	/* Serialize against zonefs_iomap_begin() */
 	mutex_lock(&zi->i_truncate_mutex);
@@ -500,7 +500,7 @@  static int zonefs_file_truncate(struct inode *inode, loff_t isize)
 
 unlock:
 	mutex_unlock(&zi->i_truncate_mutex);
-	up_write(&zi->i_mmap_sem);
+	up_write(&inode->i_mapping->invalidate_lock);
 
 	return ret;
 }
@@ -575,18 +575,6 @@  static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end,
 	return ret;
 }
 
-static vm_fault_t zonefs_filemap_fault(struct vm_fault *vmf)
-{
-	struct zonefs_inode_info *zi = ZONEFS_I(file_inode(vmf->vma->vm_file));
-	vm_fault_t ret;
-
-	down_read(&zi->i_mmap_sem);
-	ret = filemap_fault(vmf);
-	up_read(&zi->i_mmap_sem);
-
-	return ret;
-}
-
 static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf)
 {
 	struct inode *inode = file_inode(vmf->vma->vm_file);
@@ -607,16 +595,16 @@  static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf)
 	file_update_time(vmf->vma->vm_file);
 
 	/* Serialize against truncates */
-	down_read(&zi->i_mmap_sem);
+	down_read(&inode->i_mapping->invalidate_lock);
 	ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops);
-	up_read(&zi->i_mmap_sem);
+	up_read(&inode->i_mapping->invalidate_lock);
 
 	sb_end_pagefault(inode->i_sb);
 	return ret;
 }
 
 static const struct vm_operations_struct zonefs_file_vm_ops = {
-	.fault		= zonefs_filemap_fault,
+	.fault		= filemap_fault,
 	.map_pages	= filemap_map_pages,
 	.page_mkwrite	= zonefs_filemap_page_mkwrite,
 };
@@ -1158,7 +1146,6 @@  static struct inode *zonefs_alloc_inode(struct super_block *sb)
 
 	inode_init_once(&zi->i_vnode);
 	mutex_init(&zi->i_truncate_mutex);
-	init_rwsem(&zi->i_mmap_sem);
 	zi->i_wr_refcnt = 0;
 
 	return &zi->i_vnode;
diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h
index 51141907097c..7b147907c328 100644
--- a/fs/zonefs/zonefs.h
+++ b/fs/zonefs/zonefs.h
@@ -70,12 +70,11 @@  struct zonefs_inode_info {
 	 * and changes to the inode private data, and in particular changes to
 	 * a sequential file size on completion of direct IO writes.
 	 * Serialization of mmap read IOs with truncate and syscall IO
-	 * operations is done with i_mmap_sem in addition to i_truncate_mutex.
-	 * Only zonefs_seq_file_truncate() takes both lock (i_mmap_sem first,
-	 * i_truncate_mutex second).
+	 * operations is done with invalidate_lock in addition to
+	 * i_truncate_mutex.  Only zonefs_seq_file_truncate() takes both lock
+	 * (invalidate_lock first, i_truncate_mutex second).
 	 */
 	struct mutex		i_truncate_mutex;
-	struct rw_semaphore	i_mmap_sem;
 
 	/* guarded by i_truncate_mutex */
 	unsigned int		i_wr_refcnt;