Message ID | 20210423173018.23133-6-jack@suse.cz (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | fs: Hole punch vs page cache filling races | expand |
On 2021/04/24 2:30, Jan Kara wrote: > Use invalidate_lock instead of zonefs' private i_mmap_sem. The intended > purpose is exactly the same. By this conversion we also fix a race > between hole punching and read(2) / readahead(2) paths that can lead to > stale page cache contents. zonefs does not support hole punching since the blocks of a file are determined by the device zone configuration and cannot change, ever. So I think you can remove the second sentence above. > > CC: Damien Le Moal <damien.lemoal@wdc.com> > CC: Johannes Thumshirn <jth@kernel.org> > CC: <linux-fsdevel@vger.kernel.org> > Signed-off-by: Jan Kara <jack@suse.cz> > --- > fs/zonefs/super.c | 23 +++++------------------ > fs/zonefs/zonefs.h | 7 +++---- > 2 files changed, 8 insertions(+), 22 deletions(-) > > diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c > index 049e36c69ed7..60ac5587c880 100644 > --- a/fs/zonefs/super.c > +++ b/fs/zonefs/super.c > @@ -462,7 +462,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize) > inode_dio_wait(inode); > > /* Serialize against page faults */ > - down_write(&zi->i_mmap_sem); > + down_write(&inode->i_mapping->invalidate_lock); > > /* Serialize against zonefs_iomap_begin() */ > mutex_lock(&zi->i_truncate_mutex); > @@ -500,7 +500,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize) > > unlock: > mutex_unlock(&zi->i_truncate_mutex); > - up_write(&zi->i_mmap_sem); > + up_write(&inode->i_mapping->invalidate_lock); > > return ret; > } > @@ -575,18 +575,6 @@ static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end, > return ret; > } > > -static vm_fault_t zonefs_filemap_fault(struct vm_fault *vmf) > -{ > - struct zonefs_inode_info *zi = ZONEFS_I(file_inode(vmf->vma->vm_file)); > - vm_fault_t ret; > - > - down_read(&zi->i_mmap_sem); > - ret = filemap_fault(vmf); > - up_read(&zi->i_mmap_sem); > - > - return ret; > -} > - > static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) > { > struct inode *inode 
= file_inode(vmf->vma->vm_file); > @@ -607,16 +595,16 @@ static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) > file_update_time(vmf->vma->vm_file); > > /* Serialize against truncates */ > - down_read(&zi->i_mmap_sem); > + down_read(&inode->i_mapping->invalidate_lock); > ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops); > - up_read(&zi->i_mmap_sem); > + up_read(&inode->i_mapping->invalidate_lock); > > sb_end_pagefault(inode->i_sb); > return ret; > } > > static const struct vm_operations_struct zonefs_file_vm_ops = { > - .fault = zonefs_filemap_fault, > + .fault = filemap_fault, > .map_pages = filemap_map_pages, > .page_mkwrite = zonefs_filemap_page_mkwrite, > }; > @@ -1158,7 +1146,6 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb) > > inode_init_once(&zi->i_vnode); > mutex_init(&zi->i_truncate_mutex); > - init_rwsem(&zi->i_mmap_sem); > zi->i_wr_refcnt = 0; > > return &zi->i_vnode; > diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h > index 51141907097c..7b147907c328 100644 > --- a/fs/zonefs/zonefs.h > +++ b/fs/zonefs/zonefs.h > @@ -70,12 +70,11 @@ struct zonefs_inode_info { > * and changes to the inode private data, and in particular changes to > * a sequential file size on completion of direct IO writes. > * Serialization of mmap read IOs with truncate and syscall IO > - * operations is done with i_mmap_sem in addition to i_truncate_mutex. > - * Only zonefs_seq_file_truncate() takes both lock (i_mmap_sem first, > - * i_truncate_mutex second). > + * operations is done with invalidate_lock in addition to > + * i_truncate_mutex. Only zonefs_seq_file_truncate() takes both lock > + * (invalidate_lock first, i_truncate_mutex second). > */ > struct mutex i_truncate_mutex; > - struct rw_semaphore i_mmap_sem; > > /* guarded by i_truncate_mutex */ > unsigned int i_wr_refcnt; >
On Mon 26-04-21 06:40:27, Damien Le Moal wrote: > On 2021/04/24 2:30, Jan Kara wrote: > > Use invalidate_lock instead of zonefs' private i_mmap_sem. The intended > > purpose is exactly the same. By this conversion we also fix a race > > between hole punching and read(2) / readahead(2) paths that can lead to > > stale page cache contents. > > zonefs does not support hole punching since the blocks of a file are determined > by the device zone configuration and cannot change, ever. So I think you can > remove the second sentence above. Sure, thanks for correction. Updated. Honza > > > > > CC: Damien Le Moal <damien.lemoal@wdc.com> > > CC: Johannes Thumshirn <jth@kernel.org> > > CC: <linux-fsdevel@vger.kernel.org> > > Signed-off-by: Jan Kara <jack@suse.cz> > > --- > > fs/zonefs/super.c | 23 +++++------------------ > > fs/zonefs/zonefs.h | 7 +++---- > > 2 files changed, 8 insertions(+), 22 deletions(-) > > > > diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c > > index 049e36c69ed7..60ac5587c880 100644 > > --- a/fs/zonefs/super.c > > +++ b/fs/zonefs/super.c > > @@ -462,7 +462,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize) > > inode_dio_wait(inode); > > > > /* Serialize against page faults */ > > - down_write(&zi->i_mmap_sem); > > + down_write(&inode->i_mapping->invalidate_lock); > > > > /* Serialize against zonefs_iomap_begin() */ > > mutex_lock(&zi->i_truncate_mutex); > > @@ -500,7 +500,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize) > > > > unlock: > > mutex_unlock(&zi->i_truncate_mutex); > > - up_write(&zi->i_mmap_sem); > > + up_write(&inode->i_mapping->invalidate_lock); > > > > return ret; > > } > > @@ -575,18 +575,6 @@ static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end, > > return ret; > > } > > > > -static vm_fault_t zonefs_filemap_fault(struct vm_fault *vmf) > > -{ > > - struct zonefs_inode_info *zi = ZONEFS_I(file_inode(vmf->vma->vm_file)); > > - vm_fault_t ret; > > - > > - 
down_read(&zi->i_mmap_sem); > > - ret = filemap_fault(vmf); > > - up_read(&zi->i_mmap_sem); > > - > > - return ret; > > -} > > - > > static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) > > { > > struct inode *inode = file_inode(vmf->vma->vm_file); > > @@ -607,16 +595,16 @@ static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) > > file_update_time(vmf->vma->vm_file); > > > > /* Serialize against truncates */ > > - down_read(&zi->i_mmap_sem); > > + down_read(&inode->i_mapping->invalidate_lock); > > ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops); > > - up_read(&zi->i_mmap_sem); > > + up_read(&inode->i_mapping->invalidate_lock); > > > > sb_end_pagefault(inode->i_sb); > > return ret; > > } > > > > static const struct vm_operations_struct zonefs_file_vm_ops = { > > - .fault = zonefs_filemap_fault, > > + .fault = filemap_fault, > > .map_pages = filemap_map_pages, > > .page_mkwrite = zonefs_filemap_page_mkwrite, > > }; > > @@ -1158,7 +1146,6 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb) > > > > inode_init_once(&zi->i_vnode); > > mutex_init(&zi->i_truncate_mutex); > > - init_rwsem(&zi->i_mmap_sem); > > zi->i_wr_refcnt = 0; > > > > return &zi->i_vnode; > > diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h > > index 51141907097c..7b147907c328 100644 > > --- a/fs/zonefs/zonefs.h > > +++ b/fs/zonefs/zonefs.h > > @@ -70,12 +70,11 @@ struct zonefs_inode_info { > > * and changes to the inode private data, and in particular changes to > > * a sequential file size on completion of direct IO writes. > > * Serialization of mmap read IOs with truncate and syscall IO > > - * operations is done with i_mmap_sem in addition to i_truncate_mutex. > > - * Only zonefs_seq_file_truncate() takes both lock (i_mmap_sem first, > > - * i_truncate_mutex second). > > + * operations is done with invalidate_lock in addition to > > + * i_truncate_mutex. 
Only zonefs_seq_file_truncate() takes both lock > > + * (invalidate_lock first, i_truncate_mutex second). > > */ > > struct mutex i_truncate_mutex; > > - struct rw_semaphore i_mmap_sem; > > > > /* guarded by i_truncate_mutex */ > > unsigned int i_wr_refcnt; > > > > > -- > Damien Le Moal > Western Digital Research
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 049e36c69ed7..60ac5587c880 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -462,7 +462,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize) inode_dio_wait(inode); /* Serialize against page faults */ - down_write(&zi->i_mmap_sem); + down_write(&inode->i_mapping->invalidate_lock); /* Serialize against zonefs_iomap_begin() */ mutex_lock(&zi->i_truncate_mutex); @@ -500,7 +500,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize) unlock: mutex_unlock(&zi->i_truncate_mutex); - up_write(&zi->i_mmap_sem); + up_write(&inode->i_mapping->invalidate_lock); return ret; } @@ -575,18 +575,6 @@ static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end, return ret; } -static vm_fault_t zonefs_filemap_fault(struct vm_fault *vmf) -{ - struct zonefs_inode_info *zi = ZONEFS_I(file_inode(vmf->vma->vm_file)); - vm_fault_t ret; - - down_read(&zi->i_mmap_sem); - ret = filemap_fault(vmf); - up_read(&zi->i_mmap_sem); - - return ret; -} - static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) { struct inode *inode = file_inode(vmf->vma->vm_file); @@ -607,16 +595,16 @@ static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) file_update_time(vmf->vma->vm_file); /* Serialize against truncates */ - down_read(&zi->i_mmap_sem); + down_read(&inode->i_mapping->invalidate_lock); ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops); - up_read(&zi->i_mmap_sem); + up_read(&inode->i_mapping->invalidate_lock); sb_end_pagefault(inode->i_sb); return ret; } static const struct vm_operations_struct zonefs_file_vm_ops = { - .fault = zonefs_filemap_fault, + .fault = filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = zonefs_filemap_page_mkwrite, }; @@ -1158,7 +1146,6 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb) inode_init_once(&zi->i_vnode); mutex_init(&zi->i_truncate_mutex); - init_rwsem(&zi->i_mmap_sem); zi->i_wr_refcnt = 0; return 
&zi->i_vnode; diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h index 51141907097c..7b147907c328 100644 --- a/fs/zonefs/zonefs.h +++ b/fs/zonefs/zonefs.h @@ -70,12 +70,11 @@ struct zonefs_inode_info { * and changes to the inode private data, and in particular changes to * a sequential file size on completion of direct IO writes. * Serialization of mmap read IOs with truncate and syscall IO - * operations is done with i_mmap_sem in addition to i_truncate_mutex. - * Only zonefs_seq_file_truncate() takes both lock (i_mmap_sem first, - * i_truncate_mutex second). + * operations is done with invalidate_lock in addition to + * i_truncate_mutex. Only zonefs_seq_file_truncate() takes both lock + * (invalidate_lock first, i_truncate_mutex second). */ struct mutex i_truncate_mutex; - struct rw_semaphore i_mmap_sem; /* guarded by i_truncate_mutex */ unsigned int i_wr_refcnt;
Use invalidate_lock instead of zonefs' private i_mmap_sem. The intended
purpose is exactly the same. By this conversion we also fix a race
between hole punching and read(2) / readahead(2) paths that can lead to
stale page cache contents.

CC: Damien Le Moal <damien.lemoal@wdc.com>
CC: Johannes Thumshirn <jth@kernel.org>
CC: <linux-fsdevel@vger.kernel.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/zonefs/super.c  | 23 +++++------------------
 fs/zonefs/zonefs.h |  7 +++----
 2 files changed, 8 insertions(+), 22 deletions(-)