Message ID | 20200110192942.25021-10-ira.weiny@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Enable per-file/directory DAX operations V2 | expand |
On Fri, Jan 10, 2020 at 11:29:39AM -0800, ira.weiny@intel.com wrote: > From: Ira Weiny <ira.weiny@intel.com> > > Page faults need to ensure the inode mode is correct and consistent with > the vmf information at the time of the fault. There is no easy way to > ensure the vmf information is correct if a mode change is in progress. > Furthermore, there is no good use case to require a mode change while > the file is mmap'ed. > > Track mmap's of the file and fail the mode change if the file is > mmap'ed. > > Signed-off-by: Ira Weiny <ira.weiny@intel.com> > --- > fs/inode.c | 2 ++ > fs/xfs/xfs_ioctl.c | 8 ++++++++ > include/linux/fs.h | 1 + > mm/mmap.c | 19 +++++++++++++++++-- > 4 files changed, 28 insertions(+), 2 deletions(-) > > diff --git a/fs/inode.c b/fs/inode.c > index 2b0f51161918..944711aed6f8 100644 > --- a/fs/inode.c > +++ b/fs/inode.c > @@ -245,6 +245,8 @@ static struct inode *alloc_inode(struct super_block *sb) > return NULL; > } > > + atomic64_set(&inode->i_mapped, 0); > + > return inode; > } > > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c > index bc3654fe3b5d..1ab0906c6c7f 100644 > --- a/fs/xfs/xfs_ioctl.c > +++ b/fs/xfs/xfs_ioctl.c > @@ -1200,6 +1200,14 @@ xfs_ioctl_setattr_dax_invalidate( > goto out_unlock; > } > > + /* > + * If there is a mapping in place we must remain in our current mode. > + */ > + if (atomic64_read(&inode->i_mapped)) { Urk, should we really be messing around with the address space internals? > + error = -EBUSY; > + goto out_unlock; > + } > + > error = filemap_write_and_wait(inode->i_mapping); > if (error) > goto out_unlock; > diff --git a/include/linux/fs.h b/include/linux/fs.h > index 631f11d6246e..6e7dc626b657 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -740,6 +740,7 @@ struct inode { > #endif > > void *i_private; /* fs or device private pointer */ > + atomic64_t i_mapped; I would have expected to find this in struct address_space since the mapping count is a function of the address space, right? 
--D > } __randomize_layout; > > struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode); > diff --git a/mm/mmap.c b/mm/mmap.c > index dfaf1130e706..e6b68924b7ca 100644 > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -171,12 +171,17 @@ void unlink_file_vma(struct vm_area_struct *vma) > static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) > { > struct vm_area_struct *next = vma->vm_next; > + struct file *f = vma->vm_file; > > might_sleep(); > if (vma->vm_ops && vma->vm_ops->close) > vma->vm_ops->close(vma); > - if (vma->vm_file) > - fput(vma->vm_file); > + if (f) { > + struct inode *inode = file_inode(f); > + if (inode) > + atomic64_dec(&inode->i_mapped); > + fput(f); > + } > mpol_put(vma_policy(vma)); > vm_area_free(vma); > return next; > @@ -1837,6 +1842,16 @@ unsigned long mmap_region(struct file *file, unsigned long addr, > > vma_set_page_prot(vma); > > + /* > + * Track if there is mapping in place such that a mode change > + * does not occur on a file which is mapped > + */ > + if (file) { > + struct inode *inode = file_inode(file); > + > + atomic64_inc(&inode->i_mapped); > + } > + > return addr; > > unmap_and_free_vma: > -- > 2.21.0 >
On Mon, Jan 13, 2020 at 02:22:12PM -0800, Darrick J. Wong wrote: > On Fri, Jan 10, 2020 at 11:29:39AM -0800, ira.weiny@intel.com wrote: > > From: Ira Weiny <ira.weiny@intel.com> > > [snip] > > > > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c > > index bc3654fe3b5d..1ab0906c6c7f 100644 > > --- a/fs/xfs/xfs_ioctl.c > > +++ b/fs/xfs/xfs_ioctl.c > > @@ -1200,6 +1200,14 @@ xfs_ioctl_setattr_dax_invalidate( > > goto out_unlock; > > } > > > > + /* > > + * If there is a mapping in place we must remain in our current mode. > > + */ > > + if (atomic64_read(&inode->i_mapped)) { > > Urk, should we really be messing around with the address space > internals? I contemplated a function call instead of checking i_mapped directly? Is that what you mean? > > > + error = -EBUSY; > > + goto out_unlock; > > + } > > + > > error = filemap_write_and_wait(inode->i_mapping); > > if (error) > > goto out_unlock; > > diff --git a/include/linux/fs.h b/include/linux/fs.h > > index 631f11d6246e..6e7dc626b657 100644 > > --- a/include/linux/fs.h > > +++ b/include/linux/fs.h > > @@ -740,6 +740,7 @@ struct inode { > > #endif > > > > void *i_private; /* fs or device private pointer */ > > + atomic64_t i_mapped; > > I would have expected to find this in struct address_space since the > mapping count is a function of the address space, right? I suppose but the only external call (above) would be passing an inode. So to me it seemed better here. Ira > > --D >
On Mon, Jan 13, 2020 at 04:46:10PM -0800, Ira Weiny wrote: > On Mon, Jan 13, 2020 at 02:22:12PM -0800, Darrick J. Wong wrote: > > On Fri, Jan 10, 2020 at 11:29:39AM -0800, ira.weiny@intel.com wrote: > > > From: Ira Weiny <ira.weiny@intel.com> > > > > > [snip] > > > > > > > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c > > > index bc3654fe3b5d..1ab0906c6c7f 100644 > > > --- a/fs/xfs/xfs_ioctl.c > > > +++ b/fs/xfs/xfs_ioctl.c > > > @@ -1200,6 +1200,14 @@ xfs_ioctl_setattr_dax_invalidate( > > > goto out_unlock; > > > } > > > > > > + /* > > > + * If there is a mapping in place we must remain in our current mode. > > > + */ > > > + if (atomic64_read(&inode->i_mapped)) { > > > > Urk, should we really be messing around with the address space > > internals? > > I contemplated a function call instead of checking i_mapped directly? Is that > what you mean? Yeah. Abstracting the details just enough that filesystems don't have to know that i_mapped is atomic64 etc. > > > > > > + error = -EBUSY; > > > + goto out_unlock; > > > + } > > > + > > > error = filemap_write_and_wait(inode->i_mapping); > > > if (error) > > > goto out_unlock; > > > diff --git a/include/linux/fs.h b/include/linux/fs.h > > > index 631f11d6246e..6e7dc626b657 100644 > > > --- a/include/linux/fs.h > > > +++ b/include/linux/fs.h > > > @@ -740,6 +740,7 @@ struct inode { > > > #endif > > > > > > void *i_private; /* fs or device private pointer */ > > > + atomic64_t i_mapped; > > > > I would have expected to find this in struct address_space since the > > mapping count is a function of the address space, right? > > I suppose but the only external call (above) would be passing an inode. So to > me it seemed better here. But the number of memory mappings reflects the state of the address space, not the inode. Or maybe put another way, if I were an mm developer I would not expect to look in struct inode for mm state. 
static inline bool inode_has_mappings(struct inode *inode) { return atomic64_read(&inode->i_mapping->mapcount) > 0; } OTOH if there exist other mm developers who /do/ find that storing the mmap count in struct inode is more logical, please let me know. :) --D > Ira > > > > > --D > >
On Mon, Jan 13, 2020 at 05:30:04PM -0800, Darrick J. Wong wrote: > On Mon, Jan 13, 2020 at 04:46:10PM -0800, Ira Weiny wrote: > > On Mon, Jan 13, 2020 at 02:22:12PM -0800, Darrick J. Wong wrote: > > > On Fri, Jan 10, 2020 at 11:29:39AM -0800, ira.weiny@intel.com wrote: > > > > From: Ira Weiny <ira.weiny@intel.com> > > > > > > > > [snip] > > > > > > > > > > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c > > > > index bc3654fe3b5d..1ab0906c6c7f 100644 > > > > --- a/fs/xfs/xfs_ioctl.c > > > > +++ b/fs/xfs/xfs_ioctl.c > > > > @@ -1200,6 +1200,14 @@ xfs_ioctl_setattr_dax_invalidate( > > > > goto out_unlock; > > > > } > > > > > > > > + /* > > > > + * If there is a mapping in place we must remain in our current mode. > > > > + */ > > > > + if (atomic64_read(&inode->i_mapped)) { > > > > > > Urk, should we really be messing around with the address space > > > internals? > > > > I contemplated a function call instead of checking i_mapped directly? Is that > > what you mean? > > Yeah. Abstracting the details just enough that filesystems don't have > to know that i_mapped is atomic64 etc. Done. > > > > > > > > > > + error = -EBUSY; > > > > + goto out_unlock; > > > > + } > > > > + > > > > error = filemap_write_and_wait(inode->i_mapping); > > > > if (error) > > > > goto out_unlock; > > > > diff --git a/include/linux/fs.h b/include/linux/fs.h > > > > index 631f11d6246e..6e7dc626b657 100644 > > > > --- a/include/linux/fs.h > > > > +++ b/include/linux/fs.h > > > > @@ -740,6 +740,7 @@ struct inode { > > > > #endif > > > > > > > > void *i_private; /* fs or device private pointer */ > > > > + atomic64_t i_mapped; > > > > > > I would have expected to find this in struct address_space since the > > > mapping count is a function of the address space, right? > > > > I suppose but the only external call (above) would be passing an inode. So to > > me it seemed better here. > > But the number of memory mappings reflects the state of the address > space, not the inode. 
Or maybe put another way, if I were an mm > developer I would not expect to look in struct inode for mm state. This is a good point... > > static inline bool inode_has_mappings(struct inode *inode) > { > return atomic64_read(&inode->i_mapping->mapcount) > 0; > } > > OTOH if there exist other mm developers who /do/ find that storing the > mmap count in struct inode is more logical, please let me know. :) ... My thinking was that the number of mappings does not matter to the mm system... However, I'm starting to think you are correct... ;-) I've made a note of it and we will see what others think. Ira > > --D > > > Ira > > > > > > > > --D > > >
From ira.weiny@intel.com > Sent: 10 January 2020 19:30 > > Page faults need to ensure the inode mode is correct and consistent with > the vmf information at the time of the fault. There is no easy way to > ensure the vmf information is correct if a mode change is in progress. > Furthermore, there is no good use case to require a mode change while > the file is mmap'ed. > > Track mmap's of the file and fail the mode change if the file is > mmap'ed. This seems wrong to me. I presume the 'mode changes' are from things like 'chmod -w ...'. mmap() should be no different to open(). Only the permissions set when the file is opened count. Next you'll be stopping unlink() when a file is open :-) David - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
On Tue 14-01-20 09:53:54, Ira Weiny wrote: > On Mon, Jan 13, 2020 at 05:30:04PM -0800, Darrick J. Wong wrote: > > > > > + error = -EBUSY; > > > > > + goto out_unlock; > > > > > + } > > > > > + > > > > > error = filemap_write_and_wait(inode->i_mapping); > > > > > if (error) > > > > > goto out_unlock; > > > > > diff --git a/include/linux/fs.h b/include/linux/fs.h > > > > > index 631f11d6246e..6e7dc626b657 100644 > > > > > --- a/include/linux/fs.h > > > > > +++ b/include/linux/fs.h > > > > > @@ -740,6 +740,7 @@ struct inode { > > > > > #endif > > > > > > > > > > void *i_private; /* fs or device private pointer */ > > > > > + atomic64_t i_mapped; > > > > > > > > I would have expected to find this in struct address_space since the > > > > mapping count is a function of the address space, right? > > > > > > I suppose but the only external call (above) would be passing an inode. So to > > > me it seemed better here. > > > > But the number of memory mappings reflects the state of the address > > space, not the inode. Or maybe put another way, if I were an mm > > developer I would not expect to look in struct inode for mm state. > > This is a good point... > > > > > static inline bool inode_has_mappings(struct inode *inode) > > { > > return atomic64_read(&inode->i_mapping->mapcount) > 0; > > } > > > > OTOH if there exist other mm developers who /do/ find that storing the > > mmap count in struct inode is more logical, please let me know. :) > > ... My thinking was that the number of mappings does not matters to the mm > system... However, I'm starting to think you are correct... ;-) > > I've made a note of it and we will see what others think. Well, more importantly mapping != inode. There can be multiple inodes pointing to the same mapping (struct address_space) as is the case for example for block devices. So this counter definitely belongs into struct address_space. Honza
On Wed, Jan 15, 2020 at 10:21:45AM +0000, David Laight wrote: > From ira.weiny@intel.com > > Sent: 10 January 2020 19:30 > > > > Page faults need to ensure the inode mode is correct and consistent with > > the vmf information at the time of the fault. There is no easy way to > > ensure the vmf information is correct if a mode change is in progress. > > Furthermore, there is no good use case to require a mode change while > > the file is mmap'ed. > > > > Track mmap's of the file and fail the mode change if the file is > > mmap'ed. > > This seems wrong to me. > I presume the 'mode changes' are from things like 'chmod -w ...'. No... Sorry... "mode" was a _very_ bad name. In this context "mode" was the "DAX mode" not the file mode. > mmap() should be no different to open(). > Only the permissions set when the file is opened count. > > Next you'll be stopping unlink() when a file is open :-) hehehe :-D no ... sorry that was not the meaning. To be clear what this is preventing is a change from non-DAX to DAX or vice versa while a file is mmap'ed. I'm looking at a better name for this. For this commit message is this more clear? <commit> fs: Prevent DAX change if file is mmap'ed Page faults need to ensure the inode DAX configuration is correct and consistent with the vmf information at the time of the fault. There is no easy way to ensure the vmf information is correct if a DAX change is in progress. Furthermore, there is no good use case to require changing DAX configs while the file is mmap'ed. Track mmap's of the file and fail the DAX change if the file is mmap'ed. </commit> Sorry for the confusion, Ira > > David > > - > Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK > Registration No: 1397386 (Wales) >
On Wed, Jan 15, 2020 at 12:34:55PM +0100, Jan Kara wrote: > On Tue 14-01-20 09:53:54, Ira Weiny wrote: > > On Mon, Jan 13, 2020 at 05:30:04PM -0800, Darrick J. Wong wrote: > > > > > > + error = -EBUSY; > > > > > > + goto out_unlock; > > > > > > + } > > > > > > + > > > > > > error = filemap_write_and_wait(inode->i_mapping); > > > > > > if (error) > > > > > > goto out_unlock; > > > > > > diff --git a/include/linux/fs.h b/include/linux/fs.h > > > > > > index 631f11d6246e..6e7dc626b657 100644 > > > > > > --- a/include/linux/fs.h > > > > > > +++ b/include/linux/fs.h > > > > > > @@ -740,6 +740,7 @@ struct inode { > > > > > > #endif > > > > > > > > > > > > void *i_private; /* fs or device private pointer */ > > > > > > + atomic64_t i_mapped; > > > > > > > > > > I would have expected to find this in struct address_space since the > > > > > mapping count is a function of the address space, right? > > > > > > > > I suppose but the only external call (above) would be passing an inode. So to > > > > me it seemed better here. > > > > > > But the number of memory mappings reflects the state of the address > > > space, not the inode. Or maybe put another way, if I were an mm > > > developer I would not expect to look in struct inode for mm state. > > > > This is a good point... > > > > > > > > static inline bool inode_has_mappings(struct inode *inode) > > > { > > > return atomic64_read(&inode->i_mapping->mapcount) > 0; > > > } > > > > > > OTOH if there exist other mm developers who /do/ find that storing the > > > mmap count in struct inode is more logical, please let me know. :) > > > > ... My thinking was that the number of mappings does not matters to the mm > > system... However, I'm starting to think you are correct... ;-) > > > > I've made a note of it and we will see what others think. > > Well, more importantly mapping != inode. There can be multiple inodes > pointing to the same mapping (struct address_space) as is the case for > example for block devices. 
So this counter definitely belongs into struct > address_space. Ah Yes, great point. Done. Ira
diff --git a/fs/inode.c b/fs/inode.c index 2b0f51161918..944711aed6f8 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -245,6 +245,8 @@ static struct inode *alloc_inode(struct super_block *sb) return NULL; } + atomic64_set(&inode->i_mapped, 0); + return inode; } diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index bc3654fe3b5d..1ab0906c6c7f 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1200,6 +1200,14 @@ xfs_ioctl_setattr_dax_invalidate( goto out_unlock; } + /* + * If there is a mapping in place we must remain in our current mode. + */ + if (atomic64_read(&inode->i_mapped)) { + error = -EBUSY; + goto out_unlock; + } + error = filemap_write_and_wait(inode->i_mapping); if (error) goto out_unlock; diff --git a/include/linux/fs.h b/include/linux/fs.h index 631f11d6246e..6e7dc626b657 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -740,6 +740,7 @@ struct inode { #endif void *i_private; /* fs or device private pointer */ + atomic64_t i_mapped; } __randomize_layout; struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode); diff --git a/mm/mmap.c b/mm/mmap.c index dfaf1130e706..e6b68924b7ca 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -171,12 +171,17 @@ void unlink_file_vma(struct vm_area_struct *vma) static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) { struct vm_area_struct *next = vma->vm_next; + struct file *f = vma->vm_file; might_sleep(); if (vma->vm_ops && vma->vm_ops->close) vma->vm_ops->close(vma); - if (vma->vm_file) - fput(vma->vm_file); + if (f) { + struct inode *inode = file_inode(f); + if (inode) + atomic64_dec(&inode->i_mapped); + fput(f); + } mpol_put(vma_policy(vma)); vm_area_free(vma); return next; @@ -1837,6 +1842,16 @@ unsigned long mmap_region(struct file *file, unsigned long addr, vma_set_page_prot(vma); + /* + * Track if there is mapping in place such that a mode change + * does not occur on a file which is mapped + */ + if (file) { + struct inode *inode = file_inode(file); + + 
atomic64_inc(&inode->i_mapped); + } + return addr; unmap_and_free_vma: