Message ID | 20240517111355.233085-2-yi.zhang@huaweicloud.com (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Series | iomap/xfs: fix stale data exposure when truncating realtime inodes | expand |
On Fri, May 17, 2024 at 07:13:53PM +0800, Zhang Yi wrote:
> From: Zhang Yi <yi.zhang@huawei.com>
>
> iomap_truncate_page() always assumes the block size of the truncating
> inode is i_blocksize(), this is not always true for some filesystems,
> e.g. XFS does extent size alignment for realtime inodes. Drop this
> assumption and pass the block size for zeroing into
> iomap_truncate_page(), allow filesystems to indicate the correct block
> size.
>
> Suggested-by: Dave Chinner <david@fromorbit.com>
> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
> ---
>  fs/iomap/buffered-io.c | 13 +++++++++----
>  fs/xfs/xfs_iomap.c     |  3 ++-
>  include/linux/iomap.h  |  4 ++--
>  3 files changed, 13 insertions(+), 7 deletions(-)
>
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 0926d216a5af..a0a0ac2c659c 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -17,6 +17,7 @@
>  #include <linux/bio.h>
>  #include <linux/sched/signal.h>
>  #include <linux/migrate.h>
> +#include <linux/math64.h>
>  #include "trace.h"
>
>  #include "../internal.h"
> @@ -1445,11 +1446,15 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
>  EXPORT_SYMBOL_GPL(iomap_zero_range);
>
>  int
> -iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
> -		const struct iomap_ops *ops)
> +iomap_truncate_page(struct inode *inode, loff_t pos, unsigned int blocksize,
> +		bool *did_zero, const struct iomap_ops *ops)
>  {
> -	unsigned int blocksize = i_blocksize(inode);
> -	unsigned int off = pos & (blocksize - 1);
> +	unsigned int off;
> +
> +	if (is_power_of_2(blocksize))
> +		off = pos & (blocksize - 1);
> +	else
> +		div_u64_rem(pos, blocksize, &off);

I wish this was a helper in math64.h somewhere.

static inline u32 rem_u64(u64 dividend, u32 divisor)
{
	if (likely(is_power_of_2(divisor)))
		return dividend & (divisor - 1);

	return dividend % divisor;
}

That way we skip the second division in div_u64_rem entirely, and the
iomap/dax code becomes:

	unsigned int off = rem_u64(pos, blocksize); /* pos in block */

Otherwise this looks like a straightforward mechanical change to me.

--D

>
>  	/* Block boundary? Nothing to do */
>  	if (!off)
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 2857ef1b0272..31ac07bb8425 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -1467,10 +1467,11 @@ xfs_truncate_page(
>  	bool *did_zero)
>  {
>  	struct inode *inode = VFS_I(ip);
> +	unsigned int blocksize = i_blocksize(inode);
>
>  	if (IS_DAX(inode))
>  		return dax_truncate_page(inode, pos, did_zero,
>  				&xfs_dax_write_iomap_ops);
> -	return iomap_truncate_page(inode, pos, did_zero,
> +	return iomap_truncate_page(inode, pos, blocksize, did_zero,
>  			&xfs_buffered_write_iomap_ops);
>  }
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 6fc1c858013d..d67bf86ec582 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -273,8 +273,8 @@ int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
>  		const struct iomap_ops *ops);
>  int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
>  		bool *did_zero, const struct iomap_ops *ops);
> -int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
> -		const struct iomap_ops *ops);
> +int iomap_truncate_page(struct inode *inode, loff_t pos, unsigned int blocksize,
> +		bool *did_zero, const struct iomap_ops *ops);
>  vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf,
>  		const struct iomap_ops *ops);
>  int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
> --
> 2.39.2
>
>
On 2024/5/18 1:29, Darrick J. Wong wrote:
> On Fri, May 17, 2024 at 07:13:53PM +0800, Zhang Yi wrote:
>> From: Zhang Yi <yi.zhang@huawei.com>
>>
>> iomap_truncate_page() always assumes the block size of the truncating
>> inode is i_blocksize(), this is not always true for some filesystems,
>> e.g. XFS does extent size alignment for realtime inodes. Drop this
>> assumption and pass the block size for zeroing into
>> iomap_truncate_page(), allow filesystems to indicate the correct block
>> size.
>>
>> Suggested-by: Dave Chinner <david@fromorbit.com>
>> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
>> ---
>>  fs/iomap/buffered-io.c | 13 +++++++++----
>>  fs/xfs/xfs_iomap.c     |  3 ++-
>>  include/linux/iomap.h  |  4 ++--
>>  3 files changed, 13 insertions(+), 7 deletions(-)
>>
>> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
>> index 0926d216a5af..a0a0ac2c659c 100644
>> --- a/fs/iomap/buffered-io.c
>> +++ b/fs/iomap/buffered-io.c
>> @@ -17,6 +17,7 @@
>>  #include <linux/bio.h>
>>  #include <linux/sched/signal.h>
>>  #include <linux/migrate.h>
>> +#include <linux/math64.h>
>>  #include "trace.h"
>>
>>  #include "../internal.h"
>> @@ -1445,11 +1446,15 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
>>  EXPORT_SYMBOL_GPL(iomap_zero_range);
>>
>>  int
>> -iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
>> -		const struct iomap_ops *ops)
>> +iomap_truncate_page(struct inode *inode, loff_t pos, unsigned int blocksize,
>> +		bool *did_zero, const struct iomap_ops *ops)
>>  {
>> -	unsigned int blocksize = i_blocksize(inode);
>> -	unsigned int off = pos & (blocksize - 1);
>> +	unsigned int off;
>> +
>> +	if (is_power_of_2(blocksize))
>> +		off = pos & (blocksize - 1);
>> +	else
>> +		div_u64_rem(pos, blocksize, &off);
>
> I wish this was a helper in math64.h somewhere.
>
> static inline u32 rem_u64(u64 dividend, u32 divisor)
> {
> 	if (likely(is_power_of_2(divisor)))
> 		return dividend & (divisor - 1);
>
> 	return dividend % divisor;
> }
>
> That way we skip the second division in div_u64_rem entirely, and the
> iomap/dax code becomes:
>
> 	unsigned int off = rem_u64(pos, blocksize); /* pos in block */
>
> Otherwise this looks like a straightforward mechanical change to me.
>

Yeah, we do need this helper.

Thanks,
Yi.

>
>>
>>  	/* Block boundary? Nothing to do */
>>  	if (!off)
>> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
>> index 2857ef1b0272..31ac07bb8425 100644
>> --- a/fs/xfs/xfs_iomap.c
>> +++ b/fs/xfs/xfs_iomap.c
>> @@ -1467,10 +1467,11 @@ xfs_truncate_page(
>>  	bool *did_zero)
>>  {
>>  	struct inode *inode = VFS_I(ip);
>> +	unsigned int blocksize = i_blocksize(inode);
>>
>>  	if (IS_DAX(inode))
>>  		return dax_truncate_page(inode, pos, did_zero,
>>  				&xfs_dax_write_iomap_ops);
>> -	return iomap_truncate_page(inode, pos, did_zero,
>> +	return iomap_truncate_page(inode, pos, blocksize, did_zero,
>>  			&xfs_buffered_write_iomap_ops);
>>  }
>> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
>> index 6fc1c858013d..d67bf86ec582 100644
>> --- a/include/linux/iomap.h
>> +++ b/include/linux/iomap.h
>> @@ -273,8 +273,8 @@ int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
>>  		const struct iomap_ops *ops);
>>  int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
>>  		bool *did_zero, const struct iomap_ops *ops);
>> -int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
>> -		const struct iomap_ops *ops);
>> +int iomap_truncate_page(struct inode *inode, loff_t pos, unsigned int blocksize,
>> +		bool *did_zero, const struct iomap_ops *ops);
>>  vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf,
>>  		const struct iomap_ops *ops);
>>  int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
>> --
>> 2.39.2
>>
>>
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 0926d216a5af..a0a0ac2c659c 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -17,6 +17,7 @@
 #include <linux/bio.h>
 #include <linux/sched/signal.h>
 #include <linux/migrate.h>
+#include <linux/math64.h>
 #include "trace.h"
 
 #include "../internal.h"
@@ -1445,11 +1446,15 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
 EXPORT_SYMBOL_GPL(iomap_zero_range);
 
 int
-iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
-		const struct iomap_ops *ops)
+iomap_truncate_page(struct inode *inode, loff_t pos, unsigned int blocksize,
+		bool *did_zero, const struct iomap_ops *ops)
 {
-	unsigned int blocksize = i_blocksize(inode);
-	unsigned int off = pos & (blocksize - 1);
+	unsigned int off;
+
+	if (is_power_of_2(blocksize))
+		off = pos & (blocksize - 1);
+	else
+		div_u64_rem(pos, blocksize, &off);
 
 	/* Block boundary? Nothing to do */
 	if (!off)
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 2857ef1b0272..31ac07bb8425 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1467,10 +1467,11 @@ xfs_truncate_page(
 	bool *did_zero)
 {
 	struct inode *inode = VFS_I(ip);
+	unsigned int blocksize = i_blocksize(inode);
 
 	if (IS_DAX(inode))
 		return dax_truncate_page(inode, pos, did_zero,
 				&xfs_dax_write_iomap_ops);
-	return iomap_truncate_page(inode, pos, did_zero,
+	return iomap_truncate_page(inode, pos, blocksize, did_zero,
 			&xfs_buffered_write_iomap_ops);
 }
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 6fc1c858013d..d67bf86ec582 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -273,8 +273,8 @@ int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
 		const struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
 		bool *did_zero, const struct iomap_ops *ops);
-int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
-		const struct iomap_ops *ops);
+int iomap_truncate_page(struct inode *inode, loff_t pos, unsigned int blocksize,
+		bool *did_zero, const struct iomap_ops *ops);
 vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf,
 		const struct iomap_ops *ops);
 int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,