Message ID | 20240429174746.2132161-12-john.g.garry@oracle.com (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Series | block atomic writes for XFS | expand |
On Mon, Apr 29, 2024 at 05:47:36PM +0000, John Garry wrote: > For when forcealign is enabled, blocks in an inode need to be unmapped > according to extent alignment, like what is already done for rtvol. > > Signed-off-by: John Garry <john.g.garry@oracle.com> > --- > fs/xfs/libxfs/xfs_bmap.c | 39 +++++++++++++++++++++++++++++++++------ > fs/xfs/xfs_inode.h | 5 +++++ > 2 files changed, 38 insertions(+), 6 deletions(-) > > diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c > index 4f39a43d78a7..4a78ab193753 100644 > --- a/fs/xfs/libxfs/xfs_bmap.c > +++ b/fs/xfs/libxfs/xfs_bmap.c > @@ -5339,6 +5339,15 @@ xfs_bmap_del_extent_real( > return 0; > } > > +/* Return the offset of an block number within an extent for forcealign. */ > +static xfs_extlen_t > +xfs_forcealign_extent_offset( > + struct xfs_inode *ip, > + xfs_fsblock_t bno) > +{ > + return bno & (ip->i_extsize - 1); > +} > + > /* > * Unmap (remove) blocks from a file. > * If nexts is nonzero then the number of extents to remove is limited to > @@ -5361,6 +5370,7 @@ __xfs_bunmapi( > struct xfs_bmbt_irec got; /* current extent record */ > struct xfs_ifork *ifp; /* inode fork pointer */ > int isrt; /* freeing in rt area */ > + int isforcealign; /* freeing for file inode with forcealign */ > int logflags; /* transaction logging flags */ > xfs_extlen_t mod; /* rt extent offset */ > struct xfs_mount *mp = ip->i_mount; > @@ -5397,7 +5407,10 @@ __xfs_bunmapi( > return 0; > } > XFS_STATS_INC(mp, xs_blk_unmap); > - isrt = xfs_ifork_is_realtime(ip, whichfork); > + isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); Why did you change this check? What's wrong with xfs_ifork_is_realtime(), and if there is something wrong, why shouldn't xfs_ifork_is_relatime() get fixed? > + isforcealign = (whichfork == XFS_DATA_FORK) && > + xfs_inode_has_forcealign(ip) && > + xfs_inode_has_extsize(ip) && ip->i_extsize > 1; This is one of the reasons why I said xfs_inode_has_forcealign() should be checking that extent size hints should be checked in that helper.... > end = start + len; > > if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) { > @@ -5459,11 +5472,15 @@ __xfs_bunmapi( > if (del.br_startoff + del.br_blockcount > end + 1) > del.br_blockcount = end + 1 - del.br_startoff; > > - if (!isrt || (flags & XFS_BMAPI_REMAP)) > + if ((!isrt && !isforcealign) || (flags & XFS_BMAPI_REMAP)) > goto delete; > > - mod = xfs_rtb_to_rtxoff(mp, > - del.br_startblock + del.br_blockcount); > + if (isrt) > + mod = xfs_rtb_to_rtxoff(mp, > + del.br_startblock + del.br_blockcount); > + else if (isforcealign) > + mod = xfs_forcealign_extent_offset(ip, > + del.br_startblock + del.br_blockcount); There's got to be a cleaner way to do this. We already know that either isrt or isforcealign must be set here, so there's no need for the "else if" construct. Also, forcealign should take precedence over realtime, so that forcealign will work on realtime devices as well. I'd change this code to call a wrapper like: mod = xfs_bunmapi_align(ip, del.br_startblock + del.br_blockcount); static xfs_extlen_t xfs_bunmapi_align( struct xfs_inode *ip, xfs_fsblock_t bno) { if (!XFS_INODE_IS_REALTIME(ip)) { ASSERT(xfs_inode_has_forcealign(ip)) if (is_power_of_2(ip->i_extsize)) return bno & (ip->i_extsize - 1); return do_div(bno, ip->i_extsize); } return xfs_rtb_to_rtxoff(ip->i_mount, bno); } > if (mod) { > /* > * Realtime extent not lined up at the end. > @@ -5511,9 +5528,19 @@ __xfs_bunmapi( > goto nodelete; > } > > - mod = xfs_rtb_to_rtxoff(mp, del.br_startblock); > + if (isrt) > + mod = xfs_rtb_to_rtxoff(mp, del.br_startblock); > + else if (isforcealign) > + mod = xfs_forcealign_extent_offset(ip, > + del.br_startblock); > + mod = xfs_bunmapi_align(ip, del.br_startblock); > if (mod) { > - xfs_extlen_t off = mp->m_sb.sb_rextsize - mod; > + xfs_extlen_t off; > + > + if (isrt) > + off = mp->m_sb.sb_rextsize - mod; > + else if (isforcealign) > + off = ip->i_extsize - mod; if (forcealign) off = ip->i_extsize - mod; else off = mp->m_sb.sb_rextsize - mod; -Dave.
On 01/05/2024 01:10, Dave Chinner wrote: > On Mon, Apr 29, 2024 at 05:47:36PM +0000, John Garry wrote: >> For when forcealign is enabled, blocks in an inode need to be unmapped >> according to extent alignment, like what is already done for rtvol. >> >> Signed-off-by: John Garry <john.g.garry@oracle.com> >> --- >> fs/xfs/libxfs/xfs_bmap.c | 39 +++++++++++++++++++++++++++++++++------ >> fs/xfs/xfs_inode.h | 5 +++++ >> 2 files changed, 38 insertions(+), 6 deletions(-) >> >> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c >> index 4f39a43d78a7..4a78ab193753 100644 >> --- a/fs/xfs/libxfs/xfs_bmap.c >> +++ b/fs/xfs/libxfs/xfs_bmap.c >> @@ -5339,6 +5339,15 @@ xfs_bmap_del_extent_real( >> return 0; >> } >> >> +/* Return the offset of an block number within an extent for forcealign. */ >> +static xfs_extlen_t >> +xfs_forcealign_extent_offset( >> + struct xfs_inode *ip, >> + xfs_fsblock_t bno) >> +{ >> + return bno & (ip->i_extsize - 1); >> +} >> + >> /* >> * Unmap (remove) blocks from a file. >> * If nexts is nonzero then the number of extents to remove is limited to >> @@ -5361,6 +5370,7 @@ __xfs_bunmapi( >> struct xfs_bmbt_irec got; /* current extent record */ >> struct xfs_ifork *ifp; /* inode fork pointer */ >> int isrt; /* freeing in rt area */ >> + int isforcealign; /* freeing for file inode with forcealign */ >> int logflags; /* transaction logging flags */ >> xfs_extlen_t mod; /* rt extent offset */ >> struct xfs_mount *mp = ip->i_mount; >> @@ -5397,7 +5407,10 @@ __xfs_bunmapi( >> return 0; >> } >> XFS_STATS_INC(mp, xs_blk_unmap); >> - isrt = xfs_ifork_is_realtime(ip, whichfork); >> + isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); > > Why did you change this check? What's wrong with > xfs_ifork_is_realtime(), and if there is something wrong, why > shouldn't xfs_ifork_is_relatime() get fixed? oops, I should have not made that change. I must have changed it when debugging and not reverted it. > >> + isforcealign = (whichfork == XFS_DATA_FORK) && >> + xfs_inode_has_forcealign(ip) && >> + xfs_inode_has_extsize(ip) && ip->i_extsize > 1; > > This is one of the reasons why I said xfs_inode_has_forcealign() > should be checking that extent size hints should be checked in that > helper.... Right. In this particular case, I found that directories may be considered as well if we don't check for xfs_inode_has_extsize() (which we don't want). > >> end = start + len; >> >> if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) { >> @@ -5459,11 +5472,15 @@ __xfs_bunmapi( >> if (del.br_startoff + del.br_blockcount > end + 1) >> del.br_blockcount = end + 1 - del.br_startoff; >> >> - if (!isrt || (flags & XFS_BMAPI_REMAP)) >> + if ((!isrt && !isforcealign) || (flags & XFS_BMAPI_REMAP)) >> goto delete; >> >> - mod = xfs_rtb_to_rtxoff(mp, >> - del.br_startblock + del.br_blockcount); >> + if (isrt) >> + mod = xfs_rtb_to_rtxoff(mp, >> + del.br_startblock + del.br_blockcount); >> + else if (isforcealign) >> + mod = xfs_forcealign_extent_offset(ip, >> + del.br_startblock + del.br_blockcount); > > There's got to be a cleaner way to do this. > > We already know that either isrt or isforcealign must be set here, > so there's no need for the "else if" construct. right > > Also, forcealign should take precedence over realtime, so that > forcealign will work on realtime devices as well. I'd change this > code to call a wrapper like: > > mod = xfs_bunmapi_align(ip, del.br_startblock + del.br_blockcount); > > static xfs_extlen_t > xfs_bunmapi_align( > struct xfs_inode *ip, > xfs_fsblock_t bno) > { > if (!XFS_INODE_IS_REALTIME(ip)) { > ASSERT(xfs_inode_has_forcealign(ip)) > if (is_power_of_2(ip->i_extsize)) > return bno & (ip->i_extsize - 1); > return do_div(bno, ip->i_extsize); > } > return xfs_rtb_to_rtxoff(ip->i_mount, bno); > } ok, that's neater Thanks, John
Hi Dave, >> >> if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) { >> @@ -5459,11 +5472,15 @@ __xfs_bunmapi( >> if (del.br_startoff + del.br_blockcount > end + 1) >> del.br_blockcount = end + 1 - del.br_startoff; >> >> - if (!isrt || (flags & XFS_BMAPI_REMAP)) >> + if ((!isrt && !isforcealign) || (flags & XFS_BMAPI_REMAP)) >> goto delete; >> >> - mod = xfs_rtb_to_rtxoff(mp, >> - del.br_startblock + del.br_blockcount); >> + if (isrt) >> + mod = xfs_rtb_to_rtxoff(mp, >> + del.br_startblock + del.br_blockcount); >> + else if (isforcealign) >> + mod = xfs_forcealign_extent_offset(ip, >> + del.br_startblock + del.br_blockcount); > There's got to be a cleaner way to do this. > > We already know that either isrt or isforcealign must be set here, > so there's no need for the "else if" construct. > > Also, forcealign should take precedence over realtime, so that > forcealign will work on realtime devices as well. I'd change this > code to call a wrapper like: > > mod = xfs_bunmapi_align(ip, del.br_startblock + del.br_blockcount); > > static xfs_extlen_t > xfs_bunmapi_align( > struct xfs_inode *ip, > xfs_fsblock_t bno) > { > if (!XFS_INODE_IS_REALTIME(ip)) { > ASSERT(xfs_inode_has_forcealign(ip)) > if (is_power_of_2(ip->i_extsize)) > return bno & (ip->i_extsize - 1); > return do_div(bno, ip->i_extsize); > } > return xfs_rtb_to_rtxoff(ip->i_mount, bno); > } I have made that change according to your suggestion. However, now that the forcealign power-of-2 extsize restriction has been lifted, I am finding another bug. I will just mention this now, as I want to go back to that other issue https://lore.kernel.org/linux-xfs/20240429174746.2132161-1-john.g.garry@oracle.com/T/#mebd7e97dfd0f12219bf92f289c41f62bf2abcff5 However this new one is pretty simple to reproduce. We create a file: ext: logical_offset: physical_offset: length: expected: flags: 0: 0.. 1775: 40200.. 41975: 1776: last,eof /root/mnt2/file_22: 1 extent found The forcealign extsize is 98304, i.e. 24 blks. And then try to delete it, and get this: [ 17.604237] XFS: Assertion failed: tp->t_blk_res > 0, file: fs/xfs/libxfs/xfs_bmap.c, line: 5599 [ 17.605908] ------------[ cut here ]------------ [ 17.606884] kernel BUG at fs/xfs/xfs_message.c:102! [ 17.607917] Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 17.609134] CPU: 3 PID: 240 Comm: kworker/3:2 Not tainted 6.10.0-rc1-00096-g759a4497daa7-dirty #2553 [ 17.610606] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014 [ 17.612619] Workqueue: xfs-inodegc/sda xfs_inodegc_worker [ 17.613682] RIP: 0010:assfail+0x36/0x40 [ 17.614134] Code: c2 18 1e 1d 9d 48 89 f1 48 89 fe 48 c7 c7 43 f2 12 9d e8 7d fd ff ff 80 3d ae 86 8d 01 00 75 09 90 0f 0b 90 c3 cc cc cc cc 90 <0f> 0b 0f 1f 84 00 00 00 00 00 90 90 90 90 900 [ 17.616478] RSP: 0018:ff4887cac0973c28 EFLAGS: 00010202 [ 17.617080] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 000000007fffffff [ 17.617899] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff9d12f243 [ 17.618717] RBP: ff4887cac0973d40 R08: 0000000000000000 R09: 000000000000000a [ 17.619548] R10: 000000000000000a R11: f000000000000000 R12: ff360ff881d88000[ 17.620360] R13: 0000000000000000 R14: ff360ff8efa32040 R15: 000ffffffffe0000 [ 17.620367] FS: 0000000000000000(0000) GS:ff360ffa75cc0000(0000) knlGS:0000000000000000 [ 17.620369] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 17.620371] CR2: 00007f213b4ef008 CR3: 000000011cfca003 CR4: 0000000000771ef0 [ 17.620372] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 17.620374] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 17.620375] PKRU: 55555554 [ 17.620376] Call Trace: [ 17.620384] <TASK> [ 17.624278] ? die+0x32/0x90 [ 17.624284] ? do_trap+0xd8/0x100 [ 17.624286] ? assfail+0x36/0x40 [ 17.624288] ? do_error_trap+0x60/0x80 [ 17.624289] ? assfail+0x36/0x40 [ 17.624292] ? exc_invalid_op+0x53/0x70 [ 17.628287] ? assfail+0x36/0x40 [ 17.628291] ? asm_exc_invalid_op+0x1a/0x20 [ 17.628295] ? assfail+0x36/0x40 [ 17.628297] ? assfail+0x23/0x40 [ 17.628299] __xfs_bunmapi+0xb87/0xeb0 [ 17.628304] ? xfs_log_reserve+0x18f/0x210 [ 17.629447] xfs_bunmapi_range+0x62/0xd0 [ 17.631699] xfs_itruncate_extents_flags+0x1c4/0x410 [ 17.631703] xfs_inactive_truncate+0xba/0x140 [ 17.631705] xfs_inactive+0x331/0x3d0 [ 17.631707] xfs_inodegc_worker+0xb8/0x190 [ 17.631709] process_one_work+0x157/0x380 [ 17.633949] worker_thread+0x2ba/0x3e0 [ 17.633953] ? __pfx_worker_thread+0x10/0x10 [ 17.633954] kthread+0xce/0x100 [ 17.633958] ? __pfx_kthread+0x10/0x10 [ 17.633960] ret_from_fork+0x2c/0x50 [ 17.633963] ? __pfx_kthread+0x10/0x10 [ 17.633965] ret_from_fork_asm+0x1a/0x30 [ 17.636314] </TASK> [ 17.640377] Modules linked in: [ 17.642375] ---[ end trace 0000000000000000 ]--- Maybe something is going wrong with the AG bno vs fsbno indexing. That extent allocated has fsbno=50552 (% 24 != 0). The agsize is 22416 fsb. That 50552 comes from xfs_alloc_vextent_finish() with args->fsbno=50552 = XFS_AGB_TO_FSB(mp, agno=1, agbno=17784) = 32K (=roundup_power_of_2(22416)) + 17784 So the agbno is aligned, but the fsbno is not. In __xfs_bunmapi(), at this point: mod = xfs_bunmapi_align(ip, del.br_startblock); if (mod) { xfs_extlen_t off; if (isforcealign) { off = ip->i_extsize - mod; } else { ASSERT(isrt); off = mp->m_sb.sb_rextsize - mod; } /* * Realtime extent is lined up at the end but not * at the front. We'll get rid of full extents if * we can. */ mod=8 del.br_startblock(48776) + del.br_blockcount(1776)=50552 Since this code was originally only used for rt, we may be missing setting some struct members which were being set for rt. For example, xfs_trans_alloc() accepts rtextents value, and maybe we should be doing similar for forcealign. Or xfs_fsb_to_db() has special RT handling, but I doubt this is the problem. I have no such issue in using a power-of-2 extsize. I do realise that I need to share the full code, but I'm reluctant to post with known bugs. Please let me know if you have an idea. I'll look at this further when I get a chance. Thanks, John
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 4f39a43d78a7..4a78ab193753 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5339,6 +5339,15 @@ xfs_bmap_del_extent_real( return 0; } +/* Return the offset of an block number within an extent for forcealign. */ +static xfs_extlen_t +xfs_forcealign_extent_offset( + struct xfs_inode *ip, + xfs_fsblock_t bno) +{ + return bno & (ip->i_extsize - 1); +} + /* * Unmap (remove) blocks from a file. * If nexts is nonzero then the number of extents to remove is limited to @@ -5361,6 +5370,7 @@ __xfs_bunmapi( struct xfs_bmbt_irec got; /* current extent record */ struct xfs_ifork *ifp; /* inode fork pointer */ int isrt; /* freeing in rt area */ + int isforcealign; /* freeing for file inode with forcealign */ int logflags; /* transaction logging flags */ xfs_extlen_t mod; /* rt extent offset */ struct xfs_mount *mp = ip->i_mount; @@ -5397,7 +5407,10 @@ __xfs_bunmapi( return 0; } XFS_STATS_INC(mp, xs_blk_unmap); - isrt = xfs_ifork_is_realtime(ip, whichfork); + isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); + isforcealign = (whichfork == XFS_DATA_FORK) && + xfs_inode_has_forcealign(ip) && + xfs_inode_has_extsize(ip) && ip->i_extsize > 1; end = start + len; if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) { @@ -5459,11 +5472,15 @@ __xfs_bunmapi( if (del.br_startoff + del.br_blockcount > end + 1) del.br_blockcount = end + 1 - del.br_startoff; - if (!isrt || (flags & XFS_BMAPI_REMAP)) + if ((!isrt && !isforcealign) || (flags & XFS_BMAPI_REMAP)) goto delete; - mod = xfs_rtb_to_rtxoff(mp, - del.br_startblock + del.br_blockcount); + if (isrt) + mod = xfs_rtb_to_rtxoff(mp, + del.br_startblock + del.br_blockcount); + else if (isforcealign) + mod = xfs_forcealign_extent_offset(ip, + del.br_startblock + del.br_blockcount); if (mod) { /* * Realtime extent not lined up at the end. @@ -5511,9 +5528,19 @@ __xfs_bunmapi( goto nodelete; } - mod = xfs_rtb_to_rtxoff(mp, del.br_startblock); + if (isrt) + mod = xfs_rtb_to_rtxoff(mp, del.br_startblock); + else if (isforcealign) + mod = xfs_forcealign_extent_offset(ip, + del.br_startblock); + if (mod) { - xfs_extlen_t off = mp->m_sb.sb_rextsize - mod; + xfs_extlen_t off; + + if (isrt) + off = mp->m_sb.sb_rextsize - mod; + else if (isforcealign) + off = ip->i_extsize - mod; /* * Realtime extent is lined up at the end but not diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 065028789473..3f13943ab3a3 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -316,6 +316,11 @@ static inline bool xfs_inode_has_forcealign(struct xfs_inode *ip) return ip->i_diflags2 & XFS_DIFLAG2_FORCEALIGN; } +static inline bool xfs_inode_has_extsize(struct xfs_inode *ip) +{ + return ip->i_diflags & XFS_DIFLAG_EXTSIZE; +} + /* * Return the buftarg used for data allocations on a given inode. */
For when forcealign is enabled, blocks in an inode need to be unmapped according to extent alignment, like what is already done for rtvol. Signed-off-by: John Garry <john.g.garry@oracle.com> --- fs/xfs/libxfs/xfs_bmap.c | 39 +++++++++++++++++++++++++++++++++------ fs/xfs/xfs_inode.h | 5 +++++ 2 files changed, 38 insertions(+), 6 deletions(-)