Message ID | 20200606082745.15174-3-chandanrlinux@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | xfs: Extend per-inode extent counters. | expand |
On Sat, Jun 06, 2020 at 01:57:40PM +0530, Chandan Babu R wrote: > The following error message was noticed when a workload added one > million xattrs, deleted 50% of them and then inserted 400,000 new > xattrs. > > XFS (loop0): xfs_iflush_int: detected corrupt incore inode 131, total extents = -19916, nblocks = 102937, ptr ffff9ce33b098c00 > > The error message was printed during unmounting the filesystem. The > value printed under "total extents" indicates that we overflowed the > per-inode signed 16-bit xattr extent counter. > > Instead of letting this silent corruption occur, this patch checks for > extent counter (both data and xattr) overflow before we assign the > new value to the corresponding in-memory extent counter. > > Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com> > --- > fs/xfs/libxfs/xfs_bmap.c | 92 +++++++++++++++++++++++++++------- > fs/xfs/libxfs/xfs_inode_fork.c | 29 +++++++++++ > fs/xfs/libxfs/xfs_inode_fork.h | 1 + > 3 files changed, 104 insertions(+), 18 deletions(-) > > diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c > index edc63dba007f..798fca5c52af 100644 > --- a/fs/xfs/libxfs/xfs_bmap.c > +++ b/fs/xfs/libxfs/xfs_bmap.c > @@ -906,7 +906,10 @@ xfs_bmap_local_to_extents( > xfs_iext_first(ifp, &icur); > xfs_iext_insert(ip, &icur, &rec, 0); > > - ifp->if_nextents = 1; > + error = xfs_next_set(ip, whichfork, 1); > + if (error) > + goto done; Are you sure that if_nextents == 0 is a precondition here? Technically speaking, this turns an assignment into an increment operation. > + > ip->i_d.di_nblocks = 1; > xfs_trans_mod_dquot_byino(tp, ip, > XFS_TRANS_DQ_BCOUNT, 1L); > @@ -1594,7 +1597,10 @@ xfs_bmap_add_extent_delay_real( > xfs_iext_remove(bma->ip, &bma->icur, state); > xfs_iext_prev(ifp, &bma->icur); > xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT); > - ifp->if_nextents--; > + > + error = xfs_next_set(bma->ip, whichfork, -1); > + if (error) > + goto done; > > if (bma->cur == NULL) > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > @@ -1698,7 +1704,10 @@ xfs_bmap_add_extent_delay_real( > PREV.br_startblock = new->br_startblock; > PREV.br_state = new->br_state; > xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); > - ifp->if_nextents++; > + > + error = xfs_next_set(bma->ip, whichfork, 1); > + if (error) > + goto done; > > if (bma->cur == NULL) > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > @@ -1764,7 +1773,10 @@ xfs_bmap_add_extent_delay_real( > * The left neighbor is not contiguous. > */ > xfs_iext_update_extent(bma->ip, state, &bma->icur, new); > - ifp->if_nextents++; > + > + error = xfs_next_set(bma->ip, whichfork, 1); > + if (error) > + goto done; > > if (bma->cur == NULL) > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > @@ -1851,7 +1863,10 @@ xfs_bmap_add_extent_delay_real( > * The right neighbor is not contiguous. > */ > xfs_iext_update_extent(bma->ip, state, &bma->icur, new); > - ifp->if_nextents++; > + > + error = xfs_next_set(bma->ip, whichfork, 1); > + if (error) > + goto done; > > if (bma->cur == NULL) > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > @@ -1937,7 +1952,10 @@ xfs_bmap_add_extent_delay_real( > xfs_iext_next(ifp, &bma->icur); > xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state); > xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state); > - ifp->if_nextents++; > + > + error = xfs_next_set(bma->ip, whichfork, 1); > + if (error) > + goto done; > > if (bma->cur == NULL) > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > @@ -2141,7 +2159,11 @@ xfs_bmap_add_extent_unwritten_real( > xfs_iext_remove(ip, icur, state); > xfs_iext_prev(ifp, icur); > xfs_iext_update_extent(ip, state, icur, &LEFT); > - ifp->if_nextents -= 2; > + > + error = xfs_next_set(ip, whichfork, -2); > + if (error) > + goto done; > + > if (cur == NULL) > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > else { > @@ -2193,7 +2215,11 @@ xfs_bmap_add_extent_unwritten_real( > xfs_iext_remove(ip, icur, state); > xfs_iext_prev(ifp, icur); > xfs_iext_update_extent(ip, state, icur, &LEFT); > - ifp->if_nextents--; > + > + error = xfs_next_set(ip, whichfork, -1); > + if (error) > + goto done; > + > if (cur == NULL) > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > else { > @@ -2235,7 +2261,10 @@ xfs_bmap_add_extent_unwritten_real( > xfs_iext_remove(ip, icur, state); > xfs_iext_prev(ifp, icur); > xfs_iext_update_extent(ip, state, icur, &PREV); > - ifp->if_nextents--; > + > + error = xfs_next_set(ip, whichfork, -1); > + if (error) > + goto done; > > if (cur == NULL) > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > @@ -2343,7 +2372,10 @@ xfs_bmap_add_extent_unwritten_real( > > xfs_iext_update_extent(ip, state, icur, &PREV); > xfs_iext_insert(ip, icur, new, state); > - ifp->if_nextents++; > + > + error = xfs_next_set(ip, whichfork, 1); > + if (error) > + goto done; > > if (cur == NULL) > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > @@ -2419,7 +2451,10 @@ xfs_bmap_add_extent_unwritten_real( > xfs_iext_update_extent(ip, state, icur, &PREV); > xfs_iext_next(ifp, icur); > xfs_iext_insert(ip, icur, new, state); > - ifp->if_nextents++; > + > + error = xfs_next_set(ip, whichfork, 1); > + if (error) > + goto done; > > if (cur == NULL) > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > @@ -2471,7 +2506,10 @@ xfs_bmap_add_extent_unwritten_real( > xfs_iext_next(ifp, icur); > xfs_iext_insert(ip, icur, &r[1], state); > xfs_iext_insert(ip, icur, &r[0], state); > - ifp->if_nextents += 2; > + > + error = xfs_next_set(ip, whichfork, 2); > + if (error) > + goto done; > > if (cur == NULL) > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > @@ -2787,7 +2825,10 @@ xfs_bmap_add_extent_hole_real( > xfs_iext_remove(ip, icur, state); > xfs_iext_prev(ifp, icur); > xfs_iext_update_extent(ip, state, icur, &left); > - ifp->if_nextents--; > + > + error = xfs_next_set(ip, whichfork, -1); > + if (error) > + goto done; > > if (cur == NULL) { > rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); > @@ -2886,7 +2927,10 @@ xfs_bmap_add_extent_hole_real( > * Insert a new entry. > */ > xfs_iext_insert(ip, icur, new, state); > - ifp->if_nextents++; > + > + error = xfs_next_set(ip, whichfork, 1); > + if (error) > + goto done; > > if (cur == NULL) { > rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); > @@ -5083,7 +5127,10 @@ xfs_bmap_del_extent_real( > */ > xfs_iext_remove(ip, icur, state); > xfs_iext_prev(ifp, icur); > - ifp->if_nextents--; > + > + error = xfs_next_set(ip, whichfork, -1); > + if (error) > + goto done; > > flags |= XFS_ILOG_CORE; > if (!cur) { > @@ -5193,7 +5240,10 @@ xfs_bmap_del_extent_real( > } else > flags |= xfs_ilog_fext(whichfork); > > - ifp->if_nextents++; > + error = xfs_next_set(ip, whichfork, 1); > + if (error) > + goto done; > + > xfs_iext_next(ifp, icur); > xfs_iext_insert(ip, icur, &new, state); > break; > @@ -5660,7 +5710,10 @@ xfs_bmse_merge( > * Update the on-disk extent count, the btree if necessary and log the > * inode. > */ > - ifp->if_nextents--; > + error = xfs_next_set(ip, whichfork, -1); > + if (error) > + goto done; > + > *logflags |= XFS_ILOG_CORE; > if (!cur) { > *logflags |= XFS_ILOG_DEXT; > @@ -6047,7 +6100,10 @@ xfs_bmap_split_extent( > /* Add new extent */ > xfs_iext_next(ifp, &icur); > xfs_iext_insert(ip, &icur, &new, 0); > - ifp->if_nextents++; > + > + error = xfs_next_set(ip, whichfork, 1); > + if (error) > + goto del_cursor; > > if (cur) { > error = xfs_bmbt_lookup_eq(cur, &new, &i); > diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c > index 28b366275ae0..3bf5a2c391bd 100644 > --- a/fs/xfs/libxfs/xfs_inode_fork.c > +++ b/fs/xfs/libxfs/xfs_inode_fork.c > @@ -728,3 +728,32 @@ xfs_ifork_verify_local_attr( > > return 0; > } > + > +int > +xfs_next_set( "next"... please choose an abbreviation that doesn't collide with a common English word. > + struct xfs_inode *ip, > + int whichfork, > + int delta) Delta? I thought this was a setter function? > +{ > + struct xfs_ifork *ifp; > + int64_t nr_exts; > + int64_t max_exts; > + > + ifp = XFS_IFORK_PTR(ip, whichfork); > + > + if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) > + max_exts = MAXEXTNUM; > + else if (whichfork == XFS_ATTR_FORK) > + max_exts = MAXAEXTNUM; > + else > + ASSERT(0); > + > + nr_exts = ifp->if_nextents + delta; Nope, it's a modify function all right. Then it should be named: xfs_nextents_mod(ip, whichfork, delta) > + if ((delta > 0 && nr_exts > max_exts) > + || (delta < 0 && nr_exts < 0)) Line these up, please. e.g., if ((delta > 0 && nr_exts > max_exts) || (delta < 0 && nr_exts < 0)) --D > + return -EOVERFLOW; > + > + ifp->if_nextents = nr_exts; > + > + return 0; > +} > diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h > index a4953e95c4f3..a84ae42ace79 100644 > --- a/fs/xfs/libxfs/xfs_inode_fork.h > +++ b/fs/xfs/libxfs/xfs_inode_fork.h > @@ -173,4 +173,5 @@ extern void xfs_ifork_init_cow(struct xfs_inode *ip); > int xfs_ifork_verify_local_data(struct xfs_inode *ip); > int xfs_ifork_verify_local_attr(struct xfs_inode *ip); > > +int xfs_next_set(struct xfs_inode *ip, int whichfork, int delta); > #endif /* __XFS_INODE_FORK_H__ */ > -- > 2.20.1 >
On Mon, Jun 08, 2020 at 09:24:25AM -0700, Darrick J. Wong wrote: > On Sat, Jun 06, 2020 at 01:57:40PM +0530, Chandan Babu R wrote: > > The following error message was noticed when a workload added one > > million xattrs, deleted 50% of them and then inserted 400,000 new > > xattrs. > > > > XFS (loop0): xfs_iflush_int: detected corrupt incore inode 131, total extents = -19916, nblocks = 102937, ptr ffff9ce33b098c00 > > > > The error message was printed during unmounting the filesystem. The > > value printed under "total extents" indicates that we overflowed the > > per-inode signed 16-bit xattr extent counter. > > > > Instead of letting this silent corruption occur, this patch checks for > > extent counter (both data and xattr) overflow before we assign the > > new value to the corresponding in-memory extent counter. > > > > Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com> > > --- > > fs/xfs/libxfs/xfs_bmap.c | 92 +++++++++++++++++++++++++++------- > > fs/xfs/libxfs/xfs_inode_fork.c | 29 +++++++++++ > > fs/xfs/libxfs/xfs_inode_fork.h | 1 + > > 3 files changed, 104 insertions(+), 18 deletions(-) > > > > diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c > > index edc63dba007f..798fca5c52af 100644 > > --- a/fs/xfs/libxfs/xfs_bmap.c > > +++ b/fs/xfs/libxfs/xfs_bmap.c > > @@ -906,7 +906,10 @@ xfs_bmap_local_to_extents( > > xfs_iext_first(ifp, &icur); > > xfs_iext_insert(ip, &icur, &rec, 0); > > > > - ifp->if_nextents = 1; > > + error = xfs_next_set(ip, whichfork, 1); > > + if (error) > > + goto done; > > Are you sure that if_nextents == 0 is a precondition here? Technically > speaking, this turns an assignment into an increment operation. > > > + > > ip->i_d.di_nblocks = 1; > > xfs_trans_mod_dquot_byino(tp, ip, > > XFS_TRANS_DQ_BCOUNT, 1L); > > @@ -1594,7 +1597,10 @@ xfs_bmap_add_extent_delay_real( > > xfs_iext_remove(bma->ip, &bma->icur, state); > > xfs_iext_prev(ifp, &bma->icur); > > xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT); > > - ifp->if_nextents--; > > + > > + error = xfs_next_set(bma->ip, whichfork, -1); > > + if (error) > > + goto done; > > > > if (bma->cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > @@ -1698,7 +1704,10 @@ xfs_bmap_add_extent_delay_real( > > PREV.br_startblock = new->br_startblock; > > PREV.br_state = new->br_state; > > xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); > > - ifp->if_nextents++; > > + > > + error = xfs_next_set(bma->ip, whichfork, 1); > > + if (error) > > + goto done; > > > > if (bma->cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > @@ -1764,7 +1773,10 @@ xfs_bmap_add_extent_delay_real( > > * The left neighbor is not contiguous. > > */ > > xfs_iext_update_extent(bma->ip, state, &bma->icur, new); > > - ifp->if_nextents++; > > + > > + error = xfs_next_set(bma->ip, whichfork, 1); > > + if (error) > > + goto done; > > > > if (bma->cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > @@ -1851,7 +1863,10 @@ xfs_bmap_add_extent_delay_real( > > * The right neighbor is not contiguous. > > */ > > xfs_iext_update_extent(bma->ip, state, &bma->icur, new); > > - ifp->if_nextents++; > > + > > + error = xfs_next_set(bma->ip, whichfork, 1); > > + if (error) > > + goto done; > > > > if (bma->cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > @@ -1937,7 +1952,10 @@ xfs_bmap_add_extent_delay_real( > > xfs_iext_next(ifp, &bma->icur); > > xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state); > > xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state); > > - ifp->if_nextents++; > > + > > + error = xfs_next_set(bma->ip, whichfork, 1); > > + if (error) > > + goto done; > > > > if (bma->cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > @@ -2141,7 +2159,11 @@ xfs_bmap_add_extent_unwritten_real( > > xfs_iext_remove(ip, icur, state); > > xfs_iext_prev(ifp, icur); > > xfs_iext_update_extent(ip, state, icur, &LEFT); > > - ifp->if_nextents -= 2; > > + > > + error = xfs_next_set(ip, whichfork, -2); > > + if (error) > > + goto done; > > + > > if (cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > else { > > @@ -2193,7 +2215,11 @@ xfs_bmap_add_extent_unwritten_real( > > xfs_iext_remove(ip, icur, state); > > xfs_iext_prev(ifp, icur); > > xfs_iext_update_extent(ip, state, icur, &LEFT); > > - ifp->if_nextents--; > > + > > + error = xfs_next_set(ip, whichfork, -1); > > + if (error) > > + goto done; > > + > > if (cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > else { > > @@ -2235,7 +2261,10 @@ xfs_bmap_add_extent_unwritten_real( > > xfs_iext_remove(ip, icur, state); > > xfs_iext_prev(ifp, icur); > > xfs_iext_update_extent(ip, state, icur, &PREV); > > - ifp->if_nextents--; > > + > > + error = xfs_next_set(ip, whichfork, -1); > > + if (error) > > + goto done; > > > > if (cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > @@ -2343,7 +2372,10 @@ xfs_bmap_add_extent_unwritten_real( > > > > xfs_iext_update_extent(ip, state, icur, &PREV); > > xfs_iext_insert(ip, icur, new, state); > > - ifp->if_nextents++; > > + > > + error = xfs_next_set(ip, whichfork, 1); > > + if (error) > > + goto done; > > > > if (cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > @@ -2419,7 +2451,10 @@ xfs_bmap_add_extent_unwritten_real( > > xfs_iext_update_extent(ip, state, icur, &PREV); > > xfs_iext_next(ifp, icur); > > xfs_iext_insert(ip, icur, new, state); > > - ifp->if_nextents++; > > + > > + error = xfs_next_set(ip, whichfork, 1); > > + if (error) > > + goto done; > > > > if (cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > @@ -2471,7 +2506,10 @@ xfs_bmap_add_extent_unwritten_real( > > xfs_iext_next(ifp, icur); > > xfs_iext_insert(ip, icur, &r[1], state); > > xfs_iext_insert(ip, icur, &r[0], state); > > - ifp->if_nextents += 2; > > + > > + error = xfs_next_set(ip, whichfork, 2); > > + if (error) > > + goto done; > > > > if (cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > @@ -2787,7 +2825,10 @@ xfs_bmap_add_extent_hole_real( > > xfs_iext_remove(ip, icur, state); > > xfs_iext_prev(ifp, icur); > > xfs_iext_update_extent(ip, state, icur, &left); > > - ifp->if_nextents--; > > + > > + error = xfs_next_set(ip, whichfork, -1); > > + if (error) > > + goto done; > > > > if (cur == NULL) { > > rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); > > @@ -2886,7 +2927,10 @@ xfs_bmap_add_extent_hole_real( > > * Insert a new entry. > > */ > > xfs_iext_insert(ip, icur, new, state); > > - ifp->if_nextents++; > > + > > + error = xfs_next_set(ip, whichfork, 1); > > + if (error) > > + goto done; > > > > if (cur == NULL) { > > rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); > > @@ -5083,7 +5127,10 @@ xfs_bmap_del_extent_real( > > */ > > xfs_iext_remove(ip, icur, state); > > xfs_iext_prev(ifp, icur); > > - ifp->if_nextents--; > > + > > + error = xfs_next_set(ip, whichfork, -1); > > + if (error) > > + goto done; > > > > flags |= XFS_ILOG_CORE; > > if (!cur) { > > @@ -5193,7 +5240,10 @@ xfs_bmap_del_extent_real( > > } else > > flags |= xfs_ilog_fext(whichfork); > > > > - ifp->if_nextents++; > > + error = xfs_next_set(ip, whichfork, 1); > > + if (error) > > + goto done; > > + > > xfs_iext_next(ifp, icur); > > xfs_iext_insert(ip, icur, &new, state); > > break; > > @@ -5660,7 +5710,10 @@ xfs_bmse_merge( > > * Update the on-disk extent count, the btree if necessary and log the > > * inode. > > */ > > - ifp->if_nextents--; > > + error = xfs_next_set(ip, whichfork, -1); > > + if (error) > > + goto done; > > + > > *logflags |= XFS_ILOG_CORE; > > if (!cur) { > > *logflags |= XFS_ILOG_DEXT; > > @@ -6047,7 +6100,10 @@ xfs_bmap_split_extent( > > /* Add new extent */ > > xfs_iext_next(ifp, &icur); > > xfs_iext_insert(ip, &icur, &new, 0); > > - ifp->if_nextents++; > > + > > + error = xfs_next_set(ip, whichfork, 1); > > + if (error) > > + goto del_cursor; > > > > if (cur) { > > error = xfs_bmbt_lookup_eq(cur, &new, &i); > > diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c > > index 28b366275ae0..3bf5a2c391bd 100644 > > --- a/fs/xfs/libxfs/xfs_inode_fork.c > > +++ b/fs/xfs/libxfs/xfs_inode_fork.c > > @@ -728,3 +728,32 @@ xfs_ifork_verify_local_attr( > > > > return 0; > > } > > + > > +int > > +xfs_next_set( > > "next"... please choose an abbreviation that doesn't collide with a > common English word. > > > + struct xfs_inode *ip, > > + int whichfork, > > + int delta) > > Delta? I thought this was a setter function? > > > +{ > > + struct xfs_ifork *ifp; > > + int64_t nr_exts; > > + int64_t max_exts; > > + > > + ifp = XFS_IFORK_PTR(ip, whichfork); > > + > > + if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) > > + max_exts = MAXEXTNUM; > > + else if (whichfork == XFS_ATTR_FORK) > > + max_exts = MAXAEXTNUM; > > + else > > + ASSERT(0); > > + > > + nr_exts = ifp->if_nextents + delta; > > Nope, it's a modify function all right. Then it should be named: > > xfs_nextents_mod(ip, whichfork, delta) > > > + if ((delta > 0 && nr_exts > max_exts) > > + || (delta < 0 && nr_exts < 0)) > > Line these up, please. e.g., > > if ((delta > 0 && nr_exts > max_exts) || > (delta < 0 && nr_exts < 0)) > > --D > > > + return -EOVERFLOW; Oh, also, shouldn't this be EFBIG ("File too big")? --D > > + > > + ifp->if_nextents = nr_exts; > > + > > + return 0; > > +} > > diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h > > index a4953e95c4f3..a84ae42ace79 100644 > > --- a/fs/xfs/libxfs/xfs_inode_fork.h > > +++ b/fs/xfs/libxfs/xfs_inode_fork.h > > @@ -173,4 +173,5 @@ extern void xfs_ifork_init_cow(struct xfs_inode *ip); > > int xfs_ifork_verify_local_data(struct xfs_inode *ip); > > int xfs_ifork_verify_local_attr(struct xfs_inode *ip); > > > > +int xfs_next_set(struct xfs_inode *ip, int whichfork, int delta); > > #endif /* __XFS_INODE_FORK_H__ */ > > -- > > 2.20.1 > >
On Monday 8 June 2020 9:54:25 PM IST Darrick J. Wong wrote: > On Sat, Jun 06, 2020 at 01:57:40PM +0530, Chandan Babu R wrote: > > The following error message was noticed when a workload added one > > million xattrs, deleted 50% of them and then inserted 400,000 new > > xattrs. > > > > XFS (loop0): xfs_iflush_int: detected corrupt incore inode 131, total extents = -19916, nblocks = 102937, ptr ffff9ce33b098c00 > > > > The error message was printed during unmounting the filesystem. The > > value printed under "total extents" indicates that we overflowed the > > per-inode signed 16-bit xattr extent counter. > > > > Instead of letting this silent corruption occur, this patch checks for > > extent counter (both data and xattr) overflow before we assign the > > new value to the corresponding in-memory extent counter. > > > > Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com> > > --- > > fs/xfs/libxfs/xfs_bmap.c | 92 +++++++++++++++++++++++++++------- > > fs/xfs/libxfs/xfs_inode_fork.c | 29 +++++++++++ > > fs/xfs/libxfs/xfs_inode_fork.h | 1 + > > 3 files changed, 104 insertions(+), 18 deletions(-) > > > > diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c > > index edc63dba007f..798fca5c52af 100644 > > --- a/fs/xfs/libxfs/xfs_bmap.c > > +++ b/fs/xfs/libxfs/xfs_bmap.c > > @@ -906,7 +906,10 @@ xfs_bmap_local_to_extents( > > xfs_iext_first(ifp, &icur); > > xfs_iext_insert(ip, &icur, &rec, 0); > > > > - ifp->if_nextents = 1; > > + error = xfs_next_set(ip, whichfork, 1); > > + if (error) > > + goto done; > > Are you sure that if_nextents == 0 is a precondition here? Technically > speaking, this turns an assignment into an increment operation. Hmm. I didn't pay attention to that. I will check and update the code appropriately. Thanks for pointing this out. > > > + > > ip->i_d.di_nblocks = 1; > > xfs_trans_mod_dquot_byino(tp, ip, > > XFS_TRANS_DQ_BCOUNT, 1L); > > @@ -1594,7 +1597,10 @@ xfs_bmap_add_extent_delay_real( > > xfs_iext_remove(bma->ip, &bma->icur, state); > > xfs_iext_prev(ifp, &bma->icur); > > xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT); > > - ifp->if_nextents--; > > + > > + error = xfs_next_set(bma->ip, whichfork, -1); > > + if (error) > > + goto done; > > > > if (bma->cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > @@ -1698,7 +1704,10 @@ xfs_bmap_add_extent_delay_real( > > PREV.br_startblock = new->br_startblock; > > PREV.br_state = new->br_state; > > xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); > > - ifp->if_nextents++; > > + > > + error = xfs_next_set(bma->ip, whichfork, 1); > > + if (error) > > + goto done; > > > > if (bma->cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > @@ -1764,7 +1773,10 @@ xfs_bmap_add_extent_delay_real( > > * The left neighbor is not contiguous. > > */ > > xfs_iext_update_extent(bma->ip, state, &bma->icur, new); > > - ifp->if_nextents++; > > + > > + error = xfs_next_set(bma->ip, whichfork, 1); > > + if (error) > > + goto done; > > > > if (bma->cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > @@ -1851,7 +1863,10 @@ xfs_bmap_add_extent_delay_real( > > * The right neighbor is not contiguous. > > */ > > xfs_iext_update_extent(bma->ip, state, &bma->icur, new); > > - ifp->if_nextents++; > > + > > + error = xfs_next_set(bma->ip, whichfork, 1); > > + if (error) > > + goto done; > > > > if (bma->cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > @@ -1937,7 +1952,10 @@ xfs_bmap_add_extent_delay_real( > > xfs_iext_next(ifp, &bma->icur); > > xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state); > > xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state); > > - ifp->if_nextents++; > > + > > + error = xfs_next_set(bma->ip, whichfork, 1); > > + if (error) > > + goto done; > > > > if (bma->cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > @@ -2141,7 +2159,11 @@ xfs_bmap_add_extent_unwritten_real( > > xfs_iext_remove(ip, icur, state); > > xfs_iext_prev(ifp, icur); > > xfs_iext_update_extent(ip, state, icur, &LEFT); > > - ifp->if_nextents -= 2; > > + > > + error = xfs_next_set(ip, whichfork, -2); > > + if (error) > > + goto done; > > + > > if (cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > else { > > @@ -2193,7 +2215,11 @@ xfs_bmap_add_extent_unwritten_real( > > xfs_iext_remove(ip, icur, state); > > xfs_iext_prev(ifp, icur); > > xfs_iext_update_extent(ip, state, icur, &LEFT); > > - ifp->if_nextents--; > > + > > + error = xfs_next_set(ip, whichfork, -1); > > + if (error) > > + goto done; > > + > > if (cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > else { > > @@ -2235,7 +2261,10 @@ xfs_bmap_add_extent_unwritten_real( > > xfs_iext_remove(ip, icur, state); > > xfs_iext_prev(ifp, icur); > > xfs_iext_update_extent(ip, state, icur, &PREV); > > - ifp->if_nextents--; > > + > > + error = xfs_next_set(ip, whichfork, -1); > > + if (error) > > + goto done; > > > > if (cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > @@ -2343,7 +2372,10 @@ xfs_bmap_add_extent_unwritten_real( > > > > xfs_iext_update_extent(ip, state, icur, &PREV); > > xfs_iext_insert(ip, icur, new, state); > > - ifp->if_nextents++; > > + > > + error = xfs_next_set(ip, whichfork, 1); > > + if (error) > > + goto done; > > > > if (cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > @@ -2419,7 +2451,10 @@ xfs_bmap_add_extent_unwritten_real( > > xfs_iext_update_extent(ip, state, icur, &PREV); > > xfs_iext_next(ifp, icur); > > xfs_iext_insert(ip, icur, new, state); > > - ifp->if_nextents++; > > + > > + error = xfs_next_set(ip, whichfork, 1); > > + if (error) > > + goto done; > > > > if (cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > @@ -2471,7 +2506,10 @@ xfs_bmap_add_extent_unwritten_real( > > xfs_iext_next(ifp, icur); > > xfs_iext_insert(ip, icur, &r[1], state); > > xfs_iext_insert(ip, icur, &r[0], state); > > - ifp->if_nextents += 2; > > + > > + error = xfs_next_set(ip, whichfork, 2); > > + if (error) > > + goto done; > > > > if (cur == NULL) > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > @@ -2787,7 +2825,10 @@ xfs_bmap_add_extent_hole_real( > > xfs_iext_remove(ip, icur, state); > > xfs_iext_prev(ifp, icur); > > xfs_iext_update_extent(ip, state, icur, &left); > > - ifp->if_nextents--; > > + > > + error = xfs_next_set(ip, whichfork, -1); > > + if (error) > > + goto done; > > > > if (cur == NULL) { > > rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); > > @@ -2886,7 +2927,10 @@ xfs_bmap_add_extent_hole_real( > > * Insert a new entry. > > */ > > xfs_iext_insert(ip, icur, new, state); > > - ifp->if_nextents++; > > + > > + error = xfs_next_set(ip, whichfork, 1); > > + if (error) > > + goto done; > > > > if (cur == NULL) { > > rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); > > @@ -5083,7 +5127,10 @@ xfs_bmap_del_extent_real( > > */ > > xfs_iext_remove(ip, icur, state); > > xfs_iext_prev(ifp, icur); > > - ifp->if_nextents--; > > + > > + error = xfs_next_set(ip, whichfork, -1); > > + if (error) > > + goto done; > > > > flags |= XFS_ILOG_CORE; > > if (!cur) { > > @@ -5193,7 +5240,10 @@ xfs_bmap_del_extent_real( > > } else > > flags |= xfs_ilog_fext(whichfork); > > > > - ifp->if_nextents++; > > + error = xfs_next_set(ip, whichfork, 1); > > + if (error) > > + goto done; > > + > > xfs_iext_next(ifp, icur); > > xfs_iext_insert(ip, icur, &new, state); > > break; > > @@ -5660,7 +5710,10 @@ xfs_bmse_merge( > > * Update the on-disk extent count, the btree if necessary and log the > > * inode. > > */ > > - ifp->if_nextents--; > > + error = xfs_next_set(ip, whichfork, -1); > > + if (error) > > + goto done; > > + > > *logflags |= XFS_ILOG_CORE; > > if (!cur) { > > *logflags |= XFS_ILOG_DEXT; > > @@ -6047,7 +6100,10 @@ xfs_bmap_split_extent( > > /* Add new extent */ > > xfs_iext_next(ifp, &icur); > > xfs_iext_insert(ip, &icur, &new, 0); > > - ifp->if_nextents++; > > + > > + error = xfs_next_set(ip, whichfork, 1); > > + if (error) > > + goto del_cursor; > > > > if (cur) { > > error = xfs_bmbt_lookup_eq(cur, &new, &i); > > diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c > > index 28b366275ae0..3bf5a2c391bd 100644 > > --- a/fs/xfs/libxfs/xfs_inode_fork.c > > +++ b/fs/xfs/libxfs/xfs_inode_fork.c > > @@ -728,3 +728,32 @@ xfs_ifork_verify_local_attr( > > > > return 0; > > } > > + > > +int > > +xfs_next_set( > > "next"... please choose an abbreviation that doesn't collide with a > common English word. > > > + struct xfs_inode *ip, > > + int whichfork, > > + int delta) > > Delta? I thought this was a setter function? > > > +{ > > + struct xfs_ifork *ifp; > > + int64_t nr_exts; > > + int64_t max_exts; > > + > > + ifp = XFS_IFORK_PTR(ip, whichfork); > > + > > + if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) > > + max_exts = MAXEXTNUM; > > + else if (whichfork == XFS_ATTR_FORK) > > + max_exts = MAXAEXTNUM; > > + else > > + ASSERT(0); > > + > > + nr_exts = ifp->if_nextents + delta; > > Nope, it's a modify function all right. Then it should be named: > > xfs_nextents_mod(ip, whichfork, delta) Ok. I will change this. > > > + if ((delta > 0 && nr_exts > max_exts) > > + || (delta < 0 && nr_exts < 0)) > > Line these up, please. e.g., > > if ((delta > 0 && nr_exts > max_exts) || > (delta < 0 && nr_exts < 0)) Ok. > > --D > > > + return -EOVERFLOW; > > + > > + ifp->if_nextents = nr_exts; > > + > > + return 0; > > +} > > diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h > > index a4953e95c4f3..a84ae42ace79 100644 > > --- a/fs/xfs/libxfs/xfs_inode_fork.h > > +++ b/fs/xfs/libxfs/xfs_inode_fork.h > > @@ -173,4 +173,5 @@ extern void xfs_ifork_init_cow(struct xfs_inode *ip); > > int xfs_ifork_verify_local_data(struct xfs_inode *ip); > > int xfs_ifork_verify_local_attr(struct xfs_inode *ip); > > > > +int xfs_next_set(struct xfs_inode *ip, int whichfork, int delta); > > #endif /* __XFS_INODE_FORK_H__ */ >
On Monday 8 June 2020 10:02:16 PM IST Darrick J. Wong wrote: > On Mon, Jun 08, 2020 at 09:24:25AM -0700, Darrick J. Wong wrote: > > On Sat, Jun 06, 2020 at 01:57:40PM +0530, Chandan Babu R wrote: > > > The following error message was noticed when a workload added one > > > million xattrs, deleted 50% of them and then inserted 400,000 new > > > xattrs. > > > > > > XFS (loop0): xfs_iflush_int: detected corrupt incore inode 131, total extents = -19916, nblocks = 102937, ptr ffff9ce33b098c00 > > > > > > The error message was printed during unmounting the filesystem. The > > > value printed under "total extents" indicates that we overflowed the > > > per-inode signed 16-bit xattr extent counter. > > > > > > Instead of letting this silent corruption occur, this patch checks for > > > extent counter (both data and xattr) overflow before we assign the > > > new value to the corresponding in-memory extent counter. > > > > > > Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com> > > > --- > > > fs/xfs/libxfs/xfs_bmap.c | 92 +++++++++++++++++++++++++++------- > > > fs/xfs/libxfs/xfs_inode_fork.c | 29 +++++++++++ > > > fs/xfs/libxfs/xfs_inode_fork.h | 1 + > > > 3 files changed, 104 insertions(+), 18 deletions(-) > > > > > > diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c > > > index edc63dba007f..798fca5c52af 100644 > > > --- a/fs/xfs/libxfs/xfs_bmap.c > > > +++ b/fs/xfs/libxfs/xfs_bmap.c > > > @@ -906,7 +906,10 @@ xfs_bmap_local_to_extents( > > > xfs_iext_first(ifp, &icur); > > > xfs_iext_insert(ip, &icur, &rec, 0); > > > > > > - ifp->if_nextents = 1; > > > + error = xfs_next_set(ip, whichfork, 1); > > > + if (error) > > > + goto done; > > > > Are you sure that if_nextents == 0 is a precondition here? Technically > > speaking, this turns an assignment into an increment operation. > > > > > + > > > ip->i_d.di_nblocks = 1; > > > xfs_trans_mod_dquot_byino(tp, ip, > > > XFS_TRANS_DQ_BCOUNT, 1L); > > > @@ -1594,7 +1597,10 @@ xfs_bmap_add_extent_delay_real( > > > xfs_iext_remove(bma->ip, &bma->icur, state); > > > xfs_iext_prev(ifp, &bma->icur); > > > xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT); > > > - ifp->if_nextents--; > > > + > > > + error = xfs_next_set(bma->ip, whichfork, -1); > > > + if (error) > > > + goto done; > > > > > > if (bma->cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > @@ -1698,7 +1704,10 @@ xfs_bmap_add_extent_delay_real( > > > PREV.br_startblock = new->br_startblock; > > > PREV.br_state = new->br_state; > > > xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); > > > - ifp->if_nextents++; > > > + > > > + error = xfs_next_set(bma->ip, whichfork, 1); > > > + if (error) > > > + goto done; > > > > > > if (bma->cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > @@ -1764,7 +1773,10 @@ xfs_bmap_add_extent_delay_real( > > > * The left neighbor is not contiguous. > > > */ > > > xfs_iext_update_extent(bma->ip, state, &bma->icur, new); > > > - ifp->if_nextents++; > > > + > > > + error = xfs_next_set(bma->ip, whichfork, 1); > > > + if (error) > > > + goto done; > > > > > > if (bma->cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > @@ -1851,7 +1863,10 @@ xfs_bmap_add_extent_delay_real( > > > * The right neighbor is not contiguous. > > > */ > > > xfs_iext_update_extent(bma->ip, state, &bma->icur, new); > > > - ifp->if_nextents++; > > > + > > > + error = xfs_next_set(bma->ip, whichfork, 1); > > > + if (error) > > > + goto done; > > > > > > if (bma->cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > @@ -1937,7 +1952,10 @@ xfs_bmap_add_extent_delay_real( > > > xfs_iext_next(ifp, &bma->icur); > > > xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state); > > > xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state); > > > - ifp->if_nextents++; > > > + > > > + error = xfs_next_set(bma->ip, whichfork, 1); > > > + if (error) > > > + goto done; > > > > > > if (bma->cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > @@ -2141,7 +2159,11 @@ xfs_bmap_add_extent_unwritten_real( > > > xfs_iext_remove(ip, icur, state); > > > xfs_iext_prev(ifp, icur); > > > xfs_iext_update_extent(ip, state, icur, &LEFT); > > > - ifp->if_nextents -= 2; > > > + > > > + error = xfs_next_set(ip, whichfork, -2); > > > + if (error) > > > + goto done; > > > + > > > if (cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > else { > > > @@ -2193,7 +2215,11 @@ xfs_bmap_add_extent_unwritten_real( > > > xfs_iext_remove(ip, icur, state); > > > xfs_iext_prev(ifp, icur); > > > xfs_iext_update_extent(ip, state, icur, &LEFT); > > > - ifp->if_nextents--; > > > + > > > + error = xfs_next_set(ip, whichfork, -1); > > > + if (error) > > > + goto done; > > > + > > > if (cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > else { > > > @@ -2235,7 +2261,10 @@ xfs_bmap_add_extent_unwritten_real( > > > xfs_iext_remove(ip, icur, state); > > > xfs_iext_prev(ifp, icur); > > > xfs_iext_update_extent(ip, state, icur, &PREV); > > > - ifp->if_nextents--; > > > + > > > + error = xfs_next_set(ip, whichfork, -1); > > > + if (error) > > > + goto done; > > > > > > if (cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > @@ -2343,7 +2372,10 @@ xfs_bmap_add_extent_unwritten_real( > > > > > > xfs_iext_update_extent(ip, state, icur, &PREV); > > > xfs_iext_insert(ip, icur, new, state); > > > - ifp->if_nextents++; > > > + > > > + error = xfs_next_set(ip, whichfork, 1); > > > + if (error) > > > + goto done; > > > > > > if (cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > @@ -2419,7 +2451,10 @@ xfs_bmap_add_extent_unwritten_real( > > > xfs_iext_update_extent(ip, state, icur, &PREV); > > > xfs_iext_next(ifp, icur); > > > xfs_iext_insert(ip, icur, new, state); > > > - ifp->if_nextents++; > > > + > > > + error = xfs_next_set(ip, whichfork, 1); > > > + if (error) > > > + goto done; > > > > > > if (cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > @@ -2471,7 +2506,10 @@ xfs_bmap_add_extent_unwritten_real( > > > xfs_iext_next(ifp, icur); > > > xfs_iext_insert(ip, icur, &r[1], state); > > > xfs_iext_insert(ip, icur, &r[0], state); > > > - ifp->if_nextents += 2; > > > + > > > + error = xfs_next_set(ip, whichfork, 2); > > > + if (error) > > > + goto done; > > > > > > if (cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > @@ -2787,7 +2825,10 @@ xfs_bmap_add_extent_hole_real( > > > xfs_iext_remove(ip, icur, state); > > > xfs_iext_prev(ifp, icur); > > > xfs_iext_update_extent(ip, state, icur, &left); > > > - ifp->if_nextents--; > > > + > > > + error = xfs_next_set(ip, whichfork, -1); > > > + if (error) > > > + goto done; > > > > > > if (cur == NULL) { > > > rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); > > > @@ -2886,7 +2927,10 @@ xfs_bmap_add_extent_hole_real( > > > * Insert a new entry. > > > */ > > > xfs_iext_insert(ip, icur, new, state); > > > - ifp->if_nextents++; > > > + > > > + error = xfs_next_set(ip, whichfork, 1); > > > + if (error) > > > + goto done; > > > > > > if (cur == NULL) { > > > rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); > > > @@ -5083,7 +5127,10 @@ xfs_bmap_del_extent_real( > > > */ > > > xfs_iext_remove(ip, icur, state); > > > xfs_iext_prev(ifp, icur); > > > - ifp->if_nextents--; > > > + > > > + error = xfs_next_set(ip, whichfork, -1); > > > + if (error) > > > + goto done; > > > > > > flags |= XFS_ILOG_CORE; > > > if (!cur) { > > > @@ -5193,7 +5240,10 @@ xfs_bmap_del_extent_real( > > > } else > > > flags |= xfs_ilog_fext(whichfork); > > > > > > - ifp->if_nextents++; > > > + error = xfs_next_set(ip, whichfork, 1); > > > + if (error) > > > + goto done; > > > + > > > xfs_iext_next(ifp, icur); > > > xfs_iext_insert(ip, icur, &new, state); > > > break; > > > @@ -5660,7 +5710,10 @@ xfs_bmse_merge( > > > * Update the on-disk extent count, the btree if necessary and log the > > > * inode. > > > */ > > > - ifp->if_nextents--; > > > + error = xfs_next_set(ip, whichfork, -1); > > > + if (error) > > > + goto done; > > > + > > > *logflags |= XFS_ILOG_CORE; > > > if (!cur) { > > > *logflags |= XFS_ILOG_DEXT; > > > @@ -6047,7 +6100,10 @@ xfs_bmap_split_extent( > > > /* Add new extent */ > > > xfs_iext_next(ifp, &icur); > > > xfs_iext_insert(ip, &icur, &new, 0); > > > - ifp->if_nextents++; > > > + > > > + error = xfs_next_set(ip, whichfork, 1); > > > + if (error) > > > + goto del_cursor; > > > > > > if (cur) { > > > error = xfs_bmbt_lookup_eq(cur, &new, &i); > > > diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c > > > index 28b366275ae0..3bf5a2c391bd 100644 > > > --- a/fs/xfs/libxfs/xfs_inode_fork.c > > > +++ b/fs/xfs/libxfs/xfs_inode_fork.c > > > @@ -728,3 +728,32 @@ xfs_ifork_verify_local_attr( > > > > > > return 0; > > > } > > > + > > > +int > > > +xfs_next_set( > > > > "next"... please choose an abbreviation that doesn't collide with a > > common English word. > > > > > + struct xfs_inode *ip, > > > + int whichfork, > > > + int delta) > > > > Delta? I thought this was a setter function? > > > > > +{ > > > + struct xfs_ifork *ifp; > > > + int64_t nr_exts; > > > + int64_t max_exts; > > > + > > > + ifp = XFS_IFORK_PTR(ip, whichfork); > > > + > > > + if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) > > > + max_exts = MAXEXTNUM; > > > + else if (whichfork == XFS_ATTR_FORK) > > > + max_exts = MAXAEXTNUM; > > > + else > > > + ASSERT(0); > > > + > > > + nr_exts = ifp->if_nextents + delta; > > > > Nope, it's a modify function all right. Then it should be named: > > > > xfs_nextents_mod(ip, whichfork, delta) > > > > > + if ((delta > 0 && nr_exts > max_exts) > > > + || (delta < 0 && nr_exts < 0)) > > > > Line these up, please. e.g., > > > > if ((delta > 0 && nr_exts > max_exts) || > > (delta < 0 && nr_exts < 0)) > > > > --D > > > > > + return -EOVERFLOW; > > Oh, also, shouldn't this be EFBIG ("File too big")? True, EFBIG is more appropriate than EOVERFLOW in this case. Darrick, I have one question. The purpose of this patch is to fix the zero day bug where we overflow extent counter silently and get to know about it only when flushing the incore inode to disk. Patches that come later in the series modify the extent count limits to 2^32 (for xattr fork) and 2^47 (for data fork). If this patch is not required to be sent to stable release, I will drop it from the series. Also, I can't have a "fixes" tag because this is a zero day bug. > > --D > > > > + > > > + ifp->if_nextents = nr_exts; > > > + > > > + return 0; > > > +} > > > diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h > > > index a4953e95c4f3..a84ae42ace79 100644 > > > --- a/fs/xfs/libxfs/xfs_inode_fork.h > > > +++ b/fs/xfs/libxfs/xfs_inode_fork.h > > > @@ -173,4 +173,5 @@ extern void xfs_ifork_init_cow(struct xfs_inode *ip); > > > int xfs_ifork_verify_local_data(struct xfs_inode *ip); > > > int xfs_ifork_verify_local_attr(struct xfs_inode *ip); > > > > > > +int xfs_next_set(struct xfs_inode *ip, int whichfork, int delta); > > > #endif /* __XFS_INODE_FORK_H__ */ >
On Tue, Jun 09, 2020 at 07:52:48PM +0530, Chandan Babu R wrote: > On Monday 8 June 2020 10:02:16 PM IST Darrick J. Wong wrote: > > On Mon, Jun 08, 2020 at 09:24:25AM -0700, Darrick J. Wong wrote: > > > On Sat, Jun 06, 2020 at 01:57:40PM +0530, Chandan Babu R wrote: > > > > The following error message was noticed when a workload added one > > > > million xattrs, deleted 50% of them and then inserted 400,000 new > > > > xattrs. > > > > > > > > XFS (loop0): xfs_iflush_int: detected corrupt incore inode 131, total extents = -19916, nblocks = 102937, ptr ffff9ce33b098c00 > > > > > > > > The error message was printed during unmounting the filesystem. The > > > > value printed under "total extents" indicates that we overflowed the > > > > per-inode signed 16-bit xattr extent counter. > > > > > > > > Instead of letting this silent corruption occur, this patch checks for > > > > extent counter (both data and xattr) overflow before we assign the > > > > new value to the corresponding in-memory extent counter. > > > > > > > > Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com> > > > > --- > > > > fs/xfs/libxfs/xfs_bmap.c | 92 +++++++++++++++++++++++++++------- > > > > fs/xfs/libxfs/xfs_inode_fork.c | 29 +++++++++++ > > > > fs/xfs/libxfs/xfs_inode_fork.h | 1 + > > > > 3 files changed, 104 insertions(+), 18 deletions(-) > > > > > > > > diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c > > > > index edc63dba007f..798fca5c52af 100644 > > > > --- a/fs/xfs/libxfs/xfs_bmap.c > > > > +++ b/fs/xfs/libxfs/xfs_bmap.c > > > > @@ -906,7 +906,10 @@ xfs_bmap_local_to_extents( > > > > xfs_iext_first(ifp, &icur); > > > > xfs_iext_insert(ip, &icur, &rec, 0); > > > > > > > > - ifp->if_nextents = 1; > > > > + error = xfs_next_set(ip, whichfork, 1); > > > > + if (error) > > > > + goto done; > > > > > > Are you sure that if_nextents == 0 is a precondition here? Technically > > > speaking, this turns an assignment into an increment operation. > > > > > > > + > > > > ip->i_d.di_nblocks = 1; > > > > xfs_trans_mod_dquot_byino(tp, ip, > > > > XFS_TRANS_DQ_BCOUNT, 1L); > > > > @@ -1594,7 +1597,10 @@ xfs_bmap_add_extent_delay_real( > > > > xfs_iext_remove(bma->ip, &bma->icur, state); > > > > xfs_iext_prev(ifp, &bma->icur); > > > > xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT); > > > > - ifp->if_nextents--; > > > > + > > > > + error = xfs_next_set(bma->ip, whichfork, -1); > > > > + if (error) > > > > + goto done; > > > > > > > > if (bma->cur == NULL) > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > @@ -1698,7 +1704,10 @@ xfs_bmap_add_extent_delay_real( > > > > PREV.br_startblock = new->br_startblock; > > > > PREV.br_state = new->br_state; > > > > xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); > > > > - ifp->if_nextents++; > > > > + > > > > + error = xfs_next_set(bma->ip, whichfork, 1); > > > > + if (error) > > > > + goto done; > > > > > > > > if (bma->cur == NULL) > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > @@ -1764,7 +1773,10 @@ xfs_bmap_add_extent_delay_real( > > > > * The left neighbor is not contiguous. > > > > */ > > > > xfs_iext_update_extent(bma->ip, state, &bma->icur, new); > > > > - ifp->if_nextents++; > > > > + > > > > + error = xfs_next_set(bma->ip, whichfork, 1); > > > > + if (error) > > > > + goto done; > > > > > > > > if (bma->cur == NULL) > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > @@ -1851,7 +1863,10 @@ xfs_bmap_add_extent_delay_real( > > > > * The right neighbor is not contiguous. > > > > */ > > > > xfs_iext_update_extent(bma->ip, state, &bma->icur, new); > > > > - ifp->if_nextents++; > > > > + > > > > + error = xfs_next_set(bma->ip, whichfork, 1); > > > > + if (error) > > > > + goto done; > > > > > > > > if (bma->cur == NULL) > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > @@ -1937,7 +1952,10 @@ xfs_bmap_add_extent_delay_real( > > > > xfs_iext_next(ifp, &bma->icur); > > > > xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state); > > > > xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state); > > > > - ifp->if_nextents++; > > > > + > > > > + error = xfs_next_set(bma->ip, whichfork, 1); > > > > + if (error) > > > > + goto done; > > > > > > > > if (bma->cur == NULL) > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > @@ -2141,7 +2159,11 @@ xfs_bmap_add_extent_unwritten_real( > > > > xfs_iext_remove(ip, icur, state); > > > > xfs_iext_prev(ifp, icur); > > > > xfs_iext_update_extent(ip, state, icur, &LEFT); > > > > - ifp->if_nextents -= 2; > > > > + > > > > + error = xfs_next_set(ip, whichfork, -2); > > > > + if (error) > > > > + goto done; > > > > + > > > > if (cur == NULL) > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > else { > > > > @@ -2193,7 +2215,11 @@ xfs_bmap_add_extent_unwritten_real( > > > > xfs_iext_remove(ip, icur, state); > > > > xfs_iext_prev(ifp, icur); > > > > xfs_iext_update_extent(ip, state, icur, &LEFT); > > > > - ifp->if_nextents--; > > > > + > > > > + error = xfs_next_set(ip, whichfork, -1); > > > > + if (error) > > > > + goto done; > > > > + > > > > if (cur == NULL) > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > else { > > > > @@ -2235,7 +2261,10 @@ xfs_bmap_add_extent_unwritten_real( > > > > xfs_iext_remove(ip, icur, state); > > > > xfs_iext_prev(ifp, icur); > > > > xfs_iext_update_extent(ip, state, icur, &PREV); > > > > - ifp->if_nextents--; > > > > + > > > > + error = xfs_next_set(ip, whichfork, -1); > > > > + if (error) > > > > + goto done; > > > > > > > > if (cur == NULL) > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > @@ -2343,7 +2372,10 @@ xfs_bmap_add_extent_unwritten_real( > > > > > > > > xfs_iext_update_extent(ip, state, icur, &PREV); > > > > xfs_iext_insert(ip, icur, new, state); > > > > - ifp->if_nextents++; > > > > + > > > > + error = xfs_next_set(ip, whichfork, 1); > > > > + if (error) > > > > + goto done; > > > > > > > > if (cur == NULL) > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > @@ -2419,7 +2451,10 @@ xfs_bmap_add_extent_unwritten_real( > > > > xfs_iext_update_extent(ip, state, icur, &PREV); > > > > xfs_iext_next(ifp, icur); > > > > xfs_iext_insert(ip, icur, new, state); > > > > - ifp->if_nextents++; > > > > + > > > > + error = xfs_next_set(ip, whichfork, 1); > > > > + if (error) > > > > + goto done; > > > > > > > > if (cur == NULL) > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > @@ -2471,7 +2506,10 @@ xfs_bmap_add_extent_unwritten_real( > > > > xfs_iext_next(ifp, icur); > > > > xfs_iext_insert(ip, icur, &r[1], state); > > > > xfs_iext_insert(ip, icur, &r[0], state); > > > > - ifp->if_nextents += 2; > > > > + > > > > + error = xfs_next_set(ip, whichfork, 2); > > > > + if (error) > > > > + goto done; > > > > > > > > if (cur == NULL) > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > @@ -2787,7 +2825,10 @@ xfs_bmap_add_extent_hole_real( > > > > xfs_iext_remove(ip, icur, state); > > > > xfs_iext_prev(ifp, icur); > > > > xfs_iext_update_extent(ip, state, icur, &left); > > > > - ifp->if_nextents--; > > > > + > > > > + error = xfs_next_set(ip, whichfork, -1); > > > > + if (error) > > > > + goto done; > > > > > > > > if (cur == NULL) { > > > > rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); > > > > @@ -2886,7 +2927,10 @@ xfs_bmap_add_extent_hole_real( > > > > * Insert a new entry. > > > > */ > > > > xfs_iext_insert(ip, icur, new, state); > > > > - ifp->if_nextents++; > > > > + > > > > + error = xfs_next_set(ip, whichfork, 1); > > > > + if (error) > > > > + goto done; > > > > > > > > if (cur == NULL) { > > > > rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); > > > > @@ -5083,7 +5127,10 @@ xfs_bmap_del_extent_real( > > > > */ > > > > xfs_iext_remove(ip, icur, state); > > > > xfs_iext_prev(ifp, icur); > > > > - ifp->if_nextents--; > > > > + > > > > + error = xfs_next_set(ip, whichfork, -1); > > > > + if (error) > > > > + goto done; > > > > > > > > flags |= XFS_ILOG_CORE; > > > > if (!cur) { > > > > @@ -5193,7 +5240,10 @@ xfs_bmap_del_extent_real( > > > > } else > > > > flags |= xfs_ilog_fext(whichfork); > > > > > > > > - ifp->if_nextents++; > > > > + error = xfs_next_set(ip, whichfork, 1); > > > > + if (error) > > > > + goto done; > > > > + > > > > xfs_iext_next(ifp, icur); > > > > xfs_iext_insert(ip, icur, &new, state); > > > > break; > > > > @@ -5660,7 +5710,10 @@ xfs_bmse_merge( > > > > * Update the on-disk extent count, the btree if necessary and log the > > > > * inode. > > > > */ > > > > - ifp->if_nextents--; > > > > + error = xfs_next_set(ip, whichfork, -1); > > > > + if (error) > > > > + goto done; > > > > + > > > > *logflags |= XFS_ILOG_CORE; > > > > if (!cur) { > > > > *logflags |= XFS_ILOG_DEXT; > > > > @@ -6047,7 +6100,10 @@ xfs_bmap_split_extent( > > > > /* Add new extent */ > > > > xfs_iext_next(ifp, &icur); > > > > xfs_iext_insert(ip, &icur, &new, 0); > > > > - ifp->if_nextents++; > > > > + > > > > + error = xfs_next_set(ip, whichfork, 1); > > > > + if (error) > > > > + goto del_cursor; > > > > > > > > if (cur) { > > > > error = xfs_bmbt_lookup_eq(cur, &new, &i); > > > > diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c > > > > index 28b366275ae0..3bf5a2c391bd 100644 > > > > --- a/fs/xfs/libxfs/xfs_inode_fork.c > > > > +++ b/fs/xfs/libxfs/xfs_inode_fork.c > > > > @@ -728,3 +728,32 @@ xfs_ifork_verify_local_attr( > > > > > > > > return 0; > > > > } > > > > + > > > > +int > > > > +xfs_next_set( > > > > > > "next"... please choose an abbreviation that doesn't collide with a > > > common English word. > > > > > > > + struct xfs_inode *ip, > > > > + int whichfork, > > > > + int delta) > > > > > > Delta? I thought this was a setter function? > > > > > > > +{ > > > > + struct xfs_ifork *ifp; > > > > + int64_t nr_exts; > > > > + int64_t max_exts; > > > > + > > > > + ifp = XFS_IFORK_PTR(ip, whichfork); > > > > + > > > > + if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) > > > > + max_exts = MAXEXTNUM; > > > > + else if (whichfork == XFS_ATTR_FORK) > > > > + max_exts = MAXAEXTNUM; > > > > + else > > > > + ASSERT(0); > > > > + > > > > + nr_exts = ifp->if_nextents + delta; > > > > > > Nope, it's a modify function all right. Then it should be named: > > > > > > xfs_nextents_mod(ip, whichfork, delta) > > > > > > > + if ((delta > 0 && nr_exts > max_exts) > > > > + || (delta < 0 && nr_exts < 0)) > > > > > > Line these up, please. e.g., > > > > > > if ((delta > 0 && nr_exts > max_exts) || > > > (delta < 0 && nr_exts < 0)) HA even the maintainer gets it wrong. :( > > > > > > --D > > > > > > > + return -EOVERFLOW; > > > > Oh, also, shouldn't this be EFBIG ("File too big")? > > True, EFBIG is more appropriate than EOVERFLOW in this case. > > Darrick, I have one question. The purpose of this patch is to fix the zero day > bug where we overflow extent counter silently and get to know about it only > when flushing the incore inode to disk. Patches that come later in the series > modify the extent count limits to 2^32 (for xattr fork) and 2^47 (for data > fork). If this patch is not required to be sent to stable release, I will drop > it from the series. I would leave it in the series, unless you mean to send this as a separate cleanup ahead of everything else? Now that I think about it, this probably should become its own cleanup series. I just realized that if we error out EFBIG in the middle of a bmap function, we're probably going to end up cancelling a dirty transaction, which will cause an fs shutdown. Since xfs cannot undo the effects of a dirty transaction, we have to be able to error out earlier in the transaction sequence so that we can back out to userspace without affecting the filesystem. IOWs, this means that any code path that could increase an inode's extent count will have to check the the inode (after we take the ILOCK) to make sure that it can accomodate however many more extents we're adding. static int xfs_trans_inode_reserve_extent_count(ip, whichfork, nrtoadd) { if (MAX{,A}EXTNUM - XFS_IFORK_PTR(ip, whichfork)->if_nextents < nrtoadd) return -EFBIG; return 0; } error = xfs_trans_alloc(..., &tp); if (error) goto out; xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(ip, 0); error = xfs_trans_inode_reserve_extent_count(ip, whichfork, nrtoadd) if (error) goto out; error = xfs_trans_reserve_quota_nblks(tp, ip, ...); if (error) goto out; ...or something like that. And now suddenly this grows into its own cleanup series. :/ > Also, I can't have a "fixes" tag because this is a zero > day bug. Everything is a zero day now... but establishing a base for this one is probably not going to be easy since I bet the overflow has existed since the beginning. --D > > > > > --D > > > > > > + > > > > + ifp->if_nextents = nr_exts; > > > > + > > > > + return 0; > > > > +} > > > > diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h > > > > index a4953e95c4f3..a84ae42ace79 100644 > > > > --- a/fs/xfs/libxfs/xfs_inode_fork.h > > > > +++ b/fs/xfs/libxfs/xfs_inode_fork.h > > > > @@ -173,4 +173,5 @@ extern void xfs_ifork_init_cow(struct xfs_inode *ip); > > > > int xfs_ifork_verify_local_data(struct xfs_inode *ip); > > > > int xfs_ifork_verify_local_attr(struct xfs_inode *ip); > > > > > > > > +int xfs_next_set(struct xfs_inode *ip, int whichfork, int delta); > > > > #endif /* __XFS_INODE_FORK_H__ */ > > > > > -- > chandan > > >
On Tue, Jun 09, 2020 at 07:52:37PM +0530, Chandan Babu R wrote: > On Monday 8 June 2020 9:54:25 PM IST Darrick J. Wong wrote: > > On Sat, Jun 06, 2020 at 01:57:40PM +0530, Chandan Babu R wrote: > > > The following error message was noticed when a workload added one > > > million xattrs, deleted 50% of them and then inserted 400,000 new > > > xattrs. > > > > > > XFS (loop0): xfs_iflush_int: detected corrupt incore inode 131, total extents = -19916, nblocks = 102937, ptr ffff9ce33b098c00 > > > > > > The error message was printed during unmounting the filesystem. The > > > value printed under "total extents" indicates that we overflowed the > > > per-inode signed 16-bit xattr extent counter. > > > > > > Instead of letting this silent corruption occur, this patch checks for > > > extent counter (both data and xattr) overflow before we assign the > > > new value to the corresponding in-memory extent counter. > > > > > > Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com> > > > --- > > > fs/xfs/libxfs/xfs_bmap.c | 92 +++++++++++++++++++++++++++------- > > > fs/xfs/libxfs/xfs_inode_fork.c | 29 +++++++++++ > > > fs/xfs/libxfs/xfs_inode_fork.h | 1 + > > > 3 files changed, 104 insertions(+), 18 deletions(-) > > > > > > diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c > > > index edc63dba007f..798fca5c52af 100644 > > > --- a/fs/xfs/libxfs/xfs_bmap.c > > > +++ b/fs/xfs/libxfs/xfs_bmap.c > > > @@ -906,7 +906,10 @@ xfs_bmap_local_to_extents( > > > xfs_iext_first(ifp, &icur); > > > xfs_iext_insert(ip, &icur, &rec, 0); > > > > > > - ifp->if_nextents = 1; > > > + error = xfs_next_set(ip, whichfork, 1); > > > + if (error) > > > + goto done; > > > > Are you sure that if_nextents == 0 is a precondition here? Technically > > speaking, this turns an assignment into an increment operation. > > Hmm. I didn't pay attention to that. I will check and update the code > appropriately. Thanks for pointing this out. > > > > > > + > > > ip->i_d.di_nblocks = 1; > > > xfs_trans_mod_dquot_byino(tp, ip, > > > XFS_TRANS_DQ_BCOUNT, 1L); > > > @@ -1594,7 +1597,10 @@ xfs_bmap_add_extent_delay_real( > > > xfs_iext_remove(bma->ip, &bma->icur, state); > > > xfs_iext_prev(ifp, &bma->icur); > > > xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT); > > > - ifp->if_nextents--; > > > + > > > + error = xfs_next_set(bma->ip, whichfork, -1); > > > + if (error) > > > + goto done; > > > > > > if (bma->cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > @@ -1698,7 +1704,10 @@ xfs_bmap_add_extent_delay_real( > > > PREV.br_startblock = new->br_startblock; > > > PREV.br_state = new->br_state; > > > xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); > > > - ifp->if_nextents++; > > > + > > > + error = xfs_next_set(bma->ip, whichfork, 1); > > > + if (error) > > > + goto done; > > > > > > if (bma->cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > @@ -1764,7 +1773,10 @@ xfs_bmap_add_extent_delay_real( > > > * The left neighbor is not contiguous. > > > */ > > > xfs_iext_update_extent(bma->ip, state, &bma->icur, new); > > > - ifp->if_nextents++; > > > + > > > + error = xfs_next_set(bma->ip, whichfork, 1); > > > + if (error) > > > + goto done; > > > > > > if (bma->cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > @@ -1851,7 +1863,10 @@ xfs_bmap_add_extent_delay_real( > > > * The right neighbor is not contiguous. > > > */ > > > xfs_iext_update_extent(bma->ip, state, &bma->icur, new); > > > - ifp->if_nextents++; > > > + > > > + error = xfs_next_set(bma->ip, whichfork, 1); > > > + if (error) > > > + goto done; > > > > > > if (bma->cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > @@ -1937,7 +1952,10 @@ xfs_bmap_add_extent_delay_real( > > > xfs_iext_next(ifp, &bma->icur); > > > xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state); > > > xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state); > > > - ifp->if_nextents++; > > > + > > > + error = xfs_next_set(bma->ip, whichfork, 1); > > > + if (error) > > > + goto done; > > > > > > if (bma->cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > @@ -2141,7 +2159,11 @@ xfs_bmap_add_extent_unwritten_real( > > > xfs_iext_remove(ip, icur, state); > > > xfs_iext_prev(ifp, icur); > > > xfs_iext_update_extent(ip, state, icur, &LEFT); > > > - ifp->if_nextents -= 2; > > > + > > > + error = xfs_next_set(ip, whichfork, -2); > > > + if (error) > > > + goto done; > > > + > > > if (cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > else { > > > @@ -2193,7 +2215,11 @@ xfs_bmap_add_extent_unwritten_real( > > > xfs_iext_remove(ip, icur, state); > > > xfs_iext_prev(ifp, icur); > > > xfs_iext_update_extent(ip, state, icur, &LEFT); > > > - ifp->if_nextents--; > > > + > > > + error = xfs_next_set(ip, whichfork, -1); > > > + if (error) > > > + goto done; > > > + > > > if (cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > else { > > > @@ -2235,7 +2261,10 @@ xfs_bmap_add_extent_unwritten_real( > > > xfs_iext_remove(ip, icur, state); > > > xfs_iext_prev(ifp, icur); > > > xfs_iext_update_extent(ip, state, icur, &PREV); > > > - ifp->if_nextents--; > > > + > > > + error = xfs_next_set(ip, whichfork, -1); > > > + if (error) > > > + goto done; > > > > > > if (cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > @@ -2343,7 +2372,10 @@ xfs_bmap_add_extent_unwritten_real( > > > > > > xfs_iext_update_extent(ip, state, icur, &PREV); > > > xfs_iext_insert(ip, icur, new, state); > > > - ifp->if_nextents++; > > > + > > > + error = xfs_next_set(ip, whichfork, 1); > > > + if (error) > > > + goto done; > > > > > > if (cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > @@ -2419,7 +2451,10 @@ xfs_bmap_add_extent_unwritten_real( > > > xfs_iext_update_extent(ip, state, icur, &PREV); > > > xfs_iext_next(ifp, icur); > > > xfs_iext_insert(ip, icur, new, state); > > > - ifp->if_nextents++; > > > + > > > + error = xfs_next_set(ip, whichfork, 1); > > > + if (error) > > > + goto done; > > > > > > if (cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > @@ -2471,7 +2506,10 @@ xfs_bmap_add_extent_unwritten_real( > > > xfs_iext_next(ifp, icur); > > > xfs_iext_insert(ip, icur, &r[1], state); > > > xfs_iext_insert(ip, icur, &r[0], state); > > > - ifp->if_nextents += 2; > > > + > > > + error = xfs_next_set(ip, whichfork, 2); > > > + if (error) > > > + goto done; > > > > > > if (cur == NULL) > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > @@ -2787,7 +2825,10 @@ xfs_bmap_add_extent_hole_real( > > > xfs_iext_remove(ip, icur, state); > > > xfs_iext_prev(ifp, icur); > > > xfs_iext_update_extent(ip, state, icur, &left); > > > - ifp->if_nextents--; > > > + > > > + error = xfs_next_set(ip, whichfork, -1); > > > + if (error) > > > + goto done; > > > > > > if (cur == NULL) { > > > rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); > > > @@ -2886,7 +2927,10 @@ xfs_bmap_add_extent_hole_real( > > > * Insert a new entry. > > > */ > > > xfs_iext_insert(ip, icur, new, state); > > > - ifp->if_nextents++; > > > + > > > + error = xfs_next_set(ip, whichfork, 1); > > > + if (error) > > > + goto done; > > > > > > if (cur == NULL) { > > > rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); > > > @@ -5083,7 +5127,10 @@ xfs_bmap_del_extent_real( > > > */ > > > xfs_iext_remove(ip, icur, state); > > > xfs_iext_prev(ifp, icur); > > > - ifp->if_nextents--; > > > + > > > + error = xfs_next_set(ip, whichfork, -1); > > > + if (error) > > > + goto done; > > > > > > flags |= XFS_ILOG_CORE; > > > if (!cur) { > > > @@ -5193,7 +5240,10 @@ xfs_bmap_del_extent_real( > > > } else > > > flags |= xfs_ilog_fext(whichfork); > > > > > > - ifp->if_nextents++; > > > + error = xfs_next_set(ip, whichfork, 1); > > > + if (error) > > > + goto done; > > > + > > > xfs_iext_next(ifp, icur); > > > xfs_iext_insert(ip, icur, &new, state); > > > break; > > > @@ -5660,7 +5710,10 @@ xfs_bmse_merge( > > > * Update the on-disk extent count, the btree if necessary and log the > > > * inode. > > > */ > > > - ifp->if_nextents--; > > > + error = xfs_next_set(ip, whichfork, -1); > > > + if (error) > > > + goto done; > > > + > > > *logflags |= XFS_ILOG_CORE; > > > if (!cur) { > > > *logflags |= XFS_ILOG_DEXT; > > > @@ -6047,7 +6100,10 @@ xfs_bmap_split_extent( > > > /* Add new extent */ > > > xfs_iext_next(ifp, &icur); > > > xfs_iext_insert(ip, &icur, &new, 0); > > > - ifp->if_nextents++; > > > + > > > + error = xfs_next_set(ip, whichfork, 1); > > > + if (error) > > > + goto del_cursor; > > > > > > if (cur) { > > > error = xfs_bmbt_lookup_eq(cur, &new, &i); > > > diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c > > > index 28b366275ae0..3bf5a2c391bd 100644 > > > --- a/fs/xfs/libxfs/xfs_inode_fork.c > > > +++ b/fs/xfs/libxfs/xfs_inode_fork.c > > > @@ -728,3 +728,32 @@ xfs_ifork_verify_local_attr( > > > > > > return 0; > > > } > > > + > > > +int > > > +xfs_next_set( > > > > "next"... please choose an abbreviation that doesn't collide with a > > common English word. > > > > > + struct xfs_inode *ip, > > > + int whichfork, > > > + int delta) > > > > Delta? I thought this was a setter function? > > > > > +{ > > > + struct xfs_ifork *ifp; > > > + int64_t nr_exts; > > > + int64_t max_exts; > > > + > > > + ifp = XFS_IFORK_PTR(ip, whichfork); > > > + > > > + if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) > > > + max_exts = MAXEXTNUM; > > > + else if (whichfork == XFS_ATTR_FORK) > > > + max_exts = MAXAEXTNUM; > > > + else > > > + ASSERT(0); > > > + > > > + nr_exts = ifp->if_nextents + delta; > > > > Nope, it's a modify function all right. Then it should be named: > > > > xfs_nextents_mod(ip, whichfork, delta) > > Ok. I will change this. <nod> Though as I (just) pointed out in the other part of this thread, the range check on the extent count ought to come earlier in the transaction sequence so that we can return EFBIG to userspace without having to cancel a (potentially dirty) transaction. --D > > > > > + if ((delta > 0 && nr_exts > max_exts) > > > + || (delta < 0 && nr_exts < 0)) > > > > Line these up, please. e.g., > > > > if ((delta > 0 && nr_exts > max_exts) || > > (delta < 0 && nr_exts < 0)) > > Ok. > > > > > --D > > > > > + return -EOVERFLOW; > > > + > > > + ifp->if_nextents = nr_exts; > > > + > > > + return 0; > > > +} > > > diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h > > > index a4953e95c4f3..a84ae42ace79 100644 > > > --- a/fs/xfs/libxfs/xfs_inode_fork.h > > > +++ b/fs/xfs/libxfs/xfs_inode_fork.h > > > @@ -173,4 +173,5 @@ extern void xfs_ifork_init_cow(struct xfs_inode *ip); > > > int xfs_ifork_verify_local_data(struct xfs_inode *ip); > > > int xfs_ifork_verify_local_attr(struct xfs_inode *ip); > > > > > > +int xfs_next_set(struct xfs_inode *ip, int whichfork, int delta); > > > #endif /* __XFS_INODE_FORK_H__ */ > > > > > -- > chandan > > >
On Tuesday 9 June 2020 10:37:34 PM IST Darrick J. Wong wrote: > On Tue, Jun 09, 2020 at 07:52:48PM +0530, Chandan Babu R wrote: > > On Monday 8 June 2020 10:02:16 PM IST Darrick J. Wong wrote: > > > On Mon, Jun 08, 2020 at 09:24:25AM -0700, Darrick J. Wong wrote: > > > > On Sat, Jun 06, 2020 at 01:57:40PM +0530, Chandan Babu R wrote: > > > > > The following error message was noticed when a workload added one > > > > > million xattrs, deleted 50% of them and then inserted 400,000 new > > > > > xattrs. > > > > > > > > > > XFS (loop0): xfs_iflush_int: detected corrupt incore inode 131, total extents = -19916, nblocks = 102937, ptr ffff9ce33b098c00 > > > > > > > > > > The error message was printed during unmounting the filesystem. The > > > > > value printed under "total extents" indicates that we overflowed the > > > > > per-inode signed 16-bit xattr extent counter. > > > > > > > > > > Instead of letting this silent corruption occur, this patch checks for > > > > > extent counter (both data and xattr) overflow before we assign the > > > > > new value to the corresponding in-memory extent counter. > > > > > > > > > > Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com> > > > > > --- > > > > > fs/xfs/libxfs/xfs_bmap.c | 92 +++++++++++++++++++++++++++------- > > > > > fs/xfs/libxfs/xfs_inode_fork.c | 29 +++++++++++ > > > > > fs/xfs/libxfs/xfs_inode_fork.h | 1 + > > > > > 3 files changed, 104 insertions(+), 18 deletions(-) > > > > > > > > > > diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c > > > > > index edc63dba007f..798fca5c52af 100644 > > > > > --- a/fs/xfs/libxfs/xfs_bmap.c > > > > > +++ b/fs/xfs/libxfs/xfs_bmap.c > > > > > @@ -906,7 +906,10 @@ xfs_bmap_local_to_extents( > > > > > xfs_iext_first(ifp, &icur); > > > > > xfs_iext_insert(ip, &icur, &rec, 0); > > > > > > > > > > - ifp->if_nextents = 1; > > > > > + error = xfs_next_set(ip, whichfork, 1); > > > > > + if (error) > > > > > + goto done; > > > > > > > > Are you sure that if_nextents == 0 is a precondition here? Technically > > > > speaking, this turns an assignment into an increment operation. > > > > > > > > > + > > > > > ip->i_d.di_nblocks = 1; > > > > > xfs_trans_mod_dquot_byino(tp, ip, > > > > > XFS_TRANS_DQ_BCOUNT, 1L); > > > > > @@ -1594,7 +1597,10 @@ xfs_bmap_add_extent_delay_real( > > > > > xfs_iext_remove(bma->ip, &bma->icur, state); > > > > > xfs_iext_prev(ifp, &bma->icur); > > > > > xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT); > > > > > - ifp->if_nextents--; > > > > > + > > > > > + error = xfs_next_set(bma->ip, whichfork, -1); > > > > > + if (error) > > > > > + goto done; > > > > > > > > > > if (bma->cur == NULL) > > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > > @@ -1698,7 +1704,10 @@ xfs_bmap_add_extent_delay_real( > > > > > PREV.br_startblock = new->br_startblock; > > > > > PREV.br_state = new->br_state; > > > > > xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); > > > > > - ifp->if_nextents++; > > > > > + > > > > > + error = xfs_next_set(bma->ip, whichfork, 1); > > > > > + if (error) > > > > > + goto done; > > > > > > > > > > if (bma->cur == NULL) > > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > > @@ -1764,7 +1773,10 @@ xfs_bmap_add_extent_delay_real( > > > > > * The left neighbor is not contiguous. > > > > > */ > > > > > xfs_iext_update_extent(bma->ip, state, &bma->icur, new); > > > > > - ifp->if_nextents++; > > > > > + > > > > > + error = xfs_next_set(bma->ip, whichfork, 1); > > > > > + if (error) > > > > > + goto done; > > > > > > > > > > if (bma->cur == NULL) > > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > > @@ -1851,7 +1863,10 @@ xfs_bmap_add_extent_delay_real( > > > > > * The right neighbor is not contiguous. > > > > > */ > > > > > xfs_iext_update_extent(bma->ip, state, &bma->icur, new); > > > > > - ifp->if_nextents++; > > > > > + > > > > > + error = xfs_next_set(bma->ip, whichfork, 1); > > > > > + if (error) > > > > > + goto done; > > > > > > > > > > if (bma->cur == NULL) > > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > > @@ -1937,7 +1952,10 @@ xfs_bmap_add_extent_delay_real( > > > > > xfs_iext_next(ifp, &bma->icur); > > > > > xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state); > > > > > xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state); > > > > > - ifp->if_nextents++; > > > > > + > > > > > + error = xfs_next_set(bma->ip, whichfork, 1); > > > > > + if (error) > > > > > + goto done; > > > > > > > > > > if (bma->cur == NULL) > > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > > @@ -2141,7 +2159,11 @@ xfs_bmap_add_extent_unwritten_real( > > > > > xfs_iext_remove(ip, icur, state); > > > > > xfs_iext_prev(ifp, icur); > > > > > xfs_iext_update_extent(ip, state, icur, &LEFT); > > > > > - ifp->if_nextents -= 2; > > > > > + > > > > > + error = xfs_next_set(ip, whichfork, -2); > > > > > + if (error) > > > > > + goto done; > > > > > + > > > > > if (cur == NULL) > > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > > else { > > > > > @@ -2193,7 +2215,11 @@ xfs_bmap_add_extent_unwritten_real( > > > > > xfs_iext_remove(ip, icur, state); > > > > > xfs_iext_prev(ifp, icur); > > > > > xfs_iext_update_extent(ip, state, icur, &LEFT); > > > > > - ifp->if_nextents--; > > > > > + > > > > > + error = xfs_next_set(ip, whichfork, -1); > > > > > + if (error) > > > > > + goto done; > > > > > + > > > > > if (cur == NULL) > > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > > else { > > > > > @@ -2235,7 +2261,10 @@ xfs_bmap_add_extent_unwritten_real( > > > > > xfs_iext_remove(ip, icur, state); > > > > > xfs_iext_prev(ifp, icur); > > > > > xfs_iext_update_extent(ip, state, icur, &PREV); > > > > > - ifp->if_nextents--; > > > > > + > > > > > + error = xfs_next_set(ip, whichfork, -1); > > > > > + if (error) > > > > > + goto done; > > > > > > > > > > if (cur == NULL) > > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > > @@ -2343,7 +2372,10 @@ xfs_bmap_add_extent_unwritten_real( > > > > > > > > > > xfs_iext_update_extent(ip, state, icur, &PREV); > > > > > xfs_iext_insert(ip, icur, new, state); > > > > > - ifp->if_nextents++; > > > > > + > > > > > + error = xfs_next_set(ip, whichfork, 1); > > > > > + if (error) > > > > > + goto done; > > > > > > > > > > if (cur == NULL) > > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > > @@ -2419,7 +2451,10 @@ xfs_bmap_add_extent_unwritten_real( > > > > > xfs_iext_update_extent(ip, state, icur, &PREV); > > > > > xfs_iext_next(ifp, icur); > > > > > xfs_iext_insert(ip, icur, new, state); > > > > > - ifp->if_nextents++; > > > > > + > > > > > + error = xfs_next_set(ip, whichfork, 1); > > > > > + if (error) > > > > > + goto done; > > > > > > > > > > if (cur == NULL) > > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > > @@ -2471,7 +2506,10 @@ xfs_bmap_add_extent_unwritten_real( > > > > > xfs_iext_next(ifp, icur); > > > > > xfs_iext_insert(ip, icur, &r[1], state); > > > > > xfs_iext_insert(ip, icur, &r[0], state); > > > > > - ifp->if_nextents += 2; > > > > > + > > > > > + error = xfs_next_set(ip, whichfork, 2); > > > > > + if (error) > > > > > + goto done; > > > > > > > > > > if (cur == NULL) > > > > > rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; > > > > > @@ -2787,7 +2825,10 @@ xfs_bmap_add_extent_hole_real( > > > > > xfs_iext_remove(ip, icur, state); > > > > > xfs_iext_prev(ifp, icur); > > > > > xfs_iext_update_extent(ip, state, icur, &left); > > > > > - ifp->if_nextents--; > > > > > + > > > > > + error = xfs_next_set(ip, whichfork, -1); > > > > > + if (error) > > > > > + goto done; > > > > > > > > > > if (cur == NULL) { > > > > > rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); > > > > > @@ -2886,7 +2927,10 @@ xfs_bmap_add_extent_hole_real( > > > > > * Insert a new entry. > > > > > */ > > > > > xfs_iext_insert(ip, icur, new, state); > > > > > - ifp->if_nextents++; > > > > > + > > > > > + error = xfs_next_set(ip, whichfork, 1); > > > > > + if (error) > > > > > + goto done; > > > > > > > > > > if (cur == NULL) { > > > > > rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); > > > > > @@ -5083,7 +5127,10 @@ xfs_bmap_del_extent_real( > > > > > */ > > > > > xfs_iext_remove(ip, icur, state); > > > > > xfs_iext_prev(ifp, icur); > > > > > - ifp->if_nextents--; > > > > > + > > > > > + error = xfs_next_set(ip, whichfork, -1); > > > > > + if (error) > > > > > + goto done; > > > > > > > > > > flags |= XFS_ILOG_CORE; > > > > > if (!cur) { > > > > > @@ -5193,7 +5240,10 @@ xfs_bmap_del_extent_real( > > > > > } else > > > > > flags |= xfs_ilog_fext(whichfork); > > > > > > > > > > - ifp->if_nextents++; > > > > > + error = xfs_next_set(ip, whichfork, 1); > > > > > + if (error) > > > > > + goto done; > > > > > + > > > > > xfs_iext_next(ifp, icur); > > > > > xfs_iext_insert(ip, icur, &new, state); > > > > > break; > > > > > @@ -5660,7 +5710,10 @@ xfs_bmse_merge( > > > > > * Update the on-disk extent count, the btree if necessary and log the > > > > > * inode. > > > > > */ > > > > > - ifp->if_nextents--; > > > > > + error = xfs_next_set(ip, whichfork, -1); > > > > > + if (error) > > > > > + goto done; > > > > > + > > > > > *logflags |= XFS_ILOG_CORE; > > > > > if (!cur) { > > > > > *logflags |= XFS_ILOG_DEXT; > > > > > @@ -6047,7 +6100,10 @@ xfs_bmap_split_extent( > > > > > /* Add new extent */ > > > > > xfs_iext_next(ifp, &icur); > > > > > xfs_iext_insert(ip, &icur, &new, 0); > > > > > - ifp->if_nextents++; > > > > > + > > > > > + error = xfs_next_set(ip, whichfork, 1); > > > > > + if (error) > > > > > + goto del_cursor; > > > > > > > > > > if (cur) { > > > > > error = xfs_bmbt_lookup_eq(cur, &new, &i); > > > > > diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c > > > > > index 28b366275ae0..3bf5a2c391bd 100644 > > > > > --- a/fs/xfs/libxfs/xfs_inode_fork.c > > > > > +++ b/fs/xfs/libxfs/xfs_inode_fork.c > > > > > @@ -728,3 +728,32 @@ xfs_ifork_verify_local_attr( > > > > > > > > > > return 0; > > > > > } > > > > > + > > > > > +int > > > > > +xfs_next_set( > > > > > > > > "next"... please choose an abbreviation that doesn't collide with a > > > > common English word. > > > > > > > > > + struct xfs_inode *ip, > > > > > + int whichfork, > > > > > + int delta) > > > > > > > > Delta? I thought this was a setter function? > > > > > > > > > +{ > > > > > + struct xfs_ifork *ifp; > > > > > + int64_t nr_exts; > > > > > + int64_t max_exts; > > > > > + > > > > > + ifp = XFS_IFORK_PTR(ip, whichfork); > > > > > + > > > > > + if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) > > > > > + max_exts = MAXEXTNUM; > > > > > + else if (whichfork == XFS_ATTR_FORK) > > > > > + max_exts = MAXAEXTNUM; > > > > > + else > > > > > + ASSERT(0); > > > > > + > > > > > + nr_exts = ifp->if_nextents + delta; > > > > > > > > Nope, it's a modify function all right. Then it should be named: > > > > > > > > xfs_nextents_mod(ip, whichfork, delta) > > > > > > > > > + if ((delta > 0 && nr_exts > max_exts) > > > > > + || (delta < 0 && nr_exts < 0)) > > > > > > > > Line these up, please. e.g., > > > > > > > > if ((delta > 0 && nr_exts > max_exts) || > > > > (delta < 0 && nr_exts < 0)) > > HA even the maintainer gets it wrong. :( > > > > > > > > > --D > > > > > > > > > + return -EOVERFLOW; > > > > > > Oh, also, shouldn't this be EFBIG ("File too big")? > > > > True, EFBIG is more appropriate than EOVERFLOW in this case. > > > > Darrick, I have one question. The purpose of this patch is to fix the zero day > > bug where we overflow extent counter silently and get to know about it only > > when flushing the incore inode to disk. Patches that come later in the series > > modify the extent count limits to 2^32 (for xattr fork) and 2^47 (for data > > fork). If this patch is not required to be sent to stable release, I will drop > > it from the series. > > I would leave it in the series, unless you mean to send this as a > separate cleanup ahead of everything else? > > Now that I think about it, this probably should become its own cleanup > series. I just realized that if we error out EFBIG in the middle of a > bmap function, we're probably going to end up cancelling a dirty > transaction, which will cause an fs shutdown. Since xfs cannot undo the > effects of a dirty transaction, we have to be able to error out earlier > in the transaction sequence so that we can back out to userspace without > affecting the filesystem. > > IOWs, this means that any code path that could increase an inode's > extent count will have to check the the inode (after we take the ILOCK) > to make sure that it can accomodate however many more extents we're > adding. > > static int > xfs_trans_inode_reserve_extent_count(ip, whichfork, nrtoadd) > { > if (MAX{,A}EXTNUM - XFS_IFORK_PTR(ip, whichfork)->if_nextents < nrtoadd) > return -EFBIG; > return 0; > } > > error = xfs_trans_alloc(..., &tp); > if (error) > goto out; > > xfs_ilock(ip, XFS_ILOCK_EXCL); > xfs_trans_ijoin(ip, 0); > > error = xfs_trans_inode_reserve_extent_count(ip, whichfork, nrtoadd) > if (error) > goto out; > > error = xfs_trans_reserve_quota_nblks(tp, ip, ...); > if (error) > goto out; > > ...or something like that. And now suddenly this grows into its own > cleanup series. :/ Ok. I will work on the cleanup series while we are reaching consensus on the log reservation changes. Thanks once again for the suggestions. > > > Also, I can't have a "fixes" tag because this is a zero > > day bug. > > Everything is a zero day now... but establishing a base for this one is > probably not going to be easy since I bet the overflow has existed since > the beginning. > > --D > > > > > > > > > --D > > > > > > > > + > > > > > + ifp->if_nextents = nr_exts; > > > > > + > > > > > + return 0; > > > > > +} > > > > > diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h > > > > > index a4953e95c4f3..a84ae42ace79 100644 > > > > > --- a/fs/xfs/libxfs/xfs_inode_fork.h > > > > > +++ b/fs/xfs/libxfs/xfs_inode_fork.h > > > > > @@ -173,4 +173,5 @@ extern void xfs_ifork_init_cow(struct xfs_inode *ip); > > > > > int xfs_ifork_verify_local_data(struct xfs_inode *ip); > > > > > int xfs_ifork_verify_local_attr(struct xfs_inode *ip); > > > > > > > > > > +int xfs_next_set(struct xfs_inode *ip, int whichfork, int delta); > > > > > #endif /* __XFS_INODE_FORK_H__ */ > > > > > > > >
I'm lost in 4 layers of full quotes. Can someone summarize the discussion without hundreds of lines of irrelevant quotes?
On Fri, Jun 19, 2020 at 07:36:08AM -0700, Christoph Hellwig wrote: > I'm lost in 4 layers of full quotes. Can someone summarize the > discussion without hundreds of lines of irrelevant quotes? Naming issues with helper functions, and pointing out that any code path that thinks it could add $nr extents to a file needs to check for overflows in (ifp->if_nextents + $nr) after we take the ILOCK but before we dirty the transaction. --D
On Friday 19 June 2020 8:06:08 PM IST Christoph Hellwig wrote: > I'm lost in 4 layers of full quotes. Can someone summarize the > discussion without hundreds of lines of irrelevant quotes? > XFS does not check for possible overflow of per-inode extent counter fields when adding extents to either data or attr fork. For e.g. 1. Insert 5 million xattrs (each having a value size of 255 bytes) and then delete 50% of them in an alternating manner. ./benchmark-xattrs -l 255 -n 5000000 -s 50 -f $mntpnt/testfile-0 benchmark-xattrs.c and related sources can be obtained from https://github.com/chandanr/xfs-xattr-benchmark/blob/master/src/ 2. This causes 98511 extents to be created in the attr fork of the inode. xfsaild/loop0 2035 [003] 9643.390490: probe:xfs_iflush_int: (ffffffffac6225c0) if_nextents=98511 inode=131 3. The incore inode fork extent counter is a signed 32-bit quantity. However the on-disk extent counter is an unsigned 16-bit quantity and hence cannot hold 98511 extents. 4. The following incorrect value is stored in the attr extent counter # xfs_db -f -c 'inode 131' -c 'print core.naextents' /dev/loop0 core.naextents = -32561 As an aside, Please note that the sequence of events causing counter overflow that have been described in this patch's description is not applicable since the patches have been rebased on xfs-ifork-cleanup.2 which causes a signed 32-bit (xfs_extnum_t) quantity to be used to store the extent counter in memory. However, the "on-disk counter overflow" bug still exists as was described above. To prevent the overflow bug from occuring silently, this patch now checks for the overflow condition before incrementing the in-core value and returns an error if a possible overflow is detected. Darrick pointed out that returning an error code can cause the transaction to be aborted because it is most likely to be dirty. Hence his suggestion (to which I agree) was to check for possible overflow just after starting a transaction and obtaining the inode's i_lock. Also, since we are extending the data and xattr extent counters to 32 and 47 bits in the later patches the value of log reservations will change since they are a function of maximum height of BMBT trees. The maximum of height of BMBT trees are themselves calculated based on the maximum number of xattr and data extents. Due to this, "min log size" can end up being larger than what was calculated during mkfs.xfs time. This can cause mount to fail as shown by the following call trace which was generated when executing xfs/306 test, XFS (loop0): Mounting V5 Filesystem XFS (loop0): Log size 8906 blocks too small, minimum size is 9075 blocks XFS (loop0): AAIEEE! Log failed size checks. Abort! XFS: Assertion failed: 0, file: fs/xfs/xfs_log.c, line: 711 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 12821 at fs/xfs/xfs_message.c:112 assfail+0x25/0x28 Modules linked in: CPU: 0 PID: 12821 Comm: mount Tainted: G W 5.6.0-rc6-next-20200320-chandan-00003-g071c2af3f4de #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 RIP: 0010:assfail+0x25/0x28 Code: ff ff 0f 0b c3 0f 1f 44 00 00 41 89 c8 48 89 d1 48 89 f2 48 c7 c6 40 b7 4b b3 e8 82 f9 ff ff 80 3d 83 d6 64 01 00 74 02 0f $ RSP: 0018:ffffb05b414cbd78 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff9d9d501d5000 RCX: 0000000000000000 RDX: 00000000ffffffc0 RSI: 000000000000000a RDI: ffffffffb346dc65 RBP: ffff9da444b49a80 R08: 0000000000000000 R09: 0000000000000000 R10: 000000000000000a R11: f000000000000000 R12: 00000000ffffffea R13: 000000000000000e R14: 0000000000004594 R15: ffff9d9d501d5628 FS: 00007fd6c5d17c80(0000) GS:ffff9da44d800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000002 CR3: 00000008a48c0000 CR4: 00000000000006f0 Call Trace: xfs_log_mount+0xf8/0x300 xfs_mountfs+0x46e/0x950 xfs_fc_fill_super+0x318/0x510 ? xfs_mount_free+0x30/0x30 get_tree_bdev+0x15c/0x250 vfs_get_tree+0x25/0xb0 do_mount+0x740/0x9b0 ? memdup_user+0x41/0x80 __x64_sys_mount+0x8e/0xd0 do_syscall_64+0x48/0x110 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7fd6c5f2ccda Code: 48 8b 0d b9 e1 0b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f $ RSP: 002b:00007ffe00dfb9f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 0000560c1aaa92c0 RCX: 00007fd6c5f2ccda RDX: 0000560c1aaae110 RSI: 0000560c1aaad040 RDI: 0000560c1aaa94d0 RBP: 00007fd6c607d204 R08: 0000000000000000 R09: 0000560c1aaadde0 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000560c1aaa94d0 R15: 0000560c1aaae110 ---[ end trace 6436391b468bc652 ]--- XFS (loop0): log mount failed The corresponding filesystem was created using mkfs options "-m rmapbt=1,reflink=1 -b size=1k -d size=20m -n size=64k". To prevent such incidents, we are contemplating on using the following approach, 1. Use existing constants for max extent counts (i.e. signed 2^16 for xattrs and signed 2^32 for data extents). 2. Compute max bmbt heights, log reservations and hence min log size during mount time. 3. Later, during the mount lifetime of the filesystem, when we are about to overflow the extent counter we use larger values (2^32 for xattr and 2^47 for data) for max extent count and then recompute log reservations and min logsize. At this point, if on-disk log size is smaller than min log size we return with an error. 4. Otherwise, we set an RO-feature flag and use the revised log reservations for future transactions. Please let me know your opinion on the above approaches. -- chandan
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index edc63dba007f..798fca5c52af 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -906,7 +906,10 @@ xfs_bmap_local_to_extents( xfs_iext_first(ifp, &icur); xfs_iext_insert(ip, &icur, &rec, 0); - ifp->if_nextents = 1; + error = xfs_next_set(ip, whichfork, 1); + if (error) + goto done; + ip->i_d.di_nblocks = 1; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); @@ -1594,7 +1597,10 @@ xfs_bmap_add_extent_delay_real( xfs_iext_remove(bma->ip, &bma->icur, state); xfs_iext_prev(ifp, &bma->icur); xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT); - ifp->if_nextents--; + + error = xfs_next_set(bma->ip, whichfork, -1); + if (error) + goto done; if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1698,7 +1704,10 @@ xfs_bmap_add_extent_delay_real( PREV.br_startblock = new->br_startblock; PREV.br_state = new->br_state; xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); - ifp->if_nextents++; + + error = xfs_next_set(bma->ip, whichfork, 1); + if (error) + goto done; if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1764,7 +1773,10 @@ xfs_bmap_add_extent_delay_real( * The left neighbor is not contiguous. */ xfs_iext_update_extent(bma->ip, state, &bma->icur, new); - ifp->if_nextents++; + + error = xfs_next_set(bma->ip, whichfork, 1); + if (error) + goto done; if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1851,7 +1863,10 @@ xfs_bmap_add_extent_delay_real( * The right neighbor is not contiguous. */ xfs_iext_update_extent(bma->ip, state, &bma->icur, new); - ifp->if_nextents++; + + error = xfs_next_set(bma->ip, whichfork, 1); + if (error) + goto done; if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1937,7 +1952,10 @@ xfs_bmap_add_extent_delay_real( xfs_iext_next(ifp, &bma->icur); xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state); xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state); - ifp->if_nextents++; + + error = xfs_next_set(bma->ip, whichfork, 1); + if (error) + goto done; if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -2141,7 +2159,11 @@ xfs_bmap_add_extent_unwritten_real( xfs_iext_remove(ip, icur, state); xfs_iext_prev(ifp, icur); xfs_iext_update_extent(ip, state, icur, &LEFT); - ifp->if_nextents -= 2; + + error = xfs_next_set(ip, whichfork, -2); + if (error) + goto done; + if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2193,7 +2215,11 @@ xfs_bmap_add_extent_unwritten_real( xfs_iext_remove(ip, icur, state); xfs_iext_prev(ifp, icur); xfs_iext_update_extent(ip, state, icur, &LEFT); - ifp->if_nextents--; + + error = xfs_next_set(ip, whichfork, -1); + if (error) + goto done; + if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2235,7 +2261,10 @@ xfs_bmap_add_extent_unwritten_real( xfs_iext_remove(ip, icur, state); xfs_iext_prev(ifp, icur); xfs_iext_update_extent(ip, state, icur, &PREV); - ifp->if_nextents--; + + error = xfs_next_set(ip, whichfork, -1); + if (error) + goto done; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -2343,7 +2372,10 @@ xfs_bmap_add_extent_unwritten_real( xfs_iext_update_extent(ip, state, icur, &PREV); xfs_iext_insert(ip, icur, new, state); - ifp->if_nextents++; + + error = xfs_next_set(ip, whichfork, 1); + if (error) + goto done; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -2419,7 +2451,10 @@ xfs_bmap_add_extent_unwritten_real( xfs_iext_update_extent(ip, state, icur, &PREV); xfs_iext_next(ifp, icur); xfs_iext_insert(ip, icur, new, state); - ifp->if_nextents++; + + error = xfs_next_set(ip, whichfork, 1); + if (error) + goto done; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -2471,7 +2506,10 @@ xfs_bmap_add_extent_unwritten_real( xfs_iext_next(ifp, icur); xfs_iext_insert(ip, icur, &r[1], state); xfs_iext_insert(ip, icur, &r[0], state); - ifp->if_nextents += 2; + + error = xfs_next_set(ip, whichfork, 2); + if (error) + goto done; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -2787,7 +2825,10 @@ xfs_bmap_add_extent_hole_real( xfs_iext_remove(ip, icur, state); xfs_iext_prev(ifp, icur); xfs_iext_update_extent(ip, state, icur, &left); - ifp->if_nextents--; + + error = xfs_next_set(ip, whichfork, -1); + if (error) + goto done; if (cur == NULL) { rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); @@ -2886,7 +2927,10 @@ xfs_bmap_add_extent_hole_real( * Insert a new entry. */ xfs_iext_insert(ip, icur, new, state); - ifp->if_nextents++; + + error = xfs_next_set(ip, whichfork, 1); + if (error) + goto done; if (cur == NULL) { rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); @@ -5083,7 +5127,10 @@ xfs_bmap_del_extent_real( */ xfs_iext_remove(ip, icur, state); xfs_iext_prev(ifp, icur); - ifp->if_nextents--; + + error = xfs_next_set(ip, whichfork, -1); + if (error) + goto done; flags |= XFS_ILOG_CORE; if (!cur) { @@ -5193,7 +5240,10 @@ xfs_bmap_del_extent_real( } else flags |= xfs_ilog_fext(whichfork); - ifp->if_nextents++; + error = xfs_next_set(ip, whichfork, 1); + if (error) + goto done; + xfs_iext_next(ifp, icur); xfs_iext_insert(ip, icur, &new, state); break; @@ -5660,7 +5710,10 @@ xfs_bmse_merge( * Update the on-disk extent count, the btree if necessary and log the * inode. */ - ifp->if_nextents--; + error = xfs_next_set(ip, whichfork, -1); + if (error) + goto done; + *logflags |= XFS_ILOG_CORE; if (!cur) { *logflags |= XFS_ILOG_DEXT; @@ -6047,7 +6100,10 @@ xfs_bmap_split_extent( /* Add new extent */ xfs_iext_next(ifp, &icur); xfs_iext_insert(ip, &icur, &new, 0); - ifp->if_nextents++; + + error = xfs_next_set(ip, whichfork, 1); + if (error) + goto del_cursor; if (cur) { error = xfs_bmbt_lookup_eq(cur, &new, &i); diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 28b366275ae0..3bf5a2c391bd 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -728,3 +728,32 @@ xfs_ifork_verify_local_attr( return 0; } + +int +xfs_next_set( + struct xfs_inode *ip, + int whichfork, + int delta) +{ + struct xfs_ifork *ifp; + int64_t nr_exts; + int64_t max_exts; + + ifp = XFS_IFORK_PTR(ip, whichfork); + + if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) + max_exts = MAXEXTNUM; + else if (whichfork == XFS_ATTR_FORK) + max_exts = MAXAEXTNUM; + else + ASSERT(0); + + nr_exts = ifp->if_nextents + delta; + if ((delta > 0 && nr_exts > max_exts) + || (delta < 0 && nr_exts < 0)) + return -EOVERFLOW; + + ifp->if_nextents = nr_exts; + + return 0; +} diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index a4953e95c4f3..a84ae42ace79 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -173,4 +173,5 @@ extern void xfs_ifork_init_cow(struct xfs_inode *ip); int xfs_ifork_verify_local_data(struct xfs_inode *ip); int xfs_ifork_verify_local_attr(struct xfs_inode *ip); +int xfs_next_set(struct xfs_inode *ip, int whichfork, int delta); #endif /* __XFS_INODE_FORK_H__ */
The following error message was noticed when a workload added one million xattrs, deleted 50% of them and then inserted 400,000 new xattrs. XFS (loop0): xfs_iflush_int: detected corrupt incore inode 131, total extents = -19916, nblocks = 102937, ptr ffff9ce33b098c00 The error message was printed during unmounting the filesystem. The value printed under "total extents" indicates that we overflowed the per-inode signed 16-bit xattr extent counter. Instead of letting this silent corruption occur, this patch checks for extent counter (both data and xattr) overflow before we assign the new value to the corresponding in-memory extent counter. Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com> --- fs/xfs/libxfs/xfs_bmap.c | 92 +++++++++++++++++++++++++++------- fs/xfs/libxfs/xfs_inode_fork.c | 29 +++++++++++ fs/xfs/libxfs/xfs_inode_fork.h | 1 + 3 files changed, 104 insertions(+), 18 deletions(-)