Message ID | 20230830232529.GL28186@frogsfrogsfrogs (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Series | [v2] xfs: load uncached unlinked inodes into memory on demand | expand |
On Wed, Aug 30, 2023 at 04:25:29PM -0700, Darrick J. Wong wrote: > From: Darrick J. Wong <djwong@kernel.org> > > Create an expert-mode debugger command to create unlinked inodes. > This will hopefully aid in simulation of leaked unlinked inode handling > in the kernel and elsewhere. > > Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Bill O'Donnell <bodonnel@redhat.com> > --- > db/iunlink.c | 196 ++++++++++++++++++++++++++++++++++++++++++++++ > libxfs/libxfs_api_defs.h | 1 > man/man8/xfs_db.8 | 11 +++ > 3 files changed, 208 insertions(+) > > diff --git a/db/iunlink.c b/db/iunlink.c > index 303b5daf..d87562e3 100644 > --- a/db/iunlink.c > +++ b/db/iunlink.c > @@ -197,8 +197,204 @@ static const cmdinfo_t dump_iunlinked_cmd = > N_("[-a agno] [-b bucket] [-q] [-v]"), > N_("dump chain of unlinked inode buckets"), NULL }; > > +/* > + * Look up the inode cluster buffer and log the on-disk unlinked inode change > + * we need to make. > + */ > +static int > +iunlink_log_dinode( > + struct xfs_trans *tp, > + struct xfs_inode *ip, > + struct xfs_perag *pag, > + xfs_agino_t next_agino) > +{ > + struct xfs_mount *mp = tp->t_mountp; > + struct xfs_dinode *dip; > + struct xfs_buf *ibp; > + int offset; > + int error; > + > + error = -libxfs_imap_to_bp(mp, tp, &ip->i_imap, &ibp); > + if (error) > + return error; > + > + dip = xfs_buf_offset(ibp, ip->i_imap.im_boffset); > + > + dip->di_next_unlinked = cpu_to_be32(next_agino); > + offset = ip->i_imap.im_boffset + > + offsetof(struct xfs_dinode, di_next_unlinked); > + > + libxfs_dinode_calc_crc(mp, dip); > + libxfs_trans_log_buf(tp, ibp, offset, offset + sizeof(xfs_agino_t) - 1); > + return 0; > +} > + > +static int > +iunlink_insert_inode( > + struct xfs_trans *tp, > + struct xfs_perag *pag, > + struct xfs_buf *agibp, > + struct xfs_inode *ip) > +{ > + struct xfs_mount *mp = tp->t_mountp; > + struct xfs_agi *agi = agibp->b_addr; > + xfs_agino_t next_agino; > + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); > + short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; > + int offset; > + int error; > + > + /* > + * Get the index into the agi hash table for the list this inode will > + * go on. Make sure the pointer isn't garbage and that this inode > + * isn't already on the list. > + */ > + next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); > + if (next_agino == agino || !xfs_verify_agino_or_null(pag, next_agino)) > + return EFSCORRUPTED; > + > + if (next_agino != NULLAGINO) { > + /* > + * There is already another inode in the bucket, so point this > + * inode to the current head of the list. > + */ > + error = iunlink_log_dinode(tp, ip, pag, next_agino); > + if (error) > + return error; > + } > + > + /* Update the bucket. */ > + agi->agi_unlinked[bucket_index] = cpu_to_be32(agino); > + offset = offsetof(struct xfs_agi, agi_unlinked) + > + (sizeof(xfs_agino_t) * bucket_index); > + libxfs_trans_log_buf(tp, agibp, offset, > + offset + sizeof(xfs_agino_t) - 1); > + return 0; > +} > + > +/* > + * This is called when the inode's link count has gone to 0 or we are creating > + * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0. > + * > + * We place the on-disk inode on a list in the AGI. It will be pulled from this > + * list when the inode is freed. > + */ > +static int > +iunlink( > + struct xfs_trans *tp, > + struct xfs_inode *ip) > +{ > + struct xfs_mount *mp = tp->t_mountp; > + struct xfs_perag *pag; > + struct xfs_buf *agibp; > + int error; > + > + ASSERT(VFS_I(ip)->i_nlink == 0); > + ASSERT(VFS_I(ip)->i_mode != 0); > + > + pag = libxfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); > + > + /* Get the agi buffer first. It ensures lock ordering on the list. */ > + error = -libxfs_read_agi(pag, tp, &agibp); > + if (error) > + goto out; > + > + error = iunlink_insert_inode(tp, pag, agibp, ip); > +out: > + libxfs_perag_put(pag); > + return error; > +} > + > +static int > +create_unlinked( > + struct xfs_mount *mp) > +{ > + struct cred cr = { }; > + struct fsxattr fsx = { }; > + struct xfs_inode *ip; > + struct xfs_trans *tp; > + unsigned int resblks; > + int error; > + > + resblks = XFS_IALLOC_SPACE_RES(mp); > + error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_create_tmpfile, resblks, > + 0, 0, &tp); > + if (error) { > + dbprintf(_("alloc trans: %s\n"), strerror(error)); > + return error; > + } > + > + error = -libxfs_dir_ialloc(&tp, NULL, S_IFREG | 0600, 0, 0, &cr, &fsx, > + &ip); > + if (error) { > + dbprintf(_("create inode: %s\n"), strerror(error)); > + goto out_cancel; > + } > + > + error = iunlink(tp, ip); > + if (error) { > + dbprintf(_("unlink inode: %s\n"), strerror(error)); > + goto out_rele; > + } > + > + error = -libxfs_trans_commit(tp); > + if (error) > + dbprintf(_("commit inode: %s\n"), strerror(error)); > + > + dbprintf(_("Created unlinked inode %llu in agno %u\n"), > + (unsigned long long)ip->i_ino, > + XFS_INO_TO_AGNO(mp, ip->i_ino)); > + libxfs_irele(ip); > + return error; > +out_rele: > + libxfs_irele(ip); > +out_cancel: > + libxfs_trans_cancel(tp); > + return error; > +} > + > +static int > +iunlink_f( > + int argc, > + char **argv) > +{ > + int nr = 1; > + int c; > + int error; > + > + while ((c = getopt(argc, argv, "n:")) != EOF) { > + switch (c) { > + case 'n': > + nr = atoi(optarg); > + if (nr <= 0) { > + dbprintf(_("%s: need positive number\n")); > + return 0; > + } > + break; > + default: > + dbprintf(_("Bad option for iunlink command.\n")); > + return 0; > + } > + } > + > + for (c = 0; c < nr; c++) { > + error = create_unlinked(mp); > + if (error) > + return 1; > + } > + > + return 0; > +} > + > +static const cmdinfo_t iunlink_cmd = > + { "iunlink", NULL, iunlink_f, 0, -1, 0, > + N_("[-n nr]"), > + N_("allocate inodes and put them on the unlinked list"), NULL }; > + > void > iunlink_init(void) > { > add_command(&dump_iunlinked_cmd); > + if (expert_mode) > + add_command(&iunlink_cmd); > } > diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h > index ddba5c7c..04277c00 100644 > --- a/libxfs/libxfs_api_defs.h > +++ b/libxfs/libxfs_api_defs.h > @@ -149,6 +149,7 @@ > #define xfs_prealloc_blocks libxfs_prealloc_blocks > > #define xfs_read_agf libxfs_read_agf > +#define xfs_read_agi libxfs_read_agi > #define xfs_refc_block libxfs_refc_block > #define xfs_refcountbt_calc_reserves libxfs_refcountbt_calc_reserves > #define xfs_refcountbt_calc_size libxfs_refcountbt_calc_size > diff --git a/man/man8/xfs_db.8 b/man/man8/xfs_db.8 > index 2d6d0da4..f53ddd67 100644 > --- a/man/man8/xfs_db.8 > +++ b/man/man8/xfs_db.8 > @@ -840,6 +840,17 @@ Set the current inode number. If no > .I inode# > is given, print the current inode number. > .TP > +.BI "iunlink [-n " nr " ]" > +Allocate inodes and put them on the unlinked list. > + > +Options include: > +.RS 1.0i > +.TP 0.4i > +.B \-n > +Create this number of unlinked inodes. > +If not specified, 1 inode will be created. > +.RE > +.TP > .BI "label [" label ] > Set the filesystem label. The filesystem label can be used by > .BR mount (8) >
diff --git a/db/iunlink.c b/db/iunlink.c index 303b5daf..d87562e3 100644 --- a/db/iunlink.c +++ b/db/iunlink.c @@ -197,8 +197,204 @@ static const cmdinfo_t dump_iunlinked_cmd = N_("[-a agno] [-b bucket] [-q] [-v]"), N_("dump chain of unlinked inode buckets"), NULL }; +/* + * Look up the inode cluster buffer and log the on-disk unlinked inode change + * we need to make. + */ +static int +iunlink_log_dinode( + struct xfs_trans *tp, + struct xfs_inode *ip, + struct xfs_perag *pag, + xfs_agino_t next_agino) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_dinode *dip; + struct xfs_buf *ibp; + int offset; + int error; + + error = -libxfs_imap_to_bp(mp, tp, &ip->i_imap, &ibp); + if (error) + return error; + + dip = xfs_buf_offset(ibp, ip->i_imap.im_boffset); + + dip->di_next_unlinked = cpu_to_be32(next_agino); + offset = ip->i_imap.im_boffset + + offsetof(struct xfs_dinode, di_next_unlinked); + + libxfs_dinode_calc_crc(mp, dip); + libxfs_trans_log_buf(tp, ibp, offset, offset + sizeof(xfs_agino_t) - 1); + return 0; +} + +static int +iunlink_insert_inode( + struct xfs_trans *tp, + struct xfs_perag *pag, + struct xfs_buf *agibp, + struct xfs_inode *ip) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_agi *agi = agibp->b_addr; + xfs_agino_t next_agino; + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); + short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; + int offset; + int error; + + /* + * Get the index into the agi hash table for the list this inode will + * go on. Make sure the pointer isn't garbage and that this inode + * isn't already on the list. + */ + next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); + if (next_agino == agino || !xfs_verify_agino_or_null(pag, next_agino)) + return EFSCORRUPTED; + + if (next_agino != NULLAGINO) { + /* + * There is already another inode in the bucket, so point this + * inode to the current head of the list. + */ + error = iunlink_log_dinode(tp, ip, pag, next_agino); + if (error) + return error; + } + + /* Update the bucket. */ + agi->agi_unlinked[bucket_index] = cpu_to_be32(agino); + offset = offsetof(struct xfs_agi, agi_unlinked) + + (sizeof(xfs_agino_t) * bucket_index); + libxfs_trans_log_buf(tp, agibp, offset, + offset + sizeof(xfs_agino_t) - 1); + return 0; +} + +/* + * This is called when the inode's link count has gone to 0 or we are creating + * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0. + * + * We place the on-disk inode on a list in the AGI. It will be pulled from this + * list when the inode is freed. + */ +static int +iunlink( + struct xfs_trans *tp, + struct xfs_inode *ip) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_perag *pag; + struct xfs_buf *agibp; + int error; + + ASSERT(VFS_I(ip)->i_nlink == 0); + ASSERT(VFS_I(ip)->i_mode != 0); + + pag = libxfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); + + /* Get the agi buffer first. It ensures lock ordering on the list. */ + error = -libxfs_read_agi(pag, tp, &agibp); + if (error) + goto out; + + error = iunlink_insert_inode(tp, pag, agibp, ip); +out: + libxfs_perag_put(pag); + return error; +} + +static int +create_unlinked( + struct xfs_mount *mp) +{ + struct cred cr = { }; + struct fsxattr fsx = { }; + struct xfs_inode *ip; + struct xfs_trans *tp; + unsigned int resblks; + int error; + + resblks = XFS_IALLOC_SPACE_RES(mp); + error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_create_tmpfile, resblks, + 0, 0, &tp); + if (error) { + dbprintf(_("alloc trans: %s\n"), strerror(error)); + return error; + } + + error = -libxfs_dir_ialloc(&tp, NULL, S_IFREG | 0600, 0, 0, &cr, &fsx, + &ip); + if (error) { + dbprintf(_("create inode: %s\n"), strerror(error)); + goto out_cancel; + } + + error = iunlink(tp, ip); + if (error) { + dbprintf(_("unlink inode: %s\n"), strerror(error)); + goto out_rele; + } + + error = -libxfs_trans_commit(tp); + if (error) + dbprintf(_("commit inode: %s\n"), strerror(error)); + + dbprintf(_("Created unlinked inode %llu in agno %u\n"), + (unsigned long long)ip->i_ino, + XFS_INO_TO_AGNO(mp, ip->i_ino)); + libxfs_irele(ip); + return error; +out_rele: + libxfs_irele(ip); +out_cancel: + libxfs_trans_cancel(tp); + return error; +} + +static int +iunlink_f( + int argc, + char **argv) +{ + int nr = 1; + int c; + int error; + + while ((c = getopt(argc, argv, "n:")) != EOF) { + switch (c) { + case 'n': + nr = atoi(optarg); + if (nr <= 0) { + dbprintf(_("%s: need positive number\n")); + return 0; + } + break; + default: + dbprintf(_("Bad option for iunlink command.\n")); + return 0; + } + } + + for (c = 0; c < nr; c++) { + error = create_unlinked(mp); + if (error) + return 1; + } + + return 0; +} + +static const cmdinfo_t iunlink_cmd = + { "iunlink", NULL, iunlink_f, 0, -1, 0, + N_("[-n nr]"), + N_("allocate inodes and put them on the unlinked list"), NULL }; + void iunlink_init(void) { add_command(&dump_iunlinked_cmd); + if (expert_mode) + add_command(&iunlink_cmd); } diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h index ddba5c7c..04277c00 100644 --- a/libxfs/libxfs_api_defs.h +++ b/libxfs/libxfs_api_defs.h @@ -149,6 +149,7 @@ #define xfs_prealloc_blocks libxfs_prealloc_blocks #define xfs_read_agf libxfs_read_agf +#define xfs_read_agi libxfs_read_agi #define xfs_refc_block libxfs_refc_block #define xfs_refcountbt_calc_reserves libxfs_refcountbt_calc_reserves #define xfs_refcountbt_calc_size libxfs_refcountbt_calc_size diff --git a/man/man8/xfs_db.8 b/man/man8/xfs_db.8 index 2d6d0da4..f53ddd67 100644 --- a/man/man8/xfs_db.8 +++ b/man/man8/xfs_db.8 @@ -840,6 +840,17 @@ Set the current inode number. If no .I inode# is given, print the current inode number. .TP +.BI "iunlink [-n " nr " ]" +Allocate inodes and put them on the unlinked list. + +Options include: +.RS 1.0i +.TP 0.4i +.B \-n +Create this number of unlinked inodes. +If not specified, 1 inode will be created. +.RE +.TP .BI "label [" label ] Set the filesystem label. The filesystem label can be used by .BR mount (8)