Message ID | 155968499323.1657646.9567491795149169699.stgit@magnolia (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Series | xfs: refactor and improve inode iteration | expand |
On Tue, Jun 04, 2019 at 02:49:53PM -0700, Darrick J. Wong wrote: > From: Darrick J. Wong <darrick.wong@oracle.com> > > Create a new ibulk structure incore to help us deal with bulk inode stat > state tracking and then convert the bulkstat code to use the new iwalk > iterator. This disentangles inode walking from bulk stat control for > simpler code and enables us to isolate the formatter functions to the > ioctl handling code. > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > --- > fs/xfs/xfs_ioctl.c | 65 ++++++-- > fs/xfs/xfs_ioctl.h | 5 + > fs/xfs/xfs_ioctl32.c | 88 +++++------ > fs/xfs/xfs_itable.c | 407 ++++++++++++++------------------------------------ > fs/xfs/xfs_itable.h | 79 ++++------ > 5 files changed, 245 insertions(+), 399 deletions(-) > > > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c > index 5ffbdcff3dba..43734901aeb9 100644 > --- a/fs/xfs/xfs_ioctl.c > +++ b/fs/xfs/xfs_ioctl.c ... > @@ -745,35 +757,54 @@ xfs_ioc_bulkstat( > if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t))) > return -EFAULT; > > - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) > + if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64))) > return -EFAULT; > > - if ((count = bulkreq.icount) <= 0) > + if (bulkreq.icount <= 0) > return -EINVAL; > > if (bulkreq.ubuffer == NULL) > return -EINVAL; > > - if (cmd == XFS_IOC_FSINUMBERS) > - error = xfs_inumbers(mp, &inlast, &count, > + breq.ubuffer = bulkreq.ubuffer; > + breq.icount = bulkreq.icount; > + > + /* > + * FSBULKSTAT_SINGLE expects that *lastip contains the inode number > + * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect > + * that *lastip contains either zero or the number of the last inode to > + * be examined by the previous call and return results starting with > + * the next inode after that. The new bulk request functions take the > + * inode to start with, so we have to adjust the lastino/startino > + * parameter to maintain correct function. > + */ It's kind of difficult to tell what's new or old when just looking at the code. The comment suggests FSINUMBERS and FSBULKSTAT use the same interface wrt to lastip, but they aren't handled the same below. I take it this is because xfs_inumbers() still has the same behavior whereas xfs_bulkstat() has been changed to operate based on breq rather than lastip..? > + if (cmd == XFS_IOC_FSINUMBERS) { > + int count = breq.icount; > + > + breq.startino = lastino; > + error = xfs_inumbers(mp, &breq.startino, &count, > bulkreq.ubuffer, xfs_inumbers_fmt); > - else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) > - error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer, > - sizeof(xfs_bstat_t), NULL, &done); > - else /* XFS_IOC_FSBULKSTAT */ > - error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, > - sizeof(xfs_bstat_t), bulkreq.ubuffer, > - &done); > + breq.ocount = count; > + lastino = breq.startino; > + } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) { > + breq.startino = lastino; > + error = xfs_bulkstat_one(&breq, xfs_bulkstat_one_fmt); > + lastino = breq.startino; > + } else { /* XFS_IOC_FSBULKSTAT */ > + breq.startino = lastino + 1; > + error = xfs_bulkstat(&breq, xfs_bulkstat_one_fmt); > + lastino = breq.startino - 1; > + } > ... > diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c > index 814ffe6fbab7..add15819daf3 100644 > --- a/fs/xfs/xfs_ioctl32.c > +++ b/fs/xfs/xfs_ioctl32.c ... > @@ -284,38 +263,57 @@ xfs_compat_ioc_bulkstat( > return -EFAULT; > bulkreq.ocount = compat_ptr(addr); > > - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) > + if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64))) > return -EFAULT; > + breq.startino = lastino + 1; > > - if ((count = bulkreq.icount) <= 0) > + if (bulkreq.icount <= 0) > return -EINVAL; > > if (bulkreq.ubuffer == NULL) > return -EINVAL; > > + breq.ubuffer = bulkreq.ubuffer; > + breq.icount = bulkreq.icount; > + > + /* > + * FSBULKSTAT_SINGLE expects that *lastip contains the inode number > + * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect > + * that *lastip contains either zero or the number of the last inode to > + * be examined by the previous call and return results starting with > + * the next inode after that. The new bulk request functions take the > + * inode to start with, so we have to adjust the lastino/startino > + * parameter to maintain correct function. > + */ (Same comment here.) > if (cmd == XFS_IOC_FSINUMBERS_32) { > - error = xfs_inumbers(mp, &inlast, &count, > + int count = breq.icount; > + > + breq.startino = lastino; > + error = xfs_inumbers(mp, &breq.startino, &count, > bulkreq.ubuffer, inumbers_func); > + breq.ocount = count; > + lastino = breq.startino; > } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { > - int res; > - > - error = bs_one_func(mp, inlast, bulkreq.ubuffer, > - bs_one_size, NULL, &res); > + breq.startino = lastino; > + error = xfs_bulkstat_one(&breq, bs_one_func); > + lastino = breq.startino; > } else if (cmd == XFS_IOC_FSBULKSTAT_32) { > - error = xfs_bulkstat(mp, &inlast, &count, > - bs_one_func, bs_one_size, > - bulkreq.ubuffer, &done); > - } else > + breq.startino = lastino + 1; > + error = xfs_bulkstat(&breq, bs_one_func); > + lastino = breq.startino - 1; > + } else { > error = -EINVAL; > + } > if (error) > return error; > > + lastino = breq.startino - 1; Should this be here? > if (bulkreq.lastip != NULL && > - copy_to_user(bulkreq.lastip, &inlast, sizeof(xfs_ino_t))) > + copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t))) > return -EFAULT; > > if (bulkreq.ocount != NULL && > - copy_to_user(bulkreq.ocount, &count, sizeof(count))) > + copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32))) > return -EFAULT; > > return 0; > diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c > index 3ca1c454afe6..87c597ea1df7 100644 > --- a/fs/xfs/xfs_itable.c > +++ b/fs/xfs/xfs_itable.c > @@ -22,37 +22,63 @@ > #include "xfs_iwalk.h" > > /* > - * Return stat information for one inode. > - * Return 0 if ok, else errno. > + * Bulk Stat > + * ========= > + * > + * Use the inode walking functions to fill out struct xfs_bstat for every > + * allocated inode, then pass the stat information to some externally provided > + * iteration function. > */ > -int > + > +struct xfs_bstat_chunk { > + bulkstat_one_fmt_pf formatter; > + struct xfs_ibulk *breq; > +}; > + > +/* > + * Fill out the bulkstat info for a single inode and report it somewhere. > + * > + * bc->breq->lastino is effectively the inode cursor as we walk through the > + * filesystem. Therefore, we update it any time we need to move the cursor > + * forward, regardless of whether or not we're sending any bstat information > + * back to userspace. If the inode is internal metadata or, has been freed > + * out from under us, we just simply keep going. > + * > + * However, if any other type of error happens we want to stop right where we > + * are so that userspace will call back with exact number of the bad inode and > + * we can send back an error code. > + * > + * Note that if the formatter tells us there's no space left in the buffer we > + * move the cursor forward and abort the walk. > + */ > +STATIC int > xfs_bulkstat_one_int( > - struct xfs_mount *mp, /* mount point for filesystem */ > - xfs_ino_t ino, /* inode to get data for */ > - void __user *buffer, /* buffer to place output in */ > - int ubsize, /* size of buffer */ > - bulkstat_one_fmt_pf formatter, /* formatter, copy to user */ > - int *ubused, /* bytes used by me */ > - int *stat) /* BULKSTAT_RV_... */ > + struct xfs_mount *mp, > + struct xfs_trans *tp, > + xfs_ino_t ino, > + void *data) > { Any reason this function takes an 'ino' param considering it's sourced from breq->startino and we bump that value from within this function? The latter seems slightly misplaced to me since it doesn't appear to control the iteration. It also looks like we bump startino in almost all cases. Exceptions are memory allocation failure of the buffer and formatter error. Hmm.. could we lift the buffer to the bc and reuse it to avoid that error entirely (along with repeated allocs/frees)? From there, perhaps we could lift the ->startino update to the callers based on something like error != -EFAULT..? (Alternatively, the caller could update it first and then walk it back if error == -EFAULT). > + struct xfs_bstat_chunk *bc = data; > struct xfs_icdinode *dic; /* dinode core info pointer */ > struct xfs_inode *ip; /* incore inode pointer */ > struct inode *inode; > struct xfs_bstat *buf; /* return buffer */ > int error = 0; /* error value */ > > - *stat = BULKSTAT_RV_NOTHING; > - > - if (!buffer || xfs_internal_inum(mp, ino)) > + if (xfs_internal_inum(mp, ino)) { > + bc->breq->startino = ino + 1; > return -EINVAL; > + } > > buf = kmem_zalloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL); > if (!buf) > return -ENOMEM; > > - error = xfs_iget(mp, NULL, ino, > + error = xfs_iget(mp, tp, ino, > (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED), > XFS_ILOCK_SHARED, &ip); > + if (error == -ENOENT || error == -EINVAL) > + bc->breq->startino = ino + 1; > if (error) > goto out_free; > > @@ -119,43 +145,45 @@ xfs_bulkstat_one_int( > xfs_iunlock(ip, XFS_ILOCK_SHARED); > xfs_irele(ip); > > - error = formatter(buffer, ubsize, ubused, buf); > - if (!error) > - *stat = BULKSTAT_RV_DIDONE; > - > - out_free: > + error = bc->formatter(bc->breq, buf); > + switch (error) { > + case XFS_IBULK_BUFFER_FULL: > + error = XFS_IWALK_ABORT; > + /* fall through */ > + case 0: > + bc->breq->startino = ino + 1; > + break; > + } > +out_free: > kmem_free(buf); > return error; > } > > -/* Return 0 on success or positive error */ > -STATIC int > -xfs_bulkstat_one_fmt( > - void __user *ubuffer, > - int ubsize, > - int *ubused, > - const xfs_bstat_t *buffer) > -{ > - if (ubsize < sizeof(*buffer)) > - return -ENOMEM; > - if (copy_to_user(ubuffer, buffer, sizeof(*buffer))) > - return -EFAULT; > - if (ubused) > - *ubused = sizeof(*buffer); > - return 0; > -} > - > +/* Bulkstat a single inode. */ > int > xfs_bulkstat_one( > - xfs_mount_t *mp, /* mount point for filesystem */ > - xfs_ino_t ino, /* inode number to get data for */ > - void __user *buffer, /* buffer to place output in */ > - int ubsize, /* size of buffer */ > - int *ubused, /* bytes used by me */ > - int *stat) /* BULKSTAT_RV_... */ > + struct xfs_ibulk *breq, > + bulkstat_one_fmt_pf formatter) > { > - return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, > - xfs_bulkstat_one_fmt, ubused, stat); > + struct xfs_bstat_chunk bc = { > + .formatter = formatter, > + .breq = breq, > + }; > + int error; > + > + breq->icount = 1; > + breq->ocount = 0; > + So ->icount is already set by the caller based on user input. I'd guess this is set here to guarantee a single cycle in the event that the user provided value is >1, but that seems unnecessary since we're calling the internal helper to handle a single inode. If we want to set ->icount for whatever reason, can we do it in the caller where it's more obvious? That also shows that the ->ocount init is unnecessary since the whole structure is initialized in the caller. > + error = xfs_bulkstat_one_int(breq->mp, NULL, breq->startino, &bc); > + > + /* > + * If we reported one inode to userspace then we abort because we hit > + * the end of the buffer. Don't leak that back to userspace. > + */ > + if (error == XFS_IWALK_ABORT) > + error = 0; > + > + return error; > } > > /* ... > diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h > index 369e3f159d4e..366d391eb11f 100644 > --- a/fs/xfs/xfs_itable.h > +++ b/fs/xfs/xfs_itable.h > @@ -5,63 +5,46 @@ > #ifndef __XFS_ITABLE_H__ > #define __XFS_ITABLE_H__ > > -/* > - * xfs_bulkstat() is used to fill in xfs_bstat structures as well as dm_stat > - * structures (by the dmi library). This is a pointer to a formatter function > - * that will iget the inode and fill in the appropriate structure. > - * see xfs_bulkstat_one() and xfs_dm_bulkstat_one() in dmapi_xfs.c > - */ > -typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, > - xfs_ino_t ino, > - void __user *buffer, > - int ubsize, > - int *ubused, > - int *stat); > +/* In-memory representation of a userspace request for batch inode data. */ > +struct xfs_ibulk { > + struct xfs_mount *mp; > + void __user *ubuffer; /* user output buffer */ > + xfs_ino_t startino; /* start with this inode */ > + unsigned int icount; /* number of elements in ubuffer */ > + unsigned int ocount; /* number of records returned */ > +}; > + > +/* Return value that means we want to abort the walk. */ > +#define XFS_IBULK_ABORT (XFS_IWALK_ABORT) > + > +/* Return value that means the formatting buffer is now full. */ > +#define XFS_IBULK_BUFFER_FULL (2) > It might be wise to define this such that it's guaranteed not to be the same as the abort value, since that is defined externally to this header. IBULK_ABORT + 1 perhaps? Brian > /* > - * Values for stat return value. > + * Advance the user buffer pointer by one record of the given size. If the > + * buffer is now full, return the appropriate error code. > */ > -#define BULKSTAT_RV_NOTHING 0 > -#define BULKSTAT_RV_DIDONE 1 > -#define BULKSTAT_RV_GIVEUP 2 > +static inline int > +xfs_ibulk_advance( > + struct xfs_ibulk *breq, > + size_t bytes) > +{ > + char __user *b = breq->ubuffer; > + > + breq->ubuffer = b + bytes; > + breq->ocount++; > + return breq->ocount == breq->icount ? XFS_IBULK_BUFFER_FULL : 0; > +} > > /* > * Return stat information in bulk (by-inode) for the filesystem. > */ > -int /* error status */ > -xfs_bulkstat( > - xfs_mount_t *mp, /* mount point for filesystem */ > - xfs_ino_t *lastino, /* last inode returned */ > - int *count, /* size of buffer/count returned */ > - bulkstat_one_pf formatter, /* func that'd fill a single buf */ > - size_t statstruct_size,/* sizeof struct that we're filling */ > - char __user *ubuffer,/* buffer with inode stats */ > - int *done); /* 1 if there are more stats to get */ > > -typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */ > - void __user *ubuffer, /* buffer to write to */ > - int ubsize, /* remaining user buffer sz */ > - int *ubused, /* bytes used by formatter */ > - const xfs_bstat_t *buffer); /* buffer to read from */ > +typedef int (*bulkstat_one_fmt_pf)(struct xfs_ibulk *breq, > + const struct xfs_bstat *bstat); > > -int > -xfs_bulkstat_one_int( > - xfs_mount_t *mp, > - xfs_ino_t ino, > - void __user *buffer, > - int ubsize, > - bulkstat_one_fmt_pf formatter, > - int *ubused, > - int *stat); > - > -int > -xfs_bulkstat_one( > - xfs_mount_t *mp, > - xfs_ino_t ino, > - void __user *buffer, > - int ubsize, > - int *ubused, > - int *stat); > +int xfs_bulkstat_one(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); > +int xfs_bulkstat(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); > > typedef int (*inumbers_fmt_pf)( > void __user *ubuffer, /* buffer to write to */ >
On Mon, Jun 10, 2019 at 10:02:29AM -0400, Brian Foster wrote: > On Tue, Jun 04, 2019 at 02:49:53PM -0700, Darrick J. Wong wrote: > > From: Darrick J. Wong <darrick.wong@oracle.com> > > > > Create a new ibulk structure incore to help us deal with bulk inode stat > > state tracking and then convert the bulkstat code to use the new iwalk > > iterator. This disentangles inode walking from bulk stat control for > > simpler code and enables us to isolate the formatter functions to the > > ioctl handling code. > > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > > --- > > fs/xfs/xfs_ioctl.c | 65 ++++++-- > > fs/xfs/xfs_ioctl.h | 5 + > > fs/xfs/xfs_ioctl32.c | 88 +++++------ > > fs/xfs/xfs_itable.c | 407 ++++++++++++++------------------------------------ > > fs/xfs/xfs_itable.h | 79 ++++------ > > 5 files changed, 245 insertions(+), 399 deletions(-) > > > > > > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c > > index 5ffbdcff3dba..43734901aeb9 100644 > > --- a/fs/xfs/xfs_ioctl.c > > +++ b/fs/xfs/xfs_ioctl.c > ... > > @@ -745,35 +757,54 @@ xfs_ioc_bulkstat( > > if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t))) > > return -EFAULT; > > > > - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) > > + if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64))) > > return -EFAULT; > > > > - if ((count = bulkreq.icount) <= 0) > > + if (bulkreq.icount <= 0) > > return -EINVAL; > > > > if (bulkreq.ubuffer == NULL) > > return -EINVAL; > > > > - if (cmd == XFS_IOC_FSINUMBERS) > > - error = xfs_inumbers(mp, &inlast, &count, > > + breq.ubuffer = bulkreq.ubuffer; > > + breq.icount = bulkreq.icount; > > + > > + /* > > + * FSBULKSTAT_SINGLE expects that *lastip contains the inode number > > + * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect > > + * that *lastip contains either zero or the number of the last inode to > > + * be examined by the previous call and return results starting with > > + * the next inode after that. The new bulk request functions take the > > + * inode to start with, so we have to adjust the lastino/startino > > + * parameter to maintain correct function. > > + */ > > It's kind of difficult to tell what's new or old when just looking at > the code. The comment suggests FSINUMBERS and FSBULKSTAT use the same > interface wrt to lastip, but they aren't handled the same below. I take > it this is because xfs_inumbers() still has the same behavior whereas > xfs_bulkstat() has been changed to operate based on breq rather than > lastip..? Yes. By the end of the series we'll have converted FSINUMBERS too, but for the ~5 or so patches until we get there, the only way to tell new vs. old interface is whether it takes breq or pointers to stuff in breq. > > + if (cmd == XFS_IOC_FSINUMBERS) { > > + int count = breq.icount; > > + > > + breq.startino = lastino; > > + error = xfs_inumbers(mp, &breq.startino, &count, > > bulkreq.ubuffer, xfs_inumbers_fmt); > > - else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) > > - error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer, > > - sizeof(xfs_bstat_t), NULL, &done); > > - else /* XFS_IOC_FSBULKSTAT */ > > - error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, > > - sizeof(xfs_bstat_t), bulkreq.ubuffer, > > - &done); > > + breq.ocount = count; > > + lastino = breq.startino; > > + } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) { > > + breq.startino = lastino; > > + error = xfs_bulkstat_one(&breq, xfs_bulkstat_one_fmt); > > + lastino = breq.startino; > > + } else { /* XFS_IOC_FSBULKSTAT */ > > + breq.startino = lastino + 1; > > + error = xfs_bulkstat(&breq, xfs_bulkstat_one_fmt); > > + lastino = breq.startino - 1; > > + } > > > ... > > diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c > > index 814ffe6fbab7..add15819daf3 100644 > > --- a/fs/xfs/xfs_ioctl32.c > > +++ b/fs/xfs/xfs_ioctl32.c > ... > > @@ -284,38 +263,57 @@ xfs_compat_ioc_bulkstat( > > return -EFAULT; > > bulkreq.ocount = compat_ptr(addr); > > > > - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) > > + if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64))) > > return -EFAULT; > > + breq.startino = lastino + 1; > > > > - if ((count = bulkreq.icount) <= 0) > > + if (bulkreq.icount <= 0) > > return -EINVAL; > > > > if (bulkreq.ubuffer == NULL) > > return -EINVAL; > > > > + breq.ubuffer = bulkreq.ubuffer; > > + breq.icount = bulkreq.icount; > > + > > + /* > > + * FSBULKSTAT_SINGLE expects that *lastip contains the inode number > > + * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect > > + * that *lastip contains either zero or the number of the last inode to > > + * be examined by the previous call and return results starting with > > + * the next inode after that. The new bulk request functions take the > > + * inode to start with, so we have to adjust the lastino/startino > > + * parameter to maintain correct function. > > + */ > > (Same comment here.) > > > if (cmd == XFS_IOC_FSINUMBERS_32) { > > - error = xfs_inumbers(mp, &inlast, &count, > > + int count = breq.icount; > > + > > + breq.startino = lastino; > > + error = xfs_inumbers(mp, &breq.startino, &count, > > bulkreq.ubuffer, inumbers_func); > > + breq.ocount = count; > > + lastino = breq.startino; > > } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { > > - int res; > > - > > - error = bs_one_func(mp, inlast, bulkreq.ubuffer, > > - bs_one_size, NULL, &res); > > + breq.startino = lastino; > > + error = xfs_bulkstat_one(&breq, bs_one_func); > > + lastino = breq.startino; > > } else if (cmd == XFS_IOC_FSBULKSTAT_32) { > > - error = xfs_bulkstat(mp, &inlast, &count, > > - bs_one_func, bs_one_size, > > - bulkreq.ubuffer, &done); > > - } else > > + breq.startino = lastino + 1; > > + error = xfs_bulkstat(&breq, bs_one_func); > > + lastino = breq.startino - 1; > > + } else { > > error = -EINVAL; > > + } > > if (error) > > return error; > > > > + lastino = breq.startino - 1; > > Should this be here? Nope. > > if (bulkreq.lastip != NULL && > > - copy_to_user(bulkreq.lastip, &inlast, sizeof(xfs_ino_t))) > > + copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t))) > > return -EFAULT; > > > > if (bulkreq.ocount != NULL && > > - copy_to_user(bulkreq.ocount, &count, sizeof(count))) > > + copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32))) > > return -EFAULT; > > > > return 0; > > diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c > > index 3ca1c454afe6..87c597ea1df7 100644 > > --- a/fs/xfs/xfs_itable.c > > +++ b/fs/xfs/xfs_itable.c > > @@ -22,37 +22,63 @@ > > #include "xfs_iwalk.h" > > > > /* > > - * Return stat information for one inode. > > - * Return 0 if ok, else errno. > > + * Bulk Stat > > + * ========= > > + * > > + * Use the inode walking functions to fill out struct xfs_bstat for every > > + * allocated inode, then pass the stat information to some externally provided > > + * iteration function. > > */ > > -int > > + > > +struct xfs_bstat_chunk { > > + bulkstat_one_fmt_pf formatter; > > + struct xfs_ibulk *breq; > > +}; > > + > > +/* > > + * Fill out the bulkstat info for a single inode and report it somewhere. > > + * > > + * bc->breq->lastino is effectively the inode cursor as we walk through the > > + * filesystem. Therefore, we update it any time we need to move the cursor > > + * forward, regardless of whether or not we're sending any bstat information > > + * back to userspace. If the inode is internal metadata or, has been freed > > + * out from under us, we just simply keep going. > > + * > > + * However, if any other type of error happens we want to stop right where we > > + * are so that userspace will call back with exact number of the bad inode and > > + * we can send back an error code. > > + * > > + * Note that if the formatter tells us there's no space left in the buffer we > > + * move the cursor forward and abort the walk. > > + */ > > +STATIC int > > xfs_bulkstat_one_int( > > - struct xfs_mount *mp, /* mount point for filesystem */ > > - xfs_ino_t ino, /* inode to get data for */ > > - void __user *buffer, /* buffer to place output in */ > > - int ubsize, /* size of buffer */ > > - bulkstat_one_fmt_pf formatter, /* formatter, copy to user */ > > - int *ubused, /* bytes used by me */ > > - int *stat) /* BULKSTAT_RV_... */ > > + struct xfs_mount *mp, > > + struct xfs_trans *tp, > > + xfs_ino_t ino, > > + void *data) > > { > > Any reason this function takes an 'ino' param considering it's sourced > from breq->startino and we bump that value from within this function? > The latter seems slightly misplaced to me since it doesn't appear to > control the iteration. > > It also looks like we bump startino in almost all cases. Exceptions are > memory allocation failure of the buffer and formatter error. Hmm.. could > we lift the buffer to the bc and reuse it to avoid that error entirely > (along with repeated allocs/frees)? From there, perhaps we could lift > the ->startino update to the callers based on something like error != > -EFAULT..? (Alternatively, the caller could update it first and then > walk it back if error == -EFAULT). xfs_iwalk doesn't know anything about the xfs_bstat_chunk or the xfs_ibulk that are being passed to the iterator function via the void *data parameter. iwalk is a generic iterator which shouldn't ever know about the bulkreq interface. I plan to reuse the xfs_iwalk code for other parts of online repair in the future, so that's why bulkstat_one takes the inode number that _iwalk gives it, and then uses that to mess around with bc and breq. I also sort of prefer to keep the startino update the way it is now because I only want to push it forward for the ~4 cases where we do now (internal, invalid number, past eofs, or successfully statted). Any other runtime error leaves the cursor where it was. > > + struct xfs_bstat_chunk *bc = data; > > struct xfs_icdinode *dic; /* dinode core info pointer */ > > struct xfs_inode *ip; /* incore inode pointer */ > > struct inode *inode; > > struct xfs_bstat *buf; /* return buffer */ > > int error = 0; /* error value */ > > > > - *stat = BULKSTAT_RV_NOTHING; > > - > > - if (!buffer || xfs_internal_inum(mp, ino)) > > + if (xfs_internal_inum(mp, ino)) { > > + bc->breq->startino = ino + 1; > > return -EINVAL; > > + } > > > > buf = kmem_zalloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL); > > if (!buf) > > return -ENOMEM; > > > > - error = xfs_iget(mp, NULL, ino, > > + error = xfs_iget(mp, tp, ino, > > (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED), > > XFS_ILOCK_SHARED, &ip); > > + if (error == -ENOENT || error == -EINVAL) > > + bc->breq->startino = ino + 1; > > if (error) > > goto out_free; > > > > @@ -119,43 +145,45 @@ xfs_bulkstat_one_int( > > xfs_iunlock(ip, XFS_ILOCK_SHARED); > > xfs_irele(ip); > > > > - error = formatter(buffer, ubsize, ubused, buf); > > - if (!error) > > - *stat = BULKSTAT_RV_DIDONE; > > - > > - out_free: > > + error = bc->formatter(bc->breq, buf); > > + switch (error) { > > + case XFS_IBULK_BUFFER_FULL: > > + error = XFS_IWALK_ABORT; > > + /* fall through */ > > + case 0: > > + bc->breq->startino = ino + 1; > > + break; > > + } > > +out_free: > > kmem_free(buf); > > return error; > > } > > > > -/* Return 0 on success or positive error */ > > -STATIC int > > -xfs_bulkstat_one_fmt( > > - void __user *ubuffer, > > - int ubsize, > > - int *ubused, > > - const xfs_bstat_t *buffer) > > -{ > > - if (ubsize < sizeof(*buffer)) > > - return -ENOMEM; > > - if (copy_to_user(ubuffer, buffer, sizeof(*buffer))) > > - return -EFAULT; > > - if (ubused) > > - *ubused = sizeof(*buffer); > > - return 0; > > -} > > - > > +/* Bulkstat a single inode. */ > > int > > xfs_bulkstat_one( > > - xfs_mount_t *mp, /* mount point for filesystem */ > > - xfs_ino_t ino, /* inode number to get data for */ > > - void __user *buffer, /* buffer to place output in */ > > - int ubsize, /* size of buffer */ > > - int *ubused, /* bytes used by me */ > > - int *stat) /* BULKSTAT_RV_... */ > > + struct xfs_ibulk *breq, > > + bulkstat_one_fmt_pf formatter) > > { > > - return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, > > - xfs_bulkstat_one_fmt, ubused, stat); > > + struct xfs_bstat_chunk bc = { > > + .formatter = formatter, > > + .breq = breq, > > + }; > > + int error; > > + > > + breq->icount = 1; > > + breq->ocount = 0; > > + > > So ->icount is already set by the caller based on user input. I'd guess > this is set here to guarantee a single cycle in the event that the user > provided value is >1, but that seems unnecessary since we're calling the > internal helper to handle a single inode. > > If we want to set ->icount for whatever reason, can we do it in the > caller where it's more obvious? That also shows that the ->ocount init > is unnecessary since the whole structure is initialized in the caller. Ok. > > + error = xfs_bulkstat_one_int(breq->mp, NULL, breq->startino, &bc); > > + > > + /* > > + * If we reported one inode to userspace then we abort because we hit > > + * the end of the buffer. Don't leak that back to userspace. > > + */ > > + if (error == XFS_IWALK_ABORT) > > + error = 0; > > + > > + return error; > > } > > > > /* > ... > > diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h > > index 369e3f159d4e..366d391eb11f 100644 > > --- a/fs/xfs/xfs_itable.h > > +++ b/fs/xfs/xfs_itable.h > > @@ -5,63 +5,46 @@ > > #ifndef __XFS_ITABLE_H__ > > #define __XFS_ITABLE_H__ > > > > -/* > > - * xfs_bulkstat() is used to fill in xfs_bstat structures as well as dm_stat > > - * structures (by the dmi library). This is a pointer to a formatter function > > - * that will iget the inode and fill in the appropriate structure. > > - * see xfs_bulkstat_one() and xfs_dm_bulkstat_one() in dmapi_xfs.c > > - */ > > -typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, > > - xfs_ino_t ino, > > - void __user *buffer, > > - int ubsize, > > - int *ubused, > > - int *stat); > > +/* In-memory representation of a userspace request for batch inode data. */ > > +struct xfs_ibulk { > > + struct xfs_mount *mp; > > + void __user *ubuffer; /* user output buffer */ > > + xfs_ino_t startino; /* start with this inode */ > > + unsigned int icount; /* number of elements in ubuffer */ > > + unsigned int ocount; /* number of records returned */ > > +}; > > + > > +/* Return value that means we want to abort the walk. */ > > +#define XFS_IBULK_ABORT (XFS_IWALK_ABORT) > > + > > +/* Return value that means the formatting buffer is now full. */ > > +#define XFS_IBULK_BUFFER_FULL (2) > > > > It might be wise to define this such that it's guaranteed not to be the > same as the abort value, since that is defined externally to this > header. IBULK_ABORT + 1 perhaps? FWIW I'm of half a mind to establish a generic "abort walk" error code and key all of these iterator functions to use it instead of having all these #define XFS_FOO_ABORT 1 things everywhere. I'll do #define XFS_IBULK_BUFFER_FULL IBULK_ABORT+1 here. > Brian > > > /* > > - * Values for stat return value. > > + * Advance the user buffer pointer by one record of the given size. If the > > + * buffer is now full, return the appropriate error code. > > */ > > -#define BULKSTAT_RV_NOTHING 0 > > -#define BULKSTAT_RV_DIDONE 1 > > -#define BULKSTAT_RV_GIVEUP 2 > > +static inline int > > +xfs_ibulk_advance( > > + struct xfs_ibulk *breq, > > + size_t bytes) > > +{ > > + char __user *b = breq->ubuffer; > > + > > + breq->ubuffer = b + bytes; > > + breq->ocount++; > > + return breq->ocount == breq->icount ? XFS_IBULK_BUFFER_FULL : 0; > > +} > > > > /* > > * Return stat information in bulk (by-inode) for the filesystem. > > */ > > -int /* error status */ > > -xfs_bulkstat( > > - xfs_mount_t *mp, /* mount point for filesystem */ > > - xfs_ino_t *lastino, /* last inode returned */ > > - int *count, /* size of buffer/count returned */ > > - bulkstat_one_pf formatter, /* func that'd fill a single buf */ > > - size_t statstruct_size,/* sizeof struct that we're filling */ > > - char __user *ubuffer,/* buffer with inode stats */ > > - int *done); /* 1 if there are more stats to get */ > > > > -typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */ > > - void __user *ubuffer, /* buffer to write to */ > > - int ubsize, /* remaining user buffer sz */ > > - int *ubused, /* bytes used by formatter */ > > - const xfs_bstat_t *buffer); /* buffer to read from */ > > +typedef int (*bulkstat_one_fmt_pf)(struct xfs_ibulk *breq, > > + const struct xfs_bstat *bstat); > > > > -int > > -xfs_bulkstat_one_int( > > - xfs_mount_t *mp, > > - xfs_ino_t ino, > > - void __user *buffer, > > - int ubsize, > > - bulkstat_one_fmt_pf formatter, > > - int *ubused, > > - int *stat); > > - > > -int > > -xfs_bulkstat_one( > > - xfs_mount_t *mp, > > - xfs_ino_t ino, > > - void __user *buffer, > > - int ubsize, > > - int *ubused, > > - int *stat); > > +int xfs_bulkstat_one(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); > > +int xfs_bulkstat(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); > > > > typedef int (*inumbers_fmt_pf)( > > void __user *ubuffer, /* buffer to write to */ > >
On Mon, Jun 10, 2019 at 10:38:39AM -0700, Darrick J. Wong wrote: > On Mon, Jun 10, 2019 at 10:02:29AM -0400, Brian Foster wrote: > > On Tue, Jun 04, 2019 at 02:49:53PM -0700, Darrick J. Wong wrote: > > > From: Darrick J. Wong <darrick.wong@oracle.com> > > > > > > Create a new ibulk structure incore to help us deal with bulk inode stat > > > state tracking and then convert the bulkstat code to use the new iwalk > > > iterator. This disentangles inode walking from bulk stat control for > > > simpler code and enables us to isolate the formatter functions to the > > > ioctl handling code. > > > > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > > > --- > > > fs/xfs/xfs_ioctl.c | 65 ++++++-- > > > fs/xfs/xfs_ioctl.h | 5 + > > > fs/xfs/xfs_ioctl32.c | 88 +++++------ > > > fs/xfs/xfs_itable.c | 407 ++++++++++++++------------------------------------ > > > fs/xfs/xfs_itable.h | 79 ++++------ > > > 5 files changed, 245 insertions(+), 399 deletions(-) > > > > > > > > > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c > > > index 5ffbdcff3dba..43734901aeb9 100644 > > > --- a/fs/xfs/xfs_ioctl.c > > > +++ b/fs/xfs/xfs_ioctl.c > > ... > > > @@ -745,35 +757,54 @@ xfs_ioc_bulkstat( > > > if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t))) > > > return -EFAULT; > > > > > > - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) > > > + if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64))) > > > return -EFAULT; > > > > > > - if ((count = bulkreq.icount) <= 0) > > > + if (bulkreq.icount <= 0) > > > return -EINVAL; > > > > > > if (bulkreq.ubuffer == NULL) > > > return -EINVAL; > > > > > > - if (cmd == XFS_IOC_FSINUMBERS) > > > - error = xfs_inumbers(mp, &inlast, &count, > > > + breq.ubuffer = bulkreq.ubuffer; > > > + breq.icount = bulkreq.icount; > > > + > > > + /* > > > + * FSBULKSTAT_SINGLE expects that *lastip contains the inode number > > > + * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect > > > + * that *lastip contains either zero or the number of the last inode to > > > + * be examined by the previous call and return results starting with > > > + * the next inode after that. The new bulk request functions take the > > > + * inode to start with, so we have to adjust the lastino/startino > > > + * parameter to maintain correct function. > > > + */ > > > > It's kind of difficult to tell what's new or old when just looking at > > the code. The comment suggests FSINUMBERS and FSBULKSTAT use the same > > interface wrt to lastip, but they aren't handled the same below. I take > > it this is because xfs_inumbers() still has the same behavior whereas > > xfs_bulkstat() has been changed to operate based on breq rather than > > lastip..? > > Yes. By the end of the series we'll have converted FSINUMBERS too, but > for the ~5 or so patches until we get there, the only way to tell new > vs. old interface is whether it takes breq or pointers to stuff in breq. > > > > + if (cmd == XFS_IOC_FSINUMBERS) { > > > + int count = breq.icount; > > > + > > > + breq.startino = lastino; > > > + error = xfs_inumbers(mp, &breq.startino, &count, > > > bulkreq.ubuffer, xfs_inumbers_fmt); > > > - else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) > > > - error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer, > > > - sizeof(xfs_bstat_t), NULL, &done); > > > - else /* XFS_IOC_FSBULKSTAT */ > > > - error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, > > > - sizeof(xfs_bstat_t), bulkreq.ubuffer, > > > - &done); > > > + breq.ocount = count; > > > + lastino = breq.startino; > > > + } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) { > > > + breq.startino = lastino; > > > + error = xfs_bulkstat_one(&breq, xfs_bulkstat_one_fmt); > > > + lastino = breq.startino; > > > + } else { /* XFS_IOC_FSBULKSTAT */ > > > + breq.startino = lastino + 1; > > > + error = xfs_bulkstat(&breq, xfs_bulkstat_one_fmt); > > > + lastino = breq.startino - 1; > > > + } > > > > > ... > > > diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c > > > index 814ffe6fbab7..add15819daf3 100644 > > > --- a/fs/xfs/xfs_ioctl32.c > > > +++ b/fs/xfs/xfs_ioctl32.c > > ... > > > @@ -284,38 +263,57 @@ xfs_compat_ioc_bulkstat( > > > return -EFAULT; > > > bulkreq.ocount = compat_ptr(addr); > > > > > > - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) > > > + if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64))) > > > return -EFAULT; > > > + breq.startino = lastino + 1; > > > > > > - if ((count = bulkreq.icount) <= 0) > > > + if (bulkreq.icount <= 0) > > > return -EINVAL; > > > > > > if (bulkreq.ubuffer == NULL) > > > return -EINVAL; > > > > > > + breq.ubuffer = bulkreq.ubuffer; > > > + breq.icount = bulkreq.icount; > > > + > > > + /* > > > + * FSBULKSTAT_SINGLE expects that *lastip contains the inode number > > > + * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect > > > + * that *lastip contains either zero or the number of the last inode to > > > + * be examined by the previous call and return results starting with > > > + * the next inode after that. The new bulk request functions take the > > > + * inode to start with, so we have to adjust the lastino/startino > > > + * parameter to maintain correct function. > > > + */ > > > > (Same comment here.) > > > > > if (cmd == XFS_IOC_FSINUMBERS_32) { > > > - error = xfs_inumbers(mp, &inlast, &count, > > > + int count = breq.icount; > > > + > > > + breq.startino = lastino; > > > + error = xfs_inumbers(mp, &breq.startino, &count, > > > bulkreq.ubuffer, inumbers_func); > > > + breq.ocount = count; > > > + lastino = breq.startino; > > > } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { > > > - int res; > > > - > > > - error = bs_one_func(mp, inlast, bulkreq.ubuffer, > > > - bs_one_size, NULL, &res); > > > + breq.startino = lastino; > > > + error = xfs_bulkstat_one(&breq, bs_one_func); > > > + lastino = breq.startino; > > > } else if (cmd == XFS_IOC_FSBULKSTAT_32) { > > > - error = xfs_bulkstat(mp, &inlast, &count, > > > - bs_one_func, bs_one_size, > > > - bulkreq.ubuffer, &done); > > > - } else > > > + breq.startino = lastino + 1; > > > + error = xfs_bulkstat(&breq, bs_one_func); > > > + lastino = breq.startino - 1; > > > + } else { > > > error = -EINVAL; > > > + } > > > if (error) > > > return error; > > > > > > + lastino = breq.startino - 1; > > > > Should this be here? > > Nope. > > > > if (bulkreq.lastip != NULL && > > > - copy_to_user(bulkreq.lastip, &inlast, sizeof(xfs_ino_t))) > > > + copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t))) > > > return -EFAULT; > > > > > > if (bulkreq.ocount != NULL && > > > - copy_to_user(bulkreq.ocount, &count, sizeof(count))) > > > + copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32))) > > > return -EFAULT; > > > > > > return 0; > > > diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c > > > index 3ca1c454afe6..87c597ea1df7 100644 > > > --- a/fs/xfs/xfs_itable.c > > > +++ b/fs/xfs/xfs_itable.c > > > @@ -22,37 +22,63 @@ > > > #include "xfs_iwalk.h" > > > > > > /* > > > - * Return stat information for one inode. > > > - * Return 0 if ok, else errno. > > > + * Bulk Stat > > > + * ========= > > > + * > > > + * Use the inode walking functions to fill out struct xfs_bstat for every > > > + * allocated inode, then pass the stat information to some externally provided > > > + * iteration function. > > > */ > > > -int > > > + > > > +struct xfs_bstat_chunk { > > > + bulkstat_one_fmt_pf formatter; > > > + struct xfs_ibulk *breq; > > > +}; > > > + > > > +/* > > > + * Fill out the bulkstat info for a single inode and report it somewhere. > > > + * > > > + * bc->breq->lastino is effectively the inode cursor as we walk through the > > > + * filesystem. Therefore, we update it any time we need to move the cursor > > > + * forward, regardless of whether or not we're sending any bstat information > > > + * back to userspace. If the inode is internal metadata or, has been freed > > > + * out from under us, we just simply keep going. > > > + * > > > + * However, if any other type of error happens we want to stop right where we > > > + * are so that userspace will call back with exact number of the bad inode and > > > + * we can send back an error code. > > > + * > > > + * Note that if the formatter tells us there's no space left in the buffer we > > > + * move the cursor forward and abort the walk. > > > + */ > > > +STATIC int > > > xfs_bulkstat_one_int( > > > - struct xfs_mount *mp, /* mount point for filesystem */ > > > - xfs_ino_t ino, /* inode to get data for */ > > > - void __user *buffer, /* buffer to place output in */ > > > - int ubsize, /* size of buffer */ > > > - bulkstat_one_fmt_pf formatter, /* formatter, copy to user */ > > > - int *ubused, /* bytes used by me */ > > > - int *stat) /* BULKSTAT_RV_... */ > > > + struct xfs_mount *mp, > > > + struct xfs_trans *tp, > > > + xfs_ino_t ino, > > > + void *data) > > > { > > > > Any reason this function takes an 'ino' param considering it's sourced > > from breq->startino and we bump that value from within this function? > > The latter seems slightly misplaced to me since it doesn't appear to > > control the iteration. > > > > It also looks like we bump startino in almost all cases. Exceptions are > > memory allocation failure of the buffer and formatter error. Hmm.. could > > we lift the buffer to the bc and reuse it to avoid that error entirely > > (along with repeated allocs/frees)? From there, perhaps we could lift > > the ->startino update to the callers based on something like error != > > -EFAULT..? (Alternatively, the caller could update it first and then > > walk it back if error == -EFAULT). > > xfs_iwalk doesn't know anything about the xfs_bstat_chunk or the > xfs_ibulk that are being passed to the iterator function via the void > *data parameter. iwalk is a generic iterator which shouldn't ever know > about the bulkreq interface. > Sure, I'm not suggesting to push anything into the iwalk code. That's obviously a separate layer. I'm referring to the direct callers of xfs_bulkstat_one_int(), which is still bulkstat code AFAICT. > I plan to reuse the xfs_iwalk code for other parts of online repair in > the future, so that's why bulkstat_one takes the inode number that > _iwalk gives it, and then uses that to mess around with bc and breq. > Ok. BTW, looking again I notice that xfs_bulkstat_one_int() passes bc as a void pointer. That could probably be fixed up to take the xfs_bstat_chunk from the current callers. > I also sort of prefer to keep the startino update the way it is now > because I only want to push it forward for the ~4 cases where we do now > (internal, invalid number, past eofs, or successfully statted). > Any other runtime error leaves the cursor where it was. > Fair enough, though it could also be cleaned up a bit with an exit label IMO. Brian > > > + struct xfs_bstat_chunk *bc = data; > > > struct xfs_icdinode *dic; /* dinode core info pointer */ > > > struct xfs_inode *ip; /* incore inode pointer */ > > > struct inode *inode; > > > struct xfs_bstat *buf; /* return buffer */ > > > int error = 0; /* error value */ > > > > > > - *stat = BULKSTAT_RV_NOTHING; > > > - > > > - if (!buffer || xfs_internal_inum(mp, ino)) > > > + if (xfs_internal_inum(mp, ino)) { > > > + bc->breq->startino = ino + 1; > > > return -EINVAL; > > > + } > > > > > > buf = kmem_zalloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL); > > > if (!buf) > > > return -ENOMEM; > > > > > > - error = xfs_iget(mp, NULL, ino, > > > + error = xfs_iget(mp, tp, ino, > > > (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED), > > > XFS_ILOCK_SHARED, &ip); > > > + if (error == -ENOENT || error == -EINVAL) > > > + bc->breq->startino = ino + 1; > > > if (error) > > > goto out_free; > > > > > > @@ -119,43 +145,45 @@ xfs_bulkstat_one_int( > > > xfs_iunlock(ip, XFS_ILOCK_SHARED); > > > xfs_irele(ip); > > > > > > - error = formatter(buffer, ubsize, ubused, buf); > > > - if (!error) > > > - *stat = BULKSTAT_RV_DIDONE; > > > - > > > - out_free: > > > + error = bc->formatter(bc->breq, buf); > > > + switch (error) { > > > + case XFS_IBULK_BUFFER_FULL: > > > + error = XFS_IWALK_ABORT; > > > + /* fall through */ > > > + case 0: > > > + bc->breq->startino = ino + 1; > > > + break; > > > + } > > > +out_free: > > > kmem_free(buf); > > > return error; > > > } > > > > > > -/* Return 0 on success or positive error */ > > > -STATIC int > > > -xfs_bulkstat_one_fmt( > > > - void __user *ubuffer, > > > - int ubsize, > > > - int *ubused, > > > - const xfs_bstat_t *buffer) > > > -{ > > > - if (ubsize < sizeof(*buffer)) > > > - return -ENOMEM; > > > - if (copy_to_user(ubuffer, buffer, sizeof(*buffer))) > > > - return -EFAULT; > > > - if (ubused) > > > - *ubused = sizeof(*buffer); > > > - return 0; > > > -} > > > - > > > +/* Bulkstat a single inode. */ > > > int > > > xfs_bulkstat_one( > > > - xfs_mount_t *mp, /* mount point for filesystem */ > > > - xfs_ino_t ino, /* inode number to get data for */ > > > - void __user *buffer, /* buffer to place output in */ > > > - int ubsize, /* size of buffer */ > > > - int *ubused, /* bytes used by me */ > > > - int *stat) /* BULKSTAT_RV_... */ > > > + struct xfs_ibulk *breq, > > > + bulkstat_one_fmt_pf formatter) > > > { > > > - return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, > > > - xfs_bulkstat_one_fmt, ubused, stat); > > > + struct xfs_bstat_chunk bc = { > > > + .formatter = formatter, > > > + .breq = breq, > > > + }; > > > + int error; > > > + > > > + breq->icount = 1; > > > + breq->ocount = 0; > > > + > > > > So ->icount is already set by the caller based on user input. I'd guess > > this is set here to guarantee a single cycle in the event that the user > > provided value is >1, but that seems unnecessary since we're calling the > > internal helper to handle a single inode. > > > > If we want to set ->icount for whatever reason, can we do it in the > > caller where it's more obvious? That also shows that the ->ocount init > > is unnecessary since the whole structure is initialized in the caller. > > Ok. > > > > + error = xfs_bulkstat_one_int(breq->mp, NULL, breq->startino, &bc); > > > + > > > + /* > > > + * If we reported one inode to userspace then we abort because we hit > > > + * the end of the buffer. Don't leak that back to userspace. > > > + */ > > > + if (error == XFS_IWALK_ABORT) > > > + error = 0; > > > + > > > + return error; > > > } > > > > > > /* > > ... > > > diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h > > > index 369e3f159d4e..366d391eb11f 100644 > > > --- a/fs/xfs/xfs_itable.h > > > +++ b/fs/xfs/xfs_itable.h > > > @@ -5,63 +5,46 @@ > > > #ifndef __XFS_ITABLE_H__ > > > #define __XFS_ITABLE_H__ > > > > > > -/* > > > - * xfs_bulkstat() is used to fill in xfs_bstat structures as well as dm_stat > > > - * structures (by the dmi library). This is a pointer to a formatter function > > > - * that will iget the inode and fill in the appropriate structure. > > > - * see xfs_bulkstat_one() and xfs_dm_bulkstat_one() in dmapi_xfs.c > > > - */ > > > -typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, > > > - xfs_ino_t ino, > > > - void __user *buffer, > > > - int ubsize, > > > - int *ubused, > > > - int *stat); > > > +/* In-memory representation of a userspace request for batch inode data. */ > > > +struct xfs_ibulk { > > > + struct xfs_mount *mp; > > > + void __user *ubuffer; /* user output buffer */ > > > + xfs_ino_t startino; /* start with this inode */ > > > + unsigned int icount; /* number of elements in ubuffer */ > > > + unsigned int ocount; /* number of records returned */ > > > +}; > > > + > > > +/* Return value that means we want to abort the walk. */ > > > +#define XFS_IBULK_ABORT (XFS_IWALK_ABORT) > > > + > > > +/* Return value that means the formatting buffer is now full. */ > > > +#define XFS_IBULK_BUFFER_FULL (2) > > > > > > > It might be wise to define this such that it's guaranteed not to be the > > same as the abort value, since that is defined externally to this > > header. IBULK_ABORT + 1 perhaps? > > FWIW I'm of half a mind to establish a generic "abort walk" error code > and key all of these iterator functions to use it instead of having all > these #define XFS_FOO_ABORT 1 things everywhere. > > I'll do #define XFS_IBULK_BUFFER_FULL IBULK_ABORT+1 here. > > > Brian > > > > > /* > > > - * Values for stat return value. > > > + * Advance the user buffer pointer by one record of the given size. If the > > > + * buffer is now full, return the appropriate error code. > > > */ > > > -#define BULKSTAT_RV_NOTHING 0 > > > -#define BULKSTAT_RV_DIDONE 1 > > > -#define BULKSTAT_RV_GIVEUP 2 > > > +static inline int > > > +xfs_ibulk_advance( > > > + struct xfs_ibulk *breq, > > > + size_t bytes) > > > +{ > > > + char __user *b = breq->ubuffer; > > > + > > > + breq->ubuffer = b + bytes; > > > + breq->ocount++; > > > + return breq->ocount == breq->icount ? XFS_IBULK_BUFFER_FULL : 0; > > > +} > > > > > > /* > > > * Return stat information in bulk (by-inode) for the filesystem. > > > */ > > > -int /* error status */ > > > -xfs_bulkstat( > > > - xfs_mount_t *mp, /* mount point for filesystem */ > > > - xfs_ino_t *lastino, /* last inode returned */ > > > - int *count, /* size of buffer/count returned */ > > > - bulkstat_one_pf formatter, /* func that'd fill a single buf */ > > > - size_t statstruct_size,/* sizeof struct that we're filling */ > > > - char __user *ubuffer,/* buffer with inode stats */ > > > - int *done); /* 1 if there are more stats to get */ > > > > > > -typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */ > > > - void __user *ubuffer, /* buffer to write to */ > > > - int ubsize, /* remaining user buffer sz */ > > > - int *ubused, /* bytes used by formatter */ > > > - const xfs_bstat_t *buffer); /* buffer to read from */ > > > +typedef int (*bulkstat_one_fmt_pf)(struct xfs_ibulk *breq, > > > + const struct xfs_bstat *bstat); > > > > > > -int > > > -xfs_bulkstat_one_int( > > > - xfs_mount_t *mp, > > > - xfs_ino_t ino, > > > - void __user *buffer, > > > - int ubsize, > > > - bulkstat_one_fmt_pf formatter, > > > - int *ubused, > > > - int *stat); > > > - > > > -int > > > -xfs_bulkstat_one( > > > - xfs_mount_t *mp, > > > - xfs_ino_t ino, > > > - void __user *buffer, > > > - int ubsize, > > > - int *ubused, > > > - int *stat); > > > +int xfs_bulkstat_one(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); > > > +int xfs_bulkstat(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); > > > > > > typedef int (*inumbers_fmt_pf)( > > > void __user *ubuffer, /* buffer to write to */ > > >
On Mon, Jun 10, 2019 at 02:29:21PM -0400, Brian Foster wrote: > On Mon, Jun 10, 2019 at 10:38:39AM -0700, Darrick J. Wong wrote: > > On Mon, Jun 10, 2019 at 10:02:29AM -0400, Brian Foster wrote: > > > On Tue, Jun 04, 2019 at 02:49:53PM -0700, Darrick J. Wong wrote: > > > > From: Darrick J. Wong <darrick.wong@oracle.com> > > > > > > > > Create a new ibulk structure incore to help us deal with bulk inode stat > > > > state tracking and then convert the bulkstat code to use the new iwalk > > > > iterator. This disentangles inode walking from bulk stat control for > > > > simpler code and enables us to isolate the formatter functions to the > > > > ioctl handling code. > > > > > > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > > > > --- > > > > fs/xfs/xfs_ioctl.c | 65 ++++++-- > > > > fs/xfs/xfs_ioctl.h | 5 + > > > > fs/xfs/xfs_ioctl32.c | 88 +++++------ > > > > fs/xfs/xfs_itable.c | 407 ++++++++++++++------------------------------------ > > > > fs/xfs/xfs_itable.h | 79 ++++------ > > > > 5 files changed, 245 insertions(+), 399 deletions(-) > > > > > > > > > > > > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c > > > > index 5ffbdcff3dba..43734901aeb9 100644 > > > > --- a/fs/xfs/xfs_ioctl.c > > > > +++ b/fs/xfs/xfs_ioctl.c > > > ... > > > > @@ -745,35 +757,54 @@ xfs_ioc_bulkstat( > > > > if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t))) > > > > return -EFAULT; > > > > > > > > - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) > > > > + if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64))) > > > > return -EFAULT; > > > > > > > > - if ((count = bulkreq.icount) <= 0) > > > > + if (bulkreq.icount <= 0) > > > > return -EINVAL; > > > > > > > > if (bulkreq.ubuffer == NULL) > > > > return -EINVAL; > > > > > > > > - if (cmd == XFS_IOC_FSINUMBERS) > > > > - error = xfs_inumbers(mp, &inlast, &count, > > > > + breq.ubuffer = bulkreq.ubuffer; > > > > + breq.icount = bulkreq.icount; > > > > + > > > > + /* > > > > + * FSBULKSTAT_SINGLE expects that *lastip contains the inode number > > > > + * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect > > > > + * that *lastip contains either zero or the number of the last inode to > > > > + * be examined by the previous call and return results starting with > > > > + * the next inode after that. The new bulk request functions take the > > > > + * inode to start with, so we have to adjust the lastino/startino > > > > + * parameter to maintain correct function. > > > > + */ > > > > > > It's kind of difficult to tell what's new or old when just looking at > > > the code. The comment suggests FSINUMBERS and FSBULKSTAT use the same > > > interface wrt to lastip, but they aren't handled the same below. I take > > > it this is because xfs_inumbers() still has the same behavior whereas > > > xfs_bulkstat() has been changed to operate based on breq rather than > > > lastip..? > > > > Yes. By the end of the series we'll have converted FSINUMBERS too, but > > for the ~5 or so patches until we get there, the only way to tell new > > vs. old interface is whether it takes breq or pointers to stuff in breq. > > > > > > + if (cmd == XFS_IOC_FSINUMBERS) { > > > > + int count = breq.icount; > > > > + > > > > + breq.startino = lastino; > > > > + error = xfs_inumbers(mp, &breq.startino, &count, > > > > bulkreq.ubuffer, xfs_inumbers_fmt); > > > > - else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) > > > > - error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer, > > > > - sizeof(xfs_bstat_t), NULL, &done); > > > > - else /* XFS_IOC_FSBULKSTAT */ > > > > - error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, > > > > - sizeof(xfs_bstat_t), bulkreq.ubuffer, > > > > - &done); > > > > + breq.ocount = count; > > > > + lastino = breq.startino; > > > > + } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) { > > > > + breq.startino = lastino; > > > > + error = xfs_bulkstat_one(&breq, xfs_bulkstat_one_fmt); > > > > + lastino = breq.startino; > > > > + } else { /* XFS_IOC_FSBULKSTAT */ > > > > + breq.startino = lastino + 1; > > > > + error = xfs_bulkstat(&breq, xfs_bulkstat_one_fmt); > > > > + lastino = breq.startino - 1; > > > > + } > > > > > > > ... > > > > diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c > > > > index 814ffe6fbab7..add15819daf3 100644 > > > > --- a/fs/xfs/xfs_ioctl32.c > > > > +++ b/fs/xfs/xfs_ioctl32.c > > > ... > > > > @@ -284,38 +263,57 @@ xfs_compat_ioc_bulkstat( > > > > return -EFAULT; > > > > bulkreq.ocount = compat_ptr(addr); > > > > > > > > - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) > > > > + if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64))) > > > > return -EFAULT; > > > > + breq.startino = lastino + 1; > > > > > > > > - if ((count = bulkreq.icount) <= 0) > > > > + if (bulkreq.icount <= 0) > > > > return -EINVAL; > > > > > > > > if (bulkreq.ubuffer == NULL) > > > > return -EINVAL; > > > > > > > > + breq.ubuffer = bulkreq.ubuffer; > > > > + breq.icount = bulkreq.icount; > > > > + > > > > + /* > > > > + * FSBULKSTAT_SINGLE expects that *lastip contains the inode number > > > > + * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect > > > > + * that *lastip contains either zero or the number of the last inode to > > > > + * be examined by the previous call and return results starting with > > > > + * the next inode after that. The new bulk request functions take the > > > > + * inode to start with, so we have to adjust the lastino/startino > > > > + * parameter to maintain correct function. > > > > + */ > > > > > > (Same comment here.) > > > > > > > if (cmd == XFS_IOC_FSINUMBERS_32) { > > > > - error = xfs_inumbers(mp, &inlast, &count, > > > > + int count = breq.icount; > > > > + > > > > + breq.startino = lastino; > > > > + error = xfs_inumbers(mp, &breq.startino, &count, > > > > bulkreq.ubuffer, inumbers_func); > > > > + breq.ocount = count; > > > > + lastino = breq.startino; > > > > } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { > > > > - int res; > > > > - > > > > - error = bs_one_func(mp, inlast, bulkreq.ubuffer, > > > > - bs_one_size, NULL, &res); > > > > + breq.startino = lastino; > > > > + error = xfs_bulkstat_one(&breq, bs_one_func); > > > > + lastino = breq.startino; > > > > } else if (cmd == XFS_IOC_FSBULKSTAT_32) { > > > > - error = xfs_bulkstat(mp, &inlast, &count, > > > > - bs_one_func, bs_one_size, > > > > - bulkreq.ubuffer, &done); > > > > - } else > > > > + breq.startino = lastino + 1; > > > > + error = xfs_bulkstat(&breq, bs_one_func); > > > > + lastino = breq.startino - 1; > > > > + } else { > > > > error = -EINVAL; > > > > + } > > > > if (error) > > > > return error; > > > > > > > > + lastino = breq.startino - 1; > > > > > > Should this be here? > > > > Nope. > > > > > > if (bulkreq.lastip != NULL && > > > > - copy_to_user(bulkreq.lastip, &inlast, sizeof(xfs_ino_t))) > > > > + copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t))) > > > > return -EFAULT; > > > > > > > > if (bulkreq.ocount != NULL && > > > > - copy_to_user(bulkreq.ocount, &count, sizeof(count))) > > > > + copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32))) > > > > return -EFAULT; > > > > > > > > return 0; > > > > diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c > > > > index 3ca1c454afe6..87c597ea1df7 100644 > > > > --- a/fs/xfs/xfs_itable.c > > > > +++ b/fs/xfs/xfs_itable.c > > > > @@ -22,37 +22,63 @@ > > > > #include "xfs_iwalk.h" > > > > > > > > /* > > > > - * Return stat information for one inode. > > > > - * Return 0 if ok, else errno. > > > > + * Bulk Stat > > > > + * ========= > > > > + * > > > > + * Use the inode walking functions to fill out struct xfs_bstat for every > > > > + * allocated inode, then pass the stat information to some externally provided > > > > + * iteration function. > > > > */ > > > > -int > > > > + > > > > +struct xfs_bstat_chunk { > > > > + bulkstat_one_fmt_pf formatter; > > > > + struct xfs_ibulk *breq; > > > > +}; > > > > + > > > > +/* > > > > + * Fill out the bulkstat info for a single inode and report it somewhere. > > > > + * > > > > + * bc->breq->lastino is effectively the inode cursor as we walk through the > > > > + * filesystem. Therefore, we update it any time we need to move the cursor > > > > + * forward, regardless of whether or not we're sending any bstat information > > > > + * back to userspace. If the inode is internal metadata or, has been freed > > > > + * out from under us, we just simply keep going. > > > > + * > > > > + * However, if any other type of error happens we want to stop right where we > > > > + * are so that userspace will call back with exact number of the bad inode and > > > > + * we can send back an error code. > > > > + * > > > > + * Note that if the formatter tells us there's no space left in the buffer we > > > > + * move the cursor forward and abort the walk. > > > > + */ > > > > +STATIC int > > > > xfs_bulkstat_one_int( > > > > - struct xfs_mount *mp, /* mount point for filesystem */ > > > > - xfs_ino_t ino, /* inode to get data for */ > > > > - void __user *buffer, /* buffer to place output in */ > > > > - int ubsize, /* size of buffer */ > > > > - bulkstat_one_fmt_pf formatter, /* formatter, copy to user */ > > > > - int *ubused, /* bytes used by me */ > > > > - int *stat) /* BULKSTAT_RV_... */ > > > > + struct xfs_mount *mp, > > > > + struct xfs_trans *tp, > > > > + xfs_ino_t ino, > > > > + void *data) > > > > { > > > > > > Any reason this function takes an 'ino' param considering it's sourced > > > from breq->startino and we bump that value from within this function? > > > The latter seems slightly misplaced to me since it doesn't appear to > > > control the iteration. > > > > > > It also looks like we bump startino in almost all cases. Exceptions are > > > memory allocation failure of the buffer and formatter error. Hmm.. could > > > we lift the buffer to the bc and reuse it to avoid that error entirely > > > (along with repeated allocs/frees)? From there, perhaps we could lift > > > the ->startino update to the callers based on something like error != > > > -EFAULT..? (Alternatively, the caller could update it first and then > > > walk it back if error == -EFAULT). > > > > xfs_iwalk doesn't know anything about the xfs_bstat_chunk or the > > xfs_ibulk that are being passed to the iterator function via the void > > *data parameter. iwalk is a generic iterator which shouldn't ever know > > about the bulkreq interface. > > > > Sure, I'm not suggesting to push anything into the iwalk code. That's > obviously a separate layer. I'm referring to the direct callers of > xfs_bulkstat_one_int(), which is still bulkstat code AFAICT. OH! Heh, I had wondered if maybe you were talking about xfs_bulkstat() and xfs_bulkstat_one(). Maybe I should've drank more coffee this morning. Uh, the buffer allocation thing definitely can move out to the callers... > > I plan to reuse the xfs_iwalk code for other parts of online repair in > > the future, so that's why bulkstat_one takes the inode number that > > _iwalk gives it, and then uses that to mess around with bc and breq. > > > > Ok. BTW, looking again I notice that xfs_bulkstat_one_int() passes bc as > a void pointer. That could probably be fixed up to take the > xfs_bstat_chunk from the current callers. > > > I also sort of prefer to keep the startino update the way it is now > > because I only want to push it forward for the ~4 cases where we do now > > (internal, invalid number, past eofs, or successfully statted). > > Any other runtime error leaves the cursor where it was. > > > > Fair enough, though it could also be cleaned up a bit with an exit label > IMO. ...and having an exit label to make it clearer why we do or don't push startino forward sounds like a good idea. --D > > Brian > > > > > + struct xfs_bstat_chunk *bc = data; > > > > struct xfs_icdinode *dic; /* dinode core info pointer */ > > > > struct xfs_inode *ip; /* incore inode pointer */ > > > > struct inode *inode; > > > > struct xfs_bstat *buf; /* return buffer */ > > > > int error = 0; /* error value */ > > > > > > > > - *stat = BULKSTAT_RV_NOTHING; > > > > - > > > > - if (!buffer || xfs_internal_inum(mp, ino)) > > > > + if (xfs_internal_inum(mp, ino)) { > > > > + bc->breq->startino = ino + 1; > > > > return -EINVAL; > > > > + } > > > > > > > > buf = kmem_zalloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL); > > > > if (!buf) > > > > return -ENOMEM; > > > > > > > > - error = xfs_iget(mp, NULL, ino, > > > > + error = xfs_iget(mp, tp, ino, > > > > (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED), > > > > XFS_ILOCK_SHARED, &ip); > > > > + if (error == -ENOENT || error == -EINVAL) > > > > + bc->breq->startino = ino + 1; > > > > if (error) > > > > goto out_free; > > > > > > > > @@ -119,43 +145,45 @@ xfs_bulkstat_one_int( > > > > xfs_iunlock(ip, XFS_ILOCK_SHARED); > > > > xfs_irele(ip); > > > > > > > > - error = formatter(buffer, ubsize, ubused, buf); > > > > - if (!error) > > > > - *stat = BULKSTAT_RV_DIDONE; > > > > - > > > > - out_free: > > > > + error = bc->formatter(bc->breq, buf); > > > > + switch (error) { > > > > + case XFS_IBULK_BUFFER_FULL: > > > > + error = XFS_IWALK_ABORT; > > > > + /* fall through */ > > > > + case 0: > > > > + bc->breq->startino = ino + 1; > > > > + break; > > > > + } > > > > +out_free: > > > > kmem_free(buf); > > > > return error; > > > > } > > > > > > > > -/* Return 0 on success or positive error */ > > > > -STATIC int > > > > -xfs_bulkstat_one_fmt( > > > > - void __user *ubuffer, > > > > - int ubsize, > > > > - int *ubused, > > > > - const xfs_bstat_t *buffer) > > > > -{ > > > > - if (ubsize < sizeof(*buffer)) > > > > - return -ENOMEM; > > > > - if (copy_to_user(ubuffer, buffer, sizeof(*buffer))) > > > > - return -EFAULT; > > > > - if (ubused) > > > > - *ubused = sizeof(*buffer); > > > > - return 0; > > > > -} > > > > - > > > > +/* Bulkstat a single inode. */ > > > > int > > > > xfs_bulkstat_one( > > > > - xfs_mount_t *mp, /* mount point for filesystem */ > > > > - xfs_ino_t ino, /* inode number to get data for */ > > > > - void __user *buffer, /* buffer to place output in */ > > > > - int ubsize, /* size of buffer */ > > > > - int *ubused, /* bytes used by me */ > > > > - int *stat) /* BULKSTAT_RV_... */ > > > > + struct xfs_ibulk *breq, > > > > + bulkstat_one_fmt_pf formatter) > > > > { > > > > - return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, > > > > - xfs_bulkstat_one_fmt, ubused, stat); > > > > + struct xfs_bstat_chunk bc = { > > > > + .formatter = formatter, > > > > + .breq = breq, > > > > + }; > > > > + int error; > > > > + > > > > + breq->icount = 1; > > > > + breq->ocount = 0; > > > > + > > > > > > So ->icount is already set by the caller based on user input. I'd guess > > > this is set here to guarantee a single cycle in the event that the user > > > provided value is >1, but that seems unnecessary since we're calling the > > > internal helper to handle a single inode. > > > > > > If we want to set ->icount for whatever reason, can we do it in the > > > caller where it's more obvious? That also shows that the ->ocount init > > > is unnecessary since the whole structure is initialized in the caller. > > > > Ok. > > > > > > + error = xfs_bulkstat_one_int(breq->mp, NULL, breq->startino, &bc); > > > > + > > > > + /* > > > > + * If we reported one inode to userspace then we abort because we hit > > > > + * the end of the buffer. Don't leak that back to userspace. > > > > + */ > > > > + if (error == XFS_IWALK_ABORT) > > > > + error = 0; > > > > + > > > > + return error; > > > > } > > > > > > > > /* > > > ... > > > > diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h > > > > index 369e3f159d4e..366d391eb11f 100644 > > > > --- a/fs/xfs/xfs_itable.h > > > > +++ b/fs/xfs/xfs_itable.h > > > > @@ -5,63 +5,46 @@ > > > > #ifndef __XFS_ITABLE_H__ > > > > #define __XFS_ITABLE_H__ > > > > > > > > -/* > > > > - * xfs_bulkstat() is used to fill in xfs_bstat structures as well as dm_stat > > > > - * structures (by the dmi library). This is a pointer to a formatter function > > > > - * that will iget the inode and fill in the appropriate structure. > > > > - * see xfs_bulkstat_one() and xfs_dm_bulkstat_one() in dmapi_xfs.c > > > > - */ > > > > -typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, > > > > - xfs_ino_t ino, > > > > - void __user *buffer, > > > > - int ubsize, > > > > - int *ubused, > > > > - int *stat); > > > > +/* In-memory representation of a userspace request for batch inode data. */ > > > > +struct xfs_ibulk { > > > > + struct xfs_mount *mp; > > > > + void __user *ubuffer; /* user output buffer */ > > > > + xfs_ino_t startino; /* start with this inode */ > > > > + unsigned int icount; /* number of elements in ubuffer */ > > > > + unsigned int ocount; /* number of records returned */ > > > > +}; > > > > + > > > > +/* Return value that means we want to abort the walk. */ > > > > +#define XFS_IBULK_ABORT (XFS_IWALK_ABORT) > > > > + > > > > +/* Return value that means the formatting buffer is now full. */ > > > > +#define XFS_IBULK_BUFFER_FULL (2) > > > > > > > > > > It might be wise to define this such that it's guaranteed not to be the > > > same as the abort value, since that is defined externally to this > > > header. IBULK_ABORT + 1 perhaps? > > > > FWIW I'm of half a mind to establish a generic "abort walk" error code > > and key all of these iterator functions to use it instead of having all > > these #define XFS_FOO_ABORT 1 things everywhere. > > > > I'll do #define XFS_IBULK_BUFFER_FULL IBULK_ABORT+1 here. > > > > > Brian > > > > > > > /* > > > > - * Values for stat return value. > > > > + * Advance the user buffer pointer by one record of the given size. If the > > > > + * buffer is now full, return the appropriate error code. > > > > */ > > > > -#define BULKSTAT_RV_NOTHING 0 > > > > -#define BULKSTAT_RV_DIDONE 1 > > > > -#define BULKSTAT_RV_GIVEUP 2 > > > > +static inline int > > > > +xfs_ibulk_advance( > > > > + struct xfs_ibulk *breq, > > > > + size_t bytes) > > > > +{ > > > > + char __user *b = breq->ubuffer; > > > > + > > > > + breq->ubuffer = b + bytes; > > > > + breq->ocount++; > > > > + return breq->ocount == breq->icount ? XFS_IBULK_BUFFER_FULL : 0; > > > > +} > > > > > > > > /* > > > > * Return stat information in bulk (by-inode) for the filesystem. > > > > */ > > > > -int /* error status */ > > > > -xfs_bulkstat( > > > > - xfs_mount_t *mp, /* mount point for filesystem */ > > > > - xfs_ino_t *lastino, /* last inode returned */ > > > > - int *count, /* size of buffer/count returned */ > > > > - bulkstat_one_pf formatter, /* func that'd fill a single buf */ > > > > - size_t statstruct_size,/* sizeof struct that we're filling */ > > > > - char __user *ubuffer,/* buffer with inode stats */ > > > > - int *done); /* 1 if there are more stats to get */ > > > > > > > > -typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */ > > > > - void __user *ubuffer, /* buffer to write to */ > > > > - int ubsize, /* remaining user buffer sz */ > > > > - int *ubused, /* bytes used by formatter */ > > > > - const xfs_bstat_t *buffer); /* buffer to read from */ > > > > +typedef int (*bulkstat_one_fmt_pf)(struct xfs_ibulk *breq, > > > > + const struct xfs_bstat *bstat); > > > > > > > > -int > > > > -xfs_bulkstat_one_int( > > > > - xfs_mount_t *mp, > > > > - xfs_ino_t ino, > > > > - void __user *buffer, > > > > - int ubsize, > > > > - bulkstat_one_fmt_pf formatter, > > > > - int *ubused, > > > > - int *stat); > > > > - > > > > -int > > > > -xfs_bulkstat_one( > > > > - xfs_mount_t *mp, > > > > - xfs_ino_t ino, > > > > - void __user *buffer, > > > > - int ubsize, > > > > - int *ubused, > > > > - int *stat); > > > > +int xfs_bulkstat_one(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); > > > > +int xfs_bulkstat(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); > > > > > > > > typedef int (*inumbers_fmt_pf)( > > > > void __user *ubuffer, /* buffer to write to */ > > > >
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 5ffbdcff3dba..43734901aeb9 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -721,16 +721,28 @@ xfs_ioc_space( return error; } +/* Return 0 on success or positive error */ +int +xfs_bulkstat_one_fmt( + struct xfs_ibulk *breq, + const struct xfs_bstat *bstat) +{ + if (copy_to_user(breq->ubuffer, bstat, sizeof(*bstat))) + return -EFAULT; + return xfs_ibulk_advance(breq, sizeof(struct xfs_bstat)); +} + STATIC int xfs_ioc_bulkstat( xfs_mount_t *mp, unsigned int cmd, void __user *arg) { - xfs_fsop_bulkreq_t bulkreq; - int count; /* # of records returned */ - xfs_ino_t inlast; /* last inode number */ - int done; + struct xfs_fsop_bulkreq bulkreq; + struct xfs_ibulk breq = { + .mp = mp, + }; + xfs_ino_t lastino; int error; /* done = 1 if there are more stats to get and if bulkstat */ @@ -745,35 +757,54 @@ xfs_ioc_bulkstat( if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t))) return -EFAULT; - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) + if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64))) return -EFAULT; - if ((count = bulkreq.icount) <= 0) + if (bulkreq.icount <= 0) return -EINVAL; if (bulkreq.ubuffer == NULL) return -EINVAL; - if (cmd == XFS_IOC_FSINUMBERS) - error = xfs_inumbers(mp, &inlast, &count, + breq.ubuffer = bulkreq.ubuffer; + breq.icount = bulkreq.icount; + + /* + * FSBULKSTAT_SINGLE expects that *lastip contains the inode number + * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect + * that *lastip contains either zero or the number of the last inode to + * be examined by the previous call and return results starting with + * the next inode after that. The new bulk request functions take the + * inode to start with, so we have to adjust the lastino/startino + * parameter to maintain correct function. + */ + if (cmd == XFS_IOC_FSINUMBERS) { + int count = breq.icount; + + breq.startino = lastino; + error = xfs_inumbers(mp, &breq.startino, &count, bulkreq.ubuffer, xfs_inumbers_fmt); - else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) - error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer, - sizeof(xfs_bstat_t), NULL, &done); - else /* XFS_IOC_FSBULKSTAT */ - error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, - sizeof(xfs_bstat_t), bulkreq.ubuffer, - &done); + breq.ocount = count; + lastino = breq.startino; + } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) { + breq.startino = lastino; + error = xfs_bulkstat_one(&breq, xfs_bulkstat_one_fmt); + lastino = breq.startino; + } else { /* XFS_IOC_FSBULKSTAT */ + breq.startino = lastino + 1; + error = xfs_bulkstat(&breq, xfs_bulkstat_one_fmt); + lastino = breq.startino - 1; + } if (error) return error; if (bulkreq.lastip != NULL && - copy_to_user(bulkreq.lastip, &inlast, sizeof(xfs_ino_t))) + copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t))) return -EFAULT; if (bulkreq.ocount != NULL && - copy_to_user(bulkreq.ocount, &count, sizeof(count))) + copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32))) return -EFAULT; return 0; diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h index 4b17f67c888a..f32c8aadfeba 100644 --- a/fs/xfs/xfs_ioctl.h +++ b/fs/xfs/xfs_ioctl.h @@ -77,4 +77,9 @@ xfs_set_dmattrs( uint evmask, uint16_t state); +struct xfs_ibulk; +struct xfs_bstat; + +int xfs_bulkstat_one_fmt(struct xfs_ibulk *breq, const struct xfs_bstat *bstat); + #endif diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 814ffe6fbab7..add15819daf3 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -172,15 +172,10 @@ xfs_bstime_store_compat( /* Return 0 on success or positive error (to xfs_bulkstat()) */ STATIC int xfs_bulkstat_one_fmt_compat( - void __user *ubuffer, - int ubsize, - int *ubused, - const xfs_bstat_t *buffer) + struct xfs_ibulk *breq, + const struct xfs_bstat *buffer) { - compat_xfs_bstat_t __user *p32 = ubuffer; - - if (ubsize < sizeof(*p32)) - return -ENOMEM; + struct compat_xfs_bstat __user *p32 = breq->ubuffer; if (put_user(buffer->bs_ino, &p32->bs_ino) || put_user(buffer->bs_mode, &p32->bs_mode) || @@ -205,23 +200,8 @@ xfs_bulkstat_one_fmt_compat( put_user(buffer->bs_dmstate, &p32->bs_dmstate) || put_user(buffer->bs_aextents, &p32->bs_aextents)) return -EFAULT; - if (ubused) - *ubused = sizeof(*p32); - return 0; -} -STATIC int -xfs_bulkstat_one_compat( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode number to get data for */ - void __user *buffer, /* buffer to place output in */ - int ubsize, /* size of buffer */ - int *ubused, /* bytes used by me */ - int *stat) /* BULKSTAT_RV_... */ -{ - return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, - xfs_bulkstat_one_fmt_compat, - ubused, stat); + return xfs_ibulk_advance(breq, sizeof(struct compat_xfs_bstat)); } /* copied from xfs_ioctl.c */ @@ -232,10 +212,11 @@ xfs_compat_ioc_bulkstat( compat_xfs_fsop_bulkreq_t __user *p32) { u32 addr; - xfs_fsop_bulkreq_t bulkreq; - int count; /* # of records returned */ - xfs_ino_t inlast; /* last inode number */ - int done; + struct xfs_fsop_bulkreq bulkreq; + struct xfs_ibulk breq = { + .mp = mp, + }; + xfs_ino_t lastino; int error; /* @@ -245,8 +226,7 @@ xfs_compat_ioc_bulkstat( * functions and structure size are the correct ones to use ... */ inumbers_fmt_pf inumbers_func = xfs_inumbers_fmt_compat; - bulkstat_one_pf bs_one_func = xfs_bulkstat_one_compat; - size_t bs_one_size = sizeof(struct compat_xfs_bstat); + bulkstat_one_fmt_pf bs_one_func = xfs_bulkstat_one_fmt_compat; #ifdef CONFIG_X86_X32 if (in_x32_syscall()) { @@ -259,8 +239,7 @@ xfs_compat_ioc_bulkstat( * x32 userspace expects. */ inumbers_func = xfs_inumbers_fmt; - bs_one_func = xfs_bulkstat_one; - bs_one_size = sizeof(struct xfs_bstat); + bs_one_func = xfs_bulkstat_one_fmt; } #endif @@ -284,38 +263,57 @@ xfs_compat_ioc_bulkstat( return -EFAULT; bulkreq.ocount = compat_ptr(addr); - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) + if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64))) return -EFAULT; + breq.startino = lastino + 1; - if ((count = bulkreq.icount) <= 0) + if (bulkreq.icount <= 0) return -EINVAL; if (bulkreq.ubuffer == NULL) return -EINVAL; + breq.ubuffer = bulkreq.ubuffer; + breq.icount = bulkreq.icount; + + /* + * FSBULKSTAT_SINGLE expects that *lastip contains the inode number + * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect + * that *lastip contains either zero or the number of the last inode to + * be examined by the previous call and return results starting with + * the next inode after that. The new bulk request functions take the + * inode to start with, so we have to adjust the lastino/startino + * parameter to maintain correct function. + */ if (cmd == XFS_IOC_FSINUMBERS_32) { - error = xfs_inumbers(mp, &inlast, &count, + int count = breq.icount; + + breq.startino = lastino; + error = xfs_inumbers(mp, &breq.startino, &count, bulkreq.ubuffer, inumbers_func); + breq.ocount = count; + lastino = breq.startino; } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { - int res; - - error = bs_one_func(mp, inlast, bulkreq.ubuffer, - bs_one_size, NULL, &res); + breq.startino = lastino; + error = xfs_bulkstat_one(&breq, bs_one_func); + lastino = breq.startino; } else if (cmd == XFS_IOC_FSBULKSTAT_32) { - error = xfs_bulkstat(mp, &inlast, &count, - bs_one_func, bs_one_size, - bulkreq.ubuffer, &done); - } else + breq.startino = lastino + 1; + error = xfs_bulkstat(&breq, bs_one_func); + lastino = breq.startino - 1; + } else { error = -EINVAL; + } if (error) return error; + lastino = breq.startino - 1; if (bulkreq.lastip != NULL && - copy_to_user(bulkreq.lastip, &inlast, sizeof(xfs_ino_t))) + copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t))) return -EFAULT; if (bulkreq.ocount != NULL && - copy_to_user(bulkreq.ocount, &count, sizeof(count))) + copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32))) return -EFAULT; return 0; diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 3ca1c454afe6..87c597ea1df7 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -22,37 +22,63 @@ #include "xfs_iwalk.h" /* - * Return stat information for one inode. - * Return 0 if ok, else errno. + * Bulk Stat + * ========= + * + * Use the inode walking functions to fill out struct xfs_bstat for every + * allocated inode, then pass the stat information to some externally provided + * iteration function. */ -int + +struct xfs_bstat_chunk { + bulkstat_one_fmt_pf formatter; + struct xfs_ibulk *breq; +}; + +/* + * Fill out the bulkstat info for a single inode and report it somewhere. + * + * bc->breq->lastino is effectively the inode cursor as we walk through the + * filesystem. Therefore, we update it any time we need to move the cursor + * forward, regardless of whether or not we're sending any bstat information + * back to userspace. If the inode is internal metadata or, has been freed + * out from under us, we just simply keep going. + * + * However, if any other type of error happens we want to stop right where we + * are so that userspace will call back with exact number of the bad inode and + * we can send back an error code. + * + * Note that if the formatter tells us there's no space left in the buffer we + * move the cursor forward and abort the walk. + */ +STATIC int xfs_bulkstat_one_int( - struct xfs_mount *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode to get data for */ - void __user *buffer, /* buffer to place output in */ - int ubsize, /* size of buffer */ - bulkstat_one_fmt_pf formatter, /* formatter, copy to user */ - int *ubused, /* bytes used by me */ - int *stat) /* BULKSTAT_RV_... */ + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_ino_t ino, + void *data) { + struct xfs_bstat_chunk *bc = data; struct xfs_icdinode *dic; /* dinode core info pointer */ struct xfs_inode *ip; /* incore inode pointer */ struct inode *inode; struct xfs_bstat *buf; /* return buffer */ int error = 0; /* error value */ - *stat = BULKSTAT_RV_NOTHING; - - if (!buffer || xfs_internal_inum(mp, ino)) + if (xfs_internal_inum(mp, ino)) { + bc->breq->startino = ino + 1; return -EINVAL; + } buf = kmem_zalloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL); if (!buf) return -ENOMEM; - error = xfs_iget(mp, NULL, ino, + error = xfs_iget(mp, tp, ino, (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED), XFS_ILOCK_SHARED, &ip); + if (error == -ENOENT || error == -EINVAL) + bc->breq->startino = ino + 1; if (error) goto out_free; @@ -119,43 +145,45 @@ xfs_bulkstat_one_int( xfs_iunlock(ip, XFS_ILOCK_SHARED); xfs_irele(ip); - error = formatter(buffer, ubsize, ubused, buf); - if (!error) - *stat = BULKSTAT_RV_DIDONE; - - out_free: + error = bc->formatter(bc->breq, buf); + switch (error) { + case XFS_IBULK_BUFFER_FULL: + error = XFS_IWALK_ABORT; + /* fall through */ + case 0: + bc->breq->startino = ino + 1; + break; + } +out_free: kmem_free(buf); return error; } -/* Return 0 on success or positive error */ -STATIC int -xfs_bulkstat_one_fmt( - void __user *ubuffer, - int ubsize, - int *ubused, - const xfs_bstat_t *buffer) -{ - if (ubsize < sizeof(*buffer)) - return -ENOMEM; - if (copy_to_user(ubuffer, buffer, sizeof(*buffer))) - return -EFAULT; - if (ubused) - *ubused = sizeof(*buffer); - return 0; -} - +/* Bulkstat a single inode. */ int xfs_bulkstat_one( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode number to get data for */ - void __user *buffer, /* buffer to place output in */ - int ubsize, /* size of buffer */ - int *ubused, /* bytes used by me */ - int *stat) /* BULKSTAT_RV_... */ + struct xfs_ibulk *breq, + bulkstat_one_fmt_pf formatter) { - return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, - xfs_bulkstat_one_fmt, ubused, stat); + struct xfs_bstat_chunk bc = { + .formatter = formatter, + .breq = breq, + }; + int error; + + breq->icount = 1; + breq->ocount = 0; + + error = xfs_bulkstat_one_int(breq->mp, NULL, breq->startino, &bc); + + /* + * If we reported one inode to userspace then we abort because we hit + * the end of the buffer. Don't leak that back to userspace. + */ + if (error == XFS_IWALK_ABORT) + error = 0; + + return error; } /* @@ -251,256 +279,65 @@ xfs_bulkstat_grab_ichunk( #define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) -struct xfs_bulkstat_agichunk { - char __user **ac_ubuffer;/* pointer into user's buffer */ - int ac_ubleft; /* bytes left in user's buffer */ - int ac_ubelem; /* spaces used in user's buffer */ -}; - -/* - * Process inodes in chunk with a pointer to a formatter function - * that will iget the inode and fill in the appropriate structure. - */ static int -xfs_bulkstat_ag_ichunk( - struct xfs_mount *mp, - xfs_agnumber_t agno, - struct xfs_inobt_rec_incore *irbp, - bulkstat_one_pf formatter, - size_t statstruct_size, - struct xfs_bulkstat_agichunk *acp, - xfs_agino_t *last_agino) +xfs_bulkstat_iwalk( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_ino_t ino, + void *data) { - char __user **ubufp = acp->ac_ubuffer; - int chunkidx; - int error = 0; - xfs_agino_t agino = irbp->ir_startino; - - for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK; - chunkidx++, agino++) { - int fmterror; - int ubused; - - /* inode won't fit in buffer, we are done */ - if (acp->ac_ubleft < statstruct_size) - break; - - /* Skip if this inode is free */ - if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) - continue; - - /* Get the inode and fill in a single buffer */ - ubused = statstruct_size; - error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino), - *ubufp, acp->ac_ubleft, &ubused, &fmterror); - - if (fmterror == BULKSTAT_RV_GIVEUP || - (error && error != -ENOENT && error != -EINVAL)) { - acp->ac_ubleft = 0; - ASSERT(error); - break; - } - - /* be careful not to leak error if at end of chunk */ - if (fmterror == BULKSTAT_RV_NOTHING || error) { - error = 0; - continue; - } - - *ubufp += ubused; - acp->ac_ubleft -= ubused; - acp->ac_ubelem++; - } - - /* - * Post-update *last_agino. At this point, agino will always point one - * inode past the last inode we processed successfully. Hence we - * substract that inode when setting the *last_agino cursor so that we - * return the correct cookie to userspace. On the next bulkstat call, - * the inode under the lastino cookie will be skipped as we have already - * processed it here. - */ - *last_agino = agino - 1; + int error; + error = xfs_bulkstat_one_int(mp, tp, ino, data); + /* bulkstat just skips over missing inodes */ + if (error == -ENOENT || error == -EINVAL) + return 0; return error; } /* - * Return stat information in bulk (by-inode) for the filesystem. + * Check the incoming lastino parameter. + * + * We allow any inode value that could map to physical space inside the + * filesystem because if there are no inodes there, bulkstat moves on to the + * next chunk. In other words, the magic agino value of zero takes us to the + * first chunk in the AG, and an agino value past the end of the AG takes us to + * the first chunk in the next AG. + * + * Therefore we can end early if the requested inode is beyond the end of the + * filesystem or doesn't map properly. */ -int /* error status */ -xfs_bulkstat( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t *lastinop, /* last inode returned */ - int *ubcountp, /* size of buffer/count returned */ - bulkstat_one_pf formatter, /* func that'd fill a single buf */ - size_t statstruct_size, /* sizeof struct filling */ - char __user *ubuffer, /* buffer with inode stats */ - int *done) /* 1 if there are more stats to get */ +static inline bool +xfs_bulkstat_already_done( + struct xfs_mount *mp, + xfs_ino_t startino) { - xfs_buf_t *agbp; /* agi header buffer */ - xfs_agino_t agino; /* inode # in allocation group */ - xfs_agnumber_t agno; /* allocation group number */ - xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ - xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ - int nirbuf; /* size of irbuf */ - int ubcount; /* size of user's buffer */ - struct xfs_bulkstat_agichunk ac; - int error = 0; - - /* - * Get the last inode value, see if there's nothing to do. - */ - agno = XFS_INO_TO_AGNO(mp, *lastinop); - agino = XFS_INO_TO_AGINO(mp, *lastinop); - if (agno >= mp->m_sb.sb_agcount || - *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) { - *done = 1; - *ubcountp = 0; - return 0; - } - - ubcount = *ubcountp; /* statstruct's */ - ac.ac_ubuffer = &ubuffer; - ac.ac_ubleft = ubcount * statstruct_size; /* bytes */; - ac.ac_ubelem = 0; - - *ubcountp = 0; - *done = 0; - - irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP); - if (!irbuf) - return -ENOMEM; - nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf); - - /* - * Loop over the allocation groups, starting from the last - * inode returned; 0 means start of the allocation group. - */ - while (agno < mp->m_sb.sb_agcount) { - struct xfs_inobt_rec_incore *irbp = irbuf; - struct xfs_inobt_rec_incore *irbufend = irbuf + nirbuf; - bool end_of_ag = false; - int icount = 0; - int stat; - - error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); - if (error) - break; - /* - * Allocate and initialize a btree cursor for ialloc btree. - */ - cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, - XFS_BTNUM_INO); - if (agino > 0) { - /* - * In the middle of an allocation group, we need to get - * the remainder of the chunk we're in. - */ - struct xfs_inobt_rec_incore r; - - error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r); - if (error) - goto del_cursor; - if (icount) { - irbp->ir_startino = r.ir_startino; - irbp->ir_holemask = r.ir_holemask; - irbp->ir_count = r.ir_count; - irbp->ir_freecount = r.ir_freecount; - irbp->ir_free = r.ir_free; - irbp++; - } - /* Increment to the next record */ - error = xfs_btree_increment(cur, 0, &stat); - } else { - /* Start of ag. Lookup the first inode chunk */ - error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat); - } - if (error || stat == 0) { - end_of_ag = true; - goto del_cursor; - } + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, startino); - /* - * Loop through inode btree records in this ag, - * until we run out of inodes or space in the buffer. - */ - while (irbp < irbufend && icount < ubcount) { - struct xfs_inobt_rec_incore r; - - error = xfs_inobt_get_rec(cur, &r, &stat); - if (error || stat == 0) { - end_of_ag = true; - goto del_cursor; - } - - /* - * If this chunk has any allocated inodes, save it. - * Also start read-ahead now for this chunk. - */ - if (r.ir_freecount < r.ir_count) { - xfs_bulkstat_ichunk_ra(mp, agno, &r); - irbp->ir_startino = r.ir_startino; - irbp->ir_holemask = r.ir_holemask; - irbp->ir_count = r.ir_count; - irbp->ir_freecount = r.ir_freecount; - irbp->ir_free = r.ir_free; - irbp++; - icount += r.ir_count - r.ir_freecount; - } - error = xfs_btree_increment(cur, 0, &stat); - if (error || stat == 0) { - end_of_ag = true; - goto del_cursor; - } - cond_resched(); - } + return agno >= mp->m_sb.sb_agcount || + startino != XFS_AGINO_TO_INO(mp, agno, agino); +} - /* - * Drop the btree buffers and the agi buffer as we can't hold any - * of the locks these represent when calling iget. If there is a - * pending error, then we are done. - */ -del_cursor: - xfs_btree_del_cursor(cur, error); - xfs_buf_relse(agbp); - if (error) - break; - /* - * Now format all the good inodes into the user's buffer. The - * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer - * for the next loop iteration. - */ - irbufend = irbp; - for (irbp = irbuf; - irbp < irbufend && ac.ac_ubleft >= statstruct_size; - irbp++) { - error = xfs_bulkstat_ag_ichunk(mp, agno, irbp, - formatter, statstruct_size, &ac, - &agino); - if (error) - break; +/* Return stat information in bulk (by-inode) for the filesystem. */ +int +xfs_bulkstat( + struct xfs_ibulk *breq, + bulkstat_one_fmt_pf formatter) +{ + struct xfs_bstat_chunk bc = { + .formatter = formatter, + .breq = breq, + }; + int error; - cond_resched(); - } + breq->ocount = 0; - /* - * If we've run out of space or had a formatting error, we - * are now done - */ - if (ac.ac_ubleft < statstruct_size || error) - break; + if (xfs_bulkstat_already_done(breq->mp, breq->startino)) + return 0; - if (end_of_ag) { - agno++; - agino = 0; - } - } - /* - * Done, we're either out of filesystem or space to put the data. - */ - kmem_free(irbuf); - *ubcountp = ac.ac_ubelem; + error = xfs_iwalk(breq->mp, NULL, breq->startino, xfs_bulkstat_iwalk, + breq->icount, &bc); /* * We found some inodes, so clear the error status and return them. @@ -509,17 +346,9 @@ xfs_bulkstat( * triggered again and propagated to userspace as there will be no * formatted inodes in the buffer. */ - if (ac.ac_ubelem) + if (breq->ocount > 0) error = 0; - /* - * If we ran out of filesystem, lastino will point off the end of - * the filesystem so the next call will return immediately. - */ - *lastinop = XFS_AGINO_TO_INO(mp, agno, agino); - if (agno >= mp->m_sb.sb_agcount) - *done = 1; - return error; } diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index 369e3f159d4e..366d391eb11f 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -5,63 +5,46 @@ #ifndef __XFS_ITABLE_H__ #define __XFS_ITABLE_H__ -/* - * xfs_bulkstat() is used to fill in xfs_bstat structures as well as dm_stat - * structures (by the dmi library). This is a pointer to a formatter function - * that will iget the inode and fill in the appropriate structure. - * see xfs_bulkstat_one() and xfs_dm_bulkstat_one() in dmapi_xfs.c - */ -typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, - xfs_ino_t ino, - void __user *buffer, - int ubsize, - int *ubused, - int *stat); +/* In-memory representation of a userspace request for batch inode data. */ +struct xfs_ibulk { + struct xfs_mount *mp; + void __user *ubuffer; /* user output buffer */ + xfs_ino_t startino; /* start with this inode */ + unsigned int icount; /* number of elements in ubuffer */ + unsigned int ocount; /* number of records returned */ +}; + +/* Return value that means we want to abort the walk. */ +#define XFS_IBULK_ABORT (XFS_IWALK_ABORT) + +/* Return value that means the formatting buffer is now full. */ +#define XFS_IBULK_BUFFER_FULL (2) /* - * Values for stat return value. + * Advance the user buffer pointer by one record of the given size. If the + * buffer is now full, return the appropriate error code. */ -#define BULKSTAT_RV_NOTHING 0 -#define BULKSTAT_RV_DIDONE 1 -#define BULKSTAT_RV_GIVEUP 2 +static inline int +xfs_ibulk_advance( + struct xfs_ibulk *breq, + size_t bytes) +{ + char __user *b = breq->ubuffer; + + breq->ubuffer = b + bytes; + breq->ocount++; + return breq->ocount == breq->icount ? XFS_IBULK_BUFFER_FULL : 0; +} /* * Return stat information in bulk (by-inode) for the filesystem. */ -int /* error status */ -xfs_bulkstat( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t *lastino, /* last inode returned */ - int *count, /* size of buffer/count returned */ - bulkstat_one_pf formatter, /* func that'd fill a single buf */ - size_t statstruct_size,/* sizeof struct that we're filling */ - char __user *ubuffer,/* buffer with inode stats */ - int *done); /* 1 if there are more stats to get */ -typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */ - void __user *ubuffer, /* buffer to write to */ - int ubsize, /* remaining user buffer sz */ - int *ubused, /* bytes used by formatter */ - const xfs_bstat_t *buffer); /* buffer to read from */ +typedef int (*bulkstat_one_fmt_pf)(struct xfs_ibulk *breq, + const struct xfs_bstat *bstat); -int -xfs_bulkstat_one_int( - xfs_mount_t *mp, - xfs_ino_t ino, - void __user *buffer, - int ubsize, - bulkstat_one_fmt_pf formatter, - int *ubused, - int *stat); - -int -xfs_bulkstat_one( - xfs_mount_t *mp, - xfs_ino_t ino, - void __user *buffer, - int ubsize, - int *ubused, - int *stat); +int xfs_bulkstat_one(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); +int xfs_bulkstat(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); typedef int (*inumbers_fmt_pf)( void __user *ubuffer, /* buffer to write to */