Message ID | 20170428194652.GG22884@birch.djwong.org (mailing list archive) |
---|---|
State | Accepted |
Headers | show |
On Fri, Apr 28, 2017 at 12:46:52PM -0700, Darrick J. Wong wrote: > Currently, the dir2 leaf block getdents function uses a complex state > tracking mechanism to create a shadow copy of the block mappings and > then uses the shadow copy to schedule readahead. Since the read and > readahead functions are perfectly capable of reading the mappings > themselves, we can tear all that out in favor of a simpler function that > simply keeps pushing the readahead window further out. > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > --- > v3: use sliding window to constrain the amount of readahead > v2: fix readahead of more than ra_want > --- Thanks for the updates. This looks much more simple and seems true to the current readahead behavior. The code also looks fine to me (one bit of whitespace damage noted below). That aside, have you happened to test this against a huge/ugly directory to verify it works as expected? Note that I don't think in depth performance analysis is required. Verification of any kind of dir that is known to benefit from readahead is probably sufficient IMO. Perhaps dm-delay with a small enough latency to allow us to measure the effect of readahead could help us here. > fs/xfs/xfs_dir2_readdir.c | 316 ++++++++++++--------------------------------- > 1 file changed, 82 insertions(+), 234 deletions(-) > > diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c > index 20b7a5c..d05c1ec 100644 > --- a/fs/xfs/xfs_dir2_readdir.c > +++ b/fs/xfs/xfs_dir2_readdir.c > @@ -243,214 +243,98 @@ xfs_dir2_block_getdents( > return 0; > } > ... > /* > - * Do we need more readahead? > - * Each loop tries to process 1 full dir blk; last may be partial. > + * Start readahead for the next bufsize's worth of dir data blocks. > + * We may have already issued readahead for some of that range; > + * ra_blk tracks the last block we tried to read(ahead). > */ > + ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog)); > + if (*ra_blk >= last_da) > + goto out; > + else if (*ra_blk == 0) > + *ra_blk = map.br_startoff; > + next_ra = map.br_startoff + geo->fsbcount; > + if (next_ra >= last_da) > + goto out_no_ra; > + found = xfs_iext_lookup_extent(dp, ifp, next_ra, &idx, &map); > + if (!found || map.br_startoff >= last_da) > + goto out_no_ra; > + xfs_trim_extent(&map, next_ra, last_da - next_ra); > + ^ trailing space. Brian > + /* Start ra for each dir (not fs) block that has a mapping. */ > blk_start_plug(&plug); > - for (mip->ra_index = mip->ra_offset = i = 0; > - mip->ra_want > mip->ra_current && i < mip->map_blocks; > - i += geo->fsbcount) { > - ASSERT(mip->ra_index < mip->map_valid); > - /* > - * Read-ahead a contiguous directory block. > - */ > - if (i > mip->ra_current && > - (map[mip->ra_index].br_blockcount - mip->ra_offset) >= > - geo->fsbcount) { > - xfs_dir3_data_readahead(dp, > - map[mip->ra_index].br_startoff + mip->ra_offset, > - XFS_FSB_TO_DADDR(dp->i_mount, > - map[mip->ra_index].br_startblock + > - mip->ra_offset)); > - mip->ra_current = i; > - } > - > - /* > - * Read-ahead a non-contiguous directory block. This doesn't > - * use our mapping, but this is a very rare case. > - */ > - else if (i > mip->ra_current) { > - xfs_dir3_data_readahead(dp, > - map[mip->ra_index].br_startoff + > - mip->ra_offset, -1); > - mip->ra_current = i; > - } > - > - /* > - * Advance offset through the mapping table, processing a full > - * dir block even if it is fragmented into several extents. > - * But stop if we have consumed all valid mappings, even if > - * it's not yet a full directory block. > - */ > - for (j = 0; > - j < geo->fsbcount && mip->ra_index < mip->map_valid; > - j += length ) { > - /* > - * The rest of this extent but not more than a dir > - * block. > - */ > - length = min_t(int, geo->fsbcount - j, > - map[mip->ra_index].br_blockcount - > - mip->ra_offset); > - mip->ra_offset += length; > - > - /* > - * Advance to the next mapping if this one is used up. > - */ > - if (mip->ra_offset == map[mip->ra_index].br_blockcount) { > - mip->ra_offset = 0; > - mip->ra_index++; > + while (ra_want > 0) { > + next_ra = roundup((xfs_dablk_t)map.br_startoff, geo->fsbcount); > + while (ra_want > 0 && > + next_ra < map.br_startoff + map.br_blockcount) { > + if (next_ra >= last_da) { > + *ra_blk = last_da; > + break; > + } else if (next_ra > *ra_blk) { > + xfs_dir3_data_readahead(dp, next_ra, -2); > + *ra_blk = next_ra; > } > + ra_want -= geo->fsbcount; > + next_ra += geo->fsbcount; > + } > + found = xfs_iext_get_extent(ifp, ++idx, &map); > + if (!found) { > + *ra_blk = last_da; > + break; > } > } > blk_finish_plug(&plug); > @@ -458,6 +342,9 @@ xfs_dir2_leaf_readbuf( > out: > *bpp = bp; > return error; > +out_no_ra: > + *ra_blk = last_da; > + goto out; > } > > /* > @@ -475,14 +362,14 @@ xfs_dir2_leaf_getdents( > xfs_dir2_data_hdr_t *hdr; /* data block header */ > xfs_dir2_data_entry_t *dep; /* data entry */ > xfs_dir2_data_unused_t *dup; /* unused entry */ > - int error = 0; /* error return value */ > - int length; /* temporary length value */ > - int byteoff; /* offset in current block */ > - xfs_dir2_off_t curoff; /* current overall offset */ > - xfs_dir2_off_t newoff; /* new curoff after new blk */ > char *ptr = NULL; /* pointer to current data */ > - struct xfs_dir2_leaf_map_info *map_info; > struct xfs_da_geometry *geo = args->geo; > + xfs_dablk_t rablk = 0; /* current readahead block */ > + xfs_dir2_off_t curoff; /* current overall offset */ > + int length; /* temporary length value */ > + int byteoff; /* offset in current block */ > + int lock_mode; > + int error = 0; /* error return value */ > > /* > * If the offset is at or past the largest allowed value, > @@ -492,30 +379,12 @@ xfs_dir2_leaf_getdents( > return 0; > > /* > - * Set up to bmap a number of blocks based on the caller's > - * buffer size, the directory block size, and the filesystem > - * block size. > - */ > - length = howmany(bufsize + geo->blksize, (1 << geo->fsblog)); > - map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) + > - (length * sizeof(struct xfs_bmbt_irec)), > - KM_SLEEP | KM_NOFS); > - map_info->map_size = length; > - > - /* > * Inside the loop we keep the main offset value as a byte offset > * in the directory file. > */ > curoff = xfs_dir2_dataptr_to_byte(ctx->pos); > > /* > - * Force this conversion through db so we truncate the offset > - * down to get the start of the data block. > - */ > - map_info->map_off = xfs_dir2_db_to_da(geo, > - xfs_dir2_byte_to_db(geo, curoff)); > - > - /* > * Loop over directory entries until we reach the end offset. > * Get more blocks and readahead as necessary. > */ > @@ -527,38 +396,18 @@ xfs_dir2_leaf_getdents( > * current buffer, need to get another one. > */ > if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) { > - int lock_mode; > - bool trim_map = false; > - > if (bp) { > - xfs_trans_brelse(NULL, bp); > + xfs_trans_brelse(args->trans, bp); > bp = NULL; > - trim_map = true; > } > > lock_mode = xfs_ilock_data_map_shared(dp); > - error = xfs_dir2_leaf_readbuf(args, bufsize, map_info, > - &curoff, &bp, trim_map); > + error = xfs_dir2_leaf_readbuf(args, bufsize, &curoff, > + &rablk, &bp); > xfs_iunlock(dp, lock_mode); > - if (error || !map_info->map_valid) > + if (error || !bp) > break; > > - /* > - * Having done a read, we need to set a new offset. > - */ > - newoff = xfs_dir2_db_off_to_byte(geo, > - map_info->curdb, 0); > - /* > - * Start of the current block. > - */ > - if (curoff < newoff) > - curoff = newoff; > - /* > - * Make sure we're in the right block. > - */ > - else if (curoff > newoff) > - ASSERT(xfs_dir2_byte_to_db(geo, curoff) == > - map_info->curdb); > hdr = bp->b_addr; > xfs_dir3_data_check(dp, bp); > /* > @@ -643,7 +492,6 @@ xfs_dir2_leaf_getdents( > ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff; > else > ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff; > - kmem_free(map_info); > if (bp) > xfs_trans_brelse(NULL, bp); > return error; > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, May 01, 2017 at 02:32:43PM -0400, Brian Foster wrote: > On Fri, Apr 28, 2017 at 12:46:52PM -0700, Darrick J. Wong wrote: > > Currently, the dir2 leaf block getdents function uses a complex state > > tracking mechanism to create a shadow copy of the block mappings and > > then uses the shadow copy to schedule readahead. Since the read and > > readahead functions are perfectly capable of reading the mappings > > themselves, we can tear all that out in favor of a simpler function that > > simply keeps pushing the readahead window further out. > > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > > --- > > v3: use sliding window to constrain the amount of readahead > > v2: fix readahead of more than ra_want > > --- > > Thanks for the updates. This looks much more simple and seems true to > the current readahead behavior. The code also looks fine to me (one bit > of whitespace damage noted below). > > That aside, have you happened to test this against a huge/ugly directory > to verify it works as expected? Note that I don't think in depth > performance analysis is required. Verification of any kind of dir that > is known to benefit from readahead is probably sufficient IMO. Perhaps > dm-delay with a small enough latency to allow us to measure the effect > of readahead could help us here. Yeah, I used xfs/349 to generate a filesystem containing a directory with a freeindex block, then ls'd the entire directory to see how long the getdents calls took. The readhead calls were nearly identical with similar runtimes. > > fs/xfs/xfs_dir2_readdir.c | 316 ++++++++++++--------------------------------- > > 1 file changed, 82 insertions(+), 234 deletions(-) > > > > diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c > > index 20b7a5c..d05c1ec 100644 > > --- a/fs/xfs/xfs_dir2_readdir.c > > +++ b/fs/xfs/xfs_dir2_readdir.c > > @@ -243,214 +243,98 @@ xfs_dir2_block_getdents( > > return 0; > > } > > > ... > > /* > > - * Do we need more readahead? > > - * Each loop tries to process 1 full dir blk; last may be partial. > > + * Start readahead for the next bufsize's worth of dir data blocks. > > + * We may have already issued readahead for some of that range; > > + * ra_blk tracks the last block we tried to read(ahead). > > */ > > + ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog)); > > + if (*ra_blk >= last_da) > > + goto out; > > + else if (*ra_blk == 0) > > + *ra_blk = map.br_startoff; > > + next_ra = map.br_startoff + geo->fsbcount; > > + if (next_ra >= last_da) > > + goto out_no_ra; > > + found = xfs_iext_lookup_extent(dp, ifp, next_ra, &idx, &map); > > + if (!found || map.br_startoff >= last_da) > > + goto out_no_ra; > > + xfs_trim_extent(&map, next_ra, last_da - next_ra); > > + > > ^ trailing space. (I don't see it...?) --D > > Brian > > > + /* Start ra for each dir (not fs) block that has a mapping. */ > > blk_start_plug(&plug); > > - for (mip->ra_index = mip->ra_offset = i = 0; > > - mip->ra_want > mip->ra_current && i < mip->map_blocks; > > - i += geo->fsbcount) { > > - ASSERT(mip->ra_index < mip->map_valid); > > - /* > > - * Read-ahead a contiguous directory block. > > - */ > > - if (i > mip->ra_current && > > - (map[mip->ra_index].br_blockcount - mip->ra_offset) >= > > - geo->fsbcount) { > > - xfs_dir3_data_readahead(dp, > > - map[mip->ra_index].br_startoff + mip->ra_offset, > > - XFS_FSB_TO_DADDR(dp->i_mount, > > - map[mip->ra_index].br_startblock + > > - mip->ra_offset)); > > - mip->ra_current = i; > > - } > > - > > - /* > > - * Read-ahead a non-contiguous directory block. This doesn't > > - * use our mapping, but this is a very rare case. > > - */ > > - else if (i > mip->ra_current) { > > - xfs_dir3_data_readahead(dp, > > - map[mip->ra_index].br_startoff + > > - mip->ra_offset, -1); > > - mip->ra_current = i; > > - } > > - > > - /* > > - * Advance offset through the mapping table, processing a full > > - * dir block even if it is fragmented into several extents. > > - * But stop if we have consumed all valid mappings, even if > > - * it's not yet a full directory block. > > - */ > > - for (j = 0; > > - j < geo->fsbcount && mip->ra_index < mip->map_valid; > > - j += length ) { > > - /* > > - * The rest of this extent but not more than a dir > > - * block. > > - */ > > - length = min_t(int, geo->fsbcount - j, > > - map[mip->ra_index].br_blockcount - > > - mip->ra_offset); > > - mip->ra_offset += length; > > - > > - /* > > - * Advance to the next mapping if this one is used up. > > - */ > > - if (mip->ra_offset == map[mip->ra_index].br_blockcount) { > > - mip->ra_offset = 0; > > - mip->ra_index++; > > + while (ra_want > 0) { > > + next_ra = roundup((xfs_dablk_t)map.br_startoff, geo->fsbcount); > > + while (ra_want > 0 && > > + next_ra < map.br_startoff + map.br_blockcount) { > > + if (next_ra >= last_da) { > > + *ra_blk = last_da; > > + break; > > + } else if (next_ra > *ra_blk) { > > + xfs_dir3_data_readahead(dp, next_ra, -2); > > + *ra_blk = next_ra; > > } > > + ra_want -= geo->fsbcount; > > + next_ra += geo->fsbcount; > > + } > > + found = xfs_iext_get_extent(ifp, ++idx, &map); > > + if (!found) { > > + *ra_blk = last_da; > > + break; > > } > > } > > blk_finish_plug(&plug); > > @@ -458,6 +342,9 @@ xfs_dir2_leaf_readbuf( > > out: > > *bpp = bp; > > return error; > > +out_no_ra: > > + *ra_blk = last_da; > > + goto out; > > } > > > > /* > > @@ -475,14 +362,14 @@ xfs_dir2_leaf_getdents( > > xfs_dir2_data_hdr_t *hdr; /* data block header */ > > xfs_dir2_data_entry_t *dep; /* data entry */ > > xfs_dir2_data_unused_t *dup; /* unused entry */ > > - int error = 0; /* error return value */ > > - int length; /* temporary length value */ > > - int byteoff; /* offset in current block */ > > - xfs_dir2_off_t curoff; /* current overall offset */ > > - xfs_dir2_off_t newoff; /* new curoff after new blk */ > > char *ptr = NULL; /* pointer to current data */ > > - struct xfs_dir2_leaf_map_info *map_info; > > struct xfs_da_geometry *geo = args->geo; > > + xfs_dablk_t rablk = 0; /* current readahead block */ > > + xfs_dir2_off_t curoff; /* current overall offset */ > > + int length; /* temporary length value */ > > + int byteoff; /* offset in current block */ > > + int lock_mode; > > + int error = 0; /* error return value */ > > > > /* > > * If the offset is at or past the largest allowed value, > > @@ -492,30 +379,12 @@ xfs_dir2_leaf_getdents( > > return 0; > > > > /* > > - * Set up to bmap a number of blocks based on the caller's > > - * buffer size, the directory block size, and the filesystem > > - * block size. > > - */ > > - length = howmany(bufsize + geo->blksize, (1 << geo->fsblog)); > > - map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) + > > - (length * sizeof(struct xfs_bmbt_irec)), > > - KM_SLEEP | KM_NOFS); > > - map_info->map_size = length; > > - > > - /* > > * Inside the loop we keep the main offset value as a byte offset > > * in the directory file. > > */ > > curoff = xfs_dir2_dataptr_to_byte(ctx->pos); > > > > /* > > - * Force this conversion through db so we truncate the offset > > - * down to get the start of the data block. > > - */ > > - map_info->map_off = xfs_dir2_db_to_da(geo, > > - xfs_dir2_byte_to_db(geo, curoff)); > > - > > - /* > > * Loop over directory entries until we reach the end offset. > > * Get more blocks and readahead as necessary. > > */ > > @@ -527,38 +396,18 @@ xfs_dir2_leaf_getdents( > > * current buffer, need to get another one. > > */ > > if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) { > > - int lock_mode; > > - bool trim_map = false; > > - > > if (bp) { > > - xfs_trans_brelse(NULL, bp); > > + xfs_trans_brelse(args->trans, bp); > > bp = NULL; > > - trim_map = true; > > } > > > > lock_mode = xfs_ilock_data_map_shared(dp); > > - error = xfs_dir2_leaf_readbuf(args, bufsize, map_info, > > - &curoff, &bp, trim_map); > > + error = xfs_dir2_leaf_readbuf(args, bufsize, &curoff, > > + &rablk, &bp); > > xfs_iunlock(dp, lock_mode); > > - if (error || !map_info->map_valid) > > + if (error || !bp) > > break; > > > > - /* > > - * Having done a read, we need to set a new offset. > > - */ > > - newoff = xfs_dir2_db_off_to_byte(geo, > > - map_info->curdb, 0); > > - /* > > - * Start of the current block. > > - */ > > - if (curoff < newoff) > > - curoff = newoff; > > - /* > > - * Make sure we're in the right block. > > - */ > > - else if (curoff > newoff) > > - ASSERT(xfs_dir2_byte_to_db(geo, curoff) == > > - map_info->curdb); > > hdr = bp->b_addr; > > xfs_dir3_data_check(dp, bp); > > /* > > @@ -643,7 +492,6 @@ xfs_dir2_leaf_getdents( > > ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff; > > else > > ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff; > > - kmem_free(map_info); > > if (bp) > > xfs_trans_brelse(NULL, bp); > > return error; > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, May 01, 2017 at 02:50:18PM -0700, Darrick J. Wong wrote: > On Mon, May 01, 2017 at 02:32:43PM -0400, Brian Foster wrote: > > On Fri, Apr 28, 2017 at 12:46:52PM -0700, Darrick J. Wong wrote: > > > Currently, the dir2 leaf block getdents function uses a complex state > > > tracking mechanism to create a shadow copy of the block mappings and > > > then uses the shadow copy to schedule readahead. Since the read and > > > readahead functions are perfectly capable of reading the mappings > > > themselves, we can tear all that out in favor of a simpler function that > > > simply keeps pushing the readahead window further out. > > > > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > > > --- > > > v3: use sliding window to constrain the amount of readahead > > > v2: fix readahead of more than ra_want > > > --- > > > > Thanks for the updates. This looks much more simple and seems true to > > the current readahead behavior. The code also looks fine to me (one bit > > of whitespace damage noted below). > > > > That aside, have you happened to test this against a huge/ugly directory > > to verify it works as expected? Note that I don't think in depth > > performance analysis is required. Verification of any kind of dir that > > is known to benefit from readahead is probably sufficient IMO. Perhaps > > dm-delay with a small enough latency to allow us to measure the effect > > of readahead could help us here. > > Yeah, I used xfs/349 to generate a filesystem containing a directory > with a freeindex block, then ls'd the entire directory to see how long > the getdents calls took. The readhead calls were nearly identical with > similar runtimes. > Ok.. > > > fs/xfs/xfs_dir2_readdir.c | 316 ++++++++++++--------------------------------- > > > 1 file changed, 82 insertions(+), 234 deletions(-) > > > > > > diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c > > > index 20b7a5c..d05c1ec 100644 > > > --- a/fs/xfs/xfs_dir2_readdir.c > > > +++ b/fs/xfs/xfs_dir2_readdir.c > > > @@ -243,214 +243,98 @@ xfs_dir2_block_getdents( > > > return 0; > > > } > > > > > ... > > > /* > > > - * Do we need more readahead? > > > - * Each loop tries to process 1 full dir blk; last may be partial. > > > + * Start readahead for the next bufsize's worth of dir data blocks. > > > + * We may have already issued readahead for some of that range; > > > + * ra_blk tracks the last block we tried to read(ahead). > > > */ > > > + ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog)); > > > + if (*ra_blk >= last_da) > > > + goto out; > > > + else if (*ra_blk == 0) > > > + *ra_blk = map.br_startoff; > > > + next_ra = map.br_startoff + geo->fsbcount; > > > + if (next_ra >= last_da) > > > + goto out_no_ra; > > > + found = xfs_iext_lookup_extent(dp, ifp, next_ra, &idx, &map); > > > + if (!found || map.br_startoff >= last_da) > > > + goto out_no_ra; > > > + xfs_trim_extent(&map, next_ra, last_da - next_ra); > > > + > > > > ^ trailing space. > > (I don't see it...?) > checkpatch catches it: $ ./scripts/checkpatch.pl /tmp/patch ERROR: trailing whitespace #227: FILE: fs/xfs/xfs_dir2_readdir.c:317: + $ WARNING: please, no spaces at the start of a line #227: FILE: fs/xfs/xfs_dir2_readdir.c:317: + $ total: 1 errors, 1 warnings, 391 lines checked ... With that fixed: Reviewed-by: Brian Foster <bfoster@redhat.com> > --D > > > > > Brian > > > > > + /* Start ra for each dir (not fs) block that has a mapping. */ > > > blk_start_plug(&plug); > > > - for (mip->ra_index = mip->ra_offset = i = 0; > > > - mip->ra_want > mip->ra_current && i < mip->map_blocks; > > > - i += geo->fsbcount) { > > > - ASSERT(mip->ra_index < mip->map_valid); > > > - /* > > > - * Read-ahead a contiguous directory block. > > > - */ > > > - if (i > mip->ra_current && > > > - (map[mip->ra_index].br_blockcount - mip->ra_offset) >= > > > - geo->fsbcount) { > > > - xfs_dir3_data_readahead(dp, > > > - map[mip->ra_index].br_startoff + mip->ra_offset, > > > - XFS_FSB_TO_DADDR(dp->i_mount, > > > - map[mip->ra_index].br_startblock + > > > - mip->ra_offset)); > > > - mip->ra_current = i; > > > - } > > > - > > > - /* > > > - * Read-ahead a non-contiguous directory block. This doesn't > > > - * use our mapping, but this is a very rare case. > > > - */ > > > - else if (i > mip->ra_current) { > > > - xfs_dir3_data_readahead(dp, > > > - map[mip->ra_index].br_startoff + > > > - mip->ra_offset, -1); > > > - mip->ra_current = i; > > > - } > > > - > > > - /* > > > - * Advance offset through the mapping table, processing a full > > > - * dir block even if it is fragmented into several extents. > > > - * But stop if we have consumed all valid mappings, even if > > > - * it's not yet a full directory block. > > > - */ > > > - for (j = 0; > > > - j < geo->fsbcount && mip->ra_index < mip->map_valid; > > > - j += length ) { > > > - /* > > > - * The rest of this extent but not more than a dir > > > - * block. > > > - */ > > > - length = min_t(int, geo->fsbcount - j, > > > - map[mip->ra_index].br_blockcount - > > > - mip->ra_offset); > > > - mip->ra_offset += length; > > > - > > > - /* > > > - * Advance to the next mapping if this one is used up. > > > - */ > > > - if (mip->ra_offset == map[mip->ra_index].br_blockcount) { > > > - mip->ra_offset = 0; > > > - mip->ra_index++; > > > + while (ra_want > 0) { > > > + next_ra = roundup((xfs_dablk_t)map.br_startoff, geo->fsbcount); > > > + while (ra_want > 0 && > > > + next_ra < map.br_startoff + map.br_blockcount) { > > > + if (next_ra >= last_da) { > > > + *ra_blk = last_da; > > > + break; > > > + } else if (next_ra > *ra_blk) { > > > + xfs_dir3_data_readahead(dp, next_ra, -2); > > > + *ra_blk = next_ra; > > > } > > > + ra_want -= geo->fsbcount; > > > + next_ra += geo->fsbcount; > > > + } > > > + found = xfs_iext_get_extent(ifp, ++idx, &map); > > > + if (!found) { > > > + *ra_blk = last_da; > > > + break; > > > } > > > } > > > blk_finish_plug(&plug); > > > @@ -458,6 +342,9 @@ xfs_dir2_leaf_readbuf( > > > out: > > > *bpp = bp; > > > return error; > > > +out_no_ra: > > > + *ra_blk = last_da; > > > + goto out; > > > } > > > > > > /* > > > @@ -475,14 +362,14 @@ xfs_dir2_leaf_getdents( > > > xfs_dir2_data_hdr_t *hdr; /* data block header */ > > > xfs_dir2_data_entry_t *dep; /* data entry */ > > > xfs_dir2_data_unused_t *dup; /* unused entry */ > > > - int error = 0; /* error return value */ > > > - int length; /* temporary length value */ > > > - int byteoff; /* offset in current block */ > > > - xfs_dir2_off_t curoff; /* current overall offset */ > > > - xfs_dir2_off_t newoff; /* new curoff after new blk */ > > > char *ptr = NULL; /* pointer to current data */ > > > - struct xfs_dir2_leaf_map_info *map_info; > > > struct xfs_da_geometry *geo = args->geo; > > > + xfs_dablk_t rablk = 0; /* current readahead block */ > > > + xfs_dir2_off_t curoff; /* current overall offset */ > > > + int length; /* temporary length value */ > > > + int byteoff; /* offset in current block */ > > > + int lock_mode; > > > + int error = 0; /* error return value */ > > > > > > /* > > > * If the offset is at or past the largest allowed value, > > > @@ -492,30 +379,12 @@ xfs_dir2_leaf_getdents( > > > return 0; > > > > > > /* > > > - * Set up to bmap a number of blocks based on the caller's > > > - * buffer size, the directory block size, and the filesystem > > > - * block size. > > > - */ > > > - length = howmany(bufsize + geo->blksize, (1 << geo->fsblog)); > > > - map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) + > > > - (length * sizeof(struct xfs_bmbt_irec)), > > > - KM_SLEEP | KM_NOFS); > > > - map_info->map_size = length; > > > - > > > - /* > > > * Inside the loop we keep the main offset value as a byte offset > > > * in the directory file. > > > */ > > > curoff = xfs_dir2_dataptr_to_byte(ctx->pos); > > > > > > /* > > > - * Force this conversion through db so we truncate the offset > > > - * down to get the start of the data block. > > > - */ > > > - map_info->map_off = xfs_dir2_db_to_da(geo, > > > - xfs_dir2_byte_to_db(geo, curoff)); > > > - > > > - /* > > > * Loop over directory entries until we reach the end offset. > > > * Get more blocks and readahead as necessary. > > > */ > > > @@ -527,38 +396,18 @@ xfs_dir2_leaf_getdents( > > > * current buffer, need to get another one. > > > */ > > > if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) { > > > - int lock_mode; > > > - bool trim_map = false; > > > - > > > if (bp) { > > > - xfs_trans_brelse(NULL, bp); > > > + xfs_trans_brelse(args->trans, bp); > > > bp = NULL; > > > - trim_map = true; > > > } > > > > > > lock_mode = xfs_ilock_data_map_shared(dp); > > > - error = xfs_dir2_leaf_readbuf(args, bufsize, map_info, > > > - &curoff, &bp, trim_map); > > > + error = xfs_dir2_leaf_readbuf(args, bufsize, &curoff, > > > + &rablk, &bp); > > > xfs_iunlock(dp, lock_mode); > > > - if (error || !map_info->map_valid) > > > + if (error || !bp) > > > break; > > > > > > - /* > > > - * Having done a read, we need to set a new offset. > > > - */ > > > - newoff = xfs_dir2_db_off_to_byte(geo, > > > - map_info->curdb, 0); > > > - /* > > > - * Start of the current block. > > > - */ > > > - if (curoff < newoff) > > > - curoff = newoff; > > > - /* > > > - * Make sure we're in the right block. > > > - */ > > > - else if (curoff > newoff) > > > - ASSERT(xfs_dir2_byte_to_db(geo, curoff) == > > > - map_info->curdb); > > > hdr = bp->b_addr; > > > xfs_dir3_data_check(dp, bp); > > > /* > > > @@ -643,7 +492,6 @@ xfs_dir2_leaf_getdents( > > > ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff; > > > else > > > ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff; > > > - kmem_free(map_info); > > > if (bp) > > > xfs_trans_brelse(NULL, bp); > > > return error; > > > -- > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > > > the body of a message to majordomo@vger.kernel.org > > > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, May 01, 2017 at 07:13:24PM -0400, Brian Foster wrote: > On Mon, May 01, 2017 at 02:50:18PM -0700, Darrick J. Wong wrote: > > On Mon, May 01, 2017 at 02:32:43PM -0400, Brian Foster wrote: > > > On Fri, Apr 28, 2017 at 12:46:52PM -0700, Darrick J. Wong wrote: > > > > Currently, the dir2 leaf block getdents function uses a complex state > > > > tracking mechanism to create a shadow copy of the block mappings and > > > > then uses the shadow copy to schedule readahead. Since the read and > > > > readahead functions are perfectly capable of reading the mappings > > > > themselves, we can tear all that out in favor of a simpler function that > > > > simply keeps pushing the readahead window further out. > > > > > > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > > > > --- > > > > v3: use sliding window to constrain the amount of readahead > > > > v2: fix readahead of more than ra_want > > > > --- > > > > > > Thanks for the updates. This looks much more simple and seems true to > > > the current readahead behavior. The code also looks fine to me (one bit > > > of whitespace damage noted below). > > > > > > That aside, have you happened to test this against a huge/ugly directory > > > to verify it works as expected? Note that I don't think in depth > > > performance analysis is required. Verification of any kind of dir that > > > is known to benefit from readahead is probably sufficient IMO. Perhaps > > > dm-delay with a small enough latency to allow us to measure the effect > > > of readahead could help us here. > > > > Yeah, I used xfs/349 to generate a filesystem containing a directory > > with a freeindex block, then ls'd the entire directory to see how long > > the getdents calls took. The readhead calls were nearly identical with > > similar runtimes. > > > > Ok.. > > > > > fs/xfs/xfs_dir2_readdir.c | 316 ++++++++++++--------------------------------- > > > > 1 file changed, 82 insertions(+), 234 deletions(-) > > > > > > > > diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c > > > > index 20b7a5c..d05c1ec 100644 > > > > --- a/fs/xfs/xfs_dir2_readdir.c > > > > +++ b/fs/xfs/xfs_dir2_readdir.c > > > > @@ -243,214 +243,98 @@ xfs_dir2_block_getdents( > > > > return 0; > > > > } > > > > > > > ... > > > > /* > > > > - * Do we need more readahead? > > > > - * Each loop tries to process 1 full dir blk; last may be partial. > > > > + * Start readahead for the next bufsize's worth of dir data blocks. > > > > + * We may have already issued readahead for some of that range; > > > > + * ra_blk tracks the last block we tried to read(ahead). > > > > */ > > > > + ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog)); > > > > + if (*ra_blk >= last_da) > > > > + goto out; > > > > + else if (*ra_blk == 0) > > > > + *ra_blk = map.br_startoff; > > > > + next_ra = map.br_startoff + geo->fsbcount; > > > > + if (next_ra >= last_da) > > > > + goto out_no_ra; > > > > + found = xfs_iext_lookup_extent(dp, ifp, next_ra, &idx, &map); > > > > + if (!found || map.br_startoff >= last_da) > > > > + goto out_no_ra; > > > > + xfs_trim_extent(&map, next_ra, last_da - next_ra); > > > > + > > > > > > ^ trailing space. > > > > (I don't see it...?) > > > > checkpatch catches it: > > $ ./scripts/checkpatch.pl /tmp/patch > ERROR: trailing whitespace > #227: FILE: fs/xfs/xfs_dir2_readdir.c:317: > + $ > > WARNING: please, no spaces at the start of a line > #227: FILE: fs/xfs/xfs_dir2_readdir.c:317: > + $ <sigh> vi was lying to me, like always. Reload buffer and *poof* the trailing whitespace appears... <grumble> Thanks for the review, --D > > total: 1 errors, 1 warnings, 391 lines checked > ... > > With that fixed: > > Reviewed-by: Brian Foster <bfoster@redhat.com> > > > --D > > > > > > > > Brian > > > > > > > + /* Start ra for each dir (not fs) block that has a mapping. */ > > > > blk_start_plug(&plug); > > > > - for (mip->ra_index = mip->ra_offset = i = 0; > > > > - mip->ra_want > mip->ra_current && i < mip->map_blocks; > > > > - i += geo->fsbcount) { > > > > - ASSERT(mip->ra_index < mip->map_valid); > > > > - /* > > > > - * Read-ahead a contiguous directory block. > > > > - */ > > > > - if (i > mip->ra_current && > > > > - (map[mip->ra_index].br_blockcount - mip->ra_offset) >= > > > > - geo->fsbcount) { > > > > - xfs_dir3_data_readahead(dp, > > > > - map[mip->ra_index].br_startoff + mip->ra_offset, > > > > - XFS_FSB_TO_DADDR(dp->i_mount, > > > > - map[mip->ra_index].br_startblock + > > > > - mip->ra_offset)); > > > > - mip->ra_current = i; > > > > - } > > > > - > > > > - /* > > > > - * Read-ahead a non-contiguous directory block. This doesn't > > > > - * use our mapping, but this is a very rare case. > > > > - */ > > > > - else if (i > mip->ra_current) { > > > > - xfs_dir3_data_readahead(dp, > > > > - map[mip->ra_index].br_startoff + > > > > - mip->ra_offset, -1); > > > > - mip->ra_current = i; > > > > - } > > > > - > > > > - /* > > > > - * Advance offset through the mapping table, processing a full > > > > - * dir block even if it is fragmented into several extents. > > > > - * But stop if we have consumed all valid mappings, even if > > > > - * it's not yet a full directory block. > > > > - */ > > > > - for (j = 0; > > > > - j < geo->fsbcount && mip->ra_index < mip->map_valid; > > > > - j += length ) { > > > > - /* > > > > - * The rest of this extent but not more than a dir > > > > - * block. > > > > - */ > > > > - length = min_t(int, geo->fsbcount - j, > > > > - map[mip->ra_index].br_blockcount - > > > > - mip->ra_offset); > > > > - mip->ra_offset += length; > > > > - > > > > - /* > > > > - * Advance to the next mapping if this one is used up. > > > > - */ > > > > - if (mip->ra_offset == map[mip->ra_index].br_blockcount) { > > > > - mip->ra_offset = 0; > > > > - mip->ra_index++; > > > > + while (ra_want > 0) { > > > > + next_ra = roundup((xfs_dablk_t)map.br_startoff, geo->fsbcount); > > > > + while (ra_want > 0 && > > > > + next_ra < map.br_startoff + map.br_blockcount) { > > > > + if (next_ra >= last_da) { > > > > + *ra_blk = last_da; > > > > + break; > > > > + } else if (next_ra > *ra_blk) { > > > > + xfs_dir3_data_readahead(dp, next_ra, -2); > > > > + *ra_blk = next_ra; > > > > } > > > > + ra_want -= geo->fsbcount; > > > > + next_ra += geo->fsbcount; > > > > + } > > > > + found = xfs_iext_get_extent(ifp, ++idx, &map); > > > > + if (!found) { > > > > + *ra_blk = last_da; > > > > + break; > > > > } > > > > } > > > > blk_finish_plug(&plug); > > > > @@ -458,6 +342,9 @@ xfs_dir2_leaf_readbuf( > > > > out: > > > > *bpp = bp; > > > > return error; > > > > +out_no_ra: > > > > + *ra_blk = last_da; > > > > + goto out; > > > > } > > > > > > > > /* > > > > @@ -475,14 +362,14 @@ xfs_dir2_leaf_getdents( > > > > xfs_dir2_data_hdr_t *hdr; /* data block header */ > > > > xfs_dir2_data_entry_t *dep; /* data entry */ > > > > xfs_dir2_data_unused_t *dup; /* unused entry */ > > > > - int error = 0; /* error return value */ > > > > - int length; /* temporary length value */ > > > > - int byteoff; /* offset in current block */ > > > > - xfs_dir2_off_t curoff; /* current overall offset */ > > > > - xfs_dir2_off_t newoff; /* new curoff after new blk */ > > > > char *ptr = NULL; /* pointer to current data */ > > > > - struct xfs_dir2_leaf_map_info *map_info; > > > > struct xfs_da_geometry *geo = args->geo; > > > > + xfs_dablk_t rablk = 0; /* current readahead block */ > > > > + xfs_dir2_off_t curoff; /* current overall offset */ > > > > + int length; /* temporary length value */ > > > > + int byteoff; /* offset in current block */ > > > > + int lock_mode; > > > > + int error = 0; /* error return value */ > > > > > > > > /* > > > > * If the offset is at or past the largest allowed value, > > > > @@ -492,30 +379,12 @@ xfs_dir2_leaf_getdents( > > > > return 0; > > > > > > > > /* > > > > - * Set up to bmap a number of blocks based on the caller's > > > > - * buffer size, the directory block size, and the filesystem > > > > - * block size. > > > > - */ > > > > - length = howmany(bufsize + geo->blksize, (1 << geo->fsblog)); > > > > - map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) + > > > > - (length * sizeof(struct xfs_bmbt_irec)), > > > > - KM_SLEEP | KM_NOFS); > > > > - map_info->map_size = length; > > > > - > > > > - /* > > > > * Inside the loop we keep the main offset value as a byte offset > > > > * in the directory file. > > > > */ > > > > curoff = xfs_dir2_dataptr_to_byte(ctx->pos); > > > > > > > > /* > > > > - * Force this conversion through db so we truncate the offset > > > > - * down to get the start of the data block. > > > > - */ > > > > - map_info->map_off = xfs_dir2_db_to_da(geo, > > > > - xfs_dir2_byte_to_db(geo, curoff)); > > > > - > > > > - /* > > > > * Loop over directory entries until we reach the end offset. > > > > * Get more blocks and readahead as necessary. > > > > */ > > > > @@ -527,38 +396,18 @@ xfs_dir2_leaf_getdents( > > > > * current buffer, need to get another one. > > > > */ > > > > if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) { > > > > - int lock_mode; > > > > - bool trim_map = false; > > > > - > > > > if (bp) { > > > > - xfs_trans_brelse(NULL, bp); > > > > + xfs_trans_brelse(args->trans, bp); > > > > bp = NULL; > > > > - trim_map = true; > > > > } > > > > > > > > lock_mode = xfs_ilock_data_map_shared(dp); > > > > - error = xfs_dir2_leaf_readbuf(args, bufsize, map_info, > > > > - &curoff, &bp, trim_map); > > > > + error = xfs_dir2_leaf_readbuf(args, bufsize, &curoff, > > > > + &rablk, &bp); > > > > xfs_iunlock(dp, lock_mode); > > > > - if (error || !map_info->map_valid) > > > > + if (error || !bp) > > > > break; > > > > > > > > - /* > > > > - * Having done a read, we need to set a new offset. > > > > - */ > > > > - newoff = xfs_dir2_db_off_to_byte(geo, > > > > - map_info->curdb, 0); > > > > - /* > > > > - * Start of the current block. > > > > - */ > > > > - if (curoff < newoff) > > > > - curoff = newoff; > > > > - /* > > > > - * Make sure we're in the right block. > > > > - */ > > > > - else if (curoff > newoff) > > > > - ASSERT(xfs_dir2_byte_to_db(geo, curoff) == > > > > - map_info->curdb); > > > > hdr = bp->b_addr; > > > > xfs_dir3_data_check(dp, bp); > > > > /* > > > > @@ -643,7 +492,6 @@ xfs_dir2_leaf_getdents( > > > > ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff; > > > > else > > > > ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff; > > > > - kmem_free(map_info); > > > > if (bp) > > > > xfs_trans_brelse(NULL, bp); > > > > return error; > > > > -- > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > > > > the body of a message to majordomo@vger.kernel.org > > > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Darrick, a few comments below. Most are cosmetic except for one which is a minor improvement. Reviewed-by: Christoph Hellwig <hch@lst.de> with the cosmetic bits fixed up. > + last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET); > + map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *cur_off)); > + found = xfs_iext_lookup_extent(dp, ifp, map_off, &idx, &map); > + if (!found || map.br_startoff >= last_da) > goto out; I don't think we need the found variable in this function, all the users only check for in the next line and then ignore it. E.g. rewrite this into if (!xfs_iext_lookup_extent(dp, ifp, map_off, &idx, &map)) goto out; if (map.br_startoff >= last_da)) goto out; > + ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog)); > + if (*ra_blk >= last_da) > + goto out; > + else if (*ra_blk == 0) > + *ra_blk = map.br_startoff; No need for the else here. > + next_ra = map.br_startoff + geo->fsbcount; > + if (next_ra >= last_da) > + goto out_no_ra; > + found = xfs_iext_lookup_extent(dp, ifp, next_ra, &idx, &map); Do we really need a new full lookup here? This should be the same or the next map compared to the one the original xfs_iext_lookup_extent returned. So just checking if it's in the original map that could be stashed away or otherwise calling xfs_iext_get_extent would more efficient. > + while (ra_want > 0) { > + next_ra = roundup((xfs_dablk_t)map.br_startoff, geo->fsbcount); > + while (ra_want > 0 && > + next_ra < map.br_startoff + map.br_blockcount) { > + if (next_ra >= last_da) { > + *ra_blk = last_da; > + break; > + } else if (next_ra > *ra_blk) { No need for the else. -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, May 01, 2017 at 04:30:49PM -0700, Darrick J. Wong wrote: > On Mon, May 01, 2017 at 07:13:24PM -0400, Brian Foster wrote: > > On Mon, May 01, 2017 at 02:50:18PM -0700, Darrick J. Wong wrote: > > > On Mon, May 01, 2017 at 02:32:43PM -0400, Brian Foster wrote: > > > > On Fri, Apr 28, 2017 at 12:46:52PM -0700, Darrick J. Wong wrote: > > > > > Currently, the dir2 leaf block getdents function uses a complex state > > > > > tracking mechanism to create a shadow copy of the block mappings and > > > > > then uses the shadow copy to schedule readahead. Since the read and > > > > > readahead functions are perfectly capable of reading the mappings > > > > > themselves, we can tear all that out in favor of a simpler function that > > > > > simply keeps pushing the readahead window further out. > > > > > > > > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > > > > > --- > > > > > v3: use sliding window to constrain the amount of readahead > > > > > v2: fix readahead of more than ra_want > > > > > --- > > > > > > > > Thanks for the updates. This looks much more simple and seems true to > > > > the current readahead behavior. The code also looks fine to me (one bit > > > > of whitespace damage noted below). > > > > > > > > That aside, have you happened to test this against a huge/ugly directory > > > > to verify it works as expected? Note that I don't think in depth > > > > performance analysis is required. Verification of any kind of dir that > > > > is known to benefit from readahead is probably sufficient IMO. Perhaps > > > > dm-delay with a small enough latency to allow us to measure the effect > > > > of readahead could help us here. > > > > > > Yeah, I used xfs/349 to generate a filesystem containing a directory > > > with a freeindex block, then ls'd the entire directory to see how long > > > the getdents calls took. The readhead calls were nearly identical with > > > similar runtimes. > > > > > > > Ok.. > > FWIW and out of curiosity, I also ran a little test using a directory from xfs/349 and dm-delay with a 200ms read delay. I measured an 'xfs_io -c readdir <dir>' command in the following configs: Current code: ~3.55s Current code, readahead disabled: ~17.06s This patch: ~3.35s So this also looks roughly equivalent to current performance, if not a little better. Brian > > > > > fs/xfs/xfs_dir2_readdir.c | 316 ++++++++++++--------------------------------- > > > > > 1 file changed, 82 insertions(+), 234 deletions(-) > > > > > > > > > > diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c > > > > > index 20b7a5c..d05c1ec 100644 > > > > > --- a/fs/xfs/xfs_dir2_readdir.c > > > > > +++ b/fs/xfs/xfs_dir2_readdir.c > > > > > @@ -243,214 +243,98 @@ xfs_dir2_block_getdents( > > > > > return 0; > > > > > } > > > > > > > > > ... > > > > > /* > > > > > - * Do we need more readahead? > > > > > - * Each loop tries to process 1 full dir blk; last may be partial. > > > > > + * Start readahead for the next bufsize's worth of dir data blocks. > > > > > + * We may have already issued readahead for some of that range; > > > > > + * ra_blk tracks the last block we tried to read(ahead). > > > > > */ > > > > > + ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog)); > > > > > + if (*ra_blk >= last_da) > > > > > + goto out; > > > > > + else if (*ra_blk == 0) > > > > > + *ra_blk = map.br_startoff; > > > > > + next_ra = map.br_startoff + geo->fsbcount; > > > > > + if (next_ra >= last_da) > > > > > + goto out_no_ra; > > > > > + found = xfs_iext_lookup_extent(dp, ifp, next_ra, &idx, &map); > > > > > + if (!found || map.br_startoff >= last_da) > > > > > + goto out_no_ra; > > > > > + xfs_trim_extent(&map, next_ra, last_da - next_ra); > > > > > + > > > > > > > > ^ trailing space. > > > > > > (I don't see it...?) > > > > > > > checkpatch catches it: > > > > $ ./scripts/checkpatch.pl /tmp/patch > > ERROR: trailing whitespace > > #227: FILE: fs/xfs/xfs_dir2_readdir.c:317: > > + $ > > > > WARNING: please, no spaces at the start of a line > > #227: FILE: fs/xfs/xfs_dir2_readdir.c:317: > > + $ > > <sigh> vi was lying to me, like always. > > Reload buffer and *poof* the trailing whitespace appears... <grumble> > > Thanks for the review, > > --D > > > > > total: 1 errors, 1 warnings, 391 lines checked > > ... > > > > With that fixed: > > > > Reviewed-by: Brian Foster <bfoster@redhat.com> > > > > > --D > > > > > > > > > > > Brian > > > > > > > > > + /* Start ra for each dir (not fs) block that has a mapping. */ > > > > > blk_start_plug(&plug); > > > > > - for (mip->ra_index = mip->ra_offset = i = 0; > > > > > - mip->ra_want > mip->ra_current && i < mip->map_blocks; > > > > > - i += geo->fsbcount) { > > > > > - ASSERT(mip->ra_index < mip->map_valid); > > > > > - /* > > > > > - * Read-ahead a contiguous directory block. > > > > > - */ > > > > > - if (i > mip->ra_current && > > > > > - (map[mip->ra_index].br_blockcount - mip->ra_offset) >= > > > > > - geo->fsbcount) { > > > > > - xfs_dir3_data_readahead(dp, > > > > > - map[mip->ra_index].br_startoff + mip->ra_offset, > > > > > - XFS_FSB_TO_DADDR(dp->i_mount, > > > > > - map[mip->ra_index].br_startblock + > > > > > - mip->ra_offset)); > > > > > - mip->ra_current = i; > > > > > - } > > > > > - > > > > > - /* > > > > > - * Read-ahead a non-contiguous directory block. This doesn't > > > > > - * use our mapping, but this is a very rare case. > > > > > - */ > > > > > - else if (i > mip->ra_current) { > > > > > - xfs_dir3_data_readahead(dp, > > > > > - map[mip->ra_index].br_startoff + > > > > > - mip->ra_offset, -1); > > > > > - mip->ra_current = i; > > > > > - } > > > > > - > > > > > - /* > > > > > - * Advance offset through the mapping table, processing a full > > > > > - * dir block even if it is fragmented into several extents. > > > > > - * But stop if we have consumed all valid mappings, even if > > > > > - * it's not yet a full directory block. > > > > > - */ > > > > > - for (j = 0; > > > > > - j < geo->fsbcount && mip->ra_index < mip->map_valid; > > > > > - j += length ) { > > > > > - /* > > > > > - * The rest of this extent but not more than a dir > > > > > - * block. > > > > > - */ > > > > > - length = min_t(int, geo->fsbcount - j, > > > > > - map[mip->ra_index].br_blockcount - > > > > > - mip->ra_offset); > > > > > - mip->ra_offset += length; > > > > > - > > > > > - /* > > > > > - * Advance to the next mapping if this one is used up. > > > > > - */ > > > > > - if (mip->ra_offset == map[mip->ra_index].br_blockcount) { > > > > > - mip->ra_offset = 0; > > > > > - mip->ra_index++; > > > > > + while (ra_want > 0) { > > > > > + next_ra = roundup((xfs_dablk_t)map.br_startoff, geo->fsbcount); > > > > > + while (ra_want > 0 && > > > > > + next_ra < map.br_startoff + map.br_blockcount) { > > > > > + if (next_ra >= last_da) { > > > > > + *ra_blk = last_da; > > > > > + break; > > > > > + } else if (next_ra > *ra_blk) { > > > > > + xfs_dir3_data_readahead(dp, next_ra, -2); > > > > > + *ra_blk = next_ra; > > > > > } > > > > > + ra_want -= geo->fsbcount; > > > > > + next_ra += geo->fsbcount; > > > > > + } > > > > > + found = xfs_iext_get_extent(ifp, ++idx, &map); > > > > > + if (!found) { > > > > > + *ra_blk = last_da; > > > > > + break; > > > > > } > > > > > } > > > > > blk_finish_plug(&plug); > > > > > @@ -458,6 +342,9 @@ xfs_dir2_leaf_readbuf( > > > > > out: > > > > > *bpp = bp; > > > > > return error; > > > > > +out_no_ra: > > > > > + *ra_blk = last_da; > > > > > + goto out; > > > > > } > > > > > > > > > > /* > > > > > @@ -475,14 +362,14 @@ xfs_dir2_leaf_getdents( > > > > > xfs_dir2_data_hdr_t *hdr; /* data block header */ > > > > > xfs_dir2_data_entry_t *dep; /* data entry */ > > > > > xfs_dir2_data_unused_t *dup; /* unused entry */ > > > > > - int error = 0; /* error return value */ > > > > > - int length; /* temporary length value */ > > > > > - int byteoff; /* offset in current block */ > > > > > - xfs_dir2_off_t curoff; /* current overall offset */ > > > > > - xfs_dir2_off_t newoff; /* new curoff after new blk */ > > > > > char *ptr = NULL; /* pointer to current data */ > > > > > - struct xfs_dir2_leaf_map_info *map_info; > > > > > struct xfs_da_geometry *geo = args->geo; > > > > > + xfs_dablk_t rablk = 0; /* current readahead block */ > > > > > + xfs_dir2_off_t curoff; /* current overall offset */ > > > > > + int length; /* temporary length value */ > > > > > + int byteoff; /* offset in current block */ > > > > > + int lock_mode; > > > > > + int error = 0; /* error return value */ > > > > > > > > > > /* > > > > > * If the offset is at or past the largest allowed value, > > > > > @@ -492,30 +379,12 @@ xfs_dir2_leaf_getdents( > > > > > return 0; > > > > > > > > > > /* > > > > > - * Set up to bmap a number of blocks based on the caller's > > > > > - * buffer size, the directory block size, and the filesystem > > > > > - * block size. > > > > > - */ > > > > > - length = howmany(bufsize + geo->blksize, (1 << geo->fsblog)); > > > > > - map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) + > > > > > - (length * sizeof(struct xfs_bmbt_irec)), > > > > > - KM_SLEEP | KM_NOFS); > > > > > - map_info->map_size = length; > > > > > - > > > > > - /* > > > > > * Inside the loop we keep the main offset value as a byte offset > > > > > * in the directory file. > > > > > */ > > > > > curoff = xfs_dir2_dataptr_to_byte(ctx->pos); > > > > > > > > > > /* > > > > > - * Force this conversion through db so we truncate the offset > > > > > - * down to get the start of the data block. > > > > > - */ > > > > > - map_info->map_off = xfs_dir2_db_to_da(geo, > > > > > - xfs_dir2_byte_to_db(geo, curoff)); > > > > > - > > > > > - /* > > > > > * Loop over directory entries until we reach the end offset. > > > > > * Get more blocks and readahead as necessary. > > > > > */ > > > > > @@ -527,38 +396,18 @@ xfs_dir2_leaf_getdents( > > > > > * current buffer, need to get another one. > > > > > */ > > > > > if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) { > > > > > - int lock_mode; > > > > > - bool trim_map = false; > > > > > - > > > > > if (bp) { > > > > > - xfs_trans_brelse(NULL, bp); > > > > > + xfs_trans_brelse(args->trans, bp); > > > > > bp = NULL; > > > > > - trim_map = true; > > > > > } > > > > > > > > > > lock_mode = xfs_ilock_data_map_shared(dp); > > > > > - error = xfs_dir2_leaf_readbuf(args, bufsize, map_info, > > > > > - &curoff, &bp, trim_map); > > > > > + error = xfs_dir2_leaf_readbuf(args, bufsize, &curoff, > > > > > + &rablk, &bp); > > > > > xfs_iunlock(dp, lock_mode); > > > > > - if (error || !map_info->map_valid) > > > > > + if (error || !bp) > > > > > break; > > > > > > > > > > - /* > > > > > - * Having done a read, we need to set a new offset. > > > > > - */ > > > > > - newoff = xfs_dir2_db_off_to_byte(geo, > > > > > - map_info->curdb, 0); > > > > > - /* > > > > > - * Start of the current block. > > > > > - */ > > > > > - if (curoff < newoff) > > > > > - curoff = newoff; > > > > > - /* > > > > > - * Make sure we're in the right block. > > > > > - */ > > > > > - else if (curoff > newoff) > > > > > - ASSERT(xfs_dir2_byte_to_db(geo, curoff) == > > > > > - map_info->curdb); > > > > > hdr = bp->b_addr; > > > > > xfs_dir3_data_check(dp, bp); > > > > > /* > > > > > @@ -643,7 +492,6 @@ xfs_dir2_leaf_getdents( > > > > > ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff; > > > > > else > > > > > ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff; > > > > > - kmem_free(map_info); > > > > > if (bp) > > > > > xfs_trans_brelse(NULL, bp); > > > > > return error; > > > > > -- > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > > > > > the body of a message to majordomo@vger.kernel.org > > > > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > > -- > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > > > the body of a message to majordomo@vger.kernel.org > > > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, May 02, 2017 at 12:44:00AM -0700, Christoph Hellwig wrote: > Hi Darrick, > > a few comments below. Most are cosmetic except for one which is > a minor improvement. > > Reviewed-by: Christoph Hellwig <hch@lst.de> > > with the cosmetic bits fixed up. > > > + last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET); > > + map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *cur_off)); > > + found = xfs_iext_lookup_extent(dp, ifp, map_off, &idx, &map); > > + if (!found || map.br_startoff >= last_da) > > goto out; > > I don't think we need the found variable in this function, all the users > only check for in the next line and then ignore it. E.g. rewrite this > into > > if (!xfs_iext_lookup_extent(dp, ifp, map_off, &idx, &map)) > goto out; > if (map.br_startoff >= last_da)) > goto out; Ok. > > + ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog)); > > + if (*ra_blk >= last_da) > > + goto out; > > + else if (*ra_blk == 0) > > + *ra_blk = map.br_startoff; > > No need for the else here. Ok. > > + next_ra = map.br_startoff + geo->fsbcount; > > + if (next_ra >= last_da) > > + goto out_no_ra; > > + found = xfs_iext_lookup_extent(dp, ifp, next_ra, &idx, &map); > > Do we really need a new full lookup here? This should be the same > or the next map compared to the one the original xfs_iext_lookup_extent > returned. So just checking if it's in the original map that could > be stashed away or otherwise calling xfs_iext_get_extent would more > efficient. Sure, that could be something like: next_ra = map.br_startoff + geo->fsbcount; if (next_ra >= last_da) goto out_no_ra; if (map.br_blockcount < geo->fsbcount && !xfs_iext_get_extent(ifp, ++idx, &map)) goto out_no_ra; if (map.br_startoff >= last_da) goto out_no_ra; xfs_trim_extent(&map, next_ra, last_da - next_ra); > > + while (ra_want > 0) { > > + next_ra = roundup((xfs_dablk_t)map.br_startoff, geo->fsbcount); > > + while (ra_want > 0 && > > + next_ra < map.br_startoff + map.br_blockcount) { > > + if (next_ra >= last_da) { > > + *ra_blk = last_da; > > + break; > > + } else if (next_ra > *ra_blk) { > > No need for the else. Ok. --D > > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 20b7a5c..d05c1ec 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -243,214 +243,98 @@ xfs_dir2_block_getdents( return 0; } -struct xfs_dir2_leaf_map_info { - xfs_extlen_t map_blocks; /* number of fsbs in map */ - xfs_dablk_t map_off; /* last mapped file offset */ - int map_size; /* total entries in *map */ - int map_valid; /* valid entries in *map */ - int nmap; /* mappings to ask xfs_bmapi */ - xfs_dir2_db_t curdb; /* db for current block */ - int ra_current; /* number of read-ahead blks */ - int ra_index; /* *map index for read-ahead */ - int ra_offset; /* map entry offset for ra */ - int ra_want; /* readahead count wanted */ - struct xfs_bmbt_irec map[]; /* map vector for blocks */ -}; - +/* + * Read a directory block and initiate readahead for blocks beyond that. + * We maintain a sliding readahead window of the remaining space in the + * buffer rounded up to the nearest block. + */ STATIC int xfs_dir2_leaf_readbuf( struct xfs_da_args *args, size_t bufsize, - struct xfs_dir2_leaf_map_info *mip, - xfs_dir2_off_t *curoff, - struct xfs_buf **bpp, - bool trim_map) + xfs_dir2_off_t *cur_off, + xfs_dablk_t *ra_blk, + struct xfs_buf **bpp) { struct xfs_inode *dp = args->dp; struct xfs_buf *bp = NULL; - struct xfs_bmbt_irec *map = mip->map; + struct xfs_da_geometry *geo = args->geo; + struct xfs_ifork *ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK); + struct xfs_bmbt_irec map; struct blk_plug plug; + xfs_dir2_off_t new_off; + xfs_dablk_t next_ra; + xfs_dablk_t map_off; + xfs_dablk_t last_da; + xfs_extnum_t idx; + bool found; + int ra_want; int error = 0; - int length; - int i; - int j; - struct xfs_da_geometry *geo = args->geo; - - /* - * If the caller just finished processing a buffer, it will tell us - * we need to trim that block out of the mapping now it is done. - */ - if (trim_map) { - mip->map_blocks -= geo->fsbcount; - /* - * Loop to get rid of the extents for the - * directory block. - */ - for (i = geo->fsbcount; i > 0; ) { - j = min_t(int, map->br_blockcount, i); - map->br_blockcount -= j; - map->br_startblock += j; - map->br_startoff += j; - /* - * If mapping is done, pitch it from - * the table. - */ - if (!map->br_blockcount && --mip->map_valid) - memmove(&map[0], &map[1], - sizeof(map[0]) * mip->map_valid); - i -= j; - } - } - - /* - * Recalculate the readahead blocks wanted. - */ - mip->ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog)) - 1; - ASSERT(mip->ra_want >= 0); - - /* - * If we don't have as many as we want, and we haven't - * run out of data blocks, get some more mappings. - */ - if (1 + mip->ra_want > mip->map_blocks && - mip->map_off < xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET)) { - /* - * Get more bmaps, fill in after the ones - * we already have in the table. - */ - mip->nmap = mip->map_size - mip->map_valid; - error = xfs_bmapi_read(dp, mip->map_off, - xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET) - - mip->map_off, - &map[mip->map_valid], &mip->nmap, 0); - /* - * Don't know if we should ignore this or try to return an - * error. The trouble with returning errors is that readdir - * will just stop without actually passing the error through. - */ + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(args->trans, dp, XFS_DATA_FORK); if (error) - goto out; /* XXX */ - - /* - * If we got all the mappings we asked for, set the final map - * offset based on the last bmap value received. Otherwise, - * we've reached the end. - */ - if (mip->nmap == mip->map_size - mip->map_valid) { - i = mip->map_valid + mip->nmap - 1; - mip->map_off = map[i].br_startoff + map[i].br_blockcount; - } else - mip->map_off = xfs_dir2_byte_to_da(geo, - XFS_DIR2_LEAF_OFFSET); - - /* - * Look for holes in the mapping, and eliminate them. Count up - * the valid blocks. - */ - for (i = mip->map_valid; i < mip->map_valid + mip->nmap; ) { - if (map[i].br_startblock == HOLESTARTBLOCK) { - mip->nmap--; - length = mip->map_valid + mip->nmap - i; - if (length) - memmove(&map[i], &map[i + 1], - sizeof(map[i]) * length); - } else { - mip->map_blocks += map[i].br_blockcount; - i++; - } - } - mip->map_valid += mip->nmap; + goto out; } /* - * No valid mappings, so no more data blocks. + * Look for mapped directory blocks at or above the current offset. + * Truncate down to the nearest directory block to start the scanning + * operation. */ - if (!mip->map_valid) { - *curoff = xfs_dir2_da_to_byte(geo, mip->map_off); + last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET); + map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *cur_off)); + found = xfs_iext_lookup_extent(dp, ifp, map_off, &idx, &map); + if (!found || map.br_startoff >= last_da) goto out; - } + xfs_trim_extent(&map, map_off, last_da - map_off); - /* - * Read the directory block starting at the first mapping. - */ - mip->curdb = xfs_dir2_da_to_db(geo, map->br_startoff); - error = xfs_dir3_data_read(NULL, dp, map->br_startoff, - map->br_blockcount >= geo->fsbcount ? - XFS_FSB_TO_DADDR(dp->i_mount, map->br_startblock) : - -1, &bp); - /* - * Should just skip over the data block instead of giving up. - */ + /* Read the directory block of that first mapping. */ + new_off = xfs_dir2_da_to_byte(geo, map.br_startoff); + if (new_off > *cur_off) + *cur_off = new_off; + error = xfs_dir3_data_read(args->trans, dp, map.br_startoff, -1, &bp); if (error) - goto out; /* XXX */ - - /* - * Adjust the current amount of read-ahead: we just read a block that - * was previously ra. - */ - if (mip->ra_current) - mip->ra_current -= geo->fsbcount; + goto out; /* - * Do we need more readahead? - * Each loop tries to process 1 full dir blk; last may be partial. + * Start readahead for the next bufsize's worth of dir data blocks. + * We may have already issued readahead for some of that range; + * ra_blk tracks the last block we tried to read(ahead). */ + ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog)); + if (*ra_blk >= last_da) + goto out; + else if (*ra_blk == 0) + *ra_blk = map.br_startoff; + next_ra = map.br_startoff + geo->fsbcount; + if (next_ra >= last_da) + goto out_no_ra; + found = xfs_iext_lookup_extent(dp, ifp, next_ra, &idx, &map); + if (!found || map.br_startoff >= last_da) + goto out_no_ra; + xfs_trim_extent(&map, next_ra, last_da - next_ra); + + /* Start ra for each dir (not fs) block that has a mapping. */ blk_start_plug(&plug); - for (mip->ra_index = mip->ra_offset = i = 0; - mip->ra_want > mip->ra_current && i < mip->map_blocks; - i += geo->fsbcount) { - ASSERT(mip->ra_index < mip->map_valid); - /* - * Read-ahead a contiguous directory block. - */ - if (i > mip->ra_current && - (map[mip->ra_index].br_blockcount - mip->ra_offset) >= - geo->fsbcount) { - xfs_dir3_data_readahead(dp, - map[mip->ra_index].br_startoff + mip->ra_offset, - XFS_FSB_TO_DADDR(dp->i_mount, - map[mip->ra_index].br_startblock + - mip->ra_offset)); - mip->ra_current = i; - } - - /* - * Read-ahead a non-contiguous directory block. This doesn't - * use our mapping, but this is a very rare case. - */ - else if (i > mip->ra_current) { - xfs_dir3_data_readahead(dp, - map[mip->ra_index].br_startoff + - mip->ra_offset, -1); - mip->ra_current = i; - } - - /* - * Advance offset through the mapping table, processing a full - * dir block even if it is fragmented into several extents. - * But stop if we have consumed all valid mappings, even if - * it's not yet a full directory block. - */ - for (j = 0; - j < geo->fsbcount && mip->ra_index < mip->map_valid; - j += length ) { - /* - * The rest of this extent but not more than a dir - * block. - */ - length = min_t(int, geo->fsbcount - j, - map[mip->ra_index].br_blockcount - - mip->ra_offset); - mip->ra_offset += length; - - /* - * Advance to the next mapping if this one is used up. - */ - if (mip->ra_offset == map[mip->ra_index].br_blockcount) { - mip->ra_offset = 0; - mip->ra_index++; + while (ra_want > 0) { + next_ra = roundup((xfs_dablk_t)map.br_startoff, geo->fsbcount); + while (ra_want > 0 && + next_ra < map.br_startoff + map.br_blockcount) { + if (next_ra >= last_da) { + *ra_blk = last_da; + break; + } else if (next_ra > *ra_blk) { + xfs_dir3_data_readahead(dp, next_ra, -2); + *ra_blk = next_ra; } + ra_want -= geo->fsbcount; + next_ra += geo->fsbcount; + } + found = xfs_iext_get_extent(ifp, ++idx, &map); + if (!found) { + *ra_blk = last_da; + break; } } blk_finish_plug(&plug); @@ -458,6 +342,9 @@ xfs_dir2_leaf_readbuf( out: *bpp = bp; return error; +out_no_ra: + *ra_blk = last_da; + goto out; } /* @@ -475,14 +362,14 @@ xfs_dir2_leaf_getdents( xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_data_entry_t *dep; /* data entry */ xfs_dir2_data_unused_t *dup; /* unused entry */ - int error = 0; /* error return value */ - int length; /* temporary length value */ - int byteoff; /* offset in current block */ - xfs_dir2_off_t curoff; /* current overall offset */ - xfs_dir2_off_t newoff; /* new curoff after new blk */ char *ptr = NULL; /* pointer to current data */ - struct xfs_dir2_leaf_map_info *map_info; struct xfs_da_geometry *geo = args->geo; + xfs_dablk_t rablk = 0; /* current readahead block */ + xfs_dir2_off_t curoff; /* current overall offset */ + int length; /* temporary length value */ + int byteoff; /* offset in current block */ + int lock_mode; + int error = 0; /* error return value */ /* * If the offset is at or past the largest allowed value, @@ -492,30 +379,12 @@ xfs_dir2_leaf_getdents( return 0; /* - * Set up to bmap a number of blocks based on the caller's - * buffer size, the directory block size, and the filesystem - * block size. - */ - length = howmany(bufsize + geo->blksize, (1 << geo->fsblog)); - map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) + - (length * sizeof(struct xfs_bmbt_irec)), - KM_SLEEP | KM_NOFS); - map_info->map_size = length; - - /* * Inside the loop we keep the main offset value as a byte offset * in the directory file. */ curoff = xfs_dir2_dataptr_to_byte(ctx->pos); /* - * Force this conversion through db so we truncate the offset - * down to get the start of the data block. - */ - map_info->map_off = xfs_dir2_db_to_da(geo, - xfs_dir2_byte_to_db(geo, curoff)); - - /* * Loop over directory entries until we reach the end offset. * Get more blocks and readahead as necessary. */ @@ -527,38 +396,18 @@ xfs_dir2_leaf_getdents( * current buffer, need to get another one. */ if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) { - int lock_mode; - bool trim_map = false; - if (bp) { - xfs_trans_brelse(NULL, bp); + xfs_trans_brelse(args->trans, bp); bp = NULL; - trim_map = true; } lock_mode = xfs_ilock_data_map_shared(dp); - error = xfs_dir2_leaf_readbuf(args, bufsize, map_info, - &curoff, &bp, trim_map); + error = xfs_dir2_leaf_readbuf(args, bufsize, &curoff, + &rablk, &bp); xfs_iunlock(dp, lock_mode); - if (error || !map_info->map_valid) + if (error || !bp) break; - /* - * Having done a read, we need to set a new offset. - */ - newoff = xfs_dir2_db_off_to_byte(geo, - map_info->curdb, 0); - /* - * Start of the current block. - */ - if (curoff < newoff) - curoff = newoff; - /* - * Make sure we're in the right block. - */ - else if (curoff > newoff) - ASSERT(xfs_dir2_byte_to_db(geo, curoff) == - map_info->curdb); hdr = bp->b_addr; xfs_dir3_data_check(dp, bp); /* @@ -643,7 +492,6 @@ xfs_dir2_leaf_getdents( ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff; else ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff; - kmem_free(map_info); if (bp) xfs_trans_brelse(NULL, bp); return error;
Currently, the dir2 leaf block getdents function uses a complex state tracking mechanism to create a shadow copy of the block mappings and then uses the shadow copy to schedule readahead. Since the read and readahead functions are perfectly capable of reading the mappings themselves, we can tear all that out in favor of a simpler function that simply keeps pushing the readahead window further out. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> --- v3: use sliding window to constrain the amount of readahead v2: fix readahead of more than ra_want --- fs/xfs/xfs_dir2_readdir.c | 316 ++++++++++++--------------------------------- 1 file changed, 82 insertions(+), 234 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html