Message ID | 20240318224715.3367463-10-david@fromorbit.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | xfs: use large folios for buffers | expand |
On Tue, Mar 19, 2024 at 09:46:00AM +1100, Dave Chinner wrote: > From: Dave Chinner <dchinner@redhat.com> > > The count is used purely to allocate the correct number of bvecs for > submitting IO. Rename it to b_bvec_count. Well, I think we should just kill it as it simplies is the rounded up length in PAGE_SIZE units. The patch below passes a quick xfstests run and is on top of this series: diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 2a6796c48454f7..8ecf88b5504c18 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -67,27 +67,17 @@ static inline bool xfs_buf_is_uncached(struct xfs_buf *bp) } /* - * Return true if the buffer is vmapped. - * - * b_addr is always set, so we have to look at bp->b_bvec_count to determine if - * the buffer was vmalloc()d or not. + * See comment above xfs_buf_alloc_folios() about the constraints placed on + * allocating vmapped buffers. */ -static inline int -xfs_buf_is_vmapped( - struct xfs_buf *bp) +static inline unsigned int xfs_buf_vmap_len(struct xfs_buf *bp) { - return bp->b_bvec_count > 1; + return roundup(BBTOB(bp->b_length), PAGE_SIZE); } -/* - * See comment above xfs_buf_alloc_folios() about the constraints placed on - * allocating vmapped buffers. - */ -static inline int -xfs_buf_vmap_len( - struct xfs_buf *bp) +static inline unsigned int xfs_buf_nr_pages(struct xfs_buf *bp) { - return (bp->b_bvec_count * PAGE_SIZE); + return DIV_ROUND_UP(BBTOB(bp->b_length), PAGE_SIZE); } /* @@ -304,13 +294,15 @@ xfs_buf_free( goto free; } - if (!(bp->b_flags & _XBF_KMEM)) - mm_account_reclaimed_pages(bp->b_bvec_count); - - if (bp->b_flags & _XBF_FOLIOS) - __folio_put(kmem_to_folio(bp->b_addr)); - else + if (bp->b_flags & _XBF_FOLIOS) { + /* XXX: should this pass xfs_buf_nr_pages()? 
*/ + mm_account_reclaimed_pages(1); + folio_put(kmem_to_folio(bp->b_addr)); + } else { + if (!(bp->b_flags & _XBF_KMEM)) + mm_account_reclaimed_pages(xfs_buf_nr_pages(bp)); kvfree(bp->b_addr); + } bp->b_flags &= _XBF_KMEM | _XBF_FOLIOS; @@ -341,7 +333,6 @@ xfs_buf_alloc_kmem( bp->b_addr = NULL; return -ENOMEM; } - bp->b_bvec_count = 1; bp->b_flags |= _XBF_KMEM; return 0; } @@ -369,7 +360,6 @@ xfs_buf_alloc_folio( return false; bp->b_addr = folio_address(folio); - bp->b_bvec_count = 1; bp->b_flags |= _XBF_FOLIOS; return true; } @@ -441,7 +431,6 @@ xfs_buf_alloc_folios( count); return -ENOMEM; } - bp->b_bvec_count = count; return 0; } @@ -1470,7 +1459,9 @@ xfs_buf_bio_end_io( cmpxchg(&bp->b_io_error, 0, error); } - if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) + if (!bp->b_error && + (bp->b_flags & XBF_READ) && + is_vmalloc_addr(bp->b_addr)) invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); if (atomic_dec_and_test(&bp->b_io_remaining) == 1) @@ -1485,6 +1476,7 @@ xfs_buf_ioapply_map( unsigned int *buf_offset, blk_opf_t op) { + unsigned int nr_vecs = 1; struct bio *bio; int size; @@ -1494,7 +1486,9 @@ xfs_buf_ioapply_map( atomic_inc(&bp->b_io_remaining); - bio = bio_alloc(bp->b_target->bt_bdev, bp->b_bvec_count, op, GFP_NOIO); + if (is_vmalloc_addr(bp->b_addr)) + nr_vecs = xfs_buf_nr_pages(bp); + bio = bio_alloc(bp->b_target->bt_bdev, nr_vecs, op, GFP_NOIO); bio->bi_iter.bi_sector = bp->b_maps[map].bm_bn; bio->bi_end_io = xfs_buf_bio_end_io; bio->bi_private = bp; @@ -1511,7 +1505,7 @@ xfs_buf_ioapply_map( *buf_offset += len; } while (size); - if (xfs_buf_is_vmapped(bp)) + if (is_vmalloc_addr(bp->b_addr)) flush_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); submit_bio(bio); } diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 32688525890bec..ad92d11f4ae173 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -195,7 +195,6 @@ struct xfs_buf { int b_map_count; atomic_t b_pin_count; /* pin count */ atomic_t 
b_io_remaining; /* #outstanding I/O requests */ - unsigned int b_bvec_count; /* bvecs needed for IO */ int b_error; /* error code on I/O */ /* diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c index 30d53ddd6e6980..f082b1a64fc950 100644 --- a/fs/xfs/xfs_buf_mem.c +++ b/fs/xfs/xfs_buf_mem.c @@ -169,7 +169,6 @@ xmbuf_map_folio( unlock_page(page); bp->b_addr = page_address(page); - bp->b_bvec_count = 1; return 0; } @@ -182,7 +181,6 @@ xmbuf_unmap_folio( folio_put(kmem_to_folio(bp->b_addr)); bp->b_addr = NULL; - bp->b_bvec_count = 0; } /* Is this a valid daddr within the buftarg? */
On Tue, Mar 19, 2024 at 12:37:09AM -0700, Christoph Hellwig wrote: > On Tue, Mar 19, 2024 at 09:46:00AM +1100, Dave Chinner wrote: > > From: Dave Chinner <dchinner@redhat.com> > > > > The count is used purely to allocate the correct number of bvecs for > > submitting IO. Rename it to b_bvec_count. > > Well, I think we should just kill it as it simply is the rounded > up length in PAGE_SIZE units. The patch below passes a quick xfstests > run and is on top of this series: This seems like a reasonable approach - fixing the mm_account_reclaimed_pages() issues earlier in the patch set meant I'd already done the xfs_buf_free() changes you made here. :) That just leaves the vmap wrappers to be fixed up - I think I'll just replace the b_folio_count rename patch with that cleanup... -Dave.
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 6d6bad80722e..2a6796c48454 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -69,15 +69,14 @@ static inline bool xfs_buf_is_uncached(struct xfs_buf *bp) /* * Return true if the buffer is vmapped. * - * b_addr is null if the buffer is not mapped, but the code is clever enough to - * know it doesn't have to map a single folio, so the check has to be both for - * b_addr and bp->b_folio_count > 1. + * b_addr is always set, so we have to look at bp->b_bvec_count to determine if + * the buffer was vmalloc()d or not. */ static inline int xfs_buf_is_vmapped( struct xfs_buf *bp) { - return bp->b_addr && bp->b_folio_count > 1; + return bp->b_bvec_count > 1; } /* @@ -88,7 +87,7 @@ static inline int xfs_buf_vmap_len( struct xfs_buf *bp) { - return (bp->b_folio_count * PAGE_SIZE); + return (bp->b_bvec_count * PAGE_SIZE); } /* @@ -306,7 +305,7 @@ xfs_buf_free( } if (!(bp->b_flags & _XBF_KMEM)) - mm_account_reclaimed_pages(bp->b_folio_count); + mm_account_reclaimed_pages(bp->b_bvec_count); if (bp->b_flags & _XBF_FOLIOS) __folio_put(kmem_to_folio(bp->b_addr)); @@ -342,7 +341,7 @@ xfs_buf_alloc_kmem( bp->b_addr = NULL; return -ENOMEM; } - bp->b_folio_count = 1; + bp->b_bvec_count = 1; bp->b_flags |= _XBF_KMEM; return 0; } @@ -370,7 +369,7 @@ xfs_buf_alloc_folio( return false; bp->b_addr = folio_address(folio); - bp->b_folio_count = 1; + bp->b_bvec_count = 1; bp->b_flags |= _XBF_FOLIOS; return true; } @@ -398,6 +397,7 @@ xfs_buf_alloc_folios( { gfp_t gfp_mask = GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOWARN; unsigned nofs_flag; + unsigned int count; if (flags & XBF_READ_AHEAD) gfp_mask |= __GFP_NORETRY; @@ -407,16 +407,24 @@ xfs_buf_alloc_folios( gfp_mask |= __GFP_ZERO; /* Fall back to allocating an array of single page folios. */ - bp->b_folio_count = DIV_ROUND_UP(BBTOB(bp->b_length), PAGE_SIZE); + count = DIV_ROUND_UP(BBTOB(bp->b_length), PAGE_SIZE); /* Optimistically attempt a single high order folio allocation. 
*/ if (xfs_buf_alloc_folio(bp, gfp_mask)) return 0; /* We are done if an order-0 allocation has already failed. */ - if (bp->b_folio_count == 1) + if (count == 1) return -ENOMEM; + /* + * Largest buffer we allocate should fit entirely in a single bio, + * so warn and fail if somebody asks for a buffer larger than can + * be supported. + */ + if (WARN_ON_ONCE(count > BIO_MAX_VECS)) + return -EIO; + /* * XXX(dgc): I think dquot reclaim is the only place we can get * to this function from memory reclaim context now. If we fix @@ -430,9 +438,10 @@ xfs_buf_alloc_folios( if (!bp->b_addr) { xfs_warn_ratelimited(bp->b_mount, "%s: failed to allocate %u folios", __func__, - bp->b_folio_count); + count); return -ENOMEM; } + bp->b_bvec_count = count; return 0; } @@ -1483,14 +1492,9 @@ xfs_buf_ioapply_map( size = min_t(unsigned int, BBTOB(bp->b_maps[map].bm_len), BBTOB(bp->b_length) - *buf_offset); - if (WARN_ON_ONCE(bp->b_folio_count > BIO_MAX_VECS)) { - xfs_buf_ioerror(bp, -EIO); - return; - } - atomic_inc(&bp->b_io_remaining); - bio = bio_alloc(bp->b_target->bt_bdev, bp->b_folio_count, op, GFP_NOIO); + bio = bio_alloc(bp->b_target->bt_bdev, bp->b_bvec_count, op, GFP_NOIO); bio->bi_iter.bi_sector = bp->b_maps[map].bm_bn; bio->bi_end_io = xfs_buf_bio_end_io; bio->bi_private = bp; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 68c24947ca1a..32688525890b 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -195,7 +195,7 @@ struct xfs_buf { int b_map_count; atomic_t b_pin_count; /* pin count */ atomic_t b_io_remaining; /* #outstanding I/O requests */ - unsigned int b_folio_count; /* size of folio array */ + unsigned int b_bvec_count; /* bvecs needed for IO */ int b_error; /* error code on I/O */ /* diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c index 336e7c8effb7..30d53ddd6e69 100644 --- a/fs/xfs/xfs_buf_mem.c +++ b/fs/xfs/xfs_buf_mem.c @@ -169,7 +169,7 @@ xmbuf_map_folio( unlock_page(page); bp->b_addr = page_address(page); - bp->b_folio_count = 1; + 
bp->b_bvec_count = 1; return 0; } @@ -182,7 +182,7 @@ xmbuf_unmap_folio( folio_put(kmem_to_folio(bp->b_addr)); bp->b_addr = NULL; - bp->b_folio_count = 0; + bp->b_bvec_count = 0; } /* Is this a valid daddr within the buftarg? */