Message ID: 20210811193533.766613-4-axboe@kernel.dk
State: New, archived
Series: Enable bio recycling for polled IO
On Wed, Aug 11, 2021 at 01:35:30PM -0600, Jens Axboe wrote:
> +	struct bio *bio;
> +	unsigned int i;
> +
> +	i = 0;

Initialize at declaration time?

> +static inline bool __bio_put(struct bio *bio)
> +{
> +	if (!bio_flagged(bio, BIO_REFFED))
> +		return true;
> +
> +	BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
> +
> +	/*
> +	 * last put frees it
> +	 */
> +	return atomic_dec_and_test(&bio->__bi_cnt);
> +}

Please avoid this helper, we can trivially do the check inside of bio_put:

	if (bio_flagged(bio, BIO_REFFED)) {
		BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
		if (!atomic_dec_and_test(&bio->__bi_cnt))
			return;
	}

> -	bio_free(bio);
> +	if (bio_flagged(bio, BIO_PERCPU_CACHE)) {
> +		struct bio_alloc_cache *cache;
> +
> +		bio_uninit(bio);
> +		cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu());
> +		bio_list_add_head(&cache->free_list, bio);
> +		cache->nr++;
> +		if (cache->nr > ALLOC_CACHE_MAX + ALLOC_CACHE_SLACK)

Folding the increment as a prefix here would make the increment and test
semantics a little more obvious.

> +struct bio *bio_alloc_kiocb(struct kiocb *kiocb, gfp_t gfp,
> +			    unsigned short nr_vecs, struct bio_set *bs)
> +{
> +	struct bio_alloc_cache *cache = NULL;
> +	struct bio *bio;
> +
> +	if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS)
> +		goto normal_alloc;
> +
> +	cache = per_cpu_ptr(bs->cache, get_cpu());
> +	bio = bio_list_pop(&cache->free_list);
> +	if (bio) {
> +		cache->nr--;
> +		put_cpu();
> +		bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs);
> +		bio_set_flag(bio, BIO_PERCPU_CACHE);
> +		return bio;
> +	}
> +	put_cpu();
> +normal_alloc:
> +	bio = bio_alloc_bioset(gfp, nr_vecs, bs);
> +	if (cache)
> +		bio_set_flag(bio, BIO_PERCPU_CACHE);
> +	return bio;

The goto here is pretty obfuscating and adds an extra branch to the fast
path.

Also I don't think we need the gfp argument here at all - it should
always be GFP_KERNEL. In fact I plan to kill these arguments from much
of the block layer as the places that need NOIO or NOFS semantics can
and should move to set that in the per-thread context.

> -static inline struct bio *bio_list_pop(struct bio_list *bl)
> +static inline void bio_list_del_head(struct bio_list *bl, struct bio *head)
>  {
> -	struct bio *bio = bl->head;
> -
> -	if (bio) {
> +	if (head) {
>  		bl->head = bl->head->bi_next;
>  		if (!bl->head)
>  			bl->tail = NULL;
>
> -		bio->bi_next = NULL;
> +		head->bi_next = NULL;
>  	}
> +}
> +
> +static inline struct bio *bio_list_pop(struct bio_list *bl)
> +{
> +	struct bio *bio = bl->head;
>
> +	bio_list_del_head(bl, bio);
>  	return bio;
>  }

No need for this change.

> @@ -699,6 +706,12 @@ struct bio_set {
>  	struct kmem_cache *bio_slab;
>  	unsigned int front_pad;
>
> +	/*
> +	 * per-cpu bio alloc cache and notifier
> +	 */
> +	struct bio_alloc_cache __percpu *cache;
> +	struct hlist_node cpuhp_dead;
> +

I'd keep the hotplug list entry at the end instead of bloating the
cache line used in the fast path.
On 8/12/21 1:01 AM, Christoph Hellwig wrote:
> On Wed, Aug 11, 2021 at 01:35:30PM -0600, Jens Axboe wrote:
>> +	struct bio *bio;
>> +	unsigned int i;
>> +
>> +	i = 0;
>
> Initialize at declaration time?

Sure, done.

>> +static inline bool __bio_put(struct bio *bio)
>> +{
>> +	if (!bio_flagged(bio, BIO_REFFED))
>> +		return true;
>> +
>> +	BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
>> +
>> +	/*
>> +	 * last put frees it
>> +	 */
>> +	return atomic_dec_and_test(&bio->__bi_cnt);
>> +}
>
> Please avoid this helper, we can trivially do the check inside of bio_put:
>
> 	if (bio_flagged(bio, BIO_REFFED)) {
> 		BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
> 		if (!atomic_dec_and_test(&bio->__bi_cnt))
> 			return;
> 	}

Done

>> -	bio_free(bio);
>> +	if (bio_flagged(bio, BIO_PERCPU_CACHE)) {
>> +		struct bio_alloc_cache *cache;
>> +
>> +		bio_uninit(bio);
>> +		cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu());
>> +		bio_list_add_head(&cache->free_list, bio);
>> +		cache->nr++;
>> +		if (cache->nr > ALLOC_CACHE_MAX + ALLOC_CACHE_SLACK)
>
> Folding the increment as a prefix here would make the increment and test
> semantics a little more obvious.

I don't really care that deeply about it, but I generally prefer keeping
them separate as it makes it easier to read (for me). But I'll change it.

>> +struct bio *bio_alloc_kiocb(struct kiocb *kiocb, gfp_t gfp,
>> +			    unsigned short nr_vecs, struct bio_set *bs)
>> +{
>> +	struct bio_alloc_cache *cache = NULL;
>> +	struct bio *bio;
>> +
>> +	if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS)
>> +		goto normal_alloc;
>> +
>> +	cache = per_cpu_ptr(bs->cache, get_cpu());
>> +	bio = bio_list_pop(&cache->free_list);
>> +	if (bio) {
>> +		cache->nr--;
>> +		put_cpu();
>> +		bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs);
>> +		bio_set_flag(bio, BIO_PERCPU_CACHE);
>> +		return bio;
>> +	}
>> +	put_cpu();
>> +normal_alloc:
>> +	bio = bio_alloc_bioset(gfp, nr_vecs, bs);
>> +	if (cache)
>> +		bio_set_flag(bio, BIO_PERCPU_CACHE);
>> +	return bio;
>
> The goto here is pretty obfuscating and adds an extra branch to the fast
> path.

I don't agree, and it's not the fast path - the fast path is popping
a bio off the list, not hitting the allocator.

> Also I don't think we need the gfp argument here at all - it should
> always be GFP_KERNEL. In fact I plan to kill these arguments from much
> of the block layer as the places that need NOIO or NOFS semantics can
> and should move to set that in the per-thread context.

Yeah, I actually kept it on purpose for future users, but let's just
kill it as both potential use cases I have use GFP_KERNEL anyway.

>> -static inline struct bio *bio_list_pop(struct bio_list *bl)
>> +static inline void bio_list_del_head(struct bio_list *bl, struct bio *head)
>>  {
>> -	struct bio *bio = bl->head;
>> -
>> -	if (bio) {
>> +	if (head) {
>>  		bl->head = bl->head->bi_next;
>>  		if (!bl->head)
>>  			bl->tail = NULL;
>>
>> -		bio->bi_next = NULL;
>> +		head->bi_next = NULL;
>>  	}
>> +}
>> +
>> +static inline struct bio *bio_list_pop(struct bio_list *bl)
>> +{
>> +	struct bio *bio = bl->head;
>>
>> +	bio_list_del_head(bl, bio);
>>  	return bio;
>>  }
>
> No need for this change.

Leftover from earlier series, killed it now.

>> @@ -699,6 +706,12 @@ struct bio_set {
>>  	struct kmem_cache *bio_slab;
>>  	unsigned int front_pad;
>>
>> +	/*
>> +	 * per-cpu bio alloc cache and notifier
>> +	 */
>> +	struct bio_alloc_cache __percpu *cache;
>> +	struct hlist_node cpuhp_dead;
>> +
>
> I'd keep the hotplug list entry at the end instead of bloating the
> cache line used in the fast path.

Good point, moved to the end.
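Putting the two agreed changes together (dropping the __bio_put() helper and
folding the increment into the test), bio_put() in block/bio.c would end up
looking roughly like the sketch below. This is assembled from the snippets in
this thread for illustration and may differ in detail from what was eventually
merged.

void bio_put(struct bio *bio)
{
	if (bio_flagged(bio, BIO_REFFED)) {
		BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
		/* last put frees it */
		if (!atomic_dec_and_test(&bio->__bi_cnt))
			return;
	}

	if (bio_flagged(bio, BIO_PERCPU_CACHE)) {
		struct bio_alloc_cache *cache;

		/* return the bio to this CPU's cache instead of freeing it */
		bio_uninit(bio);
		cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu());
		bio_list_add_head(&cache->free_list, bio);
		/* trim the cache back down once it grows past max + slack */
		if (++cache->nr > ALLOC_CACHE_MAX + ALLOC_CACHE_SLACK)
			bio_alloc_cache_prune(cache, ALLOC_CACHE_SLACK);
		put_cpu();
	} else {
		bio_free(bio);
	}
}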
On Thu, Aug 12, 2021 at 09:08:29AM -0600, Jens Axboe wrote:
> >> +	cache = per_cpu_ptr(bs->cache, get_cpu());
> >> +	bio = bio_list_pop(&cache->free_list);
> >> +	if (bio) {
> >> +		cache->nr--;
> >> +		put_cpu();
> >> +		bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs);
> >> +		bio_set_flag(bio, BIO_PERCPU_CACHE);
> >> +		return bio;
> >> +	}
> >> +	put_cpu();
> >> +normal_alloc:
> >> +	bio = bio_alloc_bioset(gfp, nr_vecs, bs);
> >> +	if (cache)
> >> +		bio_set_flag(bio, BIO_PERCPU_CACHE);
> >> +	return bio;
> >
> > The goto here is pretty obfuscating and adds an extra branch to the fast
> > path.
>
> I don't agree, and it's not the fast path - the fast path is popping
> a bio off the list, not hitting the allocator.

Oh, I see you special case the list pop return now. Still, avoiding the
goto, the cache initialization, and the conditional in the no-bio-found
case seems much easier to follow (see patch below).

diff --git a/block/bio.c b/block/bio.c
index 689335c00937..b42621cecbef 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1707,11 +1707,11 @@ EXPORT_SYMBOL(bioset_init_from_src);
 struct bio *bio_alloc_kiocb(struct kiocb *kiocb, gfp_t gfp,
 			    unsigned short nr_vecs, struct bio_set *bs)
 {
-	struct bio_alloc_cache *cache = NULL;
+	struct bio_alloc_cache *cache;
 	struct bio *bio;
 
 	if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS)
-		goto normal_alloc;
+		return bio_alloc_bioset(gfp, nr_vecs, bs);
 
 	cache = per_cpu_ptr(bs->cache, get_cpu());
 	bio = bio_list_pop(&cache->free_list);
@@ -1723,10 +1723,8 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, gfp_t gfp,
 		return bio;
 	}
 	put_cpu();
-normal_alloc:
 	bio = bio_alloc_bioset(gfp, nr_vecs, bs);
-	if (cache)
-		bio_set_flag(bio, BIO_PERCPU_CACHE);
+	bio_set_flag(bio, BIO_PERCPU_CACHE);
 	return bio;
 }
On 8/12/21 9:18 AM, Christoph Hellwig wrote:
> On Thu, Aug 12, 2021 at 09:08:29AM -0600, Jens Axboe wrote:
>>>> +	cache = per_cpu_ptr(bs->cache, get_cpu());
>>>> +	bio = bio_list_pop(&cache->free_list);
>>>> +	if (bio) {
>>>> +		cache->nr--;
>>>> +		put_cpu();
>>>> +		bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs);
>>>> +		bio_set_flag(bio, BIO_PERCPU_CACHE);
>>>> +		return bio;
>>>> +	}
>>>> +	put_cpu();
>>>> +normal_alloc:
>>>> +	bio = bio_alloc_bioset(gfp, nr_vecs, bs);
>>>> +	if (cache)
>>>> +		bio_set_flag(bio, BIO_PERCPU_CACHE);
>>>> +	return bio;
>>>
>>> The goto here is pretty obfuscating and adds an extra branch to the fast
>>> path.
>>
>> I don't agree, and it's not the fast path - the fast path is popping
>> a bio off the list, not hitting the allocator.
>
> Oh, I see you special case the list pop return now. Still, avoiding the
> goto, the cache initialization, and the conditional in the no-bio-found
> case seems much easier to follow (see patch below).
>
> diff --git a/block/bio.c b/block/bio.c
> index 689335c00937..b42621cecbef 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -1707,11 +1707,11 @@ EXPORT_SYMBOL(bioset_init_from_src);
>  struct bio *bio_alloc_kiocb(struct kiocb *kiocb, gfp_t gfp,
>  			    unsigned short nr_vecs, struct bio_set *bs)
>  {
> -	struct bio_alloc_cache *cache = NULL;
> +	struct bio_alloc_cache *cache;
>  	struct bio *bio;
>  
>  	if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS)
> -		goto normal_alloc;
> +		return bio_alloc_bioset(gfp, nr_vecs, bs);
>  
>  	cache = per_cpu_ptr(bs->cache, get_cpu());
>  	bio = bio_list_pop(&cache->free_list);
> @@ -1723,10 +1723,8 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, gfp_t gfp,
>  		return bio;
>  	}
>  	put_cpu();
> -normal_alloc:
>  	bio = bio_alloc_bioset(gfp, nr_vecs, bs);
> -	if (cache)
> -		bio_set_flag(bio, BIO_PERCPU_CACHE);
> +	bio_set_flag(bio, BIO_PERCPU_CACHE);
>  	return bio;
>  }

Sure, if that'll shut it down, I'll make the edit :-)
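For reference, with the restructuring above applied and the gfp argument
dropped (as agreed earlier in the thread, callers always pass GFP_KERNEL),
the allocation side would look roughly like the sketch below. This is a
sketch in the context of block/bio.c as modified by this series, not
necessarily the final committed code; it also re-establishes bio->bi_pool
after bio_init(), since bio_put() needs it to find the per-cpu cache.

struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs,
			    struct bio_set *bs)
{
	struct bio_alloc_cache *cache;
	struct bio *bio;

	/* only small, cache-enabled requests use the per-cpu free list */
	if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS)
		return bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);

	cache = per_cpu_ptr(bs->cache, get_cpu());
	bio = bio_list_pop(&cache->free_list);
	if (bio) {
		/* fast path: recycle a bio without touching the slab caches */
		cache->nr--;
		put_cpu();
		bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs);
		bio->bi_pool = bs;
		bio_set_flag(bio, BIO_PERCPU_CACHE);
		return bio;
	}
	put_cpu();

	/*
	 * cache miss: regular allocation, but mark the bio so that bio_put()
	 * recycles it into the per-cpu cache on completion
	 */
	bio = bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);
	bio_set_flag(bio, BIO_PERCPU_CACHE);
	return bio;
}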
diff --git a/block/bio.c b/block/bio.c
index 0b1025899131..689335c00937 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -25,6 +25,11 @@
 #include "blk.h"
 #include "blk-rq-qos.h"
 
+struct bio_alloc_cache {
+	struct bio_list		free_list;
+	unsigned int		nr;
+};
+
 static struct biovec_slab {
 	int nr_vecs;
 	char *name;
@@ -620,6 +625,69 @@ void guard_bio_eod(struct bio *bio)
 	bio_truncate(bio, maxsector << 9);
 }
 
+#define ALLOC_CACHE_MAX		512
+#define ALLOC_CACHE_SLACK	64
+
+static void bio_alloc_cache_prune(struct bio_alloc_cache *cache,
+				  unsigned int nr)
+{
+	struct bio *bio;
+	unsigned int i;
+
+	i = 0;
+	while ((bio = bio_list_pop(&cache->free_list)) != NULL) {
+		cache->nr--;
+		bio_free(bio);
+		if (++i == nr)
+			break;
+	}
+}
+
+static int bio_cpu_dead(unsigned int cpu, struct hlist_node *node)
+{
+	struct bio_set *bs;
+
+	bs = hlist_entry_safe(node, struct bio_set, cpuhp_dead);
+	if (bs->cache) {
+		struct bio_alloc_cache *cache = per_cpu_ptr(bs->cache, cpu);
+
+		bio_alloc_cache_prune(cache, -1U);
+	}
+	return 0;
+}
+
+static void bio_alloc_cache_destroy(struct bio_set *bs)
+{
+	int cpu;
+
+	if (!bs->cache)
+		return;
+
+	preempt_disable();
+	cpuhp_state_remove_instance_nocalls(CPUHP_BIO_DEAD, &bs->cpuhp_dead);
+	for_each_possible_cpu(cpu) {
+		struct bio_alloc_cache *cache;
+
+		cache = per_cpu_ptr(bs->cache, cpu);
+		bio_alloc_cache_prune(cache, -1U);
+	}
+	preempt_enable();
+	free_percpu(bs->cache);
+}
+
+static inline bool __bio_put(struct bio *bio)
+{
+	if (!bio_flagged(bio, BIO_REFFED))
+		return true;
+
+	BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
+
+	/*
+	 * last put frees it
+	 */
+	return atomic_dec_and_test(&bio->__bi_cnt);
+}
+
 /**
  * bio_put - release a reference to a bio
  * @bio:	bio to release reference to
@@ -630,16 +698,21 @@ void guard_bio_eod(struct bio *bio)
  **/
 void bio_put(struct bio *bio)
 {
-	if (!bio_flagged(bio, BIO_REFFED))
-		bio_free(bio);
-	else {
-		BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
+	if (unlikely(!__bio_put(bio)))
+		return;
 
-		/*
-		 * last put frees it
-		 */
-		if (atomic_dec_and_test(&bio->__bi_cnt))
-			bio_free(bio);
+	if (bio_flagged(bio, BIO_PERCPU_CACHE)) {
+		struct bio_alloc_cache *cache;
+
+		bio_uninit(bio);
+		cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu());
+		bio_list_add_head(&cache->free_list, bio);
+		cache->nr++;
+		if (cache->nr > ALLOC_CACHE_MAX + ALLOC_CACHE_SLACK)
+			bio_alloc_cache_prune(cache, ALLOC_CACHE_SLACK);
+		put_cpu();
+	} else {
+		bio_free(bio);
 	}
 }
 EXPORT_SYMBOL(bio_put);
@@ -1531,6 +1604,7 @@ int biovec_init_pool(mempool_t *pool, int pool_entries)
  */
 void bioset_exit(struct bio_set *bs)
 {
+	bio_alloc_cache_destroy(bs);
 	if (bs->rescue_workqueue)
 		destroy_workqueue(bs->rescue_workqueue);
 	bs->rescue_workqueue = NULL;
@@ -1592,12 +1666,18 @@ int bioset_init(struct bio_set *bs,
 	    biovec_init_pool(&bs->bvec_pool, pool_size))
 		goto bad;
 
-	if (!(flags & BIOSET_NEED_RESCUER))
-		return 0;
-
-	bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
-	if (!bs->rescue_workqueue)
-		goto bad;
+	if (flags & BIOSET_NEED_RESCUER) {
+		bs->rescue_workqueue = alloc_workqueue("bioset",
+							WQ_MEM_RECLAIM, 0);
+		if (!bs->rescue_workqueue)
+			goto bad;
+	}
+	if (flags & BIOSET_PERCPU_CACHE) {
+		bs->cache = alloc_percpu(struct bio_alloc_cache);
+		if (!bs->cache)
+			goto bad;
+		cpuhp_state_add_instance_nocalls(CPUHP_BIO_DEAD, &bs->cpuhp_dead);
+	}
 
 	return 0;
 bad:
@@ -1624,6 +1704,32 @@ int bioset_init_from_src(struct bio_set *bs, struct bio_set *src)
 }
 EXPORT_SYMBOL(bioset_init_from_src);
 
+struct bio *bio_alloc_kiocb(struct kiocb *kiocb, gfp_t gfp,
+			    unsigned short nr_vecs, struct bio_set *bs)
+{
+	struct bio_alloc_cache *cache = NULL;
+	struct bio *bio;
+
+	if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS)
+		goto normal_alloc;
+
+	cache = per_cpu_ptr(bs->cache, get_cpu());
+	bio = bio_list_pop(&cache->free_list);
+	if (bio) {
+		cache->nr--;
+		put_cpu();
+		bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs);
+		bio_set_flag(bio, BIO_PERCPU_CACHE);
+		return bio;
+	}
+	put_cpu();
+normal_alloc:
+	bio = bio_alloc_bioset(gfp, nr_vecs, bs);
+	if (cache)
+		bio_set_flag(bio, BIO_PERCPU_CACHE);
+	return bio;
+}
+
 static int __init init_bio(void)
 {
 	int i;
@@ -1638,6 +1744,9 @@ static int __init init_bio(void)
 				SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
 	}
 
+	cpuhp_setup_state_multi(CPUHP_BIO_DEAD, "block/bio:dead", NULL,
+					bio_cpu_dead);
+
 	if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS))
 		panic("bio: can't allocate bios\n");
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 2203b686e1f0..5f4a741ee97d 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -401,6 +401,7 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors,
 enum {
 	BIOSET_NEED_BVECS = BIT(0),
 	BIOSET_NEED_RESCUER = BIT(1),
+	BIOSET_PERCPU_CACHE = BIT(2),
 };
 extern int bioset_init(struct bio_set *, unsigned int, unsigned int, int flags);
 extern void bioset_exit(struct bio_set *);
@@ -409,6 +410,8 @@ extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src);
 
 struct bio *bio_alloc_bioset(gfp_t gfp, unsigned short nr_iovecs,
 		struct bio_set *bs);
+struct bio *bio_alloc_kiocb(struct kiocb *kiocb, gfp_t, unsigned short nr_vecs,
+		struct bio_set *bs);
 struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs);
 extern void bio_put(struct bio *);
 
@@ -652,18 +655,22 @@ static inline struct bio *bio_list_peek(struct bio_list *bl)
 	return bl->head;
 }
 
-static inline struct bio *bio_list_pop(struct bio_list *bl)
+static inline void bio_list_del_head(struct bio_list *bl, struct bio *head)
 {
-	struct bio *bio = bl->head;
-
-	if (bio) {
+	if (head) {
 		bl->head = bl->head->bi_next;
 		if (!bl->head)
 			bl->tail = NULL;
 
-		bio->bi_next = NULL;
+		head->bi_next = NULL;
 	}
+}
+
+static inline struct bio *bio_list_pop(struct bio_list *bl)
+{
+	struct bio *bio = bl->head;
 
+	bio_list_del_head(bl, bio);
 	return bio;
 }
 
@@ -699,6 +706,12 @@ struct bio_set {
 	struct kmem_cache *bio_slab;
 	unsigned int front_pad;
 
+	/*
+	 * per-cpu bio alloc cache and notifier
+	 */
+	struct bio_alloc_cache __percpu *cache;
+	struct hlist_node cpuhp_dead;
+
 	mempool_t bio_pool;
 	mempool_t bvec_pool;
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 290f9061b29a..f68d4e8c775e 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -301,6 +301,7 @@ enum {
 	BIO_TRACKED,		/* set if bio goes through the rq_qos path */
 	BIO_REMAPPED,
 	BIO_ZONE_WRITE_LOCKED,	/* Owns a zoned device zone write lock */
+	BIO_PERCPU_CACHE,	/* can participate in per-cpu alloc cache */
 	BIO_FLAG_LAST
 };
 
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index f39b34b13871..fe72c8d6c980 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -46,6 +46,7 @@ enum cpuhp_state {
 	CPUHP_ARM_OMAP_WAKE_DEAD,
 	CPUHP_IRQ_POLL_DEAD,
 	CPUHP_BLOCK_SOFTIRQ_DEAD,
+	CPUHP_BIO_DEAD,
 	CPUHP_ACPI_CPUDRV_DEAD,
 	CPUHP_S390_PFAULT_DEAD,
 	CPUHP_BLK_MQ_DEAD,
Add a per-cpu bio_set cache for bio allocations, enabling us to quickly
recycle them instead of going through the slab allocator. This cache
isn't IRQ safe, and hence is only really suitable for polled IO.

Very simple - keeps a count of bios in the cache, and maintains a max
of 512 with a slack of 64. If we get above max + slack, we drop slack
number of bios.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c                | 139 +++++++++++++++++++++++++++++++++----
 include/linux/bio.h        |  23 ++++--
 include/linux/blk_types.h  |   1 +
 include/linux/cpuhotplug.h |   1 +
 4 files changed, 144 insertions(+), 20 deletions(-)
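For context on how the new interface is meant to be consumed: a user marks
its bio_set as cache-enabled at init time and allocates through
bio_alloc_kiocb() for kiocbs that carry IOCB_ALLOC_CACHE, which the rest of
this series wires up for polled io_uring I/O. Below is a hypothetical sketch
with made-up names (my_bio_set, my_driver_init, my_submit); it uses the
signature as posted here, though per the review above the gfp argument is
expected to go away.

#include <linux/bio.h>

/* illustrative bio_set, not part of the series */
static struct bio_set my_bio_set;

static int my_driver_init(void)
{
	/* BIOSET_PERCPU_CACHE enables the per-cpu free list for this bio_set */
	return bioset_init(&my_bio_set, BIO_POOL_SIZE, 0,
			   BIOSET_NEED_BVECS | BIOSET_PERCPU_CACHE);
}

static void my_submit(struct kiocb *kiocb, unsigned short nr_vecs)
{
	struct bio *bio;

	/*
	 * Bios come out of the per-cpu cache only when the kiocb carries
	 * IOCB_ALLOC_CACHE, i.e. for polled I/O where completion (and thus
	 * bio_put()) never runs from IRQ context.
	 */
	bio = bio_alloc_kiocb(kiocb, GFP_KERNEL, nr_vecs, &my_bio_set);

	/*
	 * ... fill in and submit the bio; bio_put() at completion time
	 * returns it to the per-cpu cache instead of freeing it ...
	 */
}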