Message ID | 20230529131106.2123367-8-yukuai1@huaweicloud.com (mailing list archive) |
---|---|
State | Accepted, archived |
Headers | show |
Series | limit the number of plugged bio | expand |
On Mon, May 29, 2023 at 9:14 PM Yu Kuai <yukuai1@huaweicloud.com> wrote: > > From: Yu Kuai <yukuai3@huawei.com> > > bio can be added to plug infinitely, and following writeback test can > trigger huge amount of plugged bio: > > Test script: > modprobe brd rd_nr=4 rd_size=10485760 > mdadm -CR /dev/md0 -l10 -n4 /dev/ram[0123] --assume-clean --bitmap=internal > echo 0 > /proc/sys/vm/dirty_background_ratio > fio -filename=/dev/md0 -ioengine=libaio -rw=write -bs=4k -numjobs=1 -iodepth=128 -name=test > > Test result: > Monitor /sys/block/md0/inflight will found that inflight keep increasing > until fio finish writing, after running for about 2 minutes: > > [root@fedora ~]# cat /sys/block/md0/inflight > 0 4474191 > > Fix the problem by limiting the number of plugged bio based on the number > of copies for original bio. > > Signed-off-by: Yu Kuai <yukuai3@huawei.com> > --- > drivers/md/raid1-10.c | 9 ++++++++- > drivers/md/raid1.c | 2 +- > drivers/md/raid10.c | 2 +- > 3 files changed, 10 insertions(+), 3 deletions(-) > > diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c > index 17e55c1fd5a1..bb1e23b66c45 100644 > --- a/drivers/md/raid1-10.c > +++ b/drivers/md/raid1-10.c > @@ -21,6 +21,7 @@ > #define IO_MADE_GOOD ((struct bio *)2) > > #define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) > +#define MAX_PLUG_BIO 32 > > /* for managing resync I/O pages */ > struct resync_pages { > @@ -31,6 +32,7 @@ struct resync_pages { > struct raid1_plug_cb { > struct blk_plug_cb cb; > struct bio_list pending; > + unsigned int count; > }; > > static void rbio_pool_free(void *rbio, void *data) > @@ -128,7 +130,7 @@ static inline void raid1_submit_write(struct bio *bio) > } > > static inline bool raid1_add_bio_to_plug(struct mddev *mddev, struct bio *bio, > - blk_plug_cb_fn unplug) > + blk_plug_cb_fn unplug, int copies) > { > struct raid1_plug_cb *plug = NULL; > struct blk_plug_cb *cb; > @@ -148,6 +150,11 @@ static inline bool raid1_add_bio_to_plug(struct mddev *mddev, struct bio *bio, > > plug = container_of(cb, struct raid1_plug_cb, cb); > bio_list_add(&plug->pending, bio); > + if (++plug->count / MAX_PLUG_BIO >= copies) { > + list_del(&cb->list); > + cb->callback(cb, false); > + } > + It doesn't need this line here. Have you done some performance tests with this patch set? Regards Xiao > > return true; > } > diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c > index 006620fed595..dc89a1c4b1f1 100644 > --- a/drivers/md/raid1.c > +++ b/drivers/md/raid1.c > @@ -1562,7 +1562,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, > r1_bio->sector); > /* flush_pending_writes() needs access to the rdev so...*/ > mbio->bi_bdev = (void *)rdev; > - if (!raid1_add_bio_to_plug(mddev, mbio, raid1_unplug)) { > + if (!raid1_add_bio_to_plug(mddev, mbio, raid1_unplug, disks)) { > spin_lock_irqsave(&conf->device_lock, flags); > bio_list_add(&conf->pending_bio_list, mbio); > spin_unlock_irqrestore(&conf->device_lock, flags); > diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c > index fb22cfe94d32..9237dbeb07ba 100644 > --- a/drivers/md/raid10.c > +++ b/drivers/md/raid10.c > @@ -1306,7 +1306,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, > > atomic_inc(&r10_bio->remaining); > > - if (!raid1_add_bio_to_plug(mddev, mbio, raid10_unplug)) { > + if (!raid1_add_bio_to_plug(mddev, mbio, raid10_unplug, conf->copies)) { > spin_lock_irqsave(&conf->device_lock, flags); > bio_list_add(&conf->pending_bio_list, mbio); > spin_unlock_irqrestore(&conf->device_lock, flags); > -- > 2.39.2 >
Hi, 在 2023/05/31 23:42, Xiao Ni 写道: > On Mon, May 29, 2023 at 9:14 PM Yu Kuai <yukuai1@huaweicloud.com> wrote: >> >> From: Yu Kuai <yukuai3@huawei.com> >> >> bio can be added to plug infinitely, and following writeback test can >> trigger huge amount of plugged bio: >> >> Test script: >> modprobe brd rd_nr=4 rd_size=10485760 >> mdadm -CR /dev/md0 -l10 -n4 /dev/ram[0123] --assume-clean --bitmap=internal >> echo 0 > /proc/sys/vm/dirty_background_ratio >> fio -filename=/dev/md0 -ioengine=libaio -rw=write -bs=4k -numjobs=1 -iodepth=128 -name=test >> >> Test result: >> Monitor /sys/block/md0/inflight will found that inflight keep increasing >> until fio finish writing, after running for about 2 minutes: >> >> [root@fedora ~]# cat /sys/block/md0/inflight >> 0 4474191 >> >> Fix the problem by limiting the number of plugged bio based on the number >> of copies for original bio. >> >> Signed-off-by: Yu Kuai <yukuai3@huawei.com> >> --- >> drivers/md/raid1-10.c | 9 ++++++++- >> drivers/md/raid1.c | 2 +- >> drivers/md/raid10.c | 2 +- >> 3 files changed, 10 insertions(+), 3 deletions(-) >> >> diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c >> index 17e55c1fd5a1..bb1e23b66c45 100644 >> --- a/drivers/md/raid1-10.c >> +++ b/drivers/md/raid1-10.c >> @@ -21,6 +21,7 @@ >> #define IO_MADE_GOOD ((struct bio *)2) >> >> #define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) >> +#define MAX_PLUG_BIO 32 >> >> /* for managing resync I/O pages */ >> struct resync_pages { >> @@ -31,6 +32,7 @@ struct resync_pages { >> struct raid1_plug_cb { >> struct blk_plug_cb cb; >> struct bio_list pending; >> + unsigned int count; >> }; >> >> static void rbio_pool_free(void *rbio, void *data) >> @@ -128,7 +130,7 @@ static inline void raid1_submit_write(struct bio *bio) >> } >> >> static inline bool raid1_add_bio_to_plug(struct mddev *mddev, struct bio *bio, >> - blk_plug_cb_fn unplug) >> + blk_plug_cb_fn unplug, int copies) >> { >> struct raid1_plug_cb *plug = NULL; >> struct blk_plug_cb *cb; >> @@ -148,6 +150,11 @@ static inline bool raid1_add_bio_to_plug(struct mddev *mddev, struct bio *bio, >> >> plug = container_of(cb, struct raid1_plug_cb, cb); >> bio_list_add(&plug->pending, bio); >> + if (++plug->count / MAX_PLUG_BIO >= copies) { >> + list_del(&cb->list); >> + cb->callback(cb, false); >> + } >> + > It doesn't need this line here. > > Have you done some performance tests with this patch set? Just a simple fio script to test 4 ramdisk/loop 16thread 4k write in my VM, and I didn't notice regression, however, I didn't run benchmarks yet, I don't have such physical environment to test performance for now... I'll definitely run some performance tests in physical evironment later. Thanks, Kuai > > Regards > Xiao >> >> return true; >> } >> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c >> index 006620fed595..dc89a1c4b1f1 100644 >> --- a/drivers/md/raid1.c >> +++ b/drivers/md/raid1.c >> @@ -1562,7 +1562,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, >> r1_bio->sector); >> /* flush_pending_writes() needs access to the rdev so...*/ >> mbio->bi_bdev = (void *)rdev; >> - if (!raid1_add_bio_to_plug(mddev, mbio, raid1_unplug)) { >> + if (!raid1_add_bio_to_plug(mddev, mbio, raid1_unplug, disks)) { >> spin_lock_irqsave(&conf->device_lock, flags); >> bio_list_add(&conf->pending_bio_list, mbio); >> spin_unlock_irqrestore(&conf->device_lock, flags); >> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c >> index fb22cfe94d32..9237dbeb07ba 100644 >> --- a/drivers/md/raid10.c >> +++ b/drivers/md/raid10.c >> @@ -1306,7 +1306,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, >> >> atomic_inc(&r10_bio->remaining); >> >> - if (!raid1_add_bio_to_plug(mddev, mbio, raid10_unplug)) { >> + if (!raid1_add_bio_to_plug(mddev, mbio, raid10_unplug, conf->copies)) { >> spin_lock_irqsave(&conf->device_lock, flags); >> bio_list_add(&conf->pending_bio_list, mbio); >> spin_unlock_irqrestore(&conf->device_lock, flags); >> -- >> 2.39.2 >> > > . >
diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c index 17e55c1fd5a1..bb1e23b66c45 100644 --- a/drivers/md/raid1-10.c +++ b/drivers/md/raid1-10.c @@ -21,6 +21,7 @@ #define IO_MADE_GOOD ((struct bio *)2) #define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) +#define MAX_PLUG_BIO 32 /* for managing resync I/O pages */ struct resync_pages { @@ -31,6 +32,7 @@ struct resync_pages { struct raid1_plug_cb { struct blk_plug_cb cb; struct bio_list pending; + unsigned int count; }; static void rbio_pool_free(void *rbio, void *data) @@ -128,7 +130,7 @@ static inline void raid1_submit_write(struct bio *bio) } static inline bool raid1_add_bio_to_plug(struct mddev *mddev, struct bio *bio, - blk_plug_cb_fn unplug) + blk_plug_cb_fn unplug, int copies) { struct raid1_plug_cb *plug = NULL; struct blk_plug_cb *cb; @@ -148,6 +150,11 @@ static inline bool raid1_add_bio_to_plug(struct mddev *mddev, struct bio *bio, plug = container_of(cb, struct raid1_plug_cb, cb); bio_list_add(&plug->pending, bio); + if (++plug->count / MAX_PLUG_BIO >= copies) { + list_del(&cb->list); + cb->callback(cb, false); + } + return true; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 006620fed595..dc89a1c4b1f1 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1562,7 +1562,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, r1_bio->sector); /* flush_pending_writes() needs access to the rdev so...*/ mbio->bi_bdev = (void *)rdev; - if (!raid1_add_bio_to_plug(mddev, mbio, raid1_unplug)) { + if (!raid1_add_bio_to_plug(mddev, mbio, raid1_unplug, disks)) { spin_lock_irqsave(&conf->device_lock, flags); bio_list_add(&conf->pending_bio_list, mbio); spin_unlock_irqrestore(&conf->device_lock, flags); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index fb22cfe94d32..9237dbeb07ba 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1306,7 +1306,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, atomic_inc(&r10_bio->remaining); - if (!raid1_add_bio_to_plug(mddev, mbio, raid10_unplug)) { + if (!raid1_add_bio_to_plug(mddev, mbio, raid10_unplug, conf->copies)) { spin_lock_irqsave(&conf->device_lock, flags); bio_list_add(&conf->pending_bio_list, mbio); spin_unlock_irqrestore(&conf->device_lock, flags);