Message ID | 3d57b2def025d49df83e6e62fd153f40e91a87e4.1497544265.git.dsterba@suse.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 06/16/2017 12:49 AM, David Sterba wrote: > For devices that support flushing, we allocate a bio, submit, wait for > it and then free it. The bio allocation does not fail so ENOMEM is not a > problem but we still may unnecessarily stress the allocation subsystem. > > Instead, we can allocate the device at the same time we allocate the > device and reuse it each time we need to flush the barriers. The bio is > reset before each use. Reference counting is simplified to just device > allocation (get) and freeing (put). > > Note for write_dev_flush: we check the queue flush status again as we > can't use the existence of bio as before. Looks good few items as below.. > Signed-off-by: David Sterba <dsterba@suse.com> > --- > fs/btrfs/disk-io.c | 24 ++++++------------------ > fs/btrfs/volumes.c | 12 ++++++++++++ > 2 files changed, 18 insertions(+), 18 deletions(-) > > diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c > index 2b00ebff13f8..27d44d6ab775 100644 > --- a/fs/btrfs/disk-io.c > +++ b/fs/btrfs/disk-io.c > @@ -3482,9 +3482,7 @@ static int write_dev_supers(struct btrfs_device *device, > */ > static void btrfs_end_empty_barrier(struct bio *bio) > { > - if (bio->bi_private) > - complete(bio->bi_private); > - bio_put(bio); > + complete(bio->bi_private); > } > > /* > @@ -3494,26 +3492,19 @@ static void btrfs_end_empty_barrier(struct bio *bio) > static void write_dev_flush(struct btrfs_device *device) > { > struct request_queue *q = bdev_get_queue(device->bdev); > - struct bio *bio; > + struct bio *bio = device->flush_bio; > > if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) > return; > > - /* > - * one reference for us, and we leave it for the > - * caller > - */ > - device->flush_bio = NULL; > - bio = btrfs_io_bio_alloc(0); > + bio_reset(bio); > bio->bi_end_io = btrfs_end_empty_barrier; > bio->bi_bdev = device->bdev; > bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; > init_completion(&device->flush_wait); > bio->bi_private = &device->flush_wait; > - 
device->flush_bio = bio; > > - bio_get(bio); > - btrfsic_submit_bio(bio); > + submit_bio(bio); Originally it went through the btrfsic. There is no mention of this change, if it's not an oversight. > } > > /* > @@ -3522,9 +3513,10 @@ static void write_dev_flush(struct btrfs_device *device) > static int wait_dev_flush(struct btrfs_device *device) > { > int ret = 0; > + struct request_queue *q = bdev_get_queue(device->bdev); > struct bio *bio = device->flush_bio; > > - if (!bio) > + if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) > return 0; It returns here if it's write through, which can be toggled after write_dev_flush() has been called, such as: echo "write back" > /sys/block/sdd/queue/write_cache write_dev_flush(sdd) echo "write through" > /sys/block/sdd/queue/write_cache wait_dev_flush(sdd) So it would fail to check the error. > wait_for_completion(&device->flush_wait); > @@ -3535,10 +3527,6 @@ static int wait_dev_flush(struct btrfs_device *device) > BTRFS_DEV_STAT_FLUSH_ERRS); > } > > - /* drop the reference from the wait == 0 run */ > - bio_put(bio); > - device->flush_bio = NULL; > - > return ret; > } > > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c > index 8bb1f4e5905a..251ae81e4363 100644 > --- a/fs/btrfs/volumes.c > +++ b/fs/btrfs/volumes.c > @@ -242,6 +242,17 @@ static struct btrfs_device *__alloc_device(void) > if (!dev) > return ERR_PTR(-ENOMEM); > > + /* > + * Preallocate a bio that's always going to be used for flushing device > + * barriers and matches the device lifespan > + */ > + dev->flush_bio = bio_alloc_bioset(GFP_KERNEL, 0, NULL); Nice. 
Thanks, Anand > + if (!dev->flush_bio) { > + kfree(dev); > + return ERR_PTR(-ENOMEM); > + } > + bio_get(dev->flush_bio); > + > INIT_LIST_HEAD(&dev->dev_list); > INIT_LIST_HEAD(&dev->dev_alloc_list); > INIT_LIST_HEAD(&dev->resized_list); > @@ -838,6 +849,7 @@ static void __free_device(struct work_struct *work) > > device = container_of(work, struct btrfs_device, rcu_work); > rcu_string_free(device->name); > + bio_put(device->flush_bio); > kfree(device); > } > > -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Jun 16, 2017 at 05:53:12AM +0800, Anand Jain wrote: > On 06/16/2017 12:49 AM, David Sterba wrote: > > For devices that support flushing, we allocate a bio, submit, wait for > > it and then free it. The bio allocation does not fail so ENOMEM is not a > > problem but we still may unnecessarily stress the allocation subsystem. > > > > Instead, we can allocate the device at the same time we allocate the > > device and reuse it each time we need to flush the barriers. The bio is > > reset before each use. Reference counting is simplified to just device > > allocation (get) and freeing (put). > > > > Note for write_dev_flush: we check the queue flush status again as we > > can't use the existence of bio as before. > > Looks good few items as below.. > > > Signed-off-by: David Sterba <dsterba@suse.com> > > --- > > fs/btrfs/disk-io.c | 24 ++++++------------------ > > fs/btrfs/volumes.c | 12 ++++++++++++ > > 2 files changed, 18 insertions(+), 18 deletions(-) > > > > diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c > > index 2b00ebff13f8..27d44d6ab775 100644 > > --- a/fs/btrfs/disk-io.c > > +++ b/fs/btrfs/disk-io.c > > @@ -3482,9 +3482,7 @@ static int write_dev_supers(struct btrfs_device *device, > > */ > > static void btrfs_end_empty_barrier(struct bio *bio) > > { > > - if (bio->bi_private) > > - complete(bio->bi_private); > > - bio_put(bio); > > + complete(bio->bi_private); > > } > > > > /* > > @@ -3494,26 +3492,19 @@ static void btrfs_end_empty_barrier(struct bio *bio) > > static void write_dev_flush(struct btrfs_device *device) > > { > > struct request_queue *q = bdev_get_queue(device->bdev); > > - struct bio *bio; > > + struct bio *bio = device->flush_bio; > > > > if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) > > return; > > > > - /* > > - * one reference for us, and we leave it for the > > - * caller > > - */ > > - device->flush_bio = NULL; > > - bio = btrfs_io_bio_alloc(0); > > + bio_reset(bio); > > bio->bi_end_io = btrfs_end_empty_barrier; > > 
bio->bi_bdev = device->bdev; > > bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; > > init_completion(&device->flush_wait); > > bio->bi_private = &device->flush_wait; > > - device->flush_bio = bio; > > > > - bio_get(bio); > > - btrfsic_submit_bio(bio); > > + submit_bio(bio); > > Originally it went through the btrfsic. There is no mention > of this change if its not an oversight. Right, avoiding it is intentional; I just forgot to mention it in the changelog. The bio has no data attached, so the integrity checker will skip it. > > /* > > @@ -3522,9 +3513,10 @@ static void write_dev_flush(struct btrfs_device *device) > > static int wait_dev_flush(struct btrfs_device *device) > > { > > int ret = 0; > > + struct request_queue *q = bdev_get_queue(device->bdev); > > struct bio *bio = device->flush_bio; > > > > - if (!bio) > > + if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) > > return 0; > > It returns here if its write through. Which can be toggled > after write_dev_flush() has been called such as.. > > echo "write back" > /sys/block/sdd/queue/write_cache > write_dev_flush(sdd) > echo "write through" > /sys/block/sdd/queue/write_cache > wait_dev_flush(sdd) > > So it would fails to check error. Yeah, the bio would stay in flight. I had to read more about the flushes, but I apparently mixed it up with FUA. Toggling the write cache needs to be handled properly, which requires pulling in the relevant bits from patch 4/5; and the force_dev_flush sysfs knob does not make sense, as you noted. Thanks. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2b00ebff13f8..27d44d6ab775 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3482,9 +3482,7 @@ static int write_dev_supers(struct btrfs_device *device, */ static void btrfs_end_empty_barrier(struct bio *bio) { - if (bio->bi_private) - complete(bio->bi_private); - bio_put(bio); + complete(bio->bi_private); } /* @@ -3494,26 +3492,19 @@ static void btrfs_end_empty_barrier(struct bio *bio) static void write_dev_flush(struct btrfs_device *device) { struct request_queue *q = bdev_get_queue(device->bdev); - struct bio *bio; + struct bio *bio = device->flush_bio; if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) return; - /* - * one reference for us, and we leave it for the - * caller - */ - device->flush_bio = NULL; - bio = btrfs_io_bio_alloc(0); + bio_reset(bio); bio->bi_end_io = btrfs_end_empty_barrier; bio->bi_bdev = device->bdev; bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; init_completion(&device->flush_wait); bio->bi_private = &device->flush_wait; - device->flush_bio = bio; - bio_get(bio); - btrfsic_submit_bio(bio); + submit_bio(bio); } /* @@ -3522,9 +3513,10 @@ static void write_dev_flush(struct btrfs_device *device) static int wait_dev_flush(struct btrfs_device *device) { int ret = 0; + struct request_queue *q = bdev_get_queue(device->bdev); struct bio *bio = device->flush_bio; - if (!bio) + if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) return 0; wait_for_completion(&device->flush_wait); @@ -3535,10 +3527,6 @@ static int wait_dev_flush(struct btrfs_device *device) BTRFS_DEV_STAT_FLUSH_ERRS); } - /* drop the reference from the wait == 0 run */ - bio_put(bio); - device->flush_bio = NULL; - return ret; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8bb1f4e5905a..251ae81e4363 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -242,6 +242,17 @@ static struct btrfs_device *__alloc_device(void) if (!dev) return ERR_PTR(-ENOMEM); + /* + * Preallocate a bio that's always 
going to be used for flushing device + * barriers and matches the device lifespan + */ + dev->flush_bio = bio_alloc_bioset(GFP_KERNEL, 0, NULL); + if (!dev->flush_bio) { + kfree(dev); + return ERR_PTR(-ENOMEM); + } + bio_get(dev->flush_bio); + INIT_LIST_HEAD(&dev->dev_list); INIT_LIST_HEAD(&dev->dev_alloc_list); INIT_LIST_HEAD(&dev->resized_list); @@ -838,6 +849,7 @@ static void __free_device(struct work_struct *work) device = container_of(work, struct btrfs_device, rcu_work); rcu_string_free(device->name); + bio_put(device->flush_bio); kfree(device); }
For devices that support flushing, we allocate a bio, submit, wait for it and then free it. The bio allocation does not fail so ENOMEM is not a problem but we still may unnecessarily stress the allocation subsystem. Instead, we can allocate the bio at the same time we allocate the device and reuse it each time we need to flush the barriers. The bio is reset before each use. Reference counting is simplified to just device allocation (get) and freeing (put). Note for wait_dev_flush: we check the queue flush status again as we can't use the existence of bio as before. Signed-off-by: David Sterba <dsterba@suse.com> --- fs/btrfs/disk-io.c | 24 ++++++------------------ fs/btrfs/volumes.c | 12 ++++++++++++ 2 files changed, 18 insertions(+), 18 deletions(-)