
[4/8] xfs: async blkdev cache flush

Message ID 20210223033442.3267258-5-david@fromorbit.com (mailing list archive)
State Superseded
Series [1/8] xfs: log stripe roundoff is a property of the log

Commit Message

Dave Chinner Feb. 23, 2021, 3:34 a.m. UTC
From: Dave Chinner <dchinner@redhat.com>

The new checkpoint cache flush mechanism requires us to issue an
unconditional cache flush before we start a new checkpoint. We don't
want to block for this if we can help it, and we have a fair chunk
of CPU work to do between starting the checkpoint and issuing the
first journal IO.

Hence it makes sense to amortise the latency cost of the cache flush
by issuing it asynchronously and then waiting for it only when we
need to issue the first IO in the transaction.

To do this, we need async cache flush primitives to submit the cache
flush bio and to wait on it. The block layer has no such primitives
for filesystems, so roll our own for the moment.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_bio_io.c | 30 ++++++++++++++++++++++++++++++
 fs/xfs/xfs_linux.h  |  1 +
 2 files changed, 31 insertions(+)
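
As a minimal usage sketch (not part of the patch; the caller name and the
surrounding steps are hypothetical), the completion-based pattern the
commit message describes looks like:

/*
 * Hypothetical caller: issue the cache flush early, overlap it with the
 * checkpoint's CPU work, then wait only when the first journal IO must
 * be submitted.
 */
static void
example_checkpoint_push(
	struct xfs_mount	*mp)
{
	DECLARE_COMPLETION_ONSTACK(flush_done);

	/* Fire off the flush asynchronously; we never see the bio. */
	xfs_flush_bdev_async(mp->m_ddev_targp->bt_bdev, &flush_done);

	/* ... format the checkpoint: a fair chunk of CPU work ... */

	/* The flush must have completed before the first journal IO. */
	wait_for_completion(&flush_done);

	/* ... now issue the first journal IO ... */
}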

Comments

Chaitanya Kulkarni Feb. 23, 2021, 5:29 a.m. UTC | #1
On 2/22/21 19:35, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> The new checkpoint cache flush mechanism requires us to issue an
> unconditional cache flush before we start a new checkpoint. We don't
> want to block for this if we can help it, and we have a fair chunk
> of CPU work to do between starting the checkpoint and issuing the
> first journal IO.
>
> Hence it makes sense to amortise the latency cost of the cache flush
> by issuing it asynchronously and then waiting for it only when we
> need to issue the first IO in the transaction.
>
> To do this, we need async cache flush primitives to submit the cache
> flush bio and to wait on it. The block layer has no such primitives
> for filesystems, so roll our own for the moment.
>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/xfs/xfs_bio_io.c | 30 ++++++++++++++++++++++++++++++
>  fs/xfs/xfs_linux.h  |  1 +
>  2 files changed, 31 insertions(+)
>
> diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c
> index 5abf653a45d4..d55420bc72b5 100644
> --- a/fs/xfs/xfs_bio_io.c
> +++ b/fs/xfs/xfs_bio_io.c
> @@ -67,3 +67,33 @@ xfs_flush_bdev(
>  	blkdev_issue_flush(bdev, GFP_NOFS);
>  }
>  
> +void
> +xfs_flush_bdev_async_endio(
> +	struct bio	*bio)
> +{
> +	if (bio->bi_private)
> +		complete(bio->bi_private);
> +	bio_put(bio);
> +}
> +
> +/*
> + * Submit a request for an async cache flush to run. If the caller needs to wait
> + * for the flush completion at a later point in time, they must supply a
> + * valid completion. This will be signalled when the flush completes.
> + * The caller never sees the bio that is issued here.
> + */
> +void
> +xfs_flush_bdev_async(
> +	struct block_device	*bdev,
> +	struct completion	*done)
> +{
> +	struct bio *bio;
> +
> +	bio = bio_alloc(GFP_NOFS, 0);
> +	bio_set_dev(bio, bdev);
> +	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
> +	bio->bi_private = done;
> +        bio->bi_end_io = xfs_flush_bdev_async_endio;
> +
nit: need to align the above line with the rest of the code? Can be done
at the time of applying the patch.
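For reference, the tab-aligned form of that line would be:

	bio->bi_end_io = xfs_flush_bdev_async_endio;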
> +	submit_bio(bio);
> +}
> diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
> index e94a2aeefee8..293ff2355e80 100644
> --- a/fs/xfs/xfs_linux.h
> +++ b/fs/xfs/xfs_linux.h
> @@ -197,6 +197,7 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y)
>  int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count,
>  		char *data, unsigned int op);
>  void xfs_flush_bdev(struct block_device *bdev);
> +void xfs_flush_bdev_async(struct block_device *bdev, struct completion *done);
>  
>  #define ASSERT_ALWAYS(expr)	\
>  	(likely(expr) ? (void)0 : assfail(NULL, #expr, __FILE__, __LINE__))
Looks good.

Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Chandan Babu R Feb. 23, 2021, 2:02 p.m. UTC | #2
On 23 Feb 2021 at 09:04, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> The new checkpoint cache flush mechanism requires us to issue an
> unconditional cache flush before we start a new checkpoint. We don't
> want to block for this if we can help it, and we have a fair chunk
> of CPU work to do between starting the checkpoint and issuing the
> first journal IO.
>
> Hence it makes sense to amortise the latency cost of the cache flush
> by issuing it asynchronously and then waiting for it only when we
> need to issue the first IO in the transaction.
>
> To do this, we need async cache flush primitives to submit the cache
> flush bio and to wait on it. The block layer has no such primitives
> for filesystems, so roll our own for the moment.
>

Thanks for the detailed commit message explaining the reasoning behind the
requirement for an async cache flush primitive.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>



--
chandan
Darrick J. Wong Feb. 24, 2021, 8:51 p.m. UTC | #3
On Tue, Feb 23, 2021 at 02:34:38PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> The new checkpoint cache flush mechanism requires us to issue an
> unconditional cache flush before we start a new checkpoint. We don't
> want to block for this if we can help it, and we have a fair chunk
> of CPU work to do between starting the checkpoint and issuing the
> first journal IO.
> 
> Hence it makes sense to amortise the latency cost of the cache flush
> by issuing it asynchronously and then waiting for it only when we
> need to issue the first IO in the transaction.
> 
> To do this, we need async cache flush primitives to submit the cache
> flush bio and to wait on it. The block layer has no such primitives
> for filesystems, so roll our own for the moment.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/xfs/xfs_bio_io.c | 30 ++++++++++++++++++++++++++++++
>  fs/xfs/xfs_linux.h  |  1 +
>  2 files changed, 31 insertions(+)
> 
> diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c
> index 5abf653a45d4..d55420bc72b5 100644
> --- a/fs/xfs/xfs_bio_io.c
> +++ b/fs/xfs/xfs_bio_io.c
> @@ -67,3 +67,33 @@ xfs_flush_bdev(
>  	blkdev_issue_flush(bdev, GFP_NOFS);
>  }
>  
> +void
> +xfs_flush_bdev_async_endio(
> +	struct bio	*bio)
> +{
> +	if (bio->bi_private)
> +		complete(bio->bi_private);
> +	bio_put(bio);
> +}
> +
> +/*
> + * Submit a request for an async cache flush to run. If the caller needs to wait
> + * for the flush completion at a later point in time, they must supply a
> + * valid completion. This will be signalled when the flush completes.
> + * The caller never sees the bio that is issued here.
> + */
> +void
> +xfs_flush_bdev_async(
> +	struct block_device	*bdev,

Not sure why this isn't a buftarg function, since (AFAICT) this is the
only caller in the ~30 patches you've sent to the list.  Is there
something else coming down the pipeline such that you only have a raw
block_device pointer?
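
A buftarg-level wrapper of the kind suggested here would be thin; as a
hypothetical sketch (it appears nowhere in this series), assuming the
existing xfs_buftarg fields:

void
xfs_buftarg_flush_async(
	struct xfs_buftarg	*btp,
	struct completion	*done)
{
	xfs_flush_bdev_async(btp->bt_bdev, done);
}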

> +	struct completion	*done)
> +{
> +	struct bio *bio;
> +
> +	bio = bio_alloc(GFP_NOFS, 0);
> +	bio_set_dev(bio, bdev);
> +	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
> +	bio->bi_private = done;
> +        bio->bi_end_io = xfs_flush_bdev_async_endio;

Weird indent here.

--D

> +
> +	submit_bio(bio);
> +}
> diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
> index e94a2aeefee8..293ff2355e80 100644
> --- a/fs/xfs/xfs_linux.h
> +++ b/fs/xfs/xfs_linux.h
> @@ -197,6 +197,7 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y)
>  int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count,
>  		char *data, unsigned int op);
>  void xfs_flush_bdev(struct block_device *bdev);
> +void xfs_flush_bdev_async(struct block_device *bdev, struct completion *done);
>  
>  #define ASSERT_ALWAYS(expr)	\
>  	(likely(expr) ? (void)0 : assfail(NULL, #expr, __FILE__, __LINE__))
> -- 
> 2.28.0
>

Patch

diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c
index 5abf653a45d4..d55420bc72b5 100644
--- a/fs/xfs/xfs_bio_io.c
+++ b/fs/xfs/xfs_bio_io.c
@@ -67,3 +67,33 @@ xfs_flush_bdev(
 	blkdev_issue_flush(bdev, GFP_NOFS);
 }
 
+void
+xfs_flush_bdev_async_endio(
+	struct bio	*bio)
+{
+	if (bio->bi_private)
+		complete(bio->bi_private);
+	bio_put(bio);
+}
+
+/*
+ * Submit a request for an async cache flush to run. If the caller needs to wait
+ * for the flush completion at a later point in time, they must supply a
+ * valid completion. This will be signalled when the flush completes.
+ * The caller never sees the bio that is issued here.
+ */
+void
+xfs_flush_bdev_async(
+	struct block_device	*bdev,
+	struct completion	*done)
+{
+	struct bio *bio;
+
+	bio = bio_alloc(GFP_NOFS, 0);
+	bio_set_dev(bio, bdev);
+	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
+	bio->bi_private = done;
+        bio->bi_end_io = xfs_flush_bdev_async_endio;
+
+	submit_bio(bio);
+}
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index e94a2aeefee8..293ff2355e80 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -197,6 +197,7 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y)
 int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count,
 		char *data, unsigned int op);
 void xfs_flush_bdev(struct block_device *bdev);
+void xfs_flush_bdev_async(struct block_device *bdev, struct completion *done);
 
 #define ASSERT_ALWAYS(expr)	\
 	(likely(expr) ? (void)0 : assfail(NULL, #expr, __FILE__, __LINE__))