diff mbox series

[6/9] fs: add IOCB flags related to passing back dio completions

Message ID 20230721161650.319414-7-axboe@kernel.dk (mailing list archive)
State Superseded
Headers show
Series Improve async iomap DIO performance | expand

Commit Message

Jens Axboe July 21, 2023, 4:16 p.m. UTC
Async dio completions generally happen from hard/soft IRQ context, which
means that users like iomap may need to defer some of the completion
handling to a workqueue. This is less efficient than having the original
issuer handle it, like we do for sync IO, and it adds latency to the
completions.

Add IOCB_DIO_CALLER_COMP, which the issuer can set if it is able to
safely punt these completions to a safe context. If the dio handler is
aware of this flag, assign a callback handler in kiocb->dio_complete and
associated data in kiocb->private. The issuer will then call this
handler with that data from task context.

No functional changes in this patch.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/fs.h | 35 +++++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

Comments

Darrick J. Wong July 21, 2023, 4:28 p.m. UTC | #1
On Fri, Jul 21, 2023 at 10:16:47AM -0600, Jens Axboe wrote:
> Async dio completions generally happen from hard/soft IRQ context, which
> means that users like iomap may need to defer some of the completion
> handling to a workqueue. This is less efficient than having the original
> issuer handle it, like we do for sync IO, and it adds latency to the
> completions.
> 
> Add IOCB_DIO_CALLER_COMP, which the issuer can set if it is able to
> safely punt these completions to a safe context. If the dio handler is
> aware of this flag, assign a callback handler in kiocb->dio_complete and
> associated data io kiocb->private. The issuer will then call this
> handler with that data from task context.
> 
> No functional changes in this patch.
> 
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>  include/linux/fs.h | 35 +++++++++++++++++++++++++++++++++--
>  1 file changed, 33 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 6867512907d6..60e2b4ecfc4d 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -338,6 +338,20 @@ enum rw_hint {
>  #define IOCB_NOIO		(1 << 20)
>  /* can use bio alloc cache */
>  #define IOCB_ALLOC_CACHE	(1 << 21)
> +/*
> + * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
> + * iocb completion can be passed back to the owner for execution from a safe
> + * context rather than needing to be punted through a workqueue.If this If this

"...through a workqueue.  If this flag is set..."

Need a space after the period, and delete one of the "If this".

With that fixed,
Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D

> + * flag is set, the bio completion handling may set iocb->dio_complete to a
> + * handler function and iocb->private to context information for that handler.
> + * The issuer should call the handler with that context information from task
> + * context to complete the processing of the iocb. Note that while this
> + * provides a task context for the dio_complete() callback, it should only be
> + * used on the completion side for non-IO generating completions. It's fine to
> + * call blocking functions from this callback, but they should not wait for
> + * unrelated IO (like cache flushing, new IO generation, etc).
> + */
> +#define IOCB_DIO_CALLER_COMP	(1 << 22)
>  
>  /* for use in trace events */
>  #define TRACE_IOCB_STRINGS \
> @@ -351,7 +365,8 @@ enum rw_hint {
>  	{ IOCB_WRITE,		"WRITE" }, \
>  	{ IOCB_WAITQ,		"WAITQ" }, \
>  	{ IOCB_NOIO,		"NOIO" }, \
> -	{ IOCB_ALLOC_CACHE,	"ALLOC_CACHE" }
> +	{ IOCB_ALLOC_CACHE,	"ALLOC_CACHE" }, \
> +	{ IOCB_DIO_CALLER_COMP,	"CALLER_COMP" }
>  
>  struct kiocb {
>  	struct file		*ki_filp;
> @@ -360,7 +375,23 @@ struct kiocb {
>  	void			*private;
>  	int			ki_flags;
>  	u16			ki_ioprio; /* See linux/ioprio.h */
> -	struct wait_page_queue	*ki_waitq; /* for async buffered IO */
> +	union {
> +		/*
> +		 * Only used for async buffered reads, where it denotes the
> +		 * page waitqueue associated with completing the read. Valid
> +		 * IFF IOCB_WAITQ is set.
> +		 */
> +		struct wait_page_queue	*ki_waitq;
> +		/*
> +		 * Can be used for O_DIRECT IO, where the completion handling
> +		 * is punted back to the issuer of the IO. May only be set
> +		 * if IOCB_DIO_CALLER_COMP is set by the issuer, and the issuer
> +		 * must then check for presence of this handler when ki_complete
> +		 * is invoked. The data passed in to this handler must be
> +		 * assigned to ->private when dio_complete is assigned.
> +		 */
> +		ssize_t (*dio_complete)(void *data);
> +	};
>  };
>  
>  static inline bool is_sync_kiocb(struct kiocb *kiocb)
> -- 
> 2.40.1
>
Jens Axboe July 21, 2023, 4:30 p.m. UTC | #2
On 7/21/23 10:28 AM, Darrick J. Wong wrote:
>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>> index 6867512907d6..60e2b4ecfc4d 100644
>> --- a/include/linux/fs.h
>> +++ b/include/linux/fs.h
>> @@ -338,6 +338,20 @@ enum rw_hint {
>>  #define IOCB_NOIO		(1 << 20)
>>  /* can use bio alloc cache */
>>  #define IOCB_ALLOC_CACHE	(1 << 21)
>> +/*
>> + * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
>> + * iocb completion can be passed back to the owner for execution from a safe
>> + * context rather than needing to be punted through a workqueue.If this If this
> 
> "...through a workqueue.  If this flag is set..."
> 
> Need a space after the period, and delete one of the "If this".
> 
> With that fixed,
> Reviewed-by: Darrick J. Wong <djwong@kernel.org>

Thanks - same ask on the edit. Or let me know if:

a) you're fine with staging this in a separate branch for 6.6, or
b) you want a v5a/v6 edition posted

Either way is no trouble for me, just wary of spamming...
Jens Axboe July 21, 2023, 4:43 p.m. UTC | #3
On 7/21/23 10:30 AM, Jens Axboe wrote:
> On 7/21/23 10:28 AM, Darrick J. Wong wrote:
>>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>>> index 6867512907d6..60e2b4ecfc4d 100644
>>> --- a/include/linux/fs.h
>>> +++ b/include/linux/fs.h
>>> @@ -338,6 +338,20 @@ enum rw_hint {
>>>  #define IOCB_NOIO		(1 << 20)
>>>  /* can use bio alloc cache */
>>>  #define IOCB_ALLOC_CACHE	(1 << 21)
>>> +/*
>>> + * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
>>> + * iocb completion can be passed back to the owner for execution from a safe
>>> + * context rather than needing to be punted through a workqueue.If this If this
>>
>> "...through a workqueue.  If this flag is set..."
>>
>> Need a space after the period, and delete one of the "If this".
>>
>> With that fixed,
>> Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> 
> Thanks - same ask on the edit. Or let me know if:
> 
> a) you're fine with staging this in a separate branch for 6.6, or
> b) you want a v5a/v6 edition posted
> 
> Either way is no trouble for me, just wary of spamming...

FWIW, here's the updated branch:

https://git.kernel.dk/cgit/linux/log/?h=xfs-async-dio.5
diff mbox series

Patch

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6867512907d6..60e2b4ecfc4d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -338,6 +338,20 @@  enum rw_hint {
 #define IOCB_NOIO		(1 << 20)
 /* can use bio alloc cache */
 #define IOCB_ALLOC_CACHE	(1 << 21)
+/*
+ * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
+ * iocb completion can be passed back to the owner for execution from a safe
+ * context rather than needing to be punted through a workqueue. If this
+ * flag is set, the bio completion handling may set iocb->dio_complete to a
+ * handler function and iocb->private to context information for that handler.
+ * The issuer should call the handler with that context information from task
+ * context to complete the processing of the iocb. Note that while this
+ * provides a task context for the dio_complete() callback, it should only be
+ * used on the completion side for non-IO generating completions. It's fine to
+ * call blocking functions from this callback, but they should not wait for
+ * unrelated IO (like cache flushing, new IO generation, etc).
+ */
+#define IOCB_DIO_CALLER_COMP	(1 << 22)
 
 /* for use in trace events */
 #define TRACE_IOCB_STRINGS \
@@ -351,7 +365,8 @@  enum rw_hint {
 	{ IOCB_WRITE,		"WRITE" }, \
 	{ IOCB_WAITQ,		"WAITQ" }, \
 	{ IOCB_NOIO,		"NOIO" }, \
-	{ IOCB_ALLOC_CACHE,	"ALLOC_CACHE" }
+	{ IOCB_ALLOC_CACHE,	"ALLOC_CACHE" }, \
+	{ IOCB_DIO_CALLER_COMP,	"CALLER_COMP" }
 
 struct kiocb {
 	struct file		*ki_filp;
@@ -360,7 +375,23 @@  struct kiocb {
 	void			*private;
 	int			ki_flags;
 	u16			ki_ioprio; /* See linux/ioprio.h */
-	struct wait_page_queue	*ki_waitq; /* for async buffered IO */
+	union {
+		/*
+		 * Only used for async buffered reads, where it denotes the
+		 * page waitqueue associated with completing the read. Valid
+		 * IFF IOCB_WAITQ is set.
+		 */
+		struct wait_page_queue	*ki_waitq;
+		/*
+		 * Can be used for O_DIRECT IO, where the completion handling
+		 * is punted back to the issuer of the IO. May only be set
+		 * if IOCB_DIO_CALLER_COMP is set by the issuer, and the issuer
+		 * must then check for presence of this handler when ki_complete
+		 * is invoked. The data passed in to this handler must be
+		 * assigned to ->private when dio_complete is assigned.
+		 */
+		ssize_t (*dio_complete)(void *data);
+	};
 };
 
 static inline bool is_sync_kiocb(struct kiocb *kiocb)