diff mbox series

[1/2] block: add blk_default_io_timeout() for avoiding task hung in sync IO

Message ID 20200428074657.645441-2-ming.lei@redhat.com (mailing list archive)
State New, archived
Headers show
Series block: prevent task hung from being triggered in sync dio | expand

Commit Message

Ming Lei April 28, 2020, 7:46 a.m. UTC
Add helper of blk_default_io_timeout(), so that the two current users
can benefit from it.

Also direct IO users will use it in the following patch, so define the
helper in public header.

Cc: Salman Qazi <sqazi@google.com>
Cc: Jesse Barnes <jsbarnes@google.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/bio.c            |  9 +++------
 block/blk-exec.c       |  8 +++-----
 include/linux/blkdev.h | 10 ++++++++++
 3 files changed, 16 insertions(+), 11 deletions(-)

Comments

Hannes Reinecke April 28, 2020, 8:05 a.m. UTC | #1
On 4/28/20 9:46 AM, Ming Lei wrote:
> Add helper of blk_default_io_timeout(), so that the two current users
> can benefit from it.
> 
> Also direct IO users will use it in the following patch, so define the
> helper in public header.
> 
> Cc: Salman Qazi <sqazi@google.com>
> Cc: Jesse Barnes <jsbarnes@google.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Bart Van Assche <bvanassche@acm.org>
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>   block/bio.c            |  9 +++------
>   block/blk-exec.c       |  8 +++-----
>   include/linux/blkdev.h | 10 ++++++++++
>   3 files changed, 16 insertions(+), 11 deletions(-)
> 
> diff --git a/block/bio.c b/block/bio.c
> index 21cbaa6a1c20..f67afa159de7 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -1069,18 +1069,15 @@ static void submit_bio_wait_endio(struct bio *bio)
>   int submit_bio_wait(struct bio *bio)
>   {
>   	DECLARE_COMPLETION_ONSTACK_MAP(done, bio->bi_disk->lockdep_map);
> -	unsigned long hang_check;
> +	unsigned long timeout = blk_default_io_timeout();
>   
>   	bio->bi_private = &done;
>   	bio->bi_end_io = submit_bio_wait_endio;
>   	bio->bi_opf |= REQ_SYNC;
>   	submit_bio(bio);
>   
> -	/* Prevent hang_check timer from firing at us during very long I/O */
> -	hang_check = sysctl_hung_task_timeout_secs;
> -	if (hang_check)
> -		while (!wait_for_completion_io_timeout(&done,
> -					hang_check * (HZ/2)))
> +	if (timeout)
> +		while (!wait_for_completion_io_timeout(&done, timeout))
>   			;
>   	else
>   		wait_for_completion_io(&done);
> diff --git a/block/blk-exec.c b/block/blk-exec.c
> index e20a852ae432..17b5cf07e1a3 100644
> --- a/block/blk-exec.c
> +++ b/block/blk-exec.c
> @@ -80,15 +80,13 @@ void blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
>   		   struct request *rq, int at_head)
>   {
>   	DECLARE_COMPLETION_ONSTACK(wait);
> -	unsigned long hang_check;
> +	unsigned long timeout = blk_default_io_timeout();
>   
>   	rq->end_io_data = &wait;
>   	blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
>   
> -	/* Prevent hang_check timer from firing at us during very long I/O */
> -	hang_check = sysctl_hung_task_timeout_secs;
> -	if (hang_check)
> -		while (!wait_for_completion_io_timeout(&wait, hang_check * (HZ/2)));
> +	if (timeout)
> +		while (!wait_for_completion_io_timeout(&wait, timeout));
>   	else
>   		wait_for_completion_io(&wait);
>   }
This probably just shows my ignorance, but why don't we check for 
rq->timeout here?
I do see that not all requests have a timeout, but what about those who 
have?

Cheers,

Hannes
Ming Lei April 28, 2020, 8:30 a.m. UTC | #2
On Tue, Apr 28, 2020 at 10:05:36AM +0200, Hannes Reinecke wrote:
> On 4/28/20 9:46 AM, Ming Lei wrote:
> > Add helper of blk_default_io_timeout(), so that the two current users
> > can benefit from it.
> > 
> > Also direct IO users will use it in the following patch, so define the
> > helper in public header.
> > 
> > Cc: Salman Qazi <sqazi@google.com>
> > Cc: Jesse Barnes <jsbarnes@google.com>
> > Cc: Christoph Hellwig <hch@lst.de>
> > Cc: Bart Van Assche <bvanassche@acm.org>
> > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> > ---
> >   block/bio.c            |  9 +++------
> >   block/blk-exec.c       |  8 +++-----
> >   include/linux/blkdev.h | 10 ++++++++++
> >   3 files changed, 16 insertions(+), 11 deletions(-)
> > 
> > diff --git a/block/bio.c b/block/bio.c
> > index 21cbaa6a1c20..f67afa159de7 100644
> > --- a/block/bio.c
> > +++ b/block/bio.c
> > @@ -1069,18 +1069,15 @@ static void submit_bio_wait_endio(struct bio *bio)
> >   int submit_bio_wait(struct bio *bio)
> >   {
> >   	DECLARE_COMPLETION_ONSTACK_MAP(done, bio->bi_disk->lockdep_map);
> > -	unsigned long hang_check;
> > +	unsigned long timeout = blk_default_io_timeout();
> >   	bio->bi_private = &done;
> >   	bio->bi_end_io = submit_bio_wait_endio;
> >   	bio->bi_opf |= REQ_SYNC;
> >   	submit_bio(bio);
> > -	/* Prevent hang_check timer from firing at us during very long I/O */
> > -	hang_check = sysctl_hung_task_timeout_secs;
> > -	if (hang_check)
> > -		while (!wait_for_completion_io_timeout(&done,
> > -					hang_check * (HZ/2)))
> > +	if (timeout)
> > +		while (!wait_for_completion_io_timeout(&done, timeout))
> >   			;
> >   	else
> >   		wait_for_completion_io(&done);
> > diff --git a/block/blk-exec.c b/block/blk-exec.c
> > index e20a852ae432..17b5cf07e1a3 100644
> > --- a/block/blk-exec.c
> > +++ b/block/blk-exec.c
> > @@ -80,15 +80,13 @@ void blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
> >   		   struct request *rq, int at_head)
> >   {
> >   	DECLARE_COMPLETION_ONSTACK(wait);
> > -	unsigned long hang_check;
> > +	unsigned long timeout = blk_default_io_timeout();
> >   	rq->end_io_data = &wait;
> >   	blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
> > -	/* Prevent hang_check timer from firing at us during very long I/O */
> > -	hang_check = sysctl_hung_task_timeout_secs;
> > -	if (hang_check)
> > -		while (!wait_for_completion_io_timeout(&wait, hang_check * (HZ/2)));
> > +	if (timeout)
> > +		while (!wait_for_completion_io_timeout(&wait, timeout));
> >   	else
> >   		wait_for_completion_io(&wait);
> >   }
> This probably just shows my ignorance, but why don't we check for
> rq->timeout here?
> I do see that not all requests have a timeout, but what about those who
> have?

Here the IO means IO from upper layer(FS, user space, ...), and this
kind of IO isn't same with block layer's IO request which is splitted
from upper layer's bio.

So we can't apply the rq->timeout directly, especially we want to avoid
the task hung on the sync IO from upper layer.

Thanks,
Ming
Bart Van Assche April 28, 2020, 2:19 p.m. UTC | #3
On 2020-04-28 00:46, Ming Lei wrote:
> +/*
> + * Used in sync IO for avoiding to triger task hung warning, which may
> + * cause system panic or reboot.
> + */
> +static inline unsigned long blk_default_io_timeout(void)
> +{
> +	return sysctl_hung_task_timeout_secs * (HZ / 2);
> +}
> +
>  #endif

This function is only used inside the block layer. Has it been
considered to move this function from the public block layer API into a
private header file, e.g. block/blk.h?

Thanks,

Bart.
Ming Lei April 29, 2020, 1:17 a.m. UTC | #4
On Tue, Apr 28, 2020 at 07:19:33AM -0700, Bart Van Assche wrote:
> On 2020-04-28 00:46, Ming Lei wrote:
> > +/*
> > + * Used in sync IO for avoiding to triger task hung warning, which may
> > + * cause system panic or reboot.
> > + */
> > +static inline unsigned long blk_default_io_timeout(void)
> > +{
> > +	return sysctl_hung_task_timeout_secs * (HZ / 2);
> > +}
> > +
> >  #endif
> 
> This function is only used inside the block layer. Has it been
> considered to move this function from the public block layer API into a
> private header file, e.g. block/blk.h?

Please look at the commit log or the 2nd patch, and the helper will be
used in 2nd patch in dio code.

Thanks,
Ming
Bart Van Assche April 30, 2020, 3:08 a.m. UTC | #5
On 2020-04-28 18:17, Ming Lei wrote:
> On Tue, Apr 28, 2020 at 07:19:33AM -0700, Bart Van Assche wrote:
>> On 2020-04-28 00:46, Ming Lei wrote:
>>> +/*
>>> + * Used in sync IO for avoiding to triger task hung warning, which may
>>> + * cause system panic or reboot.
>>> + */
>>> +static inline unsigned long blk_default_io_timeout(void)
>>> +{
>>> +	return sysctl_hung_task_timeout_secs * (HZ / 2);
>>> +}
>>> +
>>>  #endif
>>
>> This function is only used inside the block layer. Has it been
>> considered to move this function from the public block layer API into a
>> private header file, e.g. block/blk.h?
> 
> Please look at the commit log or the 2nd patch, and the helper will be
> used in 2nd patch in dio code.

Has it been considered to use the expression
"sysctl_hung_task_timeout_secs * (HZ / 2)" directly instead of wrapping
that expression in a function? I think using the expression directly may
be more clear. Additionally, it is slightly confusing that the function
name starts with "blk_" but that nothing in the implementation of that
function is specific to the block layer.

Thanks,

Bart.
Ming Lei May 3, 2020, 1:43 a.m. UTC | #6
On Wed, Apr 29, 2020 at 08:08:03PM -0700, Bart Van Assche wrote:
> On 2020-04-28 18:17, Ming Lei wrote:
> > On Tue, Apr 28, 2020 at 07:19:33AM -0700, Bart Van Assche wrote:
> >> On 2020-04-28 00:46, Ming Lei wrote:
> >>> +/*
> >>> + * Used in sync IO for avoiding to triger task hung warning, which may
> >>> + * cause system panic or reboot.
> >>> + */
> >>> +static inline unsigned long blk_default_io_timeout(void)
> >>> +{
> >>> +	return sysctl_hung_task_timeout_secs * (HZ / 2);
> >>> +}
> >>> +
> >>>  #endif
> >>
> >> This function is only used inside the block layer. Has it been
> >> considered to move this function from the public block layer API into a
> >> private header file, e.g. block/blk.h?
> > 
> > Please look at the commit log or the 2nd patch, and the helper will be
> > used in 2nd patch in dio code.
> 
> Has it been considered to use the expression
> "sysctl_hung_task_timeout_secs * (HZ / 2)" directly instead of wrapping
> that expression in a function? I think using the expression directly may
> be more clear. Additionally, it is slightly confusing that the function
> name starts with "blk_" but that nothing in the implementation of that
> function is specific to the block layer.

Fine, will do it in V2.

thanks,
Ming
diff mbox series

Patch

diff --git a/block/bio.c b/block/bio.c
index 21cbaa6a1c20..f67afa159de7 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1069,18 +1069,15 @@  static void submit_bio_wait_endio(struct bio *bio)
 int submit_bio_wait(struct bio *bio)
 {
 	DECLARE_COMPLETION_ONSTACK_MAP(done, bio->bi_disk->lockdep_map);
-	unsigned long hang_check;
+	unsigned long timeout = blk_default_io_timeout();
 
 	bio->bi_private = &done;
 	bio->bi_end_io = submit_bio_wait_endio;
 	bio->bi_opf |= REQ_SYNC;
 	submit_bio(bio);
 
-	/* Prevent hang_check timer from firing at us during very long I/O */
-	hang_check = sysctl_hung_task_timeout_secs;
-	if (hang_check)
-		while (!wait_for_completion_io_timeout(&done,
-					hang_check * (HZ/2)))
+	if (timeout)
+		while (!wait_for_completion_io_timeout(&done, timeout))
 			;
 	else
 		wait_for_completion_io(&done);
diff --git a/block/blk-exec.c b/block/blk-exec.c
index e20a852ae432..17b5cf07e1a3 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -80,15 +80,13 @@  void blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
 		   struct request *rq, int at_head)
 {
 	DECLARE_COMPLETION_ONSTACK(wait);
-	unsigned long hang_check;
+	unsigned long timeout = blk_default_io_timeout();
 
 	rq->end_io_data = &wait;
 	blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
 
-	/* Prevent hang_check timer from firing at us during very long I/O */
-	hang_check = sysctl_hung_task_timeout_secs;
-	if (hang_check)
-		while (!wait_for_completion_io_timeout(&wait, hang_check * (HZ/2)));
+	if (timeout)
+		while (!wait_for_completion_io_timeout(&wait, timeout));
 	else
 		wait_for_completion_io(&wait);
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f00bd4042295..3d594406b96c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -27,6 +27,7 @@ 
 #include <linux/percpu-refcount.h>
 #include <linux/scatterlist.h>
 #include <linux/blkzoned.h>
+#include <linux/sched/sysctl.h>
 
 struct module;
 struct scsi_ioctl_command;
@@ -1827,4 +1828,13 @@  static inline void blk_wake_io_task(struct task_struct *waiter)
 		wake_up_process(waiter);
 }
 
+/*
+ * Used in sync IO for avoiding to triger task hung warning, which may
+ * cause system panic or reboot.
+ */
+static inline unsigned long blk_default_io_timeout(void)
+{
+	return sysctl_hung_task_timeout_secs * (HZ / 2);
+}
+
 #endif