[0/7] blk-mq: fix queue quiescing
diff mbox

Message ID 1495733047.2615.1.camel@sandisk.com
State New
Headers show

Commit Message

Bart Van Assche May 25, 2017, 5:24 p.m. UTC
On Thu, 2017-05-25 at 17:09 +0800, Ming Lei wrote:
> Another big issue is that 'srcu_struct' is very big, which shouldn't
> be embedded into hctx, since we only have one real user of
> BLK_MQ_F_BLOCKING.
> 
> So I will fix that too.

Hello Ming,

Is something like the (untested) patch below perhaps what you had in mind?

Subject: [PATCH] blk-mq: Reduce blk_mq_hw_ctx size

Since the srcu structure is rather large (184 bytes on an x86-64
system), only allocate it if needed.

Reported-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-mq.c         | 13 ++++++++++++-
 include/linux/blk-mq.h |  5 +++--
 2 files changed, 15 insertions(+), 3 deletions(-)

-- 
2.12.2

Comments

Jens Axboe May 25, 2017, 5:31 p.m. UTC | #1
On 05/25/2017 11:24 AM, Bart Van Assche wrote:
> On Thu, 2017-05-25 at 17:09 +0800, Ming Lei wrote:
>> Another big issue is that 'srcu_struct' is very big, which shouldn't
>> be embedded into hctx, since we only have one real user of
>> BLK_MQ_F_BLOCKING.
>>
>> So I will fix that too.
> 
> Hello Ming,
> 
> Is something like the (untested) patch below perhaps what you had in mind?
> 
> Subject: [PATCH] blk-mq: Reduce blk_mq_hw_ctx size
> 
> Since the srcu structure is rather large (184 bytes on an x86-64
> system), only allocate it if needed.

On my normal laptop setup, it's actually 408 bytes (!!).
Jens Axboe May 25, 2017, 5:42 p.m. UTC | #2
On 05/25/2017 11:24 AM, Bart Van Assche wrote:
> On Thu, 2017-05-25 at 17:09 +0800, Ming Lei wrote:
>> Another big issue is that 'srcu_struct' is very big, which shouldn't
>> be embedded into hctx, since we only have one real user of
>> BLK_MQ_F_BLOCKING.
>>
>> So I will fix that too.
> 
> Hello Ming,
> 
> Is something like the (untested) patch below perhaps what you had in mind?
> 
> Subject: [PATCH] blk-mq: Reduce blk_mq_hw_ctx size
> 
> Since the srcu structure is rather large (184 bytes on an x86-64
> system), only allocate it if needed.
> 
> Reported-by: Ming Lei <ming.lei@redhat.com>
> ---
>  block/blk-mq.c         | 13 ++++++++++++-
>  include/linux/blk-mq.h |  5 +++--
>  2 files changed, 15 insertions(+), 3 deletions(-)
> 
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 1e330de4e3c5..15b7d4077638 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -2233,6 +2233,17 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
>  }
>  EXPORT_SYMBOL(blk_mq_init_queue);
>  
> +static int blk_mq_hw_ctx_size(struct request_queue *q)
> +{
> +	BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, queue_rq_srcu) +
> +		sizeof(((struct blk_mq_hw_ctx *)NULL)->queue_rq_srcu), 64) !=
> +		sizeof(struct blk_mq_hw_ctx));
> +
> +	return q->tag_set->flags & BLK_MQ_F_BLOCKING ?
> +		sizeof(struct blk_mq_hw_ctx) :
> +		offsetof(struct blk_mq_hw_ctx, queue_rq_srcu);
> +}
> +
>  static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
>  						struct request_queue *q)
>  {
> @@ -2247,7 +2258,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
>  			continue;
>  
>  		node = blk_mq_hw_queue_to_node(q->mq_map, i);
> -		hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx),
> +		hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(q),
>  					GFP_KERNEL, node);
>  		if (!hctxs[i])
>  			break;
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index c0d59330b5e0..8467e1f83524 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -39,8 +39,6 @@ struct blk_mq_hw_ctx {
>  	struct blk_mq_tags	*tags;
>  	struct blk_mq_tags	*sched_tags;
>  
> -	struct srcu_struct	queue_rq_srcu;
> -
>  	unsigned long		queued;
>  	unsigned long		run;
>  #define BLK_MQ_MAX_DISPATCH_ORDER	7
> @@ -62,6 +60,9 @@ struct blk_mq_hw_ctx {
>  	struct dentry		*debugfs_dir;
>  	struct dentry		*sched_debugfs_dir;
>  #endif
> +
> +	/* Must be the last member - see also blk_mq_hw_ctx_size(). */
> +	struct srcu_struct	queue_rq_srcu;
>  };

Why not make it

	/* Must be the last member - see also blk_mq_hw_ctx_size(). */
	struct srcu_struct	queue_rq_srcu[0];

and fixup blk_mq_hw_ctx_size()

static int blk_mq_hw_ctx_size(struct request_queue *q)
{
	int size = sizeof(struct blk_mq_hw_ctx);

	if (q->tag_set->flags & BLK_MQ_F_BLOCKING)
		size += sizeof(struct srcu_struct);

	return size;
}

I think that'd be cleaner. Keep the end-of-struct checking, just to be
on the safe side.

Neither one is super pretty though, and still doesn't fix the fact that
the srcu_struct is _half_ the blk_mq_hw_ctx in total.
Bart Van Assche May 25, 2017, 5:59 p.m. UTC | #3
On Thu, 2017-05-25 at 11:42 -0600, Jens Axboe wrote:
> Why not make it
> 
> 	/* Must be the last member - see also blk_mq_hw_ctx_size(). */
> 	struct srcu_struct	queue_rq_srcu[0];
> 
> and fixup blk_mq_hw_ctx_size()
> 
> static int blk_mq_hw_ctx_size(struct request_queue *q)
> {
> 	int size = sizeof(struct blk_mq_hw_ctx);
> 
> 	if (q->tag_set->flags & BLK_MQ_F_BLOCKING)
> 		size += sizeof(struct srcu_struct);
> 
> 	return size;
> }
> 
> I think that'd be cleaner. Keep the end-of-struct checking, just to be
> on the safe side.
> 
> Neither one is super pretty though, and still doesn't fix the fact that
> the srcu_struct is _half_ the blk_mq_hw_ctx in total.

Hello Jens,

Making these changes seems like a good idea to me. I will make these changes
and post a patch.

Bart.
Ming Lei May 26, 2017, 12:44 a.m. UTC | #4
On Thu, May 25, 2017 at 11:42:59AM -0600, Jens Axboe wrote:
> On 05/25/2017 11:24 AM, Bart Van Assche wrote:
> > On Thu, 2017-05-25 at 17:09 +0800, Ming Lei wrote:
> >> Another big issue is that 'srcu_struct' is very big, which shouldn't
> >> be embedded into hctx, since we only have one real user of
> >> BLK_MQ_F_BLOCKING.
> >>
> >> So I will fix that too.
> > 
> > Hello Ming,
> > 
> > Is something like the (untested) patch below perhaps what you had in mind?
> > 
> > Subject: [PATCH] blk-mq: Reduce blk_mq_hw_ctx size
> > 
> > Since the srcu structure is rather large (184 bytes on an x86-64
> > system), only allocate it if needed.
> > 
> > Reported-by: Ming Lei <ming.lei@redhat.com>
> > ---
> >  block/blk-mq.c         | 13 ++++++++++++-
> >  include/linux/blk-mq.h |  5 +++--
> >  2 files changed, 15 insertions(+), 3 deletions(-)
> > 
> > diff --git a/block/blk-mq.c b/block/blk-mq.c
> > index 1e330de4e3c5..15b7d4077638 100644
> > --- a/block/blk-mq.c
> > +++ b/block/blk-mq.c
> > @@ -2233,6 +2233,17 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
> >  }
> >  EXPORT_SYMBOL(blk_mq_init_queue);
> >  
> > +static int blk_mq_hw_ctx_size(struct request_queue *q)
> > +{
> > +	BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, queue_rq_srcu) +
> > +		sizeof(((struct blk_mq_hw_ctx *)NULL)->queue_rq_srcu), 64) !=
> > +		sizeof(struct blk_mq_hw_ctx));
> > +
> > +	return q->tag_set->flags & BLK_MQ_F_BLOCKING ?
> > +		sizeof(struct blk_mq_hw_ctx) :
> > +		offsetof(struct blk_mq_hw_ctx, queue_rq_srcu);
> > +}
> > +
> >  static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
> >  						struct request_queue *q)
> >  {
> > @@ -2247,7 +2258,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
> >  			continue;
> >  
> >  		node = blk_mq_hw_queue_to_node(q->mq_map, i);
> > -		hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx),
> > +		hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(q),
> >  					GFP_KERNEL, node);
> >  		if (!hctxs[i])
> >  			break;
> > diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> > index c0d59330b5e0..8467e1f83524 100644
> > --- a/include/linux/blk-mq.h
> > +++ b/include/linux/blk-mq.h
> > @@ -39,8 +39,6 @@ struct blk_mq_hw_ctx {
> >  	struct blk_mq_tags	*tags;
> >  	struct blk_mq_tags	*sched_tags;
> >  
> > -	struct srcu_struct	queue_rq_srcu;
> > -
> >  	unsigned long		queued;
> >  	unsigned long		run;
> >  #define BLK_MQ_MAX_DISPATCH_ORDER	7
> > @@ -62,6 +60,9 @@ struct blk_mq_hw_ctx {
> >  	struct dentry		*debugfs_dir;
> >  	struct dentry		*sched_debugfs_dir;
> >  #endif
> > +
> > +	/* Must be the last member - see also blk_mq_hw_ctx_size(). */
> > +	struct srcu_struct	queue_rq_srcu;
> >  };
> 
> Why not make it
> 
> 	/* Must be the last member - see also blk_mq_hw_ctx_size(). */
> 	struct srcu_struct	queue_rq_srcu[0];

Yeah, actually that is what I did yesterday in my local tree; I will
post it today together with the blk_mq_quiesce_queue() fix.


Thanks,
Ming

Patch
diff mbox

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1e330de4e3c5..15b7d4077638 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2233,6 +2233,17 @@  struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 }
 EXPORT_SYMBOL(blk_mq_init_queue);
 
+static int blk_mq_hw_ctx_size(struct request_queue *q)
+{
+	BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, queue_rq_srcu) +
+		sizeof(((struct blk_mq_hw_ctx *)NULL)->queue_rq_srcu), 64) !=
+		sizeof(struct blk_mq_hw_ctx));
+
+	return q->tag_set->flags & BLK_MQ_F_BLOCKING ?
+		sizeof(struct blk_mq_hw_ctx) :
+		offsetof(struct blk_mq_hw_ctx, queue_rq_srcu);
+}
+
 static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 						struct request_queue *q)
 {
@@ -2247,7 +2258,7 @@  static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 			continue;
 
 		node = blk_mq_hw_queue_to_node(q->mq_map, i);
-		hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx),
+		hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(q),
 					GFP_KERNEL, node);
 		if (!hctxs[i])
 			break;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index c0d59330b5e0..8467e1f83524 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -39,8 +39,6 @@  struct blk_mq_hw_ctx {
 	struct blk_mq_tags	*tags;
 	struct blk_mq_tags	*sched_tags;
 
-	struct srcu_struct	queue_rq_srcu;
-
 	unsigned long		queued;
 	unsigned long		run;
 #define BLK_MQ_MAX_DISPATCH_ORDER	7
@@ -62,6 +60,9 @@  struct blk_mq_hw_ctx {
 	struct dentry		*debugfs_dir;
 	struct dentry		*sched_debugfs_dir;
 #endif
+
+	/* Must be the last member - see also blk_mq_hw_ctx_size(). */
+	struct srcu_struct	queue_rq_srcu;
 };
 
 struct blk_mq_tag_set {