diff mbox

[03/14] blk-mq: introduce blk_mq_dispatch_rq_from_ctxs()

Message ID 20170731165111.11536-5-ming.lei@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ming Lei July 31, 2017, 4:51 p.m. UTC
This function is introduced for picking up request
from sw queue so that we can dispatch in scheduler's way.

More importantly, for some SCSI devices, driver
tags are host wide, and the number is quite big,
but each lun has very limited queue depth. This
function is introduced for avoiding to take too
many requests from sw queue when queue is busy,
and only try to dispatch request when queue
isn't busy.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-mq.c | 38 +++++++++++++++++++++++++++++++++++++-
 block/blk-mq.h |  1 +
 2 files changed, 38 insertions(+), 1 deletion(-)

Comments

Bart Van Assche July 31, 2017, 11:09 p.m. UTC | #1
On Tue, 2017-08-01 at 00:51 +0800, Ming Lei wrote:
> @@ -810,7 +810,11 @@ static void blk_mq_timeout_work(struct work_struct *work)
>  
>  struct ctx_iter_data {
>  	struct blk_mq_hw_ctx *hctx;
> -	struct list_head *list;
> +
> +	union {
> +		struct list_head *list;
> +		struct request *rq;
> +	};
>  };

Hello Ming,

Please introduce a new data structure for dispatch_rq_from_ctx() /
blk_mq_dispatch_rq_from_ctxs() instead of introducing a union in struct
ctx_iter_data. That will avoid that .list can be used in a context where
a struct request * pointer has been stored in the structure and vice versa.
 
>  static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
> @@ -826,6 +830,26 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
>  	return true;
>  }
>  
> +static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
> +{
> +	struct ctx_iter_data *dispatch_data = data;
> +	struct blk_mq_hw_ctx *hctx = dispatch_data->hctx;
> +	struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
> +	bool empty = true;
> +
> +	spin_lock(&ctx->lock);
> +	if (unlikely(!list_empty(&ctx->rq_list))) {
> +		dispatch_data->rq = list_entry_rq(ctx->rq_list.next);
> +		list_del_init(&dispatch_data->rq->queuelist);
> +		empty = list_empty(&ctx->rq_list);
> +	}
> +	spin_unlock(&ctx->lock);
> +	if (empty)
> +		sbitmap_clear_bit(sb, bitnr);

This sbitmap_clear_bit() occurs without holding blk_mq_ctx.lock. Sorry but
I don't think this is safe. Please either remove this sbitmap_clear_bit() call
or make sure that it happens with blk_mq_ctx.lock held.

Thanks,

Bart.
Ming Lei Aug. 1, 2017, 10:07 a.m. UTC | #2
On Mon, Jul 31, 2017 at 11:09:38PM +0000, Bart Van Assche wrote:
> On Tue, 2017-08-01 at 00:51 +0800, Ming Lei wrote:
> > @@ -810,7 +810,11 @@ static void blk_mq_timeout_work(struct work_struct *work)
> >  
> >  struct ctx_iter_data {
> >  	struct blk_mq_hw_ctx *hctx;
> > -	struct list_head *list;
> > +
> > +	union {
> > +		struct list_head *list;
> > +		struct request *rq;
> > +	};
> >  };
> 
> Hello Ming,
> 
> Please introduce a new data structure for dispatch_rq_from_ctx() /
> blk_mq_dispatch_rq_from_ctxs() instead of introducing a union in struct
> ctx_iter_data. That will avoid that .list can be used in a context where
> a struct request * pointer has been stored in the structure and vice versa.

Looks there isn't such usage now, or we can just both 'list' and 'rq' in
this data structure if there is.

>  
> >  static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
> > @@ -826,6 +830,26 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
> >  	return true;
> >  }
> >  
> > +static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
> > +{
> > +	struct ctx_iter_data *dispatch_data = data;
> > +	struct blk_mq_hw_ctx *hctx = dispatch_data->hctx;
> > +	struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
> > +	bool empty = true;
> > +
> > +	spin_lock(&ctx->lock);
> > +	if (unlikely(!list_empty(&ctx->rq_list))) {
> > +		dispatch_data->rq = list_entry_rq(ctx->rq_list.next);
> > +		list_del_init(&dispatch_data->rq->queuelist);
> > +		empty = list_empty(&ctx->rq_list);
> > +	}
> > +	spin_unlock(&ctx->lock);
> > +	if (empty)
> > +		sbitmap_clear_bit(sb, bitnr);
> 
> This sbitmap_clear_bit() occurs without holding blk_mq_ctx.lock. Sorry but
> I don't think this is safe. Please either remove this sbitmap_clear_bit() call
> or make sure that it happens with blk_mq_ctx.lock held.

Good catch, sbitmap_clear_bit() should have been done with holding
ctx->lock, otherwise a new pending bit may be cleared.
kernel test robot Aug. 2, 2017, 5:19 p.m. UTC | #3
Hi Ming,

[auto build test ERROR on block/for-next]
[also build test ERROR on v4.13-rc3 next-20170802]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Ming-Lei/blk-mq-sched-fix-SCSI-MQ-performance-regression/20170801-031007
base:   https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
config: x86_64-randconfig-b0-08022356 (attached as .config)
compiler: gcc-4.4 (Debian 4.4.7-8) 4.4.7
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All error/warnings (new ones prefixed by >>):

   block/blk-mq.c: In function 'blk_mq_flush_busy_ctxs':
>> block/blk-mq.c:861: error: unknown field 'list' specified in initializer
>> block/blk-mq.c:861: warning: missing braces around initializer
   block/blk-mq.c:861: warning: (near initialization for 'data.<anonymous>')
   block/blk-mq.c: In function 'blk_mq_dispatch_rq_from_ctxs':
>> block/blk-mq.c:872: error: unknown field 'rq' specified in initializer
   block/blk-mq.c:872: warning: missing braces around initializer
   block/blk-mq.c:872: warning: (near initialization for 'data.<anonymous>')

vim +/list +861 block/blk-mq.c

22e09fd59 Ming Lei      2017-08-01  852  
320ae51fe Jens Axboe    2013-10-24  853  /*
1429d7c94 Jens Axboe    2014-05-19  854   * Process software queues that have been marked busy, splicing them
1429d7c94 Jens Axboe    2014-05-19  855   * to the for-dispatch
1429d7c94 Jens Axboe    2014-05-19  856   */
2c3ad6679 Jens Axboe    2016-12-14  857  void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
1429d7c94 Jens Axboe    2014-05-19  858  {
4b5ef3bbb Ming Lei      2017-08-01  859  	struct ctx_iter_data data = {
88459642c Omar Sandoval 2016-09-17  860  		.hctx = hctx,
88459642c Omar Sandoval 2016-09-17 @861  		.list = list,
88459642c Omar Sandoval 2016-09-17  862  	};
1429d7c94 Jens Axboe    2014-05-19  863  
88459642c Omar Sandoval 2016-09-17  864  	sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data);
1429d7c94 Jens Axboe    2014-05-19  865  }
2c3ad6679 Jens Axboe    2016-12-14  866  EXPORT_SYMBOL_GPL(blk_mq_flush_busy_ctxs);
1429d7c94 Jens Axboe    2014-05-19  867  
22e09fd59 Ming Lei      2017-08-01  868  struct request *blk_mq_dispatch_rq_from_ctxs(struct blk_mq_hw_ctx *hctx)
22e09fd59 Ming Lei      2017-08-01  869  {
22e09fd59 Ming Lei      2017-08-01  870  	struct ctx_iter_data data = {
22e09fd59 Ming Lei      2017-08-01  871  		.hctx = hctx,
22e09fd59 Ming Lei      2017-08-01 @872  		.rq   = NULL,
22e09fd59 Ming Lei      2017-08-01  873  	};
22e09fd59 Ming Lei      2017-08-01  874  
22e09fd59 Ming Lei      2017-08-01  875  	sbitmap_for_each_set(&hctx->ctx_map, dispatch_rq_from_ctx, &data);
22e09fd59 Ming Lei      2017-08-01  876  
22e09fd59 Ming Lei      2017-08-01  877  	return data.rq;
22e09fd59 Ming Lei      2017-08-01  878  }
22e09fd59 Ming Lei      2017-08-01  879  

:::::: The code at line 861 was first introduced by commit
:::::: 88459642cba452630326b9cab1c651e09577d4e4 blk-mq: abstract tag allocation out into sbitmap library

:::::: TO: Omar Sandoval <osandov@fb.com>
:::::: CC: Jens Axboe <axboe@fb.com>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox

Patch

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 94818f78c099..86b8fdcb8434 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -810,7 +810,11 @@  static void blk_mq_timeout_work(struct work_struct *work)
 
 struct ctx_iter_data {
 	struct blk_mq_hw_ctx *hctx;
-	struct list_head *list;
+
+	union {
+		struct list_head *list;
+		struct request *rq;
+	};
 };
 
 static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
@@ -826,6 +830,26 @@  static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
 	return true;
 }
 
+static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
+{
+	struct ctx_iter_data *dispatch_data = data;
+	struct blk_mq_hw_ctx *hctx = dispatch_data->hctx;
+	struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
+	bool empty = true;
+
+	spin_lock(&ctx->lock);
+	if (unlikely(!list_empty(&ctx->rq_list))) {
+		dispatch_data->rq = list_entry_rq(ctx->rq_list.next);
+		list_del_init(&dispatch_data->rq->queuelist);
+		empty = list_empty(&ctx->rq_list);
+	}
+	spin_unlock(&ctx->lock);
+	if (empty)
+		sbitmap_clear_bit(sb, bitnr);
+
+	return !dispatch_data->rq;
+}
+
 /*
  * Process software queues that have been marked busy, splicing them
  * to the for-dispatch
@@ -841,6 +865,18 @@  void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 }
 EXPORT_SYMBOL_GPL(blk_mq_flush_busy_ctxs);
 
+struct request *blk_mq_dispatch_rq_from_ctxs(struct blk_mq_hw_ctx *hctx)
+{
+	struct ctx_iter_data data = {
+		.hctx = hctx,
+		.rq   = NULL,
+	};
+
+	sbitmap_for_each_set(&hctx->ctx_map, dispatch_rq_from_ctx, &data);
+
+	return data.rq;
+}
+
 static inline unsigned int queued_to_index(unsigned int queued)
 {
 	if (!queued)
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 60b01c0309bc..0c398f29dc4b 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -35,6 +35,7 @@  void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
 bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx);
 bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
 				bool wait);
+struct request *blk_mq_dispatch_rq_from_ctxs(struct blk_mq_hw_ctx *hctx);
 
 /*
  * Internal helpers for allocating/freeing the request map