diff mbox series

[v5,2/3] scsi: core: Support disabling fair tag sharing

Message ID 20231114180426.1184601-3-bvanassche@acm.org (mailing list archive)
State New, archived
Headers show
Series Disable fair tag sharing for UFS devices | expand

Commit Message

Bart Van Assche Nov. 14, 2023, 6:04 p.m. UTC
Allow SCSI drivers to disable the block layer fair tag sharing algorithm.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Keith Busch <kbusch@kernel.org>
Cc: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Cc: Yu Kuai <yukuai1@huaweicloud.com>
Cc: Ed Tsai <ed.tsai@mediatek.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/hosts.c     | 1 +
 drivers/scsi/scsi_lib.c  | 2 ++
 include/scsi/scsi_host.h | 6 ++++++
 3 files changed, 9 insertions(+)

Comments

Yu Kuai Nov. 15, 2023, 7:24 a.m. UTC | #1
Hi,

在 2023/11/15 2:04, Bart Van Assche 写道:
> Allow SCSI drivers to disable the block layer fair tag sharing algorithm.
> 
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Martin K. Petersen <martin.petersen@oracle.com>
> Cc: Ming Lei <ming.lei@redhat.com>
> Cc: Keith Busch <kbusch@kernel.org>
> Cc: Damien Le Moal <damien.lemoal@opensource.wdc.com>
> Cc: Yu Kuai <yukuai1@huaweicloud.com>
> Cc: Ed Tsai <ed.tsai@mediatek.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>   drivers/scsi/hosts.c     | 1 +
>   drivers/scsi/scsi_lib.c  | 2 ++
>   include/scsi/scsi_host.h | 6 ++++++
>   3 files changed, 9 insertions(+)
> 
> diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
> index d7f51b84f3c7..872f87001374 100644
> --- a/drivers/scsi/hosts.c
> +++ b/drivers/scsi/hosts.c
> @@ -442,6 +442,7 @@ struct Scsi_Host *scsi_host_alloc(const struct scsi_host_template *sht, int priv
>   	shost->no_write_same = sht->no_write_same;
>   	shost->host_tagset = sht->host_tagset;
>   	shost->queuecommand_may_block = sht->queuecommand_may_block;
> +	shost->disable_fair_tag_sharing = sht->disable_fair_tag_sharing;

Can we also consider disabling fair tag sharing by default for
drivers whose total number of driver tags is below a threshold?

Thanks,
Kuai

>   
>   	if (shost_eh_deadline == -1 || !sht->eh_host_reset_handler)
>   		shost->eh_deadline = -1;
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index cf3864f72093..291fbfacf542 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -1984,6 +1984,8 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
>   		BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy);
>   	if (shost->queuecommand_may_block)
>   		tag_set->flags |= BLK_MQ_F_BLOCKING;
> +	if (shost->disable_fair_tag_sharing)
> +		tag_set->flags |= BLK_MQ_F_DISABLE_FAIR_TAG_SHARING;
>   	tag_set->driver_data = shost;
>   	if (shost->host_tagset)
>   		tag_set->flags |= BLK_MQ_F_TAG_HCTX_SHARED;
> diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
> index 3b907fc2ef08..04238ae9e22c 100644
> --- a/include/scsi/scsi_host.h
> +++ b/include/scsi/scsi_host.h
> @@ -464,6 +464,9 @@ struct scsi_host_template {
>   	/* The queuecommand callback may block. See also BLK_MQ_F_BLOCKING. */
>   	unsigned queuecommand_may_block:1;
>   
> +	/* See also BLK_MQ_F_DISABLE_FAIR_TAG_SHARING. */
> +	unsigned disable_fair_tag_sharing:1;
> +
>   	/*
>   	 * Countdown for host blocking with no commands outstanding.
>   	 */
> @@ -662,6 +665,9 @@ struct Scsi_Host {
>   	/* The queuecommand callback may block. See also BLK_MQ_F_BLOCKING. */
>   	unsigned queuecommand_may_block:1;
>   
> +	/* See also BLK_MQ_F_DISABLE_FAIR_TAG_SHARING. */
> +	unsigned disable_fair_tag_sharing:1;
> +
>   	/* Host responded with short (<36 bytes) INQUIRY result */
>   	unsigned short_inquiry:1;
>   
> 
> 
> .
>
Bart Van Assche Nov. 15, 2023, 6:19 p.m. UTC | #2
On 11/14/23 23:24, Yu Kuai wrote:
> 在 2023/11/15 2:04, Bart Van Assche 写道:
>> diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
>> index d7f51b84f3c7..872f87001374 100644
>> --- a/drivers/scsi/hosts.c
>> +++ b/drivers/scsi/hosts.c
>> @@ -442,6 +442,7 @@ struct Scsi_Host *scsi_host_alloc(const struct scsi_host_template *sht, int priv
>>       shost->no_write_same = sht->no_write_same;
>>       shost->host_tagset = sht->host_tagset;
>>       shost->queuecommand_may_block = sht->queuecommand_may_block;
>> +    shost->disable_fair_tag_sharing = sht->disable_fair_tag_sharing;
> 
> Can we also consider to disable fair tag sharing by default for the
> driver that total driver tags is less than a threshold?
I don't want to do this because such a change could disable fair tag
sharing for drivers that support both SSDs and hard disks being associated
with a single SCSI host.

Thanks,

Bart.
Yu Kuai Nov. 16, 2023, 1:08 a.m. UTC | #3
Hi,

在 2023/11/16 2:19, Bart Van Assche 写道:
> On 11/14/23 23:24, Yu Kuai wrote:
>> 在 2023/11/15 2:04, Bart Van Assche 写道:
>>> diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
>>> index d7f51b84f3c7..872f87001374 100644
>>> --- a/drivers/scsi/hosts.c
>>> +++ b/drivers/scsi/hosts.c
>>> @@ -442,6 +442,7 @@ struct Scsi_Host *scsi_host_alloc(const struct 
>>> scsi_host_template *sht, int priv
>>>       shost->no_write_same = sht->no_write_same;
>>>       shost->host_tagset = sht->host_tagset;
>>>       shost->queuecommand_may_block = sht->queuecommand_may_block;
>>> +    shost->disable_fair_tag_sharing = sht->disable_fair_tag_sharing;
>>
>> Can we also consider to disable fair tag sharing by default for the
>> driver that total driver tags is less than a threshold?
> I don't want to do this because such a change could disable fair tag
> sharing for drivers that support both SSDs and hard disks being associated
> with a single SCSI host.

Ok, then is it possible to add a sysfs entry to disable/enable fair
tag sharing manually?

Thanks,
Kuai

> 
> Thanks,
> 
> Bart.
> 
> .
>
Bart Van Assche Nov. 16, 2023, 9:35 p.m. UTC | #4
On 11/15/23 17:08, Yu Kuai wrote:
> 在 2023/11/16 2:19, Bart Van Assche 写道:
>> On 11/14/23 23:24, Yu Kuai wrote:
>>> Can we also consider to disable fair tag sharing by default for the
>>> driver that total driver tags is less than a threshold?
>> I don't want to do this because such a change could disable fair tag
>> sharing for drivers that support both SSDs and hard disks being associated
>> with a single SCSI host.
> 
> Ok, then is this possible to add a sysfs entry to disable/enable fair
> tag sharing manually?

Hi Yu,

I will look into this.

Thanks,

Bart.
Bart Van Assche Nov. 20, 2023, 11:03 p.m. UTC | #5
On 11/15/23 17:08, Yu Kuai wrote:
> Hi,
> 
> 在 2023/11/16 2:19, Bart Van Assche 写道:
>> On 11/14/23 23:24, Yu Kuai wrote:
>>> 在 2023/11/15 2:04, Bart Van Assche 写道:
>>>> diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
>>>> index d7f51b84f3c7..872f87001374 100644
>>>> --- a/drivers/scsi/hosts.c
>>>> +++ b/drivers/scsi/hosts.c
>>>> @@ -442,6 +442,7 @@ struct Scsi_Host *scsi_host_alloc(const struct scsi_host_template *sht, int priv
>>>>       shost->no_write_same = sht->no_write_same;
>>>>       shost->host_tagset = sht->host_tagset;
>>>>       shost->queuecommand_may_block = sht->queuecommand_may_block;
>>>> +    shost->disable_fair_tag_sharing = sht->disable_fair_tag_sharing;
>>>
>>> Can we also consider to disable fair tag sharing by default for the
>>> driver that total driver tags is less than a threshold?
>> I don't want to do this because such a change could disable fair tag
>> sharing for drivers that support both SSDs and hard disks being associated
>> with a single SCSI host.
> 
> Ok, then is this possible to add a sysfs entry to disable/enable fair
> tag sharing manually?

How about replacing patch 1/3 from this series with the patch below?

Thanks,

Bart.


     block: Make fair tag sharing configurable

     The fair sharing algorithm has a negative performance impact for storage
     devices for which the full queue depth is required to reach peak
     performance, e.g. UFS devices. This is because it takes a long time after
     a request queue has become inactive before tags are reassigned to the
     active request queue(s). Since making tag sharing fair is not needed if
     the request processing latency is similar for all request queues,
     introduce a sysfs attribute for controlling the fair tag sharing
     algorithm. Increase BLK_MQ_F_ALLOC_POLICY_START_BIT to prevent the fair
     tag sharing flag from overlapping with the tag allocation policy.

     Cc: Christoph Hellwig <hch@lst.de>
     Cc: Martin K. Petersen <martin.petersen@oracle.com>
     Cc: Ming Lei <ming.lei@redhat.com>
     Cc: Keith Busch <kbusch@kernel.org>
     Cc: Damien Le Moal <damien.lemoal@opensource.wdc.com>
     Cc: Yu Kuai <yukuai1@huaweicloud.com>
     Cc: Ed Tsai <ed.tsai@mediatek.com>
     Signed-off-by: Bart Van Assche <bvanassche@acm.org>

diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
index 1fe9a553c37b..7b66eb938882 100644
--- a/Documentation/ABI/stable/sysfs-block
+++ b/Documentation/ABI/stable/sysfs-block
@@ -269,6 +269,19 @@ Description:
  		specific passthrough mechanisms.


+What:		/sys/block/<disk>/queue/fair_sharing
+Date:		November 2023
+Contact:	linux-block@vger.kernel.org
+Description:
+		[RW] If hardware queues are shared across request queues, by
+		default the request tags are distributed evenly across the
+		active request queues. If the total number of tags is low and
+		if the workload differs per request queue this approach may
+		reduce throughput. This sysfs attribute controls whether or not
+		the fair tag sharing algorithm is enabled. 1 means enabled
+		while 0 means disabled.
+
+
  What:		/sys/block/<disk>/queue/fua
  Date:		May 2018
  Contact:	linux-block@vger.kernel.org
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 5cbeb9344f2f..f41408103106 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -198,6 +198,7 @@ static const char *const hctx_flag_name[] = {
  	HCTX_FLAG_NAME(NO_SCHED),
  	HCTX_FLAG_NAME(STACKING),
  	HCTX_FLAG_NAME(TAG_HCTX_SHARED),
+	HCTX_FLAG_NAME(DISABLE_FAIR_TAG_SHARING),
  };
  #undef HCTX_FLAG_NAME

diff --git a/block/blk-mq.h b/block/blk-mq.h
index f75a9ecfebde..eda6bd0611ea 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -416,7 +416,8 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
  {
  	unsigned int depth, users;

-	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
+	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) ||
+	    (hctx->flags & BLK_MQ_F_DISABLE_FAIR_TAG_SHARING))
  		return true;

  	/*
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 63e481262336..f044bbe57509 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -473,6 +473,43 @@ static ssize_t queue_dax_show(struct request_queue *q, char *page)
  	return queue_var_show(blk_queue_dax(q), page);
  }

+static ssize_t queue_fair_sharing_show(struct request_queue *q, char *page)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned long i;
+	bool fair_sharing = true;
+
+	/* q->sysfs_lock serializes against blk_mq_realloc_hw_ctxs() */
+	queue_for_each_hw_ctx(q, hctx, i)
+		if (hctx->flags & BLK_MQ_F_DISABLE_FAIR_TAG_SHARING)
+			fair_sharing = false;
+
+	return sysfs_emit(page, "%u\n", fair_sharing);
+}
+
+static ssize_t queue_fair_sharing_store(struct request_queue *q,
+					const char *page, size_t count)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned long i;
+	int res, val;
+
+	res = kstrtoint(page, 0, &val);
+	if (res < 0)
+		return res;
+
+	/* q->sysfs_lock serializes against blk_mq_realloc_hw_ctxs() */
+	if (val) {
+		queue_for_each_hw_ctx(q, hctx, i)
+			hctx->flags &= ~BLK_MQ_F_DISABLE_FAIR_TAG_SHARING;
+	} else {
+		queue_for_each_hw_ctx(q, hctx, i)
+			hctx->flags |= BLK_MQ_F_DISABLE_FAIR_TAG_SHARING;
+	}
+
+	return count;
+}
+
  #define QUEUE_RO_ENTRY(_prefix, _name)			\
  static struct queue_sysfs_entry _prefix##_entry = {	\
  	.attr	= { .name = _name, .mode = 0444 },	\
@@ -542,6 +579,7 @@ QUEUE_RW_ENTRY(queue_nonrot, "rotational");
  QUEUE_RW_ENTRY(queue_iostats, "iostats");
  QUEUE_RW_ENTRY(queue_random, "add_random");
  QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");
+QUEUE_RW_ENTRY(queue_fair_sharing, "fair_sharing");

  #ifdef CONFIG_BLK_WBT
  static ssize_t queue_var_store64(s64 *var, const char *page)
@@ -664,6 +702,7 @@ static struct attribute *blk_mq_queue_attrs[] = {
  	&elv_iosched_entry.attr,
  	&queue_rq_affinity_entry.attr,
  	&queue_io_timeout_entry.attr,
+	&queue_fair_sharing_entry.attr,
  #ifdef CONFIG_BLK_WBT
  	&queue_wb_lat_entry.attr,
  #endif
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 1ab3081c82ed..fd5a51e8b628 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -662,7 +662,8 @@ enum {
  	 * or shared hwqs instead of 'mq-deadline'.
  	 */
  	BLK_MQ_F_NO_SCHED_BY_DEFAULT	= 1 << 7,
-	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
+	BLK_MQ_F_DISABLE_FAIR_TAG_SHARING = 1 << 8,
+	BLK_MQ_F_ALLOC_POLICY_START_BIT = 16,
  	BLK_MQ_F_ALLOC_POLICY_BITS = 1,

  	BLK_MQ_S_STOPPED	= 0,
Yu Kuai Nov. 21, 2023, 1:35 a.m. UTC | #6
Hi,

在 2023/11/21 7:03, Bart Van Assche 写道:
> On 11/15/23 17:08, Yu Kuai wrote:
>> Hi,
>>
>> 在 2023/11/16 2:19, Bart Van Assche 写道:
>>> On 11/14/23 23:24, Yu Kuai wrote:
>>>> 在 2023/11/15 2:04, Bart Van Assche 写道:
>>>>> diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
>>>>> index d7f51b84f3c7..872f87001374 100644
>>>>> --- a/drivers/scsi/hosts.c
>>>>> +++ b/drivers/scsi/hosts.c
>>>>> @@ -442,6 +442,7 @@ struct Scsi_Host *scsi_host_alloc(const struct 
>>>>> scsi_host_template *sht, int priv
>>>>>       shost->no_write_same = sht->no_write_same;
>>>>>       shost->host_tagset = sht->host_tagset;
>>>>>       shost->queuecommand_may_block = sht->queuecommand_may_block;
>>>>> +    shost->disable_fair_tag_sharing = sht->disable_fair_tag_sharing;
>>>>
>>>> Can we also consider to disable fair tag sharing by default for the
>>>> driver that total driver tags is less than a threshold?
>>> I don't want to do this because such a change could disable fair tag
>>> sharing for drivers that support both SSDs and hard disks being 
>>> associated
>>> with a single SCSI host.
>>
>> Ok, then is this possible to add a sysfs entry to disable/enable fair
>> tag sharing manually?
> 
> How about replacing patch 1/3 from this series with the patch below?
> 
> Thanks,
> 
> Bart.
> 
> 
>      block: Make fair tag sharing configurable
> 
>      The fair sharing algorithm has a negative performance impact for 
> storage
>      devices for which the full queue depth is required to reach peak
>      performance, e.g. UFS devices. This is because it takes long after a
>      request queue became inactive until tags are reassigned to the active
>      request queue(s). Since making tag sharing fair is not needed if the
>      request processing latency is similar for all request queues, 
> introduce
>      a sysfs attribute for controlling the fair tag sharing algorithm.
>      Increase BLK_MQ_F_ALLOC_POLICY_START_BIT to prevent that the fair tag
>      sharing flag overlaps with the tag allocation policy.
> 
>      Cc: Christoph Hellwig <hch@lst.de>
>      Cc: Martin K. Petersen <martin.petersen@oracle.com>
>      Cc: Ming Lei <ming.lei@redhat.com>
>      Cc: Keith Busch <kbusch@kernel.org>
>      Cc: Damien Le Moal <damien.lemoal@opensource.wdc.com>
>      Cc: Yu Kuai <yukuai1@huaweicloud.com>
>      Cc: Ed Tsai <ed.tsai@mediatek.com>
>      Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> 
> diff --git a/Documentation/ABI/stable/sysfs-block 
> b/Documentation/ABI/stable/sysfs-block
> index 1fe9a553c37b..7b66eb938882 100644
> --- a/Documentation/ABI/stable/sysfs-block
> +++ b/Documentation/ABI/stable/sysfs-block
> @@ -269,6 +269,19 @@ Description:
>           specific passthrough mechanisms.
> 
> 
> +What:        /sys/block/<disk>/queue/fair_sharing
> +Date:        November 2023
> +Contact:    linux-block@vger.kernel.org
> +Description:
> +        [RW] If hardware queues are shared across request queues, by
> +        default the request tags are distributed evenly across the
> +        active request queues. If the total number of tags is low and
> +        if the workload differs per request queue this approach may
> +        reduce throughput. This sysfs attribute controls whether or not
> +        the fair tag sharing algorithm is enabled. 1 means enabled
> +        while 0 means disabled.
> +
> +
>   What:        /sys/block/<disk>/queue/fua
>   Date:        May 2018
>   Contact:    linux-block@vger.kernel.org
> diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
> index 5cbeb9344f2f..f41408103106 100644
> --- a/block/blk-mq-debugfs.c
> +++ b/block/blk-mq-debugfs.c
> @@ -198,6 +198,7 @@ static const char *const hctx_flag_name[] = {
>       HCTX_FLAG_NAME(NO_SCHED),
>       HCTX_FLAG_NAME(STACKING),
>       HCTX_FLAG_NAME(TAG_HCTX_SHARED),
> +    HCTX_FLAG_NAME(DISABLE_FAIR_TAG_SHARING),
>   };
>   #undef HCTX_FLAG_NAME
> 
> diff --git a/block/blk-mq.h b/block/blk-mq.h
> index f75a9ecfebde..eda6bd0611ea 100644
> --- a/block/blk-mq.h
> +++ b/block/blk-mq.h
> @@ -416,7 +416,8 @@ static inline bool hctx_may_queue(struct 
> blk_mq_hw_ctx *hctx,
>   {
>       unsigned int depth, users;
> 
> -    if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
> +    if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) ||
> +        (hctx->flags & BLK_MQ_F_DISABLE_FAIR_TAG_SHARING))
>           return true;
> 
>       /*
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index 63e481262336..f044bbe57509 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -473,6 +473,43 @@ static ssize_t queue_dax_show(struct request_queue 
> *q, char *page)
>       return queue_var_show(blk_queue_dax(q), page);
>   }
> 
> +static ssize_t queue_fair_sharing_show(struct request_queue *q, char 
> *page)
> +{
> +    struct blk_mq_hw_ctx *hctx;
> +    unsigned long i;
> +    bool fair_sharing = true;
> +
> +    /* q->sysfs_lock serializes against blk_mq_realloc_hw_ctxs() */
> +    queue_for_each_hw_ctx(q, hctx, i)
> +        if (hctx->flags & BLK_MQ_F_DISABLE_FAIR_TAG_SHARING)
> +            fair_sharing = false;
> +
> +    return sysfs_emit(page, "%u\n", fair_sharing);
> +}
> +
> +static ssize_t queue_fair_sharing_store(struct request_queue *q,
> +                    const char *page, size_t count)
> +{
> +    struct blk_mq_hw_ctx *hctx;
> +    unsigned long i;
> +    int res, val;
> +
> +    res = kstrtoint(page, 0, &val);
> +    if (res < 0)
> +        return res;
> +
> +    /* q->sysfs_lock serializes against blk_mq_realloc_hw_ctxs() */
> +    if (val) {
> +        queue_for_each_hw_ctx(q, hctx, i)
> +            hctx->flags &= ~BLK_MQ_F_DISABLE_FAIR_TAG_SHARING;
> +    } else {
> +        queue_for_each_hw_ctx(q, hctx, i)
> +            hctx->flags |= BLK_MQ_F_DISABLE_FAIR_TAG_SHARING;
> +    }

I'm not sure that changing just one queue, instead of all queues using the
same tag_set, won't cause any regression. For example,
BLK_MQ_F_TAG_QUEUE_SHARED is not cleared, and other queues are still
sharing tags fairly while this queue doesn't.

Perhaps we can add a helper similar to __blk_mq_update_nr_hw_queues
to update all queues using the same tag_set?

Thanks,
Kuai

> +
> +    return count;
> +}
> +
>   #define QUEUE_RO_ENTRY(_prefix, _name)            \
>   static struct queue_sysfs_entry _prefix##_entry = {    \
>       .attr    = { .name = _name, .mode = 0444 },    \
> @@ -542,6 +579,7 @@ QUEUE_RW_ENTRY(queue_nonrot, "rotational");
>   QUEUE_RW_ENTRY(queue_iostats, "iostats");
>   QUEUE_RW_ENTRY(queue_random, "add_random");
>   QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");
> +QUEUE_RW_ENTRY(queue_fair_sharing, "fair_sharing");
> 
>   #ifdef CONFIG_BLK_WBT
>   static ssize_t queue_var_store64(s64 *var, const char *page)
> @@ -664,6 +702,7 @@ static struct attribute *blk_mq_queue_attrs[] = {
>       &elv_iosched_entry.attr,
>       &queue_rq_affinity_entry.attr,
>       &queue_io_timeout_entry.attr,
> +    &queue_fair_sharing_entry.attr,
>   #ifdef CONFIG_BLK_WBT
>       &queue_wb_lat_entry.attr,
>   #endif
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index 1ab3081c82ed..fd5a51e8b628 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -662,7 +662,8 @@ enum {
>        * or shared hwqs instead of 'mq-deadline'.
>        */
>       BLK_MQ_F_NO_SCHED_BY_DEFAULT    = 1 << 7,
> -    BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
> +    BLK_MQ_F_DISABLE_FAIR_TAG_SHARING = 1 << 8,
> +    BLK_MQ_F_ALLOC_POLICY_START_BIT = 16,
>       BLK_MQ_F_ALLOC_POLICY_BITS = 1,
> 
>       BLK_MQ_S_STOPPED    = 0,
> 
> 
> .
>
Bart Van Assche Nov. 21, 2023, 7:32 p.m. UTC | #7
On 11/20/23 17:35, Yu Kuai wrote:
> I'm not sure that changing just one queue, instead of all queues using the
> same tag_set, won't cause any regression. For example,
> BLK_MQ_F_TAG_QUEUE_SHARED is not cleared, and other queues are still
> sharing tags fairly while this queue doesn't.
> 
> Perhaps we can add a helper similar to __blk_mq_update_nr_hw_queues
> to update all queues using the same tag_set?

Hi Kuai,

How about the patch below?

Thanks,

Bart.


diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
index 1fe9a553c37b..7b66eb938882 100644
--- a/Documentation/ABI/stable/sysfs-block
+++ b/Documentation/ABI/stable/sysfs-block
@@ -269,6 +269,19 @@ Description:
  		specific passthrough mechanisms.


+What:		/sys/block/<disk>/queue/fair_sharing
+Date:		November 2023
+Contact:	linux-block@vger.kernel.org
+Description:
+		[RW] If hardware queues are shared across request queues, by
+		default the request tags are distributed evenly across the
+		active request queues. If the total number of tags is low and
+		if the workload differs per request queue this approach may
+		reduce throughput. This sysfs attribute controls whether or not
+		the fair tag sharing algorithm is enabled. 1 means enabled
+		while 0 means disabled.
+
+
  What:		/sys/block/<disk>/queue/fua
  Date:		May 2018
  Contact:	linux-block@vger.kernel.org
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 5cbeb9344f2f..f41408103106 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -198,6 +198,7 @@ static const char *const hctx_flag_name[] = {
  	HCTX_FLAG_NAME(NO_SCHED),
  	HCTX_FLAG_NAME(STACKING),
  	HCTX_FLAG_NAME(TAG_HCTX_SHARED),
+	HCTX_FLAG_NAME(DISABLE_FAIR_TAG_SHARING),
  };
  #undef HCTX_FLAG_NAME

diff --git a/block/blk-mq.h b/block/blk-mq.h
index f75a9ecfebde..eda6bd0611ea 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -416,7 +416,8 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
  {
  	unsigned int depth, users;

-	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
+	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) ||
+	    (hctx->flags & BLK_MQ_F_DISABLE_FAIR_TAG_SHARING))
  		return true;

  	/*
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 0b2d04766324..0009450dc8cf 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -24,6 +24,7 @@ struct queue_sysfs_entry {
  	struct attribute attr;
  	ssize_t (*show)(struct request_queue *, char *);
  	ssize_t (*store)(struct request_queue *, const char *, size_t);
+	bool no_sysfs_mutex;
  };

  static ssize_t
@@ -473,6 +474,55 @@ static ssize_t queue_dax_show(struct request_queue *q, char *page)
  	return queue_var_show(blk_queue_dax(q), page);
  }

+static ssize_t queue_fair_sharing_show(struct request_queue *q, char *page)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned long i;
+	bool fair_sharing = true;
+
+	/* Serialize against blk_mq_realloc_hw_ctxs() */
+	mutex_lock(&q->sysfs_lock);
+	queue_for_each_hw_ctx(q, hctx, i)
+		if (hctx->flags & BLK_MQ_F_DISABLE_FAIR_TAG_SHARING)
+			fair_sharing = false;
+	mutex_unlock(&q->sysfs_lock);
+
+	return sysfs_emit(page, "%u\n", fair_sharing);
+}
+
+static ssize_t queue_fair_sharing_store(struct request_queue *q,
+					const char *page, size_t count)
+{
+	const unsigned int DFTS_BIT = ilog2(BLK_MQ_F_DISABLE_FAIR_TAG_SHARING);
+	struct blk_mq_tag_set *set = q->tag_set;
+	struct blk_mq_hw_ctx *hctx;
+	unsigned long i;
+	int res;
+	bool val;
+
+	res = kstrtobool(page, &val);
+	if (res < 0)
+		return res;
+
+	mutex_lock(&set->tag_list_lock);
+	clear_bit(DFTS_BIT, &set->flags);
+	list_for_each_entry(q, &set->tag_list, tag_set_list) {
+		/* Serialize against blk_mq_realloc_hw_ctxs() */
+		mutex_lock(&q->sysfs_lock);
+		if (val) {
+			queue_for_each_hw_ctx(q, hctx, i)
+				clear_bit(DFTS_BIT, &hctx->flags);
+		} else {
+			queue_for_each_hw_ctx(q, hctx, i)
+				set_bit(DFTS_BIT, &hctx->flags);
+		}
+		mutex_unlock(&q->sysfs_lock);
+	}
+	mutex_unlock(&set->tag_list_lock);
+
+	return count;
+}
+
  #define QUEUE_RO_ENTRY(_prefix, _name)			\
  static struct queue_sysfs_entry _prefix##_entry = {	\
  	.attr	= { .name = _name, .mode = 0444 },	\
@@ -486,6 +536,14 @@ static struct queue_sysfs_entry _prefix##_entry = {	\
  	.store	= _prefix##_store,			\
  };

+#define QUEUE_RW_ENTRY_NO_SYSFS_MUTEX(_prefix, _name)       \
+	static struct queue_sysfs_entry _prefix##_entry = { \
+		.attr = { .name = _name, .mode = 0644 },    \
+		.show = _prefix##_show,                     \
+		.store = _prefix##_store,                   \
+		.no_sysfs_mutex = true,                     \
+	};
+
  QUEUE_RW_ENTRY(queue_requests, "nr_requests");
  QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb");
  QUEUE_RW_ENTRY(queue_max_sectors, "max_sectors_kb");
@@ -542,6 +600,7 @@ QUEUE_RW_ENTRY(queue_nonrot, "rotational");
  QUEUE_RW_ENTRY(queue_iostats, "iostats");
  QUEUE_RW_ENTRY(queue_random, "add_random");
  QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");
+QUEUE_RW_ENTRY_NO_SYSFS_MUTEX(queue_fair_sharing, "fair_sharing");

  #ifdef CONFIG_BLK_WBT
  static ssize_t queue_var_store64(s64 *var, const char *page)
@@ -666,6 +725,7 @@ static struct attribute *blk_mq_queue_attrs[] = {
  	&elv_iosched_entry.attr,
  	&queue_rq_affinity_entry.attr,
  	&queue_io_timeout_entry.attr,
+	&queue_fair_sharing_entry.attr,
  #ifdef CONFIG_BLK_WBT
  	&queue_wb_lat_entry.attr,
  #endif
@@ -723,6 +783,10 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)

  	if (!entry->show)
  		return -EIO;
+
+	if (entry->no_sysfs_mutex)
+		return entry->show(q, page);
+
  	mutex_lock(&q->sysfs_lock);
  	res = entry->show(q, page);
  	mutex_unlock(&q->sysfs_lock);
@@ -741,6 +805,9 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
  	if (!entry->store)
  		return -EIO;

+	if (entry->no_sysfs_mutex)
+		return entry->store(q, page, length);
+
  	mutex_lock(&q->sysfs_lock);
  	res = entry->store(q, page, length);
  	mutex_unlock(&q->sysfs_lock);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 1ab3081c82ed..aadb74aa23a3 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -503,7 +503,7 @@ struct blk_mq_tag_set {
  	unsigned int		cmd_size;
  	int			numa_node;
  	unsigned int		timeout;
-	unsigned int		flags;
+	unsigned long		flags;
  	void			*driver_data;

  	struct blk_mq_tags	**tags;
@@ -662,7 +662,8 @@ enum {
  	 * or shared hwqs instead of 'mq-deadline'.
  	 */
  	BLK_MQ_F_NO_SCHED_BY_DEFAULT	= 1 << 7,
-	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
+	BLK_MQ_F_DISABLE_FAIR_TAG_SHARING = 1 << 8,
+	BLK_MQ_F_ALLOC_POLICY_START_BIT = 16,
  	BLK_MQ_F_ALLOC_POLICY_BITS = 1,

  	BLK_MQ_S_STOPPED	= 0,
Yu Kuai Nov. 23, 2023, 6:29 a.m. UTC | #8
Hi,

在 2023/11/22 3:32, Bart Van Assche 写道:
> On 11/20/23 17:35, Yu Kuai wrote:
>> I'm not sure that changing just one queue instead of all queues using the
>> same tag_set won't cause any regression; for example,
>> BLK_MQ_F_TAG_QUEUE_SHARED is not cleared, and other queues are still
>> sharing tags fairly while this queue doesn't.
>>
>> Perhaps we can add a helper similar to __blk_mq_update_nr_hw_queues
>> to update all queues using the same tag_set?
> 
> Hi Kuai,
> 
> How about the patch below?

Thanks for the patch!
> 
> Thanks,
> 
> Bart.
> 
> 
> diff --git a/Documentation/ABI/stable/sysfs-block 
> b/Documentation/ABI/stable/sysfs-block
> index 1fe9a553c37b..7b66eb938882 100644
> --- a/Documentation/ABI/stable/sysfs-block
> +++ b/Documentation/ABI/stable/sysfs-block
> @@ -269,6 +269,19 @@ Description:
>           specific passthrough mechanisms.
> 
> 
> +What:        /sys/block/<disk>/queue/fair_sharing
> +Date:        November 2023
> +Contact:    linux-block@vger.kernel.org
> +Description:
> +        [RW] If hardware queues are shared across request queues, by
> +        default the request tags are distributed evenly across the
> +        active request queues. If the total number of tags is low and
> +        if the workload differs per request queue this approach may
> +        reduce throughput. This sysfs attribute controls whether or not
> +        the fair tag sharing algorithm is enabled. 1 means enabled
> +        while 0 means disabled.
> +
> +
>   What:        /sys/block/<disk>/queue/fua
>   Date:        May 2018
>   Contact:    linux-block@vger.kernel.org
> diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
> index 5cbeb9344f2f..f41408103106 100644
> --- a/block/blk-mq-debugfs.c
> +++ b/block/blk-mq-debugfs.c
> @@ -198,6 +198,7 @@ static const char *const hctx_flag_name[] = {
>       HCTX_FLAG_NAME(NO_SCHED),
>       HCTX_FLAG_NAME(STACKING),
>       HCTX_FLAG_NAME(TAG_HCTX_SHARED),
> +    HCTX_FLAG_NAME(DISABLE_FAIR_TAG_SHARING),
>   };
>   #undef HCTX_FLAG_NAME
> 
> diff --git a/block/blk-mq.h b/block/blk-mq.h
> index f75a9ecfebde..eda6bd0611ea 100644
> --- a/block/blk-mq.h
> +++ b/block/blk-mq.h
> @@ -416,7 +416,8 @@ static inline bool hctx_may_queue(struct 
> blk_mq_hw_ctx *hctx,
>   {
>       unsigned int depth, users;
> 
> -    if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
> +    if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) ||
> +        (hctx->flags & BLK_MQ_F_DISABLE_FAIR_TAG_SHARING))
>           return true;
> 
>       /*
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index 0b2d04766324..0009450dc8cf 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -24,6 +24,7 @@ struct queue_sysfs_entry {
>       struct attribute attr;
>       ssize_t (*show)(struct request_queue *, char *);
>       ssize_t (*store)(struct request_queue *, const char *, size_t);
> +    bool no_sysfs_mutex;
>   };
> 
>   static ssize_t
> @@ -473,6 +474,55 @@ static ssize_t queue_dax_show(struct request_queue 
> *q, char *page)
>       return queue_var_show(blk_queue_dax(q), page);
>   }
> 
> +static ssize_t queue_fair_sharing_show(struct request_queue *q, char 
> *page)
> +{
> +    struct blk_mq_hw_ctx *hctx;
> +    unsigned long i;
> +    bool fair_sharing = true;
> +
> +    /* Serialize against blk_mq_realloc_hw_ctxs() */
> +    mutex_lock(&q->sysfs_lock);
> +    queue_for_each_hw_ctx(q, hctx, i)
> +        if (hctx->flags & BLK_MQ_F_DISABLE_FAIR_TAG_SHARING)
> +            fair_sharing = false;
> +    mutex_unlock(&q->sysfs_lock);
> +
> +    return sysfs_emit(page, "%u\n", fair_sharing);
> +}
> +
> +static ssize_t queue_fair_sharing_store(struct request_queue *q,
> +                    const char *page, size_t count)
> +{
> +    const unsigned int DFTS_BIT = 
> ilog2(BLK_MQ_F_DISABLE_FAIR_TAG_SHARING);
> +    struct blk_mq_tag_set *set = q->tag_set;
> +    struct blk_mq_hw_ctx *hctx;
> +    unsigned long i;
> +    int res;
> +    bool val;
> +
> +    res = kstrtobool(page, &val);
> +    if (res < 0)
> +        return res;
> +
> +    mutex_lock(&set->tag_list_lock);
> +    clear_bit(DFTS_BIT, &set->flags);
> +    list_for_each_entry(q, &set->tag_list, tag_set_list) {
> +        /* Serialize against blk_mq_realloc_hw_ctxs() */

If set/clear bit runs concurrently with test bit from the I/O path, will
there be a problem? Why not freeze these queues?
> +        mutex_lock(&q->sysfs_lock);
> +        if (val) {
> +            queue_for_each_hw_ctx(q, hctx, i)
> +                clear_bit(DFTS_BIT, &hctx->flags);
> +        } else {
> +            queue_for_each_hw_ctx(q, hctx, i)
> +                set_bit(DFTS_BIT, &hctx->flags);
> +        }
> +        mutex_unlock(&q->sysfs_lock);
> +    }
> +    mutex_unlock(&set->tag_list_lock);
> +
> +    return count;
> +}
> +
>   #define QUEUE_RO_ENTRY(_prefix, _name)            \
>   static struct queue_sysfs_entry _prefix##_entry = {    \
>       .attr    = { .name = _name, .mode = 0444 },    \
> @@ -486,6 +536,14 @@ static struct queue_sysfs_entry _prefix##_entry = 
> {    \
>       .store    = _prefix##_store,            \
>   };
> 
> +#define QUEUE_RW_ENTRY_NO_SYSFS_MUTEX(_prefix, _name)       \
> +    static struct queue_sysfs_entry _prefix##_entry = { \
> +        .attr = { .name = _name, .mode = 0644 },    \
> +        .show = _prefix##_show,                     \
> +        .store = _prefix##_store,                   \
> +        .no_sysfs_mutex = true,                     \
> +    };
> +

This actually changes all the queues from the same tagset; can we add
this new entry to /sys/class/scsi_host/hostx/xxx?

Thanks,
Kuai

>   QUEUE_RW_ENTRY(queue_requests, "nr_requests");
>   QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb");
>   QUEUE_RW_ENTRY(queue_max_sectors, "max_sectors_kb");
> @@ -542,6 +600,7 @@ QUEUE_RW_ENTRY(queue_nonrot, "rotational");
>   QUEUE_RW_ENTRY(queue_iostats, "iostats");
>   QUEUE_RW_ENTRY(queue_random, "add_random");
>   QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");
> +QUEUE_RW_ENTRY_NO_SYSFS_MUTEX(queue_fair_sharing, "fair_sharing");
> 
>   #ifdef CONFIG_BLK_WBT
>   static ssize_t queue_var_store64(s64 *var, const char *page)
> @@ -666,6 +725,7 @@ static struct attribute *blk_mq_queue_attrs[] = {
>       &elv_iosched_entry.attr,
>       &queue_rq_affinity_entry.attr,
>       &queue_io_timeout_entry.attr,
> +    &queue_fair_sharing_entry.attr,
>   #ifdef CONFIG_BLK_WBT
>       &queue_wb_lat_entry.attr,
>   #endif
> @@ -723,6 +783,10 @@ queue_attr_show(struct kobject *kobj, struct 
> attribute *attr, char *page)
> 
>       if (!entry->show)
>           return -EIO;
> +
> +    if (entry->no_sysfs_mutex)
> +        return entry->show(q, page);
> +
>       mutex_lock(&q->sysfs_lock);
>       res = entry->show(q, page);
>       mutex_unlock(&q->sysfs_lock);
> @@ -741,6 +805,9 @@ queue_attr_store(struct kobject *kobj, struct 
> attribute *attr,
>       if (!entry->store)
>           return -EIO;
> 
> +    if (entry->no_sysfs_mutex)
> +        return entry->store(q, page, length);
> +
>       mutex_lock(&q->sysfs_lock);
>       res = entry->store(q, page, length);
>       mutex_unlock(&q->sysfs_lock);
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index 1ab3081c82ed..aadb74aa23a3 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -503,7 +503,7 @@ struct blk_mq_tag_set {
>       unsigned int        cmd_size;
>       int            numa_node;
>       unsigned int        timeout;
> -    unsigned int        flags;
> +    unsigned long        flags;
>       void            *driver_data;
> 
>       struct blk_mq_tags    **tags;
> @@ -662,7 +662,8 @@ enum {
>        * or shared hwqs instead of 'mq-deadline'.
>        */
>       BLK_MQ_F_NO_SCHED_BY_DEFAULT    = 1 << 7,
> -    BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
> +    BLK_MQ_F_DISABLE_FAIR_TAG_SHARING = 1 << 8,
> +    BLK_MQ_F_ALLOC_POLICY_START_BIT = 16,
>       BLK_MQ_F_ALLOC_POLICY_BITS = 1,
> 
>       BLK_MQ_S_STOPPED    = 0,
> 
> 
> 
> .
>
Bart Van Assche Nov. 27, 2023, 11:05 p.m. UTC | #9
On 11/22/23 22:29, Yu Kuai wrote:
> 在 2023/11/22 3:32, Bart Van Assche 写道:
>> +static ssize_t queue_fair_sharing_store(struct request_queue *q,
>> +                    const char *page, size_t count)
>> +{
>> +    const unsigned int DFTS_BIT = ilog2(BLK_MQ_F_DISABLE_FAIR_TAG_SHARING);
>> +    struct blk_mq_tag_set *set = q->tag_set;
>> +    struct blk_mq_hw_ctx *hctx;
>> +    unsigned long i;
>> +    int res;
>> +    bool val;
>> +
>> +    res = kstrtobool(page, &val);
>> +    if (res < 0)
>> +        return res;
>> +
>> +    mutex_lock(&set->tag_list_lock);
>> +    clear_bit(DFTS_BIT, &set->flags);
>> +    list_for_each_entry(q, &set->tag_list, tag_set_list) {
>> +        /* Serialize against blk_mq_realloc_hw_ctxs() */
> 
> If set/clear bit runs concurrently with test bit from the I/O path, will
> there be a problem? Why not freeze these queues?

If that happens the changes applied through this sysfs attribute may only take
effect after a short delay (depending on how fast changes are propagated from
one CPU to another). I don't think that this is an issue?
>> +#define QUEUE_RW_ENTRY_NO_SYSFS_MUTEX(_prefix, _name)       \
>> +    static struct queue_sysfs_entry _prefix##_entry = { \
>> +        .attr = { .name = _name, .mode = 0644 },    \
>> +        .show = _prefix##_show,                     \
>> +        .store = _prefix##_store,                   \
>> +        .no_sysfs_mutex = true,                     \
>> +    };
>> +
> 
> This actually changes all the queues from the same tagset; can we add
> this new entry to /sys/class/scsi_host/hostx/xxx?

That would make it impossible to disable fair tag sharing for block drivers
that are not based on the SCSI core. Are you sure that's what you want?

Thanks,

Bart.
Yu Kuai Nov. 28, 2023, 2:03 a.m. UTC | #10
Hi,

在 2023/11/28 7:05, Bart Van Assche 写道:
> On 11/22/23 22:29, Yu Kuai wrote:
>> 在 2023/11/22 3:32, Bart Van Assche 写道:
>>> +static ssize_t queue_fair_sharing_store(struct request_queue *q,
>>> +                    const char *page, size_t count)
>>> +{
>>> +    const unsigned int DFTS_BIT = 
>>> ilog2(BLK_MQ_F_DISABLE_FAIR_TAG_SHARING);
>>> +    struct blk_mq_tag_set *set = q->tag_set;
>>> +    struct blk_mq_hw_ctx *hctx;
>>> +    unsigned long i;
>>> +    int res;
>>> +    bool val;
>>> +
>>> +    res = kstrtobool(page, &val);
>>> +    if (res < 0)
>>> +        return res;
>>> +
>>> +    mutex_lock(&set->tag_list_lock);
>>> +    clear_bit(DFTS_BIT, &set->flags);
>>> +    list_for_each_entry(q, &set->tag_list, tag_set_list) {
>>> +        /* Serialize against blk_mq_realloc_hw_ctxs() */
>>
>> If set/clear bit runs concurrently with test bit from the I/O path, will
>> there be a problem? Why not freeze these queues?
> 
> If that happens the changes applied through this sysfs attribute may only
> take effect after a short delay (depending on how fast changes are
> propagated from one CPU to another). I don't think that this is an issue?

Because wake_batch is not updated, wait/wakeup actually still behaves the
same as before tag sharing was disabled.

I was worried that there might be missing wakeups. Why not use
blk_mq_update_tag_set_shared() directly to disable tag sharing? And for
new disks, change blk_mq_add_queue_tag_set() to not set
BLK_MQ_F_TAG_QUEUE_SHARED as well. This way we only need a new flag for
tag_set; that's why I want to add the new sysfs entry for scsi_host,
since there is no entry that represents a tag_set for now...

>>> +#define QUEUE_RW_ENTRY_NO_SYSFS_MUTEX(_prefix, _name)       \
>>> +    static struct queue_sysfs_entry _prefix##_entry = { \
>>> +        .attr = { .name = _name, .mode = 0644 },    \
>>> +        .show = _prefix##_show,                     \
>>> +        .store = _prefix##_store,                   \
>>> +        .no_sysfs_mutex = true,                     \
>>> +    };
>>> +
>>
>> This actually changes all the queues from the same tagset; can we add
>> this new entry to /sys/class/scsi_host/hostx/xxx?
> 
> That would make it impossible to disable fair tag sharing for block drivers
> that are not based on the SCSI core. Are you sure that's what you want?

Yes, if there are other drivers that share driver tags, this is not
good. Can you give some examples?

Thanks,
Kuai
> 
> Thanks,
> 
> Bart.
> .
>
Bart Van Assche Nov. 28, 2023, 6:17 p.m. UTC | #11
On 11/27/23 18:03, Yu Kuai wrote:
> I was worried that there might be missing wakeups. Why not use
> blk_mq_update_tag_set_shared() directly to disable tag sharing?

I think that calling blk_mq_update_tag_set_shared() to disable tag sharing
would be wrong because BLK_MQ_F_TAG_QUEUE_SHARED is also used for other
purposes than fair tag sharing. See e.g. blk_mq_mark_tag_wait().

>>>> +#define QUEUE_RW_ENTRY_NO_SYSFS_MUTEX(_prefix, _name)       \
>>>> +    static struct queue_sysfs_entry _prefix##_entry = { \
>>>> +        .attr = { .name = _name, .mode = 0644 },    \
>>>> +        .show = _prefix##_show,                     \
>>>> +        .store = _prefix##_store,                   \
>>>> +        .no_sysfs_mutex = true,                     \
>>>> +    };
>>>> +
>>>
>>> This actually changes all the queues from the same tagset; can we add
>>> this new entry to /sys/class/scsi_host/hostx/xxx?
>>
>> That would make it impossible to disable fair tag sharing for block drivers
>> that are not based on the SCSI core. Are you sure that's what you want?
> 
> Yes, if there are other drivers that share driver tags, this is not
> good. Can you give some examples?

There is one tag set for all NVMe namespaces associated with the same
controller. Anyway, I will move this sysfs attribute to the SCSI host and
will organize the code such that a similar sysfs attribute can be added
easily to other block drivers than the SCSI core if that would be considered
useful.

Bart.
diff mbox series

Patch

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index d7f51b84f3c7..872f87001374 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -442,6 +442,7 @@  struct Scsi_Host *scsi_host_alloc(const struct scsi_host_template *sht, int priv
 	shost->no_write_same = sht->no_write_same;
 	shost->host_tagset = sht->host_tagset;
 	shost->queuecommand_may_block = sht->queuecommand_may_block;
+	shost->disable_fair_tag_sharing = sht->disable_fair_tag_sharing;
 
 	if (shost_eh_deadline == -1 || !sht->eh_host_reset_handler)
 		shost->eh_deadline = -1;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index cf3864f72093..291fbfacf542 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1984,6 +1984,8 @@  int scsi_mq_setup_tags(struct Scsi_Host *shost)
 		BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy);
 	if (shost->queuecommand_may_block)
 		tag_set->flags |= BLK_MQ_F_BLOCKING;
+	if (shost->disable_fair_tag_sharing)
+		tag_set->flags |= BLK_MQ_F_DISABLE_FAIR_TAG_SHARING;
 	tag_set->driver_data = shost;
 	if (shost->host_tagset)
 		tag_set->flags |= BLK_MQ_F_TAG_HCTX_SHARED;
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 3b907fc2ef08..04238ae9e22c 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -464,6 +464,9 @@  struct scsi_host_template {
 	/* The queuecommand callback may block. See also BLK_MQ_F_BLOCKING. */
 	unsigned queuecommand_may_block:1;
 
+	/* See also BLK_MQ_F_DISABLE_FAIR_TAG_SHARING. */
+	unsigned disable_fair_tag_sharing:1;
+
 	/*
 	 * Countdown for host blocking with no commands outstanding.
 	 */
@@ -662,6 +665,9 @@  struct Scsi_Host {
 	/* The queuecommand callback may block. See also BLK_MQ_F_BLOCKING. */
 	unsigned queuecommand_may_block:1;
 
+	/* See also BLK_MQ_F_DISABLE_FAIR_TAG_SHARING. */
+	unsigned disable_fair_tag_sharing:1;
+
 	/* Host responded with short (<36 bytes) INQUIRY result */
 	unsigned short_inquiry:1;