
[RFC,v7,11/12] smartpqi: enable host tagset

Message ID 1591810159-240929-12-git-send-email-john.garry@huawei.com (mailing list archive)
State New, archived
Series blk-mq/scsi: Provide hostwide shared tags for SCSI HBAs

Commit Message

John Garry June 10, 2020, 5:29 p.m. UTC
From: Hannes Reinecke <hare@suse.de>

Enable host tagset for smartpqi; with this we can use the request
tag to look up the command in the pool, avoiding the list iteration
in the hot path.
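
With a hostwide tagset the block layer tag is unique across the HBA,
so the lookup reduces to a direct array index; a minimal sketch of
the mapping, using the names from the patch below:

	u32 blk_tag = blk_mq_unique_tag(scmd->request);
	u16 index = blk_mq_unique_tag_to_tag(blk_tag) + PQI_RESERVED_IO_SLOTS;

	io_request = &ctrl_info->io_request_pool[index];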

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/smartpqi/smartpqi_init.c | 38 ++++++++++++++++++++-------
 1 file changed, 28 insertions(+), 10 deletions(-)

Comments

John Garry July 14, 2020, 1:16 p.m. UTC | #1
Hi Hannes,

>   static struct pqi_io_request *pqi_alloc_io_request(
> -	struct pqi_ctrl_info *ctrl_info)
> +	struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd)
>   {
>   	struct pqi_io_request *io_request;
> +	unsigned int limit = PQI_RESERVED_IO_SLOTS;
>   	u16 i = ctrl_info->next_io_request_slot;	/* benignly racy */
>   
> -	while (1) {
> +	if (scmd) {
> +		u32 blk_tag = blk_mq_unique_tag(scmd->request);
> +
> +		i = blk_mq_unique_tag_to_tag(blk_tag) + limit;
>   		io_request = &ctrl_info->io_request_pool[i];

This looks ok

> -		if (atomic_inc_return(&io_request->refcount) == 1)
> -			break;
> -		atomic_dec(&io_request->refcount);
> -		i = (i + 1) % ctrl_info->max_io_slots;
> +		if (WARN_ON(atomic_inc_return(&io_request->refcount) > 1)) {
> +			atomic_dec(&io_request->refcount);
> +			return NULL;
> +		}
> +	} else {
> +		while (1) {
> +			io_request = &ctrl_info->io_request_pool[i];
> +			if (atomic_inc_return(&io_request->refcount) == 1)
> +				break;
> +			atomic_dec(&io_request->refcount);
> +			i = (i + 1) % limit;

To me, the range we use here looks incorrect. I would assume we should 
restrict the range to [max_io_slots - PQI_RESERVED_IO_SLOTS, max_io_slots).

But then your reserved commands support would solve that.

> +		}
>   	}
>   

Thanks,
John
John Garry July 14, 2020, 1:31 p.m. UTC | #2
On 14/07/2020 14:16, John Garry wrote:
> Hi Hannes,
> 
>>   static struct pqi_io_request *pqi_alloc_io_request(
>> -    struct pqi_ctrl_info *ctrl_info)
>> +    struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd)
>>   {
>>       struct pqi_io_request *io_request;
>> +    unsigned int limit = PQI_RESERVED_IO_SLOTS;
>>       u16 i = ctrl_info->next_io_request_slot;    /* benignly racy */
>> -    while (1) {
>> +    if (scmd) {
>> +        u32 blk_tag = blk_mq_unique_tag(scmd->request);
>> +
>> +        i = blk_mq_unique_tag_to_tag(blk_tag) + limit;
>>           io_request = &ctrl_info->io_request_pool[i];
> 
> This looks ok
> 
>> -        if (atomic_inc_return(&io_request->refcount) == 1)
>> -            break;
>> -        atomic_dec(&io_request->refcount);
>> -        i = (i + 1) % ctrl_info->max_io_slots;
>> +        if (WARN_ON(atomic_inc_return(&io_request->refcount) > 1)) {
>> +            atomic_dec(&io_request->refcount);
>> +            return NULL;
>> +        }
>> +    } else {
>> +        while (1) {
>> +            io_request = &ctrl_info->io_request_pool[i];
>> +            if (atomic_inc_return(&io_request->refcount) == 1)
>> +                break;
>> +            atomic_dec(&io_request->refcount);
>> +            i = (i + 1) % limit;
> 
> To me, the range we use here looks incorrect. I would assume we should 
> restrict the range to [max_io_slots - PQI_RESERVED_IO_SLOTS, max_io_slots).
> 

Sorry, I missed that you use i = blk_mq_unique_tag_to_tag(blk_tag) + 
limit for regular commands.

But we still set next_io_request_slot for regular commands, and may then 
read and use it for a reserved command.
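
E.g., a sketch of the interleaving I mean, with limit = 
PQI_RESERVED_IO_SLOTS:

	/* CPU0, regular command with blk-mq tag t: */
	i = t + limit;					/* i >= limit */
	ctrl_info->next_io_request_slot = (i + 1) % ctrl_info->max_io_slots;

	/* CPU1, reserved command, reads the racy hint: */
	i = ctrl_info->next_io_request_slot;		/* may be >= limit */
	io_request = &ctrl_info->io_request_pool[i];	/* a regular-I/O slot */
	...
	i = (i + 1) % limit;				/* wraps back into [0, limit) */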

Thanks
Hannes Reinecke July 14, 2020, 2:02 p.m. UTC | #3
On 7/14/20 3:16 PM, John Garry wrote:
> Hi Hannes,
> 
>>   static struct pqi_io_request *pqi_alloc_io_request(
>> -    struct pqi_ctrl_info *ctrl_info)
>> +    struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd)
>>   {
>>       struct pqi_io_request *io_request;
>> +    unsigned int limit = PQI_RESERVED_IO_SLOTS;
>>       u16 i = ctrl_info->next_io_request_slot;    /* benignly racy */
>>   -    while (1) {
>> +    if (scmd) {
>> +        u32 blk_tag = blk_mq_unique_tag(scmd->request);
>> +
>> +        i = blk_mq_unique_tag_to_tag(blk_tag) + limit;
>>           io_request = &ctrl_info->io_request_pool[i];
> 
> This looks ok
> 
>> -        if (atomic_inc_return(&io_request->refcount) == 1)
>> -            break;
>> -        atomic_dec(&io_request->refcount);
>> -        i = (i + 1) % ctrl_info->max_io_slots;
>> +        if (WARN_ON(atomic_inc_return(&io_request->refcount) > 1)) {
>> +            atomic_dec(&io_request->refcount);
>> +            return NULL;
>> +        }
>> +    } else {
>> +        while (1) {
>> +            io_request = &ctrl_info->io_request_pool[i];
>> +            if (atomic_inc_return(&io_request->refcount) == 1)
>> +                break;
>> +            atomic_dec(&io_request->refcount);
>> +            i = (i + 1) % limit;
> 
> To me, the range we use here looks incorrect. I would assume we should
> restrict the range to [max_io_slots - PQI_RESERVED_IO_SLOTS, max_io_slots).
> 
> But then your reserved commands support would solve that.
> 
This branch of the 'if' condition will only be taken for internal
commands, for which we only allow up to PQI_RESERVED_IO_SLOTS.
And we set the 'normal' I/O commands above at an offset, so we're fine here.
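
For reference, a sketch of the pool partitioning being described here,
using the names from the patch:

	/*
	 * io_request_pool index layout:
	 *
	 *   [0, PQI_RESERVED_IO_SLOTS)             internal commands
	 *   [PQI_RESERVED_IO_SLOTS, max_io_slots)  regular I/O, at index
	 *                                          blk-mq tag + PQI_RESERVED_IO_SLOTS
	 */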

Cheers,

Hannes
Don Brace July 14, 2020, 6:16 p.m. UTC | #4
Subject: Re: [PATCH RFC v7 11/12] smartpqi: enable host tagset


Both the driver author and I do not want to change how commands are processed in the smartpqi driver.

Nak.
John Garry July 15, 2020, 7:28 a.m. UTC | #5
On 14/07/2020 19:16, Don.Brace@microchip.com wrote:
> Subject: Re: [PATCH RFC v7 11/12] smartpqi: enable host tagset
> 
> 
> Both the driver author and I do not want to change how commands are processed in the smartpqi driver.
> 
> Nak.
> 

ok, your call. But it still seems that the driver should set the 
host_tagset flag.

Anyway, can you also let us know your stance on the hpsa change in this 
series?

https://lore.kernel.org/linux-scsi/1591810159-240929-1-git-send-email-john.garry@huawei.com/T/#m1057b8a8c23a9bf558a048817a1cda4a576291b2

thanks
John Garry Aug. 18, 2020, 8:33 a.m. UTC | #6
On 14/07/2020 15:02, Hannes Reinecke wrote:
> On 7/14/20 3:16 PM, John Garry wrote:
>> Hi Hannes,
>>
>>>    static struct pqi_io_request *pqi_alloc_io_request(
>>> -    struct pqi_ctrl_info *ctrl_info)
>>> +    struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd)
>>>    {
>>>        struct pqi_io_request *io_request;
>>> +    unsigned int limit = PQI_RESERVED_IO_SLOTS;
>>>        u16 i = ctrl_info->next_io_request_slot;    /* benignly racy */
>>>    -    while (1) {
>>> +    if (scmd) {
>>> +        u32 blk_tag = blk_mq_unique_tag(scmd->request);
>>> +
>>> +        i = blk_mq_unique_tag_to_tag(blk_tag) + limit;
>>>            io_request = &ctrl_info->io_request_pool[i];
>>
>> This looks ok
>>
>>> -        if (atomic_inc_return(&io_request->refcount) == 1)
>>> -            break;
>>> -        atomic_dec(&io_request->refcount);
>>> -        i = (i + 1) % ctrl_info->max_io_slots;
>>> +        if (WARN_ON(atomic_inc_return(&io_request->refcount) > 1)) {
>>> +            atomic_dec(&io_request->refcount);
>>> +            return NULL;
>>> +        }
>>> +    } else {
>>> +        while (1) {
>>> +            io_request = &ctrl_info->io_request_pool[i];
>>> +            if (atomic_inc_return(&io_request->refcount) == 1)
>>> +                break;
>>> +            atomic_dec(&io_request->refcount);
>>> +            i = (i + 1) % limit;
>>
>> To me, the range we use here looks incorrect. I would assume we should
>> restrict the range to [max_io_slots - PQI_RESERVED_IO_SLOTS, max_io_slots).
>>
>> But then your reserved commands support would solve that.
>>
> This branch of the 'if' condition will only be taken for internal
> commands, for which we only allow up to PQI_RESERVED_IO_SLOTS.
> And we set the 'normal' I/O commands above at an offset, so we're fine here.

Here is the code:

----8<----
	unsigned int limit = PQI_RESERVED_IO_SLOTS;
	u16 i = ctrl_info->next_io_request_slot; /* benignly racy */

	if (scmd) {
		u32 blk_tag = blk_mq_unique_tag(scmd->request);

		i = blk_mq_unique_tag_to_tag(blk_tag) + limit;
		io_request = &ctrl_info->io_request_pool[i];
		if (WARN_ON(atomic_inc_return(&io_request->refcount) > 1)) {
			atomic_dec(&io_request->refcount);
			return NULL;
		}
	} else {
		while (1) {
			io_request = &ctrl_info->io_request_pool[i];
			if (atomic_inc_return(&io_request->refcount) == 1)
				break;
			atomic_dec(&io_request->refcount);
			i = (i + 1) % limit;
		}
	}

	/* benignly racy */
	ctrl_info->next_io_request_slot = (i + 1) % ctrl_info->max_io_slots;

---->8----

Is the way we set ctrl_info->next_io_request_slot ok? Should it be:

ctrl_info->next_io_request_slot = (i + 1) % limit;

And should it also be moved into the 'else' leg for good measure?
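
I.e. something like this (untested):

	} else {
		while (1) {
			io_request = &ctrl_info->io_request_pool[i];
			if (atomic_inc_return(&io_request->refcount) == 1)
				break;
			atomic_dec(&io_request->refcount);
			i = (i + 1) % limit;
		}
		/* benignly racy; only the reserved-slot scan uses this */
		ctrl_info->next_io_request_slot = (i + 1) % limit;
	}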

Thanks,
John

Patch

diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index cd157f11eb22..1f4de4c2d876 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -575,17 +575,29 @@  static inline void pqi_reinit_io_request(struct pqi_io_request *io_request)
 }
 
 static struct pqi_io_request *pqi_alloc_io_request(
-	struct pqi_ctrl_info *ctrl_info)
+	struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd)
 {
 	struct pqi_io_request *io_request;
+	unsigned int limit = PQI_RESERVED_IO_SLOTS;
 	u16 i = ctrl_info->next_io_request_slot;	/* benignly racy */
 
-	while (1) {
+	if (scmd) {
+		u32 blk_tag = blk_mq_unique_tag(scmd->request);
+
+		i = blk_mq_unique_tag_to_tag(blk_tag) + limit;
 		io_request = &ctrl_info->io_request_pool[i];
-		if (atomic_inc_return(&io_request->refcount) == 1)
-			break;
-		atomic_dec(&io_request->refcount);
-		i = (i + 1) % ctrl_info->max_io_slots;
+		if (WARN_ON(atomic_inc_return(&io_request->refcount) > 1)) {
+			atomic_dec(&io_request->refcount);
+			return NULL;
+		}
+	} else {
+		while (1) {
+			io_request = &ctrl_info->io_request_pool[i];
+			if (atomic_inc_return(&io_request->refcount) == 1)
+				break;
+			atomic_dec(&io_request->refcount);
+			i = (i + 1) % limit;
+		}
 	}
 
 	/* benignly racy */
@@ -4075,7 +4087,7 @@  static int pqi_submit_raid_request_synchronous(struct pqi_ctrl_info *ctrl_info,
 
 	atomic_inc(&ctrl_info->sync_cmds_outstanding);
 
-	io_request = pqi_alloc_io_request(ctrl_info);
+	io_request = pqi_alloc_io_request(ctrl_info, NULL);
 
 	put_unaligned_le16(io_request->index,
 		&(((struct pqi_raid_path_request *)request)->request_id));
@@ -5032,7 +5044,9 @@  static inline int pqi_raid_submit_scsi_cmd(struct pqi_ctrl_info *ctrl_info,
 {
 	struct pqi_io_request *io_request;
 
-	io_request = pqi_alloc_io_request(ctrl_info);
+	io_request = pqi_alloc_io_request(ctrl_info, scmd);
+	if (!io_request)
+		return SCSI_MLQUEUE_HOST_BUSY;
 
 	return pqi_raid_submit_scsi_cmd_with_io_request(ctrl_info, io_request,
 		device, scmd, queue_group);
@@ -5230,7 +5244,10 @@  static int pqi_aio_submit_io(struct pqi_ctrl_info *ctrl_info,
 	struct pqi_io_request *io_request;
 	struct pqi_aio_path_request *request;
 
-	io_request = pqi_alloc_io_request(ctrl_info);
+	io_request = pqi_alloc_io_request(ctrl_info, scmd);
+	if (!io_request)
+		return SCSI_MLQUEUE_HOST_BUSY;
+
 	io_request->io_complete_callback = pqi_aio_io_complete;
 	io_request->scmd = scmd;
 	io_request->raid_bypass = raid_bypass;
@@ -5657,7 +5674,7 @@  static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info,
 	DECLARE_COMPLETION_ONSTACK(wait);
 	struct pqi_task_management_request *request;
 
-	io_request = pqi_alloc_io_request(ctrl_info);
+	io_request = pqi_alloc_io_request(ctrl_info, NULL);
 	io_request->io_complete_callback = pqi_lun_reset_complete;
 	io_request->context = &wait;
 
@@ -6504,6 +6521,7 @@  static struct scsi_host_template pqi_driver_template = {
 	.map_queues = pqi_map_queues,
 	.sdev_attrs = pqi_sdev_attrs,
 	.shost_attrs = pqi_shost_attrs,
+	.host_tagset = 1,
 };
 
 static int pqi_register_scsi(struct pqi_ctrl_info *ctrl_info)