diff mbox series

[v9,13/13] lpfc: vmid: Introducing vmid in io path.

Message ID 1617750397-26466-14-git-send-email-muneendra.kumar@broadcom.com (mailing list archive)
State Superseded
Headers show
Series blkcg:Support to track FC storage blk io traffic | expand

Commit Message

Muneendra Kumar April 6, 2021, 11:06 p.m. UTC
From: Gaurav Srivastava <gaurav.srivastava@broadcom.com>

The patch introduces the vmid in the io path. It checks if the vmid is
enabled and if io belongs to a vm or not and acts accordingly. Other
supporing APIs are also included in the patch.

Signed-off-by: Gaurav Srivastava  <gaurav.srivastava@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>

---
v9:
Added changes related to locking and new hashtable implementation

v8:
Added proper error codes
updated logic while handling vmid

v7:
No change

v6:
No change

v5:
No change

v4:
No change

v3:
Replaced blkcg_get_app_identifier with blkcg_get_fc_appid

v2:
Ported the patch on top of 5.10/scsi-queue
Added a fix for issuing QFPA command which was not included in the
last submit
---
 drivers/scsi/lpfc/lpfc_scsi.c | 169 ++++++++++++++++++++++++++++++++++
 1 file changed, 169 insertions(+)

Comments

Hannes Reinecke April 8, 2021, 8:46 a.m. UTC | #1
On 4/7/21 1:06 AM, Muneendra wrote:
> From: Gaurav Srivastava <gaurav.srivastava@broadcom.com>
> 
> The patch introduces the vmid in the io path. It checks if the vmid is
> enabled and if io belongs to a vm or not and acts accordingly. Other
> supporing APIs are also included in the patch.

supporting (sp)

> 
> Signed-off-by: Gaurav Srivastava  <gaurav.srivastava@broadcom.com>
> Signed-off-by: James Smart <jsmart2021@gmail.com>
> 
> ---
> v9:
> Added changes related to locking and new hashtable implementation
> 
> v8:
> Added proper error codes
> updated logic while handling vmid
> 
> v7:
> No change
> 
> v6:
> No change
> 
> v5:
> No change
> 
> v4:
> No change
> 
> v3:
> Replaced blkcg_get_app_identifier with blkcg_get_fc_appid
> 
> v2:
> Ported the patch on top of 5.10/scsi-queue
> Added a fix for issuing QFPA command which was not included in the
> last submit
> ---
>   drivers/scsi/lpfc/lpfc_scsi.c | 169 ++++++++++++++++++++++++++++++++++
>   1 file changed, 169 insertions(+)
> 
> diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
> index f81178464436..3267c5858238 100644
> --- a/drivers/scsi/lpfc/lpfc_scsi.c
> +++ b/drivers/scsi/lpfc/lpfc_scsi.c
> @@ -5263,6 +5263,155 @@ static void lpfc_vmid_assign_cs_ctl(struct lpfc_vport *vport,
>   	}
>   }
>   
> +/*
> + * lpfc_vmid_get_appid- get the vmid associated with the uuid
> + * @vport: The virtual port for which this call is being executed.
> + * @uuid: uuid associated with the VE
> + * @cmd: address of scsi cmmd descriptor
> + * @tag: VMID tag
> + * Returns status of the function
> + */
> +static int lpfc_vmid_get_appid(struct lpfc_vport *vport, char *uuid, struct
> +			       scsi_cmnd * cmd, union lpfc_vmid_io_tag *tag)
> +{
> +	struct lpfc_vmid *vmp = NULL;
> +	int hash, len, rc, i;
> +	u8 pending = 0;
> +
> +	/* check if QFPA is complete */
> +	if (lpfc_vmid_is_type_priority_tag(vport) && !(vport->vmid_flag &
> +	      LPFC_VMID_QFPA_CMPL)) {
> +		vport->work_port_events |= WORKER_CHECK_VMID_ISSUE_QFPA;
> +		return -EAGAIN;
> +	}
> +
> +	/* search if the uuid has already been mapped to the vmid */
> +	len = strlen(uuid);
> +	hash = lpfc_vmid_hash_fn(uuid, len);
> +
> +	/* search for the VMID in the table */
> +	read_lock(&vport->vmid_lock);
> +	vmp = lpfc_get_vmid_from_hastable(vport, hash, uuid);
> +
> +	/* if found, check if its already registered  */
> +	if (vmp  && vmp->flag & LPFC_VMID_REGISTERED) {
> +		read_unlock(&vport->vmid_lock);
> +		lpfc_vmid_update_entry(vport, cmd, vmp, tag);
> +		rc = 0;
> +	} else if (vmp && (vmp->flag & LPFC_VMID_REQ_REGISTER ||
> +			   vmp->flag & LPFC_VMID_DE_REGISTER)) {
> +		/* else if register or dereg request has already been sent */
> +		/* Hence vmid tag will not be added for this IO */
> +		read_unlock(&vport->vmid_lock);
> +		rc = -EBUSY;
> +	} else {
> +		/* The vmid was not found in the hashtable. At this point, */
> +		/* drop the read lock first before proceeding further */
> +		read_unlock(&vport->vmid_lock);
> +		/* start the process to obtain one as per the */
> +		/* type of the vmid indicated */
> +		write_lock(&vport->vmid_lock);
> +		vmp = lpfc_get_vmid_from_hastable(vport, hash, uuid);
> +
> +		/* while the read lock was released, in case the entry was */
> +		/* added by other context or is in process of being added */
> +		if (vmp && vmp->flag & LPFC_VMID_REGISTERED) {
> +			lpfc_vmid_update_entry(vport, cmd, vmp, tag);
> +			write_unlock(&vport->vmid_lock);
> +			return 0;
> +		} else if (vmp && vmp->flag & LPFC_VMID_REQ_REGISTER) {
> +			write_unlock(&vport->vmid_lock);
> +			return -EBUSY;
> +		}
> +
> +		/* else search and allocate a free slot in the hash table */
> +		if (vport->cur_vmid_cnt < vport->max_vmid) {
> +			for (i = 0; i < vport->max_vmid; ++i) {
> +				vmp = vport->vmid + i;
> +				if (vmp->flag == LPFC_VMID_SLOT_FREE) {
> +					vmp = vport->vmid + i;
> +					break;
> +				}
> +			}
> +		} else {
> +			write_unlock(&vport->vmid_lock);
> +			return -ENOMEM;
> +		}
> +
> +		if (vmp && (vmp->flag == LPFC_VMID_SLOT_FREE)) {
> +			/* Add the vmid and register  */
> +			lpfc_put_vmid_in_hashtable(vport, hash, vmp);
> +			vmp->vmid_len = len;
> +			memcpy(vmp->host_vmid, uuid, vmp->vmid_len);
> +			vmp->io_rd_cnt = 0;
> +			vmp->io_wr_cnt = 0;
> +			vmp->flag = LPFC_VMID_SLOT_USED;
> +
> +			vmp->delete_inactive =
> +			    vport->vmid_inactivity_timeout ? 1 : 0;
> +
> +			/* if type priority tag, get next available vmid */
> +			if (lpfc_vmid_is_type_priority_tag(vport))
> +				lpfc_vmid_assign_cs_ctl(vport, vmp);
> +
> +			/* allocate the per cpu variable for holding */
> +			/* the last access time stamp only if vmid is enabled */
> +			if (!vmp->last_io_time)
> +				vmp->last_io_time =
> +				    __alloc_percpu(sizeof(u64),
> +						   __alignof__(struct
> +							       lpfc_vmid));
> +
> +			/* registration pending */
> +			pending = 1;
> +		} else {
> +			rc = -ENOMEM;
> +		}
> +		write_unlock(&vport->vmid_lock);
> +
> +		/* complete transaction with switch */
> +		if (pending) {
> +			if (lpfc_vmid_is_type_priority_tag(vport))
> +				rc = lpfc_vmid_uvem(vport, vmp, true);
> +			else
> +				rc = lpfc_vmid_cmd(vport,
> +						   SLI_CTAS_RAPP_IDENT,
> +						   vmp);
> +			if (!rc) {
> +				write_lock(&vport->vmid_lock);
> +				vport->cur_vmid_cnt++;
> +				vmp->flag |= LPFC_VMID_REQ_REGISTER;
> +				write_unlock(&vport->vmid_lock);
> +			}
> +		}
> +
> +		/* finally, enable the idle timer once */
> +		if (!(vport->phba->pport->vmid_flag & LPFC_VMID_TIMER_ENBLD)) {
> +			mod_timer(&vport->phba->inactive_vmid_poll,
> +				  jiffies +
> +				  msecs_to_jiffies(1000 * LPFC_VMID_TIMER));
> +			vport->phba->pport->vmid_flag |= LPFC_VMID_TIMER_ENBLD;
> +		}
> +	}
> +	return rc;
> +}
> +
> +/*
> + * lpfc_is_command_vm_io - get the uuid from blk cgroup
> + * @cmd:Pointer to scsi_cmnd data structure
> + * Returns uuid if present if not null
> + */
> +static char *lpfc_is_command_vm_io(struct scsi_cmnd *cmd)
> +{
> +	char *uuid = NULL;
> +
> +	if (cmd->request) {
> +		if (cmd->request->bio)
> +			uuid = blkcg_get_fc_appid(cmd->request->bio);
> +	}
> +	return uuid;
> +}
> +
>   /**
>    * lpfc_queuecommand - scsi_host_template queuecommand entry point
>    * @shost: kernel scsi host pointer.
> @@ -5288,6 +5437,7 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
>   	int err, idx;
>   #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
>   	uint64_t start = 0L;
> +	u8 *uuid = NULL;
>   
>   	if (phba->ktime_on)
>   		start = ktime_get_ns();
> @@ -5415,6 +5565,25 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
>   	}
>   
>   
> +	/* check the necessary and sufficient condition to support VMID */
> +	if (lpfc_is_vmid_enabled(phba) &&
> +	    (ndlp->vmid_support ||
> +	     phba->pport->vmid_priority_tagging ==
> +	     LPFC_VMID_PRIO_TAG_ALL_TARGETS)) {
> +		/* is the IO generated by a VM, get the associated virtual */
> +		/* entity id */
> +		uuid = lpfc_is_command_vm_io(cmnd);
> +
> +		if (uuid) {
> +			err = lpfc_vmid_get_appid(vport, uuid, cmnd,
> +				(union lpfc_vmid_io_tag *)
> +					&lpfc_cmd->cur_iocbq.vmid_tag);
> +			if (!err)
> +				lpfc_cmd->cur_iocbq.iocb_flag |= LPFC_IO_VMID;
> +		}
> +	}
> +
> +	atomic_inc(&ndlp->cmd_pending);
>   #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
>   	if (unlikely(phba->hdwqstat_on & LPFC_CHECK_SCSI_IO))
>   		this_cpu_inc(phba->sli4_hba.c_stat->xmt_io);
> 
And that's the bit which I don't particular like.

Essentially we'll have to inject additional ELS commands _on each I/O_ 
to get a valid VMID.
Where there are _so_ many things which might get wrong, causing an I/O 
stall.

I would have vastly preferred if we could _avoid_ having to do 
additional ELS commands for VMID registration in the I/O path
(ie only allow for I/O with a valid VMID), and reject the I/O otherwise 
until VMID registration is complete.

IE return 'BUSY' (or even a command retry?) when no valid VMID for this 
particular I/O is found, register the VMID (preferably in another 
thread), and restart the queue once the VMID is registered.

That way we have a clear separation, and the I/O path will always work 
with valid VMIDs.

Hmm?

Cheers,

Hannes
James Smart April 10, 2021, 3 p.m. UTC | #2
On 4/8/2021 1:46 AM, Hannes Reinecke wrote:
> On 4/7/21 1:06 AM, Muneendra wrote:
>> From: Gaurav Srivastava <gaurav.srivastava@broadcom.com>
...
>> +        /* while the read lock was released, in case the entry was */
>> +        /* added by other context or is in process of being added */
>> +        if (vmp && vmp->flag & LPFC_VMID_REGISTERED) {
>> +            lpfc_vmid_update_entry(vport, cmd, vmp, tag);
>> +            write_unlock(&vport->vmid_lock);
>> +            return 0;
>> +        } else if (vmp && vmp->flag & LPFC_VMID_REQ_REGISTER) {
>> +            write_unlock(&vport->vmid_lock);
>> +            return -EBUSY;
>> +        }
>> +
>> +        /* else search and allocate a free slot in the hash table */
>> +        if (vport->cur_vmid_cnt < vport->max_vmid) {
>> +            for (i = 0; i < vport->max_vmid; ++i) {
>> +                vmp = vport->vmid + i;
>> +                if (vmp->flag == LPFC_VMID_SLOT_FREE) {
>> +                    vmp = vport->vmid + i;

delete this last line and adjust parens - really odd that it replicates 
the assignment 2 lines earlier.

>> +                    break;
>> +                }
>> +            }

I would prefer that if the table is expended and no slots free, that 
-ENOMEM is returned here. Rather than falling down below and qualifying 
by slot free, then by pending (set only if slot free).  I can't believe 
there is a reason the idle timer has to be started if no slots free as 
all the other fail cases don't bother with it.

This also helps indentation levels below.

>> +        } else {
>> +            write_unlock(&vport->vmid_lock);
>> +            return -ENOMEM;
>> +        }
>> +
>> +        if (vmp && (vmp->flag == LPFC_VMID_SLOT_FREE)) {
>> +            /* Add the vmid and register  */
>> +            lpfc_put_vmid_in_hashtable(vport, hash, vmp);
>> +            vmp->vmid_len = len;
>> +            memcpy(vmp->host_vmid, uuid, vmp->vmid_len);
>> +            vmp->io_rd_cnt = 0;
>> +            vmp->io_wr_cnt = 0;
>> +            vmp->flag = LPFC_VMID_SLOT_USED;
>> +
>> +            vmp->delete_inactive =
>> +                vport->vmid_inactivity_timeout ? 1 : 0;
>> +
>> +            /* if type priority tag, get next available vmid */
>> +            if (lpfc_vmid_is_type_priority_tag(vport))
>> +                lpfc_vmid_assign_cs_ctl(vport, vmp);
>> +
>> +            /* allocate the per cpu variable for holding */
>> +            /* the last access time stamp only if vmid is enabled */
>> +            if (!vmp->last_io_time)
>> +                vmp->last_io_time =
>> +                    __alloc_percpu(sizeof(u64),
>> +                           __alignof__(struct
>> +                                   lpfc_vmid));
>> +
>> +            /* registration pending */
>> +            pending = 1;
>> +        } else {
>> +            rc = -ENOMEM;
>> +        }
>> +        write_unlock(&vport->vmid_lock);
>> +
>> +        /* complete transaction with switch */
>> +        if (pending) {
>> +            if (lpfc_vmid_is_type_priority_tag(vport))
>> +                rc = lpfc_vmid_uvem(vport, vmp, true);
>> +            else
>> +                rc = lpfc_vmid_cmd(vport,
>> +                           SLI_CTAS_RAPP_IDENT,
>> +                           vmp);
>> +            if (!rc) {
>> +                write_lock(&vport->vmid_lock);
>> +                vport->cur_vmid_cnt++;
>> +                vmp->flag |= LPFC_VMID_REQ_REGISTER;
>> +                write_unlock(&vport->vmid_lock);
>> +            }
>> +        }
>> +
>> +        /* finally, enable the idle timer once */
>> +        if (!(vport->phba->pport->vmid_flag & LPFC_VMID_TIMER_ENBLD)) {
>> +            mod_timer(&vport->phba->inactive_vmid_poll,
>> +                  jiffies +
>> +                  msecs_to_jiffies(1000 * LPFC_VMID_TIMER));
>> +            vport->phba->pport->vmid_flag |= LPFC_VMID_TIMER_ENBLD;
>> +        }
>> +    }
>> +    return rc;
>> +}
>> +
>> +/*
>> + * lpfc_is_command_vm_io - get the uuid from blk cgroup
>> + * @cmd:Pointer to scsi_cmnd data structure
>> + * Returns uuid if present if not null
>> + */
>> +static char *lpfc_is_command_vm_io(struct scsi_cmnd *cmd)
>> +{
>> +    char *uuid = NULL;
>> +
>> +    if (cmd->request) {
>> +        if (cmd->request->bio)
>> +            uuid = blkcg_get_fc_appid(cmd->request->bio);
>> +    }
>> +    return uuid;
>> +}
>> +
>>   /**
>>    * lpfc_queuecommand - scsi_host_template queuecommand entry point
>>    * @shost: kernel scsi host pointer.
>> @@ -5288,6 +5437,7 @@ lpfc_queuecommand(struct Scsi_Host *shost, 
>> struct scsi_cmnd *cmnd)
>>       int err, idx;
>>   #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
>>       uint64_t start = 0L;
>> +    u8 *uuid = NULL;
>>       if (phba->ktime_on)
>>           start = ktime_get_ns();
>> @@ -5415,6 +5565,25 @@ lpfc_queuecommand(struct Scsi_Host *shost, 
>> struct scsi_cmnd *cmnd)
>>       }
>> +    /* check the necessary and sufficient condition to support VMID */
>> +    if (lpfc_is_vmid_enabled(phba) &&
>> +        (ndlp->vmid_support ||
>> +         phba->pport->vmid_priority_tagging ==
>> +         LPFC_VMID_PRIO_TAG_ALL_TARGETS)) {
>> +        /* is the IO generated by a VM, get the associated virtual */
>> +        /* entity id */
>> +        uuid = lpfc_is_command_vm_io(cmnd);
>> +
>> +        if (uuid) {
>> +            err = lpfc_vmid_get_appid(vport, uuid, cmnd,
>> +                (union lpfc_vmid_io_tag *)
>> +                    &lpfc_cmd->cur_iocbq.vmid_tag);
>> +            if (!err)
>> +                lpfc_cmd->cur_iocbq.iocb_flag |= LPFC_IO_VMID;
>> +        }
>> +    }
>> +
>> +    atomic_inc(&ndlp->cmd_pending);
>>   #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
>>       if (unlikely(phba->hdwqstat_on & LPFC_CHECK_SCSI_IO))
>>           this_cpu_inc(phba->sli4_hba.c_stat->xmt_io);
>>
> And that's the bit which I don't particular like.
> 
> Essentially we'll have to inject additional ELS commands _on each I/O_ 
> to get a valid VMID.
> Where there are _so_ many things which might get wrong, causing an I/O 
> stall.

I don't follow you - yes ELS's are injected when there isn't an entry 
for the VM, but once there is, there isn't further ELS's. That is the 
cost. as we don't know what uuid's I/O will be sent to before hand, so 
it has to be siphoned off during the I/O flow.  I/O's can be sent 
non-tagged while the ELS's are completing (and there aren't multiple 
sets of ELS's as long as it's the same uuid), which is fine.

so I disagree with "_on each I/O_".

> I would have vastly preferred if we could _avoid_ having to do 
> additional ELS commands for VMID registration in the I/O path
> (ie only allow for I/O with a valid VMID), and reject the I/O otherwise 
> until VMID registration is complete.
> 
> IE return 'BUSY' (or even a command retry?) when no valid VMID for this 
> particular I/O is found, register the VMID (preferably in another 
> thread), and restart the queue once the VMID is registered.

Why does it bother you with the I/O path ?  It's actually happening in 
parallel with no real relation between the two.

I seriously disagree with reject if no vmid tag. Why?  what do you gain 
? This actually introduces more disruption than the parallel flow with 
the ELSs.   Also, after link bounce, where all VMID's have to be done, 
it adds a stall window after link up right when things are trying to 
resume after rports rejoin. Why add the i/o rebouncing ? There no real 
benefit. Issuing a few untagged I/O doesn't hurt.

> 
> That way we have a clear separation, and the I/O path will always work 
> with valid VMIDs.
> 
> Hmm?
> 
> Cheers,
> 
> Hannes

-- james
Hannes Reinecke April 12, 2021, 5:27 a.m. UTC | #3
On 4/10/21 5:00 PM, James Smart wrote:
> On 4/8/2021 1:46 AM, Hannes Reinecke wrote:
>> On 4/7/21 1:06 AM, Muneendra wrote:
>>> From: Gaurav Srivastava <gaurav.srivastava@broadcom.com>
> ...
>>> +        /* while the read lock was released, in case the entry was */
>>> +        /* added by other context or is in process of being added */
>>> +        if (vmp && vmp->flag & LPFC_VMID_REGISTERED) {
>>> +            lpfc_vmid_update_entry(vport, cmd, vmp, tag);
>>> +            write_unlock(&vport->vmid_lock);
>>> +            return 0;
>>> +        } else if (vmp && vmp->flag & LPFC_VMID_REQ_REGISTER) {
>>> +            write_unlock(&vport->vmid_lock);
>>> +            return -EBUSY;
>>> +        }
>>> +
>>> +        /* else search and allocate a free slot in the hash table */
>>> +        if (vport->cur_vmid_cnt < vport->max_vmid) {
>>> +            for (i = 0; i < vport->max_vmid; ++i) {
>>> +                vmp = vport->vmid + i;
>>> +                if (vmp->flag == LPFC_VMID_SLOT_FREE) {
>>> +                    vmp = vport->vmid + i;
> 
> delete this last line and adjust parens - really odd that it replicates 
> the assignment 2 lines earlier.
> 
>>> +                    break;
>>> +                }
>>> +            }
> 
> I would prefer that if the table is expended and no slots free, that 
> -ENOMEM is returned here. Rather than falling down below and qualifying 
> by slot free, then by pending (set only if slot free).  I can't believe 
> there is a reason the idle timer has to be started if no slots free as 
> all the other fail cases don't bother with it.
> 
> This also helps indentation levels below.
> 
>>> +        } else {
>>> +            write_unlock(&vport->vmid_lock);
>>> +            return -ENOMEM;
>>> +        }
>>> +
>>> +        if (vmp && (vmp->flag == LPFC_VMID_SLOT_FREE)) {
>>> +            /* Add the vmid and register  */
>>> +            lpfc_put_vmid_in_hashtable(vport, hash, vmp);
>>> +            vmp->vmid_len = len;
>>> +            memcpy(vmp->host_vmid, uuid, vmp->vmid_len);
>>> +            vmp->io_rd_cnt = 0;
>>> +            vmp->io_wr_cnt = 0;
>>> +            vmp->flag = LPFC_VMID_SLOT_USED;
>>> +
>>> +            vmp->delete_inactive =
>>> +                vport->vmid_inactivity_timeout ? 1 : 0;
>>> +
>>> +            /* if type priority tag, get next available vmid */
>>> +            if (lpfc_vmid_is_type_priority_tag(vport))
>>> +                lpfc_vmid_assign_cs_ctl(vport, vmp);
>>> +
>>> +            /* allocate the per cpu variable for holding */
>>> +            /* the last access time stamp only if vmid is enabled */
>>> +            if (!vmp->last_io_time)
>>> +                vmp->last_io_time =
>>> +                    __alloc_percpu(sizeof(u64),
>>> +                           __alignof__(struct
>>> +                                   lpfc_vmid));
>>> +
>>> +            /* registration pending */
>>> +            pending = 1;
>>> +        } else {
>>> +            rc = -ENOMEM;
>>> +        }
>>> +        write_unlock(&vport->vmid_lock);
>>> +
>>> +        /* complete transaction with switch */
>>> +        if (pending) {
>>> +            if (lpfc_vmid_is_type_priority_tag(vport))
>>> +                rc = lpfc_vmid_uvem(vport, vmp, true);
>>> +            else
>>> +                rc = lpfc_vmid_cmd(vport,
>>> +                           SLI_CTAS_RAPP_IDENT,
>>> +                           vmp);
>>> +            if (!rc) {
>>> +                write_lock(&vport->vmid_lock);
>>> +                vport->cur_vmid_cnt++;
>>> +                vmp->flag |= LPFC_VMID_REQ_REGISTER;
>>> +                write_unlock(&vport->vmid_lock);
>>> +            }
>>> +        }
>>> +
>>> +        /* finally, enable the idle timer once */
>>> +        if (!(vport->phba->pport->vmid_flag & LPFC_VMID_TIMER_ENBLD)) {
>>> +            mod_timer(&vport->phba->inactive_vmid_poll,
>>> +                  jiffies +
>>> +                  msecs_to_jiffies(1000 * LPFC_VMID_TIMER));
>>> +            vport->phba->pport->vmid_flag |= LPFC_VMID_TIMER_ENBLD;
>>> +        }
>>> +    }
>>> +    return rc;
>>> +}
>>> +
>>> +/*
>>> + * lpfc_is_command_vm_io - get the uuid from blk cgroup
>>> + * @cmd:Pointer to scsi_cmnd data structure
>>> + * Returns uuid if present if not null
>>> + */
>>> +static char *lpfc_is_command_vm_io(struct scsi_cmnd *cmd)
>>> +{
>>> +    char *uuid = NULL;
>>> +
>>> +    if (cmd->request) {
>>> +        if (cmd->request->bio)
>>> +            uuid = blkcg_get_fc_appid(cmd->request->bio);
>>> +    }
>>> +    return uuid;
>>> +}
>>> +
>>>   /**
>>>    * lpfc_queuecommand - scsi_host_template queuecommand entry point
>>>    * @shost: kernel scsi host pointer.
>>> @@ -5288,6 +5437,7 @@ lpfc_queuecommand(struct Scsi_Host *shost, 
>>> struct scsi_cmnd *cmnd)
>>>       int err, idx;
>>>   #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
>>>       uint64_t start = 0L;
>>> +    u8 *uuid = NULL;
>>>       if (phba->ktime_on)
>>>           start = ktime_get_ns();
>>> @@ -5415,6 +5565,25 @@ lpfc_queuecommand(struct Scsi_Host *shost, 
>>> struct scsi_cmnd *cmnd)
>>>       }
>>> +    /* check the necessary and sufficient condition to support VMID */
>>> +    if (lpfc_is_vmid_enabled(phba) &&
>>> +        (ndlp->vmid_support ||
>>> +         phba->pport->vmid_priority_tagging ==
>>> +         LPFC_VMID_PRIO_TAG_ALL_TARGETS)) {
>>> +        /* is the IO generated by a VM, get the associated virtual */
>>> +        /* entity id */
>>> +        uuid = lpfc_is_command_vm_io(cmnd);
>>> +
>>> +        if (uuid) {
>>> +            err = lpfc_vmid_get_appid(vport, uuid, cmnd,
>>> +                (union lpfc_vmid_io_tag *)
>>> +                    &lpfc_cmd->cur_iocbq.vmid_tag);
>>> +            if (!err)
>>> +                lpfc_cmd->cur_iocbq.iocb_flag |= LPFC_IO_VMID;
>>> +        }
>>> +    }
>>> +
>>> +    atomic_inc(&ndlp->cmd_pending);
>>>   #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
>>>       if (unlikely(phba->hdwqstat_on & LPFC_CHECK_SCSI_IO))
>>>           this_cpu_inc(phba->sli4_hba.c_stat->xmt_io);
>>>
>> And that's the bit which I don't particular like.
>>
>> Essentially we'll have to inject additional ELS commands _on each I/O_ 
>> to get a valid VMID.
>> Where there are _so_ many things which might get wrong, causing an I/O 
>> stall.
> 
> I don't follow you - yes ELS's are injected when there isn't an entry 
> for the VM, but once there is, there isn't further ELS's. That is the 
> cost. as we don't know what uuid's I/O will be sent to before hand, so 
> it has to be siphoned off during the I/O flow.  I/O's can be sent 
> non-tagged while the ELS's are completing (and there aren't multiple 
> sets of ELS's as long as it's the same uuid), which is fine.
> 
> so I disagree with "_on each I/O_".
> 
Yeah, that was an exaggeration.
Not on each I/O; I should've said 'on each unregistered I/O'.

This really is my unhappiness with the entire VMID specification,
where you can time out VMIDs after a certain period, and hence never
be sure if any particular I/O really _is_ registered.

>> I would have vastly preferred if we could _avoid_ having to do 
>> additional ELS commands for VMID registration in the I/O path
>> (ie only allow for I/O with a valid VMID), and reject the I/O 
>> otherwise until VMID registration is complete.
>>
>> IE return 'BUSY' (or even a command retry?) when no valid VMID for 
>> this particular I/O is found, register the VMID (preferably in another 
>> thread), and restart the queue once the VMID is registered.
> 
> Why does it bother you with the I/O path ?  It's actually happening in 
> parallel with no real relation between the two.
> 
> I seriously disagree with reject if no vmid tag. Why?  what do you gain 
> ? This actually introduces more disruption than the parallel flow with 
> the ELSs.   Also, after link bounce, where all VMID's have to be done, 
> it adds a stall window after link up right when things are trying to 
> resume after rports rejoin. Why add the i/o rebouncing ? There no real 
> benefit. Issuing a few untagged I/O doesn't hurt.
> 
That indeed is a valid point. I retract my objection.

Reviewed-by: Hannes Reinecke <hare@suse.de>

Cheers,

Hannes
diff mbox series

Patch

diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index f81178464436..3267c5858238 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -5263,6 +5263,155 @@  static void lpfc_vmid_assign_cs_ctl(struct lpfc_vport *vport,
 	}
 }
 
+/*
+ * lpfc_vmid_get_appid- get the vmid associated with the uuid
+ * @vport: The virtual port for which this call is being executed.
+ * @uuid: uuid associated with the VE
+ * @cmd: address of scsi cmmd descriptor
+ * @tag: VMID tag
+ * Returns status of the function
+ */
+static int lpfc_vmid_get_appid(struct lpfc_vport *vport, char *uuid, struct
+			       scsi_cmnd * cmd, union lpfc_vmid_io_tag *tag)
+{
+	struct lpfc_vmid *vmp = NULL;
+	int hash, len, rc, i;
+	u8 pending = 0;
+
+	/* check if QFPA is complete */
+	if (lpfc_vmid_is_type_priority_tag(vport) && !(vport->vmid_flag &
+	      LPFC_VMID_QFPA_CMPL)) {
+		vport->work_port_events |= WORKER_CHECK_VMID_ISSUE_QFPA;
+		return -EAGAIN;
+	}
+
+	/* search if the uuid has already been mapped to the vmid */
+	len = strlen(uuid);
+	hash = lpfc_vmid_hash_fn(uuid, len);
+
+	/* search for the VMID in the table */
+	read_lock(&vport->vmid_lock);
+	vmp = lpfc_get_vmid_from_hastable(vport, hash, uuid);
+
+	/* if found, check if its already registered  */
+	if (vmp  && vmp->flag & LPFC_VMID_REGISTERED) {
+		read_unlock(&vport->vmid_lock);
+		lpfc_vmid_update_entry(vport, cmd, vmp, tag);
+		rc = 0;
+	} else if (vmp && (vmp->flag & LPFC_VMID_REQ_REGISTER ||
+			   vmp->flag & LPFC_VMID_DE_REGISTER)) {
+		/* else if register or dereg request has already been sent */
+		/* Hence vmid tag will not be added for this IO */
+		read_unlock(&vport->vmid_lock);
+		rc = -EBUSY;
+	} else {
+		/* The vmid was not found in the hashtable. At this point, */
+		/* drop the read lock first before proceeding further */
+		read_unlock(&vport->vmid_lock);
+		/* start the process to obtain one as per the */
+		/* type of the vmid indicated */
+		write_lock(&vport->vmid_lock);
+		vmp = lpfc_get_vmid_from_hastable(vport, hash, uuid);
+
+		/* while the read lock was released, in case the entry was */
+		/* added by other context or is in process of being added */
+		if (vmp && vmp->flag & LPFC_VMID_REGISTERED) {
+			lpfc_vmid_update_entry(vport, cmd, vmp, tag);
+			write_unlock(&vport->vmid_lock);
+			return 0;
+		} else if (vmp && vmp->flag & LPFC_VMID_REQ_REGISTER) {
+			write_unlock(&vport->vmid_lock);
+			return -EBUSY;
+		}
+
+		/* else search and allocate a free slot in the hash table */
+		if (vport->cur_vmid_cnt < vport->max_vmid) {
+			for (i = 0; i < vport->max_vmid; ++i) {
+				vmp = vport->vmid + i;
+				if (vmp->flag == LPFC_VMID_SLOT_FREE) {
+					vmp = vport->vmid + i;
+					break;
+				}
+			}
+		} else {
+			write_unlock(&vport->vmid_lock);
+			return -ENOMEM;
+		}
+
+		if (vmp && (vmp->flag == LPFC_VMID_SLOT_FREE)) {
+			/* Add the vmid and register  */
+			lpfc_put_vmid_in_hashtable(vport, hash, vmp);
+			vmp->vmid_len = len;
+			memcpy(vmp->host_vmid, uuid, vmp->vmid_len);
+			vmp->io_rd_cnt = 0;
+			vmp->io_wr_cnt = 0;
+			vmp->flag = LPFC_VMID_SLOT_USED;
+
+			vmp->delete_inactive =
+			    vport->vmid_inactivity_timeout ? 1 : 0;
+
+			/* if type priority tag, get next available vmid */
+			if (lpfc_vmid_is_type_priority_tag(vport))
+				lpfc_vmid_assign_cs_ctl(vport, vmp);
+
+			/* allocate the per cpu variable for holding */
+			/* the last access time stamp only if vmid is enabled */
+			if (!vmp->last_io_time)
+				vmp->last_io_time =
+				    __alloc_percpu(sizeof(u64),
+						   __alignof__(struct
+							       lpfc_vmid));
+
+			/* registration pending */
+			pending = 1;
+		} else {
+			rc = -ENOMEM;
+		}
+		write_unlock(&vport->vmid_lock);
+
+		/* complete transaction with switch */
+		if (pending) {
+			if (lpfc_vmid_is_type_priority_tag(vport))
+				rc = lpfc_vmid_uvem(vport, vmp, true);
+			else
+				rc = lpfc_vmid_cmd(vport,
+						   SLI_CTAS_RAPP_IDENT,
+						   vmp);
+			if (!rc) {
+				write_lock(&vport->vmid_lock);
+				vport->cur_vmid_cnt++;
+				vmp->flag |= LPFC_VMID_REQ_REGISTER;
+				write_unlock(&vport->vmid_lock);
+			}
+		}
+
+		/* finally, enable the idle timer once */
+		if (!(vport->phba->pport->vmid_flag & LPFC_VMID_TIMER_ENBLD)) {
+			mod_timer(&vport->phba->inactive_vmid_poll,
+				  jiffies +
+				  msecs_to_jiffies(1000 * LPFC_VMID_TIMER));
+			vport->phba->pport->vmid_flag |= LPFC_VMID_TIMER_ENBLD;
+		}
+	}
+	return rc;
+}
+
+/*
+ * lpfc_is_command_vm_io - get the uuid from blk cgroup
+ * @cmd:Pointer to scsi_cmnd data structure
+ * Returns uuid if present if not null
+ */
+static char *lpfc_is_command_vm_io(struct scsi_cmnd *cmd)
+{
+	char *uuid = NULL;
+
+	if (cmd->request) {
+		if (cmd->request->bio)
+			uuid = blkcg_get_fc_appid(cmd->request->bio);
+	}
+	return uuid;
+}
+
 /**
  * lpfc_queuecommand - scsi_host_template queuecommand entry point
  * @shost: kernel scsi host pointer.
@@ -5288,6 +5437,7 @@  lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 	int err, idx;
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 	uint64_t start = 0L;
+	u8 *uuid = NULL;
 
 	if (phba->ktime_on)
 		start = ktime_get_ns();
@@ -5415,6 +5565,25 @@  lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 	}
 
 
+	/* check the necessary and sufficient condition to support VMID */
+	if (lpfc_is_vmid_enabled(phba) &&
+	    (ndlp->vmid_support ||
+	     phba->pport->vmid_priority_tagging ==
+	     LPFC_VMID_PRIO_TAG_ALL_TARGETS)) {
+		/* is the IO generated by a VM, get the associated virtual */
+		/* entity id */
+		uuid = lpfc_is_command_vm_io(cmnd);
+
+		if (uuid) {
+			err = lpfc_vmid_get_appid(vport, uuid, cmnd,
+				(union lpfc_vmid_io_tag *)
+					&lpfc_cmd->cur_iocbq.vmid_tag);
+			if (!err)
+				lpfc_cmd->cur_iocbq.iocb_flag |= LPFC_IO_VMID;
+		}
+	}
+
+	atomic_inc(&ndlp->cmd_pending);
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 	if (unlikely(phba->hdwqstat_on & LPFC_CHECK_SCSI_IO))
 		this_cpu_inc(phba->sli4_hba.c_stat->xmt_io);