diff mbox series

[1/1] virtio_blk: implement init_hctx MQ operation

Message ID 20240801151137.14430-1-mgurtovoy@nvidia.com (mailing list archive)
State New, archived
Headers show
Series [1/1] virtio_blk: implement init_hctx MQ operation | expand

Commit Message

Max Gurtovoy Aug. 1, 2024, 3:11 p.m. UTC
In this operation set the driver data of the hctx to point to the virtio
block queue. By doing so, we can use this reference in the and reduce
the number of operations in the fast path.

Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
---
 drivers/block/virtio_blk.c | 42 ++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

Comments

Michael S. Tsirkin Aug. 1, 2024, 3:13 p.m. UTC | #1
On Thu, Aug 01, 2024 at 06:11:37PM +0300, Max Gurtovoy wrote:
> In this operation set the driver data of the hctx to point to the virtio
> block queue. By doing so, we can use this reference in the and reduce

in the .... ?

> the number of operations in the fast path.
> 
> Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
> ---
>  drivers/block/virtio_blk.c | 42 ++++++++++++++++++++------------------
>  1 file changed, 22 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 2351f411fa46..35a7a586f6f5 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -129,14 +129,6 @@ static inline blk_status_t virtblk_result(u8 status)
>  	}
>  }
>  
> -static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx)
> -{
> -	struct virtio_blk *vblk = hctx->queue->queuedata;
> -	struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
> -
> -	return vq;
> -}
> -
>  static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
>  {
>  	struct scatterlist out_hdr, in_hdr, *sgs[3];
> @@ -377,8 +369,7 @@ static void virtblk_done(struct virtqueue *vq)
>  
>  static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx)
>  {
> -	struct virtio_blk *vblk = hctx->queue->queuedata;
> -	struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
> +	struct virtio_blk_vq *vq = hctx->driver_data;
>  	bool kick;
>  
>  	spin_lock_irq(&vq->lock);
> @@ -428,10 +419,10 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  			   const struct blk_mq_queue_data *bd)
>  {
>  	struct virtio_blk *vblk = hctx->queue->queuedata;
> +	struct virtio_blk_vq *vq = hctx->driver_data;
>  	struct request *req = bd->rq;
>  	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
>  	unsigned long flags;
> -	int qid = hctx->queue_num;
>  	bool notify = false;
>  	blk_status_t status;
>  	int err;
> @@ -440,26 +431,26 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	if (unlikely(status))
>  		return status;
>  
> -	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
> -	err = virtblk_add_req(vblk->vqs[qid].vq, vbr);
> +	spin_lock_irqsave(&vq->lock, flags);
> +	err = virtblk_add_req(vq->vq, vbr);
>  	if (err) {
> -		virtqueue_kick(vblk->vqs[qid].vq);
> +		virtqueue_kick(vq->vq);
>  		/* Don't stop the queue if -ENOMEM: we may have failed to
>  		 * bounce the buffer due to global resource outage.
>  		 */
>  		if (err == -ENOSPC)
>  			blk_mq_stop_hw_queue(hctx);
> -		spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
> +		spin_unlock_irqrestore(&vq->lock, flags);
>  		virtblk_unmap_data(req, vbr);
>  		return virtblk_fail_to_queue(req, err);
>  	}
>  
> -	if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
> +	if (bd->last && virtqueue_kick_prepare(vq->vq))
>  		notify = true;
> -	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
> +	spin_unlock_irqrestore(&vq->lock, flags);
>  
>  	if (notify)
> -		virtqueue_notify(vblk->vqs[qid].vq);
> +		virtqueue_notify(vq->vq);
>  	return BLK_STS_OK;
>  }
>  
> @@ -504,7 +495,7 @@ static void virtio_queue_rqs(struct request **rqlist)
>  	struct request *requeue_list = NULL;
>  
>  	rq_list_for_each_safe(rqlist, req, next) {
> -		struct virtio_blk_vq *vq = get_virtio_blk_vq(req->mq_hctx);
> +		struct virtio_blk_vq *vq = req->mq_hctx->driver_data;
>  		bool kick;
>  
>  		if (!virtblk_prep_rq_batch(req)) {
> @@ -1164,6 +1155,16 @@ static const struct attribute_group *virtblk_attr_groups[] = {
>  	NULL,
>  };
>  
> +static int virtblk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
> +		unsigned int hctx_idx)
> +{
> +	struct virtio_blk *vblk = data;
> +	struct virtio_blk_vq *vq = &vblk->vqs[hctx_idx];
> +
> +	hctx->driver_data = vq;
> +	return 0;
> +}
> +
>  static void virtblk_map_queues(struct blk_mq_tag_set *set)
>  {
>  	struct virtio_blk *vblk = set->driver_data;
> @@ -1205,7 +1206,7 @@ static void virtblk_complete_batch(struct io_comp_batch *iob)
>  static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
>  {
>  	struct virtio_blk *vblk = hctx->queue->queuedata;
> -	struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
> +	struct virtio_blk_vq *vq = hctx->driver_data;
>  	struct virtblk_req *vbr;
>  	unsigned long flags;
>  	unsigned int len;
> @@ -1236,6 +1237,7 @@ static const struct blk_mq_ops virtio_mq_ops = {
>  	.queue_rqs	= virtio_queue_rqs,
>  	.commit_rqs	= virtio_commit_rqs,
>  	.complete	= virtblk_request_done,
> +	.init_hctx	= virtblk_init_hctx,
>  	.map_queues	= virtblk_map_queues,
>  	.poll		= virtblk_poll,
>  };
> -- 
> 2.18.1
Max Gurtovoy Aug. 1, 2024, 3:17 p.m. UTC | #2
On 01/08/2024 18:13, Michael S. Tsirkin wrote:
> On Thu, Aug 01, 2024 at 06:11:37PM +0300, Max Gurtovoy wrote:
>> In this operation set the driver data of the hctx to point to the virtio
>> block queue. By doing so, we can use this reference in the and reduce
> in the .... ?

sorry for the typo.

should be :

"By doing so, we can use this reference and reduce the number of operations in the fast path."


>
>> the number of operations in the fast path.
>>
>> Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
>> ---
>>   drivers/block/virtio_blk.c | 42 ++++++++++++++++++++------------------
>>   1 file changed, 22 insertions(+), 20 deletions(-)
>>
>> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
>> index 2351f411fa46..35a7a586f6f5 100644
>> --- a/drivers/block/virtio_blk.c
>> +++ b/drivers/block/virtio_blk.c
>> @@ -129,14 +129,6 @@ static inline blk_status_t virtblk_result(u8 status)
>>   	}
>>   }
>>   
>> -static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx)
>> -{
>> -	struct virtio_blk *vblk = hctx->queue->queuedata;
>> -	struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
>> -
>> -	return vq;
>> -}
>> -
>>   static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
>>   {
>>   	struct scatterlist out_hdr, in_hdr, *sgs[3];
>> @@ -377,8 +369,7 @@ static void virtblk_done(struct virtqueue *vq)
>>   
>>   static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx)
>>   {
>> -	struct virtio_blk *vblk = hctx->queue->queuedata;
>> -	struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
>> +	struct virtio_blk_vq *vq = hctx->driver_data;
>>   	bool kick;
>>   
>>   	spin_lock_irq(&vq->lock);
>> @@ -428,10 +419,10 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>>   			   const struct blk_mq_queue_data *bd)
>>   {
>>   	struct virtio_blk *vblk = hctx->queue->queuedata;
>> +	struct virtio_blk_vq *vq = hctx->driver_data;
>>   	struct request *req = bd->rq;
>>   	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
>>   	unsigned long flags;
>> -	int qid = hctx->queue_num;
>>   	bool notify = false;
>>   	blk_status_t status;
>>   	int err;
>> @@ -440,26 +431,26 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>>   	if (unlikely(status))
>>   		return status;
>>   
>> -	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
>> -	err = virtblk_add_req(vblk->vqs[qid].vq, vbr);
>> +	spin_lock_irqsave(&vq->lock, flags);
>> +	err = virtblk_add_req(vq->vq, vbr);
>>   	if (err) {
>> -		virtqueue_kick(vblk->vqs[qid].vq);
>> +		virtqueue_kick(vq->vq);
>>   		/* Don't stop the queue if -ENOMEM: we may have failed to
>>   		 * bounce the buffer due to global resource outage.
>>   		 */
>>   		if (err == -ENOSPC)
>>   			blk_mq_stop_hw_queue(hctx);
>> -		spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
>> +		spin_unlock_irqrestore(&vq->lock, flags);
>>   		virtblk_unmap_data(req, vbr);
>>   		return virtblk_fail_to_queue(req, err);
>>   	}
>>   
>> -	if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
>> +	if (bd->last && virtqueue_kick_prepare(vq->vq))
>>   		notify = true;
>> -	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
>> +	spin_unlock_irqrestore(&vq->lock, flags);
>>   
>>   	if (notify)
>> -		virtqueue_notify(vblk->vqs[qid].vq);
>> +		virtqueue_notify(vq->vq);
>>   	return BLK_STS_OK;
>>   }
>>   
>> @@ -504,7 +495,7 @@ static void virtio_queue_rqs(struct request **rqlist)
>>   	struct request *requeue_list = NULL;
>>   
>>   	rq_list_for_each_safe(rqlist, req, next) {
>> -		struct virtio_blk_vq *vq = get_virtio_blk_vq(req->mq_hctx);
>> +		struct virtio_blk_vq *vq = req->mq_hctx->driver_data;
>>   		bool kick;
>>   
>>   		if (!virtblk_prep_rq_batch(req)) {
>> @@ -1164,6 +1155,16 @@ static const struct attribute_group *virtblk_attr_groups[] = {
>>   	NULL,
>>   };
>>   
>> +static int virtblk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
>> +		unsigned int hctx_idx)
>> +{
>> +	struct virtio_blk *vblk = data;
>> +	struct virtio_blk_vq *vq = &vblk->vqs[hctx_idx];
>> +
>> +	hctx->driver_data = vq;
>> +	return 0;
>> +}
>> +
>>   static void virtblk_map_queues(struct blk_mq_tag_set *set)
>>   {
>>   	struct virtio_blk *vblk = set->driver_data;
>> @@ -1205,7 +1206,7 @@ static void virtblk_complete_batch(struct io_comp_batch *iob)
>>   static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
>>   {
>>   	struct virtio_blk *vblk = hctx->queue->queuedata;
>> -	struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
>> +	struct virtio_blk_vq *vq = hctx->driver_data;
>>   	struct virtblk_req *vbr;
>>   	unsigned long flags;
>>   	unsigned int len;
>> @@ -1236,6 +1237,7 @@ static const struct blk_mq_ops virtio_mq_ops = {
>>   	.queue_rqs	= virtio_queue_rqs,
>>   	.commit_rqs	= virtio_commit_rqs,
>>   	.complete	= virtblk_request_done,
>> +	.init_hctx	= virtblk_init_hctx,
>>   	.map_queues	= virtblk_map_queues,
>>   	.poll		= virtblk_poll,
>>   };
>> -- 
>> 2.18.1
Michael S. Tsirkin Aug. 1, 2024, 3:29 p.m. UTC | #3
On Thu, Aug 01, 2024 at 06:17:21PM +0300, Max Gurtovoy wrote:
> 
> On 01/08/2024 18:13, Michael S. Tsirkin wrote:
> > On Thu, Aug 01, 2024 at 06:11:37PM +0300, Max Gurtovoy wrote:
> > > In this operation set the driver data of the hctx to point to the virtio
> > > block queue. By doing so, we can use this reference in the and reduce
> > in the .... ?
> 
> sorry for the typo.
> 
> should be :
> 
> "By doing so, we can use this reference and reduce the number of operations in the fast path."

ok. what kind of benefit do you see with this patch?

> 
> > 
> > > the number of operations in the fast path.
> > > 
> > > Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
> > > ---
> > >   drivers/block/virtio_blk.c | 42 ++++++++++++++++++++------------------
> > >   1 file changed, 22 insertions(+), 20 deletions(-)
> > > 
> > > diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> > > index 2351f411fa46..35a7a586f6f5 100644
> > > --- a/drivers/block/virtio_blk.c
> > > +++ b/drivers/block/virtio_blk.c
> > > @@ -129,14 +129,6 @@ static inline blk_status_t virtblk_result(u8 status)
> > >   	}
> > >   }
> > > -static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx)
> > > -{
> > > -	struct virtio_blk *vblk = hctx->queue->queuedata;
> > > -	struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
> > > -
> > > -	return vq;
> > > -}
> > > -
> > >   static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
> > >   {
> > >   	struct scatterlist out_hdr, in_hdr, *sgs[3];
> > > @@ -377,8 +369,7 @@ static void virtblk_done(struct virtqueue *vq)
> > >   static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx)
> > >   {
> > > -	struct virtio_blk *vblk = hctx->queue->queuedata;
> > > -	struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
> > > +	struct virtio_blk_vq *vq = hctx->driver_data;
> > >   	bool kick;
> > >   	spin_lock_irq(&vq->lock);
> > > @@ -428,10 +419,10 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
> > >   			   const struct blk_mq_queue_data *bd)
> > >   {
> > >   	struct virtio_blk *vblk = hctx->queue->queuedata;
> > > +	struct virtio_blk_vq *vq = hctx->driver_data;
> > >   	struct request *req = bd->rq;
> > >   	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
> > >   	unsigned long flags;
> > > -	int qid = hctx->queue_num;
> > >   	bool notify = false;
> > >   	blk_status_t status;
> > >   	int err;
> > > @@ -440,26 +431,26 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
> > >   	if (unlikely(status))
> > >   		return status;
> > > -	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
> > > -	err = virtblk_add_req(vblk->vqs[qid].vq, vbr);
> > > +	spin_lock_irqsave(&vq->lock, flags);
> > > +	err = virtblk_add_req(vq->vq, vbr);
> > >   	if (err) {
> > > -		virtqueue_kick(vblk->vqs[qid].vq);
> > > +		virtqueue_kick(vq->vq);
> > >   		/* Don't stop the queue if -ENOMEM: we may have failed to
> > >   		 * bounce the buffer due to global resource outage.
> > >   		 */
> > >   		if (err == -ENOSPC)
> > >   			blk_mq_stop_hw_queue(hctx);
> > > -		spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
> > > +		spin_unlock_irqrestore(&vq->lock, flags);
> > >   		virtblk_unmap_data(req, vbr);
> > >   		return virtblk_fail_to_queue(req, err);
> > >   	}
> > > -	if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
> > > +	if (bd->last && virtqueue_kick_prepare(vq->vq))
> > >   		notify = true;
> > > -	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
> > > +	spin_unlock_irqrestore(&vq->lock, flags);
> > >   	if (notify)
> > > -		virtqueue_notify(vblk->vqs[qid].vq);
> > > +		virtqueue_notify(vq->vq);
> > >   	return BLK_STS_OK;
> > >   }
> > > @@ -504,7 +495,7 @@ static void virtio_queue_rqs(struct request **rqlist)
> > >   	struct request *requeue_list = NULL;
> > >   	rq_list_for_each_safe(rqlist, req, next) {
> > > -		struct virtio_blk_vq *vq = get_virtio_blk_vq(req->mq_hctx);
> > > +		struct virtio_blk_vq *vq = req->mq_hctx->driver_data;
> > >   		bool kick;
> > >   		if (!virtblk_prep_rq_batch(req)) {
> > > @@ -1164,6 +1155,16 @@ static const struct attribute_group *virtblk_attr_groups[] = {
> > >   	NULL,
> > >   };
> > > +static int virtblk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
> > > +		unsigned int hctx_idx)
> > > +{
> > > +	struct virtio_blk *vblk = data;
> > > +	struct virtio_blk_vq *vq = &vblk->vqs[hctx_idx];
> > > +
> > > +	hctx->driver_data = vq;
> > > +	return 0;
> > > +}
> > > +
> > >   static void virtblk_map_queues(struct blk_mq_tag_set *set)
> > >   {
> > >   	struct virtio_blk *vblk = set->driver_data;
> > > @@ -1205,7 +1206,7 @@ static void virtblk_complete_batch(struct io_comp_batch *iob)
> > >   static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
> > >   {
> > >   	struct virtio_blk *vblk = hctx->queue->queuedata;
> > > -	struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
> > > +	struct virtio_blk_vq *vq = hctx->driver_data;
> > >   	struct virtblk_req *vbr;
> > >   	unsigned long flags;
> > >   	unsigned int len;
> > > @@ -1236,6 +1237,7 @@ static const struct blk_mq_ops virtio_mq_ops = {
> > >   	.queue_rqs	= virtio_queue_rqs,
> > >   	.commit_rqs	= virtio_commit_rqs,
> > >   	.complete	= virtblk_request_done,
> > > +	.init_hctx	= virtblk_init_hctx,
> > >   	.map_queues	= virtblk_map_queues,
> > >   	.poll		= virtblk_poll,
> > >   };
> > > -- 
> > > 2.18.1
Max Gurtovoy Aug. 1, 2024, 3:39 p.m. UTC | #4
On 01/08/2024 18:29, Michael S. Tsirkin wrote:
> On Thu, Aug 01, 2024 at 06:17:21PM +0300, Max Gurtovoy wrote:
>> On 01/08/2024 18:13, Michael S. Tsirkin wrote:
>>> On Thu, Aug 01, 2024 at 06:11:37PM +0300, Max Gurtovoy wrote:
>>>> In this operation set the driver data of the hctx to point to the virtio
>>>> block queue. By doing so, we can use this reference in the and reduce
>>> in the .... ?
>> sorry for the typo.
>>
>> should be :
>>
>> "By doing so, we can use this reference and reduce the number of operations in the fast path."
> ok. what kind of benefit do you see with this patch?

As mentioned. This is a micro optimization that reduces the number of
instructions/dereferences in the fast path.


>
>>>> the number of operations in the fast path.
>>>>
>>>> Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
>>>> ---
>>>>    drivers/block/virtio_blk.c | 42 ++++++++++++++++++++------------------
>>>>    1 file changed, 22 insertions(+), 20 deletions(-)
>>>>
>>>> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
>>>> index 2351f411fa46..35a7a586f6f5 100644
>>>> --- a/drivers/block/virtio_blk.c
>>>> +++ b/drivers/block/virtio_blk.c
>>>> @@ -129,14 +129,6 @@ static inline blk_status_t virtblk_result(u8 status)
>>>>    	}
>>>>    }
>>>> -static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx)
>>>> -{
>>>> -	struct virtio_blk *vblk = hctx->queue->queuedata;
>>>> -	struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
>>>> -
>>>> -	return vq;
>>>> -}
>>>> -
>>>>    static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
>>>>    {
>>>>    	struct scatterlist out_hdr, in_hdr, *sgs[3];
>>>> @@ -377,8 +369,7 @@ static void virtblk_done(struct virtqueue *vq)
>>>>    static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx)
>>>>    {
>>>> -	struct virtio_blk *vblk = hctx->queue->queuedata;
>>>> -	struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
>>>> +	struct virtio_blk_vq *vq = hctx->driver_data;
>>>>    	bool kick;
>>>>    	spin_lock_irq(&vq->lock);
>>>> @@ -428,10 +419,10 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>>>>    			   const struct blk_mq_queue_data *bd)
>>>>    {
>>>>    	struct virtio_blk *vblk = hctx->queue->queuedata;
>>>> +	struct virtio_blk_vq *vq = hctx->driver_data;
>>>>    	struct request *req = bd->rq;
>>>>    	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
>>>>    	unsigned long flags;
>>>> -	int qid = hctx->queue_num;
>>>>    	bool notify = false;
>>>>    	blk_status_t status;
>>>>    	int err;
>>>> @@ -440,26 +431,26 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>>>>    	if (unlikely(status))
>>>>    		return status;
>>>> -	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
>>>> -	err = virtblk_add_req(vblk->vqs[qid].vq, vbr);
>>>> +	spin_lock_irqsave(&vq->lock, flags);
>>>> +	err = virtblk_add_req(vq->vq, vbr);
>>>>    	if (err) {
>>>> -		virtqueue_kick(vblk->vqs[qid].vq);
>>>> +		virtqueue_kick(vq->vq);
>>>>    		/* Don't stop the queue if -ENOMEM: we may have failed to
>>>>    		 * bounce the buffer due to global resource outage.
>>>>    		 */
>>>>    		if (err == -ENOSPC)
>>>>    			blk_mq_stop_hw_queue(hctx);
>>>> -		spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
>>>> +		spin_unlock_irqrestore(&vq->lock, flags);
>>>>    		virtblk_unmap_data(req, vbr);
>>>>    		return virtblk_fail_to_queue(req, err);
>>>>    	}
>>>> -	if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
>>>> +	if (bd->last && virtqueue_kick_prepare(vq->vq))
>>>>    		notify = true;
>>>> -	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
>>>> +	spin_unlock_irqrestore(&vq->lock, flags);
>>>>    	if (notify)
>>>> -		virtqueue_notify(vblk->vqs[qid].vq);
>>>> +		virtqueue_notify(vq->vq);
>>>>    	return BLK_STS_OK;
>>>>    }
>>>> @@ -504,7 +495,7 @@ static void virtio_queue_rqs(struct request **rqlist)
>>>>    	struct request *requeue_list = NULL;
>>>>    	rq_list_for_each_safe(rqlist, req, next) {
>>>> -		struct virtio_blk_vq *vq = get_virtio_blk_vq(req->mq_hctx);
>>>> +		struct virtio_blk_vq *vq = req->mq_hctx->driver_data;
>>>>    		bool kick;
>>>>    		if (!virtblk_prep_rq_batch(req)) {
>>>> @@ -1164,6 +1155,16 @@ static const struct attribute_group *virtblk_attr_groups[] = {
>>>>    	NULL,
>>>>    };
>>>> +static int virtblk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
>>>> +		unsigned int hctx_idx)
>>>> +{
>>>> +	struct virtio_blk *vblk = data;
>>>> +	struct virtio_blk_vq *vq = &vblk->vqs[hctx_idx];
>>>> +
>>>> +	hctx->driver_data = vq;
>>>> +	return 0;
>>>> +}
>>>> +
>>>>    static void virtblk_map_queues(struct blk_mq_tag_set *set)
>>>>    {
>>>>    	struct virtio_blk *vblk = set->driver_data;
>>>> @@ -1205,7 +1206,7 @@ static void virtblk_complete_batch(struct io_comp_batch *iob)
>>>>    static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
>>>>    {
>>>>    	struct virtio_blk *vblk = hctx->queue->queuedata;
>>>> -	struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
>>>> +	struct virtio_blk_vq *vq = hctx->driver_data;
>>>>    	struct virtblk_req *vbr;
>>>>    	unsigned long flags;
>>>>    	unsigned int len;
>>>> @@ -1236,6 +1237,7 @@ static const struct blk_mq_ops virtio_mq_ops = {
>>>>    	.queue_rqs	= virtio_queue_rqs,
>>>>    	.commit_rqs	= virtio_commit_rqs,
>>>>    	.complete	= virtblk_request_done,
>>>> +	.init_hctx	= virtblk_init_hctx,
>>>>    	.map_queues	= virtblk_map_queues,
>>>>    	.poll		= virtblk_poll,
>>>>    };
>>>> -- 
>>>> 2.18.1
Michael S. Tsirkin Aug. 1, 2024, 3:43 p.m. UTC | #5
On Thu, Aug 01, 2024 at 06:39:16PM +0300, Max Gurtovoy wrote:
> 
> On 01/08/2024 18:29, Michael S. Tsirkin wrote:
> > On Thu, Aug 01, 2024 at 06:17:21PM +0300, Max Gurtovoy wrote:
> > > On 01/08/2024 18:13, Michael S. Tsirkin wrote:
> > > > On Thu, Aug 01, 2024 at 06:11:37PM +0300, Max Gurtovoy wrote:
> > > > > In this operation set the driver data of the hctx to point to the virtio
> > > > > block queue. By doing so, we can use this reference in the and reduce
> > > > in the .... ?
> > > sorry for the typo.
> > > 
> > > should be :
> > > 
> > > "By doing so, we can use this reference and reduce the number of operations in the fast path."
> > ok. what kind of benefit do you see with this patch?
> 
> As mentioned. This is a micro optimization that reduces the number of
> instructions/dereferences in the fast path.

By how much? How random code tweaks affect object code is unpredictable.
Pls show results of objdump to prove it does anything
useful.

> 
> > 
> > > > > the number of operations in the fast path.
> > > > > 
> > > > > Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
> > > > > ---
> > > > >    drivers/block/virtio_blk.c | 42 ++++++++++++++++++++------------------
> > > > >    1 file changed, 22 insertions(+), 20 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> > > > > index 2351f411fa46..35a7a586f6f5 100644
> > > > > --- a/drivers/block/virtio_blk.c
> > > > > +++ b/drivers/block/virtio_blk.c
> > > > > @@ -129,14 +129,6 @@ static inline blk_status_t virtblk_result(u8 status)
> > > > >    	}
> > > > >    }
> > > > > -static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx)
> > > > > -{
> > > > > -	struct virtio_blk *vblk = hctx->queue->queuedata;
> > > > > -	struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
> > > > > -
> > > > > -	return vq;
> > > > > -}
> > > > > -
> > > > >    static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
> > > > >    {
> > > > >    	struct scatterlist out_hdr, in_hdr, *sgs[3];
> > > > > @@ -377,8 +369,7 @@ static void virtblk_done(struct virtqueue *vq)
> > > > >    static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx)
> > > > >    {
> > > > > -	struct virtio_blk *vblk = hctx->queue->queuedata;
> > > > > -	struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
> > > > > +	struct virtio_blk_vq *vq = hctx->driver_data;
> > > > >    	bool kick;
> > > > >    	spin_lock_irq(&vq->lock);
> > > > > @@ -428,10 +419,10 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
> > > > >    			   const struct blk_mq_queue_data *bd)
> > > > >    {
> > > > >    	struct virtio_blk *vblk = hctx->queue->queuedata;
> > > > > +	struct virtio_blk_vq *vq = hctx->driver_data;
> > > > >    	struct request *req = bd->rq;
> > > > >    	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
> > > > >    	unsigned long flags;
> > > > > -	int qid = hctx->queue_num;
> > > > >    	bool notify = false;
> > > > >    	blk_status_t status;
> > > > >    	int err;
> > > > > @@ -440,26 +431,26 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
> > > > >    	if (unlikely(status))
> > > > >    		return status;
> > > > > -	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
> > > > > -	err = virtblk_add_req(vblk->vqs[qid].vq, vbr);
> > > > > +	spin_lock_irqsave(&vq->lock, flags);
> > > > > +	err = virtblk_add_req(vq->vq, vbr);
> > > > >    	if (err) {
> > > > > -		virtqueue_kick(vblk->vqs[qid].vq);
> > > > > +		virtqueue_kick(vq->vq);
> > > > >    		/* Don't stop the queue if -ENOMEM: we may have failed to
> > > > >    		 * bounce the buffer due to global resource outage.
> > > > >    		 */
> > > > >    		if (err == -ENOSPC)
> > > > >    			blk_mq_stop_hw_queue(hctx);
> > > > > -		spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
> > > > > +		spin_unlock_irqrestore(&vq->lock, flags);
> > > > >    		virtblk_unmap_data(req, vbr);
> > > > >    		return virtblk_fail_to_queue(req, err);
> > > > >    	}
> > > > > -	if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
> > > > > +	if (bd->last && virtqueue_kick_prepare(vq->vq))
> > > > >    		notify = true;
> > > > > -	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
> > > > > +	spin_unlock_irqrestore(&vq->lock, flags);
> > > > >    	if (notify)
> > > > > -		virtqueue_notify(vblk->vqs[qid].vq);
> > > > > +		virtqueue_notify(vq->vq);
> > > > >    	return BLK_STS_OK;
> > > > >    }
> > > > > @@ -504,7 +495,7 @@ static void virtio_queue_rqs(struct request **rqlist)
> > > > >    	struct request *requeue_list = NULL;
> > > > >    	rq_list_for_each_safe(rqlist, req, next) {
> > > > > -		struct virtio_blk_vq *vq = get_virtio_blk_vq(req->mq_hctx);
> > > > > +		struct virtio_blk_vq *vq = req->mq_hctx->driver_data;
> > > > >    		bool kick;
> > > > >    		if (!virtblk_prep_rq_batch(req)) {
> > > > > @@ -1164,6 +1155,16 @@ static const struct attribute_group *virtblk_attr_groups[] = {
> > > > >    	NULL,
> > > > >    };
> > > > > +static int virtblk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
> > > > > +		unsigned int hctx_idx)
> > > > > +{
> > > > > +	struct virtio_blk *vblk = data;
> > > > > +	struct virtio_blk_vq *vq = &vblk->vqs[hctx_idx];
> > > > > +
> > > > > +	hctx->driver_data = vq;
> > > > > +	return 0;
> > > > > +}
> > > > > +
> > > > >    static void virtblk_map_queues(struct blk_mq_tag_set *set)
> > > > >    {
> > > > >    	struct virtio_blk *vblk = set->driver_data;
> > > > > @@ -1205,7 +1206,7 @@ static void virtblk_complete_batch(struct io_comp_batch *iob)
> > > > >    static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
> > > > >    {
> > > > >    	struct virtio_blk *vblk = hctx->queue->queuedata;
> > > > > -	struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
> > > > > +	struct virtio_blk_vq *vq = hctx->driver_data;
> > > > >    	struct virtblk_req *vbr;
> > > > >    	unsigned long flags;
> > > > >    	unsigned int len;
> > > > > @@ -1236,6 +1237,7 @@ static const struct blk_mq_ops virtio_mq_ops = {
> > > > >    	.queue_rqs	= virtio_queue_rqs,
> > > > >    	.commit_rqs	= virtio_commit_rqs,
> > > > >    	.complete	= virtblk_request_done,
> > > > > +	.init_hctx	= virtblk_init_hctx,
> > > > >    	.map_queues	= virtblk_map_queues,
> > > > >    	.poll		= virtblk_poll,
> > > > >    };
> > > > > -- 
> > > > > 2.18.1
Michael S. Tsirkin Aug. 1, 2024, 5:46 p.m. UTC | #6
On Thu, Aug 01, 2024 at 06:56:44PM +0300, Max Gurtovoy wrote:
> 
> On 01/08/2024 18:43, Michael S. Tsirkin wrote:
> 
>     On Thu, Aug 01, 2024 at 06:39:16PM +0300, Max Gurtovoy wrote:
> 
>         On 01/08/2024 18:29, Michael S. Tsirkin wrote:
> 
>             On Thu, Aug 01, 2024 at 06:17:21PM +0300, Max Gurtovoy wrote:
> 
>                 On 01/08/2024 18:13, Michael S. Tsirkin wrote:
> 
>                     On Thu, Aug 01, 2024 at 06:11:37PM +0300, Max Gurtovoy wrote:
> 
>                         In this operation set the driver data of the hctx to point to the virtio
>                         block queue. By doing so, we can use this reference in the and reduce
> 
>                     in the .... ?
> 
>                 sorry for the typo.
> 
>                 should be :
> 
>                 "By doing so, we can use this reference and reduce the number of operations in the fast path."
> 
>             ok. what kind of benefit do you see with this patch?
> 
>         As mentioned. This is a micro optimization that reduces the number of
>         instructions/dereferences in the fast path.
> 
>     By how much? How random code tweaks affect object code is unpredictable.
>     Pls show results of objdump to prove it does anything
>     useful.
> 
> This is the way all modern block drivers such as NVMe PCI/RDMA/TCP use the
> driver_data.
> 
> These drivers don't have driver specific mechanisms to find the queue from the 
> hctx->queue->queuedata like vblk driver has for some unknown reason.
> 
> It is pretty easy to review this patch and see its benefits, isn't it ?
> 
> It is not expected to provide extreme perf improvement.
> 
> It is introduced for aligning the driver to use common MQ mechanisms and reduce
> dereferences.
> 
> This is not "random code tweaks".


Then pls say so in the commit log.

Look I don't have anything for or against this patch.

I do however want to establish that if something is billed as
an "optimization" it has to come with numbers (even if
it's as simple as "size" run on the object file).

If it's just cleaner/simpler, say so.


I'll wait for an ack from Paolo/Stefan, anyway.



>                         the number of operations in the fast path.
> 
>                         Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
>                         ---
>                            drivers/block/virtio_blk.c | 42 ++++++++++++++++++++------------------
>                            1 file changed, 22 insertions(+), 20 deletions(-)
> 
>                         diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
>                         index 2351f411fa46..35a7a586f6f5 100644
>                         --- a/drivers/block/virtio_blk.c
>                         +++ b/drivers/block/virtio_blk.c
>                         @@ -129,14 +129,6 @@ static inline blk_status_t virtblk_result(u8 status)
>                                 }
>                            }
>                         -static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx)
>                         -{
>                         -       struct virtio_blk *vblk = hctx->queue->queuedata;
>                         -       struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
>                         -
>                         -       return vq;
>                         -}
>                         -
>                            static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
>                            {
>                                 struct scatterlist out_hdr, in_hdr, *sgs[3];
>                         @@ -377,8 +369,7 @@ static void virtblk_done(struct virtqueue *vq)
>                            static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx)
>                            {
>                         -       struct virtio_blk *vblk = hctx->queue->queuedata;
>                         -       struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
>                         +       struct virtio_blk_vq *vq = hctx->driver_data;
>                                 bool kick;
>                                 spin_lock_irq(&vq->lock);
>                         @@ -428,10 +419,10 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>                                                    const struct blk_mq_queue_data *bd)
>                            {
>                                 struct virtio_blk *vblk = hctx->queue->queuedata;
>                         +       struct virtio_blk_vq *vq = hctx->driver_data;
>                                 struct request *req = bd->rq;
>                                 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
>                                 unsigned long flags;
>                         -       int qid = hctx->queue_num;
>                                 bool notify = false;
>                                 blk_status_t status;
>                                 int err;
>                         @@ -440,26 +431,26 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>                                 if (unlikely(status))
>                                         return status;
>                         -       spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
>                         -       err = virtblk_add_req(vblk->vqs[qid].vq, vbr);
>                         +       spin_lock_irqsave(&vq->lock, flags);
>                         +       err = virtblk_add_req(vq->vq, vbr);
>                                 if (err) {
>                         -               virtqueue_kick(vblk->vqs[qid].vq);
>                         +               virtqueue_kick(vq->vq);
>                                         /* Don't stop the queue if -ENOMEM: we may have failed to
>                                          * bounce the buffer due to global resource outage.
>                                          */
>                                         if (err == -ENOSPC)
>                                                 blk_mq_stop_hw_queue(hctx);
>                         -               spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
>                         +               spin_unlock_irqrestore(&vq->lock, flags);
>                                         virtblk_unmap_data(req, vbr);
>                                         return virtblk_fail_to_queue(req, err);
>                                 }
>                         -       if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
>                         +       if (bd->last && virtqueue_kick_prepare(vq->vq))
>                                         notify = true;
>                         -       spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
>                         +       spin_unlock_irqrestore(&vq->lock, flags);
>                                 if (notify)
>                         -               virtqueue_notify(vblk->vqs[qid].vq);
>                         +               virtqueue_notify(vq->vq);
>                                 return BLK_STS_OK;
>                            }
>                         @@ -504,7 +495,7 @@ static void virtio_queue_rqs(struct request **rqlist)
>                                 struct request *requeue_list = NULL;
>                                 rq_list_for_each_safe(rqlist, req, next) {
>                         -               struct virtio_blk_vq *vq = get_virtio_blk_vq(req->mq_hctx);
>                         +               struct virtio_blk_vq *vq = req->mq_hctx->driver_data;
>                                         bool kick;
>                                         if (!virtblk_prep_rq_batch(req)) {
>                         @@ -1164,6 +1155,16 @@ static const struct attribute_group *virtblk_attr_groups[] = {
>                                 NULL,
>                            };
>                         +static int virtblk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
>                         +               unsigned int hctx_idx)
>                         +{
>                         +       struct virtio_blk *vblk = data;
>                         +       struct virtio_blk_vq *vq = &vblk->vqs[hctx_idx];
>                         +
>                         +       hctx->driver_data = vq;
>                         +       return 0;
>                         +}
>                         +
>                            static void virtblk_map_queues(struct blk_mq_tag_set *set)
>                            {
>                                 struct virtio_blk *vblk = set->driver_data;
>                         @@ -1205,7 +1206,7 @@ static void virtblk_complete_batch(struct io_comp_batch *iob)
>                            static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
>                            {
>                                 struct virtio_blk *vblk = hctx->queue->queuedata;
>                         -       struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
>                         +       struct virtio_blk_vq *vq = hctx->driver_data;
>                                 struct virtblk_req *vbr;
>                                 unsigned long flags;
>                                 unsigned int len;
>                         @@ -1236,6 +1237,7 @@ static const struct blk_mq_ops virtio_mq_ops = {
>                                 .queue_rqs      = virtio_queue_rqs,
>                                 .commit_rqs     = virtio_commit_rqs,
>                                 .complete       = virtblk_request_done,
>                         +       .init_hctx      = virtblk_init_hctx,
>                                 .map_queues     = virtblk_map_queues,
>                                 .poll           = virtblk_poll,
>                            };
>                         --
>                         2.18.1
>
Stefan Hajnoczi Aug. 1, 2024, 5:56 p.m. UTC | #7
On Thu, Aug 01, 2024 at 06:56:44PM +0300, Max Gurtovoy wrote:
> 
> On 01/08/2024 18:43, Michael S. Tsirkin wrote:
> > On Thu, Aug 01, 2024 at 06:39:16PM +0300, Max Gurtovoy wrote:
> > > On 01/08/2024 18:29, Michael S. Tsirkin wrote:
> > > > On Thu, Aug 01, 2024 at 06:17:21PM +0300, Max Gurtovoy wrote:
> > > > > On 01/08/2024 18:13, Michael S. Tsirkin wrote:
> > > > > > On Thu, Aug 01, 2024 at 06:11:37PM +0300, Max Gurtovoy wrote:
> > > > > > > In this operation set the driver data of the hctx to point to the virtio
> > > > > > > block queue. By doing so, we can use this reference in the and reduce
> > > > > > in the .... ?
> > > > > > sorry for the typo.
> > > > > 
> > > > > should be :
> > > > > 
> > > > > "By doing so, we can use this reference and reduce the number of operations in the fast path."
> > > > ok. what kind of benefit do you see with this patch?
> > > As mentioned. This is a micro optimization that reduce the number of
> > > instructions/dereferences in the fast path.
> > By how much? How random code tweaks affect object code is unpredictable.
> > Pls show results of objdump to prove it does anything
> > useful.
> 
> This is the way all modern block drivers such as NVMe PCI/RDMA/TCP use the
> driver_data.
> 
> These drivers don't have driver specific mechanisms to find the queue from
> the hctx->queue->queuedata like vblk driver has for some unknown reason.
> 
> It is pretty easy to review this patch and see its benefits, isn't it ?
> 
> It is not expected to provide extreme perf improvement.
> 
> It is introduced for aligning the driver to use common MQ mechanisms and
> reduce dereferences.
> 
> This is not "random code tweaks".

If you cannot observe a performance change, then adjusting the commit
description to explain this as a code cleanup to reduce dereferences and
local variables, improving code readability seems fine to me. I think
it's a nice cleanup when presented as such rather than a performance
optimization.

Stefan
Max Gurtovoy Aug. 2, 2024, 10:07 p.m. UTC | #8
On 01/08/2024 20:56, Stefan Hajnoczi wrote:
> On Thu, Aug 01, 2024 at 06:56:44PM +0300, Max Gurtovoy wrote:
>> On 01/08/2024 18:43, Michael S. Tsirkin wrote:
>>> On Thu, Aug 01, 2024 at 06:39:16PM +0300, Max Gurtovoy wrote:
>>>> On 01/08/2024 18:29, Michael S. Tsirkin wrote:
>>>>> On Thu, Aug 01, 2024 at 06:17:21PM +0300, Max Gurtovoy wrote:
>>>>>> On 01/08/2024 18:13, Michael S. Tsirkin wrote:
>>>>>>> On Thu, Aug 01, 2024 at 06:11:37PM +0300, Max Gurtovoy wrote:
>>>>>>>> In this operation set the driver data of the hctx to point to the virtio
>>>>>>>> block queue. By doing so, we can use this reference in the and reduce
>>>>>>> in the .... ?
>>>>>> sorry for the typo.
>>>>>>
>>>>>> should be :
>>>>>>
>>>>>> "By doing so, we can use this reference and reduce the number of operations in the fast path."
>>>>> ok. what kind of benefit do you see with this patch?
>>>> As mentioned. This is a micro optimization that reduce the number of
>>>> instructions/dereferences in the fast path.
>>> By how much? How random code tweaks affect object code is unpredictable.
>>> Pls show results of objdump to prove it does anything
>>> useful.
>> This is the way all modern block drivers such as NVMe PCI/RDMA/TCP use the
>> driver_data.
>>
>> These drivers don't have driver specific mechanisms to find the queue from
>> the hctx->queue->queuedata like vblk driver has for some unknown reason.
>>
>> It is pretty easy to review this patch and see its benefits, isn't it ?
>>
>> It is not expected to provide extreme perf improvement.
>>
>> It is introduced for aligning the driver to use common MQ mechanisms and
>> reduce dereferences.
>>
>> This is not "random code tweaks".
> If you cannot observe a performance change, then adjusting the commit
> description to explain this as a code cleanup to reduce dereferences and
> local variables, improving code readability seems fine to me. I think
> it's a nice cleanup when presented as such rather than a performance
> optimization.
>
> Stefan

Sure. Please check the below adjustment:

virtio_blk: implement init_hctx MQ operation

Set the driver data of the hardware context (hctx) to point directly to
the virtio block queue. This cleanup improves code readability, reduces
the number of dereferences, and minimizes local variables in the fast
path.
Michael S. Tsirkin Aug. 3, 2024, 12:39 p.m. UTC | #9
On Sat, Aug 03, 2024 at 01:07:27AM +0300, Max Gurtovoy wrote:
> 
> On 01/08/2024 20:56, Stefan Hajnoczi wrote:
> > On Thu, Aug 01, 2024 at 06:56:44PM +0300, Max Gurtovoy wrote:
> > > On 01/08/2024 18:43, Michael S. Tsirkin wrote:
> > > > On Thu, Aug 01, 2024 at 06:39:16PM +0300, Max Gurtovoy wrote:
> > > > > On 01/08/2024 18:29, Michael S. Tsirkin wrote:
> > > > > > On Thu, Aug 01, 2024 at 06:17:21PM +0300, Max Gurtovoy wrote:
> > > > > > > On 01/08/2024 18:13, Michael S. Tsirkin wrote:
> > > > > > > > On Thu, Aug 01, 2024 at 06:11:37PM +0300, Max Gurtovoy wrote:
> > > > > > > > > In this operation set the driver data of the hctx to point to the virtio
> > > > > > > > > block queue. By doing so, we can use this reference in the and reduce
> > > > > > > > in the .... ?
> > > > > > > sorry for the typo.
> > > > > > > 
> > > > > > > should be :
> > > > > > > 
> > > > > > > "By doing so, we can use this reference and reduce the number of operations in the fast path."
> > > > > > ok. what kind of benefit do you see with this patch?
> > > > > As mentioned. This is a micro optimization that reduce the number of
> > > > > instructions/dereferences in the fast path.
> > > > By how much? How random code tweaks affect object code is unpredictable.
> > > > Pls show results of objdump to prove it does anything
> > > > useful.
> > > This is the way all modern block drivers such as NVMe PCI/RDMA/TCP use the
> > > driver_data.
> > > 
> > > These drivers don't have driver specific mechanisms to find the queue from
> > > the hctx->queue->queuedata like vblk driver has for some unknown reason.
> > > 
> > > It is pretty easy to review this patch and see its benefits, isn't it ?
> > > 
> > > It is not expected to provide extreme perf improvement.
> > > 
> > > It is introduced for aligning the driver to use common MQ mechanisms and
> > > reduce dereferences.
> > > 
> > > This is not "random code tweaks".
> > If you cannot observe a performance change, then adjusting the commit
> > description to explain this as a code cleanup to reduce dereferences and
> > local variables, improving code readability seems fine to me. I think
> > it's a nice cleanup when presented as such rather than a performance
> > optimization.
> > 
> > Stefan
> 
> Sure. Please check the below adjustment:
> 
> virtio_blk: implement init_hctx MQ operation
> 
> Set the driver data of the hardware context (hctx) to point directly to
> the virtio block queue. This cleanup improves code readability, reduces
> the number of dereferences, and minimizes local variables in the fast
> path.

I'd drop the local variables part; it is not at all clear why that is
a win.
Max Gurtovoy Aug. 3, 2024, 5:54 p.m. UTC | #10
On 03/08/2024 15:39, Michael S. Tsirkin wrote:
> On Sat, Aug 03, 2024 at 01:07:27AM +0300, Max Gurtovoy wrote:
>> On 01/08/2024 20:56, Stefan Hajnoczi wrote:
>>> On Thu, Aug 01, 2024 at 06:56:44PM +0300, Max Gurtovoy wrote:
>>>> On 01/08/2024 18:43, Michael S. Tsirkin wrote:
>>>>> On Thu, Aug 01, 2024 at 06:39:16PM +0300, Max Gurtovoy wrote:
>>>>>> On 01/08/2024 18:29, Michael S. Tsirkin wrote:
>>>>>>> On Thu, Aug 01, 2024 at 06:17:21PM +0300, Max Gurtovoy wrote:
>>>>>>>> On 01/08/2024 18:13, Michael S. Tsirkin wrote:
>>>>>>>>> On Thu, Aug 01, 2024 at 06:11:37PM +0300, Max Gurtovoy wrote:
>>>>>>>>>> In this operation set the driver data of the hctx to point to the virtio
>>>>>>>>>> block queue. By doing so, we can use this reference in the and reduce
>>>>>>>>> in the .... ?
>>>>>>>> sorry for the typo.
>>>>>>>>
>>>>>>>> should be :
>>>>>>>>
>>>>>>>> "By doing so, we can use this reference and reduce the number of operations in the fast path."
>>>>>>> ok. what kind of benefit do you see with this patch?
>>>>>> As mentioned. This is a micro optimization that reduce the number of
>>>>>> instructions/dereferences in the fast path.
>>>>> By how much? How random code tweaks affect object code is unpredictable.
>>>>> Pls show results of objdump to prove it does anything
>>>>> useful.
>>>> This is the way all modern block drivers such as NVMe PCI/RDMA/TCP use the
>>>> driver_data.
>>>>
>>>> These drivers don't have driver specific mechanisms to find the queue from
>>>> the hctx->queue->queuedata like vblk driver has for some unknown reason.
>>>>
>>>> It is pretty easy to review this patch and see its benefits, isn't it ?
>>>>
>>>> It is not expected to provide extreme perf improvement.
>>>>
>>>> It is introduced for aligning the driver to use common MQ mechanisms and
>>>> reduce dereferences.
>>>>
>>>> This is not "random code tweaks".
>>> If you cannot observe a performance change, then adjusting the commit
>>> description to explain this as a code cleanup to reduce dereferences and
>>> local variables, improving code readability seems fine to me. I think
>>> it's a nice cleanup when presented as such rather than a performance
>>> optimization.
>>>
>>> Stefan
>> Sure. Please check the below adjustment:
>>
>> virtio_blk: implement init_hctx MQ operation
>>
>> Set the driver data of the hardware context (hctx) to point directly to
>> the virtio block queue. This cleanup improves code readability, reduces
>> the number of dereferences, and minimizes local variables in the fast
>> path.
> I'd drop the local variables part, it is not at all clear why is that
> a win.

We can drop it:

virtio_blk: implement init_hctx MQ operation

Set the driver data of the hardware context (hctx) to point directly to
the virtio block queue. This cleanup improves code readability and reduces
the number of dereferences in the fast path.
Stefan Hajnoczi Aug. 7, 2024, 1:19 p.m. UTC | #11
On Sat, Aug 03, 2024 at 08:54:45PM +0300, Max Gurtovoy wrote:
> 
> On 03/08/2024 15:39, Michael S. Tsirkin wrote:
> > On Sat, Aug 03, 2024 at 01:07:27AM +0300, Max Gurtovoy wrote:
> > > On 01/08/2024 20:56, Stefan Hajnoczi wrote:
> > > > On Thu, Aug 01, 2024 at 06:56:44PM +0300, Max Gurtovoy wrote:
> > > > > On 01/08/2024 18:43, Michael S. Tsirkin wrote:
> > > > > > On Thu, Aug 01, 2024 at 06:39:16PM +0300, Max Gurtovoy wrote:
> > > > > > > On 01/08/2024 18:29, Michael S. Tsirkin wrote:
> > > > > > > > On Thu, Aug 01, 2024 at 06:17:21PM +0300, Max Gurtovoy wrote:
> > > > > > > > > On 01/08/2024 18:13, Michael S. Tsirkin wrote:
> > > > > > > > > > On Thu, Aug 01, 2024 at 06:11:37PM +0300, Max Gurtovoy wrote:
> > > > > > > > > > > In this operation set the driver data of the hctx to point to the virtio
> > > > > > > > > > > block queue. By doing so, we can use this reference in the and reduce
> > > > > > > > > > in the .... ?
> > > > > > > > > sorry for the typo.
> > > > > > > > > 
> > > > > > > > > should be :
> > > > > > > > > 
> > > > > > > > > "By doing so, we can use this reference and reduce the number of operations in the fast path."
> > > > > > > > ok. what kind of benefit do you see with this patch?
> > > > > > > As mentioned. This is a micro optimization that reduce the number of
> > > > > > > instructions/dereferences in the fast path.
> > > > > > By how much? How random code tweaks affect object code is unpredictable.
> > > > > > Pls show results of objdump to prove it does anything
> > > > > > useful.
> > > > > This is the way all modern block drivers such as NVMe PCI/RDMA/TCP use the
> > > > > driver_data.
> > > > > 
> > > > > These drivers don't have driver specific mechanisms to find the queue from
> > > > > the hctx->queue->queuedata like vblk driver has for some unknown reason.
> > > > > 
> > > > > It is pretty easy to review this patch and see its benefits, isn't it ?
> > > > > 
> > > > > It is not expected to provide extreme perf improvement.
> > > > > 
> > > > > It is introduced for aligning the driver to use common MQ mechanisms and
> > > > > reduce dereferences.
> > > > > 
> > > > > This is not "random code tweaks".
> > > > If you cannot observe a performance change, then adjusting the commit
> > > > description to explain this as a code cleanup to reduce dereferences and
> > > > local variables, improving code readability seems fine to me. I think
> > > > it's a nice cleanup when presented as such rather than a performance
> > > > optimization.
> > > > 
> > > > Stefan
> > > Sure. Please check the below adjustment:
> > > 
> > > virtio_blk: implement init_hctx MQ operation
> > > 
> > > Set the driver data of the hardware context (hctx) to point directly to
> > > the virtio block queue. This cleanup improves code readability, reduces
> > > the number of dereferences, and minimizes local variables in the fast
> > > path.
> > I'd drop the local variables part, it is not at all clear why is that
> > a win.
> 
> We can drop it:
> 
> virtio_blk: implement init_hctx MQ operation
> 
> Set the driver data of the hardware context (hctx) to point directly to
> the virtio block queue. This cleanup improves code readability and reduces
> the number of dereferences in the fast path.
> 
> 

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Michael S. Tsirkin Aug. 7, 2024, 1:34 p.m. UTC | #12
On Sat, Aug 03, 2024 at 08:54:45PM +0300, Max Gurtovoy wrote:
> 
> On 03/08/2024 15:39, Michael S. Tsirkin wrote:
> > On Sat, Aug 03, 2024 at 01:07:27AM +0300, Max Gurtovoy wrote:
> > > On 01/08/2024 20:56, Stefan Hajnoczi wrote:
> > > > On Thu, Aug 01, 2024 at 06:56:44PM +0300, Max Gurtovoy wrote:
> > > > > On 01/08/2024 18:43, Michael S. Tsirkin wrote:
> > > > > > On Thu, Aug 01, 2024 at 06:39:16PM +0300, Max Gurtovoy wrote:
> > > > > > > On 01/08/2024 18:29, Michael S. Tsirkin wrote:
> > > > > > > > On Thu, Aug 01, 2024 at 06:17:21PM +0300, Max Gurtovoy wrote:
> > > > > > > > > On 01/08/2024 18:13, Michael S. Tsirkin wrote:
> > > > > > > > > > On Thu, Aug 01, 2024 at 06:11:37PM +0300, Max Gurtovoy wrote:
> > > > > > > > > > > In this operation set the driver data of the hctx to point to the virtio
> > > > > > > > > > > block queue. By doing so, we can use this reference in the and reduce
> > > > > > > > > > in the .... ?
> > > > > > > > > sorry for the typo.
> > > > > > > > > 
> > > > > > > > > should be :
> > > > > > > > > 
> > > > > > > > > "By doing so, we can use this reference and reduce the number of operations in the fast path."
> > > > > > > > ok. what kind of benefit do you see with this patch?
> > > > > > > As mentioned. This is a micro optimization that reduce the number of
> > > > > > > instructions/dereferences in the fast path.
> > > > > > By how much? How random code tweaks affect object code is unpredictable.
> > > > > > Pls show results of objdump to prove it does anything
> > > > > > useful.
> > > > > This is the way all modern block drivers such as NVMe PCI/RDMA/TCP use the
> > > > > driver_data.
> > > > > 
> > > > > These drivers don't have driver specific mechanisms to find the queue from
> > > > > the hctx->queue->queuedata like vblk driver has for some unknown reason.
> > > > > 
> > > > > It is pretty easy to review this patch and see its benefits, isn't it ?
> > > > > 
> > > > > It is not expected to provide extreme perf improvement.
> > > > > 
> > > > > It is introduced for aligning the driver to use common MQ mechanisms and
> > > > > reduce dereferences.
> > > > > 
> > > > > This is not "random code tweaks".
> > > > If you cannot observe a performance change, then adjusting the commit
> > > > description to explain this as a code cleanup to reduce dereferences and
> > > > local variables, improving code readability seems fine to me. I think
> > > > it's a nice cleanup when presented as such rather than a performance
> > > > optimization.
> > > > 
> > > > Stefan
> > > Sure. Please check the below adjustment:
> > > 
> > > virtio_blk: implement init_hctx MQ operation
> > > 
> > > Set the driver data of the hardware context (hctx) to point directly to
> > > the virtio block queue. This cleanup improves code readability, reduces
> > > the number of dereferences, and minimizes local variables in the fast
> > > path.
> > I'd drop the local variables part, it is not at all clear why is that
> > a win.
> 
> We can drop it:
> 
> virtio_blk: implement init_hctx MQ operation
> 
> Set the driver data of the hardware context (hctx) to point directly to
> the virtio block queue. This cleanup improves code readability and reduces
> the number of dereferences in the fast path.
> 


yep. also pls drop 1/1 from subject. Just [PATCH vX]

pls repost with these commit log tweaks, I will queue.
diff mbox series

Patch

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 2351f411fa46..35a7a586f6f5 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -129,14 +129,6 @@  static inline blk_status_t virtblk_result(u8 status)
 	}
 }
 
-static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx)
-{
-	struct virtio_blk *vblk = hctx->queue->queuedata;
-	struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
-
-	return vq;
-}
-
 static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
 {
 	struct scatterlist out_hdr, in_hdr, *sgs[3];
@@ -377,8 +369,7 @@  static void virtblk_done(struct virtqueue *vq)
 
 static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx)
 {
-	struct virtio_blk *vblk = hctx->queue->queuedata;
-	struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
+	struct virtio_blk_vq *vq = hctx->driver_data;
 	bool kick;
 
 	spin_lock_irq(&vq->lock);
@@ -428,10 +419,10 @@  static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 			   const struct blk_mq_queue_data *bd)
 {
 	struct virtio_blk *vblk = hctx->queue->queuedata;
+	struct virtio_blk_vq *vq = hctx->driver_data;
 	struct request *req = bd->rq;
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
 	unsigned long flags;
-	int qid = hctx->queue_num;
 	bool notify = false;
 	blk_status_t status;
 	int err;
@@ -440,26 +431,26 @@  static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (unlikely(status))
 		return status;
 
-	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
-	err = virtblk_add_req(vblk->vqs[qid].vq, vbr);
+	spin_lock_irqsave(&vq->lock, flags);
+	err = virtblk_add_req(vq->vq, vbr);
 	if (err) {
-		virtqueue_kick(vblk->vqs[qid].vq);
+		virtqueue_kick(vq->vq);
 		/* Don't stop the queue if -ENOMEM: we may have failed to
 		 * bounce the buffer due to global resource outage.
 		 */
 		if (err == -ENOSPC)
 			blk_mq_stop_hw_queue(hctx);
-		spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
+		spin_unlock_irqrestore(&vq->lock, flags);
 		virtblk_unmap_data(req, vbr);
 		return virtblk_fail_to_queue(req, err);
 	}
 
-	if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
+	if (bd->last && virtqueue_kick_prepare(vq->vq))
 		notify = true;
-	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
+	spin_unlock_irqrestore(&vq->lock, flags);
 
 	if (notify)
-		virtqueue_notify(vblk->vqs[qid].vq);
+		virtqueue_notify(vq->vq);
 	return BLK_STS_OK;
 }
 
@@ -504,7 +495,7 @@  static void virtio_queue_rqs(struct request **rqlist)
 	struct request *requeue_list = NULL;
 
 	rq_list_for_each_safe(rqlist, req, next) {
-		struct virtio_blk_vq *vq = get_virtio_blk_vq(req->mq_hctx);
+		struct virtio_blk_vq *vq = req->mq_hctx->driver_data;
 		bool kick;
 
 		if (!virtblk_prep_rq_batch(req)) {
@@ -1164,6 +1155,16 @@  static const struct attribute_group *virtblk_attr_groups[] = {
 	NULL,
 };
 
+static int virtblk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
+		unsigned int hctx_idx)
+{
+	struct virtio_blk *vblk = data;
+	struct virtio_blk_vq *vq = &vblk->vqs[hctx_idx];
+
+	hctx->driver_data = vq;
+	return 0;
+}
+
 static void virtblk_map_queues(struct blk_mq_tag_set *set)
 {
 	struct virtio_blk *vblk = set->driver_data;
@@ -1205,7 +1206,7 @@  static void virtblk_complete_batch(struct io_comp_batch *iob)
 static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
 {
 	struct virtio_blk *vblk = hctx->queue->queuedata;
-	struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
+	struct virtio_blk_vq *vq = hctx->driver_data;
 	struct virtblk_req *vbr;
 	unsigned long flags;
 	unsigned int len;
@@ -1236,6 +1237,7 @@  static const struct blk_mq_ops virtio_mq_ops = {
 	.queue_rqs	= virtio_queue_rqs,
 	.commit_rqs	= virtio_commit_rqs,
 	.complete	= virtblk_request_done,
+	.init_hctx	= virtblk_init_hctx,
 	.map_queues	= virtblk_map_queues,
 	.poll		= virtblk_poll,
 };