[RFC,11/11] scsi: storvsc: Support PAGE_SIZE larger than 4K
diff mbox series

Message ID 20200721014135.84140-12-boqun.feng@gmail.com
State Superseded
Headers show
Series
  • Hyper-V: Support PAGE_SIZE larger than 4K
Related show

Commit Message

Boqun Feng July 21, 2020, 1:41 a.m. UTC
Hyper-V always use 4k page size (HV_HYP_PAGE_SIZE), so when
communicating with Hyper-V, a guest should always use HV_HYP_PAGE_SIZE
as the unit for page related data. For storvsc, the data is
vmbus_packet_mpb_array. And since in scsi_cmnd, sglist of pages (in unit
of PAGE_SIZE) is used, we need convert pages in the sglist of scsi_cmnd
into Hyper-V pages in vmbus_packet_mpb_array.

This patch does the conversion by dividing pages in sglist into Hyper-V
pages, offset and indexes in vmbus_packet_mpb_array are recalculated
accordingly.

Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
---
 drivers/scsi/storvsc_drv.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

Comments

Michael Kelley July 23, 2020, 12:13 a.m. UTC | #1
From: Boqun Feng <boqun.feng@gmail.com> Sent: Monday, July 20, 2020 6:42 PM
> 
> Hyper-V always use 4k page size (HV_HYP_PAGE_SIZE), so when
> communicating with Hyper-V, a guest should always use HV_HYP_PAGE_SIZE
> as the unit for page related data. For storvsc, the data is
> vmbus_packet_mpb_array. And since in scsi_cmnd, sglist of pages (in unit
> of PAGE_SIZE) is used, we need convert pages in the sglist of scsi_cmnd
> into Hyper-V pages in vmbus_packet_mpb_array.
> 
> This patch does the conversion by dividing pages in sglist into Hyper-V
> pages, offset and indexes in vmbus_packet_mpb_array are recalculated
> accordingly.
> 
> Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
> ---
>  drivers/scsi/storvsc_drv.c | 27 +++++++++++++++++++++------
>  1 file changed, 21 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
> index fb41636519ee..c54d25f279bc 100644
> --- a/drivers/scsi/storvsc_drv.c
> +++ b/drivers/scsi/storvsc_drv.c
> @@ -1561,7 +1561,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct
> scsi_cmnd *scmnd)
>  	struct hv_host_device *host_dev = shost_priv(host);
>  	struct hv_device *dev = host_dev->dev;
>  	struct storvsc_cmd_request *cmd_request = scsi_cmd_priv(scmnd);
> -	int i;
> +	int i, j, k;
>  	struct scatterlist *sgl;
>  	unsigned int sg_count = 0;
>  	struct vmscsi_request *vm_srb;
> @@ -1569,6 +1569,8 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct
> scsi_cmnd *scmnd)
>  	struct vmbus_packet_mpb_array  *payload;
>  	u32 payload_sz;
>  	u32 length;
> +	int subpage_idx = 0;
> +	unsigned int hvpg_count = 0;
> 
>  	if (vmstor_proto_version <= VMSTOR_PROTO_VERSION_WIN8) {
>  		/*
> @@ -1643,23 +1645,36 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct
> scsi_cmnd *scmnd)
>  	payload_sz = sizeof(cmd_request->mpb);
> 
>  	if (sg_count) {
> -		if (sg_count > MAX_PAGE_BUFFER_COUNT) {
> +		hvpg_count = sg_count * (PAGE_SIZE / HV_HYP_PAGE_SIZE);

The above calculation doesn't take into account the offset in the
first sglist or the overall length of the transfer, so the value of hvpg_count
could be quite a bit bigger than it needs to be.  For example, with a 64K
page size and an 8 Kbyte transfer size that starts at offset 60K in the
first page, hvpg_count will be 32 when it really only needs to be 2.

The nested loops below that populate the pfn_array take the 
offset into account when starting, so that's good.  But it will potentially
leave allocated entries unused.  Furthermore, the nested loops could
terminate early when enough Hyper-V size pages are mapped to PFNs
based on the length of the transfer, even if all of the last guest size
page has not been mapped to PFNs.  Like the offset at the beginning of
first guest size page in the sglist, there's potentially an unused portion
at the end of the last guest size page in the sglist.

> +		if (hvpg_count > MAX_PAGE_BUFFER_COUNT) {
> 
> -			payload_sz = (sg_count * sizeof(u64) +
> +			payload_sz = (hvpg_count * sizeof(u64) +
>  				      sizeof(struct vmbus_packet_mpb_array));
>  			payload = kzalloc(payload_sz, GFP_ATOMIC);
>  			if (!payload)
>  				return SCSI_MLQUEUE_DEVICE_BUSY;
>  		}
> 
> +		/*
> +		 * sgl is a list of PAGEs, and payload->range.pfn_array
> +		 * expects the page number in the unit of HV_HYP_PAGE_SIZE (the
> +		 * page size that Hyper-V uses, so here we need to divide PAGEs
> +		 * into HV_HYP_PAGE in case that PAGE_SIZE > HV_HYP_PAGE_SIZE.
> +		 */
>  		payload->range.len = length;
> -		payload->range.offset = sgl[0].offset;
> +		payload->range.offset = sgl[0].offset & ~HV_HYP_PAGE_MASK;
> +		subpage_idx = sgl[0].offset >> HV_HYP_PAGE_SHIFT;
> 
>  		cur_sgl = sgl;
> +		k = 0;
>  		for (i = 0; i < sg_count; i++) {
> -			payload->range.pfn_array[i] =
> -				page_to_pfn(sg_page((cur_sgl)));
> +			for (j = subpage_idx; j < (PAGE_SIZE / HV_HYP_PAGE_SIZE); j++) {

In the case where PAGE_SIZE == HV_HYP_PAGE_SIZE, would it help the compiler
eliminate the loop if local variable j is declared as unsigned?  In that case the test in the
for statement will always be false.

> +				payload->range.pfn_array[k] =
> +					page_to_hvpfn(sg_page((cur_sgl))) + j;
> +				k++;
> +			}
>  			cur_sgl = sg_next(cur_sgl);
> +			subpage_idx = 0;
>  		}
>  	}
> 
> --
> 2.27.0
Boqun Feng July 23, 2020, 1:51 a.m. UTC | #2
On Thu, Jul 23, 2020 at 12:13:07AM +0000, Michael Kelley wrote:
> From: Boqun Feng <boqun.feng@gmail.com> Sent: Monday, July 20, 2020 6:42 PM
> > 
> > Hyper-V always use 4k page size (HV_HYP_PAGE_SIZE), so when
> > communicating with Hyper-V, a guest should always use HV_HYP_PAGE_SIZE
> > as the unit for page related data. For storvsc, the data is
> > vmbus_packet_mpb_array. And since in scsi_cmnd, sglist of pages (in unit
> > of PAGE_SIZE) is used, we need convert pages in the sglist of scsi_cmnd
> > into Hyper-V pages in vmbus_packet_mpb_array.
> > 
> > This patch does the conversion by dividing pages in sglist into Hyper-V
> > pages, offset and indexes in vmbus_packet_mpb_array are recalculated
> > accordingly.
> > 
> > Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
> > ---
> >  drivers/scsi/storvsc_drv.c | 27 +++++++++++++++++++++------
> >  1 file changed, 21 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
> > index fb41636519ee..c54d25f279bc 100644
> > --- a/drivers/scsi/storvsc_drv.c
> > +++ b/drivers/scsi/storvsc_drv.c
> > @@ -1561,7 +1561,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct
> > scsi_cmnd *scmnd)
> >  	struct hv_host_device *host_dev = shost_priv(host);
> >  	struct hv_device *dev = host_dev->dev;
> >  	struct storvsc_cmd_request *cmd_request = scsi_cmd_priv(scmnd);
> > -	int i;
> > +	int i, j, k;
> >  	struct scatterlist *sgl;
> >  	unsigned int sg_count = 0;
> >  	struct vmscsi_request *vm_srb;
> > @@ -1569,6 +1569,8 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct
> > scsi_cmnd *scmnd)
> >  	struct vmbus_packet_mpb_array  *payload;
> >  	u32 payload_sz;
> >  	u32 length;
> > +	int subpage_idx = 0;
> > +	unsigned int hvpg_count = 0;
> > 
> >  	if (vmstor_proto_version <= VMSTOR_PROTO_VERSION_WIN8) {
> >  		/*
> > @@ -1643,23 +1645,36 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct
> > scsi_cmnd *scmnd)
> >  	payload_sz = sizeof(cmd_request->mpb);
> > 
> >  	if (sg_count) {
> > -		if (sg_count > MAX_PAGE_BUFFER_COUNT) {
> > +		hvpg_count = sg_count * (PAGE_SIZE / HV_HYP_PAGE_SIZE);
> 
> The above calculation doesn't take into account the offset in the
> first sglist or the overall length of the transfer, so the value of hvpg_count
> could be quite a bit bigger than it needs to be.  For example, with a 64K
> page size and an 8 Kbyte transfer size that starts at offset 60K in the
> first page, hvpg_count will be 32 when it really only needs to be 2.
> 
> The nested loops below that populate the pfn_array take the 
> offset into account when starting, so that's good.  But it will potentially
> leave allocated entries unused.  Furthermore, the nested loops could
> terminate early when enough Hyper-V size pages are mapped to PFNs
> based on the length of the transfer, even if all of the last guest size
> page has not been mapped to PFNs.  Like the offset at the beginning of
> first guest size page in the sglist, there's potentially an unused portion
> at the end of the last guest size page in the sglist.
> 

Good point. I think we could calculate the exact hvpg_count as follow:

	hvpg_count = 0;
	cur_sgl = sgl;

	for (i = 0; i < sg_count; i++) {
		hvpg_count += HVPFN_UP(cur_sg->length)
		cur_sgl = sg_next(cur_sgl);
	}

> > +		if (hvpg_count > MAX_PAGE_BUFFER_COUNT) {
> > 
> > -			payload_sz = (sg_count * sizeof(u64) +
> > +			payload_sz = (hvpg_count * sizeof(u64) +
> >  				      sizeof(struct vmbus_packet_mpb_array));
> >  			payload = kzalloc(payload_sz, GFP_ATOMIC);
> >  			if (!payload)
> >  				return SCSI_MLQUEUE_DEVICE_BUSY;
> >  		}
> > 
> > +		/*
> > +		 * sgl is a list of PAGEs, and payload->range.pfn_array
> > +		 * expects the page number in the unit of HV_HYP_PAGE_SIZE (the
> > +		 * page size that Hyper-V uses, so here we need to divide PAGEs
> > +		 * into HV_HYP_PAGE in case that PAGE_SIZE > HV_HYP_PAGE_SIZE.
> > +		 */
> >  		payload->range.len = length;
> > -		payload->range.offset = sgl[0].offset;
> > +		payload->range.offset = sgl[0].offset & ~HV_HYP_PAGE_MASK;
> > +		subpage_idx = sgl[0].offset >> HV_HYP_PAGE_SHIFT;
> > 
> >  		cur_sgl = sgl;
> > +		k = 0;
> >  		for (i = 0; i < sg_count; i++) {
> > -			payload->range.pfn_array[i] =
> > -				page_to_pfn(sg_page((cur_sgl)));
> > +			for (j = subpage_idx; j < (PAGE_SIZE / HV_HYP_PAGE_SIZE); j++) {
> 
> In the case where PAGE_SIZE == HV_HYP_PAGE_SIZE, would it help the compiler
> eliminate the loop if local variable j is declared as unsigned?  In that case the test in the
> for statement will always be false.
> 

Good point! I did the following test:

test.c:

	int func(unsigned int input, int *arr)
	{
		unsigned int i;
		int result = 0;

		for (i = input; i < 1; i++)
			result += arr[i];

		return result;
	}

if I define i as "int", I got:

	0000000000000000 <func>:
	   0:	85 ff                	test   %edi,%edi
	   2:	7f 2c                	jg     30 <func+0x30>
	   4:	48 63 d7             	movslq %edi,%rdx
	   7:	f7 df                	neg    %edi
	   9:	45 31 c0             	xor    %r8d,%r8d
	   c:	89 ff                	mov    %edi,%edi
	   e:	48 8d 04 96          	lea    (%rsi,%rdx,4),%rax
	  12:	48 01 d7             	add    %rdx,%rdi
	  15:	48 8d 54 be 04       	lea    0x4(%rsi,%rdi,4),%rdx
	  1a:	66 0f 1f 44 00 00    	nopw   0x0(%rax,%rax,1)
	  20:	44 03 00             	add    (%rax),%r8d
	  23:	48 83 c0 04          	add    $0x4,%rax
	  27:	48 39 d0             	cmp    %rdx,%rax
	  2a:	75 f4                	jne    20 <func+0x20>
	  2c:	44 89 c0             	mov    %r8d,%eax
	  2f:	c3                   	retq   
	  30:	45 31 c0             	xor    %r8d,%r8d
	  33:	44 89 c0             	mov    %r8d,%eax
	  36:	c3                   	retq   

and when I define i as "unsigned int", I got:

	0000000000000000 <func>:
	   0:	85 ff                	test   %edi,%edi
	   2:	75 03                	jne    7 <func+0x7>
	   4:	8b 06                	mov    (%rsi),%eax
	   6:	c3                   	retq   
	   7:	31 c0                	xor    %eax,%eax
	   9:	c3                   	retq   

So clearly it helps, I will change this in the next version.

Regards,
Boqun

> > +				payload->range.pfn_array[k] =
> > +					page_to_hvpfn(sg_page((cur_sgl))) + j;
> > +				k++;
> > +			}
> >  			cur_sgl = sg_next(cur_sgl);
> > +			subpage_idx = 0;
> >  		}
> >  	}
> > 
> > --
> > 2.27.0
>
Michael Kelley July 23, 2020, 2:26 a.m. UTC | #3
From: boqun.feng@gmail.com <boqun.feng@gmail.com> Sent: Wednesday, July 22, 2020 6:52 PM
> 
> On Thu, Jul 23, 2020 at 12:13:07AM +0000, Michael Kelley wrote:
> > From: Boqun Feng <boqun.feng@gmail.com> Sent: Monday, July 20, 2020 6:42 PM
> > >
> > > Hyper-V always use 4k page size (HV_HYP_PAGE_SIZE), so when
> > > communicating with Hyper-V, a guest should always use HV_HYP_PAGE_SIZE
> > > as the unit for page related data. For storvsc, the data is
> > > vmbus_packet_mpb_array. And since in scsi_cmnd, sglist of pages (in unit
> > > of PAGE_SIZE) is used, we need convert pages in the sglist of scsi_cmnd
> > > into Hyper-V pages in vmbus_packet_mpb_array.
> > >
> > > This patch does the conversion by dividing pages in sglist into Hyper-V
> > > pages, offset and indexes in vmbus_packet_mpb_array are recalculated
> > > accordingly.
> > >
> > > Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
> > > ---
> > >  drivers/scsi/storvsc_drv.c | 27 +++++++++++++++++++++------
> > >  1 file changed, 21 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
> > > index fb41636519ee..c54d25f279bc 100644
> > > --- a/drivers/scsi/storvsc_drv.c
> > > +++ b/drivers/scsi/storvsc_drv.c
> > > @@ -1561,7 +1561,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host,
> struct
> > > scsi_cmnd *scmnd)
> > >  	struct hv_host_device *host_dev = shost_priv(host);
> > >  	struct hv_device *dev = host_dev->dev;
> > >  	struct storvsc_cmd_request *cmd_request = scsi_cmd_priv(scmnd);
> > > -	int i;
> > > +	int i, j, k;
> > >  	struct scatterlist *sgl;
> > >  	unsigned int sg_count = 0;
> > >  	struct vmscsi_request *vm_srb;
> > > @@ -1569,6 +1569,8 @@ static int storvsc_queuecommand(struct Scsi_Host *host,
> struct
> > > scsi_cmnd *scmnd)
> > >  	struct vmbus_packet_mpb_array  *payload;
> > >  	u32 payload_sz;
> > >  	u32 length;
> > > +	int subpage_idx = 0;
> > > +	unsigned int hvpg_count = 0;
> > >
> > >  	if (vmstor_proto_version <= VMSTOR_PROTO_VERSION_WIN8) {
> > >  		/*
> > > @@ -1643,23 +1645,36 @@ static int storvsc_queuecommand(struct Scsi_Host *host,
> struct
> > > scsi_cmnd *scmnd)
> > >  	payload_sz = sizeof(cmd_request->mpb);
> > >
> > >  	if (sg_count) {
> > > -		if (sg_count > MAX_PAGE_BUFFER_COUNT) {
> > > +		hvpg_count = sg_count * (PAGE_SIZE / HV_HYP_PAGE_SIZE);
> >
> > The above calculation doesn't take into account the offset in the
> > first sglist or the overall length of the transfer, so the value of hvpg_count
> > could be quite a bit bigger than it needs to be.  For example, with a 64K
> > page size and an 8 Kbyte transfer size that starts at offset 60K in the
> > first page, hvpg_count will be 32 when it really only needs to be 2.
> >
> > The nested loops below that populate the pfn_array take the
> > offset into account when starting, so that's good.  But it will potentially
> > leave allocated entries unused.  Furthermore, the nested loops could
> > terminate early when enough Hyper-V size pages are mapped to PFNs
> > based on the length of the transfer, even if all of the last guest size
> > page has not been mapped to PFNs.  Like the offset at the beginning of
> > first guest size page in the sglist, there's potentially an unused portion
> > at the end of the last guest size page in the sglist.
> >
> 
> Good point. I think we could calculate the exact hvpg_count as follow:
> 
> 	hvpg_count = 0;
> 	cur_sgl = sgl;
> 
> 	for (i = 0; i < sg_count; i++) {
> 		hvpg_count += HVPFN_UP(cur_sg->length)
> 		cur_sgl = sg_next(cur_sgl);
> 	}
> 

The downside would be going around that loop a lot of times when
the page size is 4K bytes and the I/O transfer size is something like
256K bytes.  I think this gives the right result in constant time:  the
starting offset within a Hyper-V page, plus the transfer length,
rounded up to a Hyper-V page size, and divided by the Hyper-V
page size.


> > > +		if (hvpg_count > MAX_PAGE_BUFFER_COUNT) {
> > >
> > > -			payload_sz = (sg_count * sizeof(u64) +
> > > +			payload_sz = (hvpg_count * sizeof(u64) +
> > >  				      sizeof(struct vmbus_packet_mpb_array));
> > >  			payload = kzalloc(payload_sz, GFP_ATOMIC);
> > >  			if (!payload)
> > >  				return SCSI_MLQUEUE_DEVICE_BUSY;
> > >  		}
> > >
> > > +		/*
> > > +		 * sgl is a list of PAGEs, and payload->range.pfn_array
> > > +		 * expects the page number in the unit of HV_HYP_PAGE_SIZE (the
> > > +		 * page size that Hyper-V uses, so here we need to divide PAGEs
> > > +		 * into HV_HYP_PAGE in case that PAGE_SIZE > HV_HYP_PAGE_SIZE.
> > > +		 */
> > >  		payload->range.len = length;
> > > -		payload->range.offset = sgl[0].offset;
> > > +		payload->range.offset = sgl[0].offset & ~HV_HYP_PAGE_MASK;
> > > +		subpage_idx = sgl[0].offset >> HV_HYP_PAGE_SHIFT;
> > >
> > >  		cur_sgl = sgl;
> > > +		k = 0;
> > >  		for (i = 0; i < sg_count; i++) {
> > > -			payload->range.pfn_array[i] =
> > > -				page_to_pfn(sg_page((cur_sgl)));
> > > +			for (j = subpage_idx; j < (PAGE_SIZE / HV_HYP_PAGE_SIZE); j++) {
> >
> > In the case where PAGE_SIZE == HV_HYP_PAGE_SIZE, would it help the compiler
> > eliminate the loop if local variable j is declared as unsigned?  In that case the test in the
> > for statement will always be false.
> >
> 
> Good point! I did the following test:
> 
> test.c:
> 
> 	int func(unsigned int input, int *arr)
> 	{
> 		unsigned int i;
> 		int result = 0;
> 
> 		for (i = input; i < 1; i++)
> 			result += arr[i];
> 
> 		return result;
> 	}
> 
> if I define i as "int", I got:
> 
> 	0000000000000000 <func>:
> 	   0:	85 ff                	test   %edi,%edi
> 	   2:	7f 2c                	jg     30 <func+0x30>
> 	   4:	48 63 d7             	movslq %edi,%rdx
> 	   7:	f7 df                	neg    %edi
> 	   9:	45 31 c0             	xor    %r8d,%r8d
> 	   c:	89 ff                	mov    %edi,%edi
> 	   e:	48 8d 04 96          	lea    (%rsi,%rdx,4),%rax
> 	  12:	48 01 d7             	add    %rdx,%rdi
> 	  15:	48 8d 54 be 04       	lea    0x4(%rsi,%rdi,4),%rdx
> 	  1a:	66 0f 1f 44 00 00    	nopw   0x0(%rax,%rax,1)
> 	  20:	44 03 00             	add    (%rax),%r8d
> 	  23:	48 83 c0 04          	add    $0x4,%rax
> 	  27:	48 39 d0             	cmp    %rdx,%rax
> 	  2a:	75 f4                	jne    20 <func+0x20>
> 	  2c:	44 89 c0             	mov    %r8d,%eax
> 	  2f:	c3                   	retq
> 	  30:	45 31 c0             	xor    %r8d,%r8d
> 	  33:	44 89 c0             	mov    %r8d,%eax
> 	  36:	c3                   	retq
> 
> and when I define i as "unsigned int", I got:
> 
> 	0000000000000000 <func>:
> 	   0:	85 ff                	test   %edi,%edi
> 	   2:	75 03                	jne    7 <func+0x7>
> 	   4:	8b 06                	mov    (%rsi),%eax
> 	   6:	c3                   	retq
> 	   7:	31 c0                	xor    %eax,%eax
> 	   9:	c3                   	retq
> 
> So clearly it helps, I will change this in the next version.

Wow!  The compiler is good ....

> 
> Regards,
> Boqun
> 
> > > +				payload->range.pfn_array[k] =
> > > +					page_to_hvpfn(sg_page((cur_sgl))) + j;
> > > +				k++;
> > > +			}
> > >  			cur_sgl = sg_next(cur_sgl);
> > > +			subpage_idx = 0;
> > >  		}
> > >  	}
> > >
> > > --
> > > 2.27.0
> >
Boqun Feng July 23, 2020, 3:12 a.m. UTC | #4
On Thu, Jul 23, 2020 at 02:26:00AM +0000, Michael Kelley wrote:
> From: boqun.feng@gmail.com <boqun.feng@gmail.com> Sent: Wednesday, July 22, 2020 6:52 PM
> > 
> > On Thu, Jul 23, 2020 at 12:13:07AM +0000, Michael Kelley wrote:
> > > From: Boqun Feng <boqun.feng@gmail.com> Sent: Monday, July 20, 2020 6:42 PM
> > > >
> > > > Hyper-V always use 4k page size (HV_HYP_PAGE_SIZE), so when
> > > > communicating with Hyper-V, a guest should always use HV_HYP_PAGE_SIZE
> > > > as the unit for page related data. For storvsc, the data is
> > > > vmbus_packet_mpb_array. And since in scsi_cmnd, sglist of pages (in unit
> > > > of PAGE_SIZE) is used, we need convert pages in the sglist of scsi_cmnd
> > > > into Hyper-V pages in vmbus_packet_mpb_array.
> > > >
> > > > This patch does the conversion by dividing pages in sglist into Hyper-V
> > > > pages, offset and indexes in vmbus_packet_mpb_array are recalculated
> > > > accordingly.
> > > >
> > > > Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
> > > > ---
> > > >  drivers/scsi/storvsc_drv.c | 27 +++++++++++++++++++++------
> > > >  1 file changed, 21 insertions(+), 6 deletions(-)
> > > >
> > > > diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
> > > > index fb41636519ee..c54d25f279bc 100644
> > > > --- a/drivers/scsi/storvsc_drv.c
> > > > +++ b/drivers/scsi/storvsc_drv.c
> > > > @@ -1561,7 +1561,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host,
> > struct
> > > > scsi_cmnd *scmnd)
> > > >  	struct hv_host_device *host_dev = shost_priv(host);
> > > >  	struct hv_device *dev = host_dev->dev;
> > > >  	struct storvsc_cmd_request *cmd_request = scsi_cmd_priv(scmnd);
> > > > -	int i;
> > > > +	int i, j, k;
> > > >  	struct scatterlist *sgl;
> > > >  	unsigned int sg_count = 0;
> > > >  	struct vmscsi_request *vm_srb;
> > > > @@ -1569,6 +1569,8 @@ static int storvsc_queuecommand(struct Scsi_Host *host,
> > struct
> > > > scsi_cmnd *scmnd)
> > > >  	struct vmbus_packet_mpb_array  *payload;
> > > >  	u32 payload_sz;
> > > >  	u32 length;
> > > > +	int subpage_idx = 0;
> > > > +	unsigned int hvpg_count = 0;
> > > >
> > > >  	if (vmstor_proto_version <= VMSTOR_PROTO_VERSION_WIN8) {
> > > >  		/*
> > > > @@ -1643,23 +1645,36 @@ static int storvsc_queuecommand(struct Scsi_Host *host,
> > struct
> > > > scsi_cmnd *scmnd)
> > > >  	payload_sz = sizeof(cmd_request->mpb);
> > > >
> > > >  	if (sg_count) {
> > > > -		if (sg_count > MAX_PAGE_BUFFER_COUNT) {
> > > > +		hvpg_count = sg_count * (PAGE_SIZE / HV_HYP_PAGE_SIZE);
> > >
> > > The above calculation doesn't take into account the offset in the
> > > first sglist or the overall length of the transfer, so the value of hvpg_count
> > > could be quite a bit bigger than it needs to be.  For example, with a 64K
> > > page size and an 8 Kbyte transfer size that starts at offset 60K in the
> > > first page, hvpg_count will be 32 when it really only needs to be 2.
> > >
> > > The nested loops below that populate the pfn_array take the
> > > offset into account when starting, so that's good.  But it will potentially
> > > leave allocated entries unused.  Furthermore, the nested loops could
> > > terminate early when enough Hyper-V size pages are mapped to PFNs
> > > based on the length of the transfer, even if all of the last guest size
> > > page has not been mapped to PFNs.  Like the offset at the beginning of
> > > first guest size page in the sglist, there's potentially an unused portion
> > > at the end of the last guest size page in the sglist.
> > >
> > 
> > Good point. I think we could calculate the exact hvpg_count as follow:
> > 
> > 	hvpg_count = 0;
> > 	cur_sgl = sgl;
> > 
> > 	for (i = 0; i < sg_count; i++) {
> > 		hvpg_count += HVPFN_UP(cur_sg->length)
> > 		cur_sgl = sg_next(cur_sgl);
> > 	}
> > 
> 
> The downside would be going around that loop a lot of times when
> the page size is 4K bytes and the I/O transfer size is something like
> 256K bytes.  I think this gives the right result in constant time:  the
> starting offset within a Hyper-V page, plus the transfer length,
> rounded up to a Hyper-V page size, and divided by the Hyper-V
> page size.
> 

Ok, then:

	hvpg_offset = sgl->offset & ~HV_HYP_PAGE_MASK;
	hvpg_count = HVPFN_UP(hv_offset + length);

?

Thanks!

Regards,
Boqun

> 
> > > > +		if (hvpg_count > MAX_PAGE_BUFFER_COUNT) {
> > > >
> > > > -			payload_sz = (sg_count * sizeof(u64) +
> > > > +			payload_sz = (hvpg_count * sizeof(u64) +
> > > >  				      sizeof(struct vmbus_packet_mpb_array));
> > > >  			payload = kzalloc(payload_sz, GFP_ATOMIC);
> > > >  			if (!payload)
> > > >  				return SCSI_MLQUEUE_DEVICE_BUSY;
> > > >  		}
> > > >
> > > > +		/*
> > > > +		 * sgl is a list of PAGEs, and payload->range.pfn_array
> > > > +		 * expects the page number in the unit of HV_HYP_PAGE_SIZE (the
> > > > +		 * page size that Hyper-V uses, so here we need to divide PAGEs
> > > > +		 * into HV_HYP_PAGE in case that PAGE_SIZE > HV_HYP_PAGE_SIZE.
> > > > +		 */
> > > >  		payload->range.len = length;
> > > > -		payload->range.offset = sgl[0].offset;
> > > > +		payload->range.offset = sgl[0].offset & ~HV_HYP_PAGE_MASK;
> > > > +		subpage_idx = sgl[0].offset >> HV_HYP_PAGE_SHIFT;
> > > >
> > > >  		cur_sgl = sgl;
> > > > +		k = 0;
> > > >  		for (i = 0; i < sg_count; i++) {
> > > > -			payload->range.pfn_array[i] =
> > > > -				page_to_pfn(sg_page((cur_sgl)));
> > > > +			for (j = subpage_idx; j < (PAGE_SIZE / HV_HYP_PAGE_SIZE); j++) {
> > >
> > > In the case where PAGE_SIZE == HV_HYP_PAGE_SIZE, would it help the compiler
> > > eliminate the loop if local variable j is declared as unsigned?  In that case the test in the
> > > for statement will always be false.
> > >
> > 
> > Good point! I did the following test:
> > 
> > test.c:
> > 
> > 	int func(unsigned int input, int *arr)
> > 	{
> > 		unsigned int i;
> > 		int result = 0;
> > 
> > 		for (i = input; i < 1; i++)
> > 			result += arr[i];
> > 
> > 		return result;
> > 	}
> > 
> > if I define i as "int", I got:
> > 
> > 	0000000000000000 <func>:
> > 	   0:	85 ff                	test   %edi,%edi
> > 	   2:	7f 2c                	jg     30 <func+0x30>
> > 	   4:	48 63 d7             	movslq %edi,%rdx
> > 	   7:	f7 df                	neg    %edi
> > 	   9:	45 31 c0             	xor    %r8d,%r8d
> > 	   c:	89 ff                	mov    %edi,%edi
> > 	   e:	48 8d 04 96          	lea    (%rsi,%rdx,4),%rax
> > 	  12:	48 01 d7             	add    %rdx,%rdi
> > 	  15:	48 8d 54 be 04       	lea    0x4(%rsi,%rdi,4),%rdx
> > 	  1a:	66 0f 1f 44 00 00    	nopw   0x0(%rax,%rax,1)
> > 	  20:	44 03 00             	add    (%rax),%r8d
> > 	  23:	48 83 c0 04          	add    $0x4,%rax
> > 	  27:	48 39 d0             	cmp    %rdx,%rax
> > 	  2a:	75 f4                	jne    20 <func+0x20>
> > 	  2c:	44 89 c0             	mov    %r8d,%eax
> > 	  2f:	c3                   	retq
> > 	  30:	45 31 c0             	xor    %r8d,%r8d
> > 	  33:	44 89 c0             	mov    %r8d,%eax
> > 	  36:	c3                   	retq
> > 
> > and when I define i as "unsigned int", I got:
> > 
> > 	0000000000000000 <func>:
> > 	   0:	85 ff                	test   %edi,%edi
> > 	   2:	75 03                	jne    7 <func+0x7>
> > 	   4:	8b 06                	mov    (%rsi),%eax
> > 	   6:	c3                   	retq
> > 	   7:	31 c0                	xor    %eax,%eax
> > 	   9:	c3                   	retq
> > 
> > So clearly it helps, I will change this in the next version.
> 
> Wow!  The compiler is good ....
> 
> > 
> > Regards,
> > Boqun
> > 
> > > > +				payload->range.pfn_array[k] =
> > > > +					page_to_hvpfn(sg_page((cur_sgl))) + j;
> > > > +				k++;
> > > > +			}
> > > >  			cur_sgl = sg_next(cur_sgl);
> > > > +			subpage_idx = 0;
> > > >  		}
> > > >  	}
> > > >
> > > > --
> > > > 2.27.0
> > >

Patch
diff mbox series

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index fb41636519ee..c54d25f279bc 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1561,7 +1561,7 @@  static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
 	struct hv_host_device *host_dev = shost_priv(host);
 	struct hv_device *dev = host_dev->dev;
 	struct storvsc_cmd_request *cmd_request = scsi_cmd_priv(scmnd);
-	int i;
+	int i, j, k;
 	struct scatterlist *sgl;
 	unsigned int sg_count = 0;
 	struct vmscsi_request *vm_srb;
@@ -1569,6 +1569,8 @@  static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
 	struct vmbus_packet_mpb_array  *payload;
 	u32 payload_sz;
 	u32 length;
+	int subpage_idx = 0;
+	unsigned int hvpg_count = 0;
 
 	if (vmstor_proto_version <= VMSTOR_PROTO_VERSION_WIN8) {
 		/*
@@ -1643,23 +1645,36 @@  static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
 	payload_sz = sizeof(cmd_request->mpb);
 
 	if (sg_count) {
-		if (sg_count > MAX_PAGE_BUFFER_COUNT) {
+		hvpg_count = sg_count * (PAGE_SIZE / HV_HYP_PAGE_SIZE);
+		if (hvpg_count > MAX_PAGE_BUFFER_COUNT) {
 
-			payload_sz = (sg_count * sizeof(u64) +
+			payload_sz = (hvpg_count * sizeof(u64) +
 				      sizeof(struct vmbus_packet_mpb_array));
 			payload = kzalloc(payload_sz, GFP_ATOMIC);
 			if (!payload)
 				return SCSI_MLQUEUE_DEVICE_BUSY;
 		}
 
+		/*
+		 * sgl is a list of PAGEs, and payload->range.pfn_array
+		 * expects the page number in the unit of HV_HYP_PAGE_SIZE (the
+		 * page size that Hyper-V uses, so here we need to divide PAGEs
+		 * into HV_HYP_PAGE in case that PAGE_SIZE > HV_HYP_PAGE_SIZE.
+		 */
 		payload->range.len = length;
-		payload->range.offset = sgl[0].offset;
+		payload->range.offset = sgl[0].offset & ~HV_HYP_PAGE_MASK;
+		subpage_idx = sgl[0].offset >> HV_HYP_PAGE_SHIFT;
 
 		cur_sgl = sgl;
+		k = 0;
 		for (i = 0; i < sg_count; i++) {
-			payload->range.pfn_array[i] =
-				page_to_pfn(sg_page((cur_sgl)));
+			for (j = subpage_idx; j < (PAGE_SIZE / HV_HYP_PAGE_SIZE); j++) {
+				payload->range.pfn_array[k] =
+					page_to_hvpfn(sg_page((cur_sgl))) + j;
+				k++;
+			}
 			cur_sgl = sg_next(cur_sgl);
+			subpage_idx = 0;
 		}
 	}