
[v2,15/21] xen-blkfront: Make use of the new sg_map helper function

Message ID 1493144468-22493-16-git-send-email-logang@deltatee.com (mailing list archive)
State New, archived

Commit Message

Logan Gunthorpe April 25, 2017, 6:21 p.m. UTC
Straightforward conversion to the new helper, except due to the lack
of error path, we have to use SG_MAP_MUST_NOT_FAIL which may BUG_ON in
certain cases in the future.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: "Roger Pau Monné" <roger.pau@citrix.com>
---
 drivers/block/xen-blkfront.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

Comments

Roger Pau Monné April 26, 2017, 7:37 a.m. UTC | #1
On Tue, Apr 25, 2017 at 12:21:02PM -0600, Logan Gunthorpe wrote:
> Straightforward conversion to the new helper, except due to the lack
> of error path, we have to use SG_MAP_MUST_NOT_FAIL which may BUG_ON in
> certain cases in the future.
> 
> Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> Cc: Juergen Gross <jgross@suse.com>
> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> Cc: "Roger Pau Monné" <roger.pau@citrix.com>
> ---
>  drivers/block/xen-blkfront.c | 20 +++++++++++---------
>  1 file changed, 11 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 3945963..ed62175 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -816,8 +816,9 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
>  		BUG_ON(sg->offset + sg->length > PAGE_SIZE);
>  
>  		if (setup.need_copy) {
> -			setup.bvec_off = sg->offset;
> -			setup.bvec_data = kmap_atomic(sg_page(sg));
> +			setup.bvec_off = 0;
> +			setup.bvec_data = sg_map(sg, 0, SG_KMAP_ATOMIC |
> +						 SG_MAP_MUST_NOT_FAIL);

I assume that sg_map already adds sg->offset to the address?

Also wondering whether we can get rid of bvec_off and just increment bvec_data,
adding Julien who IIRC added this code.

>  		}
>  
>  		gnttab_foreach_grant_in_range(sg_page(sg),
> @@ -827,7 +828,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
>  					      &setup);
>  
>  		if (setup.need_copy)
> -			kunmap_atomic(setup.bvec_data);
> +			sg_unmap(sg, setup.bvec_data, 0, SG_KMAP_ATOMIC);
>  	}
>  	if (setup.segments)
>  		kunmap_atomic(setup.segments);
> @@ -1053,7 +1054,7 @@ static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
>  		case XEN_SCSI_DISK5_MAJOR:
>  		case XEN_SCSI_DISK6_MAJOR:
>  		case XEN_SCSI_DISK7_MAJOR:
> -			*offset = (*minor / PARTS_PER_DISK) + 
> +			*offset = (*minor / PARTS_PER_DISK) +
>  				((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) +
>  				EMULATED_SD_DISK_NAME_OFFSET;
>  			*minor = *minor +
> @@ -1068,7 +1069,7 @@ static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
>  		case XEN_SCSI_DISK13_MAJOR:
>  		case XEN_SCSI_DISK14_MAJOR:
>  		case XEN_SCSI_DISK15_MAJOR:
> -			*offset = (*minor / PARTS_PER_DISK) + 
> +			*offset = (*minor / PARTS_PER_DISK) +
>  				((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) +
>  				EMULATED_SD_DISK_NAME_OFFSET;
>  			*minor = *minor +
> @@ -1119,7 +1120,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
>  	if (!VDEV_IS_EXTENDED(info->vdevice)) {
>  		err = xen_translate_vdev(info->vdevice, &minor, &offset);
>  		if (err)
> -			return err;		
> +			return err;

Cosmetic changes should go in a separate patch please.

Roger.
Logan Gunthorpe April 27, 2017, 8:19 p.m. UTC | #2
On 26/04/17 01:37 AM, Roger Pau Monné wrote:
> On Tue, Apr 25, 2017 at 12:21:02PM -0600, Logan Gunthorpe wrote:
>> Straightforward conversion to the new helper, except due to the lack
>> of error path, we have to use SG_MAP_MUST_NOT_FAIL which may BUG_ON in
>> certain cases in the future.
>>
>> Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
>> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
>> Cc: Juergen Gross <jgross@suse.com>
>> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
>> Cc: "Roger Pau Monné" <roger.pau@citrix.com>
>> ---
>>  drivers/block/xen-blkfront.c | 20 +++++++++++---------
>>  1 file changed, 11 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
>> index 3945963..ed62175 100644
>> --- a/drivers/block/xen-blkfront.c
>> +++ b/drivers/block/xen-blkfront.c
>> @@ -816,8 +816,9 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
>>  		BUG_ON(sg->offset + sg->length > PAGE_SIZE);
>>  
>>  		if (setup.need_copy) {
>> -			setup.bvec_off = sg->offset;
>> -			setup.bvec_data = kmap_atomic(sg_page(sg));
>> +			setup.bvec_off = 0;
>> +			setup.bvec_data = sg_map(sg, 0, SG_KMAP_ATOMIC |
>> +						 SG_MAP_MUST_NOT_FAIL);
> 
> I assume that sg_map already adds sg->offset to the address?

Correct.

> Also wondering whether we can get rid of bvec_off and just increment bvec_data,
> adding Julien who IIRC added this code.

bvec_off is used to keep track of the offset within the current mapping
so it's not a great idea given that you'd want to kunmap_atomic the
original address and not something with an offset. It would be nice if
this could be converted to use the sg_miter interface but that's a much
more invasive change that would require someone who knows this code and
can properly test it. I'd be very grateful if someone actually took that on.

Logan
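
For reference, a minimal sketch of the mapping semantics being discussed,
using the sg_map()/sg_unmap() calling convention from this patch series.
The helper name and the destination buffer are purely illustrative:

#include <linux/scatterlist.h>	/* sg_map()/sg_unmap() are added by this series */

/*
 * Illustration only: sg_map() (from this series) returns an address that
 * already includes sg->offset, so the caller starts at offset 0, and
 * sg_unmap() must be handed the exact pointer sg_map() returned, not an
 * address that has been advanced by a local offset.
 */
static void example_copy_from_sg(struct scatterlist *sg, void *dst)
{
	void *vaddr = sg_map(sg, 0, SG_KMAP_ATOMIC | SG_MAP_MUST_NOT_FAIL);

	memcpy(dst, vaddr, sg->length);		/* offset already applied */

	sg_unmap(sg, vaddr, 0, SG_KMAP_ATOMIC);	/* original mapping address */
}

This is also why the patch resets bvec_off to 0 instead of sg->offset.
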
Jason Gunthorpe April 27, 2017, 8:53 p.m. UTC | #3
On Thu, Apr 27, 2017 at 02:19:24PM -0600, Logan Gunthorpe wrote:
> 
> 
> On 26/04/17 01:37 AM, Roger Pau Monné wrote:
> > On Tue, Apr 25, 2017 at 12:21:02PM -0600, Logan Gunthorpe wrote:
> >> Straightforward conversion to the new helper, except due to the lack
> >> of error path, we have to use SG_MAP_MUST_NOT_FAIL which may BUG_ON in
> >> certain cases in the future.
> >>
> >> Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
> >> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> >> Cc: Juergen Gross <jgross@suse.com>
> >> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> >> Cc: "Roger Pau Monné" <roger.pau@citrix.com>
> >>  drivers/block/xen-blkfront.c | 20 +++++++++++---------
> >>  1 file changed, 11 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> >> index 3945963..ed62175 100644
> >> +++ b/drivers/block/xen-blkfront.c
> >> @@ -816,8 +816,9 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
> >>  		BUG_ON(sg->offset + sg->length > PAGE_SIZE);
> >>  
> >>  		if (setup.need_copy) {
> >> -			setup.bvec_off = sg->offset;
> >> -			setup.bvec_data = kmap_atomic(sg_page(sg));
> >> +			setup.bvec_off = 0;
> >> +			setup.bvec_data = sg_map(sg, 0, SG_KMAP_ATOMIC |
> >> +						 SG_MAP_MUST_NOT_FAIL);
> > 
> > I assume that sg_map already adds sg->offset to the address?
> 
> Correct.
> 
> > Also wondering whether we can get rid of bvec_off and just increment bvec_data,
> > adding Julien who IIRC added this code.
> 
> bvec_off is used to keep track of the offset within the current mapping
> so it's not a great idea given that you'd want to kunmap_atomic the
> original address and not something with an offset. It would be nice if
> this could be converted to use the sg_miter interface but that's a much
> more invasive change that would require someone who knows this code and
> can properly test it. I'd be very grateful if someone actually took that on.

blkfront is one of the drivers I looked at, and it appears to only be
memcpying with the bvec_data pointer, so I wonder why it does not use
sg_copy_X_buffer instead..

Jason
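
For context, sg_copy_to_buffer() and sg_copy_from_buffer() are existing
lib/scatterlist helpers. A generic sketch of the bounce-buffer style of
copy they enable (illustrative code, not a proposed blkfront change):

#include <linux/scatterlist.h>

/*
 * Copy the whole scatterlist into a linear buffer and back.  The helpers
 * map each element internally, so the caller never touches
 * sg_page()/kmap_atomic() directly.
 */
static void example_bounce(struct scatterlist *sgl, unsigned int nents,
			   void *buf, size_t buflen)
{
	size_t copied;

	copied = sg_copy_to_buffer(sgl, nents, buf, buflen);

	/* ... operate on the first 'copied' bytes of buf ... */

	sg_copy_from_buffer(sgl, nents, buf, copied);
}
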
Logan Gunthorpe April 27, 2017, 9:53 p.m. UTC | #4
On 27/04/17 02:53 PM, Jason Gunthorpe wrote:
> blkfront is one of the drivers I looked at, and it appears to only be
> memcpying with the bvec_data pointer, so I wonder why it does not use
> sg_copy_X_buffer instead..

Yes, sort of...

But you'd potentially end up calling sg_copy_to_buffer multiple times
per page within the sg (given that gnttab_foreach_grant_in_range might
call blkif_copy_from_grant/blkif_setup_rw_req_grant multiple times).
Even calling sg_copy_to_buffer once per page seems rather inefficient as
it uses sg_miter internally.

Switching the for_each_sg to sg_miter is probably the nicer solution as
it takes care of the mapping and the offset/length accounting for you
and will have similar performance.

But, yes, if performance is not an issue, switching it to
sg_copy_to_buffer would be a less invasive change than sg_miter. The
same might be said about a lot of these cases.

Unfortunately, changing from kmap_atomic (which is a null operation in a
lot of cases) to sg_copy_X_buffer is a pretty big performance hit.

Logan
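
For reference, the sg_miter interface mentioned above is used roughly
like this (a generic read-side sketch; the function name and buffer
handling are illustrative, this is not blkfront code):

#include <linux/scatterlist.h>

/*
 * Walk an sg list one mapped chunk at a time; sg_miter performs the
 * kmap_atomic/kunmap_atomic calls and the offset/length accounting.
 */
static void example_miter_read(struct scatterlist *sgl, unsigned int nents,
			       void *buf)
{
	struct sg_mapping_iter miter;
	u8 *dst = buf;

	sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC | SG_MITER_FROM_SG);
	while (sg_miter_next(&miter)) {
		memcpy(dst, miter.addr, miter.length);
		dst += miter.length;
	}
	sg_miter_stop(&miter);
}
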
Jason Gunthorpe April 27, 2017, 10:11 p.m. UTC | #5
On Thu, Apr 27, 2017 at 03:53:37PM -0600, Logan Gunthorpe wrote:
> On 27/04/17 02:53 PM, Jason Gunthorpe wrote:
> > blkfront is one of the drivers I looked at, and it appears to only be
> > memcpying with the bvec_data pointer, so I wonder why it does not use
> > sg_copy_X_buffer instead..
> 
> But you'd potentially end up calling sg_copy_to_buffer multiple times
> per page within the sg (given that gnttab_foreach_grant_in_range might
> call blkif_copy_from_grant/blkif_setup_rw_req_grant multiple times).
> Even calling sg_copy_to_buffer once per page seems rather inefficient as
> it uses sg_miter internally.

Well, that is in the current form, with more users it would make sense
to optimize for the single page case, eg by providing the existing
call, providing a faster single-page-only variant of the copy, perhaps
even one that is inlined.

> Switching the for_each_sg to sg_miter is probably the nicer solution as
> it takes care of the mapping and the offset/length accounting for you
> and will have similar performance.

sg_miter will still fail when the sg contains __iomem, however I would
expect that the sg_copy will work with iomem, by using the __iomem
memcpy variant.

So, sg_copy should always be preferred in this new world with mixed
__iomem since it is the only primitive that can transparently handle
it.

Jason
Logan Gunthorpe April 27, 2017, 11:03 p.m. UTC | #6
On 27/04/17 04:11 PM, Jason Gunthorpe wrote:
> On Thu, Apr 27, 2017 at 03:53:37PM -0600, Logan Gunthorpe wrote:
> Well, that is in the current form, with more users it would make sense
> to optimize for the single page case, eg by providing the existing
> call, providing a faster single-page-only variant of the copy, perhaps
> even one that is inlined.

Ok, does it make sense then to have an sg_copy_page_to_buffer (or some
such... I'm having trouble thinking of a sane name that isn't too long).
That just does k(un)map_atomic and memcpy? I could try that if it makes
sense to people.

>> Switching the for_each_sg to sg_miter is probably the nicer solution as
>> it takes care of the mapping and the offset/length accounting for you
>> and will have similar performance.
> 
> sg_miter will still fail when the sg contains __iomem, however I would
> expect that the sg_copy will work with iomem, by using the __iomem
> memcpy variant.

Yes, that's true. Any sg_miters that ever see iomem will need to be
converted to support it. This isn't much different than the other
kmap(sg_page()) users I was converting that will also fail if they see
iomem. Though, I suspect an sg_miter user would be easier to convert to
iomem than a random kmap user.

Logan
Jason Gunthorpe April 27, 2017, 11:20 p.m. UTC | #7
On Thu, Apr 27, 2017 at 05:03:45PM -0600, Logan Gunthorpe wrote:
> 
> 
> On 27/04/17 04:11 PM, Jason Gunthorpe wrote:
> > On Thu, Apr 27, 2017 at 03:53:37PM -0600, Logan Gunthorpe wrote:
> > Well, that is in the current form, with more users it would make sense
> > to optimize for the single page case, eg by providing the existing
> > call, providing a faster single-page-only variant of the copy, perhaps
> > even one that is inlined.
> 
> Ok, does it make sense then to have an sg_copy_page_to_buffer (or some
> such... I'm having trouble thinking of a sane name that isn't too long).
> That just does k(un)map_atomic and memcpy? I could try that if it makes
> sense to people.

It seems the most robust: test for iomem, and jump to a slow path
copy, otherwise inline the kmap and memcpy

Every place doing memcpy from sgl will need that pattern to be
correct.

> > sg_miter will still fail when the sg contains __iomem, however I would
> > expect that the sg_copy will work with iomem, by using the __iomem
> > memcpy variant.
> 
> Yes, that's true. Any sg_miters that ever see iomem will need to be
> converted to support it. This isn't much different than the other
> kmap(sg_page()) users I was converting that will also fail if they see
> iomem. Though, I suspect an sg_miter user would be easier to convert to
> iomem than a random kmap user.

How? sg_miter seems like the next nightmare down this path, what is
sg_miter_next supposed to do when something hits an iomem sgl?

miter.addr is supposed to be a kernel pointer that must not be
__iomem..

Jason
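
For illustration, a rough sketch of that pattern, borrowing the
sg_copy_page_to_buffer() name floated above. The sg_is_iomem() test is an
assumption standing in for whatever the real check would be, and the slow
path presumes an iomem-aware sg_copy_to_buffer():

#include <linux/kernel.h>
#include <linux/highmem.h>
#include <linux/scatterlist.h>

/*
 * Single-element copy: jump to a slow path for iomem, otherwise inline
 * kmap_atomic + memcpy.  Illustration only.
 */
static size_t sg_copy_page_to_buffer(struct scatterlist *sg, void *buf,
				     size_t buflen)
{
	size_t len = min_t(size_t, sg->length, buflen);
	void *vaddr;

	if (sg_is_iomem(sg))	/* hypothetical predicate */
		return sg_copy_to_buffer(sg, 1, buf, buflen);

	vaddr = kmap_atomic(sg_page(sg));
	memcpy(buf, vaddr + sg->offset, len);
	kunmap_atomic(vaddr);

	return len;
}
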
Logan Gunthorpe April 27, 2017, 11:29 p.m. UTC | #8
On 27/04/17 05:20 PM, Jason Gunthorpe wrote:
> It seems the most robust: test for iomem, and jump to a slow path
> copy, otherwise inline the kmap and memcpy
> 
> Every place doing memcpy from sgl will need that pattern to be
> correct.

Ok, sounds like a good place to start to me. I'll see what I can do for
a v3 of this set. Though, I probably won't send anything until after the
merge window.

>>> sg_miter will still fail when the sg contains __iomem, however I would
>>> expect that the sg_copy will work with iomem, by using the __iomem
>>> memcpy variant.
>>
>> Yes, that's true. Any sg_miters that ever see iomem will need to be
>> converted to support it. This isn't much different than the other
>> kmap(sg_page()) users I was converting that will also fail if they see
>> iomem. Though, I suspect an sg_miter user would be easier to convert to
>> iomem than a random kmap user.
> 
> How? sg_miter seems like the next nightmare down this path, what is
> sg_miter_next supposed to do when something hits an iomem sgl?

My proposal is roughly included in the draft I sent upthread. We add an
sg_miter flag indicating that the iteratee supports iomem, and if miter
finds iomem (with the support flag set) it sets ioaddr, which is __iomem.
The iteratee then just needs to NULL-check addr and ioaddr and perform
the appropriate action.

Logan
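
A rough sketch of that proposal from the iteratee's side. SG_MITER_IOMEM
and the ioaddr field are hypothetical names for the flag and pointer
described above; the rest is the existing sg_miter API:

#include <linux/io.h>
#include <linux/scatterlist.h>

static void example_iomem_miter(struct scatterlist *sgl, unsigned int nents,
				void *buf)
{
	struct sg_mapping_iter miter;
	u8 *dst = buf;

	/* SG_MITER_IOMEM and miter.ioaddr exist only in this sketch. */
	sg_miter_start(&miter, sgl, nents,
		       SG_MITER_ATOMIC | SG_MITER_FROM_SG | SG_MITER_IOMEM);
	while (sg_miter_next(&miter)) {
		if (miter.addr)
			memcpy(dst, miter.addr, miter.length);
		else
			memcpy_fromio(dst, miter.ioaddr, miter.length);
		dst += miter.length;
	}
	sg_miter_stop(&miter);
}
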

Patch

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 3945963..ed62175 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -816,8 +816,9 @@  static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
 		BUG_ON(sg->offset + sg->length > PAGE_SIZE);
 
 		if (setup.need_copy) {
-			setup.bvec_off = sg->offset;
-			setup.bvec_data = kmap_atomic(sg_page(sg));
+			setup.bvec_off = 0;
+			setup.bvec_data = sg_map(sg, 0, SG_KMAP_ATOMIC |
+						 SG_MAP_MUST_NOT_FAIL);
 		}
 
 		gnttab_foreach_grant_in_range(sg_page(sg),
@@ -827,7 +828,7 @@  static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
 					      &setup);
 
 		if (setup.need_copy)
-			kunmap_atomic(setup.bvec_data);
+			sg_unmap(sg, setup.bvec_data, 0, SG_KMAP_ATOMIC);
 	}
 	if (setup.segments)
 		kunmap_atomic(setup.segments);
@@ -1053,7 +1054,7 @@  static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
 		case XEN_SCSI_DISK5_MAJOR:
 		case XEN_SCSI_DISK6_MAJOR:
 		case XEN_SCSI_DISK7_MAJOR:
-			*offset = (*minor / PARTS_PER_DISK) + 
+			*offset = (*minor / PARTS_PER_DISK) +
 				((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) +
 				EMULATED_SD_DISK_NAME_OFFSET;
 			*minor = *minor +
@@ -1068,7 +1069,7 @@  static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
 		case XEN_SCSI_DISK13_MAJOR:
 		case XEN_SCSI_DISK14_MAJOR:
 		case XEN_SCSI_DISK15_MAJOR:
-			*offset = (*minor / PARTS_PER_DISK) + 
+			*offset = (*minor / PARTS_PER_DISK) +
 				((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) +
 				EMULATED_SD_DISK_NAME_OFFSET;
 			*minor = *minor +
@@ -1119,7 +1120,7 @@  static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 	if (!VDEV_IS_EXTENDED(info->vdevice)) {
 		err = xen_translate_vdev(info->vdevice, &minor, &offset);
 		if (err)
-			return err;		
+			return err;
  		nr_parts = PARTS_PER_DISK;
 	} else {
 		minor = BLKIF_MINOR_EXT(info->vdevice);
@@ -1483,8 +1484,9 @@  static bool blkif_completion(unsigned long *id,
 		for_each_sg(s->sg, sg, num_sg, i) {
 			BUG_ON(sg->offset + sg->length > PAGE_SIZE);
 
-			data.bvec_offset = sg->offset;
-			data.bvec_data = kmap_atomic(sg_page(sg));
+			data.bvec_offset = 0;
+			data.bvec_data = sg_map(sg, 0, SG_KMAP_ATOMIC |
+						SG_MAP_MUST_NOT_FAIL);
 
 			gnttab_foreach_grant_in_range(sg_page(sg),
 						      sg->offset,
@@ -1492,7 +1494,7 @@  static bool blkif_completion(unsigned long *id,
 						      blkif_copy_from_grant,
 						      &data);
 
-			kunmap_atomic(data.bvec_data);
+			sg_unmap(sg, data.bvec_data, 0, SG_KMAP_ATOMIC);
 		}
 	}
 	/* Add the persistent grant into the list of free grants */