diff mbox series

target/iblock: split T10 PI SGL across command bios

Message ID 20180808193140.1463-1-gedwards@ddn.com (mailing list archive)
State New, archived
Headers show
Series target/iblock: split T10 PI SGL across command bios | expand

Commit Message

Greg Edwards Aug. 8, 2018, 7:31 p.m. UTC
When T10 PI is enabled on a backing device for the iblock backstore, the
PI SGL for the entire command is attached to the first bio only.  This
works fine if the command is covered by a single bio, but results in
integrity verification errors for the other bios in a multi-bio command.

Split the PI SGL across the bios in the command, so each bip contains
the protection information for the data in the bio.  In a multi-bio
command, store where we left off in the PI SGL so we know where to start
with the next bio.

Signed-off-by: Greg Edwards <gedwards@ddn.com>
---
This patch depends on commit 359f642700f2 ("block: move
bio_integrity_{intervals,bytes} into blkdev.h") in Jens' block for-next branch.

 drivers/target/target_core_iblock.c | 60 +++++++++++++++++++++--------
 1 file changed, 44 insertions(+), 16 deletions(-)

Comments

Mike Christie Aug. 21, 2018, 9:07 p.m. UTC | #1
On 08/08/2018 02:31 PM, Greg Edwards wrote:
> When T10 PI is enabled on a backing device for the iblock backstore, the
> PI SGL for the entire command is attached to the first bio only.  This
> works fine if the command is covered by a single bio, but results in
> integrity verification errors for the other bios in a multi-bio command.
> 

Did you hit this with a older distro kernel?

It looks like iblock_get_bio will alloc a bio that has enough vecs for
the entire cmd (bi_max_vecs will equal sgl_nents). So it is not clear to
me how does the bio_add_page call ever return a value other than
sg->length, and we end up doing another iblock_get_bio call?


> Split the PI SGL across the bios in the command, so each bip contains
> the protection information for the data in the bio.  In a multi-bio
> command, store where we left off in the PI SGL so we know where to start
> with the next bio.
> 
> Signed-off-by: Greg Edwards <gedwards@ddn.com>
> ---
> This patch depends on commit 359f642700f2 ("block: move
> bio_integrity_{intervals,bytes} into blkdev.h") in Jens' block for-next branch.
> 
>  drivers/target/target_core_iblock.c | 60 +++++++++++++++++++++--------
>  1 file changed, 44 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
> index ce1321a5cb7b..d3ab83282f61 100644
> --- a/drivers/target/target_core_iblock.c
> +++ b/drivers/target/target_core_iblock.c
> @@ -635,7 +635,8 @@ static ssize_t iblock_show_configfs_dev_params(struct se_device *dev, char *b)
>  }
>  
>  static int
> -iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio)
> +iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio,
> +		 struct scatterlist **sgl, unsigned int *skip)
>  {
>  	struct se_device *dev = cmd->se_dev;
>  	struct blk_integrity *bi;
> @@ -643,6 +644,7 @@ iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio)
>  	struct iblock_dev *ib_dev = IBLOCK_DEV(dev);
>  	struct scatterlist *sg;
>  	int i, rc;
> +	unsigned int size, nr_pages, len, off;
>  
>  	bi = bdev_get_integrity(ib_dev->ibd_bd);
>  	if (!bi) {
> @@ -650,32 +652,52 @@ iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio)
>  		return -ENODEV;
>  	}
>  
> -	bip = bio_integrity_alloc(bio, GFP_NOIO, cmd->t_prot_nents);
> +	nr_pages = min_t(unsigned int, cmd->t_prot_nents, BIO_MAX_PAGES);
> +	bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages);
>  	if (IS_ERR(bip)) {
>  		pr_err("Unable to allocate bio_integrity_payload\n");
>  		return PTR_ERR(bip);
>  	}
>  
> -	bip->bip_iter.bi_size = (cmd->data_length / dev->dev_attrib.block_size) *
> -			 dev->prot_length;
> -	bip->bip_iter.bi_sector = bio->bi_iter.bi_sector;
> +	bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
> +	bip_set_seed(bip, bio->bi_iter.bi_sector);
>  
>  	pr_debug("IBLOCK BIP Size: %u Sector: %llu\n", bip->bip_iter.bi_size,
>  		 (unsigned long long)bip->bip_iter.bi_sector);
>  
> -	for_each_sg(cmd->t_prot_sg, sg, cmd->t_prot_nents, i) {
> +	size = bip->bip_iter.bi_size;
> +	for_each_sg(*sgl, sg, nr_pages, i) {
>  
> -		rc = bio_integrity_add_page(bio, sg_page(sg), sg->length,
> -					    sg->offset);
> -		if (rc != sg->length) {
> +		len = sg->length - *skip;
> +		off = sg->offset + *skip;
> +
> +		if (*skip)
> +			*skip = 0;
> +
> +		if (len > size) {
> +			len = size;
> +			*skip = len;
> +		}
> +
> +		rc = bio_integrity_add_page(bio, sg_page(sg), len, off);
> +		if (rc != len) {
>  			pr_err("bio_integrity_add_page() failed; %d\n", rc);
>  			return -ENOMEM;
>  		}
>  
>  		pr_debug("Added bio integrity page: %p length: %d offset; %d\n",
> -			 sg_page(sg), sg->length, sg->offset);
> +			 sg_page(sg), len, off);
> +
> +		size -= len;
> +		if (!size)
> +			break;
>  	}
>  
> +	if (*skip == 0)
> +		*sgl = sg_next(sg);
> +	else
> +		*sgl = sg;
> +
>  	return 0;
>  }
>  
> @@ -686,12 +708,12 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
>  	struct se_device *dev = cmd->se_dev;
>  	sector_t block_lba = target_to_linux_sector(dev, cmd->t_task_lba);
>  	struct iblock_req *ibr;
> -	struct bio *bio, *bio_start;
> +	struct bio *bio;
>  	struct bio_list list;
> -	struct scatterlist *sg;
> +	struct scatterlist *sg, *sg_prot = cmd->t_prot_sg;
>  	u32 sg_num = sgl_nents;
> -	unsigned bio_cnt;
> -	int i, op, op_flags = 0;
> +	unsigned int bio_cnt, skip = 0;
> +	int i, rc, op, op_flags = 0;
>  
>  	if (data_direction == DMA_TO_DEVICE) {
>  		struct iblock_dev *ib_dev = IBLOCK_DEV(dev);
> @@ -726,7 +748,6 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
>  	if (!bio)
>  		goto fail_free_ibr;
>  
> -	bio_start = bio;
>  	bio_list_init(&list);
>  	bio_list_add(&list, bio);
>  
> @@ -741,6 +762,13 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
>  		 */
>  		while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
>  				!= sg->length) {
> +			if (cmd->prot_type && dev->dev_attrib.pi_prot_type) {
> +				rc = iblock_alloc_bip(cmd, bio, &sg_prot,
> +						      &skip);
> +				if (rc)
> +					goto fail_put_bios;
> +			}
> +
>  			if (bio_cnt >= IBLOCK_MAX_BIO_PER_TASK) {
>  				iblock_submit_bios(&list);
>  				bio_cnt = 0;
> @@ -762,7 +790,7 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
>  	}
>  
>  	if (cmd->prot_type && dev->dev_attrib.pi_prot_type) {
> -		int rc = iblock_alloc_bip(cmd, bio_start);
> +		rc = iblock_alloc_bip(cmd, bio, &sg_prot, &skip);
>  		if (rc)
>  			goto fail_put_bios;
>  	}
>
Greg Edwards Aug. 21, 2018, 10:13 p.m. UTC | #2
On Tue, Aug 21, 2018 at 04:07:08PM -0500, Mike Christie wrote:
> On 08/08/2018 02:31 PM, Greg Edwards wrote:
>> When T10 PI is enabled on a backing device for the iblock backstore, the
>> PI SGL for the entire command is attached to the first bio only.  This
>> works fine if the command is covered by a single bio, but results in
>> integrity verification errors for the other bios in a multi-bio command.
>>
>
> Did you hit this with a older distro kernel?
>
> It looks like iblock_get_bio will alloc a bio that has enough vecs for
> the entire cmd (bi_max_vecs will equal sgl_nents). So it is not clear to
> me how does the bio_add_page call ever return a value other than
> sg->length, and we end up doing another iblock_get_bio call?

I hit it with the tip of Linus' tree, but it depended on some other
in-flight changes.  Those other changes are now in Linus' tree for 4.19,
with the exception of [1].

Without [1], when doing a large read I/O through vhost + iblock to a T10
PI enabled device (I used scsi_debug), you first hit the vhost
VHOST_SCSI_PREALLOC_PROT_SGLS limitation noted in [1].

Once the limitation on I/O size is no longer gated by
VHOST_SCSI_PREALLOC_PROT_SGLS, the next issue I hit is the one this
patch addresses.  I should have been more precise in my commit message.
The failure is actually a bio_integrity_alloc() failure to allocate the
bip_vec when cmd->t_prot_nents exceeds 256 (BIO_MAX_PAGES), which
results in the following failure on the host:

[   53.780723] Unable to allocate bio_integrity_payload

and the following failure on the client:

[   28.724432] sd 0:0:1:0: [sda] tag#40 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE
[   28.736127] sd 0:0:1:0: [sda] tag#40 Sense Key : Not Ready [current]
[   28.744567] sd 0:0:1:0: [sda] tag#40 Add. Sense: Logical unit communication failure
[   28.754724] sd 0:0:1:0: [sda] tag#40 CDB: Read(10) 28 20 00 00 00 00 00 38 00 00
[   28.766190] print_req_error: I/O error, dev sda, sector 0

By splitting up the PI SGL across the bios, you avoid ever trying to
allocate a too-large bip_vec (I was testing with 32 MiB I/Os).

Here is how I am testing:

L1 VM:

# modprobe scsi_debug dif=1 dix=1 guard=0 dev_size_mb=6144
# targetcli <<EOF
/backstores/block create dev=/dev/sda name=scsi_debug
/vhost create wwn=naa.50014055d17a5a87
/vhost/naa.50014055d17a5a87/tpg1/luns/ create /backstores/block/scsi_debug
EOF

L2 VM is booted with QEMU vhost option 't10_pi=on', which depends on
QEMU patches [2]:

	-device vhost-scsi-pci,wwpn=naa.50014055d17a5a87,t10_pi=on \

then in L2 VM:

# cat /sys/block/sda/queue/max_hw_sectors_kb > /sys/block/sda/queue/max_sectors_kb
# dd if=/dev/sda of=/dev/null bs=32M iflag=direct count=1

The end goal being to have a vehicle to test large I/Os through
virtio_scsi to a PI enabled device.

Greg

  [1] https://lists.linuxfoundation.org/pipermail/virtualization/2018-August/039040.html
  [2] https://lists.nongnu.org/archive/html/qemu-devel/2018-08/msg01298.html
Greg Edwards Aug. 22, 2018, 4:52 p.m. UTC | #3
On Tue, Aug 21, 2018 at 04:13:57PM -0600, Greg Edwards wrote:
> On Tue, Aug 21, 2018 at 04:07:08PM -0500, Mike Christie wrote:
>> On 08/08/2018 02:31 PM, Greg Edwards wrote:
>>> When T10 PI is enabled on a backing device for the iblock backstore, the
>>> PI SGL for the entire command is attached to the first bio only.  This
>>> works fine if the command is covered by a single bio, but results in
>>> integrity verification errors for the other bios in a multi-bio command.
>>>
>>
>> Did you hit this with a older distro kernel?
>>
>> It looks like iblock_get_bio will alloc a bio that has enough vecs for
>> the entire cmd (bi_max_vecs will equal sgl_nents). So it is not clear to
>> me how does the bio_add_page call ever return a value other than
>> sg->length, and we end up doing another iblock_get_bio call?
>
> I hit it with the tip of Linus' tree, but it depended on some other
> in-flight changes.  Those other changes are now in Linus' tree for 4.19,
> with the exception of [1].
>
> Without [1], when doing a large read I/O through vhost + iblock to a T10
> PI enabled device (I used scsi_debug), you first hit the vhost
> VHOST_SCSI_PREALLOC_PROT_SGLS limitation noted in [1].
>
> Once the limitation on I/O size is no longer gated by
> VHOST_SCSI_PREALLOC_PROT_SGLS, the next issue I hit is the one this
> patch addresses.  I should have been more precise in my commit message.
> The failure is actually a bio_integrity_alloc() failure to allocate the
> bip_vec when cmd->t_prot_nents exceeds 256 (BIO_MAX_PAGES), which
> results in the following failure on the host:
>
> [   53.780723] Unable to allocate bio_integrity_payload

Hi Mike,

Hold off on looking at this patch for the moment.  I took another look
at it this morning, and I think I've found a vhost-scsi bug that is
contributing to this (and changing VHOST_SCSI_PREALLOC_PROT_SGLS just
papers around it).  It looks like vhost-scsi should be capping the
prot_iter in vhost_scsi_handle_vq(), something like:


diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 76f8d649147b..1302869c506b 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -973,6 +973,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 			if (prot_bytes) {
 				exp_data_len -= prot_bytes;
 				prot_iter = data_iter;
+				iov_iter_truncate(&prot_iter, prot_bytes);
 				iov_iter_advance(&data_iter, prot_bytes);
 			}
 			tag = vhost64_to_cpu(vq, v_req_pi.tag);


Greg
diff mbox series

Patch

diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index ce1321a5cb7b..d3ab83282f61 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -635,7 +635,8 @@  static ssize_t iblock_show_configfs_dev_params(struct se_device *dev, char *b)
 }
 
 static int
-iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio)
+iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio,
+		 struct scatterlist **sgl, unsigned int *skip)
 {
 	struct se_device *dev = cmd->se_dev;
 	struct blk_integrity *bi;
@@ -643,6 +644,7 @@  iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio)
 	struct iblock_dev *ib_dev = IBLOCK_DEV(dev);
 	struct scatterlist *sg;
 	int i, rc;
+	unsigned int size, nr_pages, len, off;
 
 	bi = bdev_get_integrity(ib_dev->ibd_bd);
 	if (!bi) {
@@ -650,32 +652,52 @@  iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio)
 		return -ENODEV;
 	}
 
-	bip = bio_integrity_alloc(bio, GFP_NOIO, cmd->t_prot_nents);
+	nr_pages = min_t(unsigned int, cmd->t_prot_nents, BIO_MAX_PAGES);
+	bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages);
 	if (IS_ERR(bip)) {
 		pr_err("Unable to allocate bio_integrity_payload\n");
 		return PTR_ERR(bip);
 	}
 
-	bip->bip_iter.bi_size = (cmd->data_length / dev->dev_attrib.block_size) *
-			 dev->prot_length;
-	bip->bip_iter.bi_sector = bio->bi_iter.bi_sector;
+	bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
+	bip_set_seed(bip, bio->bi_iter.bi_sector);
 
 	pr_debug("IBLOCK BIP Size: %u Sector: %llu\n", bip->bip_iter.bi_size,
 		 (unsigned long long)bip->bip_iter.bi_sector);
 
-	for_each_sg(cmd->t_prot_sg, sg, cmd->t_prot_nents, i) {
+	size = bip->bip_iter.bi_size;
+	for_each_sg(*sgl, sg, nr_pages, i) {
 
-		rc = bio_integrity_add_page(bio, sg_page(sg), sg->length,
-					    sg->offset);
-		if (rc != sg->length) {
+		len = sg->length - *skip;
+		off = sg->offset + *skip;
+
+		if (*skip)
+			*skip = 0;
+
+		if (len > size) {
+			len = size;
+			*skip = len;
+		}
+
+		rc = bio_integrity_add_page(bio, sg_page(sg), len, off);
+		if (rc != len) {
 			pr_err("bio_integrity_add_page() failed; %d\n", rc);
 			return -ENOMEM;
 		}
 
 		pr_debug("Added bio integrity page: %p length: %d offset; %d\n",
-			 sg_page(sg), sg->length, sg->offset);
+			 sg_page(sg), len, off);
+
+		size -= len;
+		if (!size)
+			break;
 	}
 
+	if (*skip == 0)
+		*sgl = sg_next(sg);
+	else
+		*sgl = sg;
+
 	return 0;
 }
 
@@ -686,12 +708,12 @@  iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 	struct se_device *dev = cmd->se_dev;
 	sector_t block_lba = target_to_linux_sector(dev, cmd->t_task_lba);
 	struct iblock_req *ibr;
-	struct bio *bio, *bio_start;
+	struct bio *bio;
 	struct bio_list list;
-	struct scatterlist *sg;
+	struct scatterlist *sg, *sg_prot = cmd->t_prot_sg;
 	u32 sg_num = sgl_nents;
-	unsigned bio_cnt;
-	int i, op, op_flags = 0;
+	unsigned int bio_cnt, skip = 0;
+	int i, rc, op, op_flags = 0;
 
 	if (data_direction == DMA_TO_DEVICE) {
 		struct iblock_dev *ib_dev = IBLOCK_DEV(dev);
@@ -726,7 +748,6 @@  iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 	if (!bio)
 		goto fail_free_ibr;
 
-	bio_start = bio;
 	bio_list_init(&list);
 	bio_list_add(&list, bio);
 
@@ -741,6 +762,13 @@  iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 		 */
 		while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
 				!= sg->length) {
+			if (cmd->prot_type && dev->dev_attrib.pi_prot_type) {
+				rc = iblock_alloc_bip(cmd, bio, &sg_prot,
+						      &skip);
+				if (rc)
+					goto fail_put_bios;
+			}
+
 			if (bio_cnt >= IBLOCK_MAX_BIO_PER_TASK) {
 				iblock_submit_bios(&list);
 				bio_cnt = 0;
@@ -762,7 +790,7 @@  iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 	}
 
 	if (cmd->prot_type && dev->dev_attrib.pi_prot_type) {
-		int rc = iblock_alloc_bip(cmd, bio_start);
+		rc = iblock_alloc_bip(cmd, bio, &sg_prot, &skip);
 		if (rc)
 			goto fail_put_bios;
 	}