diff mbox series

[for-next,v2,2/4] null_blk: do partial IO for bad blocks

Message ID 20241225100949.930897-3-shinichiro.kawasaki@wdc.com (mailing list archive)
State New, archived
Headers show
Series null_blk: improve write failure simulation | expand

Commit Message

Shinichiro Kawasaki Dec. 25, 2024, 10:09 a.m. UTC
The current null_blk implementation checks if any bad blocks exist in
the target blocks of each IO. If so, the IO fails and no data is
transferred for any of the IO target blocks. However, when real storage
devices have bad blocks, the devices may transfer data partially, up to
the first bad block. In particular, when the IO is a write operation,
such partial IO leaves partially written data on the device.

To simulate such partial IO using null_blk, perform the data transfer
from the IO start block to the block just before the first bad block.
Introduce __null_handle_rq() to support partial data transfer. Modify
null_handle_badblocks() to calculate the size of the partial data
transfer and call __null_handle_rq().

Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
---
 drivers/block/null_blk/main.c | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

Comments

Damien Le Moal Jan. 6, 2025, 5:47 a.m. UTC | #1
On 12/25/24 7:09 PM, Shin'ichiro Kawasaki wrote:
> The current null_blk implementation checks if any bad blocks exist in
> the target blocks of each IO. If so, the IO fails and data is not
> transferred for all of the IO target blocks. However, when real storage
> devices have bad blocks, the devices may transfer data partially up to
> the first bad blocks. Especially, when the IO is a write operation, such
> partial IO leaves partially written data on the device.
> 
> To simulate such partial IO using null_blk, perform the data transfer
> from the IO start block to the block just before the first bad block.
> Introduce __null_handle_rq() to support partial data transfer. Modify
> null_handle_badblocks() to calculate the size of the partial data
> transfer and call __null_handle_rq().

We should have an option to control this behavior to be able to mimic actual
devices. E.g. SAS devices may do partial data transfers before hitting a bad
block, but ATA devices will not (it is always all or nothing with ATA). The
current default corresponds to an ATA drive behavior and this change allows
emulating a SAS drive behavior. So let's control this with an option.
Shinichiro Kawasaki Jan. 15, 2025, 1:16 a.m. UTC | #2
On Jan 06, 2025 / 14:47, Damien Le Moal wrote:
> On 12/25/24 7:09 PM, Shin'ichiro Kawasaki wrote:
> > The current null_blk implementation checks if any bad blocks exist in
> > the target blocks of each IO. If so, the IO fails and data is not
> > transferred for all of the IO target blocks. However, when real storage
> > devices have bad blocks, the devices may transfer data partially up to
> > the first bad blocks. Especially, when the IO is a write operation, such
> > partial IO leaves partially written data on the device.
> > 
> > To simulate such partial IO using null_blk, perform the data transfer
> > from the IO start block to the block just before the first bad block.
> > Introduce __null_handle_rq() to support partial data transfer. Modify
> > null_handle_badblocks() to calculate the size of the partial data
> > transfer and call __null_handle_rq().
> 
> We should have an option to control this behavior to be able to mimic actual
> devices. E.g. SAS devices may do partial data transfers before hitting a bad
> block, but ATA devices will not (it is always all or nothing with ATA). The
> current default corresponds to an ATA drive behavior and this change allows
> emulating a SAS drive behavior. So let's control this with an option.

Thanks. Will do so in v3.
diff mbox series

Patch

diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index f720707b7cfb..d155eb040077 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1264,31 +1264,50 @@  static int null_transfer(struct nullb *nullb, struct page *page,
 	return err;
 }
 
-static blk_status_t null_handle_rq(struct nullb_cmd *cmd)
+/*
+ * Transfer data for the given request. The transfer size is capped at
+ * max_bytes. If max_bytes is zero, all of the requested data is transferred.
+ */
+static blk_status_t __null_handle_rq(struct nullb_cmd *cmd,
+				      unsigned int max_bytes)
 {
 	struct request *rq = blk_mq_rq_from_pdu(cmd);
 	struct nullb *nullb = cmd->nq->dev->nullb;
 	int err = 0;
 	unsigned int len;
 	sector_t sector = blk_rq_pos(rq);
+	unsigned int transferred_bytes = 0;
 	struct req_iterator iter;
 	struct bio_vec bvec;
 
+	if (!max_bytes)
+		max_bytes = blk_rq_bytes(rq);
+
 	spin_lock_irq(&nullb->lock);
 	rq_for_each_segment(bvec, rq, iter) {
 		len = bvec.bv_len;
+		if (transferred_bytes + len > max_bytes)
+			len = max_bytes - transferred_bytes;
 		err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
 				     op_is_write(req_op(rq)), sector,
 				     rq->cmd_flags & REQ_FUA);
 		if (err)
 			break;
 		sector += len >> SECTOR_SHIFT;
+		transferred_bytes += len;
+		if (transferred_bytes >= max_bytes)
+			break;
 	}
 	spin_unlock_irq(&nullb->lock);
 
 	return errno_to_blk_status(err);
 }
 
+static blk_status_t null_handle_rq(struct nullb_cmd *cmd)
+{
+	return __null_handle_rq(cmd, 0);
+}
+
 static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
 {
 	struct nullb_device *dev = cmd->nq->dev;
@@ -1315,11 +1334,21 @@  static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
 						 sector_t nr_sectors)
 {
 	struct badblocks *bb = &cmd->nq->dev->badblocks;
+	struct nullb_device *dev = cmd->nq->dev;
+	unsigned int block_sectors = dev->blocksize >> SECTOR_SHIFT;
+	unsigned int transfer_bytes;
 	sector_t first_bad;
 	int bad_sectors;
 
-	if (badblocks_check(bb, sector, nr_sectors, &first_bad, &bad_sectors))
+	if (badblocks_check(bb, sector, nr_sectors, &first_bad, &bad_sectors)) {
+		if (!IS_ALIGNED(first_bad, block_sectors))
+			first_bad = ALIGN_DOWN(first_bad, block_sectors);
+		if (dev->memory_backed && sector < first_bad) {
+			transfer_bytes = (first_bad - sector) << SECTOR_SHIFT;
+			__null_handle_rq(cmd, transfer_bytes);
+		}
 		return BLK_STS_IOERR;
+	}
 
 	return BLK_STS_OK;
 }