diff mbox series

[-next,RFC] block: count 'ios' and 'sectors' when io is done for bio-based device

Message ID 20230223091226.1135678-1-yukuai1@huaweicloud.com (mailing list archive)
State New, archived
Headers show
Series [-next,RFC] block: count 'ios' and 'sectors' when io is done for bio-based device | expand

Commit Message

Yu Kuai Feb. 23, 2023, 9:12 a.m. UTC
From: Yu Kuai <yukuai3@huawei.com>

While using iostat for raid, I observed very strange 'await'
occasionally, and turns out it's due to that 'ios' and 'sectors' is
counted in bdev_start_io_acct(), while 'nsecs' is counted in
bdev_end_io_acct(). I'm not sure why they are ccounted like that
but I think this behaviour is obviously wrong because user will get
wrong disk stats.

Fix the problem by counting 'ios' and 'sectors' when io is done, like
what rq-based device does.

Fixes: 394ffa503bc4 ("blk: introduce generic io stat accounting help function")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-core.c              | 16 ++++++----------
 drivers/md/dm.c               |  6 +++---
 drivers/nvme/host/multipath.c |  8 ++++----
 include/linux/blkdev.h        |  5 ++---
 4 files changed, 15 insertions(+), 20 deletions(-)

Comments

Yu Kuai Feb. 28, 2023, 1:01 a.m. UTC | #1
Hi,

friendly ping ...

Thanks,
Kuai

在 2023/02/23 17:12, Yu Kuai 写道:
> From: Yu Kuai <yukuai3@huawei.com>
> 
> While using iostat for raid, I observed very strange 'await'
> occasionally, and turns out it's due to that 'ios' and 'sectors' is
> counted in bdev_start_io_acct(), while 'nsecs' is counted in
> bdev_end_io_acct(). I'm not sure why they are ccounted like that
> but I think this behaviour is obviously wrong because user will get
> wrong disk stats.
> 
> Fix the problem by counting 'ios' and 'sectors' when io is done, like
> what rq-based device does.
> 
> Fixes: 394ffa503bc4 ("blk: introduce generic io stat accounting help function")
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>   block/blk-core.c              | 16 ++++++----------
>   drivers/md/dm.c               |  6 +++---
>   drivers/nvme/host/multipath.c |  8 ++++----
>   include/linux/blkdev.h        |  5 ++---
>   4 files changed, 15 insertions(+), 20 deletions(-)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 82b5b2c53f1e..fe1d320f5f07 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -953,16 +953,11 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end)
>   	}
>   }
>   
> -unsigned long bdev_start_io_acct(struct block_device *bdev,
> -				 unsigned int sectors, enum req_op op,
> +unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
>   				 unsigned long start_time)
>   {
> -	const int sgrp = op_stat_group(op);
> -
>   	part_stat_lock();
>   	update_io_ticks(bdev, start_time, false);
> -	part_stat_inc(bdev, ios[sgrp]);
> -	part_stat_add(bdev, sectors[sgrp], sectors);
>   	part_stat_local_inc(bdev, in_flight[op_is_write(op)]);
>   	part_stat_unlock();
>   
> @@ -978,13 +973,12 @@ EXPORT_SYMBOL(bdev_start_io_acct);
>    */
>   unsigned long bio_start_io_acct(struct bio *bio)
>   {
> -	return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
> -				  bio_op(bio), jiffies);
> +	return bdev_start_io_acct(bio->bi_bdev, bio_op(bio), jiffies);
>   }
>   EXPORT_SYMBOL_GPL(bio_start_io_acct);
>   
>   void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
> -		      unsigned long start_time)
> +		      unsigned int sectors, unsigned long start_time)
>   {
>   	const int sgrp = op_stat_group(op);
>   	unsigned long now = READ_ONCE(jiffies);
> @@ -992,6 +986,8 @@ void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
>   
>   	part_stat_lock();
>   	update_io_ticks(bdev, now, true);
> +	part_stat_inc(bdev, ios[sgrp]);
> +	part_stat_add(bdev, sectors[sgrp], sectors);
>   	part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
>   	part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
>   	part_stat_unlock();
> @@ -1001,7 +997,7 @@ EXPORT_SYMBOL(bdev_end_io_acct);
>   void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
>   			      struct block_device *orig_bdev)
>   {
> -	bdev_end_io_acct(orig_bdev, bio_op(bio), start_time);
> +	bdev_end_io_acct(orig_bdev, bio_op(bio), bio_sectors(bio), start_time);
>   }
>   EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped);
>   
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index eace45a18d45..f5cc330bb549 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -512,10 +512,10 @@ static void dm_io_acct(struct dm_io *io, bool end)
>   		sectors = io->sectors;
>   
>   	if (!end)
> -		bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio),
> -				   start_time);
> +		bdev_start_io_acct(bio->bi_bdev, bio_op(bio), start_time);
>   	else
> -		bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time);
> +		bdev_end_io_acct(bio->bi_bdev, bio_op(bio), sectors,
> +				 start_time);
>   
>   	if (static_branch_unlikely(&stats_enabled) &&
>   	    unlikely(dm_stats_used(&md->stats))) {
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index fc39d01e7b63..9171452e2f6d 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -123,9 +123,8 @@ void nvme_mpath_start_request(struct request *rq)
>   		return;
>   
>   	nvme_req(rq)->flags |= NVME_MPATH_IO_STATS;
> -	nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0,
> -					blk_rq_bytes(rq) >> SECTOR_SHIFT,
> -					req_op(rq), jiffies);
> +	nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, req_op(rq),
> +						      jiffies);
>   }
>   EXPORT_SYMBOL_GPL(nvme_mpath_start_request);
>   
> @@ -136,7 +135,8 @@ void nvme_mpath_end_request(struct request *rq)
>   	if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS))
>   		return;
>   	bdev_end_io_acct(ns->head->disk->part0, req_op(rq),
> -		nvme_req(rq)->start_time);
> +			 blk_rq_bytes(rq) >> SECTOR_SHIFT,
> +			 nvme_req(rq)->start_time);
>   }
>   
>   void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index d1aee08f8c18..941304f17492 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -1446,11 +1446,10 @@ static inline void blk_wake_io_task(struct task_struct *waiter)
>   		wake_up_process(waiter);
>   }
>   
> -unsigned long bdev_start_io_acct(struct block_device *bdev,
> -				 unsigned int sectors, enum req_op op,
> +unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
>   				 unsigned long start_time);
>   void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
> -		unsigned long start_time);
> +		      unsigned int sectors, unsigned long start_time);
>   
>   unsigned long bio_start_io_acct(struct bio *bio);
>   void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
>
Yu Kuai March 7, 2023, 10:44 a.m. UTC | #2
Hi,

在 2023/02/28 9:01, Yu Kuai 写道:
> Hi,
> 
> friendly ping ...
> 
> Thanks,
> Kuai
> 
> 在 2023/02/23 17:12, Yu Kuai 写道:
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> While using iostat for raid, I observed very strange 'await'
>> occasionally, and turns out it's due to that 'ios' and 'sectors' is
>> counted in bdev_start_io_acct(), while 'nsecs' is counted in
>> bdev_end_io_acct(). I'm not sure why they are ccounted like that
>> but I think this behaviour is obviously wrong because user will get
>> wrong disk stats.
>>
>> Fix the problem by counting 'ios' and 'sectors' when io is done, like
>> what rq-based device does.

Can anyone help to review this change? Or is there any reason to count
'ios' and 'sectors' when io is started?

Thanks,
Kuai
>>
>> Fixes: 394ffa503bc4 ("blk: introduce generic io stat accounting help 
>> function")
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   block/blk-core.c              | 16 ++++++----------
>>   drivers/md/dm.c               |  6 +++---
>>   drivers/nvme/host/multipath.c |  8 ++++----
>>   include/linux/blkdev.h        |  5 ++---
>>   4 files changed, 15 insertions(+), 20 deletions(-)
>>
>> diff --git a/block/blk-core.c b/block/blk-core.c
>> index 82b5b2c53f1e..fe1d320f5f07 100644
>> --- a/block/blk-core.c
>> +++ b/block/blk-core.c
>> @@ -953,16 +953,11 @@ void update_io_ticks(struct block_device *part, 
>> unsigned long now, bool end)
>>       }
>>   }
>> -unsigned long bdev_start_io_acct(struct block_device *bdev,
>> -                 unsigned int sectors, enum req_op op,
>> +unsigned long bdev_start_io_acct(struct block_device *bdev, enum 
>> req_op op,
>>                    unsigned long start_time)
>>   {
>> -    const int sgrp = op_stat_group(op);
>> -
>>       part_stat_lock();
>>       update_io_ticks(bdev, start_time, false);
>> -    part_stat_inc(bdev, ios[sgrp]);
>> -    part_stat_add(bdev, sectors[sgrp], sectors);
>>       part_stat_local_inc(bdev, in_flight[op_is_write(op)]);
>>       part_stat_unlock();
>> @@ -978,13 +973,12 @@ EXPORT_SYMBOL(bdev_start_io_acct);
>>    */
>>   unsigned long bio_start_io_acct(struct bio *bio)
>>   {
>> -    return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
>> -                  bio_op(bio), jiffies);
>> +    return bdev_start_io_acct(bio->bi_bdev, bio_op(bio), jiffies);
>>   }
>>   EXPORT_SYMBOL_GPL(bio_start_io_acct);
>>   void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
>> -              unsigned long start_time)
>> +              unsigned int sectors, unsigned long start_time)
>>   {
>>       const int sgrp = op_stat_group(op);
>>       unsigned long now = READ_ONCE(jiffies);
>> @@ -992,6 +986,8 @@ void bdev_end_io_acct(struct block_device *bdev, 
>> enum req_op op,
>>       part_stat_lock();
>>       update_io_ticks(bdev, now, true);
>> +    part_stat_inc(bdev, ios[sgrp]);
>> +    part_stat_add(bdev, sectors[sgrp], sectors);
>>       part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
>>       part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
>>       part_stat_unlock();
>> @@ -1001,7 +997,7 @@ EXPORT_SYMBOL(bdev_end_io_acct);
>>   void bio_end_io_acct_remapped(struct bio *bio, unsigned long 
>> start_time,
>>                     struct block_device *orig_bdev)
>>   {
>> -    bdev_end_io_acct(orig_bdev, bio_op(bio), start_time);
>> +    bdev_end_io_acct(orig_bdev, bio_op(bio), bio_sectors(bio), 
>> start_time);
>>   }
>>   EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped);
>> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
>> index eace45a18d45..f5cc330bb549 100644
>> --- a/drivers/md/dm.c
>> +++ b/drivers/md/dm.c
>> @@ -512,10 +512,10 @@ static void dm_io_acct(struct dm_io *io, bool end)
>>           sectors = io->sectors;
>>       if (!end)
>> -        bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio),
>> -                   start_time);
>> +        bdev_start_io_acct(bio->bi_bdev, bio_op(bio), start_time);
>>       else
>> -        bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time);
>> +        bdev_end_io_acct(bio->bi_bdev, bio_op(bio), sectors,
>> +                 start_time);
>>       if (static_branch_unlikely(&stats_enabled) &&
>>           unlikely(dm_stats_used(&md->stats))) {
>> diff --git a/drivers/nvme/host/multipath.c 
>> b/drivers/nvme/host/multipath.c
>> index fc39d01e7b63..9171452e2f6d 100644
>> --- a/drivers/nvme/host/multipath.c
>> +++ b/drivers/nvme/host/multipath.c
>> @@ -123,9 +123,8 @@ void nvme_mpath_start_request(struct request *rq)
>>           return;
>>       nvme_req(rq)->flags |= NVME_MPATH_IO_STATS;
>> -    nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0,
>> -                    blk_rq_bytes(rq) >> SECTOR_SHIFT,
>> -                    req_op(rq), jiffies);
>> +    nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, 
>> req_op(rq),
>> +                              jiffies);
>>   }
>>   EXPORT_SYMBOL_GPL(nvme_mpath_start_request);
>> @@ -136,7 +135,8 @@ void nvme_mpath_end_request(struct request *rq)
>>       if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS))
>>           return;
>>       bdev_end_io_acct(ns->head->disk->part0, req_op(rq),
>> -        nvme_req(rq)->start_time);
>> +             blk_rq_bytes(rq) >> SECTOR_SHIFT,
>> +             nvme_req(rq)->start_time);
>>   }
>>   void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index d1aee08f8c18..941304f17492 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -1446,11 +1446,10 @@ static inline void blk_wake_io_task(struct 
>> task_struct *waiter)
>>           wake_up_process(waiter);
>>   }
>> -unsigned long bdev_start_io_acct(struct block_device *bdev,
>> -                 unsigned int sectors, enum req_op op,
>> +unsigned long bdev_start_io_acct(struct block_device *bdev, enum 
>> req_op op,
>>                    unsigned long start_time);
>>   void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
>> -        unsigned long start_time);
>> +              unsigned int sectors, unsigned long start_time);
>>   unsigned long bio_start_io_acct(struct bio *bio);
>>   void bio_end_io_acct_remapped(struct bio *bio, unsigned long 
>> start_time,
>>
> 
> .
>
Christoph Hellwig March 15, 2023, 3:10 p.m. UTC | #3
Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>
Jens Axboe March 15, 2023, 3:25 p.m. UTC | #4
On Thu, 23 Feb 2023 17:12:26 +0800, Yu Kuai wrote:
> While using iostat for raid, I observed very strange 'await'
> occasionally, and turns out it's due to that 'ios' and 'sectors' is
> counted in bdev_start_io_acct(), while 'nsecs' is counted in
> bdev_end_io_acct(). I'm not sure why they are ccounted like that
> but I think this behaviour is obviously wrong because user will get
> wrong disk stats.
> 
> [...]

Applied, thanks!

[1/1] block: count 'ios' and 'sectors' when io is done for bio-based device
      commit: 5f27571382ca42daa3e3d40d1b252bf18c2b61d2

Best regards,
diff mbox series

Patch

diff --git a/block/blk-core.c b/block/blk-core.c
index 82b5b2c53f1e..fe1d320f5f07 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -953,16 +953,11 @@  void update_io_ticks(struct block_device *part, unsigned long now, bool end)
 	}
 }
 
-unsigned long bdev_start_io_acct(struct block_device *bdev,
-				 unsigned int sectors, enum req_op op,
+unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
 				 unsigned long start_time)
 {
-	const int sgrp = op_stat_group(op);
-
 	part_stat_lock();
 	update_io_ticks(bdev, start_time, false);
-	part_stat_inc(bdev, ios[sgrp]);
-	part_stat_add(bdev, sectors[sgrp], sectors);
 	part_stat_local_inc(bdev, in_flight[op_is_write(op)]);
 	part_stat_unlock();
 
@@ -978,13 +973,12 @@  EXPORT_SYMBOL(bdev_start_io_acct);
  */
 unsigned long bio_start_io_acct(struct bio *bio)
 {
-	return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
-				  bio_op(bio), jiffies);
+	return bdev_start_io_acct(bio->bi_bdev, bio_op(bio), jiffies);
 }
 EXPORT_SYMBOL_GPL(bio_start_io_acct);
 
 void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
-		      unsigned long start_time)
+		      unsigned int sectors, unsigned long start_time)
 {
 	const int sgrp = op_stat_group(op);
 	unsigned long now = READ_ONCE(jiffies);
@@ -992,6 +986,8 @@  void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
 
 	part_stat_lock();
 	update_io_ticks(bdev, now, true);
+	part_stat_inc(bdev, ios[sgrp]);
+	part_stat_add(bdev, sectors[sgrp], sectors);
 	part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
 	part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
 	part_stat_unlock();
@@ -1001,7 +997,7 @@  EXPORT_SYMBOL(bdev_end_io_acct);
 void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
 			      struct block_device *orig_bdev)
 {
-	bdev_end_io_acct(orig_bdev, bio_op(bio), start_time);
+	bdev_end_io_acct(orig_bdev, bio_op(bio), bio_sectors(bio), start_time);
 }
 EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped);
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index eace45a18d45..f5cc330bb549 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -512,10 +512,10 @@  static void dm_io_acct(struct dm_io *io, bool end)
 		sectors = io->sectors;
 
 	if (!end)
-		bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio),
-				   start_time);
+		bdev_start_io_acct(bio->bi_bdev, bio_op(bio), start_time);
 	else
-		bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time);
+		bdev_end_io_acct(bio->bi_bdev, bio_op(bio), sectors,
+				 start_time);
 
 	if (static_branch_unlikely(&stats_enabled) &&
 	    unlikely(dm_stats_used(&md->stats))) {
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index fc39d01e7b63..9171452e2f6d 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -123,9 +123,8 @@  void nvme_mpath_start_request(struct request *rq)
 		return;
 
 	nvme_req(rq)->flags |= NVME_MPATH_IO_STATS;
-	nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0,
-					blk_rq_bytes(rq) >> SECTOR_SHIFT,
-					req_op(rq), jiffies);
+	nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, req_op(rq),
+						      jiffies);
 }
 EXPORT_SYMBOL_GPL(nvme_mpath_start_request);
 
@@ -136,7 +135,8 @@  void nvme_mpath_end_request(struct request *rq)
 	if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS))
 		return;
 	bdev_end_io_acct(ns->head->disk->part0, req_op(rq),
-		nvme_req(rq)->start_time);
+			 blk_rq_bytes(rq) >> SECTOR_SHIFT,
+			 nvme_req(rq)->start_time);
 }
 
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d1aee08f8c18..941304f17492 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1446,11 +1446,10 @@  static inline void blk_wake_io_task(struct task_struct *waiter)
 		wake_up_process(waiter);
 }
 
-unsigned long bdev_start_io_acct(struct block_device *bdev,
-				 unsigned int sectors, enum req_op op,
+unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
 				 unsigned long start_time);
 void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
-		unsigned long start_time);
+		      unsigned int sectors, unsigned long start_time);
 
 unsigned long bio_start_io_acct(struct bio *bio);
 void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,