dm-verity: support block number limits for different ioprio classes

Message ID 20250325104942.1170388-1-weilongping@oppo.com (mailing list archive)
State Superseded, archived
Delegated to: Mikulas Patocka
Series dm-verity: support block number limits for different ioprio classes

Commit Message

LongPing Wei March 25, 2025, 10:49 a.m. UTC
Calling verity_verify_io in bh for IO of all sizes is not suitable for
embedded devices. From our tests, it can improve the performance of 4K
synchronous random reads.
For example:
./fio --name=rand_read --ioengine=psync --rw=randread --bs=4K \
 --direct=1 --numjobs=8 --runtime=60 --time_based --group_reporting \
 --filename=/dev/block/mapper/xx-verity

But it will degrade the performance of 512K synchronous sequential reads
on our devices.
For example:
./fio --name=read --ioengine=psync --rw=read --bs=512K --direct=1 \
 --numjobs=8 --runtime=60 --time_based --group_reporting \
 --filename=/dev/block/mapper/xx-verity

This change introduces a parameter array, and users can modify the default
configuration via /sys/module/dm_verity/parameters/use_bh_blocks.

The default limits for NONE/RT/BE/IDLE are set to UINT_MAX to keep the
original behaviour.

Signed-off-by: LongPing Wei <weilongping@oppo.com>
---
 drivers/md/dm-verity-target.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

Comments

Eric Biggers March 25, 2025, 5:43 p.m. UTC | #1
Hi LongPing,

On Tue, Mar 25, 2025 at 06:49:43PM +0800, LongPing Wei wrote:
> Calling verity_verify_io in bh for IO of all sizes is not suitable for
> embedded devices. From our tests, it can improve the performance of 4K
> synchronous random reads.
> For example:
> ./fio --name=rand_read --ioengine=psync --rw=randread --bs=4K \
>  --direct=1 --numjobs=8 --runtime=60 --time_based --group_reporting \
>  --filename=/dev/block/mapper/xx-verity
> 
> But it will degrade the performance of 512K synchronous sequential reads
> on our devices.
> For example:
> ./fio --name=read --ioengine=psync --rw=read --bs=512K --direct=1 \
>  --numjobs=8 --runtime=60 --time_based --group_reporting \
>  --filename=/dev/block/mapper/xx-verity
> 
> This change introduces a parameter array, and users can modify the default
> configuration via /sys/module/dm_verity/parameters/use_bh_blocks.
> 
> The default limits for NONE/RT/BE/IDLE are set to UINT_MAX to keep the
> original behaviour.
> 
> Signed-off-by: LongPing Wei <weilongping@oppo.com>
> ---
>  drivers/md/dm-verity-target.c | 19 ++++++++++++++++++-
>  1 file changed, 18 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
> index e86c1431b108..1e0d0920d6a9 100644
> --- a/drivers/md/dm-verity-target.c
> +++ b/drivers/md/dm-verity-target.c
> @@ -30,6 +30,7 @@
>  #define DM_VERITY_ENV_VAR_NAME		"DM_VERITY_ERR_BLOCK_NR"
>  
>  #define DM_VERITY_DEFAULT_PREFETCH_SIZE	262144
> +#define DM_VERITY_USE_BH_DEFAULT_BLOCKS	UINT_MAX
>  
>  #define DM_VERITY_MAX_CORRUPTED_ERRS	100
>  
> @@ -49,6 +50,15 @@ static unsigned int dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE
>  
>  module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, 0644);
>  
> +static unsigned int dm_verity_use_bh_blocks[4] = {
> +	DM_VERITY_USE_BH_DEFAULT_BLOCKS,	// IOPRIO_CLASS_NONE
> +	DM_VERITY_USE_BH_DEFAULT_BLOCKS,	// IOPRIO_CLASS_RT
> +	DM_VERITY_USE_BH_DEFAULT_BLOCKS,	// IOPRIO_CLASS_BE
> +	DM_VERITY_USE_BH_DEFAULT_BLOCKS		// IOPRIO_CLASS_IDLE
> +};
> +
> +module_param_array_named(use_bh_blocks, dm_verity_use_bh_blocks, uint, NULL, 0644);
> +
>  static DEFINE_STATIC_KEY_FALSE(use_bh_wq_enabled);
>  
>  /* Is at least one dm-verity instance using ahash_tfm instead of shash_tfm? */
> @@ -652,6 +662,12 @@ static void verity_bh_work(struct work_struct *w)
>  	verity_finish_io(io, errno_to_blk_status(err));
>  }
>  
> +static inline bool verity_use_bh(unsigned int n_blocks, unsigned short ioprio)
> +{
> +	return ioprio <= IOPRIO_CLASS_IDLE &&
> +		n_blocks <= READ_ONCE(dm_verity_use_bh_blocks[ioprio]);
> +}
> +
>  static void verity_end_io(struct bio *bio)
>  {
>  	struct dm_verity_io *io = bio->bi_private;
> @@ -664,7 +680,8 @@ static void verity_end_io(struct bio *bio)
>  		return;
>  	}
>  
> -	if (static_branch_unlikely(&use_bh_wq_enabled) && io->v->use_bh_wq) {
> +	if (static_branch_unlikely(&use_bh_wq_enabled) && io->v->use_bh_wq &&
> +		verity_use_bh(io->n_blocks, IOPRIO_PRIO_CLASS(bio->bi_ioprio))) {
>  		INIT_WORK(&io->bh_work, verity_bh_work);
>  		queue_work(system_bh_wq, &io->bh_work);

Thanks for working on this and sending a patch!

Just for some background: "try_verify_in_tasklet" was originally developed for
use in Android, but it did not actually end up being used there because of
concern about softirqs interfering with real-time tasks.  Yet, the scheduling
delay in dm-verity remains a problem.  I think that opportunistically doing the
verity work in softirq context remains promising (not necessarily ideal, but
helpful in practice), especially in cases where the block softirq already has to
run so the work can just be done in-line.  dm-verity mainly just needs to be
smarter about whether it chooses to use softirq or kworker for each request.

Choosing softirq context only for short requests seems helpful, and it's the
same idea I had.  In practice, dm-verity I/O request lengths have a bimodal
distribution with peaks at 4 KiB for synchronous reads and 128 KiB for
readahead.  These cases are quite different, and it makes sense that different
solutions could be optimal for them.  SHA-256 hashing a 4 KiB block takes only
2-6 microseconds, so there is not much reason to punt it to a kworker.
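(To put rough numbers on that: a 128 KiB readahead request covers 32 4-KiB
blocks, so hashing it in-line would take on the order of 64-192 microseconds,
versus 2-6 microseconds for a single 4 KiB synchronous read.)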

Taking the I/O priority into account makes sense too.  In theory, the higher the
priority, the more softirq should be preferred.  Though, I do wonder if almost
all the benefit actually just comes from the 4 KiB case, in which case there
would not be much reason to bother looking at the I/O priority.

FWIW, other ideas that I have are:

    - When possible, make dm-verity do the work inline in the existing block
      softirq, instead of using the BH workqueue.  This would reduce the amount
      of overhead (rough sketch after this list).  Note that dm-crypt does it
      this way.

    - If the CPU has been in softirq context for too long, then stop choosing
      softirq context and instead fall back to the traditional workqueue.  This
      could help address objections about increased use of softirq context.
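
For the first idea, a minimal sketch of the kind of check involved, modeled on
dm-crypt's kcryptd_queue_crypt() (untested, reusing names from the patch above;
the exact guards and placement would need care):

	/* In verity_end_io(), instead of unconditionally queueing BH work: */
	if (static_branch_unlikely(&use_bh_wq_enabled) && io->v->use_bh_wq &&
	    verity_use_bh(io->n_blocks, IOPRIO_PRIO_CLASS(bio->bi_ioprio))) {
		if (!in_hardirq() && !irqs_disabled()) {
			/*
			 * Already in softirq context (e.g. the block softirq
			 * that completed this bio): verify in-line and skip
			 * the BH workqueue round-trip.  verity_bh_work() only
			 * uses the work pointer for container_of(), so calling
			 * it directly is fine.
			 */
			verity_bh_work(&io->bh_work);
			return;
		}
		INIT_WORK(&io->bh_work, verity_bh_work);
		queue_work(system_bh_wq, &io->bh_work);
		return;
	}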

But this patch seems like a good start.

> The default limits for NONE/RT/BE/IDLE are set to UINT_MAX to keep the
> original behaviour.

Let's just set good parameters by default instead.  "try_verify_in_tasklet" is
kind of useless as-is, and we should just fix it.  Especially since it only
affects performance and not other behavior, I don't think there are any
compatibility concerns with changing what it does exactly.

- Eric
Mikulas Patocka March 25, 2025, 9:17 p.m. UTC | #2
Hi

I think the patch is OK, I just suggest these changes:

- set the limit as a number of bytes, rather than a number of blocks,
because different dm-verity devices can have different block sizes (rough
sketch after this list)

- run some benchmarks and set the default values according to the 
benchmark results

- increase the target version number

- add a note about this feature to 
Documentation/admin-guide/device-mapper/verity.rst
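
For the first point, a rough sketch of what a byte-based limit could look like
(illustrative only: dm_verity_use_bh_bytes is a made-up name, and
data_dev_block_bits is the per-target block-size shift from struct dm_verity):

	static unsigned int dm_verity_use_bh_bytes[4] = {
		UINT_MAX,	/* IOPRIO_CLASS_NONE */
		UINT_MAX,	/* IOPRIO_CLASS_RT */
		UINT_MAX,	/* IOPRIO_CLASS_BE */
		UINT_MAX	/* IOPRIO_CLASS_IDLE */
	};

	static inline bool verity_use_bh(unsigned int bytes, unsigned short ioprio)
	{
		return ioprio <= IOPRIO_CLASS_IDLE &&
			bytes <= READ_ONCE(dm_verity_use_bh_bytes[ioprio]);
	}

	/* ... with the caller passing bytes instead of blocks: */
	verity_use_bh(io->n_blocks << io->v->data_dev_block_bits,
		      IOPRIO_PRIO_CLASS(bio->bi_ioprio))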

Mikulas


On Tue, 25 Mar 2025, LongPing Wei wrote:

> Calling verity_verify_io in bh for IO of all sizes is not suitable for
> embedded devices. From our tests, it can improve the performance of 4K
> synchronous random reads.
> For example:
> ./fio --name=rand_read --ioengine=psync --rw=randread --bs=4K \
>  --direct=1 --numjobs=8 --runtime=60 --time_based --group_reporting \
>  --filename=/dev/block/mapper/xx-verity
> 
> But it will degrade the performance of 512K synchronous sequential reads
> on our devices.
> For example:
> ./fio --name=read --ioengine=psync --rw=read --bs=512K --direct=1 \
>  --numjobs=8 --runtime=60 --time_based --group_reporting \
>  --filename=/dev/block/mapper/xx-verity
> 
> This change introduces a parameter array, and users can modify the default
> configuration via /sys/module/dm_verity/parameters/use_bh_blocks.
> 
> The default limits for NONE/RT/BE/IDLE are set to UINT_MAX to keep the
> original behaviour.
> 
> Signed-off-by: LongPing Wei <weilongping@oppo.com>
> ---
>  drivers/md/dm-verity-target.c | 19 ++++++++++++++++++-
>  1 file changed, 18 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
> index e86c1431b108..1e0d0920d6a9 100644
> --- a/drivers/md/dm-verity-target.c
> +++ b/drivers/md/dm-verity-target.c
> @@ -30,6 +30,7 @@
>  #define DM_VERITY_ENV_VAR_NAME		"DM_VERITY_ERR_BLOCK_NR"
>  
>  #define DM_VERITY_DEFAULT_PREFETCH_SIZE	262144
> +#define DM_VERITY_USE_BH_DEFAULT_BLOCKS	UINT_MAX
>  
>  #define DM_VERITY_MAX_CORRUPTED_ERRS	100
>  
> @@ -49,6 +50,15 @@ static unsigned int dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE
>  
>  module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, 0644);
>  
> +static unsigned int dm_verity_use_bh_blocks[4] = {
> +	DM_VERITY_USE_BH_DEFAULT_BLOCKS,	// IOPRIO_CLASS_NONE
> +	DM_VERITY_USE_BH_DEFAULT_BLOCKS,	// IOPRIO_CLASS_RT
> +	DM_VERITY_USE_BH_DEFAULT_BLOCKS,	// IOPRIO_CLASS_BE
> +	DM_VERITY_USE_BH_DEFAULT_BLOCKS		// IOPRIO_CLASS_IDLE
> +};
> +
> +module_param_array_named(use_bh_blocks, dm_verity_use_bh_blocks, uint, NULL, 0644);
> +
>  static DEFINE_STATIC_KEY_FALSE(use_bh_wq_enabled);
>  
>  /* Is at least one dm-verity instance using ahash_tfm instead of shash_tfm? */
> @@ -652,6 +662,12 @@ static void verity_bh_work(struct work_struct *w)
>  	verity_finish_io(io, errno_to_blk_status(err));
>  }
>  
> +static inline bool verity_use_bh(unsigned int n_blocks, unsigned short ioprio)
> +{
> +	return ioprio <= IOPRIO_CLASS_IDLE &&
> +		n_blocks <= READ_ONCE(dm_verity_use_bh_blocks[ioprio]);
> +}
> +
>  static void verity_end_io(struct bio *bio)
>  {
>  	struct dm_verity_io *io = bio->bi_private;
> @@ -664,7 +680,8 @@ static void verity_end_io(struct bio *bio)
>  		return;
>  	}
>  
> -	if (static_branch_unlikely(&use_bh_wq_enabled) && io->v->use_bh_wq) {
> +	if (static_branch_unlikely(&use_bh_wq_enabled) && io->v->use_bh_wq &&
> +		verity_use_bh(io->n_blocks, IOPRIO_PRIO_CLASS(bio->bi_ioprio))) {
>  		INIT_WORK(&io->bh_work, verity_bh_work);
>  		queue_work(system_bh_wq, &io->bh_work);
>  	} else {
> -- 
> 2.34.1
>
LongPing Wei March 26, 2025, 2:09 a.m. UTC | #3
Hi, Eric

 > If the CPU has been in softirq context for too long, then stop
 > choosing softirq context and instead fall back to the traditional
 > workqueue.  This could help address objections about increased use of 
 > softirq context.

Could you share an example of this?

 > When possible, make dm-verity do the work inline in the existing block
 > softirq, instead of using the BH workqueue.  This would reduce the
 > amount of overhead.  Note that dm-crypt does it this way.

 > Let's just set good parameters by default instead.
 > "try_verify_in_tasklet" is kind of useless as-is, and we should just
 > fix it.  Especially since it only affects performance and not other
 > behavior, I don't think there are any compatibility concerns with
 > changing what it does exactly.

These ideas have been implemented in patch v2. Thanks for your suggestions.

- LongPing
Eric Biggers March 26, 2025, 4:40 p.m. UTC | #4
On Wed, Mar 26, 2025 at 10:09:22AM +0800, LongPing Wei wrote:
> Hi, Eric
> 
> > If the CPU has been in softirq context for too long, then stop
> > choosing softirq context and instead fall back to the traditional
> > workqueue.  This could help address objections about increased use of
> > softirq context.
> 
> Could you share an example of this?
> 

The objection that keeps getting raised to doing more work in softirq context is
that the latency of real-time tasks, such as audio tasks, may increase due to
softirqs having a higher priority than all tasks, causing audio skipping, etc.

The logic in handle_softirqs() in kernel/softirq.c actually goes a ways towards
addressing this already: it processes softirqs for at most 2 ms or until
rescheduling of the interrupted task gets requested, before deferring them to
ksoftirqd which runs at normal task priority.
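
Paraphrased (the exact form differs across kernel versions):

	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;	/* ~2 ms */
	int max_restart = MAX_SOFTIRQ_RESTART;
	u32 pending;

restart:
	/* ... run handlers for all currently-pending softirqs ... */

	pending = local_softirq_pending();
	if (pending) {
		/* keep going only while under budget and nothing wants the CPU */
		if (time_before(jiffies, end) && !need_resched() &&
		    --max_restart)
			goto restart;

		/* budget exhausted: defer to ksoftirqd, which runs at task priority */
		wakeup_softirqd();
	}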

The gaps that I see are (a) 2 ms is longer than desired, and (b) the limit does
not apply to *individual* softirqs.  Looking at (b), observe that the block
softirq (blk_done_softirq()) completes a list of I/O requests.  If there are a
lot of requests in that list, it could theoretically take more than the 2 ms
limit that kernel/softirq.c is meant to enforce (which is already too long).

So the objection would be that, even with dm-verity choosing to do in-line
verification only for 4 KiB requests which take only a few microseconds each, a
lot of requests could still add up to cause a long time to be spent in a single
softirq context.  (At least in theory.  AFAIK no one has confirmed that this is
actually a problem in practice with the 4 KiB limit applied.  But this is the
objection that I keep hearing whenever anyone suggests doing more in softirqs.)

But if dm-verity could detect this case happening and start deferring
verification work to task context, that should mostly address this concern, IMO.

One idea which *might* get us most of the way there pretty easily would be to do
the in-line verification only when need_resched() returns false.
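
I.e., roughly (a sketch on top of this patch's condition):

	if (static_branch_unlikely(&use_bh_wq_enabled) && io->v->use_bh_wq &&
	    !need_resched() &&	/* nothing else is waiting for this CPU */
	    verity_use_bh(io->n_blocks, IOPRIO_PRIO_CLASS(bio->bi_ioprio))) {
		INIT_WORK(&io->bh_work, verity_bh_work);
		queue_work(system_bh_wq, &io->bh_work);
	} else {
		/* fall back to the traditional workqueue path */
	}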

- Eric

Patch

diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index e86c1431b108..1e0d0920d6a9 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -30,6 +30,7 @@ 
 #define DM_VERITY_ENV_VAR_NAME		"DM_VERITY_ERR_BLOCK_NR"
 
 #define DM_VERITY_DEFAULT_PREFETCH_SIZE	262144
+#define DM_VERITY_USE_BH_DEFAULT_BLOCKS	UINT_MAX
 
 #define DM_VERITY_MAX_CORRUPTED_ERRS	100
 
@@ -49,6 +50,15 @@  static unsigned int dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE
 
 module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, 0644);
 
+static unsigned int dm_verity_use_bh_blocks[4] = {
+	DM_VERITY_USE_BH_DEFAULT_BLOCKS,	// IOPRIO_CLASS_NONE
+	DM_VERITY_USE_BH_DEFAULT_BLOCKS,	// IOPRIO_CLASS_RT
+	DM_VERITY_USE_BH_DEFAULT_BLOCKS,	// IOPRIO_CLASS_BE
+	DM_VERITY_USE_BH_DEFAULT_BLOCKS		// IOPRIO_CLASS_IDLE
+};
+
+module_param_array_named(use_bh_blocks, dm_verity_use_bh_blocks, uint, NULL, 0644);
+
 static DEFINE_STATIC_KEY_FALSE(use_bh_wq_enabled);
 
 /* Is at least one dm-verity instance using ahash_tfm instead of shash_tfm? */
@@ -652,6 +662,12 @@  static void verity_bh_work(struct work_struct *w)
 	verity_finish_io(io, errno_to_blk_status(err));
 }
 
+static inline bool verity_use_bh(unsigned int n_blocks, unsigned short ioprio)
+{
+	return ioprio <= IOPRIO_CLASS_IDLE &&
+		n_blocks <= READ_ONCE(dm_verity_use_bh_blocks[ioprio]);
+}
+
 static void verity_end_io(struct bio *bio)
 {
 	struct dm_verity_io *io = bio->bi_private;
@@ -664,7 +680,8 @@  static void verity_end_io(struct bio *bio)
 		return;
 	}
 
-	if (static_branch_unlikely(&use_bh_wq_enabled) && io->v->use_bh_wq) {
+	if (static_branch_unlikely(&use_bh_wq_enabled) && io->v->use_bh_wq &&
+		verity_use_bh(io->n_blocks, IOPRIO_PRIO_CLASS(bio->bi_ioprio))) {
 		INIT_WORK(&io->bh_work, verity_bh_work);
 		queue_work(system_bh_wq, &io->bh_work);
 	} else {