diff mbox series

ufs: crypto: add host_sem lock in ufshcd_program_key

Message ID 20250317110101.347636-1-zhanghui31@xiaomi.com (mailing list archive)
State Changes Requested
Headers show
Series ufs: crypto: add host_sem lock in ufshcd_program_key | expand

Commit Message

ZhangHui March 17, 2025, 11:01 a.m. UTC
From: zhanghui <zhanghui31@xiaomi.com>

On Android devices, we found that there is a probability that
the ufs has been shut down but the thread is still deleting the
key, which will cause the bus error.

We checked the Android reboot process and found that it is indeed
possible that some threads have not been killed before the device
shutdown, because the Android reboot process will not wait until
all threads are killed before continuing to execute.

The call stack is as follows:

__blk_crypto_evict_key+0x148/0x1c4
blk_crypto_evict_key+0x38/0x9c
dm_keyslot_evict_callback+0x18/0x2c
default_key_iterate_devices+0x40/0x50
dm_keyslot_evict+0xac/0xfc
__blk_crypto_evict_key+0xf4/0x1c4
blk_crypto_evict_key+0x38/0x9c
fscrypt_destroy_inline_crypt_key+0xb8/0x10c
fscrypt_destroy_prepared_key+0x30/0x48
fscrypt_put_master_key_activeref+0xc4/0x308
fscrypt_destroy_keyring+0xb0/0xfc
generic_shutdown_super+0x60/0x12c
kill_block_super+0x1c/0x48
kill_f2fs_super+0xc4/0x1a8
deactivate_locked_super+0x98/0x144
deactivate_super+0x78/0x8c
cleanup_mnt+0x110/0x148
__cleanup_mnt+0x14/0x20
task_work_run+0xc4/0xec
get_signal+0x6c/0x8d8
do_notify_resume+0x128/0x828
el0_svc+0x6c/0x70
el0t_64_sync_handler+0x68/0xbc
el0t_64_sync+0x1a8/0x1ac

Let's fixed this issue by adding a lock in program_key flow.

Signed-off-by: zhanghui <zhanghui31@xiaomi.com>
---
 drivers/ufs/core/ufshcd-crypto.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

Comments

Bart Van Assche March 17, 2025, 10:32 p.m. UTC | #1
On 3/17/25 4:01 AM, ZhangHui wrote:
> On Android devices, we found that there is a probability that
> the ufs has been shut down but the thread is still deleting the
> key, which will cause the bus error.

How does this patch guarantee that crypto keys are evicted before the
UFS driver has been shut down? This should be explained in detail in the
patch description.

> Let's fixed this issue by adding a lock in program_key flow.

There are multiple synchronization objects owned by the UFS driver. Why
'host_sem' and not any other synchronization object?

Thanks,

Bart.
ZhangHui March 19, 2025, 3:17 a.m. UTC | #2
On Mon, Mar 17, 2025 at 03:32:51PM -0700, Bart Van Assche wrote:
> On 3/17/25 4:01 AM, ZhangHui wrote:
> > On Android devices, we found that there is a probability that
> > the ufs has been shut down but the thread is still deleting the
> > key, which will cause the bus error.
> 
> How does this patch guarantee that crypto keys are evicted before the
> UFS driver has been shut down? This should be explained in detail in the
> patch description.

This modification does not guarantee that ufs has entered shutdown before
evicting the key. Drivers should not make assumptions about the behavior
of user access.

If ufs has entered shutdown, evicting key flow will return -EBUSY.

> > Let's fixed this issue by adding a lock in program_key flow.
> 
> There are multiple synchronization objects owned by the UFS driver. Why
> 'host_sem' and not any other synchronization object?
> 

The host_sem is held in suspend/resume, shutdown and err hander flow, so I
think host_sem is used to mutually exclude host controller power failure
and access.

thanks 
zhanghui
Eric Biggers March 21, 2025, 4:44 a.m. UTC | #3
Hi ZhangHui,

On Mon, Mar 17, 2025 at 07:01:01PM +0800, ZhangHui wrote:
> From: zhanghui <zhanghui31@xiaomi.com>
> 
> On Android devices, we found that there is a probability that
> the ufs has been shut down but the thread is still deleting the
> key, which will cause the bus error.
> 
> We checked the Android reboot process and found that it is indeed
> possible that some threads have not been killed before the device
> shutdown, because the Android reboot process will not wait until
> all threads are killed before continuing to execute.
> 
> The call stack is as follows:
> 
> __blk_crypto_evict_key+0x148/0x1c4
> blk_crypto_evict_key+0x38/0x9c
> dm_keyslot_evict_callback+0x18/0x2c
> default_key_iterate_devices+0x40/0x50
> dm_keyslot_evict+0xac/0xfc
> __blk_crypto_evict_key+0xf4/0x1c4
> blk_crypto_evict_key+0x38/0x9c
> fscrypt_destroy_inline_crypt_key+0xb8/0x10c
> fscrypt_destroy_prepared_key+0x30/0x48
> fscrypt_put_master_key_activeref+0xc4/0x308
> fscrypt_destroy_keyring+0xb0/0xfc
> generic_shutdown_super+0x60/0x12c
> kill_block_super+0x1c/0x48
> kill_f2fs_super+0xc4/0x1a8
> deactivate_locked_super+0x98/0x144
> deactivate_super+0x78/0x8c
> cleanup_mnt+0x110/0x148
> __cleanup_mnt+0x14/0x20
> task_work_run+0xc4/0xec
> get_signal+0x6c/0x8d8
> do_notify_resume+0x128/0x828
> el0_svc+0x6c/0x70
> el0t_64_sync_handler+0x68/0xbc
> el0t_64_sync+0x1a8/0x1ac
>
> Let's fixed this issue by adding a lock in program_key flow.
> 
> Signed-off-by: zhanghui <zhanghui31@xiaomi.com>
> ---
>  drivers/ufs/core/ufshcd-crypto.c | 20 +++++++++++++++-----
>  1 file changed, 15 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/ufs/core/ufshcd-crypto.c b/drivers/ufs/core/ufshcd-crypto.c
> index 694ff7578fc1..f3260a072c0c 100644
> --- a/drivers/ufs/core/ufshcd-crypto.c
> +++ b/drivers/ufs/core/ufshcd-crypto.c
> @@ -5,6 +5,7 @@
>  
>  #include <ufs/ufshcd.h>
>  #include "ufshcd-crypto.h"
> +#include "ufshcd-priv.h"
>  
>  /* Blk-crypto modes supported by UFS crypto */
>  static const struct ufs_crypto_alg_entry {
> @@ -17,12 +18,18 @@ static const struct ufs_crypto_alg_entry {
>  	},
>  };
>  
> -static void ufshcd_program_key(struct ufs_hba *hba,
> +static int ufshcd_program_key(struct ufs_hba *hba,
>  			       const union ufs_crypto_cfg_entry *cfg, int slot)
>  {
>  	int i;
>  	u32 slot_offset = hba->crypto_cfg_register + slot * sizeof(*cfg);
>  
> +	down(&hba->host_sem);
> +	if (!ufshcd_is_user_access_allowed(hba)) {
> +		up(&hba->host_sem);
> +		return -EBUSY;
> +	}
> +

It seems broken that the filesystem doesn't get unmounted until after the UFS is
shut down.  It would be helpful to get a clearer picture of exactly why things
are happening in that order.

But disregarding that, it's indeed logical for blk_crypto_evict_key() to return
an error if it cannot fulfill the request.

But I'm wondering if this needs to be solved in the UFS driver itself or whether
the blk-crypto framework should handle this (so that it also gets fixed for
other drivers that may have the same problem).  In block/blk-crypto-profile.c,
pm_runtime_get_sync() is already called before ->keyslot_evict.  So
->keyslot_evict is supposed to be called only when the device is resumed.

The blk-crypto code (in blk_crypto_hw_enter()) doesn't check the return value of
pm_runtime_get_sync(), though.  That seems like a bug.  Is it possible this
issue would be fixed if it checked the return value?

Or does the UFS driver still need to check ufshcd_is_user_access_allowed() too?
If that's the case, I'm also wondering whether it's okay to nest host_sem inside
pm_runtime_get_sync().  Elsewhere in the UFS driver they are called in the
opposite order.

- Eric
ZhangHui March 21, 2025, 7:45 a.m. UTC | #4
On Thu, Mar 20, 2025 at 09:44:55PM -0700, Eric Biggers wrote:
> It seems broken that the filesystem doesn't get unmounted until after the UFS is
> shut down.  It would be helpful to get a clearer picture of exactly why things
> are happening in that order.
> 
> But disregarding that, it's indeed logical for blk_crypto_evict_key() to return
> an error if it cannot fulfill the request.
> 
> But I'm wondering if this needs to be solved in the UFS driver itself or whether
> the blk-crypto framework should handle this (so that it also gets fixed for
> other drivers that may have the same problem).  In block/blk-crypto-profile.c,
> pm_runtime_get_sync() is already called before ->keyslot_evict.  So
> ->keyslot_evict is supposed to be called only when the device is resumed.
> 
> The blk-crypto code (in blk_crypto_hw_enter()) doesn't check the return value of
> pm_runtime_get_sync(), though.  That seems like a bug.  Is it possible this
> issue would be fixed if it checked the return value?
> 

hi Eric,

I have checked the device_shutdown process and it seems only wait for the resume
that has not been processed to be completed, and then continue. It does not seem
to cause pm_runtime_get_sync to return an error.

> Or does the UFS driver still need to check ufshcd_is_user_access_allowed() too?
> If that's the case, I'm also wondering whether it's okay to nest host_sem inside
> pm_runtime_get_sync().  Elsewhere in the UFS driver they are called in the
> opposite order.

I found that ufshcd_is_user_access_allowed is used in many places in the ufs driver
code. What is the historical reason for this?

thanks
zhanghui
Bart Van Assche March 21, 2025, 4:53 p.m. UTC | #5
On 3/21/25 12:45 AM, ZhangHui wrote:
> I have checked the device_shutdown process and it seems only wait
> for the resume that has not been processed to be completed, and
 > then continue. It does not seem to cause pm_runtime_get_sync to return
 > an error.

device_shutdown() is a kernel function. File systems must be unmounted
by user space code before the device_shutdown() kernel function is
called. The sequence followed by systemd is as follows (see also the
systemd source file src/shutdown/shutdown.c):
* Call sync().
* Send SIGTERM and SIGKILL to all running processes.
* Unmount all filesystems, deactivate swap devices, detach loopback
   devices, stop md devices and detach dm devices.
* Call sync() again.
* Call the reboot() system call.

 From kernel/reboot.c:

SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
		void __user *, arg)
{
	...
	case LINUX_REBOOT_CMD_POWER_OFF:
		kernel_power_off();
		do_exit(0);
		break;
	...
}

void kernel_power_off(void)
{
	kernel_shutdown_prepare(SYSTEM_POWER_OFF);
	if (pm_power_off_prepare)
		pm_power_off_prepare();
	migrate_to_reboot_cpu();
	syscore_shutdown();
	pr_emerg("Power down\n");
	kmsg_dump(KMSG_DUMP_POWEROFF);
	machine_power_off();
}

static void kernel_shutdown_prepare(enum system_states state)
{
	blocking_notifier_call_chain(&amp;reboot_notifier_list,
		(state == SYSTEM_HALT) ? SYS_HALT : SYS_POWER_OFF, NULL);
	system_state = state;
	usermodehelper_disable();
	device_shutdown();
}

>> Or does the UFS driver still need to check
 >> ufshcd_is_user_access_allowed() too? If that's the case, I'm also
 >> wondering whether it's okay to nest host_sem inside
 >> pm_runtime_get_sync().  Elsewhere in the UFS driver they are>> 
called in the opposite order.>
 > I found that ufshcd_is_user_access_allowed is used in many places in
 > the ufs driver code. What is the historical reason for this?

My understanding is that ufshcd_is_user_access_allowed() is only called
from sysfs and debugfs show and store callbacks. I'd like to remove that
function because my understanding is that access to sysfs and debugfs
attributes stops before the device .shutdown() callbacks are called.

Bart.
diff mbox series

Patch

diff --git a/drivers/ufs/core/ufshcd-crypto.c b/drivers/ufs/core/ufshcd-crypto.c
index 694ff7578fc1..f3260a072c0c 100644
--- a/drivers/ufs/core/ufshcd-crypto.c
+++ b/drivers/ufs/core/ufshcd-crypto.c
@@ -5,6 +5,7 @@ 
 
 #include <ufs/ufshcd.h>
 #include "ufshcd-crypto.h"
+#include "ufshcd-priv.h"
 
 /* Blk-crypto modes supported by UFS crypto */
 static const struct ufs_crypto_alg_entry {
@@ -17,12 +18,18 @@  static const struct ufs_crypto_alg_entry {
 	},
 };
 
-static void ufshcd_program_key(struct ufs_hba *hba,
+static int ufshcd_program_key(struct ufs_hba *hba,
 			       const union ufs_crypto_cfg_entry *cfg, int slot)
 {
 	int i;
 	u32 slot_offset = hba->crypto_cfg_register + slot * sizeof(*cfg);
 
+	down(&hba->host_sem);
+	if (!ufshcd_is_user_access_allowed(hba)) {
+		up(&hba->host_sem);
+		return -EBUSY;
+	}
+
 	ufshcd_hold(hba);
 
 	/* Ensure that CFGE is cleared before programming the key */
@@ -38,6 +45,9 @@  static void ufshcd_program_key(struct ufs_hba *hba,
 	ufshcd_writel(hba, le32_to_cpu(cfg->reg_val[16]),
 		      slot_offset + 16 * sizeof(cfg->reg_val[0]));
 	ufshcd_release(hba);
+
+	up(&hba->host_sem);
+	return 0;
 }
 
 static int ufshcd_crypto_keyslot_program(struct blk_crypto_profile *profile,
@@ -52,6 +62,7 @@  static int ufshcd_crypto_keyslot_program(struct blk_crypto_profile *profile,
 	int i;
 	int cap_idx = -1;
 	union ufs_crypto_cfg_entry cfg = {};
+	int err;
 
 	BUILD_BUG_ON(UFS_CRYPTO_KEY_SIZE_INVALID != 0);
 	for (i = 0; i < hba->crypto_capabilities.num_crypto_cap; i++) {
@@ -79,10 +90,10 @@  static int ufshcd_crypto_keyslot_program(struct blk_crypto_profile *profile,
 		memcpy(cfg.crypto_key, key->raw, key->size);
 	}
 
-	ufshcd_program_key(hba, &cfg, slot);
+	err = ufshcd_program_key(hba, &cfg, slot);
 
 	memzero_explicit(&cfg, sizeof(cfg));
-	return 0;
+	return err;
 }
 
 static int ufshcd_crypto_keyslot_evict(struct blk_crypto_profile *profile,
@@ -96,8 +107,7 @@  static int ufshcd_crypto_keyslot_evict(struct blk_crypto_profile *profile,
 	 */
 	union ufs_crypto_cfg_entry cfg = {};
 
-	ufshcd_program_key(hba, &cfg, slot);
-	return 0;
+	return ufshcd_program_key(hba, &cfg, slot);
 }
 
 /*