diff mbox series

[2/2] efivarfs: support freeze/thaw

Message ID 20250331-work-freeze-v1-2-6dfbe8253b9f@kernel.org (mailing list archive)
State New
Headers show
Series efivarfs: support freeze/thaw | expand

Commit Message

Christian Brauner March 31, 2025, 12:42 p.m. UTC
Allow efivarfs to partake to resync variable state during system
hibernation and suspend. Add freeze/thaw support.

This is a pretty straightforward implementation. We simply add regular
freeze/thaw support for both userspace and the kernel. This works
without any big issues and congrats afaict efivars is the first
pseudofilesystem that adds support for filesystem freezing and thawing.

The simplicity comes from the fact that we simply always resync variable
state after efivarfs has been frozen. It doesn't matter whether that's
because of suspend, userspace initiated freeze or hibernation. Efivars
is simple enough that it doesn't matter that we walk all dentries. There
are no directories and there aren't insane amounts of entries and both
freeze/thaw are already heavy-handed operations. We really really don't
need to care.

Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/efivarfs/internal.h |   1 -
 fs/efivarfs/super.c    | 196 +++++++++++++------------------------------------
 2 files changed, 51 insertions(+), 146 deletions(-)

Comments

James Bottomley March 31, 2025, 2:46 p.m. UTC | #1
On Mon, 2025-03-31 at 14:42 +0200, Christian Brauner wrote:
> Allow efivarfs to partake to resync variable state during system
> hibernation and suspend. Add freeze/thaw support.
> 
> This is a pretty straightforward implementation. We simply add
> regular freeze/thaw support for both userspace and the kernel. This
> works without any big issues and congrats afaict efivars is the first
> pseudofilesystem that adds support for filesystem freezing and
> thawing.
> 
> The simplicity comes from the fact that we simply always resync
> variable state after efivarfs has been frozen. It doesn't matter
> whether that's because of suspend, userspace initiated freeze or
> hibernation. Efivars is simple enough that it doesn't matter that we
> walk all dentries. There are no directories and there aren't insane
> amounts of entries and both freeze/thaw are already heavy-handed
> operations. We really really don't need to care.

Just as a point of order: this can't actually work until freeze/thaw is
actually plumbed into suspend/resume.

> 
> Signed-off-by: Christian Brauner <brauner@kernel.org>
> ---
>  fs/efivarfs/internal.h |   1 -
>  fs/efivarfs/super.c    | 196 +++++++++++++--------------------------
> ----------
>  2 files changed, 51 insertions(+), 146 deletions(-)
> 
> diff --git a/fs/efivarfs/internal.h b/fs/efivarfs/internal.h
> index ac6a1dd0a6a5..f913b6824289 100644
> --- a/fs/efivarfs/internal.h
> +++ b/fs/efivarfs/internal.h
> @@ -17,7 +17,6 @@ struct efivarfs_fs_info {
>  	struct efivarfs_mount_opts mount_opts;
>  	struct super_block *sb;
>  	struct notifier_block nb;
> -	struct notifier_block pm_nb;
>  };
>  
>  struct efi_variable {
> diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
> index 0486e9b68bc6..567e849a03fe 100644
> --- a/fs/efivarfs/super.c
> +++ b/fs/efivarfs/super.c
> @@ -20,6 +20,7 @@
>  #include <linux/printk.h>
>  
>  #include "internal.h"
> +#include "../internal.h"
>  
>  static int efivarfs_ops_notifier(struct notifier_block *nb, unsigned
> long event,
>  				 void *data)
> @@ -119,12 +120,18 @@ static int efivarfs_statfs(struct dentry
> *dentry, struct kstatfs *buf)
>  
>  	return 0;
>  }
> +
> +static int efivarfs_freeze_fs(struct super_block *sb);
> +static int efivarfs_unfreeze_fs(struct super_block *sb);
> +
>  static const struct super_operations efivarfs_ops = {
>  	.statfs = efivarfs_statfs,
>  	.drop_inode = generic_delete_inode,
>  	.alloc_inode = efivarfs_alloc_inode,
>  	.free_inode = efivarfs_free_inode,
>  	.show_options = efivarfs_show_options,
> +	.freeze_fs = efivarfs_freeze_fs,

Why is it necessary to have a freeze_fs operation?  The current code in
super.c:freeze_super() reads:

	if (sb->s_op->freeze_fs) {
		ret = sb->s_op->freeze_fs(sb);

So it would seem that setting this to NULL has exactly the same effect
as providing a null method.

> +	.unfreeze_fs = efivarfs_unfreeze_fs,
>  };
>  
>  /*
> 
[...]
> @@ -608,9 +516,7 @@ static void efivarfs_kill_sb(struct super_block
> *sb)
>  {
>  	struct efivarfs_fs_info *sfi = sb->s_fs_info;
>  
> -	blocking_notifier_chain_unregister(&efivar_ops_nh, &sfi-
> >nb);

This is an extraneous deletion of an unrelated notifier which efivarfs
still needs to listen for ops updates from the efi subsystem.

Regards,

James
Christian Brauner March 31, 2025, 3:03 p.m. UTC | #2
On Mon, Mar 31, 2025 at 10:46:43AM -0400, James Bottomley wrote:
> On Mon, 2025-03-31 at 14:42 +0200, Christian Brauner wrote:
> > Allow efivarfs to partake to resync variable state during system
> > hibernation and suspend. Add freeze/thaw support.
> > 
> > This is a pretty straightforward implementation. We simply add
> > regular freeze/thaw support for both userspace and the kernel. This
> > works without any big issues and congrats afaict efivars is the first
> > pseudofilesystem that adds support for filesystem freezing and
> > thawing.
> > 
> > The simplicity comes from the fact that we simply always resync
> > variable state after efivarfs has been frozen. It doesn't matter
> > whether that's because of suspend, userspace initiated freeze or
> > hibernation. Efivars is simple enough that it doesn't matter that we
> > walk all dentries. There are no directories and there aren't insane
> > amounts of entries and both freeze/thaw are already heavy-handed
> > operations. We really really don't need to care.
> 
> Just as a point of order: this can't actually work until freeze/thaw is
> actually plumbed into suspend/resume.
> 
> > 
> > Signed-off-by: Christian Brauner <brauner@kernel.org>
> > ---
> >  fs/efivarfs/internal.h |   1 -
> >  fs/efivarfs/super.c    | 196 +++++++++++++--------------------------
> > ----------
> >  2 files changed, 51 insertions(+), 146 deletions(-)
> > 
> > diff --git a/fs/efivarfs/internal.h b/fs/efivarfs/internal.h
> > index ac6a1dd0a6a5..f913b6824289 100644
> > --- a/fs/efivarfs/internal.h
> > +++ b/fs/efivarfs/internal.h
> > @@ -17,7 +17,6 @@ struct efivarfs_fs_info {
> >  	struct efivarfs_mount_opts mount_opts;
> >  	struct super_block *sb;
> >  	struct notifier_block nb;
> > -	struct notifier_block pm_nb;
> >  };
> >  
> >  struct efi_variable {
> > diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
> > index 0486e9b68bc6..567e849a03fe 100644
> > --- a/fs/efivarfs/super.c
> > +++ b/fs/efivarfs/super.c
> > @@ -20,6 +20,7 @@
> >  #include <linux/printk.h>
> >  
> >  #include "internal.h"
> > +#include "../internal.h"
> >  
> >  static int efivarfs_ops_notifier(struct notifier_block *nb, unsigned
> > long event,
> >  				 void *data)
> > @@ -119,12 +120,18 @@ static int efivarfs_statfs(struct dentry
> > *dentry, struct kstatfs *buf)
> >  
> >  	return 0;
> >  }
> > +
> > +static int efivarfs_freeze_fs(struct super_block *sb);
> > +static int efivarfs_unfreeze_fs(struct super_block *sb);
> > +
> >  static const struct super_operations efivarfs_ops = {
> >  	.statfs = efivarfs_statfs,
> >  	.drop_inode = generic_delete_inode,
> >  	.alloc_inode = efivarfs_alloc_inode,
> >  	.free_inode = efivarfs_free_inode,
> >  	.show_options = efivarfs_show_options,
> > +	.freeze_fs = efivarfs_freeze_fs,
> 
> Why is it necessary to have a freeze_fs operation?  The current code in
> super.c:freeze_super() reads:

Fwiw, I've explained this already in prior mails. The same behavior as
for the ioctl where we check whether the filesystem provides either a
->freeze_fs or ->freeze_super method. If neither is provided the
filesystem is assumed to not have freeze support.

> 
> 	if (sb->s_op->freeze_fs) {
> 		ret = sb->s_op->freeze_fs(sb);
> 
> So it would seem that setting this to NULL has exactly the same effect
> as providing a null method.

No, it would cause freeze to not be called.

IOW, any filesystem that doesn't provides neither a freeze_super or
freeze_fs method doesn't support freeze (that's how the ioctls work as
well) which allows us to only call into filesystems that are able to
properly freeze so we don't need pointless FS_* flags. By only providing
thaw it would end up thawing something that was never frozen. Both are
provided and the freeze methods function as the indicator whether
freezing/thawing is supported.

That could be changed but not in this series. We could also provide
noop_freeze just like we have noop_sync but again, not for this series.

> 
> > +	.unfreeze_fs = efivarfs_unfreeze_fs,
> >  };
> >  
> >  /*
> > 
> [...]
> > @@ -608,9 +516,7 @@ static void efivarfs_kill_sb(struct super_block
> > *sb)
> >  {
> >  	struct efivarfs_fs_info *sfi = sb->s_fs_info;
> >  
> > -	blocking_notifier_chain_unregister(&efivar_ops_nh, &sfi-
> > >nb);
> 
> This is an extraneous deletion of an unrelated notifier which efivarfs
> still needs to listen for ops updates from the efi subsystem.

At first I was bewildered because I thought you were talking about pm_nb
for some reason and was ready to explode. Man, I need a post LSFMM
vacation. :)

Thanks for spotting this. This is now fixed by adding back:

blocking_notifier_chain_unregister(&efivar_ops_nh, &sfi->nb);
James Bottomley April 1, 2025, 7:31 p.m. UTC | #3
On Mon, 2025-03-31 at 14:42 +0200, Christian Brauner wrote:
[...]
> +	pr_info("efivarfs: resyncing variable state\n");
> +	for (;;) {
> +		int err;
> +		size_t size;
> +		struct inode *inode;
> +		struct efivar_entry *entry;
> +
> +		child = find_next_child(sb->s_root, child);
> +		if (!child)
> +			break;
> +
> +		inode = d_inode(child);
> +		entry = efivar_entry(inode);
> +
> +		err = efivar_entry_size(entry, &size);
> +		if (err)
> +			size = 0;
> +		else
> +			size += sizeof(__u32);
> +
> +		inode_lock(inode);
> +		i_size_write(inode, size);
> +		inode_unlock(inode);
> +
> +		if (!err)
> +			continue;
> +
> +		/* The variable doesn't exist anymore, delete it. */

The message that should be here got deleted.  We now only print
messages about variables we add not variables we remove.  I get that
the code is a bit chatty here, but it should either print both the
removing and adding messages or print neither, I think.

> +		simple_recursive_removal(child, NULL);
>  	}
[...]
> -			pr_info("efivarfs: removing variable %pd\n",
> -				ectx.dentry);

This is the lost message, although ectx.dentry should become child.

Regards,

James
Christian Brauner April 2, 2025, 7:44 a.m. UTC | #4
On Tue, Apr 01, 2025 at 03:31:13PM -0400, James Bottomley wrote:
> On Mon, 2025-03-31 at 14:42 +0200, Christian Brauner wrote:
> [...]
> > +	pr_info("efivarfs: resyncing variable state\n");
> > +	for (;;) {
> > +		int err;
> > +		size_t size;
> > +		struct inode *inode;
> > +		struct efivar_entry *entry;
> > +
> > +		child = find_next_child(sb->s_root, child);
> > +		if (!child)
> > +			break;
> > +
> > +		inode = d_inode(child);
> > +		entry = efivar_entry(inode);
> > +
> > +		err = efivar_entry_size(entry, &size);
> > +		if (err)
> > +			size = 0;
> > +		else
> > +			size += sizeof(__u32);
> > +
> > +		inode_lock(inode);
> > +		i_size_write(inode, size);
> > +		inode_unlock(inode);
> > +
> > +		if (!err)
> > +			continue;
> > +
> > +		/* The variable doesn't exist anymore, delete it. */
> 
> The message that should be here got deleted.  We now only print
> messages about variables we add not variables we remove.  I get that
> the code is a bit chatty here, but it should either print both the
> removing and adding messages or print neither, I think.

Ok, I added the deletion printk line back.
diff mbox series

Patch

diff --git a/fs/efivarfs/internal.h b/fs/efivarfs/internal.h
index ac6a1dd0a6a5..f913b6824289 100644
--- a/fs/efivarfs/internal.h
+++ b/fs/efivarfs/internal.h
@@ -17,7 +17,6 @@  struct efivarfs_fs_info {
 	struct efivarfs_mount_opts mount_opts;
 	struct super_block *sb;
 	struct notifier_block nb;
-	struct notifier_block pm_nb;
 };
 
 struct efi_variable {
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 0486e9b68bc6..567e849a03fe 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -20,6 +20,7 @@ 
 #include <linux/printk.h>
 
 #include "internal.h"
+#include "../internal.h"
 
 static int efivarfs_ops_notifier(struct notifier_block *nb, unsigned long event,
 				 void *data)
@@ -119,12 +120,18 @@  static int efivarfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 	return 0;
 }
+
+static int efivarfs_freeze_fs(struct super_block *sb);
+static int efivarfs_unfreeze_fs(struct super_block *sb);
+
 static const struct super_operations efivarfs_ops = {
 	.statfs = efivarfs_statfs,
 	.drop_inode = generic_delete_inode,
 	.alloc_inode = efivarfs_alloc_inode,
 	.free_inode = efivarfs_free_inode,
 	.show_options = efivarfs_show_options,
+	.freeze_fs = efivarfs_freeze_fs,
+	.unfreeze_fs = efivarfs_unfreeze_fs,
 };
 
 /*
@@ -367,8 +374,6 @@  static int efivarfs_fill_super(struct super_block *sb, struct fs_context *fc)
 	if (err)
 		return err;
 
-	register_pm_notifier(&sfi->pm_nb);
-
 	return efivar_init(efivarfs_callback, sb, true);
 }
 
@@ -393,48 +398,6 @@  static const struct fs_context_operations efivarfs_context_ops = {
 	.reconfigure	= efivarfs_reconfigure,
 };
 
-struct efivarfs_ctx {
-	struct dir_context ctx;
-	struct super_block *sb;
-	struct dentry *dentry;
-};
-
-static bool efivarfs_actor(struct dir_context *ctx, const char *name, int len,
-			   loff_t offset, u64 ino, unsigned mode)
-{
-	unsigned long size;
-	struct efivarfs_ctx *ectx = container_of(ctx, struct efivarfs_ctx, ctx);
-	struct qstr qstr = { .name = name, .len = len };
-	struct dentry *dentry = d_hash_and_lookup(ectx->sb->s_root, &qstr);
-	struct inode *inode;
-	struct efivar_entry *entry;
-	int err;
-
-	if (IS_ERR_OR_NULL(dentry))
-		return true;
-
-	inode = d_inode(dentry);
-	entry = efivar_entry(inode);
-
-	err = efivar_entry_size(entry, &size);
-	size += sizeof(__u32);	/* attributes */
-	if (err)
-		size = 0;
-
-	inode_lock_nested(inode, I_MUTEX_CHILD);
-	i_size_write(inode, size);
-	inode_unlock(inode);
-
-	if (!size) {
-		ectx->dentry = dentry;
-		return false;
-	}
-
-	dput(dentry);
-
-	return true;
-}
-
 static int efivarfs_check_missing(efi_char16_t *name16, efi_guid_t vendor,
 				  unsigned long name_size, void *data)
 {
@@ -474,111 +437,59 @@  static int efivarfs_check_missing(efi_char16_t *name16, efi_guid_t vendor,
 	return err;
 }
 
-static void efivarfs_deactivate_super_work(struct work_struct *work)
-{
-	struct super_block *s = container_of(work, struct super_block,
-					     destroy_work);
-	/*
-	 * note: here s->destroy_work is free for reuse (which
-	 * will happen in deactivate_super)
-	 */
-	deactivate_super(s);
-}
-
 static struct file_system_type efivarfs_type;
 
-static int efivarfs_pm_notify(struct notifier_block *nb, unsigned long action,
-			      void *ptr)
+static int efivarfs_freeze_fs(struct super_block *sb)
 {
-	struct efivarfs_fs_info *sfi = container_of(nb, struct efivarfs_fs_info,
-						    pm_nb);
-	struct path path;
-	struct efivarfs_ctx ectx = {
-		.ctx = {
-			.actor	= efivarfs_actor,
-		},
-		.sb = sfi->sb,
-	};
-	struct file *file;
-	struct super_block *s = sfi->sb;
-	static bool rescan_done = true;
-
-	if (action == PM_HIBERNATION_PREPARE) {
-		rescan_done = false;
-		return NOTIFY_OK;
-	} else if (action != PM_POST_HIBERNATION) {
-		return NOTIFY_DONE;
-	}
-
-	if (rescan_done)
-		return NOTIFY_DONE;
-
-	/* ensure single superblock is alive and pin it */
-	if (!atomic_inc_not_zero(&s->s_active))
-		return NOTIFY_DONE;
-
-	pr_info("efivarfs: resyncing variable state\n");
+	/* Nothing for us to do. */
+	return 0;
+}
 
-	path.dentry = sfi->sb->s_root;
+static int efivarfs_unfreeze_fs(struct super_block *sb)
+{
+	struct dentry *child = NULL;
 
 	/*
-	 * do not add SB_KERNMOUNT which a single superblock could
-	 * expose to userspace and which also causes MNT_INTERNAL, see
-	 * below
+	 * Unconditionally resync the variable state on a thaw request.
+	 * Given the size of efivarfs it really doesn't matter to simply
+	 * iterate through all of the entries and resync. Freeze/thaw
+	 * requests are rare enough for that to not matter and the
+	 * number of entries is pretty low too. So we really don't care.
 	 */
-	path.mnt = vfs_kern_mount(&efivarfs_type, 0,
-				  efivarfs_type.name, NULL);
-	if (IS_ERR(path.mnt)) {
-		pr_err("efivarfs: internal mount failed\n");
-		/*
-		 * We may be the last pinner of the superblock but
-		 * calling efivarfs_kill_sb from within the notifier
-		 * here would deadlock trying to unregister it
-		 */
-		INIT_WORK(&s->destroy_work, efivarfs_deactivate_super_work);
-		schedule_work(&s->destroy_work);
-		return PTR_ERR(path.mnt);
+	pr_info("efivarfs: resyncing variable state\n");
+	for (;;) {
+		int err;
+		size_t size;
+		struct inode *inode;
+		struct efivar_entry *entry;
+
+		child = find_next_child(sb->s_root, child);
+		if (!child)
+			break;
+
+		inode = d_inode(child);
+		entry = efivar_entry(inode);
+
+		err = efivar_entry_size(entry, &size);
+		if (err)
+			size = 0;
+		else
+			size += sizeof(__u32);
+
+		inode_lock(inode);
+		i_size_write(inode, size);
+		inode_unlock(inode);
+
+		if (!err)
+			continue;
+
+		/* The variable doesn't exist anymore, delete it. */
+		simple_recursive_removal(child, NULL);
 	}
 
-	/* path.mnt now has pin on superblock, so this must be above one */
-	atomic_dec(&s->s_active);
-
-	file = kernel_file_open(&path, O_RDONLY | O_DIRECTORY | O_NOATIME,
-				current_cred());
-	/*
-	 * safe even if last put because no MNT_INTERNAL means this
-	 * will do delayed deactivate_super and not deadlock
-	 */
-	mntput(path.mnt);
-	if (IS_ERR(file))
-		return NOTIFY_DONE;
-
-	rescan_done = true;
-
-	/*
-	 * First loop over the directory and verify each entry exists,
-	 * removing it if it doesn't
-	 */
-	file->f_pos = 2;	/* skip . and .. */
-	do {
-		ectx.dentry = NULL;
-		iterate_dir(file, &ectx.ctx);
-		if (ectx.dentry) {
-			pr_info("efivarfs: removing variable %pd\n",
-				ectx.dentry);
-			simple_recursive_removal(ectx.dentry, NULL);
-			dput(ectx.dentry);
-		}
-	} while (ectx.dentry);
-	fput(file);
-
-	/*
-	 * then loop over variables, creating them if there's no matching
-	 * dentry
-	 */
-	efivar_init(efivarfs_check_missing, sfi->sb, false);
-
-	return NOTIFY_OK;
+	efivar_init(efivarfs_check_missing, sb, false);
+	pr_info("efivarfs: finished resyncing variable state\n");
+	return 0;
 }
 
 static int efivarfs_init_fs_context(struct fs_context *fc)
@@ -598,9 +509,6 @@  static int efivarfs_init_fs_context(struct fs_context *fc)
 	fc->s_fs_info = sfi;
 	fc->ops = &efivarfs_context_ops;
 
-	sfi->pm_nb.notifier_call = efivarfs_pm_notify;
-	sfi->pm_nb.priority = 0;
-
 	return 0;
 }
 
@@ -608,9 +516,7 @@  static void efivarfs_kill_sb(struct super_block *sb)
 {
 	struct efivarfs_fs_info *sfi = sb->s_fs_info;
 
-	blocking_notifier_chain_unregister(&efivar_ops_nh, &sfi->nb);
 	kill_litter_super(sb);
-	unregister_pm_notifier(&sfi->pm_nb);
 
 	kfree(sfi);
 }