diff mbox series

[02/15] exfat: move freeing sbi, upcase table and dropping nls into rcu-delayed helper

Message ID 20231002023015.GC3389589@ZenIV (mailing list archive)
State New, archived
Headers show
Series [01/15] rcu pathwalk: prevent bogus hard errors from may_lookup() | expand

Commit Message

Al Viro Oct. 2, 2023, 2:30 a.m. UTC
That stuff can be accessed by ->d_hash()/->d_compare(); as it is, we have
a hard-to-hit UAF if rcu pathwalk manages to get into ->d_hash() on a filesystem
that is in process of getting shut down.

Besides, having nls and upcase table cleanup moved from ->put_super() towards
the place where sbi is freed makes for simpler failure exits.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exfat/exfat_fs.h |  1 +
 fs/exfat/nls.c      | 14 ++++----------
 fs/exfat/super.c    | 20 +++++++++++---------
 3 files changed, 16 insertions(+), 19 deletions(-)

Comments

Linus Torvalds Oct. 2, 2023, 4:10 p.m. UTC | #1
On Sun, 1 Oct 2023 at 19:30, Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> That stuff can be accessed by ->d_hash()/->d_compare(); as it is, we have
> a hard-to-hit UAF if rcu pathwalk manages to get into ->d_hash() on a filesystem
> that is in process of getting shut down.
>
> Besides, having nls and upcase table cleanup moved from ->put_super() towards
> the place where sbi is freed makes for simpler failure exits.

I don't disagree with moving the freeing, but the RCU-delay makes me go "hmm".

Is there some reason why we can't try to do this in generic code? The
umount code already does RCU delays for other things, I get the
feeling that we should have an RCU delay between "put_super" and
"kill_sb".

Could we move the ->kill_sb() call into destroy_super_work(), which is
already RCU-delayed, for example?

It feels wrong to have the filesystems have to deal with the vfs layer
doing RCU-lookups.

             Linus
Al Viro Oct. 2, 2023, 6:04 p.m. UTC | #2
On Mon, Oct 02, 2023 at 09:10:22AM -0700, Linus Torvalds wrote:
> On Sun, 1 Oct 2023 at 19:30, Al Viro <viro@zeniv.linux.org.uk> wrote:
> >
> > That stuff can be accessed by ->d_hash()/->d_compare(); as it is, we have
> > a hard-to-hit UAF if rcu pathwalk manages to get into ->d_hash() on a filesystem
> > that is in process of getting shut down.
> >
> > Besides, having nls and upcase table cleanup moved from ->put_super() towards
> > the place where sbi is freed makes for simpler failure exits.
> 
> I don't disagree with moving the freeing, but the RCU-delay makes me go "hmm".
> 
> Is there some reason why we can't try to do this in generic code? The
> umount code already does RCU delays for other things, I get the
> feeling that we should have an RCU delay between "put_super" and
> "kill_sb".
> 
> Could we move the ->kill_sb() call into destroy_super_work(), which is
> already RCU-delayed, for example?
> 
> It feels wrong to have the filesystems have to deal with the vfs layer
> doing RCU-lookups.

	For one thing, ->kill_sb() might do tons of IO.  And we really want
to have that done before umount(2) returns to userland, so that part can't
be offloaded via schedule_work()...
diff mbox series

Patch

diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index f55498e5c23d..22e17b0a66e8 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -273,6 +273,7 @@  struct exfat_sb_info {
 
 	spinlock_t inode_hash_lock;
 	struct hlist_head inode_hashtable[EXFAT_HASH_SIZE];
+	struct rcu_head rcu;
 };
 
 #define EXFAT_CACHE_VALID	0
diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c
index 705710f93e2d..afdf13c34ff5 100644
--- a/fs/exfat/nls.c
+++ b/fs/exfat/nls.c
@@ -655,7 +655,6 @@  static int exfat_load_upcase_table(struct super_block *sb,
 	unsigned int sect_size = sb->s_blocksize;
 	unsigned int i, index = 0;
 	u32 chksum = 0;
-	int ret;
 	unsigned char skip = false;
 	unsigned short *upcase_table;
 
@@ -673,8 +672,7 @@  static int exfat_load_upcase_table(struct super_block *sb,
 		if (!bh) {
 			exfat_err(sb, "failed to read sector(0x%llx)",
 				  (unsigned long long)sector);
-			ret = -EIO;
-			goto free_table;
+			return -EIO;
 		}
 		sector++;
 		for (i = 0; i < sect_size && index <= 0xFFFF; i += 2) {
@@ -701,15 +699,12 @@  static int exfat_load_upcase_table(struct super_block *sb,
 
 	exfat_err(sb, "failed to load upcase table (idx : 0x%08x, chksum : 0x%08x, utbl_chksum : 0x%08x)",
 		  index, chksum, utbl_checksum);
-	ret = -EINVAL;
-free_table:
-	exfat_free_upcase_table(sbi);
-	return ret;
+	return -EINVAL;
 }
 
 static int exfat_load_default_upcase_table(struct super_block *sb)
 {
-	int i, ret = -EIO;
+	int i;
 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
 	unsigned char skip = false;
 	unsigned short uni = 0, *upcase_table;
@@ -740,8 +735,7 @@  static int exfat_load_default_upcase_table(struct super_block *sb)
 		return 0;
 
 	/* FATAL error: default upcase table has error */
-	exfat_free_upcase_table(sbi);
-	return ret;
+	return -EIO;
 }
 
 int exfat_create_upcase_table(struct super_block *sb)
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 2778bd9b631e..593cfff8c6f4 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -39,9 +39,6 @@  static void exfat_put_super(struct super_block *sb)
 	exfat_free_bitmap(sbi);
 	brelse(sbi->boot_bh);
 	mutex_unlock(&sbi->s_lock);
-
-	unload_nls(sbi->nls_io);
-	exfat_free_upcase_table(sbi);
 }
 
 static int exfat_sync_fs(struct super_block *sb, int wait)
@@ -593,7 +590,7 @@  static int __exfat_fill_super(struct super_block *sb)
 	ret = exfat_load_bitmap(sb);
 	if (ret) {
 		exfat_err(sb, "failed to load alloc-bitmap");
-		goto free_upcase_table;
+		goto free_bh;
 	}
 
 	ret = exfat_count_used_clusters(sb, &sbi->used_clusters);
@@ -606,8 +603,6 @@  static int __exfat_fill_super(struct super_block *sb)
 
 free_alloc_bitmap:
 	exfat_free_bitmap(sbi);
-free_upcase_table:
-	exfat_free_upcase_table(sbi);
 free_bh:
 	brelse(sbi->boot_bh);
 	return ret;
@@ -694,12 +689,10 @@  static int exfat_fill_super(struct super_block *sb, struct fs_context *fc)
 	sb->s_root = NULL;
 
 free_table:
-	exfat_free_upcase_table(sbi);
 	exfat_free_bitmap(sbi);
 	brelse(sbi->boot_bh);
 
 check_nls_io:
-	unload_nls(sbi->nls_io);
 	return err;
 }
 
@@ -764,13 +757,22 @@  static int exfat_init_fs_context(struct fs_context *fc)
 	return 0;
 }
 
+static void delayed_free(struct rcu_head *p)
+{
+	struct exfat_sb_info *sbi = container_of(p, struct exfat_sb_info, rcu);
+
+	unload_nls(sbi->nls_io);
+	exfat_free_upcase_table(sbi);
+	exfat_free_sbi(sbi);
+}
+
 static void exfat_kill_sb(struct super_block *sb)
 {
 	struct exfat_sb_info *sbi = sb->s_fs_info;
 
 	kill_block_super(sb);
 	if (sbi)
-		exfat_free_sbi(sbi);
+		call_rcu(&sbi->rcu, delayed_free);
 }
 
 static struct file_system_type exfat_fs_type = {