fs: open the block device after allocation the super_block

Message ID	20230724175145.201318-1-hch@lst.de (mailing list archive)
State	New, archived
Headers	show Return-Path: <linux-fsdevel-owner@vger.kernel.org> From: Christoph Hellwig <hch@lst.de> To: viro@zeniv.linux.org.uk, brauner@kernel.org, jack@suse.cz Cc: linux-fsdevel@vger.kernel.org Subject: [PATCH] fs: open the block device after allocation the super_block Date: Mon, 24 Jul 2023 10:51:45 -0700 Message-Id: <20230724175145.201318-1-hch@lst.de> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	fs: open the block device after allocation the super_block \| expand fs: open the block device after allocation the super_block

diff --git a/fs/super.c b/fs/super.c index e781226e28800c..3ef39df5bec506 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1228,12 +1228,7 @@ static const struct blk_holder_ops fs_holder_ops = { static int set_bdev_super(struct super_block *s, void *data) { - s->s_bdev = data; - s->s_dev = s->s_bdev->bd_dev; - s->s_bdi = bdi_get(s->s_bdev->bd_disk->bdi); - - if (bdev_stable_writes(s->s_bdev)) - s->s_iflags |= SB_I_STABLE_WRITES; + s->s_dev = *(dev_t *)data; return 0; } @@ -1244,7 +1239,61 @@ static int set_bdev_super_fc(struct super_block *s, struct fs_context *fc) static int test_bdev_super_fc(struct super_block *s, struct fs_context *fc) { - return !(s->s_iflags & SB_I_RETIRED) && s->s_bdev == fc->sget_key; + return !(s->s_iflags & SB_I_RETIRED) && + s->s_dev == *(dev_t *)fc->sget_key; +} + +static int setup_bdev_super(struct super_block *sb, int sb_flags, + struct fs_context *fc) +{ + blk_mode_t mode = sb_open_mode(sb_flags); + struct block_device *bdev; + + bdev = blkdev_get_by_dev(sb->s_dev, mode, sb->s_type, &fs_holder_ops); + if (IS_ERR(bdev)) { + if (fc) + errorf(fc, "%s: Can't open blockdev", fc->source); + return PTR_ERR(bdev); + } + + /* + * This really should be in blkdev_get_by_dev, but right now can't due + * to legacy issues that require us to allow opening a block device node + * writable from userspace even for a read-only block device. + */ + if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) { + blkdev_put(bdev, sb->s_type); + return -EACCES; + } + + /* + * Until SB_BORN flag is set, there can be no active superblock + * references and thus no filesystem freezing. get_active_super() will + * just loop waiting for SB_BORN so even freeze_bdev() cannot proceed. + * + * It is enough to check bdev was not frozen before we set s_bdev. + */ + mutex_lock(&bdev->bd_fsfreeze_mutex); + if (bdev->bd_fsfreeze_count > 0) { + mutex_unlock(&bdev->bd_fsfreeze_mutex); + if (fc) + warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev); + blkdev_put(bdev, sb->s_type); + return -EBUSY; + } + spin_lock(&sb_lock); + sb->s_bdev = bdev; + sb->s_bdi = bdi_get(bdev->bd_disk->bdi); + if (bdev_stable_writes(bdev)) + sb->s_iflags |= SB_I_STABLE_WRITES; + spin_unlock(&sb_lock); + mutex_unlock(&bdev->bd_fsfreeze_mutex); + + snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev); + shrinker_debugfs_rename(&sb->s_shrink, "sb-%s:%s", sb->s_type->name, + sb->s_id); + sb_set_blocksize(sb, block_size(bdev)); + return 0; } /** @@ -1256,73 +1305,50 @@ int get_tree_bdev(struct fs_context *fc, int (*fill_super)(struct super_block *, struct fs_context *)) { - struct block_device *bdev; struct super_block *s; int error = 0; + dev_t dev; if (!fc->source) return invalf(fc, "No source specified"); - bdev = blkdev_get_by_path(fc->source, sb_open_mode(fc->sb_flags), - fc->fs_type, &fs_holder_ops); - if (IS_ERR(bdev)) { - errorf(fc, "%s: Can't open blockdev", fc->source); - return PTR_ERR(bdev); - } - - /* Once the superblock is inserted into the list by sget_fc(), s_umount - * will protect the lockfs code from trying to start a snapshot while - * we are mounting - */ - mutex_lock(&bdev->bd_fsfreeze_mutex); - if (bdev->bd_fsfreeze_count > 0) { - mutex_unlock(&bdev->bd_fsfreeze_mutex); - warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev); - blkdev_put(bdev, fc->fs_type); - return -EBUSY; + error = lookup_bdev(fc->source, &dev); + if (error) { + errorf(fc, "%s: Can't lookup blockdev", fc->source); + return error; } fc->sb_flags |= SB_NOSEC; - fc->sget_key = bdev; + fc->sget_key = &dev; s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc); - mutex_unlock(&bdev->bd_fsfreeze_mutex); - if (IS_ERR(s)) { - blkdev_put(bdev, fc->fs_type); + if (IS_ERR(s)) return PTR_ERR(s); - } if (s->s_root) { /* Don't summarily change the RO/RW state. */ if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) { - warnf(fc, "%pg: Can't mount, would change RO state", bdev); + warnf(fc, "%pg: Can't mount, would change RO state", s->s_bdev); deactivate_locked_super(s); - blkdev_put(bdev, fc->fs_type); return -EBUSY; } - + } else { /* - * s_umount nests inside open_mutex during - * __invalidate_device(). blkdev_put() acquires - * open_mutex and can't be called under s_umount. Drop - * s_umount temporarily. This is safe as we're - * holding an active reference. + * We drop s_umount here because we need to open the bdev and + * bdev->open_mutex ranks above s_umount (blkdev_put() -> + * __invalidate_device()). It is safe because we have active sb + * reference and SB_BORN is not set yet. */ up_write(&s->s_umount); - blkdev_put(bdev, fc->fs_type); + error = setup_bdev_super(s, fc->sb_flags, fc); down_write(&s->s_umount); - } else { - snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); - shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s", - fc->fs_type->name, s->s_id); - sb_set_blocksize(s, block_size(bdev)); - error = fill_super(s, fc); + if (!error) + error = fill_super(s, fc); if (error) { deactivate_locked_super(s); return error; } - s->s_flags |= SB_ACTIVE; - bdev->bd_super = s; + s->s_bdev->bd_super = s; } BUG_ON(fc->root); @@ -1333,79 +1359,53 @@ EXPORT_SYMBOL(get_tree_bdev); static int test_bdev_super(struct super_block *s, void *data) { - return !(s->s_iflags & SB_I_RETIRED) && (void *)s->s_bdev == data; + return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)data; } struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)) { - struct block_device *bdev; struct super_block *s; - int error = 0; + int error; + dev_t dev; - bdev = blkdev_get_by_path(dev_name, sb_open_mode(flags), fs_type, - &fs_holder_ops); - if (IS_ERR(bdev)) - return ERR_CAST(bdev); + error = lookup_bdev(dev_name, &dev); + if (error) + return ERR_PTR(error); - /* - * once the super is inserted into the list by sget, s_umount - * will protect the lockfs code from trying to start a snapshot - * while we are mounting - */ - mutex_lock(&bdev->bd_fsfreeze_mutex); - if (bdev->bd_fsfreeze_count > 0) { - mutex_unlock(&bdev->bd_fsfreeze_mutex); - error = -EBUSY; - goto error_bdev; - } - s = sget(fs_type, test_bdev_super, set_bdev_super, flags | SB_NOSEC, - bdev); - mutex_unlock(&bdev->bd_fsfreeze_mutex); + flags |= SB_NOSEC; + s = sget(fs_type, test_bdev_super, set_bdev_super, flags, &dev); if (IS_ERR(s)) - goto error_s; + return ERR_CAST(s); if (s->s_root) { if ((flags ^ s->s_flags) & SB_RDONLY) { deactivate_locked_super(s); - error = -EBUSY; - goto error_bdev; + return ERR_PTR(-EBUSY); } - + } else { /* - * s_umount nests inside open_mutex during - * __invalidate_device(). blkdev_put() acquires - * open_mutex and can't be called under s_umount. Drop - * s_umount temporarily. This is safe as we're - * holding an active reference. + * We drop s_umount here because we need to open the bdev and + * bdev->open_mutex ranks above s_umount (blkdev_put() -> + * __invalidate_device()). It is safe because we have active sb + * reference and SB_BORN is not set yet. */ up_write(&s->s_umount); - blkdev_put(bdev, fs_type); + error = setup_bdev_super(s, flags, NULL); down_write(&s->s_umount); - } else { - snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); - shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s", - fs_type->name, s->s_id); - sb_set_blocksize(s, block_size(bdev)); - error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); + if (!error) + error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); if (error) { deactivate_locked_super(s); - goto error; + return ERR_PTR(error); } s->s_flags |= SB_ACTIVE; - bdev->bd_super = s; + s->s_bdev->bd_super = s; } return dget(s->s_root); - -error_s: - error = PTR_ERR(s); -error_bdev: - blkdev_put(bdev, fs_type); -error: - return ERR_PTR(error); } EXPORT_SYMBOL(mount_bdev); @@ -1413,10 +1413,12 @@ void kill_block_super(struct super_block *sb) { struct block_device *bdev = sb->s_bdev; - bdev->bd_super = NULL; generic_shutdown_super(sb); - sync_blockdev(bdev); - blkdev_put(bdev, sb->s_type); + if (bdev) { + bdev->bd_super = NULL; + sync_blockdev(bdev); + blkdev_put(bdev, sb->s_type); + } } EXPORT_SYMBOL(kill_block_super);

fs: open the block device after allocation the super_block

Commit Message

Comments

Patch