diff mbox

[RFC] Btrfs: add support for persistent mount options

Message ID 1375813640-14063-1-git-send-email-fdmanana@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Filipe Manana Aug. 6, 2013, 6:27 p.m. UTC
This change allows for most mount options to be persisted in
the filesystem, and be applied when the filesystem is mounted.
If the same options are specified at mount time, the persisted
values for those options are ignored.

The only options not supported are: subvol, subvolid, subvolrootid,
device and thread_pool. This limitation is due to how this feature
is implemented: basically there's an optional value (of type
struct btrfs_dir_item) in the tree of tree roots used to store the
list of options in the same format as they are passed to btrfs_mount().
This means any mount option that takes effect before the tree of tree
roots is setup is not supported.

To set these options, the user space tool btrfstune was modified
to persist the list of options into an unmounted filesystem's
tree of tree roots.

NOTE: Like the corresponding btrfs-progs patch, this is a WIP with
the goal o gathering feedback.

Signed-off-by: Filipe David Borba Manana <fdmanana@gmail.com>
---
 fs/btrfs/ctree.h   |   11 +++++++-
 fs/btrfs/disk-io.c |   72 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/btrfs/super.c   |   46 +++++++++++++++++++++++++++++++--
 3 files changed, 125 insertions(+), 4 deletions(-)

Comments

Eric Sandeen Aug. 6, 2013, 8:37 p.m. UTC | #1
On 8/6/13 1:27 PM, Filipe David Borba Manana wrote:
> This change allows for most mount options to be persisted in
> the filesystem, and be applied when the filesystem is mounted.
> If the same options are specified at mount time, the persisted
> values for those options are ignored.
> 
> The only options not supported are: subvol, subvolid, subvolrootid,
> device and thread_pool. This limitation is due to how this feature
> is implemented: basically there's an optional value (of type
> struct btrfs_dir_item) in the tree of tree roots used to store the
> list of options in the same format as they are passed to btrfs_mount().
> This means any mount option that takes effect before the tree of tree
> roots is setup is not supported.
> 
> To set these options, the user space tool btrfstune was modified
> to persist the list of options into an unmounted filesystem's
> tree of tree roots.

So, it does this thing, ok - but why?
What is seen as the administrative advantage of this new mechanism?

Just to play devil's advocate, and to add a bit of history:

On any production system, the filesystems will be mounted via fstab,
which has the advantages of being widely known, well understood, and
100% expected - as well as being transparent, unsurprising, and seamless.

For history: ext4 did this too.  And now it's in a situation where it's
got mount options coming at it from both the superblock and from
the commandline (or fstab), and sometimes they conflict; it also tries
to report mount options in /proc/mounts, but has grown hairy code
to decide which ones to print and which ones to not print (if it's
a "default" option, don't print it in /proc/mounts, but what's default,
code-default or fs-default?)  And it's really kind of an ugly mess.

Further, mounting 2 filesystems w/ no options in fstab or on the
commandline, and getting different behavior due to hidden (sorry,
persistent) options in the fs itself is surprising, and surprise
is rarely good.

So this patch adds 100+ lines of new code, to implement this idea, but:
what is the advantage?  Unless there is a compelling administrative
use case, I'd vote against it.  Lines of code that don't exist don't
have bugs.  ;)

-Eric

> NOTE: Like the corresponding btrfs-progs patch, this is a WIP with
> the goal o gathering feedback.
> 
> Signed-off-by: Filipe David Borba Manana <fdmanana@gmail.com>
> ---
>  fs/btrfs/ctree.h   |   11 +++++++-
>  fs/btrfs/disk-io.c |   72 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  fs/btrfs/super.c   |   46 +++++++++++++++++++++++++++++++--
>  3 files changed, 125 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index cbb1263..24363df 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -121,6 +121,14 @@ struct btrfs_ordered_sum;
>   */
>  #define BTRFS_FREE_INO_OBJECTID -12ULL
>  
> +/*
> + * Item that stores permanent mount options. These options
> + * have effect if they are not specified as well at mount
> + * time (that is, if a permanent option is also specified at
> + * mount time, the later wins).
> + */
> +#define BTRFS_PERSISTENT_OPTIONS_OBJECTID -13ULL
> +
>  /* dummy objectid represents multiple objectids */
>  #define BTRFS_MULTIPLE_OBJECTIDS -255ULL
>  
> @@ -3739,7 +3747,8 @@ void btrfs_exit_sysfs(void);
>  ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
>  
>  /* super.c */
> -int btrfs_parse_options(struct btrfs_root *root, char *options);
> +int btrfs_parse_options(struct btrfs_root *root, char *options,
> +			int parsing_persistent, int **options_parsed);
>  int btrfs_sync_fs(struct super_block *sb, int wait);
>  
>  #ifdef CONFIG_PRINTK
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 254cdc8..eeabdd4 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -2077,6 +2077,53 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info)
>  	}
>  }
>  
> +static char *get_persistent_options(struct btrfs_root *tree_root)
> +{
> +	int ret;
> +	struct btrfs_key key;
> +	struct btrfs_path *path;
> +	struct extent_buffer *leaf;
> +	struct btrfs_dir_item *di;
> +	u32 name_len, data_len;
> +	char *options = NULL;
> +
> +	path = btrfs_alloc_path();
> +	if (!path)
> +		return ERR_PTR(-ENOMEM);
> +
> +	key.objectid = BTRFS_PERSISTENT_OPTIONS_OBJECTID;
> +	key.type = 0;
> +	key.offset = 0;
> +
> +	ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
> +	if (ret < 0)
> +		goto out;
> +	if (ret > 0) {
> +		ret = 0;
> +		goto out;
> +	}
> +
> +	leaf = path->nodes[0];
> +	di = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
> +	name_len = btrfs_dir_name_len(leaf, di);
> +	data_len = btrfs_dir_data_len(leaf, di);
> +	options = kmalloc(data_len + 1, GFP_NOFS);
> +	if (!options) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +	read_extent_buffer(leaf, options,
> +			   (unsigned long)((char *)(di + 1) + name_len),
> +			   data_len);
> +	options[data_len] = '\0';
> +
> +out:
> +	btrfs_free_path(path);
> +	if (ret)
> +		return ERR_PTR(ret);
> +	return options;
> +}
> +
>  int open_ctree(struct super_block *sb,
>  	       struct btrfs_fs_devices *fs_devices,
>  	       char *options)
> @@ -2103,6 +2150,8 @@ int open_ctree(struct super_block *sb,
>  	int err = -EINVAL;
>  	int num_backups_tried = 0;
>  	int backup_index = 0;
> +	int *mnt_options = NULL;
> +	char *persist_options = NULL;
>  
>  	tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info);
>  	chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
> @@ -2372,7 +2421,7 @@ int open_ctree(struct super_block *sb,
>  	 */
>  	fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
>  
> -	ret = btrfs_parse_options(tree_root, options);
> +	ret = btrfs_parse_options(tree_root, options, 0, &mnt_options);
>  	if (ret) {
>  		err = ret;
>  		goto fail_alloc;
> @@ -2656,6 +2705,26 @@ retry_root_backup:
>  	btrfs_set_root_node(&tree_root->root_item, tree_root->node);
>  	tree_root->commit_root = btrfs_root_node(tree_root);
>  
> +	persist_options = get_persistent_options(tree_root);
> +	if (IS_ERR(persist_options)) {
> +		ret = PTR_ERR(persist_options);
> +		goto fail_tree_roots;
> +	} else if (persist_options) {
> +		ret = btrfs_parse_options(tree_root, persist_options,
> +					  1, &mnt_options);
> +		kfree(mnt_options);
> +		mnt_options = NULL;
> +		if (ret) {
> +			err = ret;
> +			goto fail_tree_roots;
> +		}
> +		if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO) {
> +			features = btrfs_super_incompat_flags(disk_super);
> +			features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
> +			btrfs_set_super_incompat_flags(disk_super, features);
> +		}
> +	}
> +
>  	location.objectid = BTRFS_EXTENT_TREE_OBJECTID;
>  	location.type = BTRFS_ROOT_ITEM_KEY;
>  	location.offset = 0;
> @@ -2904,6 +2973,7 @@ fail_block_groups:
>  	btrfs_free_block_groups(fs_info);
>  
>  fail_tree_roots:
> +	kfree(mnt_options);
>  	free_root_pointers(fs_info, 1);
>  	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
>  
> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> index 2cc5b80..ced0a85 100644
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -369,7 +369,8 @@ static match_table_t tokens = {
>   * reading in a new superblock is parsed here.
>   * XXX JDM: This needs to be cleaned up for remount.
>   */
> -int btrfs_parse_options(struct btrfs_root *root, char *options)
> +int btrfs_parse_options(struct btrfs_root *root, char *options,
> +			int parsing_persistent, int **options_parsed)
>  {
>  	struct btrfs_fs_info *info = root->fs_info;
>  	substring_t args[MAX_OPT_ARGS];
> @@ -379,11 +380,21 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
>  	int ret = 0;
>  	char *compress_type;
>  	bool compress_force = false;
> +	int *parsed = NULL;
>  
>  	cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
>  	if (cache_gen)
>  		btrfs_set_opt(info->mount_opt, SPACE_CACHE);
>  
> +	if (!parsing_persistent && options_parsed) {
> +		parsed = kzalloc(sizeof(int) * ARRAY_SIZE(tokens), GFP_NOFS);
> +		if (!parsed)
> +			return -ENOMEM;
> +		*options_parsed = parsed;
> +	} else if (parsing_persistent && options_parsed) {
> +		parsed = *options_parsed;
> +	}
> +
>  	if (!options)
>  		goto out;
>  
> @@ -403,6 +414,37 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
>  			continue;
>  
>  		token = match_token(p, tokens, args);
> +
> +		if (parsing_persistent && parsed) {
> +			/*
> +			 * A persistent option value is ignored if a value for
> +			 * that option was given at mount time.
> +			 */
> +
> +			if (parsed[token])
> +				continue;
> +			if (token == Opt_no_space_cache &&
> +			    parsed[Opt_space_cache])
> +				continue;
> +			if (token == Opt_space_cache &&
> +			    parsed[Opt_no_space_cache])
> +				continue;
> +
> +			if (token == Opt_subvol)
> +				printk(KERN_WARNING "btrfs: subvol not supported as a persistent option");
> +			else if (token == Opt_subvolid)
> +				printk(KERN_WARNING "btrfs: subvolid not supported as a persistent option");
> +			else if (token == Opt_subvolrootid)
> +				printk(KERN_WARNING "btrfs: subvolrootid not supported as a persistent option");
> +			else if (token == Opt_device)
> +				printk(KERN_WARNING "btrfs: device not supported as a persistent option");
> +			else if (token == Opt_thread_pool)
> +				printk(KERN_WARNING "btrfs: thread_pool not supported as a persistent option");
> +		}
> +
> +		if (!parsing_persistent && parsed)
> +			parsed[token] = 1;
> +
>  		switch (token) {
>  		case Opt_degraded:
>  			printk(KERN_INFO "btrfs: allowing degraded mounts\n");
> @@ -1279,7 +1321,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
>  
>  	btrfs_remount_prepare(fs_info);
>  
> -	ret = btrfs_parse_options(root, data);
> +	ret = btrfs_parse_options(root, data, 0, NULL);
>  	if (ret) {
>  		ret = -EINVAL;
>  		goto restore;
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Filipe Manana Aug. 6, 2013, 8:45 p.m. UTC | #2
On Tue, Aug 6, 2013 at 9:37 PM, Eric Sandeen <sandeen@redhat.com> wrote:
> On 8/6/13 1:27 PM, Filipe David Borba Manana wrote:
>> This change allows for most mount options to be persisted in
>> the filesystem, and be applied when the filesystem is mounted.
>> If the same options are specified at mount time, the persisted
>> values for those options are ignored.
>>
>> The only options not supported are: subvol, subvolid, subvolrootid,
>> device and thread_pool. This limitation is due to how this feature
>> is implemented: basically there's an optional value (of type
>> struct btrfs_dir_item) in the tree of tree roots used to store the
>> list of options in the same format as they are passed to btrfs_mount().
>> This means any mount option that takes effect before the tree of tree
>> roots is setup is not supported.
>>
>> To set these options, the user space tool btrfstune was modified
>> to persist the list of options into an unmounted filesystem's
>> tree of tree roots.
>
> So, it does this thing, ok - but why?
> What is seen as the administrative advantage of this new mechanism?
>
> Just to play devil's advocate, and to add a bit of history:
>
> On any production system, the filesystems will be mounted via fstab,
> which has the advantages of being widely known, well understood, and
> 100% expected - as well as being transparent, unsurprising, and seamless.
>
> For history: ext4 did this too.  And now it's in a situation where it's
> got mount options coming at it from both the superblock and from
> the commandline (or fstab), and sometimes they conflict; it also tries
> to report mount options in /proc/mounts, but has grown hairy code
> to decide which ones to print and which ones to not print (if it's
> a "default" option, don't print it in /proc/mounts, but what's default,
> code-default or fs-default?)  And it's really kind of an ugly mess.
>
> Further, mounting 2 filesystems w/ no options in fstab or on the
> commandline, and getting different behavior due to hidden (sorry,
> persistent) options in the fs itself is surprising, and surprise
> is rarely good.
>
> So this patch adds 100+ lines of new code, to implement this idea, but:
> what is the advantage?  Unless there is a compelling administrative
> use case, I'd vote against it.  Lines of code that don't exist don't
> have bugs.  ;)

There was a recent good example (imho at least) mentioned by Xavier
Gnata some time ago:

http://comments.gmane.org/gmane.comp.file-systems.btrfs/26011

cheers


>
> -Eric
>
>> NOTE: Like the corresponding btrfs-progs patch, this is a WIP with
>> the goal o gathering feedback.
>>
>> Signed-off-by: Filipe David Borba Manana <fdmanana@gmail.com>
>> ---
>>  fs/btrfs/ctree.h   |   11 +++++++-
>>  fs/btrfs/disk-io.c |   72 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>>  fs/btrfs/super.c   |   46 +++++++++++++++++++++++++++++++--
>>  3 files changed, 125 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
>> index cbb1263..24363df 100644
>> --- a/fs/btrfs/ctree.h
>> +++ b/fs/btrfs/ctree.h
>> @@ -121,6 +121,14 @@ struct btrfs_ordered_sum;
>>   */
>>  #define BTRFS_FREE_INO_OBJECTID -12ULL
>>
>> +/*
>> + * Item that stores permanent mount options. These options
>> + * have effect if they are not specified as well at mount
>> + * time (that is, if a permanent option is also specified at
>> + * mount time, the later wins).
>> + */
>> +#define BTRFS_PERSISTENT_OPTIONS_OBJECTID -13ULL
>> +
>>  /* dummy objectid represents multiple objectids */
>>  #define BTRFS_MULTIPLE_OBJECTIDS -255ULL
>>
>> @@ -3739,7 +3747,8 @@ void btrfs_exit_sysfs(void);
>>  ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
>>
>>  /* super.c */
>> -int btrfs_parse_options(struct btrfs_root *root, char *options);
>> +int btrfs_parse_options(struct btrfs_root *root, char *options,
>> +                     int parsing_persistent, int **options_parsed);
>>  int btrfs_sync_fs(struct super_block *sb, int wait);
>>
>>  #ifdef CONFIG_PRINTK
>> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
>> index 254cdc8..eeabdd4 100644
>> --- a/fs/btrfs/disk-io.c
>> +++ b/fs/btrfs/disk-io.c
>> @@ -2077,6 +2077,53 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info)
>>       }
>>  }
>>
>> +static char *get_persistent_options(struct btrfs_root *tree_root)
>> +{
>> +     int ret;
>> +     struct btrfs_key key;
>> +     struct btrfs_path *path;
>> +     struct extent_buffer *leaf;
>> +     struct btrfs_dir_item *di;
>> +     u32 name_len, data_len;
>> +     char *options = NULL;
>> +
>> +     path = btrfs_alloc_path();
>> +     if (!path)
>> +             return ERR_PTR(-ENOMEM);
>> +
>> +     key.objectid = BTRFS_PERSISTENT_OPTIONS_OBJECTID;
>> +     key.type = 0;
>> +     key.offset = 0;
>> +
>> +     ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
>> +     if (ret < 0)
>> +             goto out;
>> +     if (ret > 0) {
>> +             ret = 0;
>> +             goto out;
>> +     }
>> +
>> +     leaf = path->nodes[0];
>> +     di = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
>> +     name_len = btrfs_dir_name_len(leaf, di);
>> +     data_len = btrfs_dir_data_len(leaf, di);
>> +     options = kmalloc(data_len + 1, GFP_NOFS);
>> +     if (!options) {
>> +             ret = -ENOMEM;
>> +             goto out;
>> +     }
>> +     read_extent_buffer(leaf, options,
>> +                        (unsigned long)((char *)(di + 1) + name_len),
>> +                        data_len);
>> +     options[data_len] = '\0';
>> +
>> +out:
>> +     btrfs_free_path(path);
>> +     if (ret)
>> +             return ERR_PTR(ret);
>> +     return options;
>> +}
>> +
>>  int open_ctree(struct super_block *sb,
>>              struct btrfs_fs_devices *fs_devices,
>>              char *options)
>> @@ -2103,6 +2150,8 @@ int open_ctree(struct super_block *sb,
>>       int err = -EINVAL;
>>       int num_backups_tried = 0;
>>       int backup_index = 0;
>> +     int *mnt_options = NULL;
>> +     char *persist_options = NULL;
>>
>>       tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info);
>>       chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
>> @@ -2372,7 +2421,7 @@ int open_ctree(struct super_block *sb,
>>        */
>>       fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
>>
>> -     ret = btrfs_parse_options(tree_root, options);
>> +     ret = btrfs_parse_options(tree_root, options, 0, &mnt_options);
>>       if (ret) {
>>               err = ret;
>>               goto fail_alloc;
>> @@ -2656,6 +2705,26 @@ retry_root_backup:
>>       btrfs_set_root_node(&tree_root->root_item, tree_root->node);
>>       tree_root->commit_root = btrfs_root_node(tree_root);
>>
>> +     persist_options = get_persistent_options(tree_root);
>> +     if (IS_ERR(persist_options)) {
>> +             ret = PTR_ERR(persist_options);
>> +             goto fail_tree_roots;
>> +     } else if (persist_options) {
>> +             ret = btrfs_parse_options(tree_root, persist_options,
>> +                                       1, &mnt_options);
>> +             kfree(mnt_options);
>> +             mnt_options = NULL;
>> +             if (ret) {
>> +                     err = ret;
>> +                     goto fail_tree_roots;
>> +             }
>> +             if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO) {
>> +                     features = btrfs_super_incompat_flags(disk_super);
>> +                     features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
>> +                     btrfs_set_super_incompat_flags(disk_super, features);
>> +             }
>> +     }
>> +
>>       location.objectid = BTRFS_EXTENT_TREE_OBJECTID;
>>       location.type = BTRFS_ROOT_ITEM_KEY;
>>       location.offset = 0;
>> @@ -2904,6 +2973,7 @@ fail_block_groups:
>>       btrfs_free_block_groups(fs_info);
>>
>>  fail_tree_roots:
>> +     kfree(mnt_options);
>>       free_root_pointers(fs_info, 1);
>>       invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
>>
>> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
>> index 2cc5b80..ced0a85 100644
>> --- a/fs/btrfs/super.c
>> +++ b/fs/btrfs/super.c
>> @@ -369,7 +369,8 @@ static match_table_t tokens = {
>>   * reading in a new superblock is parsed here.
>>   * XXX JDM: This needs to be cleaned up for remount.
>>   */
>> -int btrfs_parse_options(struct btrfs_root *root, char *options)
>> +int btrfs_parse_options(struct btrfs_root *root, char *options,
>> +                     int parsing_persistent, int **options_parsed)
>>  {
>>       struct btrfs_fs_info *info = root->fs_info;
>>       substring_t args[MAX_OPT_ARGS];
>> @@ -379,11 +380,21 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
>>       int ret = 0;
>>       char *compress_type;
>>       bool compress_force = false;
>> +     int *parsed = NULL;
>>
>>       cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
>>       if (cache_gen)
>>               btrfs_set_opt(info->mount_opt, SPACE_CACHE);
>>
>> +     if (!parsing_persistent && options_parsed) {
>> +             parsed = kzalloc(sizeof(int) * ARRAY_SIZE(tokens), GFP_NOFS);
>> +             if (!parsed)
>> +                     return -ENOMEM;
>> +             *options_parsed = parsed;
>> +     } else if (parsing_persistent && options_parsed) {
>> +             parsed = *options_parsed;
>> +     }
>> +
>>       if (!options)
>>               goto out;
>>
>> @@ -403,6 +414,37 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
>>                       continue;
>>
>>               token = match_token(p, tokens, args);
>> +
>> +             if (parsing_persistent && parsed) {
>> +                     /*
>> +                      * A persistent option value is ignored if a value for
>> +                      * that option was given at mount time.
>> +                      */
>> +
>> +                     if (parsed[token])
>> +                             continue;
>> +                     if (token == Opt_no_space_cache &&
>> +                         parsed[Opt_space_cache])
>> +                             continue;
>> +                     if (token == Opt_space_cache &&
>> +                         parsed[Opt_no_space_cache])
>> +                             continue;
>> +
>> +                     if (token == Opt_subvol)
>> +                             printk(KERN_WARNING "btrfs: subvol not supported as a persistent option");
>> +                     else if (token == Opt_subvolid)
>> +                             printk(KERN_WARNING "btrfs: subvolid not supported as a persistent option");
>> +                     else if (token == Opt_subvolrootid)
>> +                             printk(KERN_WARNING "btrfs: subvolrootid not supported as a persistent option");
>> +                     else if (token == Opt_device)
>> +                             printk(KERN_WARNING "btrfs: device not supported as a persistent option");
>> +                     else if (token == Opt_thread_pool)
>> +                             printk(KERN_WARNING "btrfs: thread_pool not supported as a persistent option");
>> +             }
>> +
>> +             if (!parsing_persistent && parsed)
>> +                     parsed[token] = 1;
>> +
>>               switch (token) {
>>               case Opt_degraded:
>>                       printk(KERN_INFO "btrfs: allowing degraded mounts\n");
>> @@ -1279,7 +1321,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
>>
>>       btrfs_remount_prepare(fs_info);
>>
>> -     ret = btrfs_parse_options(root, data);
>> +     ret = btrfs_parse_options(root, data, 0, NULL);
>>       if (ret) {
>>               ret = -EINVAL;
>>               goto restore;
>>
>
Eric Sandeen Aug. 6, 2013, 9:05 p.m. UTC | #3
On 8/6/13 3:45 PM, Filipe David Manana wrote:
> On Tue, Aug 6, 2013 at 9:37 PM, Eric Sandeen <sandeen@redhat.com> wrote:
>> On 8/6/13 1:27 PM, Filipe David Borba Manana wrote:
>>> This change allows for most mount options to be persisted in
>>> the filesystem, and be applied when the filesystem is mounted.
>>> If the same options are specified at mount time, the persisted
>>> values for those options are ignored.
>>>
>>> The only options not supported are: subvol, subvolid, subvolrootid,
>>> device and thread_pool. This limitation is due to how this feature
>>> is implemented: basically there's an optional value (of type
>>> struct btrfs_dir_item) in the tree of tree roots used to store the
>>> list of options in the same format as they are passed to btrfs_mount().
>>> This means any mount option that takes effect before the tree of tree
>>> roots is setup is not supported.
>>>
>>> To set these options, the user space tool btrfstune was modified
>>> to persist the list of options into an unmounted filesystem's
>>> tree of tree roots.
>>
>> So, it does this thing, ok - but why?
>> What is seen as the administrative advantage of this new mechanism?
>>
>> Just to play devil's advocate, and to add a bit of history:
>>
>> On any production system, the filesystems will be mounted via fstab,
>> which has the advantages of being widely known, well understood, and
>> 100% expected - as well as being transparent, unsurprising, and seamless.
>>
>> For history: ext4 did this too.  And now it's in a situation where it's
>> got mount options coming at it from both the superblock and from
>> the commandline (or fstab), and sometimes they conflict; it also tries
>> to report mount options in /proc/mounts, but has grown hairy code
>> to decide which ones to print and which ones to not print (if it's
>> a "default" option, don't print it in /proc/mounts, but what's default,
>> code-default or fs-default?)  And it's really kind of an ugly mess.
>>
>> Further, mounting 2 filesystems w/ no options in fstab or on the
>> commandline, and getting different behavior due to hidden (sorry,
>> persistent) options in the fs itself is surprising, and surprise
>> is rarely good.
>>
>> So this patch adds 100+ lines of new code, to implement this idea, but:
>> what is the advantage?  Unless there is a compelling administrative
>> use case, I'd vote against it.  Lines of code that don't exist don't
>> have bugs.  ;)
> 
> There was a recent good example (imho at least) mentioned by Xavier
> Gnata some time ago:
> 
> http://comments.gmane.org/gmane.comp.file-systems.btrfs/26011
> 
> cheers

Hm, I see.  I forgot about hotplugging in my "most systems mount
via fstab" assertion.  :)

I was thinking (and Josef just suggested too) that making a
dir flag, saying "everything under this dir gets compressed" might make
more sense for that scenario than adding a whole slew of
on-disk-persistent-mount-option code.

Because really, the motivation sounds like it's primarily for significant
on-disk format changes controlled by mount options.  I understand that
motivation more than being able to persist something like "noatime."

-Eric

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Duncan Aug. 7, 2013, 1:20 a.m. UTC | #4
Eric Sandeen posted on Tue, 06 Aug 2013 15:37:30 -0500 as excerpted:

> On 8/6/13 1:27 PM, Filipe David Borba Manana wrote:
>> This change allows for most mount options to be persisted in the
>> filesystem, and be applied when the filesystem is mounted.
>> If the same options are specified at mount time, the persisted values
>> for those options are ignored.
>> 
>> The only options not supported are: subvol, subvolid, subvolrootid,
>> device and thread_pool. This limitation is due to how this feature is
>> implemented: basically there's an optional value (of type struct
>> btrfs_dir_item) in the tree of tree roots used to store the list of
>> options in the same format as they are passed to btrfs_mount().
>> This means any mount option that takes effect before the tree of tree
>> roots is setup is not supported.
>> 
>> To set these options, the user space tool btrfstune was modified to
>> persist the list of options into an unmounted filesystem's tree of tree
>> roots.
> 
> So, it does this thing, ok - but why?
> What is seen as the administrative advantage of this new mechanism?
> 
> Just to play devil's advocate, and to add a bit of history:
> 
> On any production system, the filesystems will be mounted via fstab,
> which has the advantages of being widely known, well understood, and
> 100% expected - as well as being transparent, unsurprising, and
> seamless.
> 
> For history: ext4 did this too.  And now it's in a situation where it's
> got mount options coming at it from both the superblock and from the
> commandline (or fstab), and sometimes they conflict; it also tries to
> report mount options in /proc/mounts, but has grown hairy code to decide
> which ones to print and which ones to not print (if it's a "default"
> option, don't print it in /proc/mounts, but what's default, code-default
> or fs-default?)  And it's really kind of an ugly mess.
> 
> Further, mounting 2 filesystems w/ no options in fstab or on the
> commandline, and getting different behavior due to hidden (sorry,
> persistent) options in the fs itself is surprising, and surprise is
> rarely good.
> 
> So this patch adds 100+ lines of new code, to implement this idea, but:
> what is the advantage?  Unless there is a compelling administrative use
> case, I'd vote against it.  Lines of code that don't exist don't have
> bugs.  ;)

As an admin, there's some options I want always applied as site policy.  
Here, that includes compress=lzo, autodefrag and noatime.  And with all 
the capacities btrfs has what with raid and the like, particularly if 
someone's needing to use device= (which won't go in the persistent 
options for what should be pretty obvious reasons) a bunch of times, that 
fstab line can get quite long indeed![1]

Just like the kernel has a config option for a built-in commandline that 
takes some of the pressure off the actually passed commandline for 
options that are pretty much always going to be used so it's easier to 
administer because only the possibly dynamic options or those going 
against the builtin commandline defaults need passed, a filesystem as 
complex and multi-optioned as btrfs is, really NEEDS some way to persist 
the options that are effectively site policy default, so the admin 
doesn't have to worry about them any longer.

FWIW, I had assumed persistent mount options were planned as a given and 
simply hadn't been implemented yet.  Because to me it's a no-brainer.  
After all, people don't have to use the feature if they don't like it.  
And it sure saves a headache when what might otherwise be a dozen 
parameters passed in can be cut in half or better, leaving only the ones 
that are going to differ depending on circumstances to worry about.  I 
know that from experience with the kernel builtin commandline option!

---
[1] I ran initr*less root-on-md-raid for many years.  That involved 
passing a complicated set of md1=/dev/sda3,/dev/sdb3,/dev/sdc3,/dev/sdd3 
type options to the kernel, along with more usual options I was passing, 
and I was SO happy when the kernel got that built-in-commandline option 
and I could put the default set in there, such that I only had to worry 
about passing that parameter for the backup boot option, thus along with 
several other passed options I was able to put in the builtin, shrinking 
the actual passed kernel commandline dramatically, so only the stuff that 
wasn't the default needed passed for a particular boot option.  It would 
sure be nice to be able to do the same thing, but at the filesystem 
level, here!
Eric Sandeen Aug. 7, 2013, 1:37 a.m. UTC | #5
On 8/6/13 8:20 PM, Duncan wrote:
> Eric Sandeen posted on Tue, 06 Aug 2013 15:37:30 -0500 as excerpted:
> 
>> On 8/6/13 1:27 PM, Filipe David Borba Manana wrote:
>>> This change allows for most mount options to be persisted in the
>>> filesystem, and be applied when the filesystem is mounted.
>>> If the same options are specified at mount time, the persisted values
>>> for those options are ignored.
>>>
>>> The only options not supported are: subvol, subvolid, subvolrootid,
>>> device and thread_pool. This limitation is due to how this feature is
>>> implemented: basically there's an optional value (of type struct
>>> btrfs_dir_item) in the tree of tree roots used to store the list of
>>> options in the same format as they are passed to btrfs_mount().
>>> This means any mount option that takes effect before the tree of tree
>>> roots is setup is not supported.
>>>
>>> To set these options, the user space tool btrfstune was modified to
>>> persist the list of options into an unmounted filesystem's tree of tree
>>> roots.
>>
>> So, it does this thing, ok - but why?
>> What is seen as the administrative advantage of this new mechanism?
>>
>> Just to play devil's advocate, and to add a bit of history:
>>
>> On any production system, the filesystems will be mounted via fstab,
>> which has the advantages of being widely known, well understood, and
>> 100% expected - as well as being transparent, unsurprising, and
>> seamless.
>>
>> For history: ext4 did this too.  And now it's in a situation where it's
>> got mount options coming at it from both the superblock and from the
>> commandline (or fstab), and sometimes they conflict; it also tries to
>> report mount options in /proc/mounts, but has grown hairy code to decide
>> which ones to print and which ones to not print (if it's a "default"
>> option, don't print it in /proc/mounts, but what's default, code-default
>> or fs-default?)  And it's really kind of an ugly mess.
>>
>> Further, mounting 2 filesystems w/ no options in fstab or on the
>> commandline, and getting different behavior due to hidden (sorry,
>> persistent) options in the fs itself is surprising, and surprise is
>> rarely good.
>>
>> So this patch adds 100+ lines of new code, to implement this idea, but:
>> what is the advantage?  Unless there is a compelling administrative use
>> case, I'd vote against it.  Lines of code that don't exist don't have
>> bugs.  ;)
> 
> As an admin, there's some options I want always applied as site policy.  
> Here, that includes compress=lzo, autodefrag and noatime.  And with all 

But as an admin, you can add that to fstab, right?

> the capacities btrfs has what with raid and the like, particularly if 
> someone's needing to use device= (which won't go in the persistent 
> options for what should be pretty obvious reasons) a bunch of times, that 
> fstab line can get quite long indeed![1]
> 
> Just like the kernel has a config option for a built-in commandline that 
> takes some of the pressure off the actually passed commandline for 
> options that are pretty much always going to be used so it's easier to 
> administer because only the possibly dynamic options or those going 
> against the builtin commandline defaults need passed, a filesystem as 
> complex and multi-optioned as btrfs is, really NEEDS some way to persist 
> the options that are effectively site policy default, so the admin 
> doesn't have to worry about them any longer.

Again, fstab is perfectly sufficient, simply for site policy.

It seems that your main argument here (no pun intended) is that the sheer
length of the options string becomes unwieldy.  i.e. "it's too long
for fstab, so it must be moved into the filesystem."

"too long" is a bit subjective, unless util-linux
has an actual string limit.  If not, I guess it's mostly aesthetics.

> FWIW, I had assumed persistent mount options were planned as a given and 
> simply hadn't been implemented yet.  Because to me it's a no-brainer.  
> After all, people don't have to use the feature if they don't like it.  

no, but we still have to maintain it ;)

So just speaking for myself, 100+ new lines of code forever after, vs.
"I find my fstab to be unattractive" is an obvious choice.

> And it sure saves a headache when what might otherwise be a dozen 
> parameters passed in can be cut in half or better, leaving only the ones 
> that are going to differ depending on circumstances to worry about.  I 
> know that from experience with the kernel builtin commandline option!

But you still have to set them all.  Is it less headache to use btrfstune
vs edit fstab?  I'm not really convinced.

I guess the argument that it's easier to notice specific differences against
a site-wide (hidden) set of options, vs. a bunch of long option strings
resonates somewhat.

*shrug* well, I did my ghost-of-christmas-future bit; it's just my $0.02.

Thanks,
-Eric

> ---
> [1] I ran initr*less root-on-md-raid for many years.

isn't that kind of doing it wrong, though?  init*fs solves the problem
you complain about.  (There's probably a reason for it that I'm unaware
of, though.)

>  That involved 
> passing a complicated set of md1=/dev/sda3,/dev/sdb3,/dev/sdc3,/dev/sdd3 
> type options to the kernel, along with more usual options I was passing, 
> and I was SO happy when the kernel got that built-in-commandline option 
> and I could put the default set in there, such that I only had to worry 
> about passing that parameter for the backup boot option, thus along with 
> several other passed options I was able to put in the builtin, shrinking 
> the actual passed kernel commandline dramatically, so only the stuff that 
> wasn't the default needed passed for a particular boot option.  It would 
> sure be nice to be able to do the same thing, but at the filesystem 
> level, here!
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Sandeen Aug. 7, 2013, 3:04 a.m. UTC | #6
On 8/6/13 1:27 PM, Filipe David Borba Manana wrote:
> This change allows for most mount options to be persisted in
> the filesystem, and be applied when the filesystem is mounted.
> If the same options are specified at mount time, the persisted
> values for those options are ignored.

I thought the plan was to make commandline mount options
override persistent ones, but that doesn't seem to be the case,
at least in this example:

# ./btrfstune -o compress,discard,ssd /dev/sdb1
New persistent options:      compress,discard,ssd
# mount -o nossd /dev/sdb1 /mnt/test
# dmesg | tail
[  995.657233] btrfs: use ssd allocation scheme
[  995.661501] btrfs: disk space caching is enabled

and /proc/mounts is similarly confused, showing both options:

# grep sdb1 /proc/mounts
/dev/sdb1 /mnt/test btrfs rw,seclabel,relatime,compress=zlib,nossd,ssd,discard,space_cache 0 0

(This is the trail of woe I was talking about from ext4-land) ;)

> The only options not supported are: subvol, subvolid, subvolrootid,
> device and thread_pool. This limitation is due to how this feature
> is implemented: basically there's an optional value (of type
> struct btrfs_dir_item) in the tree of tree roots used to store the
> list of options in the same format as they are passed to btrfs_mount().
> This means any mount option that takes effect before the tree of tree
> roots is setup is not supported.

Just FWIW, vfs-level mount options are also not supported; i.e. "noatime"
or "ro" won't work either, because the code is already past the VFS
option parsing:

# ./btrfstune -o compress,discard,ro /dev/sdb1
Current persistent options:  compress,discard
New persistent options:      compress,discard,ro
# mount /dev/sdb1 /mnt/test
mount: wrong fs type, bad option, bad superblock on /dev/sdb1, ...
# dmesg | tail
[  817.681417] btrfs: unrecognized mount option 'ro'
[  817.694689] btrfs: open_ctree failed

> To set these options, the user space tool btrfstune was modified
> to persist the list of options into an unmounted filesystem's
> tree of tree roots.

If this goes forward, you'd want an easy way to display
them, not just set them, I suppose.

Thanks,
-Eric
 
> NOTE: Like the corresponding btrfs-progs patch, this is a WIP with
> the goal o gathering feedback.
> 
> Signed-off-by: Filipe David Borba Manana <fdmanana@gmail.com>


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Filipe Manana Aug. 7, 2013, 3:12 a.m. UTC | #7
On Tue, Aug 6, 2013 at 10:05 PM, Eric Sandeen <sandeen@redhat.com> wrote:
> On 8/6/13 3:45 PM, Filipe David Manana wrote:
>> On Tue, Aug 6, 2013 at 9:37 PM, Eric Sandeen <sandeen@redhat.com> wrote:
>>> On 8/6/13 1:27 PM, Filipe David Borba Manana wrote:
>>>> This change allows for most mount options to be persisted in
>>>> the filesystem, and be applied when the filesystem is mounted.
>>>> If the same options are specified at mount time, the persisted
>>>> values for those options are ignored.
>>>>
>>>> The only options not supported are: subvol, subvolid, subvolrootid,
>>>> device and thread_pool. This limitation is due to how this feature
>>>> is implemented: basically there's an optional value (of type
>>>> struct btrfs_dir_item) in the tree of tree roots used to store the
>>>> list of options in the same format as they are passed to btrfs_mount().
>>>> This means any mount option that takes effect before the tree of tree
>>>> roots is setup is not supported.
>>>>
>>>> To set these options, the user space tool btrfstune was modified
>>>> to persist the list of options into an unmounted filesystem's
>>>> tree of tree roots.
>>>
>>> So, it does this thing, ok - but why?
>>> What is seen as the administrative advantage of this new mechanism?
>>>
>>> Just to play devil's advocate, and to add a bit of history:
>>>
>>> On any production system, the filesystems will be mounted via fstab,
>>> which has the advantages of being widely known, well understood, and
>>> 100% expected - as well as being transparent, unsurprising, and seamless.
>>>
>>> For history: ext4 did this too.  And now it's in a situation where it's
>>> got mount options coming at it from both the superblock and from
>>> the commandline (or fstab), and sometimes they conflict; it also tries
>>> to report mount options in /proc/mounts, but has grown hairy code
>>> to decide which ones to print and which ones to not print (if it's
>>> a "default" option, don't print it in /proc/mounts, but what's default,
>>> code-default or fs-default?)  And it's really kind of an ugly mess.
>>>
>>> Further, mounting 2 filesystems w/ no options in fstab or on the
>>> commandline, and getting different behavior due to hidden (sorry,
>>> persistent) options in the fs itself is surprising, and surprise
>>> is rarely good.
>>>
>>> So this patch adds 100+ lines of new code, to implement this idea, but:
>>> what is the advantage?  Unless there is a compelling administrative
>>> use case, I'd vote against it.  Lines of code that don't exist don't
>>> have bugs.  ;)
>>
>> There was a recent good example (imho at least) mentioned by Xavier
>> Gnata some time ago:
>>
>> http://comments.gmane.org/gmane.comp.file-systems.btrfs/26011
>>
>> cheers
>
> Hm, I see.  I forgot about hotplugging in my "most systems mount
> via fstab" assertion.  :)
>
> I was thinking (and Josef just suggested too) that making a
> dir flag, saying "everything under this dir gets compressed" might make
> more sense for that scenario than adding a whole slew of
> on-disk-persistent-mount-option code.

I like that idea too. Thanks for the suggestion. A quick experiment
allowed for that approach in under 20 lines, will test it a bit more.

>
> Because really, the motivation sounds like it's primarily for significant
> on-disk format changes controlled by mount options.  I understand that
> motivation more than being able to persist something like "noatime."
>
> -Eric
>
Filipe Manana Aug. 7, 2013, 3:16 a.m. UTC | #8
On Wed, Aug 7, 2013 at 4:04 AM, Eric Sandeen <sandeen@redhat.com> wrote:
> On 8/6/13 1:27 PM, Filipe David Borba Manana wrote:
>> This change allows for most mount options to be persisted in
>> the filesystem, and be applied when the filesystem is mounted.
>> If the same options are specified at mount time, the persisted
>> values for those options are ignored.
>
> I thought the plan was to make commandline mount options
> override persistent ones, but that doesn't seem to be the case,
> at least in this example:

Yes, that was the idea. However I didn't try all possible combinations
of mount and persistent parameters.

>
> # ./btrfstune -o compress,discard,ssd /dev/sdb1
> New persistent options:      compress,discard,ssd
> # mount -o nossd /dev/sdb1 /mnt/test
> # dmesg | tail
> [  995.657233] btrfs: use ssd allocation scheme
> [  995.661501] btrfs: disk space caching is enabled

Yes. Misses some checks like the ones I added for the space_cache /
no_space_cache combinations:

+ if (token == Opt_no_space_cache &&
+    parsed[Opt_space_cache])
+ continue;
+ if (token == Opt_space_cache &&
+    parsed[Opt_no_space_cache])
+ continue;

>
> and /proc/mounts is similarly confused, showing both options:
>
> # grep sdb1 /proc/mounts
> /dev/sdb1 /mnt/test btrfs rw,seclabel,relatime,compress=zlib,nossd,ssd,discard,space_cache 0 0
>
> (This is the trail of woe I was talking about from ext4-land) ;)
>
>> The only options not supported are: subvol, subvolid, subvolrootid,
>> device and thread_pool. This limitation is due to how this feature
>> is implemented: basically there's an optional value (of type
>> struct btrfs_dir_item) in the tree of tree roots used to store the
>> list of options in the same format as they are passed to btrfs_mount().
>> This means any mount option that takes effect before the tree of tree
>> roots is setup is not supported.
>
> Just FWIW, vfs-level mount options are also not supported; i.e. "noatime"
> or "ro" won't work either, because the code is already past the VFS
> option parsing:
>
> # ./btrfstune -o compress,discard,ro /dev/sdb1
> Current persistent options:  compress,discard
> New persistent options:      compress,discard,ro
> # mount /dev/sdb1 /mnt/test
> mount: wrong fs type, bad option, bad superblock on /dev/sdb1, ...
> # dmesg | tail
> [  817.681417] btrfs: unrecognized mount option 'ro'
> [  817.694689] btrfs: open_ctree failed

Yes, that would be a next step to work on after getting community
feedback about the approach (from a user point of view and the
technical details / implementation).

Thanks for trying it and for your feedback Eric.

>
>> To set these options, the user space tool btrfstune was modified
>> to persist the list of options into an unmounted filesystem's
>> tree of tree roots.
>
> If this goes forward, you'd want an easy way to display
> them, not just set them, I suppose.
>
> Thanks,
> -Eric
>
>> NOTE: Like the corresponding btrfs-progs patch, this is a WIP with
>> the goal o gathering feedback.
>>
>> Signed-off-by: Filipe David Borba Manana <fdmanana@gmail.com>
>
>
David Sterba Aug. 7, 2013, 10:40 a.m. UTC | #9
On Tue, Aug 06, 2013 at 07:27:20PM +0100, Filipe David Borba Manana wrote:
> This change allows for most mount options to be persisted in
> the filesystem, and be applied when the filesystem is mounted.
> If the same options are specified at mount time, the persisted
> values for those options are ignored.
> 
> The only options not supported are: subvol, subvolid, subvolrootid,
> device and thread_pool. This limitation is due to how this feature
> is implemented: basically there's an optional value (of type
> struct btrfs_dir_item) in the tree of tree roots used to store the
> list of options in the same format as they are passed to btrfs_mount().
> This means any mount option that takes effect before the tree of tree
> roots is setup is not supported.
> 
> To set these options, the user space tool btrfstune was modified
> to persist the list of options into an unmounted filesystem's
> tree of tree roots.

We’ve seen this proposed in the past, IIRC this thread contains most of what
has been discussed:
http://thread.gmane.org/gmane.comp.file-systems.btrfs/19757

Implementing just whole-filesystem mount options is not convering all usecases,
more broad approach like per-subvolume or per-file settings is desired. There
are always settings that aren’t known now but would be useful in the future, so
we need a flexible infrastructure to maintain the properties.

There was a proposed implementation for set/get properties:
http://thread.gmane.org/gmane.comp.file-systems.btrfs/18287

From the implementation side, I very much want to abandon the separate
btrfstune utility. Currently it’s a bandaid because there’s nothing better atm.

Designing and merging the properties feature takes time, but we want to tune
simple things now. The wiki project mentions ‘tune2fs’ as an example, but the
project details are not always accurate about how to do the things, it’s more
like ideas what to do. If you’re going to work on that, please claim the
project on the wiki, and possibly write more details abou the design.

david
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Sterba Aug. 7, 2013, 10:48 a.m. UTC | #10
On Tue, Aug 06, 2013 at 04:05:50PM -0500, Eric Sandeen wrote:
> I was thinking (and Josef just suggested too) that making a
> dir flag, saying "everything under this dir gets compressed" might make
> more sense for that scenario than adding a whole slew of
> on-disk-persistent-mount-option code.

We have this dir flag stored in the attributes, chattr +c dir/, but this
cannot be tuned further - no way to specify the compression algorithm
(or for example the target ratio as compression hint), or we can't say
"never compress files in this dir (even if mounted with compression)"
etc. 

david
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Filipe Manana Aug. 7, 2013, 11:33 a.m. UTC | #11
On Wed, Aug 7, 2013 at 11:40 AM, David Sterba <dsterba@suse.cz> wrote:
> On Tue, Aug 06, 2013 at 07:27:20PM +0100, Filipe David Borba Manana wrote:
>> This change allows for most mount options to be persisted in
>> the filesystem, and be applied when the filesystem is mounted.
>> If the same options are specified at mount time, the persisted
>> values for those options are ignored.
>>
>> The only options not supported are: subvol, subvolid, subvolrootid,
>> device and thread_pool. This limitation is due to how this feature
>> is implemented: basically there's an optional value (of type
>> struct btrfs_dir_item) in the tree of tree roots used to store the
>> list of options in the same format as they are passed to btrfs_mount().
>> This means any mount option that takes effect before the tree of tree
>> roots is setup is not supported.
>>
>> To set these options, the user space tool btrfstune was modified
>> to persist the list of options into an unmounted filesystem's
>> tree of tree roots.
>
> We’ve seen this proposed in the past, IIRC this thread contains most of what
> has been discussed:
> http://thread.gmane.org/gmane.comp.file-systems.btrfs/19757

Thanks, I missed to find that before.
The implementation is very different from the one I proposed.

>
> Implementing just whole-filesystem mount options is not convering all usecases,
> more broad approach like per-subvolume or per-file settings is desired. There
> are always settings that aren’t known now but would be useful in the future, so
> we need a flexible infrastructure to maintain the properties.
>
> There was a proposed implementation for set/get properties:
> http://thread.gmane.org/gmane.comp.file-systems.btrfs/18287

Will take a detailed look at it.
Thanks for pointing it out David.
Why was it never picked?

>
> From the implementation side, I very much want to abandon the separate
> btrfstune utility. Currently it’s a bandaid because there’s nothing better atm.
>
> Designing and merging the properties feature takes time, but we want to tune
> simple things now. The wiki project mentions ‘tune2fs’ as an example, but the
> project details are not always accurate about how to do the things, it’s more
> like ideas what to do. If you’re going to work on that, please claim the
> project on the wiki, and possibly write more details abou the design.

I will.

>
> david
Filipe Manana Aug. 7, 2013, 11:36 a.m. UTC | #12
On Wed, Aug 7, 2013 at 11:48 AM, David Sterba <dsterba@suse.cz> wrote:
> On Tue, Aug 06, 2013 at 04:05:50PM -0500, Eric Sandeen wrote:
>> I was thinking (and Josef just suggested too) that making a
>> dir flag, saying "everything under this dir gets compressed" might make
>> more sense for that scenario than adding a whole slew of
>> on-disk-persistent-mount-option code.
>
> We have this dir flag stored in the attributes, chattr +c dir/, but this
> cannot be tuned further - no way to specify the compression algorithm
> (or for example the target ratio as compression hint), or we can't say
> "never compress files in this dir (even if mounted with compression)"
> etc.

What Eric/Josef suggested, I think I've implemented it in the following RFC:

https://patchwork.kernel.org/patch/2840235/

The only thing it doesn't permit, is to disallow compression for
individual files or files placed under specific directories.
Any idea how would this be specified? (not via chattr I guess)


>
> david
Martin Steigerwald Aug. 7, 2013, 1:46 p.m. UTC | #13
Am Dienstag, 6. August 2013, 16:05:50 schrieb Eric Sandeen:
> On 8/6/13 3:45 PM, Filipe David Manana wrote:
> > On Tue, Aug 6, 2013 at 9:37 PM, Eric Sandeen <sandeen@redhat.com> wrote:
> >> On 8/6/13 1:27 PM, Filipe David Borba Manana wrote:
> >>> This change allows for most mount options to be persisted in
> >>> the filesystem, and be applied when the filesystem is mounted.
> >>> If the same options are specified at mount time, the persisted
> >>> values for those options are ignored.
> >>> 
> >>> The only options not supported are: subvol, subvolid, subvolrootid,
> >>> device and thread_pool. This limitation is due to how this feature
> >>> is implemented: basically there's an optional value (of type
> >>> struct btrfs_dir_item) in the tree of tree roots used to store the
> >>> list of options in the same format as they are passed to btrfs_mount().
> >>> This means any mount option that takes effect before the tree of tree
> >>> roots is setup is not supported.
> >>> 
> >>> To set these options, the user space tool btrfstune was modified
> >>> to persist the list of options into an unmounted filesystem's
> >>> tree of tree roots.
> >> 
> >> So, it does this thing, ok - but why?
> >> What is seen as the administrative advantage of this new mechanism?
> >> 
> >> Just to play devil's advocate, and to add a bit of history:
> >> 
> >> On any production system, the filesystems will be mounted via fstab,
> >> which has the advantages of being widely known, well understood, and
> >> 100% expected - as well as being transparent, unsurprising, and seamless.
> >> 
> >> For history: ext4 did this too.  And now it's in a situation where it's
> >> got mount options coming at it from both the superblock and from
> >> the commandline (or fstab), and sometimes they conflict; it also tries
> >> to report mount options in /proc/mounts, but has grown hairy code
> >> to decide which ones to print and which ones to not print (if it's
> >> a "default" option, don't print it in /proc/mounts, but what's default,
> >> code-default or fs-default?)  And it's really kind of an ugly mess.
> >> 
> >> Further, mounting 2 filesystems w/ no options in fstab or on the
> >> commandline, and getting different behavior due to hidden (sorry,
> >> persistent) options in the fs itself is surprising, and surprise
> >> is rarely good.
> >> 
> >> So this patch adds 100+ lines of new code, to implement this idea, but:
> >> what is the advantage?  Unless there is a compelling administrative
> >> use case, I'd vote against it.  Lines of code that don't exist don't
> >> have bugs.  ;)
> > 
> > There was a recent good example (imho at least) mentioned by Xavier
> > Gnata some time ago:
> > 
> > http://comments.gmane.org/gmane.comp.file-systems.btrfs/26011
> > 
> > cheers
> 
> Hm, I see.  I forgot about hotplugging in my "most systems mount
> via fstab" assertion.  :)
> 
> I was thinking (and Josef just suggested too) that making a
> dir flag, saying "everything under this dir gets compressed" might make
> more sense for that scenario than adding a whole slew of
> on-disk-persistent-mount-option code.
> 
> Because really, the motivation sounds like it's primarily for significant
> on-disk format changes controlled by mount options.  I understand that
> motivation more than being able to persist something like "noatime."

For a hotplug-able SSD having noatime stored persistently IMHO makes a lot of 
sense as well.

I won´t be surprised that at some time, extern SSDs or extern $successor-of-
SSD will be replacing extern harddisks.
David Sterba Aug. 8, 2013, 10:21 p.m. UTC | #14
On Wed, Aug 07, 2013 at 03:46:20PM +0200, Martin Steigerwald wrote:
> > Because really, the motivation sounds like it's primarily for significant
> > on-disk format changes controlled by mount options.  I understand that
> > motivation more than being able to persist something like "noatime."
> 
> For a hotplug-able SSD having noatime stored persistently IMHO makes a lot of 
> sense as well.

I agree, and we can let btrfs understand noatime (or ro) even if they get
processed by vfs layer.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Sterba Aug. 9, 2013, 12:01 a.m. UTC | #15
On Wed, Aug 07, 2013 at 12:33:09PM +0100, Filipe David Manana wrote:
> Thanks, I missed to find that before.
> The implementation is very different from the one I proposed.

That's one of the fundaental questions how to store the information:
inside existing structures, via xattrs, under new tree items. Each one
has pros and cons.

> > Designing and merging the properties feature takes time, but we want to tune
> > simple things now. The wiki project mentions ‘tune2fs’ as an example, but the
> > project details are not always accurate about how to do the things, it’s more
> > like ideas what to do. If you’re going to work on that, please claim the
> > project on the wiki, and possibly write more details abou the design.
> 
> I will.

The project is titled as persistent mount options, are you willing to
take the more general "per-object properties" task? IMHO there's not
much difference, the UI should be the same, just that it implements
per-fs or per-subvolume properties like mount options. The rest of the
object properties has to be collected and agreed on. I'm sure there's
community knowledge of what's desired, so it's a matter of writing it
down and bikeshe^Wagreement on the naming syntax.

david
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Filipe Manana Aug. 9, 2013, 1:17 p.m. UTC | #16
On Fri, Aug 9, 2013 at 1:01 AM, David Sterba <dsterba@suse.cz> wrote:
> On Wed, Aug 07, 2013 at 12:33:09PM +0100, Filipe David Manana wrote:
>> Thanks, I missed to find that before.
>> The implementation is very different from the one I proposed.
>
> That's one of the fundaental questions how to store the information:
> inside existing structures, via xattrs, under new tree items. Each one
> has pros and cons.
>
>> > Designing and merging the properties feature takes time, but we want to tune
>> > simple things now. The wiki project mentions ‘tune2fs’ as an example, but the
>> > project details are not always accurate about how to do the things, it’s more
>> > like ideas what to do. If you’re going to work on that, please claim the
>> > project on the wiki, and possibly write more details abou the design.
>>
>> I will.
>
> The project is titled as persistent mount options, are you willing to
> take the more general "per-object properties" task? IMHO there's not
> much difference, the UI should be the same, just that it implements
> per-fs or per-subvolume properties like mount options. The rest of the
> object properties has to be collected and agreed on. I'm sure there's
> community knowledge of what's desired, so it's a matter of writing it
> down and bikeshe^Wagreement on the naming syntax.

Yes, I will.
I'll get back to this soon and update the wiki page.

thanks

>
> david
diff mbox

Patch

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index cbb1263..24363df 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -121,6 +121,14 @@  struct btrfs_ordered_sum;
  */
 #define BTRFS_FREE_INO_OBJECTID -12ULL
 
+/*
+ * Item that stores permanent mount options. These options
+ * have effect if they are not specified as well at mount
+ * time (that is, if a permanent option is also specified at
+ * mount time, the later wins).
+ */
+#define BTRFS_PERSISTENT_OPTIONS_OBJECTID -13ULL
+
 /* dummy objectid represents multiple objectids */
 #define BTRFS_MULTIPLE_OBJECTIDS -255ULL
 
@@ -3739,7 +3747,8 @@  void btrfs_exit_sysfs(void);
 ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
 
 /* super.c */
-int btrfs_parse_options(struct btrfs_root *root, char *options);
+int btrfs_parse_options(struct btrfs_root *root, char *options,
+			int parsing_persistent, int **options_parsed);
 int btrfs_sync_fs(struct super_block *sb, int wait);
 
 #ifdef CONFIG_PRINTK
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 254cdc8..eeabdd4 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2077,6 +2077,53 @@  static void del_fs_roots(struct btrfs_fs_info *fs_info)
 	}
 }
 
+static char *get_persistent_options(struct btrfs_root *tree_root)
+{
+	int ret;
+	struct btrfs_key key;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	struct btrfs_dir_item *di;
+	u32 name_len, data_len;
+	char *options = NULL;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return ERR_PTR(-ENOMEM);
+
+	key.objectid = BTRFS_PERSISTENT_OPTIONS_OBJECTID;
+	key.type = 0;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+	if (ret > 0) {
+		ret = 0;
+		goto out;
+	}
+
+	leaf = path->nodes[0];
+	di = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
+	name_len = btrfs_dir_name_len(leaf, di);
+	data_len = btrfs_dir_data_len(leaf, di);
+	options = kmalloc(data_len + 1, GFP_NOFS);
+	if (!options) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	read_extent_buffer(leaf, options,
+			   (unsigned long)((char *)(di + 1) + name_len),
+			   data_len);
+	options[data_len] = '\0';
+
+out:
+	btrfs_free_path(path);
+	if (ret)
+		return ERR_PTR(ret);
+	return options;
+}
+
 int open_ctree(struct super_block *sb,
 	       struct btrfs_fs_devices *fs_devices,
 	       char *options)
@@ -2103,6 +2150,8 @@  int open_ctree(struct super_block *sb,
 	int err = -EINVAL;
 	int num_backups_tried = 0;
 	int backup_index = 0;
+	int *mnt_options = NULL;
+	char *persist_options = NULL;
 
 	tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info);
 	chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
@@ -2372,7 +2421,7 @@  int open_ctree(struct super_block *sb,
 	 */
 	fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
 
-	ret = btrfs_parse_options(tree_root, options);
+	ret = btrfs_parse_options(tree_root, options, 0, &mnt_options);
 	if (ret) {
 		err = ret;
 		goto fail_alloc;
@@ -2656,6 +2705,26 @@  retry_root_backup:
 	btrfs_set_root_node(&tree_root->root_item, tree_root->node);
 	tree_root->commit_root = btrfs_root_node(tree_root);
 
+	persist_options = get_persistent_options(tree_root);
+	if (IS_ERR(persist_options)) {
+		ret = PTR_ERR(persist_options);
+		goto fail_tree_roots;
+	} else if (persist_options) {
+		ret = btrfs_parse_options(tree_root, persist_options,
+					  1, &mnt_options);
+		kfree(mnt_options);
+		mnt_options = NULL;
+		if (ret) {
+			err = ret;
+			goto fail_tree_roots;
+		}
+		if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO) {
+			features = btrfs_super_incompat_flags(disk_super);
+			features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
+			btrfs_set_super_incompat_flags(disk_super, features);
+		}
+	}
+
 	location.objectid = BTRFS_EXTENT_TREE_OBJECTID;
 	location.type = BTRFS_ROOT_ITEM_KEY;
 	location.offset = 0;
@@ -2904,6 +2973,7 @@  fail_block_groups:
 	btrfs_free_block_groups(fs_info);
 
 fail_tree_roots:
+	kfree(mnt_options);
 	free_root_pointers(fs_info, 1);
 	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
 
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2cc5b80..ced0a85 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -369,7 +369,8 @@  static match_table_t tokens = {
  * reading in a new superblock is parsed here.
  * XXX JDM: This needs to be cleaned up for remount.
  */
-int btrfs_parse_options(struct btrfs_root *root, char *options)
+int btrfs_parse_options(struct btrfs_root *root, char *options,
+			int parsing_persistent, int **options_parsed)
 {
 	struct btrfs_fs_info *info = root->fs_info;
 	substring_t args[MAX_OPT_ARGS];
@@ -379,11 +380,21 @@  int btrfs_parse_options(struct btrfs_root *root, char *options)
 	int ret = 0;
 	char *compress_type;
 	bool compress_force = false;
+	int *parsed = NULL;
 
 	cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
 	if (cache_gen)
 		btrfs_set_opt(info->mount_opt, SPACE_CACHE);
 
+	if (!parsing_persistent && options_parsed) {
+		parsed = kzalloc(sizeof(int) * ARRAY_SIZE(tokens), GFP_NOFS);
+		if (!parsed)
+			return -ENOMEM;
+		*options_parsed = parsed;
+	} else if (parsing_persistent && options_parsed) {
+		parsed = *options_parsed;
+	}
+
 	if (!options)
 		goto out;
 
@@ -403,6 +414,37 @@  int btrfs_parse_options(struct btrfs_root *root, char *options)
 			continue;
 
 		token = match_token(p, tokens, args);
+
+		if (parsing_persistent && parsed) {
+			/*
+			 * A persistent option value is ignored if a value for
+			 * that option was given at mount time.
+			 */
+
+			if (parsed[token])
+				continue;
+			if (token == Opt_no_space_cache &&
+			    parsed[Opt_space_cache])
+				continue;
+			if (token == Opt_space_cache &&
+			    parsed[Opt_no_space_cache])
+				continue;
+
+			if (token == Opt_subvol)
+				printk(KERN_WARNING "btrfs: subvol not supported as a persistent option");
+			else if (token == Opt_subvolid)
+				printk(KERN_WARNING "btrfs: subvolid not supported as a persistent option");
+			else if (token == Opt_subvolrootid)
+				printk(KERN_WARNING "btrfs: subvolrootid not supported as a persistent option");
+			else if (token == Opt_device)
+				printk(KERN_WARNING "btrfs: device not supported as a persistent option");
+			else if (token == Opt_thread_pool)
+				printk(KERN_WARNING "btrfs: thread_pool not supported as a persistent option");
+		}
+
+		if (!parsing_persistent && parsed)
+			parsed[token] = 1;
+
 		switch (token) {
 		case Opt_degraded:
 			printk(KERN_INFO "btrfs: allowing degraded mounts\n");
@@ -1279,7 +1321,7 @@  static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 
 	btrfs_remount_prepare(fs_info);
 
-	ret = btrfs_parse_options(root, data);
+	ret = btrfs_parse_options(root, data, 0, NULL);
 	if (ret) {
 		ret = -EINVAL;
 		goto restore;