Message ID | 20200212085454.35665-1-xiubli@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | ceph: fs add reconfiguring superblock parameters support | expand |
On Wed, Feb 12, 2020 at 9:55 AM <xiubli@redhat.com> wrote: > > From: Xiubo Li <xiubli@redhat.com> > > This will enable the remount and reconfiguring superblock params > for the fs. Currently some mount options are not allowed to be > reconfigured. > > It will working like: > $ mount.ceph :/ /mnt/cephfs -o remount,mount_timeout=100 > > URL:https://tracker.ceph.com/issues/44071 > Signed-off-by: Xiubo Li <xiubli@redhat.com> > --- > block/bfq-cgroup.c | 1 + > drivers/block/rbd.c | 2 +- > fs/ceph/caps.c | 2 + > fs/ceph/mds_client.c | 5 +- > fs/ceph/super.c | 126 +++++++++++++++++++++++++++++------ > fs/ceph/super.h | 2 + > include/linux/ceph/libceph.h | 4 +- > net/ceph/ceph_common.c | 83 ++++++++++++++++++++--- > 8 files changed, 192 insertions(+), 33 deletions(-) > > diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c > index e1419edde2ec..b3d42200182e 100644 > --- a/block/bfq-cgroup.c > +++ b/block/bfq-cgroup.c > @@ -12,6 +12,7 @@ > #include <linux/ioprio.h> > #include <linux/sbitmap.h> > #include <linux/delay.h> > +#include <linux/rbtree.h> Hi Xiubo, This hunk touches the block layer. > > #include "bfq-iosched.h" > > diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c > index 4e494d5600cc..470de27cf809 100644 > --- a/drivers/block/rbd.c > +++ b/drivers/block/rbd.c > @@ -6573,7 +6573,7 @@ static int rbd_add_parse_args(const char *buf, > *(snap_name + len) = '\0'; > pctx.spec->snap_name = snap_name; > > - pctx.copts = ceph_alloc_options(); > + pctx.copts = ceph_alloc_options(NULL); > if (!pctx.copts) > goto out_mem; > > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c > index b4f122eb74bb..020f83186f94 100644 > --- a/fs/ceph/caps.c > +++ b/fs/ceph/caps.c > @@ -491,10 +491,12 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc, > { > struct ceph_mount_options *opt = mdsc->fsc->mount_options; > > + spin_lock(&opt->ceph_opt_lock); > ci->i_hold_caps_min = round_jiffies(jiffies + > opt->caps_wanted_delay_min * HZ); > ci->i_hold_caps_max = round_jiffies(jiffies + > opt->caps_wanted_delay_max * HZ); > + spin_unlock(&opt->ceph_opt_lock); > dout("__cap_set_timeouts %p min %lu max %lu\n", &ci->vfs_inode, > ci->i_hold_caps_min - jiffies, ci->i_hold_caps_max - jiffies); > } > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c > index 376e7cf1685f..451c3727cd0b 100644 > --- a/fs/ceph/mds_client.c > +++ b/fs/ceph/mds_client.c > @@ -2099,6 +2099,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, > struct ceph_inode_info *ci = ceph_inode(dir); > struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; > struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options; > + unsigned int max_readdir = opt->max_readdir; > size_t size = sizeof(struct ceph_mds_reply_dir_entry); > unsigned int num_entries; > int order; > @@ -2107,7 +2108,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, > num_entries = ci->i_files + ci->i_subdirs; > spin_unlock(&ci->i_ceph_lock); > num_entries = max(num_entries, 1U); > - num_entries = min(num_entries, opt->max_readdir); > + num_entries = min(num_entries, max_readdir); > > order = get_order(size * num_entries); > while (order >= 0) { > @@ -2122,7 +2123,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, > return -ENOMEM; > > num_entries = (PAGE_SIZE << order) / size; > - num_entries = min(num_entries, opt->max_readdir); > + num_entries = min(num_entries, max_readdir); > > rinfo->dir_buf_size = PAGE_SIZE << order; > req->r_num_caps = num_entries + 1; > diff --git a/fs/ceph/super.c b/fs/ceph/super.c > index 9a21054059f2..8df506dd9039 100644 > --- a/fs/ceph/super.c > +++ b/fs/ceph/super.c > @@ -1175,7 +1175,57 @@ static void ceph_free_fc(struct fs_context *fc) > > static int ceph_reconfigure_fc(struct fs_context *fc) > { > - sync_filesystem(fc->root->d_sb); > + struct super_block *sb = fc->root->d_sb; > + struct ceph_fs_client *fsc = ceph_sb_to_client(sb); > + struct ceph_mount_options *fsopt = fsc->mount_options; > + struct ceph_parse_opts_ctx *pctx = fc->fs_private; > + struct ceph_mount_options *new_fsopt = pctx->opts; > + int ret; > + > + sync_filesystem(sb); > + > + ret = ceph_reconfigure_copts(fc, pctx->copts, fsc->client->options); > + if (ret) > + return ret; > + > + if (new_fsopt->snapdir_name != fsopt->snapdir_name) > + return invalf(fc, "ceph: reconfiguration of snapdir_name not allowed"); > + > + if (new_fsopt->mds_namespace != fsopt->mds_namespace) > + return invalf(fc, "ceph: reconfiguration of mds_namespace not allowed"); > + > + if (new_fsopt->wsize != fsopt->wsize) > + return invalf(fc, "ceph: reconfiguration of wsize not allowed"); > + if (new_fsopt->rsize != fsopt->rsize) > + return invalf(fc, "ceph: reconfiguration of rsize not allowed"); > + if (new_fsopt->rasize != fsopt->rasize) > + return invalf(fc, "ceph: reconfiguration of rasize not allowed"); > + > +#ifdef CONFIG_CEPH_FSCACHE > + if (strcmp_null(new_fsopt->fscache_uniq, fsopt->fscache_uniq)) > + return invalf(fc, "ceph: reconfiguration of fscache not allowed"); > +#endif > + > + fsopt->flags = new_fsopt->flags; > + > + spin_lock(&fsopt->ceph_opt_lock); > + fsopt->caps_wanted_delay_min = new_fsopt->caps_wanted_delay_min; > + fsopt->caps_wanted_delay_max = new_fsopt->caps_wanted_delay_max; > + spin_unlock(&fsopt->ceph_opt_lock); > + > + fsopt->max_readdir_bytes = new_fsopt->max_readdir_bytes; > + fsopt->congestion_kb = new_fsopt->congestion_kb; > + > + fsopt->caps_max = new_fsopt->caps_max; > + fsopt->max_readdir = new_fsopt->max_readdir; > + ceph_adjust_caps_max_min(fsc->mdsc, fsopt); > + > +#ifdef CONFIG_CEPH_FS_POSIX_ACL > + if (fc->sb_flags & SB_POSIXACL) > + sb->s_flags |= SB_POSIXACL; > + else > + sb->s_flags &= ~SB_POSIXACL; > +#endif > return 0; > } > > @@ -1193,38 +1243,77 @@ static int ceph_init_fs_context(struct fs_context *fc) > { > struct ceph_parse_opts_ctx *pctx; > struct ceph_mount_options *fsopt; > + struct ceph_options *copts = NULL; > > pctx = kzalloc(sizeof(*pctx), GFP_KERNEL); > if (!pctx) > return -ENOMEM; > > - pctx->copts = ceph_alloc_options(); > - if (!pctx->copts) > - goto nomem; > - > pctx->opts = kzalloc(sizeof(*pctx->opts), GFP_KERNEL); > if (!pctx->opts) > goto nomem; > > fsopt = pctx->opts; > - fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; > > - fsopt->wsize = CEPH_MAX_WRITE_SIZE; > - fsopt->rsize = CEPH_MAX_READ_SIZE; > - fsopt->rasize = CEPH_RASIZE_DEFAULT; > - fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); > - if (!fsopt->snapdir_name) > - goto nomem; > +#ifdef CONFIG_CEPH_FS_POSIX_ACL > + fc->sb_flags |= SB_POSIXACL; > +#endif > + > + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { > + struct super_block *sb = fc->root->d_sb; > + struct ceph_fs_client *fsc = ceph_sb_to_client(sb); > + struct ceph_mount_options *old = fsc->mount_options; > + > + copts = fsc->client->options; > + > + fsopt->flags = old->flags; > > - fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; > - fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; > - fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; > - fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; > - fsopt->congestion_kb = default_congestion_kb(); > + fsopt->wsize = old->wsize; > + fsopt->rsize = old->rsize; > + fsopt->rasize = old->rasize; > + > + fsopt->fscache_uniq = kstrdup(old->fscache_uniq, GFP_KERNEL); > + if (!fsopt->fscache_uniq) > + goto nomem; > + > + fsopt->snapdir_name = kstrdup(old->snapdir_name, GFP_KERNEL); > + if (!fsopt->snapdir_name) > + goto nomem; > + > + fsopt->caps_wanted_delay_min = old->caps_wanted_delay_min; > + fsopt->caps_wanted_delay_max = old->caps_wanted_delay_max; > + fsopt->max_readdir = old->max_readdir; > + fsopt->max_readdir_bytes = old->max_readdir_bytes; > + fsopt->congestion_kb = old->congestion_kb; > + fsopt->caps_max = old->caps_max; > + fsopt->max_readdir = old->max_readdir; > > #ifdef CONFIG_CEPH_FS_POSIX_ACL > - fc->sb_flags |= SB_POSIXACL; > + if (!(sb->s_flags & SB_POSIXACL)) > + fc->sb_flags &= ~SB_POSIXACL; > #endif > + } else { > + fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; > + > + fsopt->wsize = CEPH_MAX_WRITE_SIZE; > + fsopt->rsize = CEPH_MAX_READ_SIZE; > + fsopt->rasize = CEPH_RASIZE_DEFAULT; > + > + fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); > + if (!fsopt->snapdir_name) > + goto nomem; > + > + fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; > + fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; > + fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; > + fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; > + fsopt->congestion_kb = default_congestion_kb(); > + spin_lock_init(&fsopt->ceph_opt_lock); > + } > + > + pctx->copts = ceph_alloc_options(copts); > + if (!pctx->copts) > + goto nomem; > > fc->fs_private = pctx; > fc->ops = &ceph_context_ops; > @@ -1232,7 +1321,6 @@ static int ceph_init_fs_context(struct fs_context *fc) > > nomem: > destroy_mount_options(pctx->opts); > - ceph_destroy_options(pctx->copts); > kfree(pctx); > return -ENOMEM; > } > diff --git a/fs/ceph/super.h b/fs/ceph/super.h > index 2acb09980432..ad44b98f3c3b 100644 > --- a/fs/ceph/super.h > +++ b/fs/ceph/super.h > @@ -95,6 +95,8 @@ struct ceph_mount_options { > char *mds_namespace; /* default NULL */ > char *server_path; /* default "/" */ > char *fscache_uniq; /* default NULL */ > + > + spinlock_t ceph_opt_lock; > }; > > struct ceph_fs_client { > diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h > index 8fe9b80e80a5..407645adb2ad 100644 > --- a/include/linux/ceph/libceph.h > +++ b/include/linux/ceph/libceph.h > @@ -281,11 +281,13 @@ extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); > extern void *ceph_kvmalloc(size_t size, gfp_t flags); > > struct fs_parameter; > -struct ceph_options *ceph_alloc_options(void); > +struct ceph_options *ceph_alloc_options(struct ceph_options *old); > int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt, > struct fs_context *fc); > int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, > struct fs_context *fc); > +int ceph_reconfigure_copts(struct fs_context *fc, struct ceph_options *new_opts, > + struct ceph_options *opts); > int ceph_print_client_options(struct seq_file *m, struct ceph_client *client, > bool show_all); > extern void ceph_destroy_options(struct ceph_options *opt); > diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c > index a9d6c97b5b0d..39e628996595 100644 > --- a/net/ceph/ceph_common.c > +++ b/net/ceph/ceph_common.c > @@ -295,7 +295,7 @@ static const struct fs_parameter_description ceph_parameters = { > .specs = ceph_param_specs, > }; > > -struct ceph_options *ceph_alloc_options(void) > +struct ceph_options *ceph_alloc_options(struct ceph_options *old) > { > struct ceph_options *opt; > > @@ -305,17 +305,49 @@ struct ceph_options *ceph_alloc_options(void) > > opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr), > GFP_KERNEL); > - if (!opt->mon_addr) { > - kfree(opt); > - return NULL; > - } > + if (!opt->mon_addr) > + goto err; > + > + if (old) { > + memcpy(&opt->my_addr, &old->my_addr, sizeof(opt->my_addr)); > + memcpy(&opt->fsid, &old->fsid, sizeof(opt->fsid)); > + if (old->name) { > + opt->name = kstrdup(old->name, GFP_KERNEL); > + if (!opt->name) > + goto err; > + } > > - opt->flags = CEPH_OPT_DEFAULT; > - opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; > - opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; > - opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; > - opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT; > + if (old->key) { > + opt->key = kmalloc(sizeof(*opt->key), GFP_KERNEL); > + if (!opt->key) > + goto err; > + > + opt->key->type = old->key->type; > + opt->key->created.tv_sec = old->key->created.tv_sec; > + opt->key->created.tv_nsec = old->key->created.tv_nsec; > + opt->key->len = old->key->len; > + memcpy(opt->key->key, old->key->key, old->key->len); > + } > + > + opt->osd_keepalive_timeout = old->osd_keepalive_timeout; > + opt->osd_idle_ttl = old->osd_idle_ttl; > + opt->mount_timeout = old->mount_timeout; > + opt->osd_request_timeout = old->osd_request_timeout; > + opt->flags = old->flags; > + } else { > + opt->flags = CEPH_OPT_DEFAULT; > + opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; > + opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; > + opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; > + opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT; > + } > return opt; > + > +err: > + kfree(opt->name); > + kfree(opt->mon_addr); > + kfree(opt); > + return NULL; > } > EXPORT_SYMBOL(ceph_alloc_options); > > @@ -534,6 +566,37 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, > } > EXPORT_SYMBOL(ceph_parse_param); > > +int ceph_reconfigure_copts(struct fs_context *fc, struct ceph_options *new_opts, > + struct ceph_options *opts) > +{ > + if (memcmp(&new_opts->my_addr, &opts->my_addr, > + sizeof(opts->my_addr))) > + return invalf(fc, "ceph: reconfiguration of ip not allowed"); > + > + if (memcmp(&new_opts->fsid, &opts->fsid, sizeof(opts->fsid))) > + return invalf(fc, "ceph: reconfiguration of fsid not allowed"); > + > + if (strcmp_null(new_opts->name, opts->name)) > + return invalf(fc, "ceph: reconfiguration of name not allowed"); > + > + if (new_opts->key && (!opts->key || > + new_opts->key->type != opts->key->type || > + new_opts->key->created.tv_sec != opts->key->created.tv_sec || > + new_opts->key->created.tv_nsec != opts->key->created.tv_nsec || > + new_opts->key->len != opts->key->len || > + memcmp(new_opts->key->key, opts->key->key, opts->key->len))) > + return invalf(fc, "ceph: reconfiguration of secret not allowed"); > + > + opts->osd_keepalive_timeout = new_opts->osd_keepalive_timeout; > + opts->osd_idle_ttl = new_opts->osd_idle_ttl; > + opts->mount_timeout = new_opts->mount_timeout; > + opts->osd_request_timeout = new_opts->osd_request_timeout; What is the use case for reconfiguring any of these options? They are all low level and aren't really meant to be reconfigured. > + opts->flags = new_opts->flags; This is particularly true for flags. Reconfiguring tcp_nodelay will not have effect on sockets that are already open, reconfiguring noshare will not unshare the client instance, etc. Messing with crc or cephx_sign_messages is very likely to hang the client. Thanks, Ilya
On Wed, 2020-02-12 at 03:54 -0500, xiubli@redhat.com wrote: > From: Xiubo Li <xiubli@redhat.com> > > This will enable the remount and reconfiguring superblock params > for the fs. Currently some mount options are not allowed to be > reconfigured. > > It will working like: > $ mount.ceph :/ /mnt/cephfs -o remount,mount_timeout=100 > > URL:https://tracker.ceph.com/issues/44071 > Signed-off-by: Xiubo Li <xiubli@redhat.com> > --- > block/bfq-cgroup.c | 1 + > drivers/block/rbd.c | 2 +- > fs/ceph/caps.c | 2 + > fs/ceph/mds_client.c | 5 +- > fs/ceph/super.c | 126 +++++++++++++++++++++++++++++------ > fs/ceph/super.h | 2 + > include/linux/ceph/libceph.h | 4 +- > net/ceph/ceph_common.c | 83 ++++++++++++++++++++--- > 8 files changed, 192 insertions(+), 33 deletions(-) > > diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c > index e1419edde2ec..b3d42200182e 100644 > --- a/block/bfq-cgroup.c > +++ b/block/bfq-cgroup.c > @@ -12,6 +12,7 @@ > #include <linux/ioprio.h> > #include <linux/sbitmap.h> > #include <linux/delay.h> > +#include <linux/rbtree.h> > > #include "bfq-iosched.h" > > diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c > index 4e494d5600cc..470de27cf809 100644 > --- a/drivers/block/rbd.c > +++ b/drivers/block/rbd.c > @@ -6573,7 +6573,7 @@ static int rbd_add_parse_args(const char *buf, > *(snap_name + len) = '\0'; > pctx.spec->snap_name = snap_name; > > - pctx.copts = ceph_alloc_options(); > + pctx.copts = ceph_alloc_options(NULL); > if (!pctx.copts) > goto out_mem; > > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c > index b4f122eb74bb..020f83186f94 100644 > --- a/fs/ceph/caps.c > +++ b/fs/ceph/caps.c > @@ -491,10 +491,12 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc, > { > struct ceph_mount_options *opt = mdsc->fsc->mount_options; > > + spin_lock(&opt->ceph_opt_lock); > ci->i_hold_caps_min = round_jiffies(jiffies + > opt->caps_wanted_delay_min * HZ); > ci->i_hold_caps_max = round_jiffies(jiffies + > opt->caps_wanted_delay_max * HZ); > + spin_unlock(&opt->ceph_opt_lock); > dout("__cap_set_timeouts %p min %lu max %lu\n", &ci->vfs_inode, > ci->i_hold_caps_min - jiffies, ci->i_hold_caps_max - jiffies); > } > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c > index 376e7cf1685f..451c3727cd0b 100644 > --- a/fs/ceph/mds_client.c > +++ b/fs/ceph/mds_client.c > @@ -2099,6 +2099,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, > struct ceph_inode_info *ci = ceph_inode(dir); > struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; > struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options; > + unsigned int max_readdir = opt->max_readdir; > size_t size = sizeof(struct ceph_mds_reply_dir_entry); > unsigned int num_entries; > int order; > @@ -2107,7 +2108,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, > num_entries = ci->i_files + ci->i_subdirs; > spin_unlock(&ci->i_ceph_lock); > num_entries = max(num_entries, 1U); > - num_entries = min(num_entries, opt->max_readdir); > + num_entries = min(num_entries, max_readdir); > > order = get_order(size * num_entries); > while (order >= 0) { > @@ -2122,7 +2123,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, > return -ENOMEM; > > num_entries = (PAGE_SIZE << order) / size; > - num_entries = min(num_entries, opt->max_readdir); > + num_entries = min(num_entries, max_readdir); > > rinfo->dir_buf_size = PAGE_SIZE << order; > req->r_num_caps = num_entries + 1; > diff --git a/fs/ceph/super.c b/fs/ceph/super.c > index 9a21054059f2..8df506dd9039 100644 > --- a/fs/ceph/super.c > +++ b/fs/ceph/super.c > @@ -1175,7 +1175,57 @@ static void ceph_free_fc(struct fs_context *fc) > > static int ceph_reconfigure_fc(struct fs_context *fc) > { > - sync_filesystem(fc->root->d_sb); > + struct super_block *sb = fc->root->d_sb; > + struct ceph_fs_client *fsc = ceph_sb_to_client(sb); > + struct ceph_mount_options *fsopt = fsc->mount_options; > + struct ceph_parse_opts_ctx *pctx = fc->fs_private; > + struct ceph_mount_options *new_fsopt = pctx->opts; > + int ret; > + > + sync_filesystem(sb); > + > + ret = ceph_reconfigure_copts(fc, pctx->copts, fsc->client->options); > + if (ret) > + return ret; > + > + if (new_fsopt->snapdir_name != fsopt->snapdir_name) > + return invalf(fc, "ceph: reconfiguration of snapdir_name not allowed"); > + > + if (new_fsopt->mds_namespace != fsopt->mds_namespace) > + return invalf(fc, "ceph: reconfiguration of mds_namespace not allowed"); > + > + if (new_fsopt->wsize != fsopt->wsize) > + return invalf(fc, "ceph: reconfiguration of wsize not allowed"); > + if (new_fsopt->rsize != fsopt->rsize) > + return invalf(fc, "ceph: reconfiguration of rsize not allowed"); > + if (new_fsopt->rasize != fsopt->rasize) > + return invalf(fc, "ceph: reconfiguration of rasize not allowed"); > + Odd. I would think the wsize, rsize and rasize are things you _could_ reconfigure at remount time. In any case, I agree with Ilya. Not everything can be changed on a remount. It'd be best to identify some small subset of mount options that you do need to allow to be changed, and ensure we can do that. > +#ifdef CONFIG_CEPH_FSCACHE > + if (strcmp_null(new_fsopt->fscache_uniq, fsopt->fscache_uniq)) > + return invalf(fc, "ceph: reconfiguration of fscache not allowed"); > +#endif > + > + fsopt->flags = new_fsopt->flags; > + > + spin_lock(&fsopt->ceph_opt_lock); > + fsopt->caps_wanted_delay_min = new_fsopt->caps_wanted_delay_min; > + fsopt->caps_wanted_delay_max = new_fsopt->caps_wanted_delay_max; > + spin_unlock(&fsopt->ceph_opt_lock); > + > + fsopt->max_readdir_bytes = new_fsopt->max_readdir_bytes; > + fsopt->congestion_kb = new_fsopt->congestion_kb; > + > + fsopt->caps_max = new_fsopt->caps_max; > + fsopt->max_readdir = new_fsopt->max_readdir; > + ceph_adjust_caps_max_min(fsc->mdsc, fsopt); > + > +#ifdef CONFIG_CEPH_FS_POSIX_ACL > + if (fc->sb_flags & SB_POSIXACL) > + sb->s_flags |= SB_POSIXACL; > + else > + sb->s_flags &= ~SB_POSIXACL; > +#endif > return 0; > } > > @@ -1193,38 +1243,77 @@ static int ceph_init_fs_context(struct fs_context *fc) > { > struct ceph_parse_opts_ctx *pctx; > struct ceph_mount_options *fsopt; > + struct ceph_options *copts = NULL; > > pctx = kzalloc(sizeof(*pctx), GFP_KERNEL); > if (!pctx) > return -ENOMEM; > > - pctx->copts = ceph_alloc_options(); > - if (!pctx->copts) > - goto nomem; > - > pctx->opts = kzalloc(sizeof(*pctx->opts), GFP_KERNEL); > if (!pctx->opts) > goto nomem; > > fsopt = pctx->opts; > - fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; > > - fsopt->wsize = CEPH_MAX_WRITE_SIZE; > - fsopt->rsize = CEPH_MAX_READ_SIZE; > - fsopt->rasize = CEPH_RASIZE_DEFAULT; > - fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); > - if (!fsopt->snapdir_name) > - goto nomem; > +#ifdef CONFIG_CEPH_FS_POSIX_ACL > + fc->sb_flags |= SB_POSIXACL; > +#endif > + > + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { > + struct super_block *sb = fc->root->d_sb; > + struct ceph_fs_client *fsc = ceph_sb_to_client(sb); > + struct ceph_mount_options *old = fsc->mount_options; > + > + copts = fsc->client->options; > + > + fsopt->flags = old->flags; > > - fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; > - fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; > - fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; > - fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; > - fsopt->congestion_kb = default_congestion_kb(); > + fsopt->wsize = old->wsize; > + fsopt->rsize = old->rsize; > + fsopt->rasize = old->rasize; > + > + fsopt->fscache_uniq = kstrdup(old->fscache_uniq, GFP_KERNEL); > + if (!fsopt->fscache_uniq) > + goto nomem; > + > + fsopt->snapdir_name = kstrdup(old->snapdir_name, GFP_KERNEL); > + if (!fsopt->snapdir_name) > + goto nomem; > + > + fsopt->caps_wanted_delay_min = old->caps_wanted_delay_min; > + fsopt->caps_wanted_delay_max = old->caps_wanted_delay_max; > + fsopt->max_readdir = old->max_readdir; > + fsopt->max_readdir_bytes = old->max_readdir_bytes; > + fsopt->congestion_kb = old->congestion_kb; > + fsopt->caps_max = old->caps_max; > + fsopt->max_readdir = old->max_readdir; > > #ifdef CONFIG_CEPH_FS_POSIX_ACL > - fc->sb_flags |= SB_POSIXACL; > + if (!(sb->s_flags & SB_POSIXACL)) > + fc->sb_flags &= ~SB_POSIXACL; > #endif > + } else { > + fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; > + > + fsopt->wsize = CEPH_MAX_WRITE_SIZE; > + fsopt->rsize = CEPH_MAX_READ_SIZE; > + fsopt->rasize = CEPH_RASIZE_DEFAULT; > + > + fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); > + if (!fsopt->snapdir_name) > + goto nomem; > + > + fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; > + fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; > + fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; > + fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; > + fsopt->congestion_kb = default_congestion_kb(); > + spin_lock_init(&fsopt->ceph_opt_lock); > + } > + > + pctx->copts = ceph_alloc_options(copts); > + if (!pctx->copts) > + goto nomem; > > fc->fs_private = pctx; > fc->ops = &ceph_context_ops; > @@ -1232,7 +1321,6 @@ static int ceph_init_fs_context(struct fs_context *fc) > > nomem: > destroy_mount_options(pctx->opts); > - ceph_destroy_options(pctx->copts); > kfree(pctx); > return -ENOMEM; > } > diff --git a/fs/ceph/super.h b/fs/ceph/super.h > index 2acb09980432..ad44b98f3c3b 100644 > --- a/fs/ceph/super.h > +++ b/fs/ceph/super.h > @@ -95,6 +95,8 @@ struct ceph_mount_options { > char *mds_namespace; /* default NULL */ > char *server_path; /* default "/" */ > char *fscache_uniq; /* default NULL */ > + > + spinlock_t ceph_opt_lock; I'm not sure we really need an extra lock around these fields, particularly if you're intending to only allow a few different things to be changed at remount. > }; > > struct ceph_fs_client { > diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h > index 8fe9b80e80a5..407645adb2ad 100644 > --- a/include/linux/ceph/libceph.h > +++ b/include/linux/ceph/libceph.h > @@ -281,11 +281,13 @@ extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); > extern void *ceph_kvmalloc(size_t size, gfp_t flags); > > struct fs_parameter; > -struct ceph_options *ceph_alloc_options(void); > +struct ceph_options *ceph_alloc_options(struct ceph_options *old); > int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt, > struct fs_context *fc); > int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, > struct fs_context *fc); > +int ceph_reconfigure_copts(struct fs_context *fc, struct ceph_options *new_opts, > + struct ceph_options *opts); > int ceph_print_client_options(struct seq_file *m, struct ceph_client *client, > bool show_all); > extern void ceph_destroy_options(struct ceph_options *opt); > diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c > index a9d6c97b5b0d..39e628996595 100644 > --- a/net/ceph/ceph_common.c > +++ b/net/ceph/ceph_common.c > @@ -295,7 +295,7 @@ static const struct fs_parameter_description ceph_parameters = { > .specs = ceph_param_specs, > }; > > -struct ceph_options *ceph_alloc_options(void) > +struct ceph_options *ceph_alloc_options(struct ceph_options *old) > { > struct ceph_options *opt; > > @@ -305,17 +305,49 @@ struct ceph_options *ceph_alloc_options(void) > > opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr), > GFP_KERNEL); > - if (!opt->mon_addr) { > - kfree(opt); > - return NULL; > - } > + if (!opt->mon_addr) > + goto err; > + > + if (old) { > + memcpy(&opt->my_addr, &old->my_addr, sizeof(opt->my_addr)); > + memcpy(&opt->fsid, &old->fsid, sizeof(opt->fsid)); > + if (old->name) { > + opt->name = kstrdup(old->name, GFP_KERNEL); > + if (!opt->name) > + goto err; > + } > > - opt->flags = CEPH_OPT_DEFAULT; > - opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; > - opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; > - opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; > - opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT; > + if (old->key) { > + opt->key = kmalloc(sizeof(*opt->key), GFP_KERNEL); > + if (!opt->key) > + goto err; > + > + opt->key->type = old->key->type; > + opt->key->created.tv_sec = old->key->created.tv_sec; > + opt->key->created.tv_nsec = old->key->created.tv_nsec; > + opt->key->len = old->key->len; > + memcpy(opt->key->key, old->key->key, old->key->len); > + } > + > + opt->osd_keepalive_timeout = old->osd_keepalive_timeout; > + opt->osd_idle_ttl = old->osd_idle_ttl; > + opt->mount_timeout = old->mount_timeout; > + opt->osd_request_timeout = old->osd_request_timeout; > + opt->flags = old->flags; > + } else { > + opt->flags = CEPH_OPT_DEFAULT; > + opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; > + opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; > + opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; > + opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT; > + } > return opt; > + > +err: > + kfree(opt->name); > + kfree(opt->mon_addr); > + kfree(opt); > + return NULL; > } > EXPORT_SYMBOL(ceph_alloc_options); > > @@ -534,6 +566,37 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, > } > EXPORT_SYMBOL(ceph_parse_param); > > +int ceph_reconfigure_copts(struct fs_context *fc, struct ceph_options *new_opts, > + struct ceph_options *opts) > +{ > + if (memcmp(&new_opts->my_addr, &opts->my_addr, > + sizeof(opts->my_addr))) > + return invalf(fc, "ceph: reconfiguration of ip not allowed"); > + > + if (memcmp(&new_opts->fsid, &opts->fsid, sizeof(opts->fsid))) > + return invalf(fc, "ceph: reconfiguration of fsid not allowed"); > + > + if (strcmp_null(new_opts->name, opts->name)) > + return invalf(fc, "ceph: reconfiguration of name not allowed"); > + > + if (new_opts->key && (!opts->key || > + new_opts->key->type != opts->key->type || > + new_opts->key->created.tv_sec != opts->key->created.tv_sec || > + new_opts->key->created.tv_nsec != opts->key->created.tv_nsec || > + new_opts->key->len != opts->key->len || > + memcmp(new_opts->key->key, opts->key->key, opts->key->len))) > + return invalf(fc, "ceph: reconfiguration of secret not allowed"); > + > + opts->osd_keepalive_timeout = new_opts->osd_keepalive_timeout; > + opts->osd_idle_ttl = new_opts->osd_idle_ttl; > + opts->mount_timeout = new_opts->mount_timeout; > + opts->osd_request_timeout = new_opts->osd_request_timeout; > + opts->flags = new_opts->flags; > + > + return 0; > +} > +EXPORT_SYMBOL(ceph_reconfigure_copts); > + > int ceph_print_client_options(struct seq_file *m, struct ceph_client *client, > bool show_all) > {
On 2020/2/12 19:17, Ilya Dryomov wrote: > On Wed, Feb 12, 2020 at 9:55 AM <xiubli@redhat.com> wrote: >> From: Xiubo Li <xiubli@redhat.com> >> >> This will enable the remount and reconfiguring superblock params >> for the fs. Currently some mount options are not allowed to be >> reconfigured. >> >> It will working like: >> $ mount.ceph :/ /mnt/cephfs -o remount,mount_timeout=100 >> >> URL:https://tracker.ceph.com/issues/44071 >> Signed-off-by: Xiubo Li <xiubli@redhat.com> >> --- >> block/bfq-cgroup.c | 1 + >> drivers/block/rbd.c | 2 +- >> fs/ceph/caps.c | 2 + >> fs/ceph/mds_client.c | 5 +- >> fs/ceph/super.c | 126 +++++++++++++++++++++++++++++------ >> fs/ceph/super.h | 2 + >> include/linux/ceph/libceph.h | 4 +- >> net/ceph/ceph_common.c | 83 ++++++++++++++++++++--- >> 8 files changed, 192 insertions(+), 33 deletions(-) >> >> diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c >> index e1419edde2ec..b3d42200182e 100644 >> --- a/block/bfq-cgroup.c >> +++ b/block/bfq-cgroup.c >> @@ -12,6 +12,7 @@ >> #include <linux/ioprio.h> >> #include <linux/sbitmap.h> >> #include <linux/delay.h> >> +#include <linux/rbtree.h> > Hi Xiubo, > > This hunk touches the block layer. Ah, okay, I thought I have removed this. [...] >> + if (memcmp(&new_opts->fsid, &opts->fsid, sizeof(opts->fsid))) >> + return invalf(fc, "ceph: reconfiguration of fsid not allowed"); >> + >> + if (strcmp_null(new_opts->name, opts->name)) >> + return invalf(fc, "ceph: reconfiguration of name not allowed"); >> + >> + if (new_opts->key && (!opts->key || >> + new_opts->key->type != opts->key->type || >> + new_opts->key->created.tv_sec != opts->key->created.tv_sec || >> + new_opts->key->created.tv_nsec != opts->key->created.tv_nsec || >> + new_opts->key->len != opts->key->len || >> + memcmp(new_opts->key->key, opts->key->key, opts->key->len))) >> + return invalf(fc, "ceph: reconfiguration of secret not allowed"); >> + >> + opts->osd_keepalive_timeout = new_opts->osd_keepalive_timeout; >> + opts->osd_idle_ttl = new_opts->osd_idle_ttl; >> + opts->mount_timeout = new_opts->mount_timeout; >> + opts->osd_request_timeout = new_opts->osd_request_timeout; > What is the use case for reconfiguring any of these options? They are > all low level and aren't really meant to be reconfigured. Okay, actually now these are not needed to reconfigured. >> + opts->flags = new_opts->flags; > This is particularly true for flags. Reconfiguring tcp_nodelay will > not have effect on sockets that are already open, reconfiguring noshare > will not unshare the client instance, etc. Messing with crc or > cephx_sign_messages is very likely to hang the client. Okay, will check this more. > Thanks, > > Ilya >
On 2020/2/12 20:01, Jeff Layton wrote: > On Wed, 2020-02-12 at 03:54 -0500, xiubli@redhat.com wrote: >> From: Xiubo Li <xiubli@redhat.com> >> >> This will enable the remount and reconfiguring superblock params >> for the fs. Currently some mount options are not allowed to be >> reconfigured. >> >> It will working like: >> $ mount.ceph :/ /mnt/cephfs -o remount,mount_timeout=100 >> >> URL:https://tracker.ceph.com/issues/44071 >> Signed-off-by: Xiubo Li <xiubli@redhat.com> >> --- >> block/bfq-cgroup.c | 1 + >> drivers/block/rbd.c | 2 +- >> fs/ceph/caps.c | 2 + >> fs/ceph/mds_client.c | 5 +- >> fs/ceph/super.c | 126 +++++++++++++++++++++++++++++------ >> fs/ceph/super.h | 2 + >> include/linux/ceph/libceph.h | 4 +- >> net/ceph/ceph_common.c | 83 ++++++++++++++++++++--- >> 8 files changed, 192 insertions(+), 33 deletions(-) >> >> diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c >> index e1419edde2ec..b3d42200182e 100644 >> --- a/block/bfq-cgroup.c >> +++ b/block/bfq-cgroup.c >> @@ -12,6 +12,7 @@ >> #include <linux/ioprio.h> >> #include <linux/sbitmap.h> >> #include <linux/delay.h> >> +#include <linux/rbtree.h> >> >> #include "bfq-iosched.h" >> >> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c >> index 4e494d5600cc..470de27cf809 100644 >> --- a/drivers/block/rbd.c >> +++ b/drivers/block/rbd.c >> @@ -6573,7 +6573,7 @@ static int rbd_add_parse_args(const char *buf, >> *(snap_name + len) = '\0'; >> pctx.spec->snap_name = snap_name; >> >> - pctx.copts = ceph_alloc_options(); >> + pctx.copts = ceph_alloc_options(NULL); >> if (!pctx.copts) >> goto out_mem; >> >> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c >> index b4f122eb74bb..020f83186f94 100644 >> --- a/fs/ceph/caps.c >> +++ b/fs/ceph/caps.c >> @@ -491,10 +491,12 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc, >> { >> struct ceph_mount_options *opt = mdsc->fsc->mount_options; >> >> + spin_lock(&opt->ceph_opt_lock); >> ci->i_hold_caps_min = round_jiffies(jiffies + >> opt->caps_wanted_delay_min * HZ); >> ci->i_hold_caps_max = round_jiffies(jiffies + >> opt->caps_wanted_delay_max * HZ); >> + spin_unlock(&opt->ceph_opt_lock); >> dout("__cap_set_timeouts %p min %lu max %lu\n", &ci->vfs_inode, >> ci->i_hold_caps_min - jiffies, ci->i_hold_caps_max - jiffies); >> } >> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c >> index 376e7cf1685f..451c3727cd0b 100644 >> --- a/fs/ceph/mds_client.c >> +++ b/fs/ceph/mds_client.c >> @@ -2099,6 +2099,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, >> struct ceph_inode_info *ci = ceph_inode(dir); >> struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; >> struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options; >> + unsigned int max_readdir = opt->max_readdir; >> size_t size = sizeof(struct ceph_mds_reply_dir_entry); >> unsigned int num_entries; >> int order; >> @@ -2107,7 +2108,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, >> num_entries = ci->i_files + ci->i_subdirs; >> spin_unlock(&ci->i_ceph_lock); >> num_entries = max(num_entries, 1U); >> - num_entries = min(num_entries, opt->max_readdir); >> + num_entries = min(num_entries, max_readdir); >> >> order = get_order(size * num_entries); >> while (order >= 0) { >> @@ -2122,7 +2123,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, >> return -ENOMEM; >> >> num_entries = (PAGE_SIZE << order) / size; >> - num_entries = min(num_entries, opt->max_readdir); >> + num_entries = min(num_entries, max_readdir); >> >> rinfo->dir_buf_size = PAGE_SIZE << order; >> req->r_num_caps = num_entries + 1; >> diff --git a/fs/ceph/super.c b/fs/ceph/super.c >> index 9a21054059f2..8df506dd9039 100644 >> --- a/fs/ceph/super.c >> +++ b/fs/ceph/super.c >> @@ -1175,7 +1175,57 @@ static void ceph_free_fc(struct fs_context *fc) >> >> static int ceph_reconfigure_fc(struct fs_context *fc) >> { >> - sync_filesystem(fc->root->d_sb); >> + struct super_block *sb = fc->root->d_sb; >> + struct ceph_fs_client *fsc = ceph_sb_to_client(sb); >> + struct ceph_mount_options *fsopt = fsc->mount_options; >> + struct ceph_parse_opts_ctx *pctx = fc->fs_private; >> + struct ceph_mount_options *new_fsopt = pctx->opts; >> + int ret; >> + >> + sync_filesystem(sb); >> + >> + ret = ceph_reconfigure_copts(fc, pctx->copts, fsc->client->options); >> + if (ret) >> + return ret; >> + >> + if (new_fsopt->snapdir_name != fsopt->snapdir_name) >> + return invalf(fc, "ceph: reconfiguration of snapdir_name not allowed"); >> + >> + if (new_fsopt->mds_namespace != fsopt->mds_namespace) >> + return invalf(fc, "ceph: reconfiguration of mds_namespace not allowed"); >> + >> + if (new_fsopt->wsize != fsopt->wsize) >> + return invalf(fc, "ceph: reconfiguration of wsize not allowed"); >> + if (new_fsopt->rsize != fsopt->rsize) >> + return invalf(fc, "ceph: reconfiguration of rsize not allowed"); >> + if (new_fsopt->rasize != fsopt->rasize) >> + return invalf(fc, "ceph: reconfiguration of rasize not allowed"); >> + > Odd. I would think the wsize, rsize and rasize are things you _could_ > reconfigure at remount time. There has some race for the wsize, > > In any case, I agree with Ilya. Not everything can be changed on a > remount. It'd be best to identify some small subset of mount options > that you do need to allow to be changed, and ensure we can do that. Yeah, I have disabled some already which may be racing when changing them in remount. Will disable the low level ones and some others in next version. >> diff --git a/fs/ceph/super.h b/fs/ceph/super.h >> index 2acb09980432..ad44b98f3c3b 100644 >> --- a/fs/ceph/super.h >> +++ b/fs/ceph/super.h >> @@ -95,6 +95,8 @@ struct ceph_mount_options { >> char *mds_namespace; /* default NULL */ >> char *server_path; /* default "/" */ >> char *fscache_uniq; /* default NULL */ >> + >> + spinlock_t ceph_opt_lock; > I'm not sure we really need an extra lock around these fields, > particularly if you're intending to only allow a few different things to > be changed at remount. This will only protect the "fsopt->caps_wanted_delay_min" and "fsopt->caps_wanted_delay_min", just in case we are changing them both which may be racing with __cap_set_timeouts(). For example: The old range is [40, 60] and if the new range is [10, 30], we may hit the [i_hold_caps_min, i_hold_caps_max] are set as [40, 30]. Thanks, BRs
On Wed, 2020-02-12 at 23:01 +0800, Xiubo Li wrote: > On 2020/2/12 20:01, Jeff Layton wrote: > > On Wed, 2020-02-12 at 03:54 -0500, xiubli@redhat.com wrote: > > > From: Xiubo Li <xiubli@redhat.com> > > > > > > This will enable the remount and reconfiguring superblock params > > > for the fs. Currently some mount options are not allowed to be > > > reconfigured. > > > > > > It will working like: > > > $ mount.ceph :/ /mnt/cephfs -o remount,mount_timeout=100 > > > > > > URL:https://tracker.ceph.com/issues/44071 > > > Signed-off-by: Xiubo Li <xiubli@redhat.com> > > > --- > > > block/bfq-cgroup.c | 1 + > > > drivers/block/rbd.c | 2 +- > > > fs/ceph/caps.c | 2 + > > > fs/ceph/mds_client.c | 5 +- > > > fs/ceph/super.c | 126 +++++++++++++++++++++++++++++------ > > > fs/ceph/super.h | 2 + > > > include/linux/ceph/libceph.h | 4 +- > > > net/ceph/ceph_common.c | 83 ++++++++++++++++++++--- > > > 8 files changed, 192 insertions(+), 33 deletions(-) > > > > > > diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c > > > index e1419edde2ec..b3d42200182e 100644 > > > --- a/block/bfq-cgroup.c > > > +++ b/block/bfq-cgroup.c > > > @@ -12,6 +12,7 @@ > > > #include <linux/ioprio.h> > > > #include <linux/sbitmap.h> > > > #include <linux/delay.h> > > > +#include <linux/rbtree.h> > > > > > > #include "bfq-iosched.h" > > > > > > diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c > > > index 4e494d5600cc..470de27cf809 100644 > > > --- a/drivers/block/rbd.c > > > +++ b/drivers/block/rbd.c > > > @@ -6573,7 +6573,7 @@ static int rbd_add_parse_args(const char *buf, > > > *(snap_name + len) = '\0'; > > > pctx.spec->snap_name = snap_name; > > > > > > - pctx.copts = ceph_alloc_options(); > > > + pctx.copts = ceph_alloc_options(NULL); > > > if (!pctx.copts) > > > goto out_mem; > > > > > > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c > > > index b4f122eb74bb..020f83186f94 100644 > > > --- a/fs/ceph/caps.c > > > +++ b/fs/ceph/caps.c > > > @@ -491,10 +491,12 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc, > > > { > > > struct ceph_mount_options *opt = mdsc->fsc->mount_options; > > > > > > + spin_lock(&opt->ceph_opt_lock); > > > ci->i_hold_caps_min = round_jiffies(jiffies + > > > opt->caps_wanted_delay_min * HZ); > > > ci->i_hold_caps_max = round_jiffies(jiffies + > > > opt->caps_wanted_delay_max * HZ); > > > + spin_unlock(&opt->ceph_opt_lock); > > > dout("__cap_set_timeouts %p min %lu max %lu\n", &ci->vfs_inode, > > > ci->i_hold_caps_min - jiffies, ci->i_hold_caps_max - jiffies); > > > } > > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c > > > index 376e7cf1685f..451c3727cd0b 100644 > > > --- a/fs/ceph/mds_client.c > > > +++ b/fs/ceph/mds_client.c > > > @@ -2099,6 +2099,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, > > > struct ceph_inode_info *ci = ceph_inode(dir); > > > struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; > > > struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options; > > > + unsigned int max_readdir = opt->max_readdir; > > > size_t size = sizeof(struct ceph_mds_reply_dir_entry); > > > unsigned int num_entries; > > > int order; > > > @@ -2107,7 +2108,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, > > > num_entries = ci->i_files + ci->i_subdirs; > > > spin_unlock(&ci->i_ceph_lock); > > > num_entries = max(num_entries, 1U); > > > - num_entries = min(num_entries, opt->max_readdir); > > > + num_entries = min(num_entries, max_readdir); > > > > > > order = get_order(size * num_entries); > > > while (order >= 0) { > > > @@ -2122,7 +2123,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, > > > return -ENOMEM; > > > > > > num_entries = (PAGE_SIZE << order) / size; > > > - num_entries = min(num_entries, opt->max_readdir); > > > + num_entries = min(num_entries, max_readdir); > > > > > > rinfo->dir_buf_size = PAGE_SIZE << order; > > > req->r_num_caps = num_entries + 1; > > > diff --git a/fs/ceph/super.c b/fs/ceph/super.c > > > index 9a21054059f2..8df506dd9039 100644 > > > --- a/fs/ceph/super.c > > > +++ b/fs/ceph/super.c > > > @@ -1175,7 +1175,57 @@ static void ceph_free_fc(struct fs_context *fc) > > > > > > static int ceph_reconfigure_fc(struct fs_context *fc) > > > { > > > - sync_filesystem(fc->root->d_sb); > > > + struct super_block *sb = fc->root->d_sb; > > > + struct ceph_fs_client *fsc = ceph_sb_to_client(sb); > > > + struct ceph_mount_options *fsopt = fsc->mount_options; > > > + struct ceph_parse_opts_ctx *pctx = fc->fs_private; > > > + struct ceph_mount_options *new_fsopt = pctx->opts; > > > + int ret; > > > + > > > + sync_filesystem(sb); > > > + > > > + ret = ceph_reconfigure_copts(fc, pctx->copts, fsc->client->options); > > > + if (ret) > > > + return ret; > > > + > > > + if (new_fsopt->snapdir_name != fsopt->snapdir_name) > > > + return invalf(fc, "ceph: reconfiguration of snapdir_name not allowed"); > > > + > > > + if (new_fsopt->mds_namespace != fsopt->mds_namespace) > > > + return invalf(fc, "ceph: reconfiguration of mds_namespace not allowed"); > > > + > > > + if (new_fsopt->wsize != fsopt->wsize) > > > + return invalf(fc, "ceph: reconfiguration of wsize not allowed"); > > > + if (new_fsopt->rsize != fsopt->rsize) > > > + return invalf(fc, "ceph: reconfiguration of rsize not allowed"); > > > + if (new_fsopt->rasize != fsopt->rasize) > > > + return invalf(fc, "ceph: reconfiguration of rasize not allowed"); > > > + > > Odd. I would think the wsize, rsize and rasize are things you _could_ > > reconfigure at remount time. > > There has some race for the wsize, > > > > In any case, I agree with Ilya. Not everything can be changed on a > > remount. It'd be best to identify some small subset of mount options > > that you do need to allow to be changed, and ensure we can do that. > > Yeah, I have disabled some already which may be racing when changing > them in remount. > > Will disable the low level ones and some others in next version. > > > > > diff --git a/fs/ceph/super.h b/fs/ceph/super.h > > > index 2acb09980432..ad44b98f3c3b 100644 > > > --- a/fs/ceph/super.h > > > +++ b/fs/ceph/super.h > > > @@ -95,6 +95,8 @@ struct ceph_mount_options { > > > char *mds_namespace; /* default NULL */ > > > char *server_path; /* default "/" */ > > > char *fscache_uniq; /* default NULL */ > > > + > > > + spinlock_t ceph_opt_lock; > > I'm not sure we really need an extra lock around these fields, > > particularly if you're intending to only allow a few different things to > > be changed at remount. > > This will only protect the "fsopt->caps_wanted_delay_min" and > "fsopt->caps_wanted_delay_min", just in case we are changing them both > which may be racing with __cap_set_timeouts(). > > For example: > > The old range is [40, 60] and if the new range is [10, 30], we may hit > the [i_hold_caps_min, i_hold_caps_max] are set as [40, 30]. > For parameters that are linked like that, you could also consider a seqlock. Having to take a spinlock to just read sort of sucks, when most of the time there should be no change occurring.
On 2020/2/13 0:41, Jeff Layton wrote: > On Wed, 2020-02-12 at 23:01 +0800, Xiubo Li wrote: >> On 2020/2/12 20:01, Jeff Layton wrote: >>> On Wed, 2020-02-12 at 03:54 -0500, xiubli@redhat.com wrote: >>>> From: Xiubo Li <xiubli@redhat.com> >>>> >>>> This will enable the remount and reconfiguring superblock params >>>> for the fs. Currently some mount options are not allowed to be >>>> reconfigured. >>>> >>>> It will working like: >>>> $ mount.ceph :/ /mnt/cephfs -o remount,mount_timeout=100 >>>> >>>> URL:https://tracker.ceph.com/issues/44071 >>>> Signed-off-by: Xiubo Li <xiubli@redhat.com> >>>> --- >>>> block/bfq-cgroup.c | 1 + >>>> drivers/block/rbd.c | 2 +- >>>> fs/ceph/caps.c | 2 + >>>> fs/ceph/mds_client.c | 5 +- >>>> fs/ceph/super.c | 126 +++++++++++++++++++++++++++++------ >>>> fs/ceph/super.h | 2 + >>>> include/linux/ceph/libceph.h | 4 +- >>>> net/ceph/ceph_common.c | 83 ++++++++++++++++++++--- >>>> 8 files changed, 192 insertions(+), 33 deletions(-) >>>> >>>> diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c >>>> index e1419edde2ec..b3d42200182e 100644 >>>> --- a/block/bfq-cgroup.c >>>> +++ b/block/bfq-cgroup.c >>>> @@ -12,6 +12,7 @@ >>>> #include <linux/ioprio.h> >>>> #include <linux/sbitmap.h> >>>> #include <linux/delay.h> >>>> +#include <linux/rbtree.h> >>>> >>>> #include "bfq-iosched.h" >>>> >>>> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c >>>> index 4e494d5600cc..470de27cf809 100644 >>>> --- a/drivers/block/rbd.c >>>> +++ b/drivers/block/rbd.c >>>> @@ -6573,7 +6573,7 @@ static int rbd_add_parse_args(const char *buf, >>>> *(snap_name + len) = '\0'; >>>> pctx.spec->snap_name = snap_name; >>>> >>>> - pctx.copts = ceph_alloc_options(); >>>> + pctx.copts = ceph_alloc_options(NULL); >>>> if (!pctx.copts) >>>> goto out_mem; >>>> >>>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c >>>> index b4f122eb74bb..020f83186f94 100644 >>>> --- a/fs/ceph/caps.c >>>> +++ b/fs/ceph/caps.c >>>> @@ -491,10 +491,12 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc, >>>> { >>>> struct ceph_mount_options *opt = mdsc->fsc->mount_options; >>>> >>>> + spin_lock(&opt->ceph_opt_lock); >>>> ci->i_hold_caps_min = round_jiffies(jiffies + >>>> opt->caps_wanted_delay_min * HZ); >>>> ci->i_hold_caps_max = round_jiffies(jiffies + >>>> opt->caps_wanted_delay_max * HZ); >>>> + spin_unlock(&opt->ceph_opt_lock); >>>> dout("__cap_set_timeouts %p min %lu max %lu\n", &ci->vfs_inode, >>>> ci->i_hold_caps_min - jiffies, ci->i_hold_caps_max - jiffies); >>>> } >>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c >>>> index 376e7cf1685f..451c3727cd0b 100644 >>>> --- a/fs/ceph/mds_client.c >>>> +++ b/fs/ceph/mds_client.c >>>> @@ -2099,6 +2099,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, >>>> struct ceph_inode_info *ci = ceph_inode(dir); >>>> struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; >>>> struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options; >>>> + unsigned int max_readdir = opt->max_readdir; >>>> size_t size = sizeof(struct ceph_mds_reply_dir_entry); >>>> unsigned int num_entries; >>>> int order; >>>> @@ -2107,7 +2108,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, >>>> num_entries = ci->i_files + ci->i_subdirs; >>>> spin_unlock(&ci->i_ceph_lock); >>>> num_entries = max(num_entries, 1U); >>>> - num_entries = min(num_entries, opt->max_readdir); >>>> + num_entries = min(num_entries, max_readdir); >>>> >>>> order = get_order(size * num_entries); >>>> while (order >= 0) { >>>> @@ -2122,7 +2123,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, >>>> return -ENOMEM; >>>> >>>> num_entries = (PAGE_SIZE << order) / size; >>>> - num_entries = min(num_entries, opt->max_readdir); >>>> + num_entries = min(num_entries, max_readdir); >>>> >>>> rinfo->dir_buf_size = PAGE_SIZE << order; >>>> req->r_num_caps = num_entries + 1; >>>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c >>>> index 9a21054059f2..8df506dd9039 100644 >>>> --- a/fs/ceph/super.c >>>> +++ b/fs/ceph/super.c >>>> @@ -1175,7 +1175,57 @@ static void ceph_free_fc(struct fs_context *fc) >>>> >>>> static int ceph_reconfigure_fc(struct fs_context *fc) >>>> { >>>> - sync_filesystem(fc->root->d_sb); >>>> + struct super_block *sb = fc->root->d_sb; >>>> + struct ceph_fs_client *fsc = ceph_sb_to_client(sb); >>>> + struct ceph_mount_options *fsopt = fsc->mount_options; >>>> + struct ceph_parse_opts_ctx *pctx = fc->fs_private; >>>> + struct ceph_mount_options *new_fsopt = pctx->opts; >>>> + int ret; >>>> + >>>> + sync_filesystem(sb); >>>> + >>>> + ret = ceph_reconfigure_copts(fc, pctx->copts, fsc->client->options); >>>> + if (ret) >>>> + return ret; >>>> + >>>> + if (new_fsopt->snapdir_name != fsopt->snapdir_name) >>>> + return invalf(fc, "ceph: reconfiguration of snapdir_name not allowed"); >>>> + >>>> + if (new_fsopt->mds_namespace != fsopt->mds_namespace) >>>> + return invalf(fc, "ceph: reconfiguration of mds_namespace not allowed"); >>>> + >>>> + if (new_fsopt->wsize != fsopt->wsize) >>>> + return invalf(fc, "ceph: reconfiguration of wsize not allowed"); >>>> + if (new_fsopt->rsize != fsopt->rsize) >>>> + return invalf(fc, "ceph: reconfiguration of rsize not allowed"); >>>> + if (new_fsopt->rasize != fsopt->rasize) >>>> + return invalf(fc, "ceph: reconfiguration of rasize not allowed"); >>>> + >>> Odd. I would think the wsize, rsize and rasize are things you _could_ >>> reconfigure at remount time. >> There has some race for the wsize, >> >> >>> In any case, I agree with Ilya. Not everything can be changed on a >>> remount. It'd be best to identify some small subset of mount options >>> that you do need to allow to be changed, and ensure we can do that. >> Yeah, I have disabled some already which may be racing when changing >> them in remount. >> >> Will disable the low level ones and some others in next version. >> >> >>>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h >>>> index 2acb09980432..ad44b98f3c3b 100644 >>>> --- a/fs/ceph/super.h >>>> +++ b/fs/ceph/super.h >>>> @@ -95,6 +95,8 @@ struct ceph_mount_options { >>>> char *mds_namespace; /* default NULL */ >>>> char *server_path; /* default "/" */ >>>> char *fscache_uniq; /* default NULL */ >>>> + >>>> + spinlock_t ceph_opt_lock; >>> I'm not sure we really need an extra lock around these fields, >>> particularly if you're intending to only allow a few different things to >>> be changed at remount. >> This will only protect the "fsopt->caps_wanted_delay_min" and >> "fsopt->caps_wanted_delay_min", just in case we are changing them both >> which may be racing with __cap_set_timeouts(). >> >> For example: >> >> The old range is [40, 60] and if the new range is [10, 30], we may hit >> the [i_hold_caps_min, i_hold_caps_max] are set as [40, 30]. >> > For parameters that are linked like that, you could also consider a > seqlock. Having to take a spinlock to just read sort of sucks, when most > of the time there should be no change occurring. > Yeah, will try this. Thanks, BRs
On 2020/2/12 20:01, Jeff Layton wrote: > On Wed, 2020-02-12 at 03:54 -0500, xiubli@redhat.com wrote: [...] >> diff --git a/fs/ceph/super.c b/fs/ceph/super.c >> index 9a21054059f2..8df506dd9039 100644 >> --- a/fs/ceph/super.c >> +++ b/fs/ceph/super.c >> @@ -1175,7 +1175,57 @@ static void ceph_free_fc(struct fs_context *fc) >> >> static int ceph_reconfigure_fc(struct fs_context *fc) >> { >> - sync_filesystem(fc->root->d_sb); >> + struct super_block *sb = fc->root->d_sb; >> + struct ceph_fs_client *fsc = ceph_sb_to_client(sb); >> + struct ceph_mount_options *fsopt = fsc->mount_options; >> + struct ceph_parse_opts_ctx *pctx = fc->fs_private; >> + struct ceph_mount_options *new_fsopt = pctx->opts; >> + int ret; >> + >> + sync_filesystem(sb); >> + >> + ret = ceph_reconfigure_copts(fc, pctx->copts, fsc->client->options); >> + if (ret) >> + return ret; >> + >> + if (new_fsopt->snapdir_name != fsopt->snapdir_name) >> + return invalf(fc, "ceph: reconfiguration of snapdir_name not allowed"); >> + >> + if (new_fsopt->mds_namespace != fsopt->mds_namespace) >> + return invalf(fc, "ceph: reconfiguration of mds_namespace not allowed"); >> + >> + if (new_fsopt->wsize != fsopt->wsize) >> + return invalf(fc, "ceph: reconfiguration of wsize not allowed"); >> + if (new_fsopt->rsize != fsopt->rsize) >> + return invalf(fc, "ceph: reconfiguration of rsize not allowed"); >> + if (new_fsopt->rasize != fsopt->rasize) >> + return invalf(fc, "ceph: reconfiguration of rasize not allowed"); >> + > Odd. I would think the wsize, rsize and rasize are things you _could_ > reconfigure at remount time. > > In any case, I agree with Ilya. Not everything can be changed on a > remount. It'd be best to identify some small subset of mount options > that you do need to allow to be changed, and ensure we can do that. Checked this again, the wsize/rasize should be okay for this. But for the rsize, we need to apply the change to the bdi->io_pages, which will be used in the ondemand_readahead(), likes: 392 /* 393 * If the request exceeds the readahead window, allow the read to 394 * be up to the optimal hardware IO size 395 */ 396 if (req_size > max_pages && bdi->io_pages > max_pages) 397 max_pages = min(req_size, bdi->io_pages); For example: Assume the max_pages = 100, req_size = 1000, the old bdi->io_pages = 10. If the bdi->io_pages get changed from 10 --> 1000, between Line#396 and Line#397, after Line#397 we are expecting to get max_pages = 10, but actually we get 1000 instead. But this should be okay for the page cache readahead stuff, it doesn't matter here, right ? If so we can reconfigure the rsize then. Thanks.
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index e1419edde2ec..b3d42200182e 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -12,6 +12,7 @@ #include <linux/ioprio.h> #include <linux/sbitmap.h> #include <linux/delay.h> +#include <linux/rbtree.h> #include "bfq-iosched.h" diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 4e494d5600cc..470de27cf809 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -6573,7 +6573,7 @@ static int rbd_add_parse_args(const char *buf, *(snap_name + len) = '\0'; pctx.spec->snap_name = snap_name; - pctx.copts = ceph_alloc_options(); + pctx.copts = ceph_alloc_options(NULL); if (!pctx.copts) goto out_mem; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index b4f122eb74bb..020f83186f94 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -491,10 +491,12 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc, { struct ceph_mount_options *opt = mdsc->fsc->mount_options; + spin_lock(&opt->ceph_opt_lock); ci->i_hold_caps_min = round_jiffies(jiffies + opt->caps_wanted_delay_min * HZ); ci->i_hold_caps_max = round_jiffies(jiffies + opt->caps_wanted_delay_max * HZ); + spin_unlock(&opt->ceph_opt_lock); dout("__cap_set_timeouts %p min %lu max %lu\n", &ci->vfs_inode, ci->i_hold_caps_min - jiffies, ci->i_hold_caps_max - jiffies); } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 376e7cf1685f..451c3727cd0b 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2099,6 +2099,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, struct ceph_inode_info *ci = ceph_inode(dir); struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options; + unsigned int max_readdir = opt->max_readdir; size_t size = sizeof(struct ceph_mds_reply_dir_entry); unsigned int num_entries; int order; @@ -2107,7 +2108,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, num_entries = ci->i_files + ci->i_subdirs; spin_unlock(&ci->i_ceph_lock); num_entries = max(num_entries, 1U); - num_entries = min(num_entries, opt->max_readdir); + num_entries = min(num_entries, max_readdir); order = get_order(size * num_entries); while (order >= 0) { @@ -2122,7 +2123,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, return -ENOMEM; num_entries = (PAGE_SIZE << order) / size; - num_entries = min(num_entries, opt->max_readdir); + num_entries = min(num_entries, max_readdir); rinfo->dir_buf_size = PAGE_SIZE << order; req->r_num_caps = num_entries + 1; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 9a21054059f2..8df506dd9039 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1175,7 +1175,57 @@ static void ceph_free_fc(struct fs_context *fc) static int ceph_reconfigure_fc(struct fs_context *fc) { - sync_filesystem(fc->root->d_sb); + struct super_block *sb = fc->root->d_sb; + struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_mount_options *fsopt = fsc->mount_options; + struct ceph_parse_opts_ctx *pctx = fc->fs_private; + struct ceph_mount_options *new_fsopt = pctx->opts; + int ret; + + sync_filesystem(sb); + + ret = ceph_reconfigure_copts(fc, pctx->copts, fsc->client->options); + if (ret) + return ret; + + if (new_fsopt->snapdir_name != fsopt->snapdir_name) + return invalf(fc, "ceph: reconfiguration of snapdir_name not allowed"); + + if (new_fsopt->mds_namespace != fsopt->mds_namespace) + return invalf(fc, "ceph: reconfiguration of mds_namespace not allowed"); + + if (new_fsopt->wsize != fsopt->wsize) + return invalf(fc, "ceph: reconfiguration of wsize not allowed"); + if (new_fsopt->rsize != fsopt->rsize) + return invalf(fc, "ceph: reconfiguration of rsize not allowed"); + if (new_fsopt->rasize != fsopt->rasize) + return invalf(fc, "ceph: reconfiguration of rasize not allowed"); + +#ifdef CONFIG_CEPH_FSCACHE + if (strcmp_null(new_fsopt->fscache_uniq, fsopt->fscache_uniq)) + return invalf(fc, "ceph: reconfiguration of fscache not allowed"); +#endif + + fsopt->flags = new_fsopt->flags; + + spin_lock(&fsopt->ceph_opt_lock); + fsopt->caps_wanted_delay_min = new_fsopt->caps_wanted_delay_min; + fsopt->caps_wanted_delay_max = new_fsopt->caps_wanted_delay_max; + spin_unlock(&fsopt->ceph_opt_lock); + + fsopt->max_readdir_bytes = new_fsopt->max_readdir_bytes; + fsopt->congestion_kb = new_fsopt->congestion_kb; + + fsopt->caps_max = new_fsopt->caps_max; + fsopt->max_readdir = new_fsopt->max_readdir; + ceph_adjust_caps_max_min(fsc->mdsc, fsopt); + +#ifdef CONFIG_CEPH_FS_POSIX_ACL + if (fc->sb_flags & SB_POSIXACL) + sb->s_flags |= SB_POSIXACL; + else + sb->s_flags &= ~SB_POSIXACL; +#endif return 0; } @@ -1193,38 +1243,77 @@ static int ceph_init_fs_context(struct fs_context *fc) { struct ceph_parse_opts_ctx *pctx; struct ceph_mount_options *fsopt; + struct ceph_options *copts = NULL; pctx = kzalloc(sizeof(*pctx), GFP_KERNEL); if (!pctx) return -ENOMEM; - pctx->copts = ceph_alloc_options(); - if (!pctx->copts) - goto nomem; - pctx->opts = kzalloc(sizeof(*pctx->opts), GFP_KERNEL); if (!pctx->opts) goto nomem; fsopt = pctx->opts; - fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; - fsopt->wsize = CEPH_MAX_WRITE_SIZE; - fsopt->rsize = CEPH_MAX_READ_SIZE; - fsopt->rasize = CEPH_RASIZE_DEFAULT; - fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); - if (!fsopt->snapdir_name) - goto nomem; +#ifdef CONFIG_CEPH_FS_POSIX_ACL + fc->sb_flags |= SB_POSIXACL; +#endif + + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { + struct super_block *sb = fc->root->d_sb; + struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_mount_options *old = fsc->mount_options; + + copts = fsc->client->options; + + fsopt->flags = old->flags; - fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; - fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; - fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; - fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; - fsopt->congestion_kb = default_congestion_kb(); + fsopt->wsize = old->wsize; + fsopt->rsize = old->rsize; + fsopt->rasize = old->rasize; + + fsopt->fscache_uniq = kstrdup(old->fscache_uniq, GFP_KERNEL); + if (!fsopt->fscache_uniq) + goto nomem; + + fsopt->snapdir_name = kstrdup(old->snapdir_name, GFP_KERNEL); + if (!fsopt->snapdir_name) + goto nomem; + + fsopt->caps_wanted_delay_min = old->caps_wanted_delay_min; + fsopt->caps_wanted_delay_max = old->caps_wanted_delay_max; + fsopt->max_readdir = old->max_readdir; + fsopt->max_readdir_bytes = old->max_readdir_bytes; + fsopt->congestion_kb = old->congestion_kb; + fsopt->caps_max = old->caps_max; + fsopt->max_readdir = old->max_readdir; #ifdef CONFIG_CEPH_FS_POSIX_ACL - fc->sb_flags |= SB_POSIXACL; + if (!(sb->s_flags & SB_POSIXACL)) + fc->sb_flags &= ~SB_POSIXACL; #endif + } else { + fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; + + fsopt->wsize = CEPH_MAX_WRITE_SIZE; + fsopt->rsize = CEPH_MAX_READ_SIZE; + fsopt->rasize = CEPH_RASIZE_DEFAULT; + + fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); + if (!fsopt->snapdir_name) + goto nomem; + + fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; + fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; + fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; + fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; + fsopt->congestion_kb = default_congestion_kb(); + spin_lock_init(&fsopt->ceph_opt_lock); + } + + pctx->copts = ceph_alloc_options(copts); + if (!pctx->copts) + goto nomem; fc->fs_private = pctx; fc->ops = &ceph_context_ops; @@ -1232,7 +1321,6 @@ static int ceph_init_fs_context(struct fs_context *fc) nomem: destroy_mount_options(pctx->opts); - ceph_destroy_options(pctx->copts); kfree(pctx); return -ENOMEM; } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 2acb09980432..ad44b98f3c3b 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -95,6 +95,8 @@ struct ceph_mount_options { char *mds_namespace; /* default NULL */ char *server_path; /* default "/" */ char *fscache_uniq; /* default NULL */ + + spinlock_t ceph_opt_lock; }; struct ceph_fs_client { diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 8fe9b80e80a5..407645adb2ad 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -281,11 +281,13 @@ extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); extern void *ceph_kvmalloc(size_t size, gfp_t flags); struct fs_parameter; -struct ceph_options *ceph_alloc_options(void); +struct ceph_options *ceph_alloc_options(struct ceph_options *old); int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt, struct fs_context *fc); int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, struct fs_context *fc); +int ceph_reconfigure_copts(struct fs_context *fc, struct ceph_options *new_opts, + struct ceph_options *opts); int ceph_print_client_options(struct seq_file *m, struct ceph_client *client, bool show_all); extern void ceph_destroy_options(struct ceph_options *opt); diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index a9d6c97b5b0d..39e628996595 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -295,7 +295,7 @@ static const struct fs_parameter_description ceph_parameters = { .specs = ceph_param_specs, }; -struct ceph_options *ceph_alloc_options(void) +struct ceph_options *ceph_alloc_options(struct ceph_options *old) { struct ceph_options *opt; @@ -305,17 +305,49 @@ struct ceph_options *ceph_alloc_options(void) opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr), GFP_KERNEL); - if (!opt->mon_addr) { - kfree(opt); - return NULL; - } + if (!opt->mon_addr) + goto err; + + if (old) { + memcpy(&opt->my_addr, &old->my_addr, sizeof(opt->my_addr)); + memcpy(&opt->fsid, &old->fsid, sizeof(opt->fsid)); + if (old->name) { + opt->name = kstrdup(old->name, GFP_KERNEL); + if (!opt->name) + goto err; + } - opt->flags = CEPH_OPT_DEFAULT; - opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; - opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; - opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; - opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT; + if (old->key) { + opt->key = kmalloc(sizeof(*opt->key), GFP_KERNEL); + if (!opt->key) + goto err; + + opt->key->type = old->key->type; + opt->key->created.tv_sec = old->key->created.tv_sec; + opt->key->created.tv_nsec = old->key->created.tv_nsec; + opt->key->len = old->key->len; + memcpy(opt->key->key, old->key->key, old->key->len); + } + + opt->osd_keepalive_timeout = old->osd_keepalive_timeout; + opt->osd_idle_ttl = old->osd_idle_ttl; + opt->mount_timeout = old->mount_timeout; + opt->osd_request_timeout = old->osd_request_timeout; + opt->flags = old->flags; + } else { + opt->flags = CEPH_OPT_DEFAULT; + opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; + opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; + opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; + opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT; + } return opt; + +err: + kfree(opt->name); + kfree(opt->mon_addr); + kfree(opt); + return NULL; } EXPORT_SYMBOL(ceph_alloc_options); @@ -534,6 +566,37 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, } EXPORT_SYMBOL(ceph_parse_param); +int ceph_reconfigure_copts(struct fs_context *fc, struct ceph_options *new_opts, + struct ceph_options *opts) +{ + if (memcmp(&new_opts->my_addr, &opts->my_addr, + sizeof(opts->my_addr))) + return invalf(fc, "ceph: reconfiguration of ip not allowed"); + + if (memcmp(&new_opts->fsid, &opts->fsid, sizeof(opts->fsid))) + return invalf(fc, "ceph: reconfiguration of fsid not allowed"); + + if (strcmp_null(new_opts->name, opts->name)) + return invalf(fc, "ceph: reconfiguration of name not allowed"); + + if (new_opts->key && (!opts->key || + new_opts->key->type != opts->key->type || + new_opts->key->created.tv_sec != opts->key->created.tv_sec || + new_opts->key->created.tv_nsec != opts->key->created.tv_nsec || + new_opts->key->len != opts->key->len || + memcmp(new_opts->key->key, opts->key->key, opts->key->len))) + return invalf(fc, "ceph: reconfiguration of secret not allowed"); + + opts->osd_keepalive_timeout = new_opts->osd_keepalive_timeout; + opts->osd_idle_ttl = new_opts->osd_idle_ttl; + opts->mount_timeout = new_opts->mount_timeout; + opts->osd_request_timeout = new_opts->osd_request_timeout; + opts->flags = new_opts->flags; + + return 0; +} +EXPORT_SYMBOL(ceph_reconfigure_copts); + int ceph_print_client_options(struct seq_file *m, struct ceph_client *client, bool show_all) {