Message ID | 1433941284-12984-1-git-send-email-zhiguohong@tencent.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Jun 10, 2015 at 4:01 PM, Hong Zhiguo <honkiko@gmail.com> wrote: > in current implementaion init_net is always used. > > But in most cases, if user do a rbd map or ceph mount in > a container, it's expected to use the container network namespace. > > This patch saves the container's netns in ceph_options on a rbd map > or ceph mount. And use the netns other than init_net when creating > socket. Ref count of the netns is only taken by the ceph_options > in ceph_client since lifetime of osds and mon is within that of > ceph_client. > > I've tested this patch in docker container with below operations: > - rbd map > - write/read on the rbd > - rbd unmap > > Signed-off-by: Hong Zhiguo <zhiguohong@tencent.com> > --- > fs/ceph/mds_client.c | 3 ++- > include/linux/ceph/libceph.h | 3 +++ > include/linux/ceph/messenger.h | 4 +++- > net/ceph/ceph_common.c | 7 ++++--- > net/ceph/messenger.c | 8 +++++++- > net/ceph/mon_client.c | 2 +- > net/ceph/osd_client.c | 3 ++- > 7 files changed, 22 insertions(+), 8 deletions(-) > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c > index 8080d48..3fb0976 100644 > --- a/fs/ceph/mds_client.c > +++ b/fs/ceph/mds_client.c > @@ -440,7 +440,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, > s->s_seq = 0; > mutex_init(&s->s_mutex); > > - ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr); > + ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr, > + mdsc->fsc->client->options->netns); > > spin_lock_init(&s->s_gen_ttl_lock); > s->s_cap_gen = 0; > diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h > index d73a569..442d9f3 100644 > --- a/include/linux/ceph/libceph.h > +++ b/include/linux/ceph/libceph.h > @@ -22,6 +22,8 @@ > #include <linux/ceph/osd_client.h> > #include <linux/ceph/ceph_fs.h> > > +struct net; > + > /* > * mount options > */ > @@ -46,6 +48,7 @@ struct ceph_options { > unsigned long mount_timeout; /* jiffies */ > unsigned long 
osd_idle_ttl; /* jiffies */ > unsigned long osd_keepalive_timeout; /* jiffies */ > + struct net *netns; > > /* > * any type that can't be simply compared or doesn't need need > diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h > index e154994..3b0a314 100644 > --- a/include/linux/ceph/messenger.h > +++ b/include/linux/ceph/messenger.h > @@ -14,6 +14,7 @@ > > struct ceph_msg; > struct ceph_connection; > +struct net; > > /* > * Ceph defines these callbacks for handling connection events. > @@ -189,6 +190,7 @@ struct ceph_connection { > struct ceph_messenger *msgr; > > atomic_t sock_state; > + struct net *netns; > struct socket *sock; > struct ceph_entity_addr peer_addr; /* peer address */ > struct ceph_entity_addr peer_addr_for_me; > @@ -270,7 +272,7 @@ extern void ceph_messenger_init(struct ceph_messenger *msgr, > > extern void ceph_con_init(struct ceph_connection *con, void *private, > const struct ceph_connection_operations *ops, > - struct ceph_messenger *msgr); > + struct ceph_messenger *msgr, struct net *netns); > extern void ceph_con_open(struct ceph_connection *con, > __u8 entity_type, __u64 entity_num, > struct ceph_entity_addr *addr); > diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c > index 925d0c8..1c42d96 100644 > --- a/net/ceph/ceph_common.c > +++ b/net/ceph/ceph_common.c > @@ -269,6 +269,9 @@ static match_table_t opt_tokens = { > void ceph_destroy_options(struct ceph_options *opt) > { > dout("destroy_options %p\n", opt); > + if (opt->netns) { > + put_net(opt->netns); > + } > kfree(opt->name); > if (opt->key) { > ceph_crypto_key_destroy(opt->key); > @@ -335,9 +338,6 @@ ceph_parse_options(char *options, const char *dev_name, > int err = -ENOMEM; > substring_t argstr[MAX_OPT_ARGS]; > > - if (current->nsproxy->net_ns != &init_net) > - return ERR_PTR(-EINVAL); > - > opt = kzalloc(sizeof(*opt), GFP_KERNEL); > if (!opt) > return ERR_PTR(-ENOMEM); > @@ -501,6 +501,7 @@ ceph_parse_options(char *options, const char 
*dev_name, > } > > /* success */ > + opt->netns = get_net(current->nsproxy->net_ns); > return opt; > > out: > diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c > index 967080a..43ec07d 100644 > --- a/net/ceph/messenger.c > +++ b/net/ceph/messenger.c > @@ -736,7 +736,7 @@ bool ceph_con_opened(struct ceph_connection *con) > */ > void ceph_con_init(struct ceph_connection *con, void *private, > const struct ceph_connection_operations *ops, > - struct ceph_messenger *msgr) > + struct ceph_messenger *msgr, struct net *netns) > { > dout("con_init %p\n", con); > memset(con, 0, sizeof(*con)); > @@ -744,6 +744,12 @@ void ceph_con_init(struct ceph_connection *con, void *private, > con->ops = ops; > con->msgr = msgr; > > + /* > + * don't take extra refcnt of netns here since both mon and osds > + * have lifetime within that of ceph_client > + */ > + con->netns = netns; > + > con_sock_state_init(con); > > mutex_init(&con->mutex); > diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c > index 9d6ff12..04128af 100644 > --- a/net/ceph/mon_client.c > +++ b/net/ceph/mon_client.c > @@ -832,7 +832,7 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) > goto out_auth_reply; > > ceph_con_init(&monc->con, monc, &mon_con_ops, > - &monc->client->msgr); > + &monc->client->msgr, monc->client->options->netns); > > monc->cur_mon = -1; > monc->hunting = true; > diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c > index 5003367..32d9fa9 100644 > --- a/net/ceph/osd_client.c > +++ b/net/ceph/osd_client.c > @@ -1022,7 +1022,8 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum) > INIT_LIST_HEAD(&osd->o_osd_lru); > osd->o_incarnation = 1; > > - ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr); > + ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr, > + osdc->client->options->netns); > > INIT_LIST_HEAD(&osd->o_keepalive_item); > return osd; It seems to me your patch boils down to killing the init_ns 
check and adding a netns field to struct ceph_connection, which is assigned to but never used. Given that, can you elaborate on the "And use the netns other than init_net when creating socket" part and explain in a little bit more detail what is accomplished here? Thanks, Ilya -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
sorry, I missed the __sock_create hunk during my merge from our 3.13 kernel tree. Now I took it and tested rbd map/write/read/unmap in docker container again. I'll post the updated patch again. On Wed, Jun 10, 2015 at 9:30 PM, Ilya Dryomov <idryomov@gmail.com> wrote: > On Wed, Jun 10, 2015 at 4:01 PM, Hong Zhiguo <honkiko@gmail.com> wrote: >> in current implementaion init_net is always used. >> >> But in most cases, if user do a rbd map or ceph mount in >> a container, it's expected to use the container network namespace. >> >> This patch saves the container's netns in ceph_options on a rbd map >> or ceph mount. And use the netns other than init_net when creating >> socket. Ref count of the netns is only taken by the ceph_options >> in ceph_client since lifetime of osds and mon is within that of >> ceph_client. >> >> I've tested this patch in docker container with below operations: >> - rbd map >> - write/read on the rbd >> - rbd unmap >> >> Signed-off-by: Hong Zhiguo <zhiguohong@tencent.com> >> --- >> fs/ceph/mds_client.c | 3 ++- >> include/linux/ceph/libceph.h | 3 +++ >> include/linux/ceph/messenger.h | 4 +++- >> net/ceph/ceph_common.c | 7 ++++--- >> net/ceph/messenger.c | 8 +++++++- >> net/ceph/mon_client.c | 2 +- >> net/ceph/osd_client.c | 3 ++- >> 7 files changed, 22 insertions(+), 8 deletions(-) >> >> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c >> index 8080d48..3fb0976 100644 >> --- a/fs/ceph/mds_client.c >> +++ b/fs/ceph/mds_client.c >> @@ -440,7 +440,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, >> s->s_seq = 0; >> mutex_init(&s->s_mutex); >> >> - ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr); >> + ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr, >> + mdsc->fsc->client->options->netns); >> >> spin_lock_init(&s->s_gen_ttl_lock); >> s->s_cap_gen = 0; >> diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h >> index d73a569..442d9f3 100644 >> --- 
a/include/linux/ceph/libceph.h >> +++ b/include/linux/ceph/libceph.h >> @@ -22,6 +22,8 @@ >> #include <linux/ceph/osd_client.h> >> #include <linux/ceph/ceph_fs.h> >> >> +struct net; >> + >> /* >> * mount options >> */ >> @@ -46,6 +48,7 @@ struct ceph_options { >> unsigned long mount_timeout; /* jiffies */ >> unsigned long osd_idle_ttl; /* jiffies */ >> unsigned long osd_keepalive_timeout; /* jiffies */ >> + struct net *netns; >> >> /* >> * any type that can't be simply compared or doesn't need need >> diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h >> index e154994..3b0a314 100644 >> --- a/include/linux/ceph/messenger.h >> +++ b/include/linux/ceph/messenger.h >> @@ -14,6 +14,7 @@ >> >> struct ceph_msg; >> struct ceph_connection; >> +struct net; >> >> /* >> * Ceph defines these callbacks for handling connection events. >> @@ -189,6 +190,7 @@ struct ceph_connection { >> struct ceph_messenger *msgr; >> >> atomic_t sock_state; >> + struct net *netns; >> struct socket *sock; >> struct ceph_entity_addr peer_addr; /* peer address */ >> struct ceph_entity_addr peer_addr_for_me; >> @@ -270,7 +272,7 @@ extern void ceph_messenger_init(struct ceph_messenger *msgr, >> >> extern void ceph_con_init(struct ceph_connection *con, void *private, >> const struct ceph_connection_operations *ops, >> - struct ceph_messenger *msgr); >> + struct ceph_messenger *msgr, struct net *netns); >> extern void ceph_con_open(struct ceph_connection *con, >> __u8 entity_type, __u64 entity_num, >> struct ceph_entity_addr *addr); >> diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c >> index 925d0c8..1c42d96 100644 >> --- a/net/ceph/ceph_common.c >> +++ b/net/ceph/ceph_common.c >> @@ -269,6 +269,9 @@ static match_table_t opt_tokens = { >> void ceph_destroy_options(struct ceph_options *opt) >> { >> dout("destroy_options %p\n", opt); >> + if (opt->netns) { >> + put_net(opt->netns); >> + } >> kfree(opt->name); >> if (opt->key) { >> ceph_crypto_key_destroy(opt->key); 
>> @@ -335,9 +338,6 @@ ceph_parse_options(char *options, const char *dev_name, >> int err = -ENOMEM; >> substring_t argstr[MAX_OPT_ARGS]; >> >> - if (current->nsproxy->net_ns != &init_net) >> - return ERR_PTR(-EINVAL); >> - >> opt = kzalloc(sizeof(*opt), GFP_KERNEL); >> if (!opt) >> return ERR_PTR(-ENOMEM); >> @@ -501,6 +501,7 @@ ceph_parse_options(char *options, const char *dev_name, >> } >> >> /* success */ >> + opt->netns = get_net(current->nsproxy->net_ns); >> return opt; >> >> out: >> diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c >> index 967080a..43ec07d 100644 >> --- a/net/ceph/messenger.c >> +++ b/net/ceph/messenger.c >> @@ -736,7 +736,7 @@ bool ceph_con_opened(struct ceph_connection *con) >> */ >> void ceph_con_init(struct ceph_connection *con, void *private, >> const struct ceph_connection_operations *ops, >> - struct ceph_messenger *msgr) >> + struct ceph_messenger *msgr, struct net *netns) >> { >> dout("con_init %p\n", con); >> memset(con, 0, sizeof(*con)); >> @@ -744,6 +744,12 @@ void ceph_con_init(struct ceph_connection *con, void *private, >> con->ops = ops; >> con->msgr = msgr; >> >> + /* >> + * don't take extra refcnt of netns here since both mon and osds >> + * have lifetime within that of ceph_client >> + */ >> + con->netns = netns; >> + >> con_sock_state_init(con); >> >> mutex_init(&con->mutex); >> diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c >> index 9d6ff12..04128af 100644 >> --- a/net/ceph/mon_client.c >> +++ b/net/ceph/mon_client.c >> @@ -832,7 +832,7 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) >> goto out_auth_reply; >> >> ceph_con_init(&monc->con, monc, &mon_con_ops, >> - &monc->client->msgr); >> + &monc->client->msgr, monc->client->options->netns); >> >> monc->cur_mon = -1; >> monc->hunting = true; >> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c >> index 5003367..32d9fa9 100644 >> --- a/net/ceph/osd_client.c >> +++ b/net/ceph/osd_client.c >> @@ -1022,7 +1022,8 @@ 
static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum) >> INIT_LIST_HEAD(&osd->o_osd_lru); >> osd->o_incarnation = 1; >> >> - ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr); >> + ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr, >> + osdc->client->options->netns); >> >> INIT_LIST_HEAD(&osd->o_keepalive_item); >> return osd; > > It seems to me your patch boils down to killing the init_ns check and > adding a netns field to struct ceph_connection, which is assigned to > but never used. Given that, can you elaborate on the "And use the > netns other than init_net when creating socket" part and explain in > a little bit more detail what is accomplished here? > > Thanks, > > Ilya
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 8080d48..3fb0976 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -440,7 +440,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, s->s_seq = 0; mutex_init(&s->s_mutex); - ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr); + ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr, + mdsc->fsc->client->options->netns); spin_lock_init(&s->s_gen_ttl_lock); s->s_cap_gen = 0; diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index d73a569..442d9f3 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -22,6 +22,8 @@ #include <linux/ceph/osd_client.h> #include <linux/ceph/ceph_fs.h> +struct net; + /* * mount options */ @@ -46,6 +48,7 @@ struct ceph_options { unsigned long mount_timeout; /* jiffies */ unsigned long osd_idle_ttl; /* jiffies */ unsigned long osd_keepalive_timeout; /* jiffies */ + struct net *netns; /* * any type that can't be simply compared or doesn't need need diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index e154994..3b0a314 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -14,6 +14,7 @@ struct ceph_msg; struct ceph_connection; +struct net; /* * Ceph defines these callbacks for handling connection events. 
@@ -189,6 +190,7 @@ struct ceph_connection { struct ceph_messenger *msgr; atomic_t sock_state; + struct net *netns; struct socket *sock; struct ceph_entity_addr peer_addr; /* peer address */ struct ceph_entity_addr peer_addr_for_me; @@ -270,7 +272,7 @@ extern void ceph_messenger_init(struct ceph_messenger *msgr, extern void ceph_con_init(struct ceph_connection *con, void *private, const struct ceph_connection_operations *ops, - struct ceph_messenger *msgr); + struct ceph_messenger *msgr, struct net *netns); extern void ceph_con_open(struct ceph_connection *con, __u8 entity_type, __u64 entity_num, struct ceph_entity_addr *addr); diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 925d0c8..1c42d96 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -269,6 +269,9 @@ static match_table_t opt_tokens = { void ceph_destroy_options(struct ceph_options *opt) { dout("destroy_options %p\n", opt); + if (opt->netns) { + put_net(opt->netns); + } kfree(opt->name); if (opt->key) { ceph_crypto_key_destroy(opt->key); @@ -335,9 +338,6 @@ ceph_parse_options(char *options, const char *dev_name, int err = -ENOMEM; substring_t argstr[MAX_OPT_ARGS]; - if (current->nsproxy->net_ns != &init_net) - return ERR_PTR(-EINVAL); - opt = kzalloc(sizeof(*opt), GFP_KERNEL); if (!opt) return ERR_PTR(-ENOMEM); @@ -501,6 +501,7 @@ ceph_parse_options(char *options, const char *dev_name, } /* success */ + opt->netns = get_net(current->nsproxy->net_ns); return opt; out: diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 967080a..43ec07d 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -736,7 +736,7 @@ bool ceph_con_opened(struct ceph_connection *con) */ void ceph_con_init(struct ceph_connection *con, void *private, const struct ceph_connection_operations *ops, - struct ceph_messenger *msgr) + struct ceph_messenger *msgr, struct net *netns) { dout("con_init %p\n", con); memset(con, 0, sizeof(*con)); @@ -744,6 +744,12 @@ void ceph_con_init(struct 
ceph_connection *con, void *private, con->ops = ops; con->msgr = msgr; + /* + * don't take extra refcnt of netns here since both mon and osds + * have lifetime within that of ceph_client + */ + con->netns = netns; + con_sock_state_init(con); mutex_init(&con->mutex); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 9d6ff12..04128af 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -832,7 +832,7 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) goto out_auth_reply; ceph_con_init(&monc->con, monc, &mon_con_ops, - &monc->client->msgr); + &monc->client->msgr, monc->client->options->netns); monc->cur_mon = -1; monc->hunting = true; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 5003367..32d9fa9 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1022,7 +1022,8 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum) INIT_LIST_HEAD(&osd->o_osd_lru); osd->o_incarnation = 1; - ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr); + ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr, + osdc->client->options->netns); INIT_LIST_HEAD(&osd->o_keepalive_item); return osd;
In the current implementation, init_net is always used. But in most cases, if a user does an rbd map or ceph mount in a container, it's expected to use the container's network namespace. This patch saves the container's netns in ceph_options on an rbd map or ceph mount, and uses that netns instead of init_net when creating the socket. A reference on the netns is taken only by the ceph_options in ceph_client, since the lifetime of the osds and mon is within that of ceph_client. I've tested this patch in a docker container with the operations below: - rbd map - write/read on the rbd - rbd unmap Signed-off-by: Hong Zhiguo <zhiguohong@tencent.com> --- fs/ceph/mds_client.c | 3 ++- include/linux/ceph/libceph.h | 3 +++ include/linux/ceph/messenger.h | 4 +++- net/ceph/ceph_common.c | 7 ++++--- net/ceph/messenger.c | 8 +++++++- net/ceph/mon_client.c | 2 +- net/ceph/osd_client.c | 3 ++- 7 files changed, 22 insertions(+), 8 deletions(-)