@@ -440,7 +440,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
s->s_seq = 0;
mutex_init(&s->s_mutex);
- ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr);
+ ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr,
+ mdsc->fsc->client->options->netns);
spin_lock_init(&s->s_gen_ttl_lock);
s->s_cap_gen = 0;
@@ -22,6 +22,8 @@
#include <linux/ceph/osd_client.h>
#include <linux/ceph/ceph_fs.h>
+struct net;
+
/*
* mount options
*/
@@ -46,6 +48,7 @@ struct ceph_options {
unsigned long mount_timeout; /* jiffies */
unsigned long osd_idle_ttl; /* jiffies */
unsigned long osd_keepalive_timeout; /* jiffies */
+ struct net *netns;
/*
* any type that can't be simply compared or doesn't need need
@@ -14,6 +14,7 @@
struct ceph_msg;
struct ceph_connection;
+struct net;
/*
* Ceph defines these callbacks for handling connection events.
@@ -189,6 +190,7 @@ struct ceph_connection {
struct ceph_messenger *msgr;
atomic_t sock_state;
+ struct net *netns;
struct socket *sock;
struct ceph_entity_addr peer_addr; /* peer address */
struct ceph_entity_addr peer_addr_for_me;
@@ -270,7 +272,7 @@ extern void ceph_messenger_init(struct ceph_messenger *msgr,
extern void ceph_con_init(struct ceph_connection *con, void *private,
const struct ceph_connection_operations *ops,
- struct ceph_messenger *msgr);
+ struct ceph_messenger *msgr, struct net *netns);
extern void ceph_con_open(struct ceph_connection *con,
__u8 entity_type, __u64 entity_num,
struct ceph_entity_addr *addr);
@@ -269,6 +269,9 @@ static match_table_t opt_tokens = {
void ceph_destroy_options(struct ceph_options *opt)
{
dout("destroy_options %p\n", opt);
+ if (opt->netns) {
+ put_net(opt->netns);
+ }
kfree(opt->name);
if (opt->key) {
ceph_crypto_key_destroy(opt->key);
@@ -335,9 +338,6 @@ ceph_parse_options(char *options, const char *dev_name,
int err = -ENOMEM;
substring_t argstr[MAX_OPT_ARGS];
- if (current->nsproxy->net_ns != &init_net)
- return ERR_PTR(-EINVAL);
-
opt = kzalloc(sizeof(*opt), GFP_KERNEL);
if (!opt)
return ERR_PTR(-ENOMEM);
@@ -501,6 +501,7 @@ ceph_parse_options(char *options, const char *dev_name,
}
/* success */
+ opt->netns = get_net(current->nsproxy->net_ns);
return opt;
out:
@@ -480,8 +480,8 @@ static int ceph_tcp_connect(struct ceph_connection *con)
int ret;
BUG_ON(con->sock);
- ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM,
- IPPROTO_TCP, &sock);
+ ret = __sock_create(con->netns, con->peer_addr.in_addr.ss_family, SOCK_STREAM,
+ IPPROTO_TCP, &sock, 0);
if (ret)
return ret;
sock->sk->sk_allocation = GFP_NOFS;
@@ -736,7 +736,7 @@ bool ceph_con_opened(struct ceph_connection *con)
*/
void ceph_con_init(struct ceph_connection *con, void *private,
const struct ceph_connection_operations *ops,
- struct ceph_messenger *msgr)
+ struct ceph_messenger *msgr, struct net *netns)
{
dout("con_init %p\n", con);
memset(con, 0, sizeof(*con));
@@ -744,6 +744,12 @@ void ceph_con_init(struct ceph_connection *con, void *private,
con->ops = ops;
con->msgr = msgr;
+ /*
+ * don't take extra refcnt of netns here since both mon and osds
+ * have lifetime within that of ceph_client
+ */
+ con->netns = netns;
+
con_sock_state_init(con);
mutex_init(&con->mutex);
@@ -832,7 +832,7 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
goto out_auth_reply;
ceph_con_init(&monc->con, monc, &mon_con_ops,
- &monc->client->msgr);
+ &monc->client->msgr, monc->client->options->netns);
monc->cur_mon = -1;
monc->hunting = true;
@@ -1022,7 +1022,8 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
INIT_LIST_HEAD(&osd->o_osd_lru);
osd->o_incarnation = 1;
- ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr);
+ ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr,
+ osdc->client->options->netns);
INIT_LIST_HEAD(&osd->o_keepalive_item);
return osd;
In the current implementation, init_net is always used. But in most cases, if a user does an rbd map or a ceph mount in a container, it is expected to use the container's network namespace. This patch saves the container's netns in ceph_options on an rbd map or ceph mount, and uses that netns rather than init_net when creating sockets. A reference on the netns is taken only by the ceph_options in ceph_client, since the lifetime of the osds and mon is within that of ceph_client. I've tested this patch in a docker container with the following operations: - rbd map - write/read on the rbd - rbd unmap Signed-off-by: Hong Zhiguo <zhiguohong@tencent.com> --- fs/ceph/mds_client.c | 3 ++- include/linux/ceph/libceph.h | 3 +++ include/linux/ceph/messenger.h | 4 +++- net/ceph/ceph_common.c | 7 ++++--- net/ceph/messenger.c | 12 +++++++++--- net/ceph/mon_client.c | 2 +- net/ceph/osd_client.c | 3 ++- 7 files changed, 24 insertions(+), 10 deletions(-)