@@ -252,6 +252,8 @@ int sock_register(const struct net_proto_family *fam);
void sock_unregister(int family);
bool sock_is_registered(int family);
int sock_create(int family, int type, int proto, struct socket **res);
+int sock_create_net(struct net *net, int family, int type, int proto,
+ struct socket **res);
int sock_create_kern(struct net *net, int family, int type, int proto, struct socket **res);
int sock_create_lite(int family, int type, int proto, struct socket **res);
struct socket *sock_alloc(void);
@@ -2229,6 +2229,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
sk->sk_kern_sock = kern;
sock_lock_init(sk);
+ DEBUG_NET_WARN_ON_ONCE(hold_net && !net_initialized(net));
sk->sk_net_refcnt = hold_net;
if (likely(sk->sk_net_refcnt)) {
get_net_track(net, &sk->ns_tracker, priority);
@@ -1623,6 +1623,38 @@ int sock_create(int family, int type, int protocol, struct socket **res)
}
EXPORT_SYMBOL(sock_create);
+/**
+ * sock_create_net - create a kernel socket that pins its netns
+ * @net: net namespace the socket is created in
+ * @family: protocol family (AF_INET, ...)
+ * @type: communication type (SOCK_STREAM, ...)
+ * @protocol: protocol (0, ...)
+ * @res: where the new socket is stored
+ *
+ * Creates a new socket via __sock_create() and stores it in @res,
+ * passing through LSM.
+ *
+ * The socket belongs to the kernel: it should not be exposed to
+ * userspace through a file descriptor, nor to BPF hooks other than
+ * LSM (see inet_create(), inet_release(), etc).
+ *
+ * A reference on @net is held by the socket, so the caller does not
+ * need to care about @net's lifetime.  @net MUST be alive as of calling
+ * sock_create_net(), and this MUST NOT be called from the __net_init
+ * path.
+ *
+ * Context: Process context. This function internally uses GFP_KERNEL.
+ * Return: 0 or an error.
+ */
+int sock_create_net(struct net *net, int family, int type, int protocol,
+		    struct socket **res)
+{
+	int err;
+
+	err = __sock_create(net, family, type, protocol, res, true, true);
+	return err;
+}
+EXPORT_SYMBOL(sock_create_net);
+
/**
* sock_create_kern - creates a socket (kernel space)
* @net: net namespace
Let's add a new API to create a kernel socket with netns refcnt held.

We will remove the ugly kernel socket conversion in the next patch.

DEBUG_NET_WARN_ON_ONCE() is to catch a path calling sock_create_net()
from __net_init functions, which would leak the netns.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
 include/linux/net.h |  2 ++
 net/core/sock.c     |  1 +
 net/socket.c        | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 35 insertions(+)