@@ -19,6 +19,7 @@ SBINDIR?=/sbin
CONF_ETC_DIR?=/etc/iproute2
CONF_USR_DIR?=$(LIBDIR)/iproute2
NETNS_RUN_DIR?=/var/run/netns
+MNTNS_RUN_DIR?=/var/run/netns-mnt
NETNS_ETC_DIR?=/etc/netns
DATADIR?=$(PREFIX)/share
HDRDIR?=$(PREFIX)/include/iproute2
@@ -41,6 +42,7 @@ endif
DEFINES+=-DCONF_USR_DIR=\"$(CONF_USR_DIR)\" \
-DCONF_ETC_DIR=\"$(CONF_ETC_DIR)\" \
-DNETNS_RUN_DIR=\"$(NETNS_RUN_DIR)\" \
+ -DMNTNS_RUN_DIR=\"$(MNTNS_RUN_DIR)\" \
-DNETNS_ETC_DIR=\"$(NETNS_ETC_DIR)\" \
-DCONF_COLOR=$(CONF_COLOR)
@@ -733,13 +733,24 @@ static int netns_identify(int argc, char **argv)
static int on_netns_del(char *nsname, void *arg)
{
- char netns_path[PATH_MAX];
+ char ns_path[PATH_MAX];
+ struct stat st;
+
+ snprintf(ns_path, sizeof(ns_path), "%s/%s", MNTNS_RUN_DIR, nsname);
+ if (!stat(ns_path, &st)) { /* may not exist if created by old iproute2 */
+ umount2(ns_path, MNT_DETACH);
+ if (unlink(ns_path) < 0) {
+ fprintf(stderr, "Cannot remove namespace file \"%s\": %s\n",
+ ns_path, strerror(errno));
+ return -1;
+ }
+ }
- snprintf(netns_path, sizeof(netns_path), "%s/%s", NETNS_RUN_DIR, nsname);
- umount2(netns_path, MNT_DETACH);
- if (unlink(netns_path) < 0) {
+ snprintf(ns_path, sizeof(ns_path), "%s/%s", NETNS_RUN_DIR, nsname);
+ umount2(ns_path, MNT_DETACH);
+ if (unlink(ns_path) < 0) {
fprintf(stderr, "Cannot remove namespace file \"%s\": %s\n",
- netns_path, strerror(errno));
+ ns_path, strerror(errno));
return -1;
}
return 0;
@@ -885,17 +896,46 @@ static int bind_ns_file(const char *parent, const char *nsfile,
return 0;
}
+static ino_t get_mnt_ino(pid_t pid)
+{
+ char path[PATH_MAX];
+ struct stat st;
+
+ snprintf(path, sizeof(path), "/proc/%u/ns/mnt", (unsigned) pid);
+
+ if (stat(path, &st) != 0) {
+ fprintf(stderr, "stat of %s failed: %s\n",
+ path, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ return st.st_ino;
+}
+
static pid_t bind_ns_files_from_child(const char *ns_name, pid_t target_pid,
int *fd)
{
+ ino_t mnt_ino;
pid_t child;
+ mnt_ino = get_mnt_ino(getpid());
+
child = fork_and_wait(fd);
if (child)
return child;
if (bind_ns_file(NETNS_RUN_DIR, "net", ns_name, target_pid))
exit(EXIT_FAILURE);
+
+ /* We can only bind the mount namespace reference if the target pid is
+ * actually in a different mount namespace than ourselves. We ignore any
+ * errors in creating the mount namespace reference because an old
+ * namespace mount may be present if a network namespace with the same
+ * name was previously removed by an older version of iproute2; in this
+ * case that old reference will just be reused.
+ */
+ if (mnt_ino != get_mnt_ino(target_pid))
+ bind_ns_file(MNTNS_RUN_DIR, "mnt", ns_name, target_pid);
+
exit(EXIT_SUCCESS);
}
@@ -1003,8 +1043,13 @@ static int netns_add(int argc, char **argv, bool create)
* unmounting a network namespace file in one namespace will unmount the
* network namespace file in all namespaces allowing the network
* namespace to be freed sooner.
+ *
+ * The mount namespace directory cannot be shared because it's not
+ * possible to mount references to a mount namespace inside that
+ * namespace itself.
*/
- if (prepare_ns_mount_dir(NETNS_RUN_DIR, MS_SHARED))
+ if (prepare_ns_mount_dir(NETNS_RUN_DIR, MS_SHARED) ||
+ prepare_ns_mount_dir(MNTNS_RUN_DIR, MS_SLAVE))
return -1;
child = bind_ns_files_from_child(name, pid, &event_fd);
@@ -1012,12 +1057,17 @@ static int netns_add(int argc, char **argv, bool create)
exit(EXIT_FAILURE);
if (create) {
- if (unshare(CLONE_NEWNET) < 0) {
+ if (unshare(CLONE_NEWNET | CLONE_NEWNS) < 0) {
fprintf(stderr, "Failed to create a new network namespace \"%s\": %s\n",
name, strerror(errno));
close(event_fd);
exit(EXIT_FAILURE);
}
+
+ if (prepare_mountns(name, false)) {
+ close(event_fd);
+ exit(EXIT_FAILURE);
+ }
}
return sync_with_child(child, event_fd);
@@ -127,7 +127,13 @@ int netns_switch(char *name)
if (switch_ns(NETNS_RUN_DIR, name, CLONE_NEWNET))
return -1;
- return prepare_mountns(name, true);
+ /* Try to enter an existing persisted mount namespace. If this fails,
+ * preserve the old behaviour of creating a new namespace on entry.
+ */
+ if (switch_ns(MNTNS_RUN_DIR, name, CLONE_NEWNS))
+ return prepare_mountns(name, true);
+
+ return 0;
}
int netns_get_fd(const char *name)
When creating a new network namespace, persist not only the network namespace reference itself, but also a new mount namespace that is created alongside it and paired with the network namespace. This means that multiple subsequent invocations of 'ip netns exec' will reuse the same mount namespace instead of creating a new namespace on every entry, as was the behaviour before this patch. The persistent mount namespace has the benefit that any new mounts created inside the namespace will persist. Most notably, this is useful when using bpffs instances along with 'ip netns', as these previously lasted only for the duration of a single 'ip netns' invocation. To preserve backwards compatibility, when switching netns we fall back to the old behaviour of creating a new mount namespace if we can't find a persisted mount namespace to enter. This can happen if the netns instance was created with a previous version of iproute2 that doesn't persist the mount namespace. One caveat of the mount namespace persistence is that we can't make the mount of the containing directory shared, the way we do with the netns mounts. This means that if 'ip netns del' is invoked *inside* a namespace created with 'ip netns', the mount namespace reference will not be deleted and will stick around in the original mount namespace where it was created. This is unavoidable because it is not possible to create a bind-mounted reference to a mount namespace inside that same mount namespace (as that would create a circular reference). As a result, we may end up with the network namespace reference being removed but the mount namespace reference sticking around (the same thing can happen if 'ip netns del' is executed with an older version of iproute2). In that case, a subsequent 'ip netns add' with the same namespace name will end up reusing the old mount namespace reference. 
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com> --- Makefile | 2 ++ ip/ipnetns.c | 64 +++++++++++++++++++++++++++++++++++++++++++------ lib/namespace.c | 8 ++++++- 3 files changed, 66 insertions(+), 8 deletions(-)