diff mbox series

[1/2] export: Add reexport= option

Message ID 20230404075039.10802-2-richard@nod.at (mailing list archive)
State New, archived
Headers show
Series nfs-utils: Improving NFS re-export wrt. crossmnt | expand

Commit Message

Richard Weinberger April 4, 2023, 7:50 a.m. UTC
When re-exporting a NFS volume it is mandatory to specify
either a UUID or numerical fsid= option because nfsd is unable
to derive an identifier on its own.

For NFS cross mounts this becomes a problem because nfsd also
needs an identifier for every crossed mount.
A common workaround is stating every single subvolume in the
exports list too.
But this defeats the purpose of the crossmnt option and is tedious.

This is where the reexport= tries to help.
It offers various strategies to automatically derive a identifier
for NFS volumes and sub volumes.

Currently two strategies are implemented:

1. auto-fsidnum
    In this mode mountd/exportd will create a new numerical fsid
    for a NFS volume and subvolume. The numbers are stored in a database,
    via fsidd, such that the server will always use the same fsid.
    The entry in the exports file allowed to skip the fsid= option but
    stating a UUID is allowed, if needed.

    This mode has the obvious downside that load balancing is by default not
    possible since multiple re-exporting NFS servers would generate
    different ids.
    It is possible if all load balancers use the same database.
    This can be achieved by using nfs-utils' fsidd and placing it's sqlit
    database on a network share which supports file locks or by implementing
    your own fsidd which is able to provide consistent fsids across multiple
    re-exporting nfs servers.

2. predefined-fsidnum
    This mode works just like auto-fsidnum but does not generate ids
    for you. It helps in the load balancing case. A system administrator
    has to manually maintain the database and install it on all re-exporting
    NFS servers. If you have a massive amount of subvolumes this mode
    will help because you don't have to bloat the exports list.

Signed-off-by: Richard Weinberger <richard@nod.at>
---
 configure.ac                 |   1 +
 support/Makefile.am          |   2 +-
 support/export/Makefile.am   |   2 +
 support/export/cache.c       |  74 +++++++-
 support/export/export.c      |  27 ++-
 support/include/nfslib.h     |   1 +
 support/nfs/Makefile.am      |   1 +
 support/nfs/exports.c        |  62 +++++++
 support/reexport/Makefile.am |   6 +
 support/reexport/reexport.c  | 327 +++++++++++++++++++++++++++++++++++
 systemd/Makefile.am          |   2 +
 utils/exportd/Makefile.am    |   6 +-
 utils/exportd/exportd.c      |   5 +
 utils/exportfs/Makefile.am   |   3 +
 utils/exportfs/exportfs.c    |  11 ++
 utils/exportfs/exports.man   |  31 ++++
 utils/mount/Makefile.am      |   3 +-
 utils/mountd/Makefile.am     |   2 +
 18 files changed, 555 insertions(+), 11 deletions(-)
 create mode 100644 support/reexport/Makefile.am
 create mode 100644 support/reexport/reexport.c
diff mbox series

Patch

diff --git a/configure.ac b/configure.ac
index 5d9cbf31..9eec17b2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -729,6 +729,7 @@  AC_CONFIG_FILES([
 	support/nsm/Makefile
 	support/nfsidmap/Makefile
 	support/nfsidmap/libnfsidmap.pc
+	support/reexport/Makefile
 	tools/Makefile
 	tools/locktest/Makefile
 	tools/nlmtest/Makefile
diff --git a/support/Makefile.am b/support/Makefile.am
index c962d4d4..07cfd87e 100644
--- a/support/Makefile.am
+++ b/support/Makefile.am
@@ -10,7 +10,7 @@  if CONFIG_JUNCTION
 OPTDIRS += junction
 endif
 
-SUBDIRS = export include misc nfs nsm $(OPTDIRS)
+SUBDIRS = export include misc nfs nsm reexport $(OPTDIRS)
 
 MAINTAINERCLEANFILES = Makefile.in
 
diff --git a/support/export/Makefile.am b/support/export/Makefile.am
index eec737f6..7338e1c7 100644
--- a/support/export/Makefile.am
+++ b/support/export/Makefile.am
@@ -14,6 +14,8 @@  libexport_a_SOURCES = client.c export.c hostname.c \
 		      xtab.c mount_clnt.c mount_xdr.c \
 		      cache.c auth.c v4root.c fsloc.c \
 		      v4clients.c
+libexport_a_CPPFLAGS = $(AM_CPPFLAGS) $(CPPFLAGS) -I$(top_srcdir)/support/reexport
+
 BUILT_SOURCES 	= $(GENFILES)
 
 noinst_HEADERS = mount.h
diff --git a/support/export/cache.c b/support/export/cache.c
index e5f9f8df..b88e28a2 100644
--- a/support/export/cache.c
+++ b/support/export/cache.c
@@ -33,6 +33,7 @@ 
 #include "export.h"
 #include "pseudoflavors.h"
 #include "xcommon.h"
+#include "reexport.h"
 
 #ifdef HAVE_JUNCTION_SUPPORT
 #include "fsloc.h"
@@ -235,6 +236,16 @@  static void auth_unix_gid(int f)
 		xlog(L_ERROR, "auth_unix_gid: error writing reply");
 }
 
+static int match_crossmnt_fsidnum(uint32_t parsed_fsidnum, char *path)
+{
+	uint32_t fsidnum;
+
+	if (reexpdb_fsidnum_by_path(path, &fsidnum, 0) == 0)
+		return 0;
+
+	return fsidnum == parsed_fsidnum;
+}
+
 #ifdef USE_BLKID
 static const char *get_uuid_blkdev(char *path)
 {
@@ -687,8 +698,13 @@  static int match_fsid(struct parsed_fsid *parsed, nfs_export *exp, char *path)
 		goto match;
 	case FSID_NUM:
 		if (((exp->m_export.e_flags & NFSEXP_FSID) == 0 ||
-		     exp->m_export.e_fsid != parsed->fsidnum))
+		     exp->m_export.e_fsid != parsed->fsidnum)) {
+			if ((exp->m_export.e_flags & NFSEXP_CROSSMOUNT) && exp->m_export.e_reexport != REEXP_NONE &&
+			    match_crossmnt_fsidnum(parsed->fsidnum, path))
+				goto match;
+
 			goto nomatch;
+		}
 		goto match;
 	case FSID_UUID4_INUM:
 	case FSID_UUID16_INUM:
@@ -761,6 +777,7 @@  static void nfsd_fh(int f)
 	int dev_missing = 0;
 	char buf[RPC_CHAN_BUF_SIZE], *bp;
 	int blen;
+	int did_uncover = 0;
 
 	blen = cache_read(f, buf, sizeof(buf));
 	if (blen <= 0 || buf[blen-1] != '\n') return;
@@ -798,6 +815,11 @@  static void nfsd_fh(int f)
 		for (exp = exportlist[i].p_head; exp; exp = next_exp) {
 			char *path;
 
+			if (!did_uncover && parsed.fsidnum && parsed.fsidtype == FSID_NUM && exp->m_export.e_reexport != REEXP_NONE) {
+				reexpdb_uncover_subvolume(parsed.fsidnum);
+				did_uncover = 1;
+			}
+
 			if (exp->m_export.e_flags & NFSEXP_CROSSMOUNT) {
 				static nfs_export *prev = NULL;
 				static void *mnt = NULL;
@@ -935,7 +957,7 @@  static void write_fsloc(char **bp, int *blen, struct exportent *ep)
 	release_replicas(servers);
 }
 #endif
-static void write_secinfo(char **bp, int *blen, struct exportent *ep, int flag_mask)
+static void write_secinfo(char **bp, int *blen, struct exportent *ep, int flag_mask, int extra_flag)
 {
 	struct sec_entry *p;
 
@@ -950,11 +972,20 @@  static void write_secinfo(char **bp, int *blen, struct exportent *ep, int flag_m
 	qword_addint(bp, blen, p - ep->e_secinfo);
 	for (p = ep->e_secinfo; p->flav; p++) {
 		qword_addint(bp, blen, p->flav->fnum);
-		qword_addint(bp, blen, p->flags & flag_mask);
+		qword_addint(bp, blen, (p->flags | extra_flag) & flag_mask);
 	}
 
 }
 
+static int can_reexport_via_fsidnum(struct exportent *exp, struct statfs64 *st)
+{
+	if (st->f_type != 0x6969 /* NFS_SUPER_MAGIC */)
+		return 0;
+
+	return exp->e_reexport == REEXP_PREDEFINED_FSIDNUM ||
+	       exp->e_reexport == REEXP_AUTO_FSIDNUM;
+}
+
 static int dump_to_cache(int f, char *buf, int blen, char *domain,
 			 char *path, struct exportent *exp, int ttl)
 {
@@ -971,17 +1002,48 @@  static int dump_to_cache(int f, char *buf, int blen, char *domain,
 	if (exp) {
 		int different_fs = strcmp(path, exp->e_path) != 0;
 		int flag_mask = different_fs ? ~NFSEXP_FSID : ~0;
+		int rc, do_fsidnum = 0;
+		uint32_t fsidnum = exp->e_fsid;
+
+		if (different_fs) {
+			struct statfs64 st;
+
+			rc = nfsd_path_statfs64(path, &st);
+			if (rc) {
+				xlog(L_WARNING, "unable to statfs %s", path);
+				errno = EINVAL;
+				return -1;
+			}
+
+			if (can_reexport_via_fsidnum(exp, &st)) {
+				do_fsidnum = 1;
+				flag_mask = ~0;
+			}
+		}
 
 		qword_adduint(&bp, &blen, now + exp->e_ttl);
-		qword_addint(&bp, &blen, exp->e_flags & flag_mask);
+
+		if (do_fsidnum) {
+			uint32_t search_fsidnum = 0;
+			if (exp->e_reexport != REEXP_NONE && reexpdb_fsidnum_by_path(path, &search_fsidnum,
+			    exp->e_reexport == REEXP_AUTO_FSIDNUM) == 0) {
+				errno = EINVAL;
+				return -1;
+			}
+			fsidnum = search_fsidnum;
+			qword_addint(&bp, &blen, exp->e_flags | NFSEXP_FSID);
+		} else {
+			qword_addint(&bp, &blen, exp->e_flags & flag_mask);
+		}
+
 		qword_addint(&bp, &blen, exp->e_anonuid);
 		qword_addint(&bp, &blen, exp->e_anongid);
-		qword_addint(&bp, &blen, exp->e_fsid);
+		qword_addint(&bp, &blen, fsidnum);
 
 #ifdef HAVE_JUNCTION_SUPPORT
 		write_fsloc(&bp, &blen, exp);
 #endif
-		write_secinfo(&bp, &blen, exp, flag_mask);
+		write_secinfo(&bp, &blen, exp, flag_mask, do_fsidnum ? NFSEXP_FSID : 0);
 		if (exp->e_uuid == NULL || different_fs) {
 			char u[16];
 			if ((exp->e_flags & flag_mask & NFSEXP_FSID) == 0 &&
diff --git a/support/export/export.c b/support/export/export.c
index 03390dfc..c5e56b1a 100644
--- a/support/export/export.c
+++ b/support/export/export.c
@@ -25,6 +25,7 @@ 
 #include "exportfs.h"
 #include "nfsd_path.h"
 #include "xlog.h"
+#include "reexport.h"
 
 exp_hash_table exportlist[MCL_MAXTYPES] = {{NULL, {{NULL,NULL}, }}, }; 
 static int export_hash(char *);
@@ -115,6 +116,7 @@  export_read(char *fname, int ignore_hosts)
 	nfs_export		*exp;
 
 	int volumes = 0;
+	int reexport_found = 0;
 
 	setexportent(fname, "r");
 	while ((eep = getexportent(0,1)) != NULL) {
@@ -126,7 +128,25 @@  export_read(char *fname, int ignore_hosts)
 		}
 		else
 			warn_duplicated_exports(exp, eep);
+
+		if (eep->e_reexport)
+			reexport_found = 1;
+	}
+
+	if (reexport_found) {
+		for (int i = 0; i < MCL_MAXTYPES; i++) {
+			for (exp = exportlist[i].p_head; exp; exp = exp->m_next) {
+				if (exp->m_export.e_reexport)
+					continue;
+
+				if (exp->m_export.e_flags & NFSEXP_FSID) {
+					xlog(L_ERROR, "When a reexport= option is present no manully assigned numerical fsid= options are allowed");
+					return -1;
+				}
+			}
+		}
 	}
+
 	endexportent();
 
 	return volumes;
@@ -147,7 +167,7 @@  export_d_read(const char *dname, int ignore_hosts)
 	int n = 0, i;
 	struct dirent **namelist = NULL;
 	int volumes = 0;
-
+	int num_exports;
 
 	n = scandir(dname, &namelist, NULL, versionsort);
 	if (n < 0) {
@@ -186,7 +206,10 @@  export_d_read(const char *dname, int ignore_hosts)
 			continue;
 		}
 
-		volumes += export_read(fname, ignore_hosts);
+		num_exports = export_read(fname, ignore_hosts);
+		if (num_exports < 0)
+			return -1;
+		volumes += num_exports;
 	}
 
 	for (i = 0; i < n; i++)
diff --git a/support/include/nfslib.h b/support/include/nfslib.h
index 6faba71b..0465a1ff 100644
--- a/support/include/nfslib.h
+++ b/support/include/nfslib.h
@@ -85,6 +85,7 @@  struct exportent {
 	struct sec_entry e_secinfo[SECFLAVOR_COUNT+1];
 	unsigned int	e_ttl;
 	char *		e_realpath;
+	int		e_reexport;
 };
 
 struct rmtabent {
diff --git a/support/nfs/Makefile.am b/support/nfs/Makefile.am
index 67e3a8e1..2e1577cc 100644
--- a/support/nfs/Makefile.am
+++ b/support/nfs/Makefile.am
@@ -9,6 +9,7 @@  libnfs_la_SOURCES = exports.c rmtab.c xio.c rpcmisc.c rpcdispatch.c \
 		   svc_socket.c cacheio.c closeall.c nfs_mntent.c \
 		   svc_create.c atomicio.c strlcat.c strlcpy.c
 libnfs_la_LIBADD = libnfsconf.la
+libnfs_la_CPPFLAGS = $(AM_CPPFLAGS) $(CPPFLAGS) -I$(top_srcdir)/support/reexport
 
 libnfsconf_la_SOURCES = conffile.c xlog.c
 
diff --git a/support/nfs/exports.c b/support/nfs/exports.c
index 2c8f0752..1d7e5228 100644
--- a/support/nfs/exports.c
+++ b/support/nfs/exports.c
@@ -31,6 +31,7 @@ 
 #include "xlog.h"
 #include "xio.h"
 #include "pseudoflavors.h"
+#include "reexport.h"
 
 #define EXPORT_DEFAULT_FLAGS	\
   (NFSEXP_READONLY|NFSEXP_ROOTSQUASH|NFSEXP_GATHERED_WRITES|NFSEXP_NOSUBTREECHECK)
@@ -103,6 +104,7 @@  static void init_exportent (struct exportent *ee, int fromkernel)
 	ee->e_nsqgids = 0;
 	ee->e_uuid = NULL;
 	ee->e_ttl = default_ttl;
+	ee->e_reexport = REEXP_NONE;
 }
 
 struct exportent *
@@ -302,6 +304,23 @@  putexportent(struct exportent *ep)
 	}
 	if (ep->e_uuid)
 		fprintf(fp, "fsid=%s,", ep->e_uuid);
+
+	if (ep->e_reexport) {
+		fprintf(fp, "reexport=");
+		switch (ep->e_reexport) {
+			case REEXP_AUTO_FSIDNUM:
+				fprintf(fp, "auto-fsidnum");
+				break;
+			case REEXP_PREDEFINED_FSIDNUM:
+				fprintf(fp, "predefined-fsidnum");
+				break;
+			default:
+				xlog(L_ERROR, "unknown reexport method %i", ep->e_reexport);
+				fprintf(fp, "none");
+		}
+		fprintf(fp, ",");
+	}
+
 	if (ep->e_mountpoint)
 		fprintf(fp, "mountpoint%s%s,",
 			ep->e_mountpoint[0]?"=":"", ep->e_mountpoint);
@@ -538,6 +557,7 @@  parseopts(char *cp, struct exportent *ep, int warn, int *had_subtree_opt_ptr)
 	char 	*flname = efname?efname:"command line";
 	int	flline = efp?efp->x_line:0;
 	unsigned int active = 0;
+	int saw_reexport = 0;
 
 	squids = ep->e_squids; nsquids = ep->e_nsquids;
 	sqgids = ep->e_sqgids; nsqgids = ep->e_nsqgids;
@@ -644,6 +664,13 @@  bad_option:
 			}
 		} else if (strncmp(opt, "fsid=", 5) == 0) {
 			char *oe;
+
+			if (saw_reexport) {
+				xlog(L_ERROR, "%s:%d: 'fsid=' has to be before 'reexport=' %s\n",
+				     flname, flline, opt);
+				goto bad_option;
+			}
+
 			if (strcmp(opt+5, "root") == 0) {
 				ep->e_fsid = 0;
 				setflags(NFSEXP_FSID, active, ep);
@@ -688,6 +715,41 @@  bad_option:
 			active = parse_flavors(opt+4, ep);
 			if (!active)
 				goto bad_option;
+		} else if (strncmp(opt, "reexport=", 9) == 0) {
+			char *strategy = strchr(opt, '=');
+
+			if (!strategy) {
+				xlog(L_ERROR, "%s:%d: bad option %s\n",
+				     flname, flline, opt);
+				goto bad_option;
+			}
+			strategy++;
+
+			if (saw_reexport) {
+				xlog(L_ERROR, "%s:%d: only one 'reexport=' is allowed%s\n",
+				     flname, flline, opt);
+				goto bad_option;
+			}
+
+			if (strcmp(strategy, "auto-fsidnum") == 0) {
+				ep->e_reexport = REEXP_AUTO_FSIDNUM;
+			} else if (strcmp(strategy, "predefined-fsidnum") == 0) {
+				ep->e_reexport = REEXP_PREDEFINED_FSIDNUM;
+			} else if (strcmp(strategy, "none") == 0) {
+				ep->e_reexport = REEXP_NONE;
+			} else {
+				xlog(L_ERROR, "%s:%d: bad option %s\n",
+				     flname, flline, strategy);
+				goto bad_option;
+			}
+
+			if (reexpdb_apply_reexport_settings(ep, flname, flline) != 0)
+				goto bad_option;
+
+			if (ep->e_fsid)
+				setflags(NFSEXP_FSID, active, ep);
+
+			saw_reexport = 1;
 		} else {
 			xlog(L_ERROR, "%s:%d: unknown keyword \"%s\"\n",
 					flname, flline, opt);
diff --git a/support/reexport/Makefile.am b/support/reexport/Makefile.am
new file mode 100644
index 00000000..9d544a8f
--- /dev/null
+++ b/support/reexport/Makefile.am
@@ -0,0 +1,6 @@ 
+## Process this file with automake to produce Makefile.in
+
+noinst_LIBRARIES = libreexport.a
+libreexport_a_SOURCES = reexport.c
+
+MAINTAINERCLEANFILES = Makefile.in
diff --git a/support/reexport/reexport.c b/support/reexport/reexport.c
new file mode 100644
index 00000000..0c372ab6
--- /dev/null
+++ b/support/reexport/reexport.c
@@ -0,0 +1,327 @@ 
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <dlfcn.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/random.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "nfsd_path.h"
+#include "conffile.h"
+#include "nfslib.h"
+#include "reexport.h"
+#include "reexport_backend.h"
+#include "xcommon.h"
+#include "xlog.h"
+
+static int fsidd_srv = -1;
+
+static bool connect_fsid_service(void)
+{
+	struct sockaddr_un addr;
+	char *sock_file;
+	int ret;
+	int s;
+
+	if (fsidd_srv != -1)
+		return true;
+
+	sock_file = conf_get_str_with_def("reexport", "fsidd_socket", FSID_SOCKET_NAME);
+
+	memset(&addr, 0, sizeof(struct sockaddr_un));
+	addr.sun_family = AF_UNIX;
+	strncpy(addr.sun_path, sock_file, sizeof(addr.sun_path) - 1);
+
+	s = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+	if (s == -1) {
+		xlog(L_WARNING, "Unable to create AF_UNIX socket for %s: %m\n", sock_file);
+		return false;
+	}
+
+	ret = connect(s, (const struct sockaddr *)&addr, sizeof(struct sockaddr_un));
+	if (ret == -1) {
+		xlog(L_WARNING, "Unable to connect %s: %m, is fsidd running?\n", sock_file);
+		return false;
+	}
+
+	fsidd_srv = s;
+
+	return true;
+}
+
+int reexpdb_init(void)
+{
+	int try_count = 3;
+
+	while (try_count > 0 && !connect_fsid_service()) {
+		sleep(1);
+		try_count--;
+	}
+
+	return try_count > 0;
+}
+
+void reexpdb_destroy(void)
+{
+	close(fsidd_srv);
+	fsidd_srv = -1;
+}
+
+static bool parse_fsidd_reply(const char *cmd_info, char *buf, size_t len, char **result)
+{
+	if (len == 0) {
+		xlog(L_WARNING, "Unable to read %s result: server closed the connection", cmd_info);
+		return false;
+	} else if (len < 2) {
+		xlog(L_WARNING, "Unable to read %s result: server sent too few bytes", cmd_info);
+		return false;
+	}
+
+	if (buf[0] == '-') {
+		if (len > 2) {
+			char *reason = buf + 2;
+			xlog(L_WARNING, "Command %s failed, server said: %s", cmd_info, reason);
+		} else {
+			xlog(L_WARNING, "Command %s failed at server side", cmd_info);
+		}
+
+		return false;
+	}
+
+	if (buf[0] != '+') {
+		xlog(L_WARNING, "Unable to read %s result: server sent malformed answer", cmd_info);
+		return false;
+	}
+
+	if (len > 2) {
+		*result = strdup(buf + 2);
+	} else {
+		*result = NULL;
+	}
+
+	return true;
+}
+
+static bool do_fsidd_cmd(const char *cmd_info, char *msg, size_t len, char **result)
+{
+	char recvbuf[1024];
+	int n;
+
+	if (fsidd_srv == -1) {
+		xlog(L_NOTICE, "Reconnecting to fsid services");
+		if (reexpdb_init() == false)
+			return false;
+	}
+
+	xlog(D_GENERAL, "Request to fsidd: msg=\"%s\" len=%zd", msg, len);
+
+	if (write(fsidd_srv, msg, len) == -1) {
+		xlog(L_WARNING, "Unable to send %s command: %m", cmd_info);
+		goto out_close;
+	}
+
+	n = read(fsidd_srv, recvbuf, sizeof(recvbuf) - 1);
+	if (n <= -1) {
+		xlog(L_WARNING, "Unable to recv %s answer: %m", cmd_info);
+		goto out_close;
+	} else if (n == sizeof(recvbuf) - 1) {
+		//TODO: use better way to detect truncation
+		xlog(L_WARNING, "Unable to recv %s answer: answer truncated", cmd_info);
+		goto out_close;
+	}
+	recvbuf[n] = '\0';
+
+	xlog(D_GENERAL, "Answer from fsidd: msg=\"%s\" len=%i", recvbuf, n);
+
+	if (parse_fsidd_reply(cmd_info, recvbuf, n, result) == false) {
+		goto out_close;
+	}
+
+	return true;
+
+out_close:
+	close(fsidd_srv);
+	fsidd_srv = -1;
+	return false;
+}
+
+static bool fsidnum_get_by_path(char *path, uint32_t *fsidnum, bool may_create)
+{
+	char *msg, *result;
+	bool ret = false;
+	int len;
+
+	char *cmd = may_create ? "get_or_create_fsidnum" : "get_fsidnum";
+
+	len = asprintf(&msg, "%s %s", cmd, path);
+	if (len == -1) {
+		xlog(L_WARNING, "Unable to build %s command: %m", cmd);
+		goto out;
+	}
+
+	if (do_fsidd_cmd(cmd, msg, len, &result) == false) {
+		goto out;
+	}
+
+	if (result) {
+		bool bad_input = true;
+		char *endp;
+
+		errno = 0;
+		*fsidnum = strtoul(result, &endp, 10);
+		if (errno == 0 && *endp == '\0') {
+			bad_input = false;
+		}
+
+		free(result);
+
+		if (!bad_input) {
+			ret = true;
+		} else {
+			xlog(L_NOTICE, "Got malformed fsid for path %s", path);
+		}
+	} else {
+		xlog(L_NOTICE, "No fsid found for path %s", path);
+	}
+
+out:
+	free(msg);
+	return ret;
+}
+
+static bool path_by_fsidnum(uint32_t fsidnum, char **path)
+{
+	char *msg, *result;
+	bool ret = false;
+	int len;
+
+	len = asprintf(&msg, "get_path %d", (unsigned int)fsidnum);
+	if (len == -1) {
+		xlog(L_WARNING, "Unable to build get_path command: %m");
+		goto out;
+	}
+
+	if (do_fsidd_cmd("get_path", msg, len, &result) == false) {
+		goto out;
+	}
+
+	if (result) {
+		*path = result;
+		ret = true;
+	} else {
+		xlog(L_NOTICE, "No path found for fsid %u", (unsigned int)fsidnum);
+	}
+
+out:
+	free(msg);
+	return ret;
+}
+
+/*
+ * reexpdb_fsidnum_by_path - Lookup a fsid by path.
+ *
+ * @path: File system path used as lookup key
+ * @fsidnum: Pointer where found fsid is written to
+ * @may_create: If non-zero, allocate new fsid if lookup failed
+ *
+ */
+int reexpdb_fsidnum_by_path(char *path, uint32_t *fsidnum, int may_create)
+{
+	return fsidnum_get_by_path(path, fsidnum, may_create);
+}
+
+/*
+ * reexpdb_uncover_subvolume - Make sure a subvolume is present.
+ *
+ * @fsidnum: Numerical fsid number to look for
+ *
+ * Subvolumes (NFS cross mounts) get automatically mounted upon first
+ * access and can vanish after fs.nfs.nfs_mountpoint_timeout seconds.
+ * Also if the NFS server reboots, clients can still have valid file
+ * handles for such a subvolume.
+ *
+ * If kNFSd asks mountd for the path of a given fsidnum it can
+ * trigger an automount by calling statfs() on the given path.
+ */
+void reexpdb_uncover_subvolume(uint32_t fsidnum)
+{
+	struct statfs64 st;
+	char *path = NULL;
+	int ret;
+
+	if (path_by_fsidnum(fsidnum, &path)) {
+		ret = nfsd_path_statfs64(path, &st);
+		if (ret == -1)
+			xlog(L_WARNING, "statfs() failed");
+	}
+
+	free(path);
+}
+
+/*
+ * reexpdb_apply_reexport_settings - Apply reexport specific settings to an exportent
+ *
+ * @ep: exportent to apply to
+ * @flname: Current export file, only useful for logging
+ * @flline: Current line, only useful for logging
+ *
+ * This is a helper function for applying reexport specific settings to an exportent.
+ * It searches a suitable fsid an sets @ep->e_fsid.
+ */
+int reexpdb_apply_reexport_settings(struct exportent *ep, char *flname, int flline)
+{
+	uint32_t fsidnum;
+	bool found, is_v4root = ((ep->e_flags & NFSEXP_FSID) && !ep->e_fsid);
+	int ret = 0;
+
+	if (ep->e_reexport == REEXP_NONE)
+		goto out;
+
+	if (ep->e_uuid)
+		goto out;
+
+	if (is_v4root)
+		goto out;
+
+	found = reexpdb_fsidnum_by_path(ep->e_path, &fsidnum, 0);
+	if (!found) {
+		if (ep->e_reexport == REEXP_AUTO_FSIDNUM) {
+			found = reexpdb_fsidnum_by_path(ep->e_path, &fsidnum, 1);
+			if (!found) {
+				xlog(L_ERROR, "%s:%i: Unable to generate fsid for %s",
+				     flname, flline, ep->e_path);
+				ret = -1;
+				goto out;
+			}
+		} else {
+			if (!ep->e_fsid) {
+				xlog(L_ERROR, "%s:%i: Selected 'reexport=' mode requires either a UUID 'fsid=' or a numerical 'fsid=' or a reexport db entry %d",
+				     flname, flline, ep->e_fsid);
+				ret = -1;
+			}
+
+			goto out;
+		}
+	}
+
+	if (ep->e_fsid) {
+		if (ep->e_fsid != fsidnum) {
+			xlog(L_ERROR, "%s:%i: Selected 'reexport=' mode requires configured numerical 'fsid=' to agree with reexport db entry",
+			     flname, flline);
+			ret = -1;
+		}
+	} else {
+		ep->e_fsid = fsidnum;
+	}
+
+out:
+	return ret;
+}
diff --git a/systemd/Makefile.am b/systemd/Makefile.am
index 7b5ab84b..a733a554 100644
--- a/systemd/Makefile.am
+++ b/systemd/Makefile.am
@@ -69,8 +69,10 @@  rpc_pipefs_generator_SOURCES = $(COMMON_SRCS) rpc-pipefs-generator.c
 nfs_server_generator_LDADD = ../support/export/libexport.a \
 			     ../support/nfs/libnfs.la \
 			     ../support/misc/libmisc.a \
+			     ../support/reexport/libreexport.a \
 			     $(LIBPTHREAD)
 
+
 rpc_pipefs_generator_LDADD = ../support/nfs/libnfs.la
 
 if INSTALL_SYSTEMD
diff --git a/utils/exportd/Makefile.am b/utils/exportd/Makefile.am
index c95bdee7..e9ffe6c0 100644
--- a/utils/exportd/Makefile.am
+++ b/utils/exportd/Makefile.am
@@ -16,11 +16,15 @@  exportd_SOURCES = exportd.c
 exportd_LDADD = ../../support/export/libexport.a \
 			../../support/nfs/libnfs.la \
 			../../support/misc/libmisc.a \
-			$(OPTLIBS) $(LIBBLKID) $(LIBPTHREAD) -luuid
+			../support/reexport/libreexport.a \
+			$(OPTLIBS) $(LIBBLKID) $(LIBPTHREAD) \
+			-luuid
 
 exportd_CPPFLAGS = $(AM_CPPFLAGS) $(CPPFLAGS) \
 		-I$(top_srcdir)/support/export
 
+exportd_CPPFLAGS += -I$(top_srcdir)/support/reexport
+
 MAINTAINERCLEANFILES = Makefile.in
 
 #######################################################################
diff --git a/utils/exportd/exportd.c b/utils/exportd/exportd.c
index 2dd12cb6..e076a295 100644
--- a/utils/exportd/exportd.c
+++ b/utils/exportd/exportd.c
@@ -22,6 +22,7 @@ 
 #include "conffile.h"
 #include "exportfs.h"
 #include "export.h"
+#include "reexport.h"
 
 extern void my_svc_run(void);
 
@@ -296,6 +297,10 @@  main(int argc, char **argv)
 	/* Open files now to avoid sharing descriptors among forked processes */
 	cache_open();
 	v4clients_init();
+	if (reexpdb_init() != 0) {
+		xlog(L_ERROR, "%s: Failed to init reexport database", __func__);
+		exit(1);
+	}
 
 	/* Process incoming upcalls */
 	cache_process_loop();
diff --git a/utils/exportfs/Makefile.am b/utils/exportfs/Makefile.am
index 96524c72..7f8ce9fa 100644
--- a/utils/exportfs/Makefile.am
+++ b/utils/exportfs/Makefile.am
@@ -10,6 +10,9 @@  exportfs_SOURCES = exportfs.c
 exportfs_LDADD = ../../support/export/libexport.a \
 	       	 ../../support/nfs/libnfs.la \
 		 ../../support/misc/libmisc.a \
+		 ../../support/reexport/libreexport.a \
 		 $(LIBWRAP) $(LIBNSL) $(LIBPTHREAD)
 
+exportfs_CPPFLAGS = $(AM_CPPFLAGS) $(CPPFLAGS) -I$(top_srcdir)/support/reexport
+
 MAINTAINERCLEANFILES = Makefile.in
diff --git a/utils/exportfs/exportfs.c b/utils/exportfs/exportfs.c
index 0897b229..8f055d34 100644
--- a/utils/exportfs/exportfs.c
+++ b/utils/exportfs/exportfs.c
@@ -38,6 +38,7 @@ 
 #include "exportfs.h"
 #include "xlog.h"
 #include "conffile.h"
+#include "reexport.h"
 
 static void	export_all(int verbose);
 static void	exportfs(char *arg, char *options, int verbose);
@@ -719,6 +720,16 @@  dump(int verbose, int export_format)
 				c = dumpopt(c, "fsid=%d", ep->e_fsid);
 			if (ep->e_uuid)
 				c = dumpopt(c, "fsid=%s", ep->e_uuid);
+			if (ep->e_reexport) {
+				switch (ep->e_reexport) {
+					case REEXP_AUTO_FSIDNUM:
+						c = dumpopt(c, "reexport=%s", "auto-fsidnum");
+						break;
+					case REEXP_PREDEFINED_FSIDNUM:
+						c = dumpopt(c, "reexport=%s", "predefined-fsidnum");
+						break;
+				}
+			}
 			if (ep->e_mountpoint)
 				c = dumpopt(c, "mountpoint%s%s",
 					    ep->e_mountpoint[0]?"=":"",
diff --git a/utils/exportfs/exports.man b/utils/exportfs/exports.man
index 54b3f877..ad2c2c59 100644
--- a/utils/exportfs/exports.man
+++ b/utils/exportfs/exports.man
@@ -420,6 +420,37 @@  will only work if all clients use a consistent security policy.  Note
 that early kernels did not support this export option, and instead
 enabled security labels by default.
 
+.TP
+.IR reexport= auto-fsidnum|predefined-fsidnum
+This option helps when a NFS share is re-exported. Since the NFS server
+needs a unique identifier for each exported filesystem and a NFS share
+cannot provide such, usually a manual fsid is needed.
+As soon
+.IR crossmnt
+is used manually assigning fsid won't work anymore. This is where this
+option becomes handy. It will automatically assign a numerical fsid
+to exported NFS shares. The fsid and path relations are stored in a SQLite
+database. If
+.IR auto-fsidnum
+is selected, the fsid is also autmatically allocated.
+.IR predefined-fsidnum
+assumes pre-allocated fsid numbers and will just look them up.
+This option depends also on the kernel, you will need at least kernel version
+5.19.
+Since
+.IR reexport=
+can automatically allocate and assign numerical fsids, it is no longer possible
+to have numerical fsids in other exports as soon this option is used in at least
+one export entry.
+
+The association between fsid numbers and paths is stored in a SQLite database.
+Don't edit or remove the database unless you know exactly what you're doing.
+.IR predefined-fsidnum
+is useful when you have used
+.IR auto-fsidnum
+before and don't want further entries stored.
+
+
 .SS User ID Mapping
 .PP
 .B nfsd
diff --git a/utils/mount/Makefile.am b/utils/mount/Makefile.am
index 3101f7ab..5ff1148c 100644
--- a/utils/mount/Makefile.am
+++ b/utils/mount/Makefile.am
@@ -29,8 +29,9 @@  endif
 
 mount_nfs_LDADD = ../../support/nfs/libnfs.la \
 		  ../../support/export/libexport.a \
+		  ../../support/reexport/libreexport.a \
 		  ../../support/misc/libmisc.a \
-		  $(LIBTIRPC)
+		   $(LIBTIRPC) $(LIBPTHREAD)
 
 mount_nfs_SOURCES = $(mount_common)
 
diff --git a/utils/mountd/Makefile.am b/utils/mountd/Makefile.am
index 13b25c90..197ef29b 100644
--- a/utils/mountd/Makefile.am
+++ b/utils/mountd/Makefile.am
@@ -17,9 +17,11 @@  mountd_SOURCES = mountd.c mount_dispatch.c rmtab.c \
 mountd_LDADD = ../../support/export/libexport.a \
 	       ../../support/nfs/libnfs.la \
 	       ../../support/misc/libmisc.a \
+	       ../../support/reexport/libreexport.a \
 	       $(OPTLIBS) \
 	       $(LIBBSD) $(LIBWRAP) $(LIBNSL) $(LIBBLKID) -luuid $(LIBTIRPC) \
 	       $(LIBPTHREAD)
+
 mountd_CPPFLAGS = $(AM_CPPFLAGS) $(CPPFLAGS) \
 		  -I$(top_builddir)/support/include \
 		  -I$(top_srcdir)/support/export