diff mbox series

[1/5] Implement reexport helper library

Message ID 20220502085045.13038-2-richard@nod.at (mailing list archive)
State New, archived
Headers show
Series nfs-utils: Improving NFS re-exports | expand

Commit Message

Richard Weinberger May 2, 2022, 8:50 a.m. UTC
This internal library contains code that will be used by various
tools within the nfs-utils package to deal better with NFS re-export,
especially cross mounts.

Signed-off-by: Richard Weinberger <richard@nod.at>
---
 configure.ac                 |  12 ++
 support/Makefile.am          |   4 +
 support/reexport/Makefile.am |   6 +
 support/reexport/reexport.c  | 285 +++++++++++++++++++++++++++++++++++
 support/reexport/reexport.h  |  39 +++++
 5 files changed, 346 insertions(+)
 create mode 100644 support/reexport/Makefile.am
 create mode 100644 support/reexport/reexport.c
 create mode 100644 support/reexport/reexport.h

Comments

Steve Dickson May 10, 2022, 1:32 p.m. UTC | #1
Hello,

On 5/2/22 4:50 AM, Richard Weinberger wrote:
> This internal library contains code that will be used by various
> tools within the nfs-utils package to deal better with NFS re-export,
> especially cross mounts.
> 
> Signed-off-by: Richard Weinberger <richard@nod.at>
> ---
>   configure.ac                 |  12 ++
>   support/Makefile.am          |   4 +
>   support/reexport/Makefile.am |   6 +
>   support/reexport/reexport.c  | 285 +++++++++++++++++++++++++++++++++++
>   support/reexport/reexport.h  |  39 +++++
>   5 files changed, 346 insertions(+)
>   create mode 100644 support/reexport/Makefile.am
>   create mode 100644 support/reexport/reexport.c
>   create mode 100644 support/reexport/reexport.h
> 
> diff --git a/configure.ac b/configure.ac
> index 93626d62..86bf8ba9 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -274,6 +274,17 @@ AC_ARG_ENABLE(nfsv4server,
>   	fi
>   	AM_CONDITIONAL(CONFIG_NFSV4SERVER, [test "$enable_nfsv4server" = "yes" ])
>   
> +AC_ARG_ENABLE(reexport,
> +	[AC_HELP_STRING([--enable-reexport],
> +			[enable support for re-exporting NFS mounts  @<:@default=no@:>@])],
> +	enable_reexport=$enableval,
> +	enable_reexport="no")
> +	if test "$enable_reexport" = yes; then
> +		AC_DEFINE(HAVE_REEXPORT_SUPPORT, 1,
> +                          [Define this if you want NFS re-export support compiled in])
> +	fi
> +	AM_CONDITIONAL(CONFIG_REEXPORT, [test "$enable_reexport" = "yes" ])
> +
To get this moving I'm going to add a --disable-reexport flag

+AC_ARG_ENABLE(reexport,
+       [AC_HELP_STRING([--disable-reexport],
+                       [disable support for re-exporting NFS mounts 
@<:@default=no@:>@])],
+       enable_reexport=$enableval,
+       enable_reexport="yes")
+       if test "$enable_reexport" = yes; then
+               AC_DEFINE(HAVE_REEXPORT_SUPPORT, 1,
+                          [Define this if you want NFS re-export 
support compiled in])
+       fi
+       AM_CONDITIONAL(CONFIG_REEXPORT, [test "$enable_reexport" = "yes" ])
+

steved.

>   dnl Check for TI-RPC library and headers
>   AC_LIBTIRPC
>   
> @@ -730,6 +741,7 @@ AC_CONFIG_FILES([
>   	support/nsm/Makefile
>   	support/nfsidmap/Makefile
>   	support/nfsidmap/libnfsidmap.pc
> +	support/reexport/Makefile
>   	tools/Makefile
>   	tools/locktest/Makefile
>   	tools/nlmtest/Makefile
> diff --git a/support/Makefile.am b/support/Makefile.am
> index c962d4d4..986e9b5f 100644
> --- a/support/Makefile.am
> +++ b/support/Makefile.am
> @@ -10,6 +10,10 @@ if CONFIG_JUNCTION
>   OPTDIRS += junction
>   endif
>   
> +if CONFIG_REEXPORT
> +OPTDIRS += reexport
> +endif
> +
>   SUBDIRS = export include misc nfs nsm $(OPTDIRS)
>   
>   MAINTAINERCLEANFILES = Makefile.in
> diff --git a/support/reexport/Makefile.am b/support/reexport/Makefile.am
> new file mode 100644
> index 00000000..9d544a8f
> --- /dev/null
> +++ b/support/reexport/Makefile.am
> @@ -0,0 +1,6 @@
> +## Process this file with automake to produce Makefile.in
> +
> +noinst_LIBRARIES = libreexport.a
> +libreexport_a_SOURCES = reexport.c
> +
> +MAINTAINERCLEANFILES = Makefile.in
> diff --git a/support/reexport/reexport.c b/support/reexport/reexport.c
> new file mode 100644
> index 00000000..5474a21f
> --- /dev/null
> +++ b/support/reexport/reexport.c
> @@ -0,0 +1,285 @@
> +#ifdef HAVE_CONFIG_H
> +#include <config.h>
> +#endif
> +
> +#include <sqlite3.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <sys/random.h>
> +#include <sys/stat.h>
> +#include <sys/types.h>
> +#include <sys/vfs.h>
> +#include <unistd.h>
> +
> +#include "nfsd_path.h"
> +#include "nfslib.h"
> +#include "reexport.h"
> +#include "xcommon.h"
> +#include "xlog.h"
> +
> +#define REEXPDB_DBFILE NFS_STATEDIR "/reexpdb.sqlite3"
> +#define REEXPDB_DBFILE_WAIT_USEC (5000)
> +
> +static sqlite3 *db;
> +static int init_done;
> +
> +static int prng_init(void)
> +{
> +	int seed;
> +
> +	if (getrandom(&seed, sizeof(seed), 0) != sizeof(seed)) {
> +		xlog(L_ERROR, "Unable to obtain seed for PRNG via getrandom()");
> +		return -1;
> +	}
> +
> +	srand(seed);
> +	return 0;
> +}
> +
> +static void wait_for_dbaccess(void)
> +{
> +	usleep(REEXPDB_DBFILE_WAIT_USEC + (rand() % REEXPDB_DBFILE_WAIT_USEC));
> +}
> +
> +/*
> + * reexpdb_init - Initialize reexport database
> + */
> +int reexpdb_init(void)
> +{
> +	char *sqlerr;
> +	int ret;
> +
> +	if (init_done)
> +		return 0;
> +
> +	if (prng_init() != 0)
> +		return -1;
> +
> +	ret = sqlite3_open_v2(REEXPDB_DBFILE, &db, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_FULLMUTEX, NULL);
> +	if (ret != SQLITE_OK) {
> +		xlog(L_ERROR, "Unable to open reexport database: %s", sqlite3_errstr(ret));
> +		return -1;
> +	}
> +
> +again:
> +	ret = sqlite3_exec(db, "CREATE TABLE IF NOT EXISTS fsidnums (num INTEGER PRIMARY KEY CHECK (num > 0 AND num < 4294967296), path TEXT UNIQUE); CREATE INDEX IF NOT EXISTS idx_ids_path ON fsidnums (path);", NULL, NULL, &sqlerr);
> +	switch (ret) {
> +	case SQLITE_OK:
> +		init_done = 1;
> +		ret = 0;
> +		break;
> +	case SQLITE_BUSY:
> +	case SQLITE_LOCKED:
> +		wait_for_dbaccess();
> +		goto again;
> +	default:
> +		xlog(L_ERROR, "Unable to init reexport database: %s", sqlite3_errstr(ret));
> +		sqlite3_free(sqlerr);
> +		sqlite3_close_v2(db);
> +		ret = -1;
> +	}
> +
> +	return ret;
> +}
> +
> +/*
> + * reexpdb_destroy - Undo reexpdb_init().
> + */
> +void reexpdb_destroy(void)
> +{
> +	if (!init_done)
> +		return;
> +
> +	sqlite3_close_v2(db);
> +}
> +
> +static int get_fsidnum_by_path(char *path, uint32_t *fsidnum)
> +{
> +	static const char fsidnum_by_path_sql[] = "SELECT num FROM fsidnums WHERE path = ?1;";
> +	sqlite3_stmt *stmt = NULL;
> +	int found = 0;
> +	int ret;
> +
> +	ret = sqlite3_prepare_v2(db, fsidnum_by_path_sql, sizeof(fsidnum_by_path_sql), &stmt, NULL);
> +	if (ret != SQLITE_OK) {
> +		xlog(L_WARNING, "Unable to prepare SQL query '%s': %s", fsidnum_by_path_sql, sqlite3_errstr(ret));
> +		goto out;
> +	}
> +
> +	ret = sqlite3_bind_text(stmt, 1, path, -1, NULL);
> +	if (ret != SQLITE_OK) {
> +		xlog(L_WARNING, "Unable to bind SQL query '%s': %s", fsidnum_by_path_sql, sqlite3_errstr(ret));
> +		goto out;
> +	}
> +
> +again:
> +	ret = sqlite3_step(stmt);
> +	switch (ret) {
> +	case SQLITE_ROW:
> +		*fsidnum = sqlite3_column_int(stmt, 0);
> +		found = 1;
> +		break;
> +	case SQLITE_DONE:
> +		/* No hit */
> +		found = 0;
> +		break;
> +	case SQLITE_BUSY:
> +	case SQLITE_LOCKED:
> +		wait_for_dbaccess();
> +		goto again;
> +	default:
> +		xlog(L_WARNING, "Error while looking up '%s' in database: %s", path, sqlite3_errstr(ret));
> +	}
> +
> +out:
> +	sqlite3_finalize(stmt);
> +	return found;
> +}
> +
> +static int get_path_by_fsidnum(uint32_t fsidnum, char **path)
> +{
> +	static const char path_by_fsidnum_sql[] = "SELECT path FROM fsidnums WHERE num = ?1;";
> +	sqlite3_stmt *stmt = NULL;
> +	int found = 0;
> +	int ret;
> +
> +	ret = sqlite3_prepare_v2(db, path_by_fsidnum_sql, sizeof(path_by_fsidnum_sql), &stmt, NULL);
> +	if (ret != SQLITE_OK) {
> +		xlog(L_WARNING, "Unable to prepare SQL query '%s': %s", path_by_fsidnum_sql, sqlite3_errstr(ret));
> +		goto out;
> +	}
> +
> +	ret = sqlite3_bind_int(stmt, 1, fsidnum);
> +	if (ret != SQLITE_OK) {
> +		xlog(L_WARNING, "Unable to bind SQL query '%s': %s", path_by_fsidnum_sql, sqlite3_errstr(ret));
> +		goto out;
> +	}
> +
> +again:
> +	ret = sqlite3_step(stmt);
> +	switch (ret) {
> +	case SQLITE_ROW:
> +		*path = xstrdup((char *)sqlite3_column_text(stmt, 0));
> +		found = 1;
> +		break;
> +	case SQLITE_DONE:
> +		/* No hit */
> +		found = 0;
> +		break;
> +	case SQLITE_BUSY:
> +	case SQLITE_LOCKED:
> +		wait_for_dbaccess();
> +		goto again;
> +	default:
> +		xlog(L_WARNING, "Error while looking up '%i' in database: %s", fsidnum, sqlite3_errstr(ret));
> +	}
> +
> +out:
> +	sqlite3_finalize(stmt);
> +	return found;
> +}
> +
> +static int new_fsidnum_by_path(char *path, uint32_t *fsidnum)
> +{
> +	/*
> +	 * This query is a little tricky. We use SQL to find and claim the smallest free fsid number.
> +	 * To find a free fsid the fsidnums is left joined to itself but with an offset of 1.
> +	 * Everything after the UNION statement is to handle the corner case where fsidnums
> +	 * is empty. In this case we want 1 as first fsid number.
> +	 */
> +	static const char new_fsidnum_by_path_sql[] = "INSERT INTO fsidnums VALUES ((SELECT ids1.num + 1 FROM fsidnums AS ids1 LEFT JOIN fsidnums AS ids2 ON ids2.num = ids1.num + 1 WHERE ids2.num IS NULL UNION SELECT 1 WHERE NOT EXISTS (SELECT NULL FROM fsidnums WHERE num = 1) LIMIT 1), ?1) RETURNING num;";
> +
> +	sqlite3_stmt *stmt = NULL;
> +	int found = 0, check = 0;
> +	int ret;
> +
> +	ret = sqlite3_prepare_v2(db, new_fsidnum_by_path_sql, sizeof(new_fsidnum_by_path_sql), &stmt, NULL);
> +	if (ret != SQLITE_OK) {
> +		xlog(L_WARNING, "Unable to prepare SQL query '%s': %s", new_fsidnum_by_path_sql, sqlite3_errstr(ret));
> +		goto out;
> +	}
> +
> +	ret = sqlite3_bind_text(stmt, 1, path, -1, NULL);
> +	if (ret != SQLITE_OK) {
> +		xlog(L_WARNING, "Unable to bind SQL query '%s': %s", new_fsidnum_by_path_sql, sqlite3_errstr(ret));
> +		goto out;
> +	}
> +
> +again:
> +	ret = sqlite3_step(stmt);
> +	switch (ret) {
> +	case SQLITE_ROW:
> +		*fsidnum = sqlite3_column_int(stmt, 0);
> +		found = 1;
> +		break;
> +	case SQLITE_CONSTRAINT:
> +		/* Maybe we lost the race against another writer and the path is now present. */
> +		check = 1;
> +		break;
> +	case SQLITE_BUSY:
> +	case SQLITE_LOCKED:
> +		wait_for_dbaccess();
> +		goto again;
> +	default:
> +		xlog(L_WARNING, "Error while looking up '%s' in database: %s", path, sqlite3_errstr(ret));
> +	}
> +
> +out:
> +	sqlite3_finalize(stmt);
> +
> +	if (check) {
> +		found = get_fsidnum_by_path(path, fsidnum);
> +		if (!found)
> +			xlog(L_WARNING, "SQLITE_CONSTRAINT error while inserting '%s' in database", path);
> +	}
> +
> +	return found;
> +}
> +
> +/*
> + * reexpdb_fsidnum_by_path - Lookup a fsid by path.
> + *
> + * @path: File system path used as lookup key
> + * @fsidnum: Pointer where found fsid is written to
> + * @may_create: If non-zero, allocate new fsid if lookup failed
> + *
> + */
> +int reexpdb_fsidnum_by_path(char *path, uint32_t *fsidnum, int may_create)
> +{
> +	int found;
> +
> +	found = get_fsidnum_by_path(path, fsidnum);
> +
> +	if (!found && may_create)
> +		found = new_fsidnum_by_path(path, fsidnum);
> +
> +	return found;
> +}
> +
> +/*
> + * reexpdb_uncover_subvolume - Make sure a subvolume is present.
> + *
> + * @fsidnum: Numerical fsid number to look for
> + *
> + * Subvolumes (NFS cross mounts) get automatically mounted upon first
> + * access and can vanish after fs.nfs.nfs_mountpoint_timeout seconds.
> + * Also if the NFS server reboots, clients can still have valid file
> + * handles for such a subvolume.
> + *
> + * If kNFSd asks mountd for the path of a given fsidnum it can
> + * trigger an automount by calling statfs() on the given path.
> + */
> +void reexpdb_uncover_subvolume(uint32_t fsidnum)
> +{
> +	struct statfs64 st;
> +	char *path = NULL;
> +	int ret;
> +
> +	if (get_path_by_fsidnum(fsidnum, &path)) {
> +		ret = nfsd_path_statfs64(path, &st);
> +		if (ret == -1)
> +			xlog(L_WARNING, "statfs() failed");
> +	}
> +
> +	free(path);
> +}
> diff --git a/support/reexport/reexport.h b/support/reexport/reexport.h
> new file mode 100644
> index 00000000..bb6d2a1b
> --- /dev/null
> +++ b/support/reexport/reexport.h
> @@ -0,0 +1,39 @@
> +#ifndef REEXPORT_H
> +#define REEXPORT_H
> +
> +enum {
> +	REEXP_NONE = 0,
> +	REEXP_AUTO_FSIDNUM,
> +	REEXP_PREDEFINED_FSIDNUM,
> +};
> +
> +#ifdef HAVE_REEXPORT_SUPPORT
> +int reexpdb_init(void);
> +void reexpdb_destroy(void);
> +int reexpdb_fsidnum_by_path(char *path, uint32_t *fsidnum, int may_create);
> +int reexpdb_apply_reexport_settings(struct exportent *ep, char *flname, int flline);
> +void reexpdb_uncover_subvolume(uint32_t fsidnum);
> +#else
> +static inline int reexpdb_init(void) { return 0; }
> +static inline void reexpdb_destroy(void) {}
> +static inline int reexpdb_fsidnum_by_path(char *path, uint32_t *fsidnum, int may_create)
> +{
> +	(void)path;
> +	(void)may_create;
> +	*fsidnum = 0;
> +	return 0;
> +}
> +static inline int reexpdb_apply_reexport_settings(struct exportent *ep, char *flname, int flline)
> +{
> +	(void)ep;
> +	(void)flname;
> +	(void)flline;
> +	return 0;
> +}
> +static inline void reexpdb_uncover_subvolume(uint32_t fsidnum)
> +{
> +	(void)fsidnum;
> +}
> +#endif /* HAVE_REEXPORT_SUPPORT */
> +
> +#endif /* REEXPORT_H */
Chuck Lever May 10, 2022, 1:48 p.m. UTC | #2
> On May 10, 2022, at 9:32 AM, Steve Dickson <steved@redhat.com> wrote:
> 
> Hello,
> 
> On 5/2/22 4:50 AM, Richard Weinberger wrote:
>> This internal library contains code that will be used by various
>> tools within the nfs-utils package to deal better with NFS re-export,
>> especially cross mounts.
>> Signed-off-by: Richard Weinberger <richard@nod.at>
>> ---
>>  configure.ac                 |  12 ++
>>  support/Makefile.am          |   4 +
>>  support/reexport/Makefile.am |   6 +
>>  support/reexport/reexport.c  | 285 +++++++++++++++++++++++++++++++++++
>>  support/reexport/reexport.h  |  39 +++++
>>  5 files changed, 346 insertions(+)
>>  create mode 100644 support/reexport/Makefile.am
>>  create mode 100644 support/reexport/reexport.c
>>  create mode 100644 support/reexport/reexport.h
>> diff --git a/configure.ac b/configure.ac
>> index 93626d62..86bf8ba9 100644
>> --- a/configure.ac
>> +++ b/configure.ac
>> @@ -274,6 +274,17 @@ AC_ARG_ENABLE(nfsv4server,
>>  	fi
>>  	AM_CONDITIONAL(CONFIG_NFSV4SERVER, [test "$enable_nfsv4server" = "yes" ])
>>  +AC_ARG_ENABLE(reexport,
>> +	[AC_HELP_STRING([--enable-reexport],
>> +			[enable support for re-exporting NFS mounts  @<:@default=no@:>@])],
>> +	enable_reexport=$enableval,
>> +	enable_reexport="no")
>> +	if test "$enable_reexport" = yes; then
>> +		AC_DEFINE(HAVE_REEXPORT_SUPPORT, 1,
>> +                          [Define this if you want NFS re-export support compiled in])
>> +	fi
>> +	AM_CONDITIONAL(CONFIG_REEXPORT, [test "$enable_reexport" = "yes" ])
>> +
> To get this moving I'm going to add a --disable-reexport flag

Hi Steve, no-one has given a reason why disabling support
for re-exports would be necessary. Therefore, can't this
switch just be removed?


> +AC_ARG_ENABLE(reexport,
> +       [AC_HELP_STRING([--disable-reexport],
> +                       [disable support for re-exporting NFS mounts @<:@default=no@:>@])],
> +       enable_reexport=$enableval,
> +       enable_reexport="yes")
> +       if test "$enable_reexport" = yes; then
> +               AC_DEFINE(HAVE_REEXPORT_SUPPORT, 1,
> +                          [Define this if you want NFS re-export support compiled in])
> +       fi
> +       AM_CONDITIONAL(CONFIG_REEXPORT, [test "$enable_reexport" = "yes" ])
> +
> 
> steved.
> 
>>  dnl Check for TI-RPC library and headers
>>  AC_LIBTIRPC
>>  @@ -730,6 +741,7 @@ AC_CONFIG_FILES([
>>  	support/nsm/Makefile
>>  	support/nfsidmap/Makefile
>>  	support/nfsidmap/libnfsidmap.pc
>> +	support/reexport/Makefile
>>  	tools/Makefile
>>  	tools/locktest/Makefile
>>  	tools/nlmtest/Makefile
>> diff --git a/support/Makefile.am b/support/Makefile.am
>> index c962d4d4..986e9b5f 100644
>> --- a/support/Makefile.am
>> +++ b/support/Makefile.am
>> @@ -10,6 +10,10 @@ if CONFIG_JUNCTION
>>  OPTDIRS += junction
>>  endif
>>  +if CONFIG_REEXPORT
>> +OPTDIRS += reexport
>> +endif
>> +
>>  SUBDIRS = export include misc nfs nsm $(OPTDIRS)
>>    MAINTAINERCLEANFILES = Makefile.in
>> diff --git a/support/reexport/Makefile.am b/support/reexport/Makefile.am
>> new file mode 100644
>> index 00000000..9d544a8f
>> --- /dev/null
>> +++ b/support/reexport/Makefile.am
>> @@ -0,0 +1,6 @@
>> +## Process this file with automake to produce Makefile.in
>> +
>> +noinst_LIBRARIES = libreexport.a
>> +libreexport_a_SOURCES = reexport.c
>> +
>> +MAINTAINERCLEANFILES = Makefile.in
>> diff --git a/support/reexport/reexport.c b/support/reexport/reexport.c
>> new file mode 100644
>> index 00000000..5474a21f
>> --- /dev/null
>> +++ b/support/reexport/reexport.c
>> @@ -0,0 +1,285 @@
>> +#ifdef HAVE_CONFIG_H
>> +#include <config.h>
>> +#endif
>> +
>> +#include <sqlite3.h>
>> +#include <stdint.h>
>> +#include <stdio.h>
>> +#include <sys/random.h>
>> +#include <sys/stat.h>
>> +#include <sys/types.h>
>> +#include <sys/vfs.h>
>> +#include <unistd.h>
>> +
>> +#include "nfsd_path.h"
>> +#include "nfslib.h"
>> +#include "reexport.h"
>> +#include "xcommon.h"
>> +#include "xlog.h"
>> +
>> +#define REEXPDB_DBFILE NFS_STATEDIR "/reexpdb.sqlite3"
>> +#define REEXPDB_DBFILE_WAIT_USEC (5000)
>> +
>> +static sqlite3 *db;
>> +static int init_done;
>> +
>> +static int prng_init(void)
>> +{
>> +	int seed;
>> +
>> +	if (getrandom(&seed, sizeof(seed), 0) != sizeof(seed)) {
>> +		xlog(L_ERROR, "Unable to obtain seed for PRNG via getrandom()");
>> +		return -1;
>> +	}
>> +
>> +	srand(seed);
>> +	return 0;
>> +}
>> +
>> +static void wait_for_dbaccess(void)
>> +{
>> +	usleep(REEXPDB_DBFILE_WAIT_USEC + (rand() % REEXPDB_DBFILE_WAIT_USEC));
>> +}
>> +
>> +/*
>> + * reexpdb_init - Initialize reexport database
>> + */
>> +int reexpdb_init(void)
>> +{
>> +	char *sqlerr;
>> +	int ret;
>> +
>> +	if (init_done)
>> +		return 0;
>> +
>> +	if (prng_init() != 0)
>> +		return -1;
>> +
>> +	ret = sqlite3_open_v2(REEXPDB_DBFILE, &db, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_FULLMUTEX, NULL);
>> +	if (ret != SQLITE_OK) {
>> +		xlog(L_ERROR, "Unable to open reexport database: %s", sqlite3_errstr(ret));
>> +		return -1;
>> +	}
>> +
>> +again:
>> +	ret = sqlite3_exec(db, "CREATE TABLE IF NOT EXISTS fsidnums (num INTEGER PRIMARY KEY CHECK (num > 0 AND num < 4294967296), path TEXT UNIQUE); CREATE INDEX IF NOT EXISTS idx_ids_path ON fsidnums (path);", NULL, NULL, &sqlerr);
>> +	switch (ret) {
>> +	case SQLITE_OK:
>> +		init_done = 1;
>> +		ret = 0;
>> +		break;
>> +	case SQLITE_BUSY:
>> +	case SQLITE_LOCKED:
>> +		wait_for_dbaccess();
>> +		goto again;
>> +	default:
>> +		xlog(L_ERROR, "Unable to init reexport database: %s", sqlite3_errstr(ret));
>> +		sqlite3_free(sqlerr);
>> +		sqlite3_close_v2(db);
>> +		ret = -1;
>> +	}
>> +
>> +	return ret;
>> +}
>> +
>> +/*
>> + * reexpdb_destroy - Undo reexpdb_init().
>> + */
>> +void reexpdb_destroy(void)
>> +{
>> +	if (!init_done)
>> +		return;
>> +
>> +	sqlite3_close_v2(db);
>> +}
>> +
>> +static int get_fsidnum_by_path(char *path, uint32_t *fsidnum)
>> +{
>> +	static const char fsidnum_by_path_sql[] = "SELECT num FROM fsidnums WHERE path = ?1;";
>> +	sqlite3_stmt *stmt = NULL;
>> +	int found = 0;
>> +	int ret;
>> +
>> +	ret = sqlite3_prepare_v2(db, fsidnum_by_path_sql, sizeof(fsidnum_by_path_sql), &stmt, NULL);
>> +	if (ret != SQLITE_OK) {
>> +		xlog(L_WARNING, "Unable to prepare SQL query '%s': %s", fsidnum_by_path_sql, sqlite3_errstr(ret));
>> +		goto out;
>> +	}
>> +
>> +	ret = sqlite3_bind_text(stmt, 1, path, -1, NULL);
>> +	if (ret != SQLITE_OK) {
>> +		xlog(L_WARNING, "Unable to bind SQL query '%s': %s", fsidnum_by_path_sql, sqlite3_errstr(ret));
>> +		goto out;
>> +	}
>> +
>> +again:
>> +	ret = sqlite3_step(stmt);
>> +	switch (ret) {
>> +	case SQLITE_ROW:
>> +		*fsidnum = sqlite3_column_int(stmt, 0);
>> +		found = 1;
>> +		break;
>> +	case SQLITE_DONE:
>> +		/* No hit */
>> +		found = 0;
>> +		break;
>> +	case SQLITE_BUSY:
>> +	case SQLITE_LOCKED:
>> +		wait_for_dbaccess();
>> +		goto again;
>> +	default:
>> +		xlog(L_WARNING, "Error while looking up '%s' in database: %s", path, sqlite3_errstr(ret));
>> +	}
>> +
>> +out:
>> +	sqlite3_finalize(stmt);
>> +	return found;
>> +}
>> +
>> +static int get_path_by_fsidnum(uint32_t fsidnum, char **path)
>> +{
>> +	static const char path_by_fsidnum_sql[] = "SELECT path FROM fsidnums WHERE num = ?1;";
>> +	sqlite3_stmt *stmt = NULL;
>> +	int found = 0;
>> +	int ret;
>> +
>> +	ret = sqlite3_prepare_v2(db, path_by_fsidnum_sql, sizeof(path_by_fsidnum_sql), &stmt, NULL);
>> +	if (ret != SQLITE_OK) {
>> +		xlog(L_WARNING, "Unable to prepare SQL query '%s': %s", path_by_fsidnum_sql, sqlite3_errstr(ret));
>> +		goto out;
>> +	}
>> +
>> +	ret = sqlite3_bind_int(stmt, 1, fsidnum);
>> +	if (ret != SQLITE_OK) {
>> +		xlog(L_WARNING, "Unable to bind SQL query '%s': %s", path_by_fsidnum_sql, sqlite3_errstr(ret));
>> +		goto out;
>> +	}
>> +
>> +again:
>> +	ret = sqlite3_step(stmt);
>> +	switch (ret) {
>> +	case SQLITE_ROW:
>> +		*path = xstrdup((char *)sqlite3_column_text(stmt, 0));
>> +		found = 1;
>> +		break;
>> +	case SQLITE_DONE:
>> +		/* No hit */
>> +		found = 0;
>> +		break;
>> +	case SQLITE_BUSY:
>> +	case SQLITE_LOCKED:
>> +		wait_for_dbaccess();
>> +		goto again;
>> +	default:
>> +		xlog(L_WARNING, "Error while looking up '%i' in database: %s", fsidnum, sqlite3_errstr(ret));
>> +	}
>> +
>> +out:
>> +	sqlite3_finalize(stmt);
>> +	return found;
>> +}
>> +
>> +static int new_fsidnum_by_path(char *path, uint32_t *fsidnum)
>> +{
>> +	/*
>> +	 * This query is a little tricky. We use SQL to find and claim the smallest free fsid number.
>> +	 * To find a free fsid the fsidnums is left joined to itself but with an offset of 1.
>> +	 * Everything after the UNION statement is to handle the corner case where fsidnums
>> +	 * is empty. In this case we want 1 as first fsid number.
>> +	 */
>> +	static const char new_fsidnum_by_path_sql[] = "INSERT INTO fsidnums VALUES ((SELECT ids1.num + 1 FROM fsidnums AS ids1 LEFT JOIN fsidnums AS ids2 ON ids2.num = ids1.num + 1 WHERE ids2.num IS NULL UNION SELECT 1 WHERE NOT EXISTS (SELECT NULL FROM fsidnums WHERE num = 1) LIMIT 1), ?1) RETURNING num;";
>> +
>> +	sqlite3_stmt *stmt = NULL;
>> +	int found = 0, check = 0;
>> +	int ret;
>> +
>> +	ret = sqlite3_prepare_v2(db, new_fsidnum_by_path_sql, sizeof(new_fsidnum_by_path_sql), &stmt, NULL);
>> +	if (ret != SQLITE_OK) {
>> +		xlog(L_WARNING, "Unable to prepare SQL query '%s': %s", new_fsidnum_by_path_sql, sqlite3_errstr(ret));
>> +		goto out;
>> +	}
>> +
>> +	ret = sqlite3_bind_text(stmt, 1, path, -1, NULL);
>> +	if (ret != SQLITE_OK) {
>> +		xlog(L_WARNING, "Unable to bind SQL query '%s': %s", new_fsidnum_by_path_sql, sqlite3_errstr(ret));
>> +		goto out;
>> +	}
>> +
>> +again:
>> +	ret = sqlite3_step(stmt);
>> +	switch (ret) {
>> +	case SQLITE_ROW:
>> +		*fsidnum = sqlite3_column_int(stmt, 0);
>> +		found = 1;
>> +		break;
>> +	case SQLITE_CONSTRAINT:
>> +		/* Maybe we lost the race against another writer and the path is now present. */
>> +		check = 1;
>> +		break;
>> +	case SQLITE_BUSY:
>> +	case SQLITE_LOCKED:
>> +		wait_for_dbaccess();
>> +		goto again;
>> +	default:
>> +		xlog(L_WARNING, "Error while looking up '%s' in database: %s", path, sqlite3_errstr(ret));
>> +	}
>> +
>> +out:
>> +	sqlite3_finalize(stmt);
>> +
>> +	if (check) {
>> +		found = get_fsidnum_by_path(path, fsidnum);
>> +		if (!found)
>> +			xlog(L_WARNING, "SQLITE_CONSTRAINT error while inserting '%s' in database", path);
>> +	}
>> +
>> +	return found;
>> +}
>> +
>> +/*
>> + * reexpdb_fsidnum_by_path - Lookup a fsid by path.
>> + *
>> + * @path: File system path used as lookup key
>> + * @fsidnum: Pointer where found fsid is written to
>> + * @may_create: If non-zero, allocate new fsid if lookup failed
>> + *
>> + */
>> +int reexpdb_fsidnum_by_path(char *path, uint32_t *fsidnum, int may_create)
>> +{
>> +	int found;
>> +
>> +	found = get_fsidnum_by_path(path, fsidnum);
>> +
>> +	if (!found && may_create)
>> +		found = new_fsidnum_by_path(path, fsidnum);
>> +
>> +	return found;
>> +}
>> +
>> +/*
>> + * reexpdb_uncover_subvolume - Make sure a subvolume is present.
>> + *
>> + * @fsidnum: Numerical fsid number to look for
>> + *
>> + * Subvolumes (NFS cross mounts) get automatically mounted upon first
>> + * access and can vanish after fs.nfs.nfs_mountpoint_timeout seconds.
>> + * Also if the NFS server reboots, clients can still have valid file
>> + * handles for such a subvolume.
>> + *
>> + * If kNFSd asks mountd for the path of a given fsidnum it can
>> + * trigger an automount by calling statfs() on the given path.
>> + */
>> +void reexpdb_uncover_subvolume(uint32_t fsidnum)
>> +{
>> +	struct statfs64 st;
>> +	char *path = NULL;
>> +	int ret;
>> +
>> +	if (get_path_by_fsidnum(fsidnum, &path)) {
>> +		ret = nfsd_path_statfs64(path, &st);
>> +		if (ret == -1)
>> +			xlog(L_WARNING, "statfs() failed");
>> +	}
>> +
>> +	free(path);
>> +}
>> diff --git a/support/reexport/reexport.h b/support/reexport/reexport.h
>> new file mode 100644
>> index 00000000..bb6d2a1b
>> --- /dev/null
>> +++ b/support/reexport/reexport.h
>> @@ -0,0 +1,39 @@
>> +#ifndef REEXPORT_H
>> +#define REEXPORT_H
>> +
>> +enum {
>> +	REEXP_NONE = 0,
>> +	REEXP_AUTO_FSIDNUM,
>> +	REEXP_PREDEFINED_FSIDNUM,
>> +};
>> +
>> +#ifdef HAVE_REEXPORT_SUPPORT
>> +int reexpdb_init(void);
>> +void reexpdb_destroy(void);
>> +int reexpdb_fsidnum_by_path(char *path, uint32_t *fsidnum, int may_create);
>> +int reexpdb_apply_reexport_settings(struct exportent *ep, char *flname, int flline);
>> +void reexpdb_uncover_subvolume(uint32_t fsidnum);
>> +#else
>> +static inline int reexpdb_init(void) { return 0; }
>> +static inline void reexpdb_destroy(void) {}
>> +static inline int reexpdb_fsidnum_by_path(char *path, uint32_t *fsidnum, int may_create)
>> +{
>> +	(void)path;
>> +	(void)may_create;
>> +	*fsidnum = 0;
>> +	return 0;
>> +}
>> +static inline int reexpdb_apply_reexport_settings(struct exportent *ep, char *flname, int flline)
>> +{
>> +	(void)ep;
>> +	(void)flname;
>> +	(void)flline;
>> +	return 0;
>> +}
>> +static inline void reexpdb_uncover_subvolume(uint32_t fsidnum)
>> +{
>> +	(void)fsidnum;
>> +}
>> +#endif /* HAVE_REEXPORT_SUPPORT */
>> +
>> +#endif /* REEXPORT_H */
> 

--
Chuck Lever
Richard Weinberger May 10, 2022, 1:59 p.m. UTC | #3
----- Ursprüngliche Mail -----
> Von: "chuck lever" <chuck.lever@oracle.com>
> An: "Steve Dickson" <steved@redhat.com>
> CC: "richard" <richard@nod.at>, "linux-nfs" <linux-nfs@vger.kernel.org>, "david" <david@sigma-star.at>, "bfields"
> <bfields@fieldses.org>, "luis turcitu" <luis.turcitu@appsbroker.com>, "david young" <david.young@appsbroker.com>,
> "david oberhollenzer" <david.oberhollenzer@sigma-star.at>, "trond myklebust" <trond.myklebust@hammerspace.com>, "anna
> schumaker" <anna.schumaker@netapp.com>, "chris chilvers" <chris.chilvers@appsbroker.com>
> Gesendet: Dienstag, 10. Mai 2022 15:48:49
> Betreff: Re: [PATCH 1/5] Implement reexport helper library

>> On May 10, 2022, at 9:32 AM, Steve Dickson <steved@redhat.com> wrote:
>> 
>> Hello,
>> 
>> On 5/2/22 4:50 AM, Richard Weinberger wrote:
>>> This internal library contains code that will be used by various
>>> tools within the nfs-utils package to deal better with NFS re-export,
>>> especially cross mounts.
>>> Signed-off-by: Richard Weinberger <richard@nod.at>
>>> ---
>>>  configure.ac                 |  12 ++
>>>  support/Makefile.am          |   4 +
>>>  support/reexport/Makefile.am |   6 +
>>>  support/reexport/reexport.c  | 285 +++++++++++++++++++++++++++++++++++
>>>  support/reexport/reexport.h  |  39 +++++
>>>  5 files changed, 346 insertions(+)
>>>  create mode 100644 support/reexport/Makefile.am
>>>  create mode 100644 support/reexport/reexport.c
>>>  create mode 100644 support/reexport/reexport.h
>>> diff --git a/configure.ac b/configure.ac
>>> index 93626d62..86bf8ba9 100644
>>> --- a/configure.ac
>>> +++ b/configure.ac
>>> @@ -274,6 +274,17 @@ AC_ARG_ENABLE(nfsv4server,
>>>  	fi
>>>  	AM_CONDITIONAL(CONFIG_NFSV4SERVER, [test "$enable_nfsv4server" = "yes" ])
>>>  +AC_ARG_ENABLE(reexport,
>>> +	[AC_HELP_STRING([--enable-reexport],
>>> +			[enable support for re-exporting NFS mounts  @<:@default=no@:>@])],
>>> +	enable_reexport=$enableval,
>>> +	enable_reexport="no")
>>> +	if test "$enable_reexport" = yes; then
>>> +		AC_DEFINE(HAVE_REEXPORT_SUPPORT, 1,
>>> +                          [Define this if you want NFS re-export support
>>> compiled in])
>>> +	fi
>>> +	AM_CONDITIONAL(CONFIG_REEXPORT, [test "$enable_reexport" = "yes" ])
>>> +
>> To get this moving I'm going to add a --disable-reexport flag
> 
> Hi Steve, no-one has given a reason why disabling support
> for re-exports would be necessary. Therefore, can't this
> switch just be removed?

Sure can we remove it. My idea was that new/experimental features should be opt-in.

Thanks,
//richard
Steve Dickson May 10, 2022, 2:04 p.m. UTC | #4
Hey!

On 5/10/22 9:48 AM, Chuck Lever III wrote:
> 
> 
>> On May 10, 2022, at 9:32 AM, Steve Dickson <steved@redhat.com> wrote:
>>
>> Hello,
>>
>> On 5/2/22 4:50 AM, Richard Weinberger wrote:
>>> This internal library contains code that will be used by various
>>> tools within the nfs-utils package to deal better with NFS re-export,
>>> especially cross mounts.
>>> Signed-off-by: Richard Weinberger <richard@nod.at>
>>> ---
>>>   configure.ac                 |  12 ++
>>>   support/Makefile.am          |   4 +
>>>   support/reexport/Makefile.am |   6 +
>>>   support/reexport/reexport.c  | 285 +++++++++++++++++++++++++++++++++++
>>>   support/reexport/reexport.h  |  39 +++++
>>>   5 files changed, 346 insertions(+)
>>>   create mode 100644 support/reexport/Makefile.am
>>>   create mode 100644 support/reexport/reexport.c
>>>   create mode 100644 support/reexport/reexport.h
>>> diff --git a/configure.ac b/configure.ac
>>> index 93626d62..86bf8ba9 100644
>>> --- a/configure.ac
>>> +++ b/configure.ac
>>> @@ -274,6 +274,17 @@ AC_ARG_ENABLE(nfsv4server,
>>>   	fi
>>>   	AM_CONDITIONAL(CONFIG_NFSV4SERVER, [test "$enable_nfsv4server" = "yes" ])
>>>   +AC_ARG_ENABLE(reexport,
>>> +	[AC_HELP_STRING([--enable-reexport],
>>> +			[enable support for re-exporting NFS mounts  @<:@default=no@:>@])],
>>> +	enable_reexport=$enableval,
>>> +	enable_reexport="no")
>>> +	if test "$enable_reexport" = yes; then
>>> +		AC_DEFINE(HAVE_REEXPORT_SUPPORT, 1,
>>> +                          [Define this if you want NFS re-export support compiled in])
>>> +	fi
>>> +	AM_CONDITIONAL(CONFIG_REEXPORT, [test "$enable_reexport" = "yes" ])
>>> +
>> To get this moving I'm going to add a --disable-reexport flag
> 
> Hi Steve, no-one has given a reason why disabling support
> for re-exports would be necessary. Therefore, can't this
> switch just be removed?The precedence has been that we used --disable-XXX flag
a lot in configure.ac... -nfsv4, -nfsv41, etc...
I'm just following along with that.

Yes, at this point, nobody is asking to turn it off but
in future somebody may want to... due some security hole
or just make the footprint of the package smaller.

I've always though it was a good idea to be able
to turn things off.. esp if the feature will
not be used.

steved.

> 
> 
>> +AC_ARG_ENABLE(reexport,
>> +       [AC_HELP_STRING([--disable-reexport],
>> +                       [disable support for re-exporting NFS mounts @<:@default=no@:>@])],
>> +       enable_reexport=$enableval,
>> +       enable_reexport="yes")
>> +       if test "$enable_reexport" = yes; then
>> +               AC_DEFINE(HAVE_REEXPORT_SUPPORT, 1,
>> +                          [Define this if you want NFS re-export support compiled in])
>> +       fi
>> +       AM_CONDITIONAL(CONFIG_REEXPORT, [test "$enable_reexport" = "yes" ])
>> +
>>
>> steved.
>>
>>>   dnl Check for TI-RPC library and headers
>>>   AC_LIBTIRPC
>>>   @@ -730,6 +741,7 @@ AC_CONFIG_FILES([
>>>   	support/nsm/Makefile
>>>   	support/nfsidmap/Makefile
>>>   	support/nfsidmap/libnfsidmap.pc
>>> +	support/reexport/Makefile
>>>   	tools/Makefile
>>>   	tools/locktest/Makefile
>>>   	tools/nlmtest/Makefile
>>> diff --git a/support/Makefile.am b/support/Makefile.am
>>> index c962d4d4..986e9b5f 100644
>>> --- a/support/Makefile.am
>>> +++ b/support/Makefile.am
>>> @@ -10,6 +10,10 @@ if CONFIG_JUNCTION
>>>   OPTDIRS += junction
>>>   endif
>>>   +if CONFIG_REEXPORT
>>> +OPTDIRS += reexport
>>> +endif
>>> +
>>>   SUBDIRS = export include misc nfs nsm $(OPTDIRS)
>>>     MAINTAINERCLEANFILES = Makefile.in
>>> diff --git a/support/reexport/Makefile.am b/support/reexport/Makefile.am
>>> new file mode 100644
>>> index 00000000..9d544a8f
>>> --- /dev/null
>>> +++ b/support/reexport/Makefile.am
>>> @@ -0,0 +1,6 @@
>>> +## Process this file with automake to produce Makefile.in
>>> +
>>> +noinst_LIBRARIES = libreexport.a
>>> +libreexport_a_SOURCES = reexport.c
>>> +
>>> +MAINTAINERCLEANFILES = Makefile.in
>>> diff --git a/support/reexport/reexport.c b/support/reexport/reexport.c
>>> new file mode 100644
>>> index 00000000..5474a21f
>>> --- /dev/null
>>> +++ b/support/reexport/reexport.c
>>> @@ -0,0 +1,285 @@
>>> +#ifdef HAVE_CONFIG_H
>>> +#include <config.h>
>>> +#endif
>>> +
>>> +#include <sqlite3.h>
>>> +#include <stdint.h>
>>> +#include <stdio.h>
>>> +#include <sys/random.h>
>>> +#include <sys/stat.h>
>>> +#include <sys/types.h>
>>> +#include <sys/vfs.h>
>>> +#include <unistd.h>
>>> +
>>> +#include "nfsd_path.h"
>>> +#include "nfslib.h"
>>> +#include "reexport.h"
>>> +#include "xcommon.h"
>>> +#include "xlog.h"
>>> +
>>> +#define REEXPDB_DBFILE NFS_STATEDIR "/reexpdb.sqlite3"
>>> +#define REEXPDB_DBFILE_WAIT_USEC (5000)
>>> +
>>> +static sqlite3 *db;
>>> +static int init_done;
>>> +
>>> +static int prng_init(void)
>>> +{
>>> +	int seed;
>>> +
>>> +	if (getrandom(&seed, sizeof(seed), 0) != sizeof(seed)) {
>>> +		xlog(L_ERROR, "Unable to obtain seed for PRNG via getrandom()");
>>> +		return -1;
>>> +	}
>>> +
>>> +	srand(seed);
>>> +	return 0;
>>> +}
>>> +
>>> +static void wait_for_dbaccess(void)
>>> +{
>>> +	usleep(REEXPDB_DBFILE_WAIT_USEC + (rand() % REEXPDB_DBFILE_WAIT_USEC));
>>> +}
>>> +
>>> +/*
>>> + * reexpdb_init - Initialize reexport database
>>> + */
>>> +int reexpdb_init(void)
>>> +{
>>> +	char *sqlerr;
>>> +	int ret;
>>> +
>>> +	if (init_done)
>>> +		return 0;
>>> +
>>> +	if (prng_init() != 0)
>>> +		return -1;
>>> +
>>> +	ret = sqlite3_open_v2(REEXPDB_DBFILE, &db, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_FULLMUTEX, NULL);
>>> +	if (ret != SQLITE_OK) {
>>> +		xlog(L_ERROR, "Unable to open reexport database: %s", sqlite3_errstr(ret));
>>> +		return -1;
>>> +	}
>>> +
>>> +again:
>>> +	ret = sqlite3_exec(db, "CREATE TABLE IF NOT EXISTS fsidnums (num INTEGER PRIMARY KEY CHECK (num > 0 AND num < 4294967296), path TEXT UNIQUE); CREATE INDEX IF NOT EXISTS idx_ids_path ON fsidnums (path);", NULL, NULL, &sqlerr);
>>> +	switch (ret) {
>>> +	case SQLITE_OK:
>>> +		init_done = 1;
>>> +		ret = 0;
>>> +		break;
>>> +	case SQLITE_BUSY:
>>> +	case SQLITE_LOCKED:
>>> +		wait_for_dbaccess();
>>> +		goto again;
>>> +	default:
>>> +		xlog(L_ERROR, "Unable to init reexport database: %s", sqlite3_errstr(ret));
>>> +		sqlite3_free(sqlerr);
>>> +		sqlite3_close_v2(db);
>>> +		ret = -1;
>>> +	}
>>> +
>>> +	return ret;
>>> +}
>>> +
>>> +/*
>>> + * reexpdb_destroy - Undo reexpdb_init().
>>> + */
>>> +void reexpdb_destroy(void)
>>> +{
>>> +	if (!init_done)
>>> +		return;
>>> +
>>> +	sqlite3_close_v2(db);
>>> +}
>>> +
>>> +static int get_fsidnum_by_path(char *path, uint32_t *fsidnum)
>>> +{
>>> +	static const char fsidnum_by_path_sql[] = "SELECT num FROM fsidnums WHERE path = ?1;";
>>> +	sqlite3_stmt *stmt = NULL;
>>> +	int found = 0;
>>> +	int ret;
>>> +
>>> +	ret = sqlite3_prepare_v2(db, fsidnum_by_path_sql, sizeof(fsidnum_by_path_sql), &stmt, NULL);
>>> +	if (ret != SQLITE_OK) {
>>> +		xlog(L_WARNING, "Unable to prepare SQL query '%s': %s", fsidnum_by_path_sql, sqlite3_errstr(ret));
>>> +		goto out;
>>> +	}
>>> +
>>> +	ret = sqlite3_bind_text(stmt, 1, path, -1, NULL);
>>> +	if (ret != SQLITE_OK) {
>>> +		xlog(L_WARNING, "Unable to bind SQL query '%s': %s", fsidnum_by_path_sql, sqlite3_errstr(ret));
>>> +		goto out;
>>> +	}
>>> +
>>> +again:
>>> +	ret = sqlite3_step(stmt);
>>> +	switch (ret) {
>>> +	case SQLITE_ROW:
>>> +		*fsidnum = sqlite3_column_int(stmt, 0);
>>> +		found = 1;
>>> +		break;
>>> +	case SQLITE_DONE:
>>> +		/* No hit */
>>> +		found = 0;
>>> +		break;
>>> +	case SQLITE_BUSY:
>>> +	case SQLITE_LOCKED:
>>> +		wait_for_dbaccess();
>>> +		goto again;
>>> +	default:
>>> +		xlog(L_WARNING, "Error while looking up '%s' in database: %s", path, sqlite3_errstr(ret));
>>> +	}
>>> +
>>> +out:
>>> +	sqlite3_finalize(stmt);
>>> +	return found;
>>> +}
>>> +
>>> +static int get_path_by_fsidnum(uint32_t fsidnum, char **path)
>>> +{
>>> +	static const char path_by_fsidnum_sql[] = "SELECT path FROM fsidnums WHERE num = ?1;";
>>> +	sqlite3_stmt *stmt = NULL;
>>> +	int found = 0;
>>> +	int ret;
>>> +
>>> +	ret = sqlite3_prepare_v2(db, path_by_fsidnum_sql, sizeof(path_by_fsidnum_sql), &stmt, NULL);
>>> +	if (ret != SQLITE_OK) {
>>> +		xlog(L_WARNING, "Unable to prepare SQL query '%s': %s", path_by_fsidnum_sql, sqlite3_errstr(ret));
>>> +		goto out;
>>> +	}
>>> +
>>> +	ret = sqlite3_bind_int(stmt, 1, fsidnum);
>>> +	if (ret != SQLITE_OK) {
>>> +		xlog(L_WARNING, "Unable to bind SQL query '%s': %s", path_by_fsidnum_sql, sqlite3_errstr(ret));
>>> +		goto out;
>>> +	}
>>> +
>>> +again:
>>> +	ret = sqlite3_step(stmt);
>>> +	switch (ret) {
>>> +	case SQLITE_ROW:
>>> +		*path = xstrdup((char *)sqlite3_column_text(stmt, 0));
>>> +		found = 1;
>>> +		break;
>>> +	case SQLITE_DONE:
>>> +		/* No hit */
>>> +		found = 0;
>>> +		break;
>>> +	case SQLITE_BUSY:
>>> +	case SQLITE_LOCKED:
>>> +		wait_for_dbaccess();
>>> +		goto again;
>>> +	default:
>>> +		xlog(L_WARNING, "Error while looking up '%i' in database: %s", fsidnum, sqlite3_errstr(ret));
>>> +	}
>>> +
>>> +out:
>>> +	sqlite3_finalize(stmt);
>>> +	return found;
>>> +}
>>> +
>>> +static int new_fsidnum_by_path(char *path, uint32_t *fsidnum)
>>> +{
>>> +	/*
>>> +	 * This query is a little tricky. We use SQL to find and claim the smallest free fsid number.
>>> +	 * To find a free fsid the fsidnums is left joined to itself but with an offset of 1.
>>> +	 * Everything after the UNION statement is to handle the corner case where fsidnums
>>> +	 * is empty. In this case we want 1 as first fsid number.
>>> +	 */
>>> +	static const char new_fsidnum_by_path_sql[] = "INSERT INTO fsidnums VALUES ((SELECT ids1.num + 1 FROM fsidnums AS ids1 LEFT JOIN fsidnums AS ids2 ON ids2.num = ids1.num + 1 WHERE ids2.num IS NULL UNION SELECT 1 WHERE NOT EXISTS (SELECT NULL FROM fsidnums WHERE num = 1) LIMIT 1), ?1) RETURNING num;";
>>> +
>>> +	sqlite3_stmt *stmt = NULL;
>>> +	int found = 0, check = 0;
>>> +	int ret;
>>> +
>>> +	ret = sqlite3_prepare_v2(db, new_fsidnum_by_path_sql, sizeof(new_fsidnum_by_path_sql), &stmt, NULL);
>>> +	if (ret != SQLITE_OK) {
>>> +		xlog(L_WARNING, "Unable to prepare SQL query '%s': %s", new_fsidnum_by_path_sql, sqlite3_errstr(ret));
>>> +		goto out;
>>> +	}
>>> +
>>> +	ret = sqlite3_bind_text(stmt, 1, path, -1, NULL);
>>> +	if (ret != SQLITE_OK) {
>>> +		xlog(L_WARNING, "Unable to bind SQL query '%s': %s", new_fsidnum_by_path_sql, sqlite3_errstr(ret));
>>> +		goto out;
>>> +	}
>>> +
>>> +again:
>>> +	ret = sqlite3_step(stmt);
>>> +	switch (ret) {
>>> +	case SQLITE_ROW:
>>> +		*fsidnum = sqlite3_column_int(stmt, 0);
>>> +		found = 1;
>>> +		break;
>>> +	case SQLITE_CONSTRAINT:
>>> +		/* Maybe we lost the race against another writer and the path is now present. */
>>> +		check = 1;
>>> +		break;
>>> +	case SQLITE_BUSY:
>>> +	case SQLITE_LOCKED:
>>> +		wait_for_dbaccess();
>>> +		goto again;
>>> +	default:
>>> +		xlog(L_WARNING, "Error while looking up '%s' in database: %s", path, sqlite3_errstr(ret));
>>> +	}
>>> +
>>> +out:
>>> +	sqlite3_finalize(stmt);
>>> +
>>> +	if (check) {
>>> +		found = get_fsidnum_by_path(path, fsidnum);
>>> +		if (!found)
>>> +			xlog(L_WARNING, "SQLITE_CONSTRAINT error while inserting '%s' in database", path);
>>> +	}
>>> +
>>> +	return found;
>>> +}
>>> +
>>> +/*
>>> + * reexpdb_fsidnum_by_path - Lookup a fsid by path.
>>> + *
>>> + * @path: File system path used as lookup key
>>> + * @fsidnum: Pointer where found fsid is written to
>>> + * @may_create: If non-zero, allocate new fsid if lookup failed
>>> + *
>>> + */
>>> +int reexpdb_fsidnum_by_path(char *path, uint32_t *fsidnum, int may_create)
>>> +{
>>> +	int found;
>>> +
>>> +	found = get_fsidnum_by_path(path, fsidnum);
>>> +
>>> +	if (!found && may_create)
>>> +		found = new_fsidnum_by_path(path, fsidnum);
>>> +
>>> +	return found;
>>> +}
>>> +
>>> +/*
>>> + * reexpdb_uncover_subvolume - Make sure a subvolume is present.
>>> + *
>>> + * @fsidnum: Numerical fsid number to look for
>>> + *
>>> + * Subvolumes (NFS cross mounts) get automatically mounted upon first
>>> + * access and can vanish after fs.nfs.nfs_mountpoint_timeout seconds.
>>> + * Also if the NFS server reboots, clients can still have valid file
>>> + * handles for such a subvolume.
>>> + *
>>> + * If kNFSd asks mountd for the path of a given fsidnum it can
>>> + * trigger an automount by calling statfs() on the given path.
>>> + */
>>> +void reexpdb_uncover_subvolume(uint32_t fsidnum)
>>> +{
>>> +	struct statfs64 st;
>>> +	char *path = NULL;
>>> +	int ret;
>>> +
>>> +	if (get_path_by_fsidnum(fsidnum, &path)) {
>>> +		ret = nfsd_path_statfs64(path, &st);
>>> +		if (ret == -1)
>>> +			xlog(L_WARNING, "statfs() failed");
>>> +	}
>>> +
>>> +	free(path);
>>> +}
>>> diff --git a/support/reexport/reexport.h b/support/reexport/reexport.h
>>> new file mode 100644
>>> index 00000000..bb6d2a1b
>>> --- /dev/null
>>> +++ b/support/reexport/reexport.h
>>> @@ -0,0 +1,39 @@
>>> +#ifndef REEXPORT_H
>>> +#define REEXPORT_H
>>> +
>>> +enum {
>>> +	REEXP_NONE = 0,
>>> +	REEXP_AUTO_FSIDNUM,
>>> +	REEXP_PREDEFINED_FSIDNUM,
>>> +};
>>> +
>>> +#ifdef HAVE_REEXPORT_SUPPORT
>>> +int reexpdb_init(void);
>>> +void reexpdb_destroy(void);
>>> +int reexpdb_fsidnum_by_path(char *path, uint32_t *fsidnum, int may_create);
>>> +int reexpdb_apply_reexport_settings(struct exportent *ep, char *flname, int flline);
>>> +void reexpdb_uncover_subvolume(uint32_t fsidnum);
>>> +#else
>>> +static inline int reexpdb_init(void) { return 0; }
>>> +static inline void reexpdb_destroy(void) {}
>>> +static inline int reexpdb_fsidnum_by_path(char *path, uint32_t *fsidnum, int may_create)
>>> +{
>>> +	(void)path;
>>> +	(void)may_create;
>>> +	*fsidnum = 0;
>>> +	return 0;
>>> +}
>>> +static inline int reexpdb_apply_reexport_settings(struct exportent *ep, char *flname, int flline)
>>> +{
>>> +	(void)ep;
>>> +	(void)flname;
>>> +	(void)flline;
>>> +	return 0;
>>> +}
>>> +static inline void reexpdb_uncover_subvolume(uint32_t fsidnum)
>>> +{
>>> +	(void)fsidnum;
>>> +}
>>> +#endif /* HAVE_REEXPORT_SUPPORT */
>>> +
>>> +#endif /* REEXPORT_H */
>>
> 
> --
> Chuck Lever
> 
> 
>
Chuck Lever May 10, 2022, 2:17 p.m. UTC | #5
> On May 10, 2022, at 10:04 AM, Steve Dickson <steved@redhat.com> wrote:
> 
> Hey!
> 
> On 5/10/22 9:48 AM, Chuck Lever III wrote:
>>> On May 10, 2022, at 9:32 AM, Steve Dickson <steved@redhat.com> wrote:
>>> 
>>> Hello,
>>> 
>>> On 5/2/22 4:50 AM, Richard Weinberger wrote:
>>>> This internal library contains code that will be used by various
>>>> tools within the nfs-utils package to deal better with NFS re-export,
>>>> especially cross mounts.
>>>> Signed-off-by: Richard Weinberger <richard@nod.at>
>>>> ---
>>>>  configure.ac                 |  12 ++
>>>>  support/Makefile.am          |   4 +
>>>>  support/reexport/Makefile.am |   6 +
>>>>  support/reexport/reexport.c  | 285 +++++++++++++++++++++++++++++++++++
>>>>  support/reexport/reexport.h  |  39 +++++
>>>>  5 files changed, 346 insertions(+)
>>>>  create mode 100644 support/reexport/Makefile.am
>>>>  create mode 100644 support/reexport/reexport.c
>>>>  create mode 100644 support/reexport/reexport.h
>>>> diff --git a/configure.ac b/configure.ac
>>>> index 93626d62..86bf8ba9 100644
>>>> --- a/configure.ac
>>>> +++ b/configure.ac
>>>> @@ -274,6 +274,17 @@ AC_ARG_ENABLE(nfsv4server,
>>>>  	fi
>>>>  	AM_CONDITIONAL(CONFIG_NFSV4SERVER, [test "$enable_nfsv4server" = "yes" ])
>>>>  +AC_ARG_ENABLE(reexport,
>>>> +	[AC_HELP_STRING([--enable-reexport],
>>>> +			[enable support for re-exporting NFS mounts  @<:@default=no@:>@])],
>>>> +	enable_reexport=$enableval,
>>>> +	enable_reexport="no")
>>>> +	if test "$enable_reexport" = yes; then
>>>> +		AC_DEFINE(HAVE_REEXPORT_SUPPORT, 1,
>>>> +                          [Define this if you want NFS re-export support compiled in])
>>>> +	fi
>>>> +	AM_CONDITIONAL(CONFIG_REEXPORT, [test "$enable_reexport" = "yes" ])
>>>> +
>>> To get this moving I'm going to add a --disable-reexport flag
>> Hi Steve, no-one has given a reason why disabling support
>> for re-exports would be necessary. Therefore, can't this
>> switch just be removed?

> The precedence has been that we used --disable-XXX flag
> a lot in configure.ac... -nfsv4, -nfsv41, etc...
> I'm just following along with that.

I get that... but no-one has given a technical reason
why disabling this code would even be necessary.


> Yes, at this point, nobody is asking to turn it off but
> in future somebody may want to... due some security hole
> or just make the footprint of the package smaller.

I'll bite. What is the added footprint of this patch
series? I didn't think there was a new library dependency
or anything like that...


> I've always though it was a good idea to be able
> to turn things off.. esp if the feature will
> not be used.

If there is a hazard to leaving it on all the time, we
should find out now before the series is applied. So, is
there harm to leaving it on all the time? That should be
documented or fixed before merging.

I must be missing something.


> steved.
> 
>>> +AC_ARG_ENABLE(reexport,
>>> +       [AC_HELP_STRING([--disable-reexport],
>>> +                       [disable support for re-exporting NFS mounts @<:@default=no@:>@])],
>>> +       enable_reexport=$enableval,
>>> +       enable_reexport="yes")
>>> +       if test "$enable_reexport" = yes; then
>>> +               AC_DEFINE(HAVE_REEXPORT_SUPPORT, 1,
>>> +                          [Define this if you want NFS re-export support compiled in])
>>> +       fi
>>> +       AM_CONDITIONAL(CONFIG_REEXPORT, [test "$enable_reexport" = "yes" ])
>>> +
>>> 
>>> steved.
>>> 
>>>>  dnl Check for TI-RPC library and headers
>>>>  AC_LIBTIRPC
>>>>  @@ -730,6 +741,7 @@ AC_CONFIG_FILES([
>>>>  	support/nsm/Makefile
>>>>  	support/nfsidmap/Makefile
>>>>  	support/nfsidmap/libnfsidmap.pc
>>>> +	support/reexport/Makefile
>>>>  	tools/Makefile
>>>>  	tools/locktest/Makefile
>>>>  	tools/nlmtest/Makefile
>>>> diff --git a/support/Makefile.am b/support/Makefile.am
>>>> index c962d4d4..986e9b5f 100644
>>>> --- a/support/Makefile.am
>>>> +++ b/support/Makefile.am
>>>> @@ -10,6 +10,10 @@ if CONFIG_JUNCTION
>>>>  OPTDIRS += junction
>>>>  endif
>>>>  +if CONFIG_REEXPORT
>>>> +OPTDIRS += reexport
>>>> +endif
>>>> +
>>>>  SUBDIRS = export include misc nfs nsm $(OPTDIRS)
>>>>    MAINTAINERCLEANFILES = Makefile.in
>>>> diff --git a/support/reexport/Makefile.am b/support/reexport/Makefile.am
>>>> new file mode 100644
>>>> index 00000000..9d544a8f
>>>> --- /dev/null
>>>> +++ b/support/reexport/Makefile.am
>>>> @@ -0,0 +1,6 @@
>>>> +## Process this file with automake to produce Makefile.in
>>>> +
>>>> +noinst_LIBRARIES = libreexport.a
>>>> +libreexport_a_SOURCES = reexport.c
>>>> +
>>>> +MAINTAINERCLEANFILES = Makefile.in
>>>> diff --git a/support/reexport/reexport.c b/support/reexport/reexport.c
>>>> new file mode 100644
>>>> index 00000000..5474a21f
>>>> --- /dev/null
>>>> +++ b/support/reexport/reexport.c
>>>> @@ -0,0 +1,285 @@
>>>> +#ifdef HAVE_CONFIG_H
>>>> +#include <config.h>
>>>> +#endif
>>>> +
>>>> +#include <sqlite3.h>
>>>> +#include <stdint.h>
>>>> +#include <stdio.h>
>>>> +#include <sys/random.h>
>>>> +#include <sys/stat.h>
>>>> +#include <sys/types.h>
>>>> +#include <sys/vfs.h>
>>>> +#include <unistd.h>
>>>> +
>>>> +#include "nfsd_path.h"
>>>> +#include "nfslib.h"
>>>> +#include "reexport.h"
>>>> +#include "xcommon.h"
>>>> +#include "xlog.h"
>>>> +
>>>> +#define REEXPDB_DBFILE NFS_STATEDIR "/reexpdb.sqlite3"
>>>> +#define REEXPDB_DBFILE_WAIT_USEC (5000)
>>>> +
>>>> +static sqlite3 *db;
>>>> +static int init_done;
>>>> +
>>>> +static int prng_init(void)
>>>> +{
>>>> +	int seed;
>>>> +
>>>> +	if (getrandom(&seed, sizeof(seed), 0) != sizeof(seed)) {
>>>> +		xlog(L_ERROR, "Unable to obtain seed for PRNG via getrandom()");
>>>> +		return -1;
>>>> +	}
>>>> +
>>>> +	srand(seed);
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static void wait_for_dbaccess(void)
>>>> +{
>>>> +	usleep(REEXPDB_DBFILE_WAIT_USEC + (rand() % REEXPDB_DBFILE_WAIT_USEC));
>>>> +}
>>>> +
>>>> +/*
>>>> + * reexpdb_init - Initialize reexport database
>>>> + */
>>>> +int reexpdb_init(void)
>>>> +{
>>>> +	char *sqlerr;
>>>> +	int ret;
>>>> +
>>>> +	if (init_done)
>>>> +		return 0;
>>>> +
>>>> +	if (prng_init() != 0)
>>>> +		return -1;
>>>> +
>>>> +	ret = sqlite3_open_v2(REEXPDB_DBFILE, &db, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_FULLMUTEX, NULL);
>>>> +	if (ret != SQLITE_OK) {
>>>> +		xlog(L_ERROR, "Unable to open reexport database: %s", sqlite3_errstr(ret));
>>>> +		return -1;
>>>> +	}
>>>> +
>>>> +again:
>>>> +	ret = sqlite3_exec(db, "CREATE TABLE IF NOT EXISTS fsidnums (num INTEGER PRIMARY KEY CHECK (num > 0 AND num < 4294967296), path TEXT UNIQUE); CREATE INDEX IF NOT EXISTS idx_ids_path ON fsidnums (path);", NULL, NULL, &sqlerr);
>>>> +	switch (ret) {
>>>> +	case SQLITE_OK:
>>>> +		init_done = 1;
>>>> +		ret = 0;
>>>> +		break;
>>>> +	case SQLITE_BUSY:
>>>> +	case SQLITE_LOCKED:
>>>> +		wait_for_dbaccess();
>>>> +		goto again;
>>>> +	default:
>>>> +		xlog(L_ERROR, "Unable to init reexport database: %s", sqlite3_errstr(ret));
>>>> +		sqlite3_free(sqlerr);
>>>> +		sqlite3_close_v2(db);
>>>> +		ret = -1;
>>>> +	}
>>>> +
>>>> +	return ret;
>>>> +}
>>>> +
>>>> +/*
>>>> + * reexpdb_destroy - Undo reexpdb_init().
>>>> + */
>>>> +void reexpdb_destroy(void)
>>>> +{
>>>> +	if (!init_done)
>>>> +		return;
>>>> +
>>>> +	sqlite3_close_v2(db);
>>>> +}
>>>> +
>>>> +static int get_fsidnum_by_path(char *path, uint32_t *fsidnum)
>>>> +{
>>>> +	static const char fsidnum_by_path_sql[] = "SELECT num FROM fsidnums WHERE path = ?1;";
>>>> +	sqlite3_stmt *stmt = NULL;
>>>> +	int found = 0;
>>>> +	int ret;
>>>> +
>>>> +	ret = sqlite3_prepare_v2(db, fsidnum_by_path_sql, sizeof(fsidnum_by_path_sql), &stmt, NULL);
>>>> +	if (ret != SQLITE_OK) {
>>>> +		xlog(L_WARNING, "Unable to prepare SQL query '%s': %s", fsidnum_by_path_sql, sqlite3_errstr(ret));
>>>> +		goto out;
>>>> +	}
>>>> +
>>>> +	ret = sqlite3_bind_text(stmt, 1, path, -1, NULL);
>>>> +	if (ret != SQLITE_OK) {
>>>> +		xlog(L_WARNING, "Unable to bind SQL query '%s': %s", fsidnum_by_path_sql, sqlite3_errstr(ret));
>>>> +		goto out;
>>>> +	}
>>>> +
>>>> +again:
>>>> +	ret = sqlite3_step(stmt);
>>>> +	switch (ret) {
>>>> +	case SQLITE_ROW:
>>>> +		*fsidnum = sqlite3_column_int(stmt, 0);
>>>> +		found = 1;
>>>> +		break;
>>>> +	case SQLITE_DONE:
>>>> +		/* No hit */
>>>> +		found = 0;
>>>> +		break;
>>>> +	case SQLITE_BUSY:
>>>> +	case SQLITE_LOCKED:
>>>> +		wait_for_dbaccess();
>>>> +		goto again;
>>>> +	default:
>>>> +		xlog(L_WARNING, "Error while looking up '%s' in database: %s", path, sqlite3_errstr(ret));
>>>> +	}
>>>> +
>>>> +out:
>>>> +	sqlite3_finalize(stmt);
>>>> +	return found;
>>>> +}
>>>> +
>>>> +static int get_path_by_fsidnum(uint32_t fsidnum, char **path)
>>>> +{
>>>> +	static const char path_by_fsidnum_sql[] = "SELECT path FROM fsidnums WHERE num = ?1;";
>>>> +	sqlite3_stmt *stmt = NULL;
>>>> +	int found = 0;
>>>> +	int ret;
>>>> +
>>>> +	ret = sqlite3_prepare_v2(db, path_by_fsidnum_sql, sizeof(path_by_fsidnum_sql), &stmt, NULL);
>>>> +	if (ret != SQLITE_OK) {
>>>> +		xlog(L_WARNING, "Unable to prepare SQL query '%s': %s", path_by_fsidnum_sql, sqlite3_errstr(ret));
>>>> +		goto out;
>>>> +	}
>>>> +
>>>> +	ret = sqlite3_bind_int(stmt, 1, fsidnum);
>>>> +	if (ret != SQLITE_OK) {
>>>> +		xlog(L_WARNING, "Unable to bind SQL query '%s': %s", path_by_fsidnum_sql, sqlite3_errstr(ret));
>>>> +		goto out;
>>>> +	}
>>>> +
>>>> +again:
>>>> +	ret = sqlite3_step(stmt);
>>>> +	switch (ret) {
>>>> +	case SQLITE_ROW:
>>>> +		*path = xstrdup((char *)sqlite3_column_text(stmt, 0));
>>>> +		found = 1;
>>>> +		break;
>>>> +	case SQLITE_DONE:
>>>> +		/* No hit */
>>>> +		found = 0;
>>>> +		break;
>>>> +	case SQLITE_BUSY:
>>>> +	case SQLITE_LOCKED:
>>>> +		wait_for_dbaccess();
>>>> +		goto again;
>>>> +	default:
>>>> +		xlog(L_WARNING, "Error while looking up '%i' in database: %s", fsidnum, sqlite3_errstr(ret));
>>>> +	}
>>>> +
>>>> +out:
>>>> +	sqlite3_finalize(stmt);
>>>> +	return found;
>>>> +}
>>>> +
>>>> +static int new_fsidnum_by_path(char *path, uint32_t *fsidnum)
>>>> +{
>>>> +	/*
>>>> +	 * This query is a little tricky. We use SQL to find and claim the smallest free fsid number.
>>>> +	 * To find a free fsid the fsidnums is left joined to itself but with an offset of 1.
>>>> +	 * Everything after the UNION statement is to handle the corner case where fsidnums
>>>> +	 * is empty. In this case we want 1 as first fsid number.
>>>> +	 */
>>>> +	static const char new_fsidnum_by_path_sql[] = "INSERT INTO fsidnums VALUES ((SELECT ids1.num + 1 FROM fsidnums AS ids1 LEFT JOIN fsidnums AS ids2 ON ids2.num = ids1.num + 1 WHERE ids2.num IS NULL UNION SELECT 1 WHERE NOT EXISTS (SELECT NULL FROM fsidnums WHERE num = 1) LIMIT 1), ?1) RETURNING num;";
>>>> +
>>>> +	sqlite3_stmt *stmt = NULL;
>>>> +	int found = 0, check = 0;
>>>> +	int ret;
>>>> +
>>>> +	ret = sqlite3_prepare_v2(db, new_fsidnum_by_path_sql, sizeof(new_fsidnum_by_path_sql), &stmt, NULL);
>>>> +	if (ret != SQLITE_OK) {
>>>> +		xlog(L_WARNING, "Unable to prepare SQL query '%s': %s", new_fsidnum_by_path_sql, sqlite3_errstr(ret));
>>>> +		goto out;
>>>> +	}
>>>> +
>>>> +	ret = sqlite3_bind_text(stmt, 1, path, -1, NULL);
>>>> +	if (ret != SQLITE_OK) {
>>>> +		xlog(L_WARNING, "Unable to bind SQL query '%s': %s", new_fsidnum_by_path_sql, sqlite3_errstr(ret));
>>>> +		goto out;
>>>> +	}
>>>> +
>>>> +again:
>>>> +	ret = sqlite3_step(stmt);
>>>> +	switch (ret) {
>>>> +	case SQLITE_ROW:
>>>> +		*fsidnum = sqlite3_column_int(stmt, 0);
>>>> +		found = 1;
>>>> +		break;
>>>> +	case SQLITE_CONSTRAINT:
>>>> +		/* Maybe we lost the race against another writer and the path is now present. */
>>>> +		check = 1;
>>>> +		break;
>>>> +	case SQLITE_BUSY:
>>>> +	case SQLITE_LOCKED:
>>>> +		wait_for_dbaccess();
>>>> +		goto again;
>>>> +	default:
>>>> +		xlog(L_WARNING, "Error while looking up '%s' in database: %s", path, sqlite3_errstr(ret));
>>>> +	}
>>>> +
>>>> +out:
>>>> +	sqlite3_finalize(stmt);
>>>> +
>>>> +	if (check) {
>>>> +		found = get_fsidnum_by_path(path, fsidnum);
>>>> +		if (!found)
>>>> +			xlog(L_WARNING, "SQLITE_CONSTRAINT error while inserting '%s' in database", path);
>>>> +	}
>>>> +
>>>> +	return found;
>>>> +}
>>>> +
>>>> +/*
>>>> + * reexpdb_fsidnum_by_path - Lookup a fsid by path.
>>>> + *
>>>> + * @path: File system path used as lookup key
>>>> + * @fsidnum: Pointer where found fsid is written to
>>>> + * @may_create: If non-zero, allocate new fsid if lookup failed
>>>> + *
>>>> + */
>>>> +int reexpdb_fsidnum_by_path(char *path, uint32_t *fsidnum, int may_create)
>>>> +{
>>>> +	int found;
>>>> +
>>>> +	found = get_fsidnum_by_path(path, fsidnum);
>>>> +
>>>> +	if (!found && may_create)
>>>> +		found = new_fsidnum_by_path(path, fsidnum);
>>>> +
>>>> +	return found;
>>>> +}
>>>> +
>>>> +/*
>>>> + * reexpdb_uncover_subvolume - Make sure a subvolume is present.
>>>> + *
>>>> + * @fsidnum: Numerical fsid number to look for
>>>> + *
>>>> + * Subvolumes (NFS cross mounts) get automatically mounted upon first
>>>> + * access and can vanish after fs.nfs.nfs_mountpoint_timeout seconds.
>>>> + * Also if the NFS server reboots, clients can still have valid file
>>>> + * handles for such a subvolume.
>>>> + *
>>>> + * If kNFSd asks mountd for the path of a given fsidnum it can
>>>> + * trigger an automount by calling statfs() on the given path.
>>>> + */
>>>> +void reexpdb_uncover_subvolume(uint32_t fsidnum)
>>>> +{
>>>> +	struct statfs64 st;
>>>> +	char *path = NULL;
>>>> +	int ret;
>>>> +
>>>> +	if (get_path_by_fsidnum(fsidnum, &path)) {
>>>> +		ret = nfsd_path_statfs64(path, &st);
>>>> +		if (ret == -1)
>>>> +			xlog(L_WARNING, "statfs() failed");
>>>> +	}
>>>> +
>>>> +	free(path);
>>>> +}
>>>> diff --git a/support/reexport/reexport.h b/support/reexport/reexport.h
>>>> new file mode 100644
>>>> index 00000000..bb6d2a1b
>>>> --- /dev/null
>>>> +++ b/support/reexport/reexport.h
>>>> @@ -0,0 +1,39 @@
>>>> +#ifndef REEXPORT_H
>>>> +#define REEXPORT_H
>>>> +
>>>> +enum {
>>>> +	REEXP_NONE = 0,
>>>> +	REEXP_AUTO_FSIDNUM,
>>>> +	REEXP_PREDEFINED_FSIDNUM,
>>>> +};
>>>> +
>>>> +#ifdef HAVE_REEXPORT_SUPPORT
>>>> +int reexpdb_init(void);
>>>> +void reexpdb_destroy(void);
>>>> +int reexpdb_fsidnum_by_path(char *path, uint32_t *fsidnum, int may_create);
>>>> +int reexpdb_apply_reexport_settings(struct exportent *ep, char *flname, int flline);
>>>> +void reexpdb_uncover_subvolume(uint32_t fsidnum);
>>>> +#else
>>>> +static inline int reexpdb_init(void) { return 0; }
>>>> +static inline void reexpdb_destroy(void) {}
>>>> +static inline int reexpdb_fsidnum_by_path(char *path, uint32_t *fsidnum, int may_create)
>>>> +{
>>>> +	(void)path;
>>>> +	(void)may_create;
>>>> +	*fsidnum = 0;
>>>> +	return 0;
>>>> +}
>>>> +static inline int reexpdb_apply_reexport_settings(struct exportent *ep, char *flname, int flline)
>>>> +{
>>>> +	(void)ep;
>>>> +	(void)flname;
>>>> +	(void)flline;
>>>> +	return 0;
>>>> +}
>>>> +static inline void reexpdb_uncover_subvolume(uint32_t fsidnum)
>>>> +{
>>>> +	(void)fsidnum;
>>>> +}
>>>> +#endif /* HAVE_REEXPORT_SUPPORT */
>>>> +
>>>> +#endif /* REEXPORT_H */
>>> 
>> --
>> Chuck Lever

--
Chuck Lever
Steve Dickson May 10, 2022, 8:08 p.m. UTC | #6
On 5/10/22 10:17 AM, Chuck Lever III wrote:
> 
> 
>> On May 10, 2022, at 10:04 AM, Steve Dickson <steved@redhat.com> wrote:
>>
>> Hey!
>>
>> On 5/10/22 9:48 AM, Chuck Lever III wrote:
>>>> On May 10, 2022, at 9:32 AM, Steve Dickson <steved@redhat.com> wrote:
>>>>
>>>> Hello,
>>>>
>>>> On 5/2/22 4:50 AM, Richard Weinberger wrote:
>>>>> This internal library contains code that will be used by various
>>>>> tools within the nfs-utils package to deal better with NFS re-export,
>>>>> especially cross mounts.
>>>>> Signed-off-by: Richard Weinberger <richard@nod.at>
>>>>> ---
>>>>>   configure.ac                 |  12 ++
>>>>>   support/Makefile.am          |   4 +
>>>>>   support/reexport/Makefile.am |   6 +
>>>>>   support/reexport/reexport.c  | 285 +++++++++++++++++++++++++++++++++++
>>>>>   support/reexport/reexport.h  |  39 +++++
>>>>>   5 files changed, 346 insertions(+)
>>>>>   create mode 100644 support/reexport/Makefile.am
>>>>>   create mode 100644 support/reexport/reexport.c
>>>>>   create mode 100644 support/reexport/reexport.h
>>>>> diff --git a/configure.ac b/configure.ac
>>>>> index 93626d62..86bf8ba9 100644
>>>>> --- a/configure.ac
>>>>> +++ b/configure.ac
>>>>> @@ -274,6 +274,17 @@ AC_ARG_ENABLE(nfsv4server,
>>>>>   	fi
>>>>>   	AM_CONDITIONAL(CONFIG_NFSV4SERVER, [test "$enable_nfsv4server" = "yes" ])
>>>>>   +AC_ARG_ENABLE(reexport,
>>>>> +	[AC_HELP_STRING([--enable-reexport],
>>>>> +			[enable support for re-exporting NFS mounts  @<:@default=no@:>@])],
>>>>> +	enable_reexport=$enableval,
>>>>> +	enable_reexport="no")
>>>>> +	if test "$enable_reexport" = yes; then
>>>>> +		AC_DEFINE(HAVE_REEXPORT_SUPPORT, 1,
>>>>> +                          [Define this if you want NFS re-export support compiled in])
>>>>> +	fi
>>>>> +	AM_CONDITIONAL(CONFIG_REEXPORT, [test "$enable_reexport" = "yes" ])
>>>>> +
>>>> To get this moving I'm going to add a --disable-reexport flag
>>> Hi Steve, no-one has given a reason why disabling support
>>> for re-exports would be necessary. Therefore, can't this
>>> switch just be removed?
> 
>> The precedence has been that we used --disable-XXX flag
>> a lot in configure.ac... -nfsv4, -nfsv41, etc...
>> I'm just following along with that.
> 
> I get that... but no-one has given a technical reason
> why disabling this code would even be necessary.
Nobody has see the code yet... :-)

There is a lot of churn in these patch.

> 
> 
>> Yes, at this point, nobody is asking to turn it off but
>> in future somebody may want to... due some security hole
>> or just make the footprint of the package smaller.
> 
> I'll bite. What is the added footprint of this patch
> series? I didn't think there was a new library dependency
> or anything like that...
Well mountd is now using an database and there
is a static reexport lib that a number daemons
and command like with...

In the I would like to bring (or be able) bring the
foot print of nfs-utils down so it will be more container
friendly... IDK if is possible... but that is the idea.

> 
> 
>> I've always though it was a good idea to be able
>> to turn things off.. esp if the feature will
>> not be used.
> 
> If there is a hazard to leaving it on all the time, we
> should find out now before the series is applied. So, is
> there harm to leaving it on all the time? That should be
> documented or fixed before merging.
Putting my distro maintainer hat on... If the flux
capacitor breaks and the server is no longer able
to exports things correctly... As I said there is
a lot of churn.... I would like a way to put things
back, so while we fix the flux capacitor, I can still
maintain a stable server.

I have found.... Have a way to get back...
is very useful! ;-)

steved.

> 
> I must be missing something.
> 
> 
>> steved.
>>
>>>> +AC_ARG_ENABLE(reexport,
>>>> +       [AC_HELP_STRING([--disable-reexport],
>>>> +                       [disable support for re-exporting NFS mounts @<:@default=no@:>@])],
>>>> +       enable_reexport=$enableval,
>>>> +       enable_reexport="yes")
>>>> +       if test "$enable_reexport" = yes; then
>>>> +               AC_DEFINE(HAVE_REEXPORT_SUPPORT, 1,
>>>> +                          [Define this if you want NFS re-export support compiled in])
>>>> +       fi
>>>> +       AM_CONDITIONAL(CONFIG_REEXPORT, [test "$enable_reexport" = "yes" ])
>>>> +
>>>>
>>>> steved.
>>>>
>>>>>   dnl Check for TI-RPC library and headers
>>>>>   AC_LIBTIRPC
>>>>>   @@ -730,6 +741,7 @@ AC_CONFIG_FILES([
>>>>>   	support/nsm/Makefile
>>>>>   	support/nfsidmap/Makefile
>>>>>   	support/nfsidmap/libnfsidmap.pc
>>>>> +	support/reexport/Makefile
>>>>>   	tools/Makefile
>>>>>   	tools/locktest/Makefile
>>>>>   	tools/nlmtest/Makefile
>>>>> diff --git a/support/Makefile.am b/support/Makefile.am
>>>>> index c962d4d4..986e9b5f 100644
>>>>> --- a/support/Makefile.am
>>>>> +++ b/support/Makefile.am
>>>>> @@ -10,6 +10,10 @@ if CONFIG_JUNCTION
>>>>>   OPTDIRS += junction
>>>>>   endif
>>>>>   +if CONFIG_REEXPORT
>>>>> +OPTDIRS += reexport
>>>>> +endif
>>>>> +
>>>>>   SUBDIRS = export include misc nfs nsm $(OPTDIRS)
>>>>>     MAINTAINERCLEANFILES = Makefile.in
>>>>> diff --git a/support/reexport/Makefile.am b/support/reexport/Makefile.am
>>>>> new file mode 100644
>>>>> index 00000000..9d544a8f
>>>>> --- /dev/null
>>>>> +++ b/support/reexport/Makefile.am
>>>>> @@ -0,0 +1,6 @@
>>>>> +## Process this file with automake to produce Makefile.in
>>>>> +
>>>>> +noinst_LIBRARIES = libreexport.a
>>>>> +libreexport_a_SOURCES = reexport.c
>>>>> +
>>>>> +MAINTAINERCLEANFILES = Makefile.in
>>>>> diff --git a/support/reexport/reexport.c b/support/reexport/reexport.c
>>>>> new file mode 100644
>>>>> index 00000000..5474a21f
>>>>> --- /dev/null
>>>>> +++ b/support/reexport/reexport.c
>>>>> @@ -0,0 +1,285 @@
>>>>> +#ifdef HAVE_CONFIG_H
>>>>> +#include <config.h>
>>>>> +#endif
>>>>> +
>>>>> +#include <sqlite3.h>
>>>>> +#include <stdint.h>
>>>>> +#include <stdio.h>
>>>>> +#include <sys/random.h>
>>>>> +#include <sys/stat.h>
>>>>> +#include <sys/types.h>
>>>>> +#include <sys/vfs.h>
>>>>> +#include <unistd.h>
>>>>> +
>>>>> +#include "nfsd_path.h"
>>>>> +#include "nfslib.h"
>>>>> +#include "reexport.h"
>>>>> +#include "xcommon.h"
>>>>> +#include "xlog.h"
>>>>> +
>>>>> +#define REEXPDB_DBFILE NFS_STATEDIR "/reexpdb.sqlite3"
>>>>> +#define REEXPDB_DBFILE_WAIT_USEC (5000)
>>>>> +
>>>>> +static sqlite3 *db;
>>>>> +static int init_done;
>>>>> +
>>>>> +static int prng_init(void)
>>>>> +{
>>>>> +	int seed;
>>>>> +
>>>>> +	if (getrandom(&seed, sizeof(seed), 0) != sizeof(seed)) {
>>>>> +		xlog(L_ERROR, "Unable to obtain seed for PRNG via getrandom()");
>>>>> +		return -1;
>>>>> +	}
>>>>> +
>>>>> +	srand(seed);
>>>>> +	return 0;
>>>>> +}
>>>>> +
>>>>> +static void wait_for_dbaccess(void)
>>>>> +{
>>>>> +	usleep(REEXPDB_DBFILE_WAIT_USEC + (rand() % REEXPDB_DBFILE_WAIT_USEC));
>>>>> +}
>>>>> +
>>>>> +/*
>>>>> + * reexpdb_init - Initialize reexport database
>>>>> + */
>>>>> +int reexpdb_init(void)
>>>>> +{
>>>>> +	char *sqlerr;
>>>>> +	int ret;
>>>>> +
>>>>> +	if (init_done)
>>>>> +		return 0;
>>>>> +
>>>>> +	if (prng_init() != 0)
>>>>> +		return -1;
>>>>> +
>>>>> +	ret = sqlite3_open_v2(REEXPDB_DBFILE, &db, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_FULLMUTEX, NULL);
>>>>> +	if (ret != SQLITE_OK) {
>>>>> +		xlog(L_ERROR, "Unable to open reexport database: %s", sqlite3_errstr(ret));
>>>>> +		return -1;
>>>>> +	}
>>>>> +
>>>>> +again:
>>>>> +	ret = sqlite3_exec(db, "CREATE TABLE IF NOT EXISTS fsidnums (num INTEGER PRIMARY KEY CHECK (num > 0 AND num < 4294967296), path TEXT UNIQUE); CREATE INDEX IF NOT EXISTS idx_ids_path ON fsidnums (path);", NULL, NULL, &sqlerr);
>>>>> +	switch (ret) {
>>>>> +	case SQLITE_OK:
>>>>> +		init_done = 1;
>>>>> +		ret = 0;
>>>>> +		break;
>>>>> +	case SQLITE_BUSY:
>>>>> +	case SQLITE_LOCKED:
>>>>> +		wait_for_dbaccess();
>>>>> +		goto again;
>>>>> +	default:
>>>>> +		xlog(L_ERROR, "Unable to init reexport database: %s", sqlite3_errstr(ret));
>>>>> +		sqlite3_free(sqlerr);
>>>>> +		sqlite3_close_v2(db);
>>>>> +		ret = -1;
>>>>> +	}
>>>>> +
>>>>> +	return ret;
>>>>> +}
>>>>> +
>>>>> +/*
>>>>> + * reexpdb_destroy - Undo reexpdb_init().
>>>>> + */
>>>>> +void reexpdb_destroy(void)
>>>>> +{
>>>>> +	if (!init_done)
>>>>> +		return;
>>>>> +
>>>>> +	sqlite3_close_v2(db);
>>>>> +}
>>>>> +
>>>>> +static int get_fsidnum_by_path(char *path, uint32_t *fsidnum)
>>>>> +{
>>>>> +	static const char fsidnum_by_path_sql[] = "SELECT num FROM fsidnums WHERE path = ?1;";
>>>>> +	sqlite3_stmt *stmt = NULL;
>>>>> +	int found = 0;
>>>>> +	int ret;
>>>>> +
>>>>> +	ret = sqlite3_prepare_v2(db, fsidnum_by_path_sql, sizeof(fsidnum_by_path_sql), &stmt, NULL);
>>>>> +	if (ret != SQLITE_OK) {
>>>>> +		xlog(L_WARNING, "Unable to prepare SQL query '%s': %s", fsidnum_by_path_sql, sqlite3_errstr(ret));
>>>>> +		goto out;
>>>>> +	}
>>>>> +
>>>>> +	ret = sqlite3_bind_text(stmt, 1, path, -1, NULL);
>>>>> +	if (ret != SQLITE_OK) {
>>>>> +		xlog(L_WARNING, "Unable to bind SQL query '%s': %s", fsidnum_by_path_sql, sqlite3_errstr(ret));
>>>>> +		goto out;
>>>>> +	}
>>>>> +
>>>>> +again:
>>>>> +	ret = sqlite3_step(stmt);
>>>>> +	switch (ret) {
>>>>> +	case SQLITE_ROW:
>>>>> +		*fsidnum = sqlite3_column_int(stmt, 0);
>>>>> +		found = 1;
>>>>> +		break;
>>>>> +	case SQLITE_DONE:
>>>>> +		/* No hit */
>>>>> +		found = 0;
>>>>> +		break;
>>>>> +	case SQLITE_BUSY:
>>>>> +	case SQLITE_LOCKED:
>>>>> +		wait_for_dbaccess();
>>>>> +		goto again;
>>>>> +	default:
>>>>> +		xlog(L_WARNING, "Error while looking up '%s' in database: %s", path, sqlite3_errstr(ret));
>>>>> +	}
>>>>> +
>>>>> +out:
>>>>> +	sqlite3_finalize(stmt);
>>>>> +	return found;
>>>>> +}
>>>>> +
>>>>> +static int get_path_by_fsidnum(uint32_t fsidnum, char **path)
>>>>> +{
>>>>> +	static const char path_by_fsidnum_sql[] = "SELECT path FROM fsidnums WHERE num = ?1;";
>>>>> +	sqlite3_stmt *stmt = NULL;
>>>>> +	int found = 0;
>>>>> +	int ret;
>>>>> +
>>>>> +	ret = sqlite3_prepare_v2(db, path_by_fsidnum_sql, sizeof(path_by_fsidnum_sql), &stmt, NULL);
>>>>> +	if (ret != SQLITE_OK) {
>>>>> +		xlog(L_WARNING, "Unable to prepare SQL query '%s': %s", path_by_fsidnum_sql, sqlite3_errstr(ret));
>>>>> +		goto out;
>>>>> +	}
>>>>> +
>>>>> +	ret = sqlite3_bind_int(stmt, 1, fsidnum);
>>>>> +	if (ret != SQLITE_OK) {
>>>>> +		xlog(L_WARNING, "Unable to bind SQL query '%s': %s", path_by_fsidnum_sql, sqlite3_errstr(ret));
>>>>> +		goto out;
>>>>> +	}
>>>>> +
>>>>> +again:
>>>>> +	ret = sqlite3_step(stmt);
>>>>> +	switch (ret) {
>>>>> +	case SQLITE_ROW:
>>>>> +		*path = xstrdup((char *)sqlite3_column_text(stmt, 0));
>>>>> +		found = 1;
>>>>> +		break;
>>>>> +	case SQLITE_DONE:
>>>>> +		/* No hit */
>>>>> +		found = 0;
>>>>> +		break;
>>>>> +	case SQLITE_BUSY:
>>>>> +	case SQLITE_LOCKED:
>>>>> +		wait_for_dbaccess();
>>>>> +		goto again;
>>>>> +	default:
>>>>> +		xlog(L_WARNING, "Error while looking up '%i' in database: %s", fsidnum, sqlite3_errstr(ret));
>>>>> +	}
>>>>> +
>>>>> +out:
>>>>> +	sqlite3_finalize(stmt);
>>>>> +	return found;
>>>>> +}
>>>>> +
>>>>> +static int new_fsidnum_by_path(char *path, uint32_t *fsidnum)
>>>>> +{
>>>>> +	/*
>>>>> +	 * This query is a little tricky. We use SQL to find and claim the smallest free fsid number.
>>>>> +	 * To find a free fsid the fsidnums is left joined to itself but with an offset of 1.
>>>>> +	 * Everything after the UNION statement is to handle the corner case where fsidnums
>>>>> +	 * is empty. In this case we want 1 as first fsid number.
>>>>> +	 */
>>>>> +	static const char new_fsidnum_by_path_sql[] = "INSERT INTO fsidnums VALUES ((SELECT ids1.num + 1 FROM fsidnums AS ids1 LEFT JOIN fsidnums AS ids2 ON ids2.num = ids1.num + 1 WHERE ids2.num IS NULL UNION SELECT 1 WHERE NOT EXISTS (SELECT NULL FROM fsidnums WHERE num = 1) LIMIT 1), ?1) RETURNING num;";
>>>>> +
>>>>> +	sqlite3_stmt *stmt = NULL;
>>>>> +	int found = 0, check = 0;
>>>>> +	int ret;
>>>>> +
>>>>> +	ret = sqlite3_prepare_v2(db, new_fsidnum_by_path_sql, sizeof(new_fsidnum_by_path_sql), &stmt, NULL);
>>>>> +	if (ret != SQLITE_OK) {
>>>>> +		xlog(L_WARNING, "Unable to prepare SQL query '%s': %s", new_fsidnum_by_path_sql, sqlite3_errstr(ret));
>>>>> +		goto out;
>>>>> +	}
>>>>> +
>>>>> +	ret = sqlite3_bind_text(stmt, 1, path, -1, NULL);
>>>>> +	if (ret != SQLITE_OK) {
>>>>> +		xlog(L_WARNING, "Unable to bind SQL query '%s': %s", new_fsidnum_by_path_sql, sqlite3_errstr(ret));
>>>>> +		goto out;
>>>>> +	}
>>>>> +
>>>>> +again:
>>>>> +	ret = sqlite3_step(stmt);
>>>>> +	switch (ret) {
>>>>> +	case SQLITE_ROW:
>>>>> +		*fsidnum = sqlite3_column_int(stmt, 0);
>>>>> +		found = 1;
>>>>> +		break;
>>>>> +	case SQLITE_CONSTRAINT:
>>>>> +		/* Maybe we lost the race against another writer and the path is now present. */
>>>>> +		check = 1;
>>>>> +		break;
>>>>> +	case SQLITE_BUSY:
>>>>> +	case SQLITE_LOCKED:
>>>>> +		wait_for_dbaccess();
>>>>> +		goto again;
>>>>> +	default:
>>>>> +		xlog(L_WARNING, "Error while looking up '%s' in database: %s", path, sqlite3_errstr(ret));
>>>>> +	}
>>>>> +
>>>>> +out:
>>>>> +	sqlite3_finalize(stmt);
>>>>> +
>>>>> +	if (check) {
>>>>> +		found = get_fsidnum_by_path(path, fsidnum);
>>>>> +		if (!found)
>>>>> +			xlog(L_WARNING, "SQLITE_CONSTRAINT error while inserting '%s' in database", path);
>>>>> +	}
>>>>> +
>>>>> +	return found;
>>>>> +}
>>>>> +
>>>>> +/*
>>>>> + * reexpdb_fsidnum_by_path - Lookup a fsid by path.
>>>>> + *
>>>>> + * @path: File system path used as lookup key
>>>>> + * @fsidnum: Pointer where found fsid is written to
>>>>> + * @may_create: If non-zero, allocate new fsid if lookup failed
>>>>> + *
>>>>> + */
>>>>> +int reexpdb_fsidnum_by_path(char *path, uint32_t *fsidnum, int may_create)
>>>>> +{
>>>>> +	int found;
>>>>> +
>>>>> +	found = get_fsidnum_by_path(path, fsidnum);
>>>>> +
>>>>> +	if (!found && may_create)
>>>>> +		found = new_fsidnum_by_path(path, fsidnum);
>>>>> +
>>>>> +	return found;
>>>>> +}
>>>>> +
>>>>> +/*
>>>>> + * reexpdb_uncover_subvolume - Make sure a subvolume is present.
>>>>> + *
>>>>> + * @fsidnum: Numerical fsid number to look for
>>>>> + *
>>>>> + * Subvolumes (NFS cross mounts) get automatically mounted upon first
>>>>> + * access and can vanish after fs.nfs.nfs_mountpoint_timeout seconds.
>>>>> + * Also if the NFS server reboots, clients can still have valid file
>>>>> + * handles for such a subvolume.
>>>>> + *
>>>>> + * If kNFSd asks mountd for the path of a given fsidnum it can
>>>>> + * trigger an automount by calling statfs() on the given path.
>>>>> + */
>>>>> +void reexpdb_uncover_subvolume(uint32_t fsidnum)
>>>>> +{
>>>>> +	struct statfs64 st;
>>>>> +	char *path = NULL;
>>>>> +	int ret;
>>>>> +
>>>>> +	if (get_path_by_fsidnum(fsidnum, &path)) {
>>>>> +		ret = nfsd_path_statfs64(path, &st);
>>>>> +		if (ret == -1)
>>>>> +			xlog(L_WARNING, "statfs() failed");
>>>>> +	}
>>>>> +
>>>>> +	free(path);
>>>>> +}
>>>>> diff --git a/support/reexport/reexport.h b/support/reexport/reexport.h
>>>>> new file mode 100644
>>>>> index 00000000..bb6d2a1b
>>>>> --- /dev/null
>>>>> +++ b/support/reexport/reexport.h
>>>>> @@ -0,0 +1,39 @@
>>>>> +#ifndef REEXPORT_H
>>>>> +#define REEXPORT_H
>>>>> +
>>>>> +enum {
>>>>> +	REEXP_NONE = 0,
>>>>> +	REEXP_AUTO_FSIDNUM,
>>>>> +	REEXP_PREDEFINED_FSIDNUM,
>>>>> +};
>>>>> +
>>>>> +#ifdef HAVE_REEXPORT_SUPPORT
>>>>> +int reexpdb_init(void);
>>>>> +void reexpdb_destroy(void);
>>>>> +int reexpdb_fsidnum_by_path(char *path, uint32_t *fsidnum, int may_create);
>>>>> +int reexpdb_apply_reexport_settings(struct exportent *ep, char *flname, int flline);
>>>>> +void reexpdb_uncover_subvolume(uint32_t fsidnum);
>>>>> +#else
>>>>> +static inline int reexpdb_init(void) { return 0; }
>>>>> +static inline void reexpdb_destroy(void) {}
>>>>> +static inline int reexpdb_fsidnum_by_path(char *path, uint32_t *fsidnum, int may_create)
>>>>> +{
>>>>> +	(void)path;
>>>>> +	(void)may_create;
>>>>> +	*fsidnum = 0;
>>>>> +	return 0;
>>>>> +}
>>>>> +static inline int reexpdb_apply_reexport_settings(struct exportent *ep, char *flname, int flline)
>>>>> +{
>>>>> +	(void)ep;
>>>>> +	(void)flname;
>>>>> +	(void)flline;
>>>>> +	return 0;
>>>>> +}
>>>>> +static inline void reexpdb_uncover_subvolume(uint32_t fsidnum)
>>>>> +{
>>>>> +	(void)fsidnum;
>>>>> +}
>>>>> +#endif /* HAVE_REEXPORT_SUPPORT */
>>>>> +
>>>>> +#endif /* REEXPORT_H */
>>>>
>>> --
>>> Chuck Lever
> 
> --
> Chuck Lever
> 
> 
>
Richard Weinberger May 10, 2022, 8:32 p.m. UTC | #7
----- Ursprüngliche Mail -----
> Von: "Steve Dickson" <steved@redhat.com>
>> I'll bite. What is the added footprint of this patch
>> series? I didn't think there was a new library dependency
>> or anything like that...
> Well mountd is now using an database and there
> is a static reexport lib that a number daemons
> and command like with...
> 
> In the I would like to bring (or be able) bring the
> foot print of nfs-utils down so it will be more container
> friendly... IDK if is possible... but that is the idea.

Getting rid of the SQLite dependency shouldn't be a big deal.
I'm already working on a plugin interface for reexport such that
a sysadmin can easily use other databases than SQLite.

E.g. in nfs.conf you point to a shared library which hides all the
database stuff from you.

Thanks,
//richard
Chuck Lever May 10, 2022, 8:37 p.m. UTC | #8
> On May 10, 2022, at 4:08 PM, Steve Dickson <steved@redhat.com> wrote:
> 
> On 5/10/22 10:17 AM, Chuck Lever III wrote:
>>> On May 10, 2022, at 10:04 AM, Steve Dickson <steved@redhat.com> wrote:
>>> 
>>> Hey!
>>> 
>>> On 5/10/22 9:48 AM, Chuck Lever III wrote:
>>>>> On May 10, 2022, at 9:32 AM, Steve Dickson <steved@redhat.com> wrote:
>>>>> 
>>>>> Hello,
>>>>> 
>>>>> On 5/2/22 4:50 AM, Richard Weinberger wrote:
>>>>>> This internal library contains code that will be used by various
>>>>>> tools within the nfs-utils package to deal better with NFS re-export,
>>>>>> especially cross mounts.
>>>>>> Signed-off-by: Richard Weinberger <richard@nod.at>
>>>>>> ---
>>>>>>  configure.ac                 |  12 ++
>>>>>>  support/Makefile.am          |   4 +
>>>>>>  support/reexport/Makefile.am |   6 +
>>>>>>  support/reexport/reexport.c  | 285 +++++++++++++++++++++++++++++++++++
>>>>>>  support/reexport/reexport.h  |  39 +++++
>>>>>>  5 files changed, 346 insertions(+)
>>>>>>  create mode 100644 support/reexport/Makefile.am
>>>>>>  create mode 100644 support/reexport/reexport.c
>>>>>>  create mode 100644 support/reexport/reexport.h
>>>>>> diff --git a/configure.ac b/configure.ac
>>>>>> index 93626d62..86bf8ba9 100644
>>>>>> --- a/configure.ac
>>>>>> +++ b/configure.ac
>>>>>> @@ -274,6 +274,17 @@ AC_ARG_ENABLE(nfsv4server,
>>>>>>  	fi
>>>>>>  	AM_CONDITIONAL(CONFIG_NFSV4SERVER, [test "$enable_nfsv4server" = "yes" ])
>>>>>>  +AC_ARG_ENABLE(reexport,
>>>>>> +	[AC_HELP_STRING([--enable-reexport],
>>>>>> +			[enable support for re-exporting NFS mounts  @<:@default=no@:>@])],
>>>>>> +	enable_reexport=$enableval,
>>>>>> +	enable_reexport="no")
>>>>>> +	if test "$enable_reexport" = yes; then
>>>>>> +		AC_DEFINE(HAVE_REEXPORT_SUPPORT, 1,
>>>>>> +                          [Define this if you want NFS re-export support compiled in])
>>>>>> +	fi
>>>>>> +	AM_CONDITIONAL(CONFIG_REEXPORT, [test "$enable_reexport" = "yes" ])
>>>>>> +
>>>>> To get this moving I'm going to add a --disable-reexport flag
>>>> Hi Steve, no-one has given a reason why disabling support
>>>> for re-exports would be necessary. Therefore, can't this
>>>> switch just be removed?
>>> The precedence has been that we used --disable-XXX flag
>>> a lot in configure.ac... -nfsv4, -nfsv41, etc...
>>> I'm just following along with that.
>> I get that... but no-one has given a technical reason
>> why disabling this code would even be necessary.
> Nobody has see the code yet... :-)
> 
> There is a lot of churn in these patch.
> 
>>> Yes, at this point, nobody is asking to turn it off but
>>> in future somebody may want to... due some security hole
>>> or just make the footprint of the package smaller.
>> I'll bite. What is the added footprint of this patch
>> series? I didn't think there was a new library dependency
>> or anything like that...
> Well mountd is now using an database and there
> is a static reexport lib that a number daemons
> and command like with...

nfs-utils already depends on sqlite for the nfsd
cltrack stuff, so I don't really see an additional
dependency for nfs-utils there.

However, there is a configure.ac switch that says
if there's no sqlite in the build environment,
then nfsdcltrack is entirely disabled. I guess
the build environment needs to deal correctly
with that situation and re-export support.


--
Chuck Lever
diff mbox series

Patch

diff --git a/configure.ac b/configure.ac
index 93626d62..86bf8ba9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -274,6 +274,17 @@  AC_ARG_ENABLE(nfsv4server,
 	fi
 	AM_CONDITIONAL(CONFIG_NFSV4SERVER, [test "$enable_nfsv4server" = "yes" ])
 
+AC_ARG_ENABLE(reexport,
+	[AC_HELP_STRING([--enable-reexport],
+			[enable support for re-exporting NFS mounts  @<:@default=no@:>@])],
+	enable_reexport=$enableval,
+	enable_reexport="no")
+	if test "$enable_reexport" = yes; then
+		AC_DEFINE(HAVE_REEXPORT_SUPPORT, 1,
+                          [Define this if you want NFS re-export support compiled in])
+	fi
+	AM_CONDITIONAL(CONFIG_REEXPORT, [test "$enable_reexport" = "yes" ])
+
 dnl Check for TI-RPC library and headers
 AC_LIBTIRPC
 
@@ -730,6 +741,7 @@  AC_CONFIG_FILES([
 	support/nsm/Makefile
 	support/nfsidmap/Makefile
 	support/nfsidmap/libnfsidmap.pc
+	support/reexport/Makefile
 	tools/Makefile
 	tools/locktest/Makefile
 	tools/nlmtest/Makefile
diff --git a/support/Makefile.am b/support/Makefile.am
index c962d4d4..986e9b5f 100644
--- a/support/Makefile.am
+++ b/support/Makefile.am
@@ -10,6 +10,10 @@  if CONFIG_JUNCTION
 OPTDIRS += junction
 endif
 
+if CONFIG_REEXPORT
+OPTDIRS += reexport
+endif
+
 SUBDIRS = export include misc nfs nsm $(OPTDIRS)
 
 MAINTAINERCLEANFILES = Makefile.in
diff --git a/support/reexport/Makefile.am b/support/reexport/Makefile.am
new file mode 100644
index 00000000..9d544a8f
--- /dev/null
+++ b/support/reexport/Makefile.am
@@ -0,0 +1,6 @@ 
+## Process this file with automake to produce Makefile.in
+
+noinst_LIBRARIES = libreexport.a
+libreexport_a_SOURCES = reexport.c
+
+MAINTAINERCLEANFILES = Makefile.in
diff --git a/support/reexport/reexport.c b/support/reexport/reexport.c
new file mode 100644
index 00000000..5474a21f
--- /dev/null
+++ b/support/reexport/reexport.c
@@ -0,0 +1,285 @@ 
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <sqlite3.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/random.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+#include <unistd.h>
+
+#include "nfsd_path.h"
+#include "nfslib.h"
+#include "reexport.h"
+#include "xcommon.h"
+#include "xlog.h"
+
+#define REEXPDB_DBFILE NFS_STATEDIR "/reexpdb.sqlite3"
+#define REEXPDB_DBFILE_WAIT_USEC (5000)
+
+static sqlite3 *db;
+static int init_done;
+
+static int prng_init(void)
+{
+	int seed;
+
+	if (getrandom(&seed, sizeof(seed), 0) != sizeof(seed)) {
+		xlog(L_ERROR, "Unable to obtain seed for PRNG via getrandom()");
+		return -1;
+	}
+
+	srand(seed);
+	return 0;
+}
+
+static void wait_for_dbaccess(void)
+{
+	usleep(REEXPDB_DBFILE_WAIT_USEC + (rand() % REEXPDB_DBFILE_WAIT_USEC));
+}
+
+/*
+ * reexpdb_init - Initialize reexport database
+ */
+int reexpdb_init(void)
+{
+	char *sqlerr;
+	int ret;
+
+	if (init_done)
+		return 0;
+
+	if (prng_init() != 0)
+		return -1;
+
+	ret = sqlite3_open_v2(REEXPDB_DBFILE, &db, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_FULLMUTEX, NULL);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to open reexport database: %s", sqlite3_errstr(ret));
+		return -1;
+	}
+
+again:
+	ret = sqlite3_exec(db, "CREATE TABLE IF NOT EXISTS fsidnums (num INTEGER PRIMARY KEY CHECK (num > 0 AND num < 4294967296), path TEXT UNIQUE); CREATE INDEX IF NOT EXISTS idx_ids_path ON fsidnums (path);", NULL, NULL, &sqlerr);
+	switch (ret) {
+	case SQLITE_OK:
+		init_done = 1;
+		ret = 0;
+		break;
+	case SQLITE_BUSY:
+	case SQLITE_LOCKED:
+		wait_for_dbaccess();
+		goto again;
+	default:
+		xlog(L_ERROR, "Unable to init reexport database: %s", sqlite3_errstr(ret));
+		sqlite3_free(sqlerr);
+		sqlite3_close_v2(db);
+		ret = -1;
+	}
+
+	return ret;
+}
+
+/*
+ * reexpdb_destroy - Undo reexpdb_init().
+ */
+void reexpdb_destroy(void)
+{
+	if (!init_done)
+		return;
+
+	sqlite3_close_v2(db);
+}
+
+static int get_fsidnum_by_path(char *path, uint32_t *fsidnum)
+{
+	static const char fsidnum_by_path_sql[] = "SELECT num FROM fsidnums WHERE path = ?1;";
+	sqlite3_stmt *stmt = NULL;
+	int found = 0;
+	int ret;
+
+	ret = sqlite3_prepare_v2(db, fsidnum_by_path_sql, sizeof(fsidnum_by_path_sql), &stmt, NULL);
+	if (ret != SQLITE_OK) {
+		xlog(L_WARNING, "Unable to prepare SQL query '%s': %s", fsidnum_by_path_sql, sqlite3_errstr(ret));
+		goto out;
+	}
+
+	ret = sqlite3_bind_text(stmt, 1, path, -1, NULL);
+	if (ret != SQLITE_OK) {
+		xlog(L_WARNING, "Unable to bind SQL query '%s': %s", fsidnum_by_path_sql, sqlite3_errstr(ret));
+		goto out;
+	}
+
+again:
+	ret = sqlite3_step(stmt);
+	switch (ret) {
+	case SQLITE_ROW:
+		*fsidnum = sqlite3_column_int(stmt, 0);
+		found = 1;
+		break;
+	case SQLITE_DONE:
+		/* No hit */
+		found = 0;
+		break;
+	case SQLITE_BUSY:
+	case SQLITE_LOCKED:
+		wait_for_dbaccess();
+		goto again;
+	default:
+		xlog(L_WARNING, "Error while looking up '%s' in database: %s", path, sqlite3_errstr(ret));
+	}
+
+out:
+	sqlite3_finalize(stmt);
+	return found;
+}
+
+static int get_path_by_fsidnum(uint32_t fsidnum, char **path)
+{
+	static const char path_by_fsidnum_sql[] = "SELECT path FROM fsidnums WHERE num = ?1;";
+	sqlite3_stmt *stmt = NULL;
+	int found = 0;
+	int ret;
+
+	ret = sqlite3_prepare_v2(db, path_by_fsidnum_sql, sizeof(path_by_fsidnum_sql), &stmt, NULL);
+	if (ret != SQLITE_OK) {
+		xlog(L_WARNING, "Unable to prepare SQL query '%s': %s", path_by_fsidnum_sql, sqlite3_errstr(ret));
+		goto out;
+	}
+
+	ret = sqlite3_bind_int(stmt, 1, fsidnum);
+	if (ret != SQLITE_OK) {
+		xlog(L_WARNING, "Unable to bind SQL query '%s': %s", path_by_fsidnum_sql, sqlite3_errstr(ret));
+		goto out;
+	}
+
+again:
+	ret = sqlite3_step(stmt);
+	switch (ret) {
+	case SQLITE_ROW:
+		*path = xstrdup((char *)sqlite3_column_text(stmt, 0));
+		found = 1;
+		break;
+	case SQLITE_DONE:
+		/* No hit */
+		found = 0;
+		break;
+	case SQLITE_BUSY:
+	case SQLITE_LOCKED:
+		wait_for_dbaccess();
+		goto again;
+	default:
+		xlog(L_WARNING, "Error while looking up '%i' in database: %s", fsidnum, sqlite3_errstr(ret));
+	}
+
+out:
+	sqlite3_finalize(stmt);
+	return found;
+}
+
+static int new_fsidnum_by_path(char *path, uint32_t *fsidnum)
+{
+	/*
+	 * This query is a little tricky. We use SQL to find and claim the smallest free fsid number.
+	 * To find a free fsid the fsidnums is left joined to itself but with an offset of 1.
+	 * Everything after the UNION statement is to handle the corner case where fsidnums
+	 * is empty. In this case we want 1 as first fsid number.
+	 */
+	static const char new_fsidnum_by_path_sql[] = "INSERT INTO fsidnums VALUES ((SELECT ids1.num + 1 FROM fsidnums AS ids1 LEFT JOIN fsidnums AS ids2 ON ids2.num = ids1.num + 1 WHERE ids2.num IS NULL UNION SELECT 1 WHERE NOT EXISTS (SELECT NULL FROM fsidnums WHERE num = 1) LIMIT 1), ?1) RETURNING num;";
+
+	sqlite3_stmt *stmt = NULL;
+	int found = 0, check = 0;
+	int ret;
+
+	ret = sqlite3_prepare_v2(db, new_fsidnum_by_path_sql, sizeof(new_fsidnum_by_path_sql), &stmt, NULL);
+	if (ret != SQLITE_OK) {
+		xlog(L_WARNING, "Unable to prepare SQL query '%s': %s", new_fsidnum_by_path_sql, sqlite3_errstr(ret));
+		goto out;
+	}
+
+	ret = sqlite3_bind_text(stmt, 1, path, -1, NULL);
+	if (ret != SQLITE_OK) {
+		xlog(L_WARNING, "Unable to bind SQL query '%s': %s", new_fsidnum_by_path_sql, sqlite3_errstr(ret));
+		goto out;
+	}
+
+again:
+	ret = sqlite3_step(stmt);
+	switch (ret) {
+	case SQLITE_ROW:
+		*fsidnum = sqlite3_column_int(stmt, 0);
+		found = 1;
+		break;
+	case SQLITE_CONSTRAINT:
+		/* Maybe we lost the race against another writer and the path is now present. */
+		check = 1;
+		break;
+	case SQLITE_BUSY:
+	case SQLITE_LOCKED:
+		wait_for_dbaccess();
+		goto again;
+	default:
+		xlog(L_WARNING, "Error while looking up '%s' in database: %s", path, sqlite3_errstr(ret));
+	}
+
+out:
+	sqlite3_finalize(stmt);
+
+	if (check) {
+		found = get_fsidnum_by_path(path, fsidnum);
+		if (!found)
+			xlog(L_WARNING, "SQLITE_CONSTRAINT error while inserting '%s' in database", path);
+	}
+
+	return found;
+}
+
+/*
+ * reexpdb_fsidnum_by_path - Lookup a fsid by path.
+ *
+ * @path: File system path used as lookup key
+ * @fsidnum: Pointer where found fsid is written to
+ * @may_create: If non-zero, allocate new fsid if lookup failed
+ *
+ */
+int reexpdb_fsidnum_by_path(char *path, uint32_t *fsidnum, int may_create)
+{
+	int found;
+
+	found = get_fsidnum_by_path(path, fsidnum);
+
+	if (!found && may_create)
+		found = new_fsidnum_by_path(path, fsidnum);
+
+	return found;
+}
+
+/*
+ * reexpdb_uncover_subvolume - Make sure a subvolume is present.
+ *
+ * @fsidnum: Numerical fsid number to look for
+ *
+ * Subvolumes (NFS cross mounts) get automatically mounted upon first
+ * access and can vanish after fs.nfs.nfs_mountpoint_timeout seconds.
+ * Also if the NFS server reboots, clients can still have valid file
+ * handles for such a subvolume.
+ *
+ * If kNFSd asks mountd for the path of a given fsidnum it can
+ * trigger an automount by calling statfs() on the given path.
+ */
+void reexpdb_uncover_subvolume(uint32_t fsidnum)
+{
+	struct statfs64 st;
+	char *path = NULL;
+	int ret;
+
+	if (get_path_by_fsidnum(fsidnum, &path)) {
+		ret = nfsd_path_statfs64(path, &st);
+		if (ret == -1)
+			xlog(L_WARNING, "statfs() failed");
+	}
+
+	free(path);
+}
diff --git a/support/reexport/reexport.h b/support/reexport/reexport.h
new file mode 100644
index 00000000..bb6d2a1b
--- /dev/null
+++ b/support/reexport/reexport.h
@@ -0,0 +1,39 @@ 
+#ifndef REEXPORT_H
+#define REEXPORT_H
+
+enum {
+	REEXP_NONE = 0,
+	REEXP_AUTO_FSIDNUM,
+	REEXP_PREDEFINED_FSIDNUM,
+};
+
+#ifdef HAVE_REEXPORT_SUPPORT
+int reexpdb_init(void);
+void reexpdb_destroy(void);
+int reexpdb_fsidnum_by_path(char *path, uint32_t *fsidnum, int may_create);
+int reexpdb_apply_reexport_settings(struct exportent *ep, char *flname, int flline);
+void reexpdb_uncover_subvolume(uint32_t fsidnum);
+#else
+static inline int reexpdb_init(void) { return 0; }
+static inline void reexpdb_destroy(void) {}
+static inline int reexpdb_fsidnum_by_path(char *path, uint32_t *fsidnum, int may_create)
+{
+	(void)path;
+	(void)may_create;
+	*fsidnum = 0;
+	return 0;
+}
+static inline int reexpdb_apply_reexport_settings(struct exportent *ep, char *flname, int flline)
+{
+	(void)ep;
+	(void)flname;
+	(void)flline;
+	return 0;
+}
+static inline void reexpdb_uncover_subvolume(uint32_t fsidnum)
+{
+	(void)fsidnum;
+}
+#endif /* HAVE_REEXPORT_SUPPORT */
+
+#endif /* REEXPORT_H */