diff mbox series

[11/11] svcgssd: Wait for nullrpc channel if not available

Message ID 20200718092421.31691-12-nazard@nazar.ca (mailing list archive)
State New, archived
Headers show
Series nfs-utils: Misc cleanups & fixes | expand

Commit Message

Doug Nazar July 18, 2020, 9:24 a.m. UTC
Signed-off-by: Doug Nazar <nazard@nazar.ca>
---
 utils/gssd/svcgssd.c | 99 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 80 insertions(+), 19 deletions(-)

Comments

J. Bruce Fields July 18, 2020, 3:55 p.m. UTC | #1
On Sat, Jul 18, 2020 at 05:24:21AM -0400, Doug Nazar wrote:
> Signed-off-by: Doug Nazar <nazard@nazar.ca>

So, is there a race here that could result in a hang, and has anyone
seen it in practice?

Just curious.  Thanks for doing this.--b.

> ---
>  utils/gssd/svcgssd.c | 99 +++++++++++++++++++++++++++++++++++---------
>  1 file changed, 80 insertions(+), 19 deletions(-)
> 
> diff --git a/utils/gssd/svcgssd.c b/utils/gssd/svcgssd.c
> index 3155a2f9..3ab2100b 100644
> --- a/utils/gssd/svcgssd.c
> +++ b/utils/gssd/svcgssd.c
> @@ -67,9 +67,14 @@
>  #include "misc.h"
>  #include "svcgssd_krb5.h"
>  
> -struct state_paths etab;
> +struct state_paths etab; /* from cacheio.c */
>  static bool signal_received = false;
>  static struct event_base *evbase = NULL;
> +static int nullrpc_fd = -1;
> +static struct event *nullrpc_event = NULL;
> +static struct event *wait_event = NULL;
> +
> +#define NULLRPC_FILE "/proc/net/rpc/auth.rpcsec.init/channel"
>  
>  static void
>  sig_die(int signal)
> @@ -118,6 +123,66 @@ svcgssd_nullrpc_cb(int fd, short UNUSED(which), void *UNUSED(data))
>  	handle_nullreq(lbuf);
>  }
>  
> +static void
> +svcgssd_nullrpc_close(void)
> +{
> +	if (nullrpc_event) {
> +		printerr(2, "closing nullrpc channel %s\n", NULLRPC_FILE);
> +		event_free(nullrpc_event);
> +		nullrpc_event = NULL;
> +	}
> +	if (nullrpc_fd != -1) {
> +		close(nullrpc_fd);
> +		nullrpc_fd = -1;
> +	}
> +}
> +
> +static void
> +svcgssd_nullrpc_open(void)
> +{
> +	nullrpc_fd = open(NULLRPC_FILE, O_RDWR);
> +	if (nullrpc_fd < 0) {
> +		printerr(0, "failed to open %s: %s\n",
> +			 NULLRPC_FILE, strerror(errno));
> +		return;
> +	}
> +	nullrpc_event = event_new(evbase, nullrpc_fd, EV_READ | EV_PERSIST,
> +				  svcgssd_nullrpc_cb, NULL);
> +	if (!nullrpc_event) {
> +		printerr(0, "failed to create event for %s: %s\n",
> +			 NULLRPC_FILE, strerror(errno));
> +		close(nullrpc_fd);
> +		nullrpc_fd = -1;
> +		return;
> +	}
> +	event_add(nullrpc_event, NULL);
> +	printerr(2, "opened nullrpc channel %s\n", NULLRPC_FILE);
> +}
> +
> +static void
> +svcgssd_wait_cb(int UNUSED(fd), short UNUSED(which), void *UNUSED(data))
> +{
> +	static int times = 0;
> +	int rc;
> +
> +	rc = access(NULLRPC_FILE, R_OK | W_OK);
> +	if (rc != 0) {
> +		struct timeval t = {times < 10 ? 1 : 10, 0};
> +		times++;
> +		if (times % 30 == 0)
> +			printerr(2, "still waiting for nullrpc channel: %s\n",
> +				NULLRPC_FILE);
> +		evtimer_add(wait_event, &t);
> +		return;
> +	}
> +
> +	svcgssd_nullrpc_open();
> +	event_free(wait_event);
> +	wait_event = NULL;
> +}
> +
> +
> +
>  int
>  main(int argc, char *argv[])
>  {
> @@ -132,8 +197,6 @@ main(int argc, char *argv[])
>  	char *principal = NULL;
>  	char *s;
>  	int rc;
> -	int nullrpc_fd = -1;
> -	struct event *nullrpc_event = NULL;
>  
>  	conf_init_file(NFS_CONFFILE);
>  
> @@ -250,22 +313,19 @@ main(int argc, char *argv[])
>  		}
>  	}
>  
> -#define NULLRPC_FILE "/proc/net/rpc/auth.rpcsec.init/channel"
> -
> -	nullrpc_fd = open(NULLRPC_FILE, O_RDWR);
> -	if (nullrpc_fd < 0) {
> -		printerr(0, "failed to open %s: %s\n",
> -			 NULLRPC_FILE, strerror(errno));
> -		exit(1);
> -	}
> -	nullrpc_event = event_new(evbase, nullrpc_fd, EV_READ | EV_PERSIST,
> -				  svcgssd_nullrpc_cb, NULL);
> +	svcgssd_nullrpc_open();
>  	if (!nullrpc_event) {
> -		printerr(0, "failed to create event for %s: %s\n",
> -			 NULLRPC_FILE, strerror(errno));
> -		exit(1);
> +		struct timeval t = {1, 0};
> +
> +		printerr(2, "waiting for nullrpc channel to appear\n");
> +		wait_event = evtimer_new(evbase, svcgssd_wait_cb, NULL);
> +		if (!wait_event) {
> +			printerr(0, "ERROR: failed to create wait event: %s\n",
> +				 strerror(errno));
> +			exit(EXIT_FAILURE);
> +		}
> +		evtimer_add(wait_event, &t);
>  	}
> -	event_add(nullrpc_event, NULL);
>  
>  	daemon_ready();
>  
> @@ -275,8 +335,9 @@ main(int argc, char *argv[])
>  	if (rc < 0)
>  		printerr(0, "event_base_dispatch() returned %i!\n", rc);
>  
> -	event_free(nullrpc_event);
> -	close(nullrpc_fd);
> +	svcgssd_nullrpc_close();
> +	if (wait_event)
> +		event_free(wait_event);
>  
>  	event_base_free(evbase);
>  
> -- 
> 2.26.2
Doug Nazar July 18, 2020, 6:07 p.m. UTC | #2
On 2020-07-18 11:55, J. Bruce Fields wrote:

> So, is there a race here that could result in a hang, and has anyone
> seen it in practice?
>
> Just curious.  Thanks for doing this.--b.

Not a hang, with the existing code, svcgssd will just exit. I'd have to 
go and restart it after boot. I'm assuming a systemd setup would just 
restart it automatically.
On my original systems I'd configured it to force load the modules, but 
I'd forgotten about that (it was about 10 years ago) when I built this 
new box last month.

As mentioned in the other email, sunrpc is loaded from an initramfs that 
doesn't have any of the gss modules. When nfsd is started (after 
svcgssd) it'll then load the gss modules but by then it's too late.

It seems to be a standard Gentoo setup. I just checked the default 
kernel config for genkernel (their kernel & initramfs builder) and it 
has all the rpc & nfs as modules, with the initramfs not supporting 
Kerberos.

Looks like Debian modprobes rpcsec_gss_krb5 before starting rpc.svcgssd.

So with this waiting for the file to appear, and I'm planning on adding 
a conditional module load to the Gentoo rpc.svcgssd init script, things 
should be as bulletproof as I can make it.

Doug
diff mbox series

Patch

diff --git a/utils/gssd/svcgssd.c b/utils/gssd/svcgssd.c
index 3155a2f9..3ab2100b 100644
--- a/utils/gssd/svcgssd.c
+++ b/utils/gssd/svcgssd.c
@@ -67,9 +67,14 @@ 
 #include "misc.h"
 #include "svcgssd_krb5.h"
 
-struct state_paths etab;
+struct state_paths etab; /* from cacheio.c */
 static bool signal_received = false;
 static struct event_base *evbase = NULL;
+static int nullrpc_fd = -1;
+static struct event *nullrpc_event = NULL;
+static struct event *wait_event = NULL;
+
+#define NULLRPC_FILE "/proc/net/rpc/auth.rpcsec.init/channel"
 
 static void
 sig_die(int signal)
@@ -118,6 +123,66 @@  svcgssd_nullrpc_cb(int fd, short UNUSED(which), void *UNUSED(data))
 	handle_nullreq(lbuf);
 }
 
+static void
+svcgssd_nullrpc_close(void)
+{
+	if (nullrpc_event) {
+		printerr(2, "closing nullrpc channel %s\n", NULLRPC_FILE);
+		event_free(nullrpc_event);
+		nullrpc_event = NULL;
+	}
+	if (nullrpc_fd != -1) {
+		close(nullrpc_fd);
+		nullrpc_fd = -1;
+	}
+}
+
+static void
+svcgssd_nullrpc_open(void)
+{
+	nullrpc_fd = open(NULLRPC_FILE, O_RDWR);
+	if (nullrpc_fd < 0) {
+		printerr(0, "failed to open %s: %s\n",
+			 NULLRPC_FILE, strerror(errno));
+		return;
+	}
+	nullrpc_event = event_new(evbase, nullrpc_fd, EV_READ | EV_PERSIST,
+				  svcgssd_nullrpc_cb, NULL);
+	if (!nullrpc_event) {
+		printerr(0, "failed to create event for %s: %s\n",
+			 NULLRPC_FILE, strerror(errno));
+		close(nullrpc_fd);
+		nullrpc_fd = -1;
+		return;
+	}
+	event_add(nullrpc_event, NULL);
+	printerr(2, "opened nullrpc channel %s\n", NULLRPC_FILE);
+}
+
+static void
+svcgssd_wait_cb(int UNUSED(fd), short UNUSED(which), void *UNUSED(data))
+{
+	static int times = 0;
+	int rc;
+
+	rc = access(NULLRPC_FILE, R_OK | W_OK);
+	if (rc != 0) {
+		struct timeval t = {times < 10 ? 1 : 10, 0};
+		times++;
+		if (times % 30 == 0)
+			printerr(2, "still waiting for nullrpc channel: %s\n",
+				NULLRPC_FILE);
+		evtimer_add(wait_event, &t);
+		return;
+	}
+
+	svcgssd_nullrpc_open();
+	event_free(wait_event);
+	wait_event = NULL;
+}
+
+
+
 int
 main(int argc, char *argv[])
 {
@@ -132,8 +197,6 @@  main(int argc, char *argv[])
 	char *principal = NULL;
 	char *s;
 	int rc;
-	int nullrpc_fd = -1;
-	struct event *nullrpc_event = NULL;
 
 	conf_init_file(NFS_CONFFILE);
 
@@ -250,22 +313,19 @@  main(int argc, char *argv[])
 		}
 	}
 
-#define NULLRPC_FILE "/proc/net/rpc/auth.rpcsec.init/channel"
-
-	nullrpc_fd = open(NULLRPC_FILE, O_RDWR);
-	if (nullrpc_fd < 0) {
-		printerr(0, "failed to open %s: %s\n",
-			 NULLRPC_FILE, strerror(errno));
-		exit(1);
-	}
-	nullrpc_event = event_new(evbase, nullrpc_fd, EV_READ | EV_PERSIST,
-				  svcgssd_nullrpc_cb, NULL);
+	svcgssd_nullrpc_open();
 	if (!nullrpc_event) {
-		printerr(0, "failed to create event for %s: %s\n",
-			 NULLRPC_FILE, strerror(errno));
-		exit(1);
+		struct timeval t = {1, 0};
+
+		printerr(2, "waiting for nullrpc channel to appear\n");
+		wait_event = evtimer_new(evbase, svcgssd_wait_cb, NULL);
+		if (!wait_event) {
+			printerr(0, "ERROR: failed to create wait event: %s\n",
+				 strerror(errno));
+			exit(EXIT_FAILURE);
+		}
+		evtimer_add(wait_event, &t);
 	}
-	event_add(nullrpc_event, NULL);
 
 	daemon_ready();
 
@@ -275,8 +335,9 @@  main(int argc, char *argv[])
 	if (rc < 0)
 		printerr(0, "event_base_dispatch() returned %i!\n", rc);
 
-	event_free(nullrpc_event);
-	close(nullrpc_fd);
+	svcgssd_nullrpc_close();
+	if (wait_event)
+		event_free(wait_event);
 
 	event_base_free(evbase);