
[v2] Re: Strange segmentation violations of rpc.gssd in Debian Buster

Message ID bebca60d-09e4-f118-c195-c6245e6496fb@nazar.ca (mailing list archive)
State New, archived
Headers show
Series [v2] Re: Strange segmentation violations of rpc.gssd in Debian Buster

Commit Message

Doug Nazar June 26, 2020, 9:30 p.m. UTC
On 2020-06-26 17:02, J. Bruce Fields wrote:
> Unless I'm missing something--an upcall thread could still be using this
> file descriptor.
>
> If we're particularly unlucky, we could do a new open in a moment and
> reuse this file descriptor number, and then the writes in do_downcall()
> could end up going to some other random file.
>
> I think we want these closes done by gssd_free_client() in the !refcnt
> case?

Makes sense. I was thinking more that it was an abort situation and we 
shouldn't be sending any data to the kernel but re-use is definitely a 
concern.
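
As a toy illustration of the reuse hazard (not gssd code; the path and
message are made up), open() always hands back the lowest free descriptor
number, so a stale copy of that number ends up pointing at the wrong file:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int saved = open("/dev/null", O_WRONLY);	/* what an upcall thread would hold */
	close(saved);					/* the early close in the old code path */
	int other = open("/tmp/reuse-demo", O_WRONLY | O_CREAT, 0600);
	/* open() returns the lowest free descriptor, so 'other' == 'saved' */
	printf("saved=%d other=%d\n", saved, other);
	if (write(saved, "downcall\n", 9) < 0)		/* lands in /tmp/reuse-demo, not /dev/null */
		perror("write");
	close(other);
	return 0;
}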

I've split it so that we are removed from the event loop in destroy() 
but the close happens in free().
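
Condensed, the split looks like this (simplified from the patch below; the
krb5 pipe, inotify watch and string members are omitted):

static void
gssd_free_client(struct clnt_info *clp)
{
	/* Drop one reference; only the last holder closes the fd and frees. */
	pthread_mutex_lock(&clp_lock);
	int refcnt = --clp->refcount;
	pthread_mutex_unlock(&clp_lock);
	if (refcnt > 0)
		return;

	if (clp->gssd_fd >= 0)
		close(clp->gssd_fd);
	free(clp);
}

static void
gssd_destroy_client(struct clnt_info *clp)
{
	/* Scan path: stop event delivery right away, then drop the list's reference. */
	if (clp->gssd_fd >= 0)
		event_del(&clp->gssd_ev);
	gssd_free_client(clp);
}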

Doug
From 8ef49081e8a42bfa05bb63265cd4f951e2b23413 Mon Sep 17 00:00:00 2001
From: Doug Nazar <nazard@nazar.ca>
Date: Fri, 26 Jun 2020 16:02:04 -0400
Subject: [PATCH] gssd: Refcount struct clnt_info to protect multithread usage

Struct clnt_info is shared with the various upcall threads so
we need to ensure that it stays around even if the client dir
gets removed.

Reported-by: Sebastian Kraus <sebastian.kraus@tu-berlin.de>
Signed-off-by: Doug Nazar <nazard@nazar.ca>
---
 utils/gssd/gssd.c      | 67 ++++++++++++++++++++++++++++++++----------
 utils/gssd/gssd.h      |  5 ++--
 utils/gssd/gssd_proc.c |  4 +--
 3 files changed, 55 insertions(+), 21 deletions(-)

Comments

J. Bruce Fields June 26, 2020, 9:44 p.m. UTC | #1
On Fri, Jun 26, 2020 at 05:30:15PM -0400, Doug Nazar wrote:
> On 2020-06-26 17:02, J. Bruce Fields wrote:
> >Unless I'm missing something--an upcall thread could still be using this
> >file descriptor.
> >
> >If we're particularly unlucky, we could do a new open in a moment and
> >reuse this file descriptor number, and then the writes in do_downcall()
> >could end up going to some other random file.
> >
> >I think we want these closes done by gssd_free_client() in the !refcnt
> >case?
> 
> Makes sense. I was thinking more that it was an abort situation and
> we shouldn't be sending any data to the kernel but re-use is
> definitely a concern.
> 
> I've split it so that we are removed from the event loop in
> destroy() but the close happens in free().

Looks good to me.  Steve?

--b.

> 
> Doug
> 

> From 8ef49081e8a42bfa05bb63265cd4f951e2b23413 Mon Sep 17 00:00:00 2001
> From: Doug Nazar <nazard@nazar.ca>
> Date: Fri, 26 Jun 2020 16:02:04 -0400
> Subject: [PATCH] gssd: Refcount struct clnt_info to protect multithread usage
> 
> Struct clnt_info is shared with the various upcall threads so
> we need to ensure that it stays around even if the client dir
> gets removed.
> 
> Reported-by: Sebastian Kraus <sebastian.kraus@tu-berlin.de>
> Signed-off-by: Doug Nazar <nazard@nazar.ca>
> ---
>  utils/gssd/gssd.c      | 67 ++++++++++++++++++++++++++++++++----------
>  utils/gssd/gssd.h      |  5 ++--
>  utils/gssd/gssd_proc.c |  4 +--
>  3 files changed, 55 insertions(+), 21 deletions(-)
> 
> diff --git a/utils/gssd/gssd.c b/utils/gssd/gssd.c
> index 588da0fb..b40c3220 100644
> --- a/utils/gssd/gssd.c
> +++ b/utils/gssd/gssd.c
> @@ -90,9 +90,7 @@ char *ccachedir = NULL;
>  /* Avoid DNS reverse lookups on server names */
>  static bool avoid_dns = true;
>  static bool use_gssproxy = false;
> -int thread_started = false;
> -pthread_mutex_t pmutex = PTHREAD_MUTEX_INITIALIZER;
> -pthread_cond_t pcond = PTHREAD_COND_INITIALIZER;
> +pthread_mutex_t clp_lock = PTHREAD_MUTEX_INITIALIZER;
>  
>  TAILQ_HEAD(topdir_list_head, topdir) topdir_list;
>  
> @@ -359,20 +357,28 @@ out:
>  	free(port);
>  }
>  
> +/* Actually frees clp and fields that might be used from other
> + * threads if was last reference.
> + */
>  static void
> -gssd_destroy_client(struct clnt_info *clp)
> +gssd_free_client(struct clnt_info *clp)
>  {
> -	if (clp->krb5_fd >= 0) {
> +	int refcnt;
> +
> +	pthread_mutex_lock(&clp_lock);
> +	refcnt = --clp->refcount;
> +	pthread_mutex_unlock(&clp_lock);
> +	if (refcnt > 0)
> +		return;
> +
> +	printerr(3, "freeing client %s\n", clp->relpath);
> +
> +	if (clp->krb5_fd >= 0)
>  		close(clp->krb5_fd);
> -		event_del(&clp->krb5_ev);
> -	}
>  
> -	if (clp->gssd_fd >= 0) {
> +	if (clp->gssd_fd >= 0)
>  		close(clp->gssd_fd);
> -		event_del(&clp->gssd_ev);
> -	}
>  
> -	inotify_rm_watch(inotify_fd, clp->wd);
>  	free(clp->relpath);
>  	free(clp->servicename);
>  	free(clp->servername);
> @@ -380,6 +386,24 @@ gssd_destroy_client(struct clnt_info *clp)
>  	free(clp);
>  }
>  
> +/* Called when removing from clnt_list to tear down event handling.
> + * Will then free clp if was last reference.
> + */
> +static void
> +gssd_destroy_client(struct clnt_info *clp)
> +{
> +	printerr(3, "destroying client %s\n", clp->relpath);
> +
> +	if (clp->krb5_fd >= 0)
> +		event_del(&clp->krb5_ev);
> +
> +	if (clp->gssd_fd >= 0)
> +		event_del(&clp->gssd_ev);
> +
> +	inotify_rm_watch(inotify_fd, clp->wd);
> +	gssd_free_client(clp);
> +}
> +
>  static void gssd_scan(void);
>  
>  static int
> @@ -416,11 +440,21 @@ static struct clnt_upcall_info *alloc_upcall_info(struct clnt_info *clp)
>  	info = malloc(sizeof(struct clnt_upcall_info));
>  	if (info == NULL)
>  		return NULL;
> +
> +	pthread_mutex_lock(&clp_lock);
> +	clp->refcount++;
> +	pthread_mutex_unlock(&clp_lock);
>  	info->clp = clp;
>  
>  	return info;
>  }
>  
> +void free_upcall_info(struct clnt_upcall_info *info)
> +{
> +	gssd_free_client(info->clp);
> +	free(info);
> +}
> +
>  /* For each upcall read the upcall info into the buffer, then create a
>   * thread in a detached state so that resources are released back into
>   * the system without the need for a join.
> @@ -438,13 +472,13 @@ gssd_clnt_gssd_cb(int UNUSED(fd), short UNUSED(which), void *data)
>  	info->lbuflen = read(clp->gssd_fd, info->lbuf, sizeof(info->lbuf));
>  	if (info->lbuflen <= 0 || info->lbuf[info->lbuflen-1] != '\n') {
>  		printerr(0, "WARNING: %s: failed reading request\n", __func__);
> -		free(info);
> +		free_upcall_info(info);
>  		return;
>  	}
>  	info->lbuf[info->lbuflen-1] = 0;
>  
>  	if (start_upcall_thread(handle_gssd_upcall, info))
> -		free(info);
> +		free_upcall_info(info);
>  }
>  
>  static void
> @@ -461,12 +495,12 @@ gssd_clnt_krb5_cb(int UNUSED(fd), short UNUSED(which), void *data)
>  			sizeof(info->uid)) < (ssize_t)sizeof(info->uid)) {
>  		printerr(0, "WARNING: %s: failed reading uid from krb5 "
>  			 "upcall pipe: %s\n", __func__, strerror(errno));
> -		free(info);
> +		free_upcall_info(info);
>  		return;
>  	}
>  
>  	if (start_upcall_thread(handle_krb5_upcall, info))
> -		free(info);
> +		free_upcall_info(info);
>  }
>  
>  static struct clnt_info *
> @@ -501,6 +535,7 @@ gssd_get_clnt(struct topdir *tdi, const char *name)
>  	clp->name = clp->relpath + strlen(tdi->name) + 1;
>  	clp->krb5_fd = -1;
>  	clp->gssd_fd = -1;
> +	clp->refcount = 1;
>  
>  	TAILQ_INSERT_HEAD(&tdi->clnt_list, clp, list);
>  	return clp;
> @@ -651,7 +686,7 @@ gssd_scan_topdir(const char *name)
>  		if (clp->scanned)
>  			continue;
>  
> -		printerr(3, "destroying client %s\n", clp->relpath);
> +		printerr(3, "orphaned client %s\n", clp->relpath);
>  		saveprev = clp->list.tqe_prev;
>  		TAILQ_REMOVE(&tdi->clnt_list, clp, list);
>  		gssd_destroy_client(clp);
> diff --git a/utils/gssd/gssd.h b/utils/gssd/gssd.h
> index f4f59754..d33e4dff 100644
> --- a/utils/gssd/gssd.h
> +++ b/utils/gssd/gssd.h
> @@ -63,12 +63,10 @@ extern unsigned int 		context_timeout;
>  extern unsigned int rpc_timeout;
>  extern char			*preferred_realm;
>  extern pthread_mutex_t ple_lock;
> -extern pthread_cond_t pcond;
> -extern pthread_mutex_t pmutex;
> -extern int thread_started;
>  
>  struct clnt_info {
>  	TAILQ_ENTRY(clnt_info)	list;
> +	int			refcount;
>  	int			wd;
>  	bool			scanned;
>  	char			*name;
> @@ -94,6 +92,7 @@ struct clnt_upcall_info {
>  
>  void handle_krb5_upcall(struct clnt_upcall_info *clp);
>  void handle_gssd_upcall(struct clnt_upcall_info *clp);
> +void free_upcall_info(struct clnt_upcall_info *info);
>  
>  
>  #endif /* _RPC_GSSD_H_ */
> diff --git a/utils/gssd/gssd_proc.c b/utils/gssd/gssd_proc.c
> index 8fe6605b..05c1da64 100644
> --- a/utils/gssd/gssd_proc.c
> +++ b/utils/gssd/gssd_proc.c
> @@ -730,7 +730,7 @@ handle_krb5_upcall(struct clnt_upcall_info *info)
>  	printerr(2, "\n%s: uid %d (%s)\n", __func__, info->uid, clp->relpath);
>  
>  	process_krb5_upcall(clp, info->uid, clp->krb5_fd, NULL, NULL, NULL);
> -	free(info);
> +	free_upcall_info(info);
>  }
>  
>  void
> @@ -830,6 +830,6 @@ handle_gssd_upcall(struct clnt_upcall_info *info)
>  out:
>  	free(upcall_str);
>  out_nomem:
> -	free(info);
> +	free_upcall_info(info);
>  	return;
>  }
> -- 
> 2.26.2
>
Kraus, Sebastian June 29, 2020, 5:39 a.m. UTC | #2
Hi Doug,
thanks very much for your patch and efforts.
I manually backported the patch to nfs-utils 1.3.4-2.5 source in Debian Buster.
I am now testing the modified build on one of my NFSv4 file servers. Looks promising.

One additional question: Which nfs-utils branch are you working on - steved/nfs-utils.git?

Best Sebastian
Doug Nazar June 29, 2020, 2:09 p.m. UTC | #3
On 2020-06-29 01:39, Kraus, Sebastian wrote:
> Hi Doug,
> thanks very much for your patch and efforts.
> I manually backported the patch to nfs-utils 1.3.4-2.5 source in Debian Buster.
> I am now testing the modified build on one of my NFSv4 file servers. Looks promising.
>
> One additional question: Which nfs-utils branch are you working on - steved/nfs-utils.git?

Yes, I'm working against upstream. I did check briefly that the code 
hadn't changed too much since 1.3.4 in that area.

I've found one other place that has insufficient locking but the race to 
hit it is fairly small. It's in the Kerberos machine principal cache 
when it refreshes the machine credentials. I have a patch for that, but 
it's pretty invasive due to some other changes I'm currently working on. 
Let me know if you hit it, and I can work on a simple version to backport.

Doug
Kraus, Sebastian July 1, 2020, 7:39 a.m. UTC | #4
Hi Doug,

>> Yes, I'm working against upstream. I did check briefly that the code hadn't changed too much since 1.3.4 in that area.
OK, thanks for the info. I wondered, because your patch did not show up as a commit within upstream. 
Your patch seems to do a good job - no more segfaults over a period of four days. :-)

>> I've found one other place that has insufficient locking but the race to hit it is fairly small. It's in the Kerberos machine principal cache when it refreshes the machine credentials. 
These types of patches are always welcome. :-)
In the recent past, some of our scientific staff experienced strange problems with Kerberos authentication against our NFSv4 file servers. 
Maybe the outages were connected with this type of race condition. But I do not know for sure, as the authentication errors happened on a rather sporadic basis.

>> I have a patch for that, but it's pretty invasive due to some other changes I'm currently working on. Let me know if you hit it, and I can work on a simple version to backport.
NFSv4+Kerberos is not for the faint-hearted. I do not fear invasive patches - as long as they are technically correct. ;-)

A question somewhat apart from this:
How widespread are NFSv4+Kerberos setups within the academic community and in commercial environments?
Are there, to your knowledge, any bigger on-premises or cloud setups out there?
And are there any companies running dedicated NFSv4+Kerberos setups?


Best and keep well and fit
Sebastian
Peter Eriksson July 1, 2020, 8:13 a.m. UTC | #5
>>> I've found one other place that has insufficient locking but the race to hit it is fairly small. It's in the Kerberos machine principal cache when it refreshes the machine credentials. 
> These types of patches are always welcome. :-)
> In the recent past, some of our scientific staff experienced strange problems with Kerberos authentication against our NFSv4 file servers. 
> Maybe the outages were connected with this type of race condition. But I do not know for sure, as the authentication errors happened on a rather sporadic basis.

We (Linköping University in Sweden) have seen these problems before too. I sent a patch for rpc.gssd this spring that “fixed” this problem too (well, it fixed the symptom and not the root cause, so it wasn’t the right fix). Without that patch we typically had rpc.gssd crash on our multiuser client servers every other day. It was partly masked by Puppet detecting it down and restarting it, but the users reported strange errors and then, when the support folks checked, everything was running :-). It also crashed very often on a set of test machines that would connect to our NFS servers every minute to verify that they were running and giving good response times. Multiple NFS connections being set up and torn down concurrently easily forced this problem to happen after a day or two.


> A question somewhat apart from this:
> How widespread are NFSv4+Kerberos setups within the academic community and in commercial environments?

We are using NFSv4+Kerberos. Most of our users are SMBv3 clients (Windows & Mac, 10x the Linux users), but we have some 600 NFS clients (99.9% Linux-based, mostly CentOS & Ubuntu; the servers are FreeBSD with ZFS). We used to be a big Sun/Solaris NFS shop, so NFS comes “naturally” to us :-)

(Would have loved to use NFSv4+Kerberos on the MacOS clients, but unfortunately MacOS panics when the Kerberos ticket expires while an NFS share is still mounted, which is a bit of a bummer :-)

(Using NFS v3 or lower, and without Kerberos, isn’t really an option - real ACLs and some sort of security are really needed)


Anyway - it’s good to see that the root cause for this bug has been found and fixed the right way :-)

- Peter

> Are there, to your knowledge, any bigger on-premises or cloud setups out there?
> And are there any companies running dedicated NFSv4+Kerberos setups?
> 
> 
> Best and keep well and fit
> Sebastian
> 
> _________________
> Sebastian Kraus
> Team IT am Institut für Chemie
> Gebäude C, Straße des 17. Juni 115, Raum C7
> 
> Technische Universität Berlin
> Fakultät II
> Institut für Chemie
> Sekretariat C3
> Straße des 17. Juni 135
> 10623 Berlin
> 
> 
> Tel.: +49 30 314 22263
> Fax: +49 30 314 29309
> Email: sebastian.kraus@tu-berlin.de
> 
> ________________________________________
> From: Doug Nazar <nazard@nazar.ca>
> Sent: Monday, June 29, 2020 16:09
> To: Kraus, Sebastian
> Cc: linux-nfs@vger.kernel.org
> Subject: Re: [PATCH v2] Re: Strange segmentation violations of rpc.gssd in Debian Buster
> 
> On 2020-06-29 01:39, Kraus, Sebastian wrote:
>> Hi Doug,
>> thanks very much for your patch and efforts.
>> I manually backported the patch to nfs-utils 1.3.4-2.5 source in Debian Buster.
>> I am now testing the modified build on one of my NFSv4 file servers. Looks promising.
>> 
>> One additional question: Which nfs-utils branch are you working on - steved/nfs-utils.git?
> 
> Yes, I'm working against upstream. I did check briefly that the code
> hadn't changed too much since 1.3.4 in that area.
> 
> I've found one other place that has insufficient locking but the race to
> hit it is fairly small. It's in the Kerberos machine principal cache
> when it refreshes the machine credentials. I have a patch for that, but
> it's pretty invasive due to some other changes I'm currently working on.
> Let me know if you hit it, and I can work on a simple version to backport.
> 
> Doug
>
Doug Nazar July 1, 2020, 6:45 p.m. UTC | #6
On 2020-07-01 03:39, Kraus, Sebastian wrote:
> OK, thanks for the info. I wondered, because your patch did not show 
> up as a commit within upstream.
> Your patch seems to do a good job - no more segfaults over a period of four days. :-)

I'm not a maintainer, just an enthusiastic user with a compiler... ;-)
I'm sure it'll get applied in the near future, as time permits.

>> I've found one other place that has insufficient locking but the race to hit it is fairly small. It's in the Kerberos machine principal cache when it refreshes the machine credentials.
> These types of patches are always welcome. :-)
> In the recent past, some of our scientific staff experienced strange problems with Kerberos authentication against our NFSv4 file servers.
> Maybe the outages were connected with this type of race condition. But I do not know for sure, as the authentication errors happened on a rather sporadic basis.

The previous bug could also cause authentication issues without crashing 
depending on load, timing, memory usage, malloc library, etc. This one 
would only crop up during the machine credentials refresh, which by default 
is once every 24 hours. I've just posted a patch, 'gssd: Fix locking for 
machine principal list'; the interesting part for backporting is around 
line 447. It used to always strdup() even if the cache name was the same.
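
The backportable bit boils down to something like this (hypothetical struct
and field names, just the shape of the change rather than the exact code):

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

struct ple {					/* stand-in for the machine principal entry */
	char *ccname;
};

static pthread_mutex_t ple_lock = PTHREAD_MUTEX_INITIALIZER;

static void
set_ccname(struct ple *ple, const char *ccname)
{
	pthread_mutex_lock(&ple_lock);
	if (ple->ccname == NULL || strcmp(ple->ccname, ccname) != 0) {
		char *dup = strdup(ccname);	/* duplicate only when the name really changed */
		if (dup != NULL) {
			free(ple->ccname);
			ple->ccname = dup;
		}
	}
	pthread_mutex_unlock(&ple_lock);
}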

>> I have a patch for that, but it's pretty invasive due to some other changes I'm currently working on. Let me know if you hit it, and I can work on a simple version to backport.
> NFSv4+Kerberos is not for the faint-hearted. I do not fear invasive patches - as long as they are technically correct. ;-)

No guarantees... but I do try.  ;-)

> A question somewhat apart from this:
> How widespread are NFSv4+Kerberos setups within the academic community and in commercial environments?
> Are there, to your knowledge, any bigger on-premises or cloud setups out there?
> And are there any companies running dedicated NFSv4+Kerberos setups?

I really have no idea. I only run it on my home network of a few dozen 
(old) machines. From what I've seen while googling,
trying to figure out how the code base works, there are a fair number of 
users. There's also been a large amount of work in
recent years, which would point to something driving that.

Doug

Patch

diff --git a/utils/gssd/gssd.c b/utils/gssd/gssd.c
index 588da0fb..b40c3220 100644
--- a/utils/gssd/gssd.c
+++ b/utils/gssd/gssd.c
@@ -90,9 +90,7 @@  char *ccachedir = NULL;
 /* Avoid DNS reverse lookups on server names */
 static bool avoid_dns = true;
 static bool use_gssproxy = false;
-int thread_started = false;
-pthread_mutex_t pmutex = PTHREAD_MUTEX_INITIALIZER;
-pthread_cond_t pcond = PTHREAD_COND_INITIALIZER;
+pthread_mutex_t clp_lock = PTHREAD_MUTEX_INITIALIZER;
 
 TAILQ_HEAD(topdir_list_head, topdir) topdir_list;
 
@@ -359,20 +357,28 @@  out:
 	free(port);
 }
 
+/* Actually frees clp and fields that might be used from other
+ * threads if was last reference.
+ */
 static void
-gssd_destroy_client(struct clnt_info *clp)
+gssd_free_client(struct clnt_info *clp)
 {
-	if (clp->krb5_fd >= 0) {
+	int refcnt;
+
+	pthread_mutex_lock(&clp_lock);
+	refcnt = --clp->refcount;
+	pthread_mutex_unlock(&clp_lock);
+	if (refcnt > 0)
+		return;
+
+	printerr(3, "freeing client %s\n", clp->relpath);
+
+	if (clp->krb5_fd >= 0)
 		close(clp->krb5_fd);
-		event_del(&clp->krb5_ev);
-	}
 
-	if (clp->gssd_fd >= 0) {
+	if (clp->gssd_fd >= 0)
 		close(clp->gssd_fd);
-		event_del(&clp->gssd_ev);
-	}
 
-	inotify_rm_watch(inotify_fd, clp->wd);
 	free(clp->relpath);
 	free(clp->servicename);
 	free(clp->servername);
@@ -380,6 +386,24 @@  gssd_destroy_client(struct clnt_info *clp)
 	free(clp);
 }
 
+/* Called when removing from clnt_list to tear down event handling.
+ * Will then free clp if was last reference.
+ */
+static void
+gssd_destroy_client(struct clnt_info *clp)
+{
+	printerr(3, "destroying client %s\n", clp->relpath);
+
+	if (clp->krb5_fd >= 0)
+		event_del(&clp->krb5_ev);
+
+	if (clp->gssd_fd >= 0)
+		event_del(&clp->gssd_ev);
+
+	inotify_rm_watch(inotify_fd, clp->wd);
+	gssd_free_client(clp);
+}
+
 static void gssd_scan(void);
 
 static int
@@ -416,11 +440,21 @@  static struct clnt_upcall_info *alloc_upcall_info(struct clnt_info *clp)
 	info = malloc(sizeof(struct clnt_upcall_info));
 	if (info == NULL)
 		return NULL;
+
+	pthread_mutex_lock(&clp_lock);
+	clp->refcount++;
+	pthread_mutex_unlock(&clp_lock);
 	info->clp = clp;
 
 	return info;
 }
 
+void free_upcall_info(struct clnt_upcall_info *info)
+{
+	gssd_free_client(info->clp);
+	free(info);
+}
+
 /* For each upcall read the upcall info into the buffer, then create a
  * thread in a detached state so that resources are released back into
  * the system without the need for a join.
@@ -438,13 +472,13 @@  gssd_clnt_gssd_cb(int UNUSED(fd), short UNUSED(which), void *data)
 	info->lbuflen = read(clp->gssd_fd, info->lbuf, sizeof(info->lbuf));
 	if (info->lbuflen <= 0 || info->lbuf[info->lbuflen-1] != '\n') {
 		printerr(0, "WARNING: %s: failed reading request\n", __func__);
-		free(info);
+		free_upcall_info(info);
 		return;
 	}
 	info->lbuf[info->lbuflen-1] = 0;
 
 	if (start_upcall_thread(handle_gssd_upcall, info))
-		free(info);
+		free_upcall_info(info);
 }
 
 static void
@@ -461,12 +495,12 @@  gssd_clnt_krb5_cb(int UNUSED(fd), short UNUSED(which), void *data)
 			sizeof(info->uid)) < (ssize_t)sizeof(info->uid)) {
 		printerr(0, "WARNING: %s: failed reading uid from krb5 "
 			 "upcall pipe: %s\n", __func__, strerror(errno));
-		free(info);
+		free_upcall_info(info);
 		return;
 	}
 
 	if (start_upcall_thread(handle_krb5_upcall, info))
-		free(info);
+		free_upcall_info(info);
 }
 
 static struct clnt_info *
@@ -501,6 +535,7 @@  gssd_get_clnt(struct topdir *tdi, const char *name)
 	clp->name = clp->relpath + strlen(tdi->name) + 1;
 	clp->krb5_fd = -1;
 	clp->gssd_fd = -1;
+	clp->refcount = 1;
 
 	TAILQ_INSERT_HEAD(&tdi->clnt_list, clp, list);
 	return clp;
@@ -651,7 +686,7 @@  gssd_scan_topdir(const char *name)
 		if (clp->scanned)
 			continue;
 
-		printerr(3, "destroying client %s\n", clp->relpath);
+		printerr(3, "orphaned client %s\n", clp->relpath);
 		saveprev = clp->list.tqe_prev;
 		TAILQ_REMOVE(&tdi->clnt_list, clp, list);
 		gssd_destroy_client(clp);
diff --git a/utils/gssd/gssd.h b/utils/gssd/gssd.h
index f4f59754..d33e4dff 100644
--- a/utils/gssd/gssd.h
+++ b/utils/gssd/gssd.h
@@ -63,12 +63,10 @@  extern unsigned int 		context_timeout;
 extern unsigned int rpc_timeout;
 extern char			*preferred_realm;
 extern pthread_mutex_t ple_lock;
-extern pthread_cond_t pcond;
-extern pthread_mutex_t pmutex;
-extern int thread_started;
 
 struct clnt_info {
 	TAILQ_ENTRY(clnt_info)	list;
+	int			refcount;
 	int			wd;
 	bool			scanned;
 	char			*name;
@@ -94,6 +92,7 @@  struct clnt_upcall_info {
 
 void handle_krb5_upcall(struct clnt_upcall_info *clp);
 void handle_gssd_upcall(struct clnt_upcall_info *clp);
+void free_upcall_info(struct clnt_upcall_info *info);
 
 
 #endif /* _RPC_GSSD_H_ */
diff --git a/utils/gssd/gssd_proc.c b/utils/gssd/gssd_proc.c
index 8fe6605b..05c1da64 100644
--- a/utils/gssd/gssd_proc.c
+++ b/utils/gssd/gssd_proc.c
@@ -730,7 +730,7 @@  handle_krb5_upcall(struct clnt_upcall_info *info)
 	printerr(2, "\n%s: uid %d (%s)\n", __func__, info->uid, clp->relpath);
 
 	process_krb5_upcall(clp, info->uid, clp->krb5_fd, NULL, NULL, NULL);
-	free(info);
+	free_upcall_info(info);
 }
 
 void
@@ -830,6 +830,6 @@  handle_gssd_upcall(struct clnt_upcall_info *info)
 out:
 	free(upcall_str);
 out_nomem:
-	free(info);
+	free_upcall_info(info);
 	return;
 }