diff mbox series

[18/23] libmultipath: fix log_thread startup and teardown

Message ID 20200924134054.14632-19-mwilck@suse.com (mailing list archive)
State Not Applicable, archived
Delegated to: christophe varoqui
Headers show
Series libmultipath: improve cleanup on exit | expand

Commit Message

Martin Wilck Sept. 24, 2020, 1:40 p.m. UTC
From: Martin Wilck <mwilck@suse.com>

This fixes several issues with the log_thread. First, the running
flag logq_running should be set by the thread itself, not by
log_thread_start()/_stop(). Second, the thread was both cancelled and
terminated via a flag (again, logq_running). It's sufficient,
and better, to just cancel it and use logq_running as indication for
successful termination. Third, the locking wasn't cancel-safe in some
places. Forth, log_thread_start() and log_thread_stop() didn't wait for
startup/teardown properly. Fifth, using (pthread_t)0 is wrong (pthread_t is
opaque; there's no guarantee that 0 is not a valid pthread_t value). Finally,
pthread_cancel() was called under logq_lock, which doesn't make sense to
me at all.

Signed-off-by: Martin Wilck <mwilck@suse.com>
---
 libmultipath/log_pthread.c | 59 ++++++++++++++++++++++++++++----------
 1 file changed, 44 insertions(+), 15 deletions(-)

Comments

Benjamin Marzinski Sept. 28, 2020, 8:15 p.m. UTC | #1
On Thu, Sep 24, 2020 at 03:40:49PM +0200, mwilck@suse.com wrote:
> From: Martin Wilck <mwilck@suse.com>
> 
> This fixes several issues with the log_thread. First, the running
> flag logq_running should be set by the thread itself, not by
> log_thread_start()/_stop(). Second, the thread was both cancelled and
> terminated via a flag (again, logq_running). It's sufficient,
> and better, to just cancel it and use logq_running as indication for
> successful termination. Third, the locking wasn't cancel-safe in some
> places. Forth, log_thread_start() and log_thread_stop() didn't wait for
> startup/teardown properly. Fifth, using (pthread_t)0 is wrong (pthread_t is
> opaque; there's no guarantee that 0 is not a valid pthread_t value). Finally,
> pthread_cancel() was called under logq_lock, which doesn't make sense to
> me at all.
> 
> Signed-off-by: Martin Wilck <mwilck@suse.com>
> ---
>  libmultipath/log_pthread.c | 59 ++++++++++++++++++++++++++++----------
>  1 file changed, 44 insertions(+), 15 deletions(-)
> 
> diff --git a/libmultipath/log_pthread.c b/libmultipath/log_pthread.c
> index 0c327ff..5825e66 100644
> --- a/libmultipath/log_pthread.c
> +++ b/libmultipath/log_pthread.c
> @@ -13,6 +13,7 @@
>  #include "log_pthread.h"
>  #include "log.h"
>  #include "lock.h"
> +#include "util.h"
>  
>  static pthread_t log_thr;
>  
> @@ -56,28 +57,45 @@ static void flush_logqueue (void)
>  	} while (empty == 0);
>  }
>  
> +static void cleanup_log_thread(__attribute((unused)) void *arg)
> +{
> +	logdbg(stderr, "log thread exiting");
> +	pthread_mutex_lock(&logev_lock);
> +	logq_running = 0;
> +	pthread_cond_signal(&logev_cond);
> +	pthread_mutex_unlock(&logev_lock);
> +}
> +
>  static void * log_thread (__attribute__((unused)) void * et)
>  {
>  	int running;
>  
>  	pthread_mutex_lock(&logev_lock);
> -	logq_running = 1;
> +	running = logq_running;
> +	if (!running)
> +		logq_running = 1;
> +	pthread_cond_signal(&logev_cond);
>  	pthread_mutex_unlock(&logev_lock);
> +	if (running)
> +		/* already started */
> +		return NULL;
> +	pthread_cleanup_push(cleanup_log_thread, NULL);
>  
>  	mlockall(MCL_CURRENT | MCL_FUTURE);
>  	logdbg(stderr,"enter log_thread\n");
>  
>  	while (1) {
>  		pthread_mutex_lock(&logev_lock);
> -		if (logq_running && !log_messages_pending)
> +		pthread_cleanup_push(cleanup_mutex, &logev_lock);
> +		while (!log_messages_pending)
> +			/* this is a cancellation point */
>  			pthread_cond_wait(&logev_cond, &logev_lock);
>  		log_messages_pending = 0;
> -		running = logq_running;
> -		pthread_mutex_unlock(&logev_lock);
> -		if (!running)
> -			break;
> +		pthread_cleanup_pop(1);
> +
>  		flush_logqueue();
>  	}
> +	pthread_cleanup_pop(1);
>  	return NULL;
>  }
>  
> @@ -98,6 +116,12 @@ void log_thread_start (pthread_attr_t *attr)
>  		exit(1);
>  	}
>  
> +	pthread_mutex_lock(&logev_lock);
> +	pthread_cleanup_push(cleanup_mutex, &logev_lock);
> +	/* wait for thread startup */
> +	while (!logq_running)
> +		pthread_cond_wait(&logev_cond, &logev_lock);
> +	pthread_cleanup_pop(1);
>  	return;
>  }
>  
> @@ -112,21 +136,26 @@ void log_thread_reset (void)
>  
>  void log_thread_stop (void)
>  {
> +	int running;
> +
>  	if (!la)
>  		return;
> -
>  	logdbg(stderr,"enter log_thread_stop\n");
>  
>  	pthread_mutex_lock(&logev_lock);
> -	logq_running = 0;
> -	pthread_cond_signal(&logev_cond);
> -	pthread_mutex_unlock(&logev_lock);
> +	pthread_cleanup_push(cleanup_mutex, &logev_lock);
> +	running = logq_running;
> +	if (running) {
> +		pthread_cancel(log_thr);
> +		pthread_cond_signal(&logev_cond);
> +	}
> +	while (logq_running)
> +		pthread_cond_wait(&logev_cond, &logev_lock);

If we're already going to join the thread, why do we have to wait for
the cleanup handler first? Won't the join do the waiting we need?

-Ben

> +	pthread_cleanup_pop(1);
> +
> +	if (running)
> +		pthread_join(log_thr, NULL);
>  
> -	pthread_mutex_lock(&logq_lock);
> -	pthread_cancel(log_thr);
> -	pthread_mutex_unlock(&logq_lock);
> -	pthread_join(log_thr, NULL);
> -	log_thr = (pthread_t)0;
>  
>  	flush_logqueue();
>  
> -- 
> 2.28.0

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Martin Wilck Sept. 29, 2020, 9:18 a.m. UTC | #2
On Mon, 2020-09-28 at 15:15 -0500, Benjamin Marzinski wrote:
> On Thu, Sep 24, 2020 at 03:40:49PM +0200, mwilck@suse.com wrote:
> > From: Martin Wilck <mwilck@suse.com>
> > 
> > This fixes several issues with the log_thread. First, the running
> > flag logq_running should be set by the thread itself, not by
> > log_thread_start()/_stop(). Second, the thread was both cancelled
> > and
> > terminated via a flag (again, logq_running). It's sufficient,
> > and better, to just cancel it and use logq_running as indication
> > for
> > successful termination. Third, the locking wasn't cancel-safe in
> > some
> > places. Forth, log_thread_start() and log_thread_stop() didn't wait
> > for
> > startup/teardown properly. Fifth, using (pthread_t)0 is wrong
> > (pthread_t is
> > opaque; there's no guarantee that 0 is not a valid pthread_t
> > value). Finally,
> > pthread_cancel() was called under logq_lock, which doesn't make
> > sense to
> > me at all.
> > 
> > Signed-off-by: Martin Wilck <mwilck@suse.com>
> > ---
> >  
> >  	pthread_mutex_lock(&logev_lock);
> > -	logq_running = 0;
> > -	pthread_cond_signal(&logev_cond);
> > -	pthread_mutex_unlock(&logev_lock);
> > +	pthread_cleanup_push(cleanup_mutex, &logev_lock);
> > +	running = logq_running;
> > +	if (running) {
> > +		pthread_cancel(log_thr);
> > +		pthread_cond_signal(&logev_cond);
> > +	}
> > +	while (logq_running)
> > +		pthread_cond_wait(&logev_cond, &logev_lock);
> 
> If we're already going to join the thread, why do we have to wait for
> the cleanup handler first? Won't the join do the waiting we need?

Yes, I guess it would. I implemented it in a way analogous the startup
sequence, but it's really not necessary for shutdown. Thanks for
pointing it out.

Martin


--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
diff mbox series

Patch

diff --git a/libmultipath/log_pthread.c b/libmultipath/log_pthread.c
index 0c327ff..5825e66 100644
--- a/libmultipath/log_pthread.c
+++ b/libmultipath/log_pthread.c
@@ -13,6 +13,7 @@ 
 #include "log_pthread.h"
 #include "log.h"
 #include "lock.h"
+#include "util.h"
 
 static pthread_t log_thr;
 
@@ -56,28 +57,45 @@  static void flush_logqueue (void)
 	} while (empty == 0);
 }
 
+static void cleanup_log_thread(__attribute((unused)) void *arg)
+{
+	logdbg(stderr, "log thread exiting");
+	pthread_mutex_lock(&logev_lock);
+	logq_running = 0;
+	pthread_cond_signal(&logev_cond);
+	pthread_mutex_unlock(&logev_lock);
+}
+
 static void * log_thread (__attribute__((unused)) void * et)
 {
 	int running;
 
 	pthread_mutex_lock(&logev_lock);
-	logq_running = 1;
+	running = logq_running;
+	if (!running)
+		logq_running = 1;
+	pthread_cond_signal(&logev_cond);
 	pthread_mutex_unlock(&logev_lock);
+	if (running)
+		/* already started */
+		return NULL;
+	pthread_cleanup_push(cleanup_log_thread, NULL);
 
 	mlockall(MCL_CURRENT | MCL_FUTURE);
 	logdbg(stderr,"enter log_thread\n");
 
 	while (1) {
 		pthread_mutex_lock(&logev_lock);
-		if (logq_running && !log_messages_pending)
+		pthread_cleanup_push(cleanup_mutex, &logev_lock);
+		while (!log_messages_pending)
+			/* this is a cancellation point */
 			pthread_cond_wait(&logev_cond, &logev_lock);
 		log_messages_pending = 0;
-		running = logq_running;
-		pthread_mutex_unlock(&logev_lock);
-		if (!running)
-			break;
+		pthread_cleanup_pop(1);
+
 		flush_logqueue();
 	}
+	pthread_cleanup_pop(1);
 	return NULL;
 }
 
@@ -98,6 +116,12 @@  void log_thread_start (pthread_attr_t *attr)
 		exit(1);
 	}
 
+	pthread_mutex_lock(&logev_lock);
+	pthread_cleanup_push(cleanup_mutex, &logev_lock);
+	/* wait for thread startup */
+	while (!logq_running)
+		pthread_cond_wait(&logev_cond, &logev_lock);
+	pthread_cleanup_pop(1);
 	return;
 }
 
@@ -112,21 +136,26 @@  void log_thread_reset (void)
 
 void log_thread_stop (void)
 {
+	int running;
+
 	if (!la)
 		return;
-
 	logdbg(stderr,"enter log_thread_stop\n");
 
 	pthread_mutex_lock(&logev_lock);
-	logq_running = 0;
-	pthread_cond_signal(&logev_cond);
-	pthread_mutex_unlock(&logev_lock);
+	pthread_cleanup_push(cleanup_mutex, &logev_lock);
+	running = logq_running;
+	if (running) {
+		pthread_cancel(log_thr);
+		pthread_cond_signal(&logev_cond);
+	}
+	while (logq_running)
+		pthread_cond_wait(&logev_cond, &logev_lock);
+	pthread_cleanup_pop(1);
+
+	if (running)
+		pthread_join(log_thr, NULL);
 
-	pthread_mutex_lock(&logq_lock);
-	pthread_cancel(log_thr);
-	pthread_mutex_unlock(&logq_lock);
-	pthread_join(log_thr, NULL);
-	log_thr = (pthread_t)0;
 
 	flush_logqueue();