diff mbox

[v2,1/7] libmultipath: fix tur checker locking

Message ID 20180210003638.GY14513@octiron.msp.redhat.com (mailing list archive)
State Not Applicable, archived
Delegated to: Mike Snitzer
Headers show

Commit Message

Benjamin Marzinski Feb. 10, 2018, 12:36 a.m. UTC
On Sat, Feb 10, 2018 at 12:36:05AM +0100, Martin Wilck wrote:
> On Sat, 2018-02-10 at 00:28 +0100, Martin Wilck wrote:
> > Maybe it's easier than we thought. Attached is a patch on top of
> > yours that I think might work, please have a look. 
> > 
> 
> That one didn't even compile. This one is better.
> 
> Martin

How about this one instead. The idea is that once we are in the cleanup
handler, we just cleanup and exit. But before we enter it, we atomically
exchange running, and if running was 0, we pause(), since the checker is
either about to cancel us, or already has.



> 
> -- 
> Dr. Martin Wilck <mwilck@suse.com>, Tel. +49 (0)911 74053 2107
> SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
> HRB 21284 (AG Nürnberg)

> From afb9c7de3658d49c4f28f6b9ee618a87b806ecdd Mon Sep 17 00:00:00 2001
> From: Martin Wilck <mwilck@suse.com>
> Date: Sat, 10 Feb 2018 00:22:17 +0100
> Subject: [PATCH] tur checker: make sure pthread_cancel isn't called for exited
>  thread
> 
> If we enter the cleanup function as the result of a pthread_cancel by another
> thread, we don't need to wait for a cancellation any more. If we exit
> regularly, just tell the other thread not to try to cancel us.
> ---
>  libmultipath/checkers/tur.c | 9 +++++----
>  1 file changed, 5 insertions(+), 4 deletions(-)
> 
> diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c
> index 894ad41c89c3..5d2b36bfa883 100644
> --- a/libmultipath/checkers/tur.c
> +++ b/libmultipath/checkers/tur.c
> @@ -214,15 +214,13 @@ retry:
>  
>  static void cleanup_func(void *data)
>  {
> -	int running, holders;
> +	int holders;
>  	struct tur_checker_context *ct = data;
>  
> -	running = uatomic_xchg(&ct->running, 0);
> +	uatomic_set(&ct->running, 0);
>  	holders = uatomic_sub_return(&ct->holders, 1);
>  	if (!holders)
>  		cleanup_context(ct);
> -	if (!running)
> -		pause();
>  }
>  
>  static int tur_running(struct tur_checker_context *ct)
> @@ -266,6 +264,9 @@ static void *tur_thread(void *ctx)
>  	pthread_cond_signal(&ct->active);
>  	pthread_mutex_unlock(&ct->lock);
>  
> +	/* Tell main checker thread not to cancel us, as we exit anyway */
> +	uatomic_set(&ct->running, 0);
> +
>  	condlog(3, "%s: tur checker finished, state %s",
>  		tur_devt(devt, sizeof(devt), ct), checker_state_name(state));
>  	tur_thread_cleanup_pop(ct);
> -- 
> 2.16.1
> 

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Comments

Martin Wilck Feb. 10, 2018, 4:11 p.m. UTC | #1
On Fri, 2018-02-09 at 18:36 -0600, Benjamin Marzinski wrote:
> On Sat, Feb 10, 2018 at 12:36:05AM +0100, Martin Wilck wrote:
> > On Sat, 2018-02-10 at 00:28 +0100, Martin Wilck wrote:
> > > Maybe it's easier than we thought. Attached is a patch on top of
> > > yours that I think might work, please have a look. 
> > > 
> > 
> > That one didn't even compile. This one is better.
> > 
> > Martin
> 
> How about this one instead. The idea is that once we are in the
> cleanup
> handler, we just cleanup and exit. But before we enter it, we
> atomically
> exchange running, and if running was 0, we pause(), since the checker
> is
> either about to cancel us, or already has.
> 

Yes, that should work. Nice.

Regards,
Martin


> diff --git a/libmultipath/checkers/tur.c
> b/libmultipath/checkers/tur.c
> index 894ad41..3774a17 100644
> --- a/libmultipath/checkers/tur.c
> +++ b/libmultipath/checkers/tur.c
> @@ -214,15 +214,12 @@ retry:
>  
>  static void cleanup_func(void *data)
>  {
> -	int running, holders;
> +	int holders;
>  	struct tur_checker_context *ct = data;
>  
> -	running = uatomic_xchg(&ct->running, 0);
>  	holders = uatomic_sub_return(&ct->holders, 1);
>  	if (!holders)
>  		cleanup_context(ct);
> -	if (!running)
> -		pause();
>  }
>  
>  static int tur_running(struct tur_checker_context *ct)
> @@ -242,7 +239,7 @@ static void copy_msg_to_tcc(void *ct_p, const
> char *msg)
>  static void *tur_thread(void *ctx)
>  {
>  	struct tur_checker_context *ct = ctx;
> -	int state;
> +	int state, running;
>  	char devt[32];
>  
>  	condlog(3, "%s: tur checker starting up",
> @@ -268,6 +265,11 @@ static void *tur_thread(void *ctx)
>  
>  	condlog(3, "%s: tur checker finished, state %s",
>  		tur_devt(devt, sizeof(devt), ct),
> checker_state_name(state));
> +
> +	running = uatomic_xchg(&ct->running, 0);
> +	if (!running)
> +		pause();
> +
>  	tur_thread_cleanup_pop(ct);
>  
>  	return ((void *)0);
> 
> 
> > 
> > -- 
> > Dr. Martin Wilck <mwilck@suse.com>, Tel. +49 (0)911 74053 2107
> > SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham
> > Norton
> > HRB 21284 (AG Nürnberg)
> > From afb9c7de3658d49c4f28f6b9ee618a87b806ecdd Mon Sep 17 00:00:00
> > 2001
> > From: Martin Wilck <mwilck@suse.com>
> > Date: Sat, 10 Feb 2018 00:22:17 +0100
> > Subject: [PATCH] tur checker: make sure pthread_cancel isn't called
> > for exited
> >  thread
> > 
> > If we enter the cleanup function as the result of a pthread_cancel
> > by another
> > thread, we don't need to wait for a cancellation any more. If we
> > exit
> > regularly, just tell the other thread not to try to cancel us.
> > ---
> >  libmultipath/checkers/tur.c | 9 +++++----
> >  1 file changed, 5 insertions(+), 4 deletions(-)
> > 
> > diff --git a/libmultipath/checkers/tur.c
> > b/libmultipath/checkers/tur.c
> > index 894ad41c89c3..5d2b36bfa883 100644
> > --- a/libmultipath/checkers/tur.c
> > +++ b/libmultipath/checkers/tur.c
> > @@ -214,15 +214,13 @@ retry:
> >  
> >  static void cleanup_func(void *data)
> >  {
> > -	int running, holders;
> > +	int holders;
> >  	struct tur_checker_context *ct = data;
> >  
> > -	running = uatomic_xchg(&ct->running, 0);
> > +	uatomic_set(&ct->running, 0);
> >  	holders = uatomic_sub_return(&ct->holders, 1);
> >  	if (!holders)
> >  		cleanup_context(ct);
> > -	if (!running)
> > -		pause();
> >  }
> >  
> >  static int tur_running(struct tur_checker_context *ct)
> > @@ -266,6 +264,9 @@ static void *tur_thread(void *ctx)
> >  	pthread_cond_signal(&ct->active);
> >  	pthread_mutex_unlock(&ct->lock);
> >  
> > +	/* Tell main checker thread not to cancel us, as we exit
> > anyway */
> > +	uatomic_set(&ct->running, 0);
> > +
> >  	condlog(3, "%s: tur checker finished, state %s",
> >  		tur_devt(devt, sizeof(devt), ct),
> > checker_state_name(state));
> >  	tur_thread_cleanup_pop(ct);
> > -- 
> > 2.16.1
> > 
> 
>
Martin Wilck Feb. 10, 2018, 7:42 p.m. UTC | #2
On Sat, 2018-02-10 at 17:11 +0100, Martin Wilck wrote:
> On Fri, 2018-02-09 at 18:36 -0600, Benjamin Marzinski wrote:
> > On Sat, Feb 10, 2018 at 12:36:05AM +0100, Martin Wilck wrote:
> > > On Sat, 2018-02-10 at 00:28 +0100, Martin Wilck wrote:
> > > > Maybe it's easier than we thought. Attached is a patch on top
> > > > of
> > > > yours that I think might work, please have a look. 
> > > > 
> > > 
> > > That one didn't even compile. This one is better.
> > > 
> > > Martin
> > 
> > How about this one instead. The idea is that once we are in the
> > cleanup
> > handler, we just cleanup and exit. But before we enter it, we
> > atomically
> > exchange running, and if running was 0, we pause(), since the
> > checker
> > is
> > either about to cancel us, or already has.
> > 
> 
> Yes, that should work. Nice.

... but I just realized that we don't rcu_register_thread() the TUR
thread. Maybe we should if we use RCU primitives?

Martin
Benjamin Marzinski Feb. 12, 2018, 6:44 p.m. UTC | #3
On Sat, Feb 10, 2018 at 08:42:31PM +0100, Martin Wilck wrote:
> On Sat, 2018-02-10 at 17:11 +0100, Martin Wilck wrote:
> > On Fri, 2018-02-09 at 18:36 -0600, Benjamin Marzinski wrote:
> > > On Sat, Feb 10, 2018 at 12:36:05AM +0100, Martin Wilck wrote:
> > > > On Sat, 2018-02-10 at 00:28 +0100, Martin Wilck wrote:
> > > > > Maybe it's easier than we thought. Attached is a patch on top
> > > > > of
> > > > > yours that I think might work, please have a look. 
> > > > > 
> > > > 
> > > > That one didn't even compile. This one is better.
> > > > 
> > > > Martin
> > > 
> > > How about this one instead. The idea is that once we are in the
> > > cleanup
> > > handler, we just cleanup and exit. But before we enter it, we
> > > atomically
> > > exchange running, and if running was 0, we pause(), since the
> > > checker
> > > is
> > > either about to cancel us, or already has.
> > > 
> > 
> > Yes, that should work. Nice.
> 
> ... but I just realized that we don't rcu_register_thread() the TUR
> thread. Maybe we should if we use RCU primitives?

Looking online, I see this.

"void rcu_register_thread(void): For all RCU flavors other than
bullet-proof RCU, each thread must invoke this function before its first
call to rcu_read_lock() or call_rcu(). If a given thread never invokes
any RCU read-side functions, it need not invoke
rcu_register_thread(void)."

That makes it sound like this is unnecessary for using the atomic
operations. Did you see something that makes you think differently? I
probably won't hurt to add it anyway. But, if it's fuzzy how to
correctly use the rcu operations, we could just go back to wrapping
normal operations in a spin_lock, now that we have a method that's not
excessively complicated, and would only require holding the lock for
simple variable manipulations.

Ben

> 
> Martin
> 
> -- 
> Dr. Martin Wilck <mwilck@suse.com>, Tel. +49 (0)911 74053 2107
> SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
> HRB 21284 (AG Nürnberg)

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Martin Wilck Feb. 12, 2018, 7:16 p.m. UTC | #4
On Mon, 2018-02-12 at 12:44 -0600, Benjamin Marzinski wrote:
> On Sat, Feb 10, 2018 at 08:42:31PM +0100, Martin Wilck wrote:
> > On Sat, 2018-02-10 at 17:11 +0100, Martin Wilck wrote:
> > > On Fri, 2018-02-09 at 18:36 -0600, Benjamin Marzinski wrote:
> > > > On Sat, Feb 10, 2018 at 12:36:05AM +0100, Martin Wilck wrote:
> > > > > On Sat, 2018-02-10 at 00:28 +0100, Martin Wilck wrote:
> > > > > > Maybe it's easier than we thought. Attached is a patch on
> > > > > > top
> > > > > > of
> > > > > > yours that I think might work, please have a look. 
> > > > > > 
> > > > > 
> > > > > That one didn't even compile. This one is better.
> > > > > 
> > > > > Martin
> > > > 
> > > > How about this one instead. The idea is that once we are in the
> > > > cleanup
> > > > handler, we just cleanup and exit. But before we enter it, we
> > > > atomically
> > > > exchange running, and if running was 0, we pause(), since the
> > > > checker
> > > > is
> > > > either about to cancel us, or already has.
> > > > 
> > > 
> > > Yes, that should work. Nice.
> > 
> > ... but I just realized that we don't rcu_register_thread() the TUR
> > thread. Maybe we should if we use RCU primitives?
> 
> Looking online, I see this.
> 
> "void rcu_register_thread(void): For all RCU flavors other than
> bullet-proof RCU, each thread must invoke this function before its
> first
> call to rcu_read_lock() or call_rcu(). If a given thread never
> invokes
> any RCU read-side functions, it need not invoke
> rcu_register_thread(void)."
> 
> That makes it sound like this is unnecessary for using the atomic
> operations. Did you see something that makes you think differently? I
> probably won't hurt to add it anyway. But, if it's fuzzy how to
> correctly use the rcu operations, we could just go back to wrapping
> normal operations in a spin_lock, now that we have a method that's
> not
> excessively complicated, and would only require holding the lock for
> simple variable manipulations.

OK, it's not strictly necessary then. Thanks for checking.
I wouldn't prefer going back to spinlocks. I think the atomic
operations are a step in the right direction.

Martin
diff mbox

Patch

diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c
index 894ad41..3774a17 100644
--- a/libmultipath/checkers/tur.c
+++ b/libmultipath/checkers/tur.c
@@ -214,15 +214,12 @@  retry:
 
 static void cleanup_func(void *data)
 {
-	int running, holders;
+	int holders;
 	struct tur_checker_context *ct = data;
 
-	running = uatomic_xchg(&ct->running, 0);
 	holders = uatomic_sub_return(&ct->holders, 1);
 	if (!holders)
 		cleanup_context(ct);
-	if (!running)
-		pause();
 }
 
 static int tur_running(struct tur_checker_context *ct)
@@ -242,7 +239,7 @@  static void copy_msg_to_tcc(void *ct_p, const char *msg)
 static void *tur_thread(void *ctx)
 {
 	struct tur_checker_context *ct = ctx;
-	int state;
+	int state, running;
 	char devt[32];
 
 	condlog(3, "%s: tur checker starting up",
@@ -268,6 +265,11 @@  static void *tur_thread(void *ctx)
 
 	condlog(3, "%s: tur checker finished, state %s",
 		tur_devt(devt, sizeof(devt), ct), checker_state_name(state));
+
+	running = uatomic_xchg(&ct->running, 0);
+	if (!running)
+		pause();
+
 	tur_thread_cleanup_pop(ct);
 
 	return ((void *)0);