diff mbox series

[10/27] libxfs: add kernel-compatible completion API

Message ID 20201015072155.1631135-11-david@fromorbit.com (mailing list archive)
State Superseded, archived
Headers show
Series xfsprogs: xfs_buf unification and AIO | expand

Commit Message

Dave Chinner Oct. 15, 2020, 7:21 a.m. UTC
From: Dave Chinner <dchinner@redhat.com>

This is needed for the kernel buffer cache conversion to be able
to wait on IO synchrnously. It is implemented with pthread mutexes
and conditional variables.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 include/Makefile     |  1 +
 include/completion.h | 61 ++++++++++++++++++++++++++++++++++++++++++++
 include/libxfs.h     |  1 +
 libxfs/libxfs_priv.h |  1 +
 4 files changed, 64 insertions(+)
 create mode 100644 include/completion.h

Comments

Darrick J. Wong Oct. 15, 2020, 5:09 p.m. UTC | #1
On Thu, Oct 15, 2020 at 06:21:38PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> This is needed for the kernel buffer cache conversion to be able
> to wait on IO synchrnously. It is implemented with pthread mutexes
> and conditional variables.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  include/Makefile     |  1 +
>  include/completion.h | 61 ++++++++++++++++++++++++++++++++++++++++++++
>  include/libxfs.h     |  1 +
>  libxfs/libxfs_priv.h |  1 +
>  4 files changed, 64 insertions(+)
>  create mode 100644 include/completion.h
> 
> diff --git a/include/Makefile b/include/Makefile
> index f7c40a5ce1a1..98031e70fa0d 100644
> --- a/include/Makefile
> +++ b/include/Makefile
> @@ -12,6 +12,7 @@ LIBHFILES = libxfs.h \
>  	atomic.h \
>  	bitops.h \
>  	cache.h \
> +	completion.h \
>  	hlist.h \
>  	kmem.h \
>  	list.h \
> diff --git a/include/completion.h b/include/completion.h
> new file mode 100644
> index 000000000000..92194c3f1484
> --- /dev/null
> +++ b/include/completion.h
> @@ -0,0 +1,61 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2019 RedHat, Inc.
> + * All Rights Reserved.
> + */
> +#ifndef __LIBXFS_COMPLETION_H__
> +#define __LIBXFS_COMPLETION_H__
> +
> +/*
> + * This implements kernel compatible completion semantics. This is slightly
> + * different to the way pthread conditional variables work in that completions
> + * can be signalled before the waiter tries to wait on the variable. In the
> + * pthread case, the completion is ignored and the waiter goes to sleep, whilst
> + * the kernel will see that the completion has already been completed and so
> + * will not block. This is handled through the addition of the the @signalled
> + * flag in the struct completion.

Hmm... do any of the existing pthread_cond_t users need these semantics?

I suspect the ones in scrub/vfs.c might actually be vulnerable to the
signal-before-wait race that this completion structure solves.

In any case, seeing as this primitive isn't inherent to the xfs disk
format, maybe these new concurrency management things belong in libfrog?

--D

> + */
> +struct completion {
> +	pthread_mutex_t		lock;
> +	pthread_cond_t		cond;
> +	bool			signalled; /* for kernel completion behaviour */
> +	int			waiters;
> +};
> +
> +static inline void
> +init_completion(struct completion *w)
> +{
> +	pthread_mutex_init(&w->lock, NULL);
> +	pthread_cond_init(&w->cond, NULL);
> +	w->signalled = false;
> +}
> +
> +static inline void
> +complete(struct completion *w)
> +{
> +	pthread_mutex_lock(&w->lock);
> +	w->signalled = true;
> +	pthread_cond_broadcast(&w->cond);
> +	pthread_mutex_unlock(&w->lock);
> +}
> +
> +/*
> + * Support for mulitple waiters requires that we count the number of waiters
> + * we have and only clear the signalled variable once all those waiters have
> + * been woken.
> + */
> +static inline void
> +wait_for_completion(struct completion *w)
> +{
> +	pthread_mutex_lock(&w->lock);
> +	if (!w->signalled) {
> +		w->waiters++;
> +		pthread_cond_wait(&w->cond, &w->lock);
> +		w->waiters--;
> +	}
> +	if (!w->waiters)
> +		w->signalled = false;
> +	pthread_mutex_unlock(&w->lock);
> +}
> +
> +#endif /* __LIBXFS_COMPLETION_H__ */
> diff --git a/include/libxfs.h b/include/libxfs.h
> index caf4a5139469..d03ec8aeaf5c 100644
> --- a/include/libxfs.h
> +++ b/include/libxfs.h
> @@ -19,6 +19,7 @@
>  #include "libfrog/radix-tree.h"
>  #include "atomic.h"
>  #include "spinlock.h"
> +#include "completion.h"
>  
>  #include "xfs_types.h"
>  #include "xfs_fs.h"
> diff --git a/libxfs/libxfs_priv.h b/libxfs/libxfs_priv.h
> index e134f65c5dd1..5cbc4fe69732 100644
> --- a/libxfs/libxfs_priv.h
> +++ b/libxfs/libxfs_priv.h
> @@ -49,6 +49,7 @@
>  #include "libfrog/radix-tree.h"
>  #include "atomic.h"
>  #include "spinlock.h"
> +#include "completion.h"
>  
>  #include "xfs_types.h"
>  #include "xfs_arch.h"
> -- 
> 2.28.0
>
Dave Chinner Oct. 19, 2020, 10:21 p.m. UTC | #2
On Thu, Oct 15, 2020 at 10:09:17AM -0700, Darrick J. Wong wrote:
> On Thu, Oct 15, 2020 at 06:21:38PM +1100, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > This is needed for the kernel buffer cache conversion to be able
> > to wait on IO synchrnously. It is implemented with pthread mutexes
> > and conditional variables.
> > 
> > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > ---
> >  include/Makefile     |  1 +
> >  include/completion.h | 61 ++++++++++++++++++++++++++++++++++++++++++++
> >  include/libxfs.h     |  1 +
> >  libxfs/libxfs_priv.h |  1 +
> >  4 files changed, 64 insertions(+)
> >  create mode 100644 include/completion.h
> > 
> > diff --git a/include/Makefile b/include/Makefile
> > index f7c40a5ce1a1..98031e70fa0d 100644
> > --- a/include/Makefile
> > +++ b/include/Makefile
> > @@ -12,6 +12,7 @@ LIBHFILES = libxfs.h \
> >  	atomic.h \
> >  	bitops.h \
> >  	cache.h \
> > +	completion.h \
> >  	hlist.h \
> >  	kmem.h \
> >  	list.h \
> > diff --git a/include/completion.h b/include/completion.h
> > new file mode 100644
> > index 000000000000..92194c3f1484
> > --- /dev/null
> > +++ b/include/completion.h
> > @@ -0,0 +1,61 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (c) 2019 RedHat, Inc.
> > + * All Rights Reserved.
> > + */
> > +#ifndef __LIBXFS_COMPLETION_H__
> > +#define __LIBXFS_COMPLETION_H__
> > +
> > +/*
> > + * This implements kernel compatible completion semantics. This is slightly
> > + * different to the way pthread conditional variables work in that completions
> > + * can be signalled before the waiter tries to wait on the variable. In the
> > + * pthread case, the completion is ignored and the waiter goes to sleep, whilst
> > + * the kernel will see that the completion has already been completed and so
> > + * will not block. This is handled through the addition of the the @signalled
> > + * flag in the struct completion.
> 
> Hmm... do any of the existing pthread_cond_t users need these semantics?

No idea. I haven't done an audit, and that's outside the scope of
what I'm doing here...

> I suspect the ones in scrub/vfs.c might actually be vulnerable to the
> signal-before-wait race that this completion structure solves.
> 
> In any case, seeing as this primitive isn't inherent to the xfs disk
> format, maybe these new concurrency management things belong in libfrog?

I've just mirrored all the other kernel wrapper headers by placing
them in include/. I don't see much point in moving these to libfrog,
because they have no actual built object code that needs to be
linked. i.e. they are included globally for everything via
libxfs/libxfs_priv.h and include/libxfs.h, and that's all that is
needed to use these APIs. If they get more complex and require
actual object files to be built, then it's time to reconsider...

Cheers,

Dave.
diff mbox series

Patch

diff --git a/include/Makefile b/include/Makefile
index f7c40a5ce1a1..98031e70fa0d 100644
--- a/include/Makefile
+++ b/include/Makefile
@@ -12,6 +12,7 @@  LIBHFILES = libxfs.h \
 	atomic.h \
 	bitops.h \
 	cache.h \
+	completion.h \
 	hlist.h \
 	kmem.h \
 	list.h \
diff --git a/include/completion.h b/include/completion.h
new file mode 100644
index 000000000000..92194c3f1484
--- /dev/null
+++ b/include/completion.h
@@ -0,0 +1,61 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 RedHat, Inc.
+ * All Rights Reserved.
+ */
+#ifndef __LIBXFS_COMPLETION_H__
+#define __LIBXFS_COMPLETION_H__
+
+/*
+ * This implements kernel compatible completion semantics. This is slightly
+ * different to the way pthread conditional variables work in that completions
+ * can be signalled before the waiter tries to wait on the variable. In the
+ * pthread case, the completion is ignored and the waiter goes to sleep, whilst
+ * the kernel will see that the completion has already been completed and so
+ * will not block. This is handled through the addition of the the @signalled
+ * flag in the struct completion.
+ */
+struct completion {
+	pthread_mutex_t		lock;
+	pthread_cond_t		cond;
+	bool			signalled; /* for kernel completion behaviour */
+	int			waiters;
+};
+
+static inline void
+init_completion(struct completion *w)
+{
+	pthread_mutex_init(&w->lock, NULL);
+	pthread_cond_init(&w->cond, NULL);
+	w->signalled = false;
+}
+
+static inline void
+complete(struct completion *w)
+{
+	pthread_mutex_lock(&w->lock);
+	w->signalled = true;
+	pthread_cond_broadcast(&w->cond);
+	pthread_mutex_unlock(&w->lock);
+}
+
+/*
+ * Support for mulitple waiters requires that we count the number of waiters
+ * we have and only clear the signalled variable once all those waiters have
+ * been woken.
+ */
+static inline void
+wait_for_completion(struct completion *w)
+{
+	pthread_mutex_lock(&w->lock);
+	if (!w->signalled) {
+		w->waiters++;
+		pthread_cond_wait(&w->cond, &w->lock);
+		w->waiters--;
+	}
+	if (!w->waiters)
+		w->signalled = false;
+	pthread_mutex_unlock(&w->lock);
+}
+
+#endif /* __LIBXFS_COMPLETION_H__ */
diff --git a/include/libxfs.h b/include/libxfs.h
index caf4a5139469..d03ec8aeaf5c 100644
--- a/include/libxfs.h
+++ b/include/libxfs.h
@@ -19,6 +19,7 @@ 
 #include "libfrog/radix-tree.h"
 #include "atomic.h"
 #include "spinlock.h"
+#include "completion.h"
 
 #include "xfs_types.h"
 #include "xfs_fs.h"
diff --git a/libxfs/libxfs_priv.h b/libxfs/libxfs_priv.h
index e134f65c5dd1..5cbc4fe69732 100644
--- a/libxfs/libxfs_priv.h
+++ b/libxfs/libxfs_priv.h
@@ -49,6 +49,7 @@ 
 #include "libfrog/radix-tree.h"
 #include "atomic.h"
 #include "spinlock.h"
+#include "completion.h"
 
 #include "xfs_types.h"
 #include "xfs_arch.h"