[RFC,02/21] Add a prelocked wake-up

Message ID: 157117608708.15019.1998141309054662114.stgit@warthog.procyon.org.uk
State: New, archived
Series: pipe: Keyrings, Block and USB notifications

Commit Message

David Howells Oct. 15, 2019, 9:48 p.m. UTC
Add a wakeup call for the case where the caller already holds the
waitqueue spinlock.  This can be used by pipes to alter the ring buffer
indices under the spinlock.
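
For illustration, the intended usage pattern looks roughly like the
following (field names here are illustrative; the actual pipe conversion
comes later in the series):

	spin_lock_irq(&pipe->wait.lock);
	pipe->ring_head = head + 1;	/* alter ring indices under the lock */
	/* A plain wake_up_interruptible_sync_poll() would take the
	 * waitqueue lock again and deadlock; the prelocked variant
	 * assumes the caller already holds it. */
	prelocked_wake_up_interruptible_sync_poll(&pipe->wait,
						  EPOLLIN | EPOLLRDNORM);
	spin_unlock_irq(&pipe->wait.lock);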

Signed-off-by: David Howells <dhowells@redhat.com>
---

 include/linux/wait.h |    2 ++
 kernel/sched/wait.c  |    7 +++++++
 2 files changed, 9 insertions(+)

Comments

Linus Torvalds Oct. 15, 2019, 10:14 p.m. UTC | #1
On Tue, Oct 15, 2019 at 2:48 PM David Howells <dhowells@redhat.com> wrote:
>
> Add a wakeup call for the case where the caller already holds the
> waitqueue spinlock.

That naming is crazy.

We already have helper functions like this, and they are just called
"wake_up_locked()".

So the "prelocked" naming is just odd. Make it be
wake_up_interruptible_sync_poll_locked().

The helper function should likely be

  void __wake_up_locked_sync_key(struct wait_queue_head *wq_head,
                                 unsigned int mode, void *key)
  {
        __wake_up_common(wq_head, mode, 1, WF_SYNC, key, NULL);
  }
  EXPORT_SYMBOL_GPL(__wake_up_locked_sync_key);

to match the other naming patterns there.
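
Presumably the caller-facing macro then follows the existing
wake_up_interruptible_sync_poll() pattern in wait.h, something like:

  #define wake_up_interruptible_sync_poll_locked(x, m)			\
	__wake_up_locked_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m))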

[ Unrelated ]

Looking at that mess of functions, I also wonder if we should try to
just remove the bookmark code again. It was cute, and it was useful,
but I think the problem with the page lock list may have been fixed by
commit 9a1ea439b16b ("mm: put_and_wait_on_page_locked() while page is
migrated") which avoids the retry condition with
migrate_page_move_mapping().
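
Paraphrasing that commit (a sketch, not the actual diff): the old
migration-entry wait path held a page reference across the sleep, which
is what made migrate_page_move_mapping() fail and retry:

  /* Before: the extra reference held across the sleep made the
   * migration's refcount check fail, so it kept retrying while
   * waiters piled up on the page waitqueue. */
  get_page(page);
  wait_on_page_locked(page);
  put_page(page);

  /* After: the reference is dropped before sleeping, so the
   * migration can complete while we wait. */
  put_and_wait_on_page_locked(page);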

Tim/Kan? Do you have the problematic load still?

              Linus
David Howells Oct. 15, 2019, 10:33 p.m. UTC | #2
Linus Torvalds <torvalds@linux-foundation.org> wrote:

> > Add a wakeup call for the case where the caller already holds the
> > waitqueue spinlock.
> 
> That naming is crazy.

Sorry, yeah.  This is a bit hacked together at the moment and needs some
more tidying up.  I've had a lot of problems with performance regressions
of up to 40% from seemingly simple changes involving moving locks around;
it turned out that the problem was that I was running with KASAN enabled.

David
David Howells Oct. 16, 2019, 2:26 p.m. UTC | #3
Btw, is there any point in __wake_up_sync_key() taking an nr_exclusive
argument?  It clears WF_SYNC if nr_exclusive != 1, and it doesn't make
sense for nr_exclusive to be >1 anyway.

David
---
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 3eb7cae8206c..bb7676d396cd 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -201,9 +201,9 @@ void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void
 void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
 void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
 		unsigned int mode, void *key, wait_queue_entry_t *bookmark);
-void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
+void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
 void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr);
-void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr);
+void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode);
 
 #define wake_up(x)			__wake_up(x, TASK_NORMAL, 1, NULL)
 #define wake_up_nr(x, nr)		__wake_up(x, TASK_NORMAL, nr, NULL)
@@ -214,7 +214,7 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr);
 #define wake_up_interruptible(x)	__wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
 #define wake_up_interruptible_nr(x, nr)	__wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
 #define wake_up_interruptible_all(x)	__wake_up(x, TASK_INTERRUPTIBLE, 0, NULL)
-#define wake_up_interruptible_sync(x)	__wake_up_sync((x), TASK_INTERRUPTIBLE, 1)
+#define wake_up_interruptible_sync(x)	__wake_up_sync((x), TASK_INTERRUPTIBLE)
 
 /*
  * Wakeup macros to be used to report events to the targets.
@@ -228,7 +228,7 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr);
 #define wake_up_interruptible_poll(x, m)					\
 	__wake_up(x, TASK_INTERRUPTIBLE, 1, poll_to_key(m))
 #define wake_up_interruptible_sync_poll(x, m)					\
-	__wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, poll_to_key(m))
+	__wake_up_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m))
 
 #define ___wait_cond_timeout(condition)						\
 ({										\
diff --git a/kernel/exit.c b/kernel/exit.c
index a46a50d67002..a1ff25ef050e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1435,7 +1435,7 @@ static int child_wait_callback(wait_queue_entry_t *wait, unsigned mode,
 void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
 {
 	__wake_up_sync_key(&parent->signal->wait_chldexit,
-				TASK_INTERRUPTIBLE, 1, p);
+			   TASK_INTERRUPTIBLE, p);
 }
 
 static long do_wait(struct wait_opts *wo)
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index c1e566a114ca..b4b52361dab7 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -169,7 +169,6 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark);
  * __wake_up_sync_key - wake up threads blocked on a waitqueue.
  * @wq_head: the waitqueue
  * @mode: which threads
- * @nr_exclusive: how many wake-one or wake-many threads to wake up
  * @key: opaque value to be passed to wakeup targets
  *
  * The sync wakeup differs that the waker knows that it will schedule
@@ -183,26 +182,21 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark);
  * accessing the task state.
  */
 void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
-			int nr_exclusive, void *key)
+			void *key)
 {
-	int wake_flags = 1; /* XXX WF_SYNC */
-
 	if (unlikely(!wq_head))
 		return;
 
-	if (unlikely(nr_exclusive != 1))
-		wake_flags = 0;
-
-	__wake_up_common_lock(wq_head, mode, nr_exclusive, wake_flags, key);
+	__wake_up_common_lock(wq_head, mode, 1, WF_SYNC, key);
 }
 EXPORT_SYMBOL_GPL(__wake_up_sync_key);
 
 /*
  * __wake_up_sync - see __wake_up_sync_key()
  */
-void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr_exclusive)
+void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode)
 {
-	__wake_up_sync_key(wq_head, mode, nr_exclusive, NULL);
+	__wake_up_sync_key(wq_head, mode, NULL);
 }
 EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
Linus Torvalds Oct. 16, 2019, 3:31 p.m. UTC | #4
On Wed, Oct 16, 2019 at 7:26 AM David Howells <dhowells@redhat.com> wrote:
>
> Btw, is there any point in __wake_up_sync_key() taking an nr_exclusive
> argument?  It clears WF_SYNC if nr_exclusive != 1, and it doesn't make
> sense for nr_exclusive to be >1 anyway.

Ack, looks sane to me.

We have _very_ few users of nr_exclusive. I wonder if it's even worth
having at all, but it's definitely not worth it here.

I'd love for nr_exclusive to go away and be replaced by WF_ALL
instead. Right now it looks like there is one SGI driver that uses it,
and the sbitmap code. That was all I could find.
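
For reference, nr_exclusive only limits how many exclusive waiters
(those queued via prepare_to_wait_exclusive()) get woken; non-exclusive
waiters are always woken.  Roughly:

  wake_up_nr(&wq, 2);	/* wake at most two exclusive waiters */
  wake_up_all(&wq);	/* nr_exclusive == 0: no limit, wake everyone */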

Oh well. You removing one case is at least a small amount of progress.

         Linus
Tim Chen Oct. 16, 2019, 5:02 p.m. UTC | #5
On 10/15/19 3:14 PM, Linus Torvalds wrote:
> On Tue, Oct 15, 2019 at 2:48 PM David Howells <dhowells@redhat.com> wrote:
>>
>> Add a wakeup call for the case where the caller already holds the
>> waitqueue spinlock.
> 
> That naming is crazy.
> 
> We already have helper functions like this, and they are just called
> "wake_up_locked()".
> 
> So the "prelocked" naming is just odd. Make it be
> wake_up_interruptible_sync_poll_locked().
> 
> The helper function should likely be
> 
>   void __wake_up_locked_sync_key(struct wait_queue_head *wq_head,
>                                  unsigned int mode, void *key)
>   {
>         __wake_up_common(wq_head, mode, 1, WF_SYNC, key, NULL);
>   }
>   EXPORT_SYMBOL_GPL(__wake_up_locked_sync_key);
> 
> to match the other naming patterns there.
> 
> [ Unrelated ]
> 
> Looking at that mess of functions, I also wonder if we should try to
> just remove the bookmark code again. It was cute, and it was useful,
> but I think the problem with the page lock list may have been fixed by
> commit 9a1ea439b16b ("mm: put_and_wait_on_page_locked() while page is
> migrated") which avoids the retry condition with
> migrate_page_move_mapping().
> 
> Tim/Kan? Do you have the problematic load still?
> 

Unfortunately, we do not have ready access to that problematic load,
which was run by a customer on an 8-socket system.  They were not
willing to give the workload to us, and have not responded to my
request to rerun their load with commit 9a1ea439b16b.

The commit greatly reduced migration failures under concurrent page
faulting, and successful migrations would have prevented the big
pile-up of waiters faulting and waiting on the page, which was the
problem the bookmark code was trying to solve.

So I also tend to think that the problem should have been resolved.
But unfortunately I don't have a ready workload to confirm.

Tim

Patch

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 3eb7cae8206c..d511b298a20c 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -229,6 +229,8 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr);
 	__wake_up(x, TASK_INTERRUPTIBLE, 1, poll_to_key(m))
 #define wake_up_interruptible_sync_poll(x, m)					\
 	__wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, poll_to_key(m))
+void prelocked_wake_up_interruptible_sync_poll(struct wait_queue_head *wq_head,
+					       __poll_t mask);
 
 #define ___wait_cond_timeout(condition)						\
 ({										\
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index c1e566a114ca..43fbbbe9af27 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -126,6 +126,13 @@ static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int
 	} while (bookmark.flags & WQ_FLAG_BOOKMARK);
 }
 
+void prelocked_wake_up_interruptible_sync_poll(struct wait_queue_head *wq_head,
+					       __poll_t mask)
+{
+	__wake_up_common(wq_head, TASK_INTERRUPTIBLE, 1, WF_SYNC,
+			 poll_to_key(mask), NULL);
+}
+
 /**
  * __wake_up - wake up threads blocked on a waitqueue.
  * @wq_head: the waitqueue