
[RFC,07/10] pipe: Conditionalise wakeup in pipe_read() [ver #2]

Message ID 157186189069.3995.10292601951655075484.stgit@warthog.procyon.org.uk (mailing list archive)
State New, archived
Series pipe: Notification queue preparation [ver #2]

Commit Message

David Howells Oct. 23, 2019, 8:18 p.m. UTC
Only do a wakeup in pipe_read() if we made space in a completely full
buffer.  The producer shouldn't be waiting on pipe->wait otherwise.
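
For reference: head and tail are free-running ring counters, so the occupancy
is head - tail.  The test below runs after tail has been advanced, so
head - (tail - 1) is the occupancy before this read, and the buffer was
completely full iff that equals pipe->max_usage.  A minimal standalone sketch
of the arithmetic (plain C, helper name assumed; not the kernel code):

	/* tail has already been incremented; the occupancy before the read
	 * was head - (tail - 1), so the ring was full iff that equals the
	 * ring capacity. */
	static inline int ring_was_full(unsigned int head, unsigned int tail,
					unsigned int max_usage)
	{
		return head - (tail - 1) == max_usage;
	}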

Signed-off-by: David Howells <dhowells@redhat.com>
---

 fs/pipe.c |   15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

Comments

Konstantin Khlebnikov Oct. 27, 2019, 3:57 p.m. UTC | #1
On 23/10/2019 23.18, David Howells wrote:
> Only do a wakeup in pipe_read() if we made space in a completely full
> buffer.  The producer shouldn't be waiting on pipe->wait otherwise.

We could go further and wake up the writer only when at least half of the buffer
is empty. This gives better batching and reduces the rate of context switches.

https://lore.kernel.org/lkml/157219118016.7078.16223055699799396042.stgit@buzz/T/#u
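
As a rough illustration (assuming the default PIPE_DEF_BUFFERS of 16 slots): a
full-only wakeup rouses the writer as soon as a single slot opens, so reader
and writer can end up alternating one slot at a time; waking at half-empty
instead lets a blocked writer refill up to 8 slots per wakeup, batching several
reads and writes into each context switch.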

> 
> Signed-off-by: David Howells <dhowells@redhat.com>
> ---
> 
>   fs/pipe.c |   15 ++++++---------
>   1 file changed, 6 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/pipe.c b/fs/pipe.c
> index 1274305772fb..e3a8f10750c9 100644
> --- a/fs/pipe.c
> +++ b/fs/pipe.c
> @@ -327,11 +327,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>   				spin_lock_irq(&pipe->wait.lock);
>   				tail++;
>   				pipe_commit_read(pipe, tail);
> -				do_wakeup = 0;
> -				wake_up_interruptible_sync_poll_locked(
> -					&pipe->wait, EPOLLOUT | EPOLLWRNORM);
> +				do_wakeup = 1;
> +				if (head - (tail - 1) == pipe->max_usage)
> +					wake_up_interruptible_sync_poll_locked(
> +						&pipe->wait, EPOLLOUT | EPOLLWRNORM);
>   				spin_unlock_irq(&pipe->wait.lock);
> -				kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
> +				if (head - (tail - 1) == pipe->max_usage)
> +					kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
>   			}
>   			total_len -= chars;
>   			if (!total_len)
> @@ -360,11 +362,6 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>   				ret = -ERESTARTSYS;
>   			break;
>   		}
> -		if (do_wakeup) {
> -			wake_up_interruptible_sync_poll(&pipe->wait, EPOLLOUT | EPOLLWRNORM);
> - 			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
> -			do_wakeup = 0;
> -		}
>   		pipe_wait(pipe);
>   	}
>   	__pipe_unlock(pipe);
> 
>
David Howells Oct. 31, 2019, 3:21 p.m. UTC | #2
Konstantin Khlebnikov <khlebnikov@yandex-team.ru> wrote:

> > Only do a wakeup in pipe_read() if we made space in a completely full
> > buffer.  The producer shouldn't be waiting on pipe->wait otherwise.
> 
> We could go further and wake up the writer only when at least half of the
> buffer is empty.  This gives better batching and reduces the rate of context
> switches.
> 
> https://lore.kernel.org/lkml/157219118016.7078.16223055699799396042.stgit@buzz/T/#u

Yeah, I saw that.  I suspect that where you put the slider may depend on the
context.
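
Purely as an illustration of that slider, with a hypothetical divisor knob
that exists in neither patch:

	/* Hypothetical: wake the writer once the pre-read occupancy falls
	 * to max_usage / wake_divisor.  A divisor of 1 wakes only when the
	 * ring was completely full (this series); 2 wakes at half-empty
	 * (the patch linked above). */
	wake = head - (tail - 1) == pipe->max_usage / wake_divisor;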

David
David Howells Oct. 31, 2019, 4:38 p.m. UTC | #3
Okay, attached is a change that might give you what you want.  I tried my
pipe-bench program (see cover note) with perf.  The output of the program with
the patch applied was:

-       pipe                  305127298     36262221772       302185181         7887690

The output of perf with the patch applied:

        239,943.92 msec task-clock                #    1.997 CPUs utilized
            17,728      context-switches          #   73.884 M/sec
               124      cpu-migrations            #    0.517 M/sec
             9,330      page-faults               #   38.884 M/sec
   885,107,207,365      cycles                    # 3688822.793 GHz
 1,386,873,499,490      instructions              #    1.57  insn per cycle
   311,037,372,339      branches                  # 1296296921.931 M/sec
        33,467,827      branch-misses             #    0.01% of all branches

And without:

        239,891.87 msec task-clock                #    1.997 CPUs utilized
            22,187      context-switches          #   92.488 M/sec
               133      cpu-migrations            #    0.554 M/sec
             9,334      page-faults               #   38.909 M/sec
   884,906,976,128      cycles                    # 3688787.725 GHz
 1,391,986,932,265      instructions              #    1.57  insn per cycle
   311,394,686,857      branches                  # 1298067400.849 M/sec
        30,242,823      branch-misses             #    0.01% of all branches

So it did make something like a 20% reduction in context switches.
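(From the figures above: (22,187 - 17,728) / 22,187 ≈ 20.1%.)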

David
---
diff --git a/fs/pipe.c b/fs/pipe.c
index e3d5f7a39123..5167921edd73 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -276,7 +276,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 	size_t total_len = iov_iter_count(to);
 	struct file *filp = iocb->ki_filp;
 	struct pipe_inode_info *pipe = filp->private_data;
-	int do_wakeup;
+	int do_wakeup, wake;
 	ssize_t ret;

 	/* Null read succeeds. */
@@ -329,11 +329,12 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 				tail++;
 				pipe->tail = tail;
 				do_wakeup = 1;
-				if (head - (tail - 1) == pipe->max_usage)
+				wake = head - (tail - 1) == pipe->max_usage / 2;
+				if (wake)
 					wake_up_interruptible_sync_poll_locked(
 						&pipe->wait, EPOLLOUT | EPOLLWRNORM);
 				spin_unlock_irq(&pipe->wait.lock);
-				if (head - (tail - 1) == pipe->max_usage)
+				if (wake)
 					kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 			}
 			total_len -= chars;
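
Note that the test uses == rather than <=, so the wakeup fires exactly once,
on the read that brings the pre-read occupancy down to max_usage / 2, rather
than on every read below that threshold.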
Konstantin Khlebnikov Nov. 3, 2019, 11:04 a.m. UTC | #4
On 31/10/2019 19.38, David Howells wrote:
> Okay, attached is a change that might give you what you want.  I tried my
> pipe-bench program (see cover note) with perf.  The output of the program with
> the patch applied was:
> 
> -       pipe                  305127298     36262221772       302185181         7887690
> 
> The output of perf with the patch applied:
> 
>          239,943.92 msec task-clock                #    1.997 CPUs utilized
>              17,728      context-switches          #   73.884 M/sec
>                 124      cpu-migrations            #    0.517 M/sec
>               9,330      page-faults               #   38.884 M/sec
>     885,107,207,365      cycles                    # 3688822.793 GHz
>   1,386,873,499,490      instructions              #    1.57  insn per cycle
>     311,037,372,339      branches                  # 1296296921.931 M/sec
>          33,467,827      branch-misses             #    0.01% of all branches
> 
> And without:
> 
>          239,891.87 msec task-clock                #    1.997 CPUs utilized
>              22,187      context-switches          #   92.488 M/sec
>                 133      cpu-migrations            #    0.554 M/sec
>               9,334      page-faults               #   38.909 M/sec
>     884,906,976,128      cycles                    # 3688787.725 GHz
>   1,391,986,932,265      instructions              #    1.57  insn per cycle
>     311,394,686,857      branches                  # 1298067400.849 M/sec
>          30,242,823      branch-misses             #    0.01% of all branches
> 
> So it did make something like a 20% reduction in context switches.

Ok, looks promising. Depending on the workload, the reduction might be much bigger.

I suppose a buffer resize (grow) does the wakeup unconditionally. Should be ok.

> 
> David
> ---
> diff --git a/fs/pipe.c b/fs/pipe.c
> index e3d5f7a39123..5167921edd73 100644
> --- a/fs/pipe.c
> +++ b/fs/pipe.c
> @@ -276,7 +276,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>   	size_t total_len = iov_iter_count(to);
>   	struct file *filp = iocb->ki_filp;
>   	struct pipe_inode_info *pipe = filp->private_data;
> -	int do_wakeup;
> +	int do_wakeup, wake;
>   	ssize_t ret;
> 
>   	/* Null read succeeds. */
> @@ -329,11 +329,12 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>   				tail++;
>   				pipe->tail = tail;
>   				do_wakeup = 1;
> -				if (head - (tail - 1) == pipe->max_usage)
> +				wake = head - (tail - 1) == pipe->max_usage / 2;
> +				if (wake)
>   					wake_up_interruptible_sync_poll_locked(
>   						&pipe->wait, EPOLLOUT | EPOLLWRNORM);
>   				spin_unlock_irq(&pipe->wait.lock);
> -				if (head - (tail - 1) == pipe->max_usage)
> +				if (wake)
>   					kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
>   			}
>   			total_len -= chars;
>

Patch

diff --git a/fs/pipe.c b/fs/pipe.c
index 1274305772fb..e3a8f10750c9 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -327,11 +327,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 				spin_lock_irq(&pipe->wait.lock);
 				tail++;
 				pipe_commit_read(pipe, tail);
-				do_wakeup = 0;
-				wake_up_interruptible_sync_poll_locked(
-					&pipe->wait, EPOLLOUT | EPOLLWRNORM);
+				do_wakeup = 1;
+				if (head - (tail - 1) == pipe->max_usage)
+					wake_up_interruptible_sync_poll_locked(
+						&pipe->wait, EPOLLOUT | EPOLLWRNORM);
 				spin_unlock_irq(&pipe->wait.lock);
-				kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
+				if (head - (tail - 1) == pipe->max_usage)
+					kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 			}
 			total_len -= chars;
 			if (!total_len)
@@ -360,11 +362,6 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 				ret = -ERESTARTSYS;
 			break;
 		}
-		if (do_wakeup) {
-			wake_up_interruptible_sync_poll(&pipe->wait, EPOLLOUT | EPOLLWRNORM);
- 			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
-			do_wakeup = 0;
-		}
 		pipe_wait(pipe);
 	}
 	__pipe_unlock(pipe);
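
In the second hunk the deferred wakeup ahead of pipe_wait() goes away: the
writer is now woken, under the waitqueue lock, at the moment a completely full
ring gains a slot, so there is nothing left for the deferred path to do before
the reader sleeps.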