diff mbox series

[v2] pipe: use __pipe_{lock,unlock} instead of spinlock

Message ID 20230103063303.23345-1-zhanghongchen@loongson.cn (mailing list archive)
State Superseded
Headers show
Series [v2] pipe: use __pipe_{lock,unlock} instead of spinlock | expand

Commit Message

Hongchen Zhang Jan. 3, 2023, 6:33 a.m. UTC
Use spinlock in pipe_read/write cost too much time,IMO
pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
On the other hand, we can use __pipe_lock/unlock to protect the
pipe->head/tail in pipe_resize_ring and post_one_notification.

Signed-off-by: Hongchen Zhang <zhanghongchen@loongson.cn>
---
 fs/pipe.c                 | 24 ++++--------------------
 include/linux/pipe_fs_i.h | 12 ++++++++++++
 kernel/watch_queue.c      |  8 ++++----
 3 files changed, 20 insertions(+), 24 deletions(-)


base-commit: c8451c141e07a8d05693f6c8d0e418fbb4b68bb7

Comments

Matthew Wilcox (Oracle) Jan. 3, 2023, 5:51 p.m. UTC | #1
On Tue, Jan 03, 2023 at 02:33:03PM +0800, Hongchen Zhang wrote:
> Use spinlock in pipe_read/write cost too much time,IMO

Everybody has an opinion.  Do you have data?

> pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
> On the other hand, we can use __pipe_lock/unlock to protect the
> pipe->head/tail in pipe_resize_ring and post_one_notification.
> 
> Signed-off-by: Hongchen Zhang <zhanghongchen@loongson.cn>
> ---

you're supposed to write here what changes you made between v1 and v2.

>  fs/pipe.c                 | 24 ++++--------------------
>  include/linux/pipe_fs_i.h | 12 ++++++++++++
>  kernel/watch_queue.c      |  8 ++++----
>  3 files changed, 20 insertions(+), 24 deletions(-)
> 
> diff --git a/fs/pipe.c b/fs/pipe.c
> index 42c7ff41c2db..cf449779bf71 100644
> --- a/fs/pipe.c
> +++ b/fs/pipe.c
> @@ -98,16 +98,6 @@ void pipe_unlock(struct pipe_inode_info *pipe)
>  }
>  EXPORT_SYMBOL(pipe_unlock);
>  
> -static inline void __pipe_lock(struct pipe_inode_info *pipe)
> -{
> -	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
> -}
> -
> -static inline void __pipe_unlock(struct pipe_inode_info *pipe)
> -{
> -	mutex_unlock(&pipe->mutex);
> -}
> -
>  void pipe_double_lock(struct pipe_inode_info *pipe1,
>  		      struct pipe_inode_info *pipe2)
>  {
> @@ -253,8 +243,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>  	 */
>  	was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
>  	for (;;) {
> -		/* Read ->head with a barrier vs post_one_notification() */
> -		unsigned int head = smp_load_acquire(&pipe->head);
> +		unsigned int head = pipe->head;
>  		unsigned int tail = pipe->tail;
>  		unsigned int mask = pipe->ring_size - 1;
>  
> @@ -322,14 +311,12 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>  
>  			if (!buf->len) {
>  				pipe_buf_release(pipe, buf);
> -				spin_lock_irq(&pipe->rd_wait.lock);
>  #ifdef CONFIG_WATCH_QUEUE
>  				if (buf->flags & PIPE_BUF_FLAG_LOSS)
>  					pipe->note_loss = true;
>  #endif
>  				tail++;
>  				pipe->tail = tail;
> -				spin_unlock_irq(&pipe->rd_wait.lock);
>  			}
>  			total_len -= chars;
>  			if (!total_len)
> @@ -506,16 +493,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>  			 * it, either the reader will consume it or it'll still
>  			 * be there for the next write.
>  			 */
> -			spin_lock_irq(&pipe->rd_wait.lock);
>  
>  			head = pipe->head;
>  			if (pipe_full(head, pipe->tail, pipe->max_usage)) {
> -				spin_unlock_irq(&pipe->rd_wait.lock);
>  				continue;
>  			}
>  
>  			pipe->head = head + 1;
> -			spin_unlock_irq(&pipe->rd_wait.lock);
>  
>  			/* Insert it into the buffer array */
>  			buf = &pipe->bufs[head & mask];
> @@ -1260,14 +1244,14 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
>  	if (unlikely(!bufs))
>  		return -ENOMEM;
>  
> -	spin_lock_irq(&pipe->rd_wait.lock);
> +	__pipe_lock(pipe);
>  	mask = pipe->ring_size - 1;
>  	head = pipe->head;
>  	tail = pipe->tail;
>  
>  	n = pipe_occupancy(head, tail);
>  	if (nr_slots < n) {
> -		spin_unlock_irq(&pipe->rd_wait.lock);
> +		__pipe_unlock(pipe);
>  		kfree(bufs);
>  		return -EBUSY;
>  	}
> @@ -1303,7 +1287,7 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
>  	pipe->tail = tail;
>  	pipe->head = head;
>  
> -	spin_unlock_irq(&pipe->rd_wait.lock);
> +	__pipe_unlock(pipe);
>  
>  	/* This might have made more room for writers */
>  	wake_up_interruptible(&pipe->wr_wait);
> diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
> index 6cb65df3e3ba..f5084daf6eaf 100644
> --- a/include/linux/pipe_fs_i.h
> +++ b/include/linux/pipe_fs_i.h
> @@ -2,6 +2,8 @@
>  #ifndef _LINUX_PIPE_FS_I_H
>  #define _LINUX_PIPE_FS_I_H
>  
> +#include <linux/fs.h>
> +
>  #define PIPE_DEF_BUFFERS	16
>  
>  #define PIPE_BUF_FLAG_LRU	0x01	/* page is on the LRU */
> @@ -223,6 +225,16 @@ static inline void pipe_discard_from(struct pipe_inode_info *pipe,
>  #define PIPE_SIZE		PAGE_SIZE
>  
>  /* Pipe lock and unlock operations */
> +static inline void __pipe_lock(struct pipe_inode_info *pipe)
> +{
> +	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
> +}
> +
> +static inline void __pipe_unlock(struct pipe_inode_info *pipe)
> +{
> +	mutex_unlock(&pipe->mutex);
> +}
> +
>  void pipe_lock(struct pipe_inode_info *);
>  void pipe_unlock(struct pipe_inode_info *);
>  void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
> diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
> index a6f9bdd956c3..92e46cfe9419 100644
> --- a/kernel/watch_queue.c
> +++ b/kernel/watch_queue.c
> @@ -108,7 +108,7 @@ static bool post_one_notification(struct watch_queue *wqueue,
>  	if (!pipe)
>  		return false;
>  
> -	spin_lock_irq(&pipe->rd_wait.lock);
> +	__pipe_lock(pipe);
>  
>  	mask = pipe->ring_size - 1;
>  	head = pipe->head;
> @@ -135,17 +135,17 @@ static bool post_one_notification(struct watch_queue *wqueue,
>  	buf->offset = offset;
>  	buf->len = len;
>  	buf->flags = PIPE_BUF_FLAG_WHOLE;
> -	smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */
> +	pipe->head = head + 1;
>  
>  	if (!test_and_clear_bit(note, wqueue->notes_bitmap)) {
> -		spin_unlock_irq(&pipe->rd_wait.lock);
> +		__pipe_unlock(pipe);
>  		BUG();
>  	}
>  	wake_up_interruptible_sync_poll_locked(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
>  	done = true;
>  
>  out:
> -	spin_unlock_irq(&pipe->rd_wait.lock);
> +	__pipe_unlock(pipe);
>  	if (done)
>  		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
>  	return done;
> 
> base-commit: c8451c141e07a8d05693f6c8d0e418fbb4b68bb7
> -- 
> 2.31.1
>
Hongchen Zhang Jan. 4, 2023, 3:46 a.m. UTC | #2
Hi Matthew,

On 2023/1/4 am 1:51, Matthew Wilcox wrote:
> On Tue, Jan 03, 2023 at 02:33:03PM +0800, Hongchen Zhang wrote:
>> Use spinlock in pipe_read/write cost too much time,IMO
> 
> Everybody has an opinion.  Do you have data?
> 
I tested this patch using UnixBench's pipe test case on a x86_64 
machine,and get the following data:
1) before this patch
System Benchmarks Partial Index  BASELINE       RESULT    INDEX
Pipe Throughput                   12440.0     493023.3    396.3
                                                                    ========
System Benchmarks Index Score (Partial Only)              396.3

2) after this patch
System Benchmarks Partial Index  BASELINE       RESULT    INDEX
Pipe Throughput                   12440.0     507551.4    408.0
                                                                    ========
System Benchmarks Index Score (Partial Only)              408.0

so we get ~3% speedup.

>> pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
>> On the other hand, we can use __pipe_lock/unlock to protect the
>> pipe->head/tail in pipe_resize_ring and post_one_notification.
>>
>> Signed-off-by: Hongchen Zhang <zhanghongchen@loongson.cn>
>> ---
> 
> you're supposed to write here what changes you made between v1 and v2.
> 
I added the linux/fs.h in v2 to fix the linux-test-robot test error in v1.
>>   fs/pipe.c                 | 24 ++++--------------------
>>   include/linux/pipe_fs_i.h | 12 ++++++++++++
>>   kernel/watch_queue.c      |  8 ++++----
>>   3 files changed, 20 insertions(+), 24 deletions(-)
>>
>> diff --git a/fs/pipe.c b/fs/pipe.c
>> index 42c7ff41c2db..cf449779bf71 100644
>> --- a/fs/pipe.c
>> +++ b/fs/pipe.c
>> @@ -98,16 +98,6 @@ void pipe_unlock(struct pipe_inode_info *pipe)
>>   }
>>   EXPORT_SYMBOL(pipe_unlock);
>>   
>> -static inline void __pipe_lock(struct pipe_inode_info *pipe)
>> -{
>> -	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
>> -}
>> -
>> -static inline void __pipe_unlock(struct pipe_inode_info *pipe)
>> -{
>> -	mutex_unlock(&pipe->mutex);
>> -}
>> -
>>   void pipe_double_lock(struct pipe_inode_info *pipe1,
>>   		      struct pipe_inode_info *pipe2)
>>   {
>> @@ -253,8 +243,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>>   	 */
>>   	was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
>>   	for (;;) {
>> -		/* Read ->head with a barrier vs post_one_notification() */
>> -		unsigned int head = smp_load_acquire(&pipe->head);
>> +		unsigned int head = pipe->head;
>>   		unsigned int tail = pipe->tail;
>>   		unsigned int mask = pipe->ring_size - 1;
>>   
>> @@ -322,14 +311,12 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>>   
>>   			if (!buf->len) {
>>   				pipe_buf_release(pipe, buf);
>> -				spin_lock_irq(&pipe->rd_wait.lock);
>>   #ifdef CONFIG_WATCH_QUEUE
>>   				if (buf->flags & PIPE_BUF_FLAG_LOSS)
>>   					pipe->note_loss = true;
>>   #endif
>>   				tail++;
>>   				pipe->tail = tail;
>> -				spin_unlock_irq(&pipe->rd_wait.lock);
>>   			}
>>   			total_len -= chars;
>>   			if (!total_len)
>> @@ -506,16 +493,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>>   			 * it, either the reader will consume it or it'll still
>>   			 * be there for the next write.
>>   			 */
>> -			spin_lock_irq(&pipe->rd_wait.lock);
>>   
>>   			head = pipe->head;
>>   			if (pipe_full(head, pipe->tail, pipe->max_usage)) {
>> -				spin_unlock_irq(&pipe->rd_wait.lock);
>>   				continue;
>>   			}
>>   
>>   			pipe->head = head + 1;
>> -			spin_unlock_irq(&pipe->rd_wait.lock);
>>   
>>   			/* Insert it into the buffer array */
>>   			buf = &pipe->bufs[head & mask];
>> @@ -1260,14 +1244,14 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
>>   	if (unlikely(!bufs))
>>   		return -ENOMEM;
>>   
>> -	spin_lock_irq(&pipe->rd_wait.lock);
>> +	__pipe_lock(pipe);
>>   	mask = pipe->ring_size - 1;
>>   	head = pipe->head;
>>   	tail = pipe->tail;
>>   
>>   	n = pipe_occupancy(head, tail);
>>   	if (nr_slots < n) {
>> -		spin_unlock_irq(&pipe->rd_wait.lock);
>> +		__pipe_unlock(pipe);
>>   		kfree(bufs);
>>   		return -EBUSY;
>>   	}
>> @@ -1303,7 +1287,7 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
>>   	pipe->tail = tail;
>>   	pipe->head = head;
>>   
>> -	spin_unlock_irq(&pipe->rd_wait.lock);
>> +	__pipe_unlock(pipe);
>>   
>>   	/* This might have made more room for writers */
>>   	wake_up_interruptible(&pipe->wr_wait);
>> diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
>> index 6cb65df3e3ba..f5084daf6eaf 100644
>> --- a/include/linux/pipe_fs_i.h
>> +++ b/include/linux/pipe_fs_i.h
>> @@ -2,6 +2,8 @@
>>   #ifndef _LINUX_PIPE_FS_I_H
>>   #define _LINUX_PIPE_FS_I_H
>>   
>> +#include <linux/fs.h>
>> +
>>   #define PIPE_DEF_BUFFERS	16
>>   
>>   #define PIPE_BUF_FLAG_LRU	0x01	/* page is on the LRU */
>> @@ -223,6 +225,16 @@ static inline void pipe_discard_from(struct pipe_inode_info *pipe,
>>   #define PIPE_SIZE		PAGE_SIZE
>>   
>>   /* Pipe lock and unlock operations */
>> +static inline void __pipe_lock(struct pipe_inode_info *pipe)
>> +{
>> +	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
>> +}
>> +
>> +static inline void __pipe_unlock(struct pipe_inode_info *pipe)
>> +{
>> +	mutex_unlock(&pipe->mutex);
>> +}
>> +
>>   void pipe_lock(struct pipe_inode_info *);
>>   void pipe_unlock(struct pipe_inode_info *);
>>   void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
>> diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
>> index a6f9bdd956c3..92e46cfe9419 100644
>> --- a/kernel/watch_queue.c
>> +++ b/kernel/watch_queue.c
>> @@ -108,7 +108,7 @@ static bool post_one_notification(struct watch_queue *wqueue,
>>   	if (!pipe)
>>   		return false;
>>   
>> -	spin_lock_irq(&pipe->rd_wait.lock);
>> +	__pipe_lock(pipe);
>>   
>>   	mask = pipe->ring_size - 1;
>>   	head = pipe->head;
>> @@ -135,17 +135,17 @@ static bool post_one_notification(struct watch_queue *wqueue,
>>   	buf->offset = offset;
>>   	buf->len = len;
>>   	buf->flags = PIPE_BUF_FLAG_WHOLE;
>> -	smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */
>> +	pipe->head = head + 1;
>>   
>>   	if (!test_and_clear_bit(note, wqueue->notes_bitmap)) {
>> -		spin_unlock_irq(&pipe->rd_wait.lock);
>> +		__pipe_unlock(pipe);
>>   		BUG();
>>   	}
>>   	wake_up_interruptible_sync_poll_locked(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
>>   	done = true;
>>   
>>   out:
>> -	spin_unlock_irq(&pipe->rd_wait.lock);
>> +	__pipe_unlock(pipe);
>>   	if (done)
>>   		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
>>   	return done;
>>
>> base-commit: c8451c141e07a8d05693f6c8d0e418fbb4b68bb7
>> -- 
>> 2.31.1
>>
Andrew Morton Jan. 6, 2023, 2:59 a.m. UTC | #3
On Tue,  3 Jan 2023 14:33:03 +0800 Hongchen Zhang <zhanghongchen@loongson.cn> wrote:

> Use spinlock in pipe_read/write cost too much time,IMO
> pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
> On the other hand, we can use __pipe_lock/unlock to protect the
> pipe->head/tail in pipe_resize_ring and post_one_notification.

Can you please test this with the test code in Linus's 0ddad21d3e99 and
check that performance is good?
kernel test robot Jan. 6, 2023, 7:08 a.m. UTC | #4
Greeting,

FYI, we noticed WARNING:possible_recursive_locking_detected due to commit (built with gcc-11):

commit: 2afced4b77a399b14eb2e2797968228d7ce69a2a ("[PATCH v2] pipe: use __pipe_{lock,unlock} instead of spinlock")
url: https://github.com/intel-lab-lkp/linux/commits/Hongchen-Zhang/pipe-use-__pipe_-lock-unlock-instead-of-spinlock/20230103-143459
patch link: https://lore.kernel.org/all/20230103063303.23345-1-zhanghongchen@loongson.cn/
patch subject: [PATCH v2] pipe: use __pipe_{lock,unlock} instead of spinlock

in testcase: trinity
version: trinity-static-x86_64-x86_64-1c734c75-1_2020-01-06
with following parameters:

	runtime: 300s

test-description: Trinity is a linux system call fuzz tester.
test-url: http://codemonkey.org.uk/projects/trinity/


on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G

caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):


If you fix the issue, kindly add following tag
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Link: https://lore.kernel.org/oe-lkp/202301061340.c954d61f-oliver.sang@intel.com


[  493.585155][ T1930] Unable to find swap-space signature
[  508.410154][ T1930]
[  508.410930][ T1930] ============================================
[  508.412135][ T1930] WARNING: possible recursive locking detected
[  508.413313][ T1930] 6.2.0-rc1-00085-g2afced4b77a3 #14 Not tainted
[  508.414545][ T1930] --------------------------------------------
[  508.415735][ T1930] trinity-c2/1930 is trying to acquire lock:
[ 508.416905][ T1930] ffff8881641d3068 (&pipe->mutex/1){+.+.}-{3:3}, at: pipe_resize_ring (??:?) 
[  508.418698][ T1930]
[  508.418698][ T1930] but task is already holding lock:
[ 508.420086][ T1930] ffff8881641d3068 (&pipe->mutex/1){+.+.}-{3:3}, at: pipe_fcntl (??:?) 
[  508.421781][ T1930]
[  508.421781][ T1930] other info that might help us debug this:
[  508.423268][ T1930]  Possible unsafe locking scenario:
[  508.423268][ T1930]
[  508.424688][ T1930]        CPU0
[  508.425410][ T1930]        ----
[  508.426105][ T1930]   lock(&pipe->mutex/1);
[  508.426972][ T1930]   lock(&pipe->mutex/1);
[  508.427840][ T1930]
[  508.427840][ T1930]  *** DEADLOCK ***
[  508.427840][ T1930]
[  508.429476][ T1930]  May be due to missing lock nesting notation
[  508.429476][ T1930]
[  508.430981][ T1930] 1 lock held by trinity-c2/1930:
[ 508.431945][ T1930] #0: ffff8881641d3068 (&pipe->mutex/1){+.+.}-{3:3}, at: pipe_fcntl (??:?) 
[  508.433663][ T1930]
[  508.433663][ T1930] stack backtrace:
[  508.434847][ T1930] CPU: 1 PID: 1930 Comm: trinity-c2 Not tainted 6.2.0-rc1-00085-g2afced4b77a3 #14 b8d9e225d32aed8adc2a69ef5f115031b187ce0c
[  508.436917][ T1930] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-5 04/01/2014
[  508.438696][ T1930] Call Trace:
[  508.439402][ T1930]  <TASK>
[ 508.440075][ T1930] dump_stack_lvl (??:?) 
[ 508.440972][ T1930] validate_chain.cold (lockdep.c:?) 
[ 508.441972][ T1930] ? check_prev_add (lockdep.c:?) 
[ 508.442911][ T1930] ? mark_held_locks (lockdep.c:?) 
[ 508.443824][ T1930] __lock_acquire (lockdep.c:?) 
[ 508.444714][ T1930] lock_acquire (??:?) 
[ 508.445575][ T1930] ? pipe_resize_ring (??:?) 
[ 508.446497][ T1930] ? rcu_read_unlock (main.c:?) 
[ 508.447403][ T1930] ? entry_SYSCALL_64_after_hwframe (??:?) 
[ 508.448483][ T1930] __mutex_lock (mutex.c:?) 
[ 508.449361][ T1930] ? pipe_resize_ring (??:?) 
[ 508.450248][ T1930] ? kasan_quarantine_reduce (??:?) 
[ 508.451257][ T1930] ? lock_downgrade (lockdep.c:?) 
[ 508.452161][ T1930] ? pipe_resize_ring (??:?) 
[ 508.453053][ T1930] ? mark_held_locks (lockdep.c:?) 
[ 508.453935][ T1930] ? mutex_lock_io_nested (mutex.c:?) 
[ 508.454925][ T1930] ? kasan_quarantine_reduce (??:?) 
[ 508.455990][ T1930] ? pipe_resize_ring (??:?) 
[ 508.456904][ T1930] ? pipe_resize_ring (??:?) 
[ 508.457862][ T1930] ? pipe_resize_ring (??:?) 
[ 508.458791][ T1930] pipe_resize_ring (??:?) 
[ 508.459702][ T1930] pipe_fcntl (??:?) 
[ 508.460537][ T1930] ? find_held_lock (lockdep.c:?) 
[ 508.461435][ T1930] do_fcntl (fcntl.c:?) 
[ 508.462220][ T1930] ? __task_pid_nr_ns (??:?) 
[ 508.463185][ T1930] ? f_getown (fcntl.c:?) 
[ 508.464058][ T1930] ? __x64_sys_alarm (??:?) 
[ 508.464959][ T1930] __x64_sys_fcntl (??:?) 
[ 508.465869][ T1930] do_syscall_64 (??:?) 
[ 508.466745][ T1930] entry_SYSCALL_64_after_hwframe (??:?) 
[  508.467721][ T1930] RIP: 0033:0x463519
[ 508.468493][ T1930] Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 db 59 00 00 c3 66 2e 0f 1f 84 00 00 00 00
All code
========
   0:	00 f3                	add    %dh,%bl
   2:	c3                   	retq   
   3:	66 2e 0f 1f 84 00 00 	nopw   %cs:0x0(%rax,%rax,1)
   a:	00 00 00 
   d:	0f 1f 40 00          	nopl   0x0(%rax)
  11:	48 89 f8             	mov    %rdi,%rax
  14:	48 89 f7             	mov    %rsi,%rdi
  17:	48 89 d6             	mov    %rdx,%rsi
  1a:	48 89 ca             	mov    %rcx,%rdx
  1d:	4d 89 c2             	mov    %r8,%r10
  20:	4d 89 c8             	mov    %r9,%r8
  23:	4c 8b 4c 24 08       	mov    0x8(%rsp),%r9
  28:	0f 05                	syscall 
  2a:*	48 3d 01 f0 ff ff    	cmp    $0xfffffffffffff001,%rax		<-- trapping instruction
  30:	0f 83 db 59 00 00    	jae    0x5a11
  36:	c3                   	retq   
  37:	66                   	data16
  38:	2e                   	cs
  39:	0f                   	.byte 0xf
  3a:	1f                   	(bad)  
  3b:	84 00                	test   %al,(%rax)
  3d:	00 00                	add    %al,(%rax)
	...

Code starting with the faulting instruction
===========================================
   0:	48 3d 01 f0 ff ff    	cmp    $0xfffffffffffff001,%rax
   6:	0f 83 db 59 00 00    	jae    0x59e7
   c:	c3                   	retq   
   d:	66                   	data16
   e:	2e                   	cs
   f:	0f                   	.byte 0xf
  10:	1f                   	(bad)  
  11:	84 00                	test   %al,(%rax)
  13:	00 00                	add    %al,(%rax)
	...
[  508.471663][ T1930] RSP: 002b:00007ffd6a2fa2c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000048
[  508.473176][ T1930] RAX: ffffffffffffffda RBX: 0000000000000048 RCX: 0000000000463519
[  508.474574][ T1930] RDX: 0000000000000066 RSI: 0000000000000407 RDI: 000000000000013e
[  508.476026][ T1930] RBP: 00007f36cc433000 R08: fffffffffffffff8 R09: 000000000000000f
[  508.477459][ T1930] R10: 0000000010000000 R11: 0000000000000246 R12: 0000000000000002
[  508.478870][ T1930] R13: 00007f36cc433058 R14: 0000000002241850 R15: 00007f36cc433000
[  508.482573][ T1930]  </TASK>
[  510.837665][ T2080] Unable to find swap-space signature
[  518.245623][ T2080] futex_wake_op: trinity-c1 tries to shift op by -1; fix this program
[  639.655156][  T298] hwclock: can't open '/dev/misc/rtc': No such file or directory
LKP: ttyS0: 273: LKP: rebooting forcely
[  651.988559][  T273] sysrq: Emergency Sync
[  651.989495][   T29] Emergency Sync complete
[  651.990747][  T273] sysrq: Resetting



To reproduce:

        # build kernel
	cd linux
	cp config-6.2.0-rc1-00085-g2afced4b77a3 .config
	make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 olddefconfig prepare modules_prepare bzImage modules
	make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 INSTALL_MOD_PATH=<mod-install-dir> modules_install
	cd <mod-install-dir>
	find lib/ | cpio -o -H newc --quiet | gzip > modules.cgz


        git clone https://github.com/intel/lkp-tests.git
        cd lkp-tests
        bin/lkp qemu -k <bzImage> -m modules.cgz job-script # job-script is attached in this email

        # if come across any failure that blocks the test,
        # please remove ~/.lkp and /lkp dir to run from a clean state.
Hongchen Zhang Jan. 6, 2023, 7:50 a.m. UTC | #5
Hi Andrew,
On 2023/1/6 am 10:59, Andrew Morton wrote:
> On Tue,  3 Jan 2023 14:33:03 +0800 Hongchen Zhang <zhanghongchen@loongson.cn> wrote:
> 
>> Use spinlock in pipe_read/write cost too much time,IMO
>> pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
>> On the other hand, we can use __pipe_lock/unlock to protect the
>> pipe->head/tail in pipe_resize_ring and post_one_notification.
> 
> Can you please test this with the test code in Linus's 0ddad21d3e99 and
> check that performance is good?
> 
I tested with the test code in Linus's 0ddad21d3e99,and get the 
following result:

1) before this patch
          13,136.54 msec task-clock           #    3.870 CPUs utilized 

          1,186,779      context-switches     #   90.342 K/sec 

            668,867      cpu-migrations       #   50.917 K/sec 

                895      page-faults          #   68.131 /sec 

     29,875,711,543      cycles               #    2.274 GHz 

     12,372,397,462      instructions         #    0.41  insn per cycle 

      2,480,235,723      branches             #  188.804 M/sec 

         47,191,943      branch-misses        #    1.90% of all branches 


        3.394806886 seconds time elapsed

        0.037869000 seconds user
        0.189346000 seconds sys

2) after this patch

          12,395.63 msec task-clock          #    4.138 CPUs utilized 

          1,193,381      context-switches    #   96.274 K/sec 

            585,543      cpu-migrations      #   47.238 K/sec 

              1,063      page-faults         #   85.756 /sec 

     27,691,587,226      cycles              #    2.234 GHz 

     11,738,307,999      instructions        #    0.42  insn per cycle 

      2,351,299,522      branches            #  189.688 M/sec 

         45,404,526      branch-misses       #    1.93% of all branches 


        2.995280878 seconds time elapsed

        0.010615000 seconds user
        0.206999000 seconds sys
After adding this patch, the time used on this test program becomes less.

Another thing, because of my carelessness, trinity tool tested a bug, I
will remove the unnecessary __pipe_{lock,unlock} in pipe_resize_ring in 
v3, because the lock is owned in pipe_fcntl/pipe_ioctl.

Thanks.
diff mbox series

Patch

diff --git a/fs/pipe.c b/fs/pipe.c
index 42c7ff41c2db..cf449779bf71 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -98,16 +98,6 @@  void pipe_unlock(struct pipe_inode_info *pipe)
 }
 EXPORT_SYMBOL(pipe_unlock);
 
-static inline void __pipe_lock(struct pipe_inode_info *pipe)
-{
-	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
-}
-
-static inline void __pipe_unlock(struct pipe_inode_info *pipe)
-{
-	mutex_unlock(&pipe->mutex);
-}
-
 void pipe_double_lock(struct pipe_inode_info *pipe1,
 		      struct pipe_inode_info *pipe2)
 {
@@ -253,8 +243,7 @@  pipe_read(struct kiocb *iocb, struct iov_iter *to)
 	 */
 	was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
 	for (;;) {
-		/* Read ->head with a barrier vs post_one_notification() */
-		unsigned int head = smp_load_acquire(&pipe->head);
+		unsigned int head = pipe->head;
 		unsigned int tail = pipe->tail;
 		unsigned int mask = pipe->ring_size - 1;
 
@@ -322,14 +311,12 @@  pipe_read(struct kiocb *iocb, struct iov_iter *to)
 
 			if (!buf->len) {
 				pipe_buf_release(pipe, buf);
-				spin_lock_irq(&pipe->rd_wait.lock);
 #ifdef CONFIG_WATCH_QUEUE
 				if (buf->flags & PIPE_BUF_FLAG_LOSS)
 					pipe->note_loss = true;
 #endif
 				tail++;
 				pipe->tail = tail;
-				spin_unlock_irq(&pipe->rd_wait.lock);
 			}
 			total_len -= chars;
 			if (!total_len)
@@ -506,16 +493,13 @@  pipe_write(struct kiocb *iocb, struct iov_iter *from)
 			 * it, either the reader will consume it or it'll still
 			 * be there for the next write.
 			 */
-			spin_lock_irq(&pipe->rd_wait.lock);
 
 			head = pipe->head;
 			if (pipe_full(head, pipe->tail, pipe->max_usage)) {
-				spin_unlock_irq(&pipe->rd_wait.lock);
 				continue;
 			}
 
 			pipe->head = head + 1;
-			spin_unlock_irq(&pipe->rd_wait.lock);
 
 			/* Insert it into the buffer array */
 			buf = &pipe->bufs[head & mask];
@@ -1260,14 +1244,14 @@  int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
 	if (unlikely(!bufs))
 		return -ENOMEM;
 
-	spin_lock_irq(&pipe->rd_wait.lock);
+	__pipe_lock(pipe);
 	mask = pipe->ring_size - 1;
 	head = pipe->head;
 	tail = pipe->tail;
 
 	n = pipe_occupancy(head, tail);
 	if (nr_slots < n) {
-		spin_unlock_irq(&pipe->rd_wait.lock);
+		__pipe_unlock(pipe);
 		kfree(bufs);
 		return -EBUSY;
 	}
@@ -1303,7 +1287,7 @@  int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
 	pipe->tail = tail;
 	pipe->head = head;
 
-	spin_unlock_irq(&pipe->rd_wait.lock);
+	__pipe_unlock(pipe);
 
 	/* This might have made more room for writers */
 	wake_up_interruptible(&pipe->wr_wait);
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 6cb65df3e3ba..f5084daf6eaf 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -2,6 +2,8 @@ 
 #ifndef _LINUX_PIPE_FS_I_H
 #define _LINUX_PIPE_FS_I_H
 
+#include <linux/fs.h>
+
 #define PIPE_DEF_BUFFERS	16
 
 #define PIPE_BUF_FLAG_LRU	0x01	/* page is on the LRU */
@@ -223,6 +225,16 @@  static inline void pipe_discard_from(struct pipe_inode_info *pipe,
 #define PIPE_SIZE		PAGE_SIZE
 
 /* Pipe lock and unlock operations */
+static inline void __pipe_lock(struct pipe_inode_info *pipe)
+{
+	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
+}
+
+static inline void __pipe_unlock(struct pipe_inode_info *pipe)
+{
+	mutex_unlock(&pipe->mutex);
+}
+
 void pipe_lock(struct pipe_inode_info *);
 void pipe_unlock(struct pipe_inode_info *);
 void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
index a6f9bdd956c3..92e46cfe9419 100644
--- a/kernel/watch_queue.c
+++ b/kernel/watch_queue.c
@@ -108,7 +108,7 @@  static bool post_one_notification(struct watch_queue *wqueue,
 	if (!pipe)
 		return false;
 
-	spin_lock_irq(&pipe->rd_wait.lock);
+	__pipe_lock(pipe);
 
 	mask = pipe->ring_size - 1;
 	head = pipe->head;
@@ -135,17 +135,17 @@  static bool post_one_notification(struct watch_queue *wqueue,
 	buf->offset = offset;
 	buf->len = len;
 	buf->flags = PIPE_BUF_FLAG_WHOLE;
-	smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */
+	pipe->head = head + 1;
 
 	if (!test_and_clear_bit(note, wqueue->notes_bitmap)) {
-		spin_unlock_irq(&pipe->rd_wait.lock);
+		__pipe_unlock(pipe);
 		BUG();
 	}
 	wake_up_interruptible_sync_poll_locked(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
 	done = true;
 
 out:
-	spin_unlock_irq(&pipe->rd_wait.lock);
+	__pipe_unlock(pipe);
 	if (done)
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 	return done;