Message ID | 20230103063303.23345-1-zhanghongchen@loongson.cn (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | [v2] pipe: use __pipe_{lock,unlock} instead of spinlock | expand |
On Tue, Jan 03, 2023 at 02:33:03PM +0800, Hongchen Zhang wrote: > Use spinlock in pipe_read/write cost too much time,IMO Everybody has an opinion. Do you have data? > pipe->{head,tail} can be protected by __pipe_{lock,unlock}. > On the other hand, we can use __pipe_lock/unlock to protect the > pipe->head/tail in pipe_resize_ring and post_one_notification. > > Signed-off-by: Hongchen Zhang <zhanghongchen@loongson.cn> > --- you're supposed to write here what changes you made between v1 and v2. > fs/pipe.c | 24 ++++-------------------- > include/linux/pipe_fs_i.h | 12 ++++++++++++ > kernel/watch_queue.c | 8 ++++---- > 3 files changed, 20 insertions(+), 24 deletions(-) > > diff --git a/fs/pipe.c b/fs/pipe.c > index 42c7ff41c2db..cf449779bf71 100644 > --- a/fs/pipe.c > +++ b/fs/pipe.c > @@ -98,16 +98,6 @@ void pipe_unlock(struct pipe_inode_info *pipe) > } > EXPORT_SYMBOL(pipe_unlock); > > -static inline void __pipe_lock(struct pipe_inode_info *pipe) > -{ > - mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT); > -} > - > -static inline void __pipe_unlock(struct pipe_inode_info *pipe) > -{ > - mutex_unlock(&pipe->mutex); > -} > - > void pipe_double_lock(struct pipe_inode_info *pipe1, > struct pipe_inode_info *pipe2) > { > @@ -253,8 +243,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) > */ > was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage); > for (;;) { > - /* Read ->head with a barrier vs post_one_notification() */ > - unsigned int head = smp_load_acquire(&pipe->head); > + unsigned int head = pipe->head; > unsigned int tail = pipe->tail; > unsigned int mask = pipe->ring_size - 1; > > @@ -322,14 +311,12 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) > > if (!buf->len) { > pipe_buf_release(pipe, buf); > - spin_lock_irq(&pipe->rd_wait.lock); > #ifdef CONFIG_WATCH_QUEUE > if (buf->flags & PIPE_BUF_FLAG_LOSS) > pipe->note_loss = true; > #endif > tail++; > pipe->tail = tail; > - spin_unlock_irq(&pipe->rd_wait.lock); > } > total_len -= chars; > if (!total_len) > @@ -506,16 +493,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) > * it, either the reader will consume it or it'll still > * be there for the next write. > */ > - spin_lock_irq(&pipe->rd_wait.lock); > > head = pipe->head; > if (pipe_full(head, pipe->tail, pipe->max_usage)) { > - spin_unlock_irq(&pipe->rd_wait.lock); > continue; > } > > pipe->head = head + 1; > - spin_unlock_irq(&pipe->rd_wait.lock); > > /* Insert it into the buffer array */ > buf = &pipe->bufs[head & mask]; > @@ -1260,14 +1244,14 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) > if (unlikely(!bufs)) > return -ENOMEM; > > - spin_lock_irq(&pipe->rd_wait.lock); > + __pipe_lock(pipe); > mask = pipe->ring_size - 1; > head = pipe->head; > tail = pipe->tail; > > n = pipe_occupancy(head, tail); > if (nr_slots < n) { > - spin_unlock_irq(&pipe->rd_wait.lock); > + __pipe_unlock(pipe); > kfree(bufs); > return -EBUSY; > } > @@ -1303,7 +1287,7 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) > pipe->tail = tail; > pipe->head = head; > > - spin_unlock_irq(&pipe->rd_wait.lock); > + __pipe_unlock(pipe); > > /* This might have made more room for writers */ > wake_up_interruptible(&pipe->wr_wait); > diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h > index 6cb65df3e3ba..f5084daf6eaf 100644 > --- a/include/linux/pipe_fs_i.h > +++ b/include/linux/pipe_fs_i.h > @@ -2,6 +2,8 @@ > #ifndef _LINUX_PIPE_FS_I_H > #define _LINUX_PIPE_FS_I_H > > +#include <linux/fs.h> > + > #define PIPE_DEF_BUFFERS 16 > > #define PIPE_BUF_FLAG_LRU 0x01 /* page is on the LRU */ > @@ -223,6 +225,16 @@ static inline void pipe_discard_from(struct pipe_inode_info *pipe, > #define PIPE_SIZE PAGE_SIZE > > /* Pipe lock and unlock operations */ > +static inline void __pipe_lock(struct pipe_inode_info *pipe) > +{ > + mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT); > +} > + > +static inline void __pipe_unlock(struct pipe_inode_info *pipe) > +{ > + mutex_unlock(&pipe->mutex); > +} > + > void pipe_lock(struct pipe_inode_info *); > void pipe_unlock(struct pipe_inode_info *); > void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); > diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c > index a6f9bdd956c3..92e46cfe9419 100644 > --- a/kernel/watch_queue.c > +++ b/kernel/watch_queue.c > @@ -108,7 +108,7 @@ static bool post_one_notification(struct watch_queue *wqueue, > if (!pipe) > return false; > > - spin_lock_irq(&pipe->rd_wait.lock); > + __pipe_lock(pipe); > > mask = pipe->ring_size - 1; > head = pipe->head; > @@ -135,17 +135,17 @@ static bool post_one_notification(struct watch_queue *wqueue, > buf->offset = offset; > buf->len = len; > buf->flags = PIPE_BUF_FLAG_WHOLE; > - smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */ > + pipe->head = head + 1; > > if (!test_and_clear_bit(note, wqueue->notes_bitmap)) { > - spin_unlock_irq(&pipe->rd_wait.lock); > + __pipe_unlock(pipe); > BUG(); > } > wake_up_interruptible_sync_poll_locked(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); > done = true; > > out: > - spin_unlock_irq(&pipe->rd_wait.lock); > + __pipe_unlock(pipe); > if (done) > kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); > return done; > > base-commit: c8451c141e07a8d05693f6c8d0e418fbb4b68bb7 > -- > 2.31.1 >
Hi Matthew, On 2023/1/4 am 1:51, Matthew Wilcox wrote: > On Tue, Jan 03, 2023 at 02:33:03PM +0800, Hongchen Zhang wrote: >> Use spinlock in pipe_read/write cost too much time,IMO > > Everybody has an opinion. Do you have data? > I tested this patch using UnixBench's pipe test case on a x86_64 machine,and get the following data: 1) before this patch System Benchmarks Partial Index BASELINE RESULT INDEX Pipe Throughput 12440.0 493023.3 396.3 ======== System Benchmarks Index Score (Partial Only) 396.3 2) after this patch System Benchmarks Partial Index BASELINE RESULT INDEX Pipe Throughput 12440.0 507551.4 408.0 ======== System Benchmarks Index Score (Partial Only) 408.0 so we get ~3% speedup. >> pipe->{head,tail} can be protected by __pipe_{lock,unlock}. >> On the other hand, we can use __pipe_lock/unlock to protect the >> pipe->head/tail in pipe_resize_ring and post_one_notification. >> >> Signed-off-by: Hongchen Zhang <zhanghongchen@loongson.cn> >> --- > > you're supposed to write here what changes you made between v1 and v2. > I added the linux/fs.h in v2 to fix the linux-test-robot test error in v1. >> fs/pipe.c | 24 ++++-------------------- >> include/linux/pipe_fs_i.h | 12 ++++++++++++ >> kernel/watch_queue.c | 8 ++++---- >> 3 files changed, 20 insertions(+), 24 deletions(-) >> >> diff --git a/fs/pipe.c b/fs/pipe.c >> index 42c7ff41c2db..cf449779bf71 100644 >> --- a/fs/pipe.c >> +++ b/fs/pipe.c >> @@ -98,16 +98,6 @@ void pipe_unlock(struct pipe_inode_info *pipe) >> } >> EXPORT_SYMBOL(pipe_unlock); >> >> -static inline void __pipe_lock(struct pipe_inode_info *pipe) >> -{ >> - mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT); >> -} >> - >> -static inline void __pipe_unlock(struct pipe_inode_info *pipe) >> -{ >> - mutex_unlock(&pipe->mutex); >> -} >> - >> void pipe_double_lock(struct pipe_inode_info *pipe1, >> struct pipe_inode_info *pipe2) >> { >> @@ -253,8 +243,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) >> */ >> was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage); >> for (;;) { >> - /* Read ->head with a barrier vs post_one_notification() */ >> - unsigned int head = smp_load_acquire(&pipe->head); >> + unsigned int head = pipe->head; >> unsigned int tail = pipe->tail; >> unsigned int mask = pipe->ring_size - 1; >> >> @@ -322,14 +311,12 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) >> >> if (!buf->len) { >> pipe_buf_release(pipe, buf); >> - spin_lock_irq(&pipe->rd_wait.lock); >> #ifdef CONFIG_WATCH_QUEUE >> if (buf->flags & PIPE_BUF_FLAG_LOSS) >> pipe->note_loss = true; >> #endif >> tail++; >> pipe->tail = tail; >> - spin_unlock_irq(&pipe->rd_wait.lock); >> } >> total_len -= chars; >> if (!total_len) >> @@ -506,16 +493,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) >> * it, either the reader will consume it or it'll still >> * be there for the next write. >> */ >> - spin_lock_irq(&pipe->rd_wait.lock); >> >> head = pipe->head; >> if (pipe_full(head, pipe->tail, pipe->max_usage)) { >> - spin_unlock_irq(&pipe->rd_wait.lock); >> continue; >> } >> >> pipe->head = head + 1; >> - spin_unlock_irq(&pipe->rd_wait.lock); >> >> /* Insert it into the buffer array */ >> buf = &pipe->bufs[head & mask]; >> @@ -1260,14 +1244,14 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) >> if (unlikely(!bufs)) >> return -ENOMEM; >> >> - spin_lock_irq(&pipe->rd_wait.lock); >> + __pipe_lock(pipe); >> mask = pipe->ring_size - 1; >> head = pipe->head; >> tail = pipe->tail; >> >> n = pipe_occupancy(head, tail); >> if (nr_slots < n) { >> - spin_unlock_irq(&pipe->rd_wait.lock); >> + __pipe_unlock(pipe); >> kfree(bufs); >> return -EBUSY; >> } >> @@ -1303,7 +1287,7 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) >> pipe->tail = tail; >> pipe->head = head; >> >> - spin_unlock_irq(&pipe->rd_wait.lock); >> + __pipe_unlock(pipe); >> >> /* This might have made more room for writers */ >> wake_up_interruptible(&pipe->wr_wait); >> diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h >> index 6cb65df3e3ba..f5084daf6eaf 100644 >> --- a/include/linux/pipe_fs_i.h >> +++ b/include/linux/pipe_fs_i.h >> @@ -2,6 +2,8 @@ >> #ifndef _LINUX_PIPE_FS_I_H >> #define _LINUX_PIPE_FS_I_H >> >> +#include <linux/fs.h> >> + >> #define PIPE_DEF_BUFFERS 16 >> >> #define PIPE_BUF_FLAG_LRU 0x01 /* page is on the LRU */ >> @@ -223,6 +225,16 @@ static inline void pipe_discard_from(struct pipe_inode_info *pipe, >> #define PIPE_SIZE PAGE_SIZE >> >> /* Pipe lock and unlock operations */ >> +static inline void __pipe_lock(struct pipe_inode_info *pipe) >> +{ >> + mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT); >> +} >> + >> +static inline void __pipe_unlock(struct pipe_inode_info *pipe) >> +{ >> + mutex_unlock(&pipe->mutex); >> +} >> + >> void pipe_lock(struct pipe_inode_info *); >> void pipe_unlock(struct pipe_inode_info *); >> void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); >> diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c >> index a6f9bdd956c3..92e46cfe9419 100644 >> --- a/kernel/watch_queue.c >> +++ b/kernel/watch_queue.c >> @@ -108,7 +108,7 @@ static bool post_one_notification(struct watch_queue *wqueue, >> if (!pipe) >> return false; >> >> - spin_lock_irq(&pipe->rd_wait.lock); >> + __pipe_lock(pipe); >> >> mask = pipe->ring_size - 1; >> head = pipe->head; >> @@ -135,17 +135,17 @@ static bool post_one_notification(struct watch_queue *wqueue, >> buf->offset = offset; >> buf->len = len; >> buf->flags = PIPE_BUF_FLAG_WHOLE; >> - smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */ >> + pipe->head = head + 1; >> >> if (!test_and_clear_bit(note, wqueue->notes_bitmap)) { >> - spin_unlock_irq(&pipe->rd_wait.lock); >> + __pipe_unlock(pipe); >> BUG(); >> } >> wake_up_interruptible_sync_poll_locked(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); >> done = true; >> >> out: >> - spin_unlock_irq(&pipe->rd_wait.lock); >> + __pipe_unlock(pipe); >> if (done) >> kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); >> return done; >> >> base-commit: c8451c141e07a8d05693f6c8d0e418fbb4b68bb7 >> -- >> 2.31.1 >>
On Tue, 3 Jan 2023 14:33:03 +0800 Hongchen Zhang <zhanghongchen@loongson.cn> wrote: > Use spinlock in pipe_read/write cost too much time,IMO > pipe->{head,tail} can be protected by __pipe_{lock,unlock}. > On the other hand, we can use __pipe_lock/unlock to protect the > pipe->head/tail in pipe_resize_ring and post_one_notification. Can you please test this with the test code in Linus's 0ddad21d3e99 and check that performance is good?
Greeting, FYI, we noticed WARNING:possible_recursive_locking_detected due to commit (built with gcc-11): commit: 2afced4b77a399b14eb2e2797968228d7ce69a2a ("[PATCH v2] pipe: use __pipe_{lock,unlock} instead of spinlock") url: https://github.com/intel-lab-lkp/linux/commits/Hongchen-Zhang/pipe-use-__pipe_-lock-unlock-instead-of-spinlock/20230103-143459 patch link: https://lore.kernel.org/all/20230103063303.23345-1-zhanghongchen@loongson.cn/ patch subject: [PATCH v2] pipe: use __pipe_{lock,unlock} instead of spinlock in testcase: trinity version: trinity-static-x86_64-x86_64-1c734c75-1_2020-01-06 with following parameters: runtime: 300s test-description: Trinity is a linux system call fuzz tester. test-url: http://codemonkey.org.uk/projects/trinity/ on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace): If you fix the issue, kindly add following tag | Reported-by: kernel test robot <oliver.sang@intel.com> | Link: https://lore.kernel.org/oe-lkp/202301061340.c954d61f-oliver.sang@intel.com [ 493.585155][ T1930] Unable to find swap-space signature [ 508.410154][ T1930] [ 508.410930][ T1930] ============================================ [ 508.412135][ T1930] WARNING: possible recursive locking detected [ 508.413313][ T1930] 6.2.0-rc1-00085-g2afced4b77a3 #14 Not tainted [ 508.414545][ T1930] -------------------------------------------- [ 508.415735][ T1930] trinity-c2/1930 is trying to acquire lock: [ 508.416905][ T1930] ffff8881641d3068 (&pipe->mutex/1){+.+.}-{3:3}, at: pipe_resize_ring (??:?) [ 508.418698][ T1930] [ 508.418698][ T1930] but task is already holding lock: [ 508.420086][ T1930] ffff8881641d3068 (&pipe->mutex/1){+.+.}-{3:3}, at: pipe_fcntl (??:?) [ 508.421781][ T1930] [ 508.421781][ T1930] other info that might help us debug this: [ 508.423268][ T1930] Possible unsafe locking scenario: [ 508.423268][ T1930] [ 508.424688][ T1930] CPU0 [ 508.425410][ T1930] ---- [ 508.426105][ T1930] lock(&pipe->mutex/1); [ 508.426972][ T1930] lock(&pipe->mutex/1); [ 508.427840][ T1930] [ 508.427840][ T1930] *** DEADLOCK *** [ 508.427840][ T1930] [ 508.429476][ T1930] May be due to missing lock nesting notation [ 508.429476][ T1930] [ 508.430981][ T1930] 1 lock held by trinity-c2/1930: [ 508.431945][ T1930] #0: ffff8881641d3068 (&pipe->mutex/1){+.+.}-{3:3}, at: pipe_fcntl (??:?) [ 508.433663][ T1930] [ 508.433663][ T1930] stack backtrace: [ 508.434847][ T1930] CPU: 1 PID: 1930 Comm: trinity-c2 Not tainted 6.2.0-rc1-00085-g2afced4b77a3 #14 b8d9e225d32aed8adc2a69ef5f115031b187ce0c [ 508.436917][ T1930] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-5 04/01/2014 [ 508.438696][ T1930] Call Trace: [ 508.439402][ T1930] <TASK> [ 508.440075][ T1930] dump_stack_lvl (??:?) [ 508.440972][ T1930] validate_chain.cold (lockdep.c:?) [ 508.441972][ T1930] ? check_prev_add (lockdep.c:?) [ 508.442911][ T1930] ? mark_held_locks (lockdep.c:?) [ 508.443824][ T1930] __lock_acquire (lockdep.c:?) [ 508.444714][ T1930] lock_acquire (??:?) [ 508.445575][ T1930] ? pipe_resize_ring (??:?) [ 508.446497][ T1930] ? rcu_read_unlock (main.c:?) [ 508.447403][ T1930] ? entry_SYSCALL_64_after_hwframe (??:?) [ 508.448483][ T1930] __mutex_lock (mutex.c:?) [ 508.449361][ T1930] ? pipe_resize_ring (??:?) [ 508.450248][ T1930] ? kasan_quarantine_reduce (??:?) [ 508.451257][ T1930] ? lock_downgrade (lockdep.c:?) [ 508.452161][ T1930] ? pipe_resize_ring (??:?) [ 508.453053][ T1930] ? mark_held_locks (lockdep.c:?) [ 508.453935][ T1930] ? mutex_lock_io_nested (mutex.c:?) [ 508.454925][ T1930] ? kasan_quarantine_reduce (??:?) [ 508.455990][ T1930] ? pipe_resize_ring (??:?) [ 508.456904][ T1930] ? pipe_resize_ring (??:?) [ 508.457862][ T1930] ? pipe_resize_ring (??:?) [ 508.458791][ T1930] pipe_resize_ring (??:?) [ 508.459702][ T1930] pipe_fcntl (??:?) [ 508.460537][ T1930] ? find_held_lock (lockdep.c:?) [ 508.461435][ T1930] do_fcntl (fcntl.c:?) [ 508.462220][ T1930] ? __task_pid_nr_ns (??:?) [ 508.463185][ T1930] ? f_getown (fcntl.c:?) [ 508.464058][ T1930] ? __x64_sys_alarm (??:?) [ 508.464959][ T1930] __x64_sys_fcntl (??:?) [ 508.465869][ T1930] do_syscall_64 (??:?) [ 508.466745][ T1930] entry_SYSCALL_64_after_hwframe (??:?) [ 508.467721][ T1930] RIP: 0033:0x463519 [ 508.468493][ T1930] Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 db 59 00 00 c3 66 2e 0f 1f 84 00 00 00 00 All code ======== 0: 00 f3 add %dh,%bl 2: c3 retq 3: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) a: 00 00 00 d: 0f 1f 40 00 nopl 0x0(%rax) 11: 48 89 f8 mov %rdi,%rax 14: 48 89 f7 mov %rsi,%rdi 17: 48 89 d6 mov %rdx,%rsi 1a: 48 89 ca mov %rcx,%rdx 1d: 4d 89 c2 mov %r8,%r10 20: 4d 89 c8 mov %r9,%r8 23: 4c 8b 4c 24 08 mov 0x8(%rsp),%r9 28: 0f 05 syscall 2a:* 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax <-- trapping instruction 30: 0f 83 db 59 00 00 jae 0x5a11 36: c3 retq 37: 66 data16 38: 2e cs 39: 0f .byte 0xf 3a: 1f (bad) 3b: 84 00 test %al,(%rax) 3d: 00 00 add %al,(%rax) ... Code starting with the faulting instruction =========================================== 0: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax 6: 0f 83 db 59 00 00 jae 0x59e7 c: c3 retq d: 66 data16 e: 2e cs f: 0f .byte 0xf 10: 1f (bad) 11: 84 00 test %al,(%rax) 13: 00 00 add %al,(%rax) ... [ 508.471663][ T1930] RSP: 002b:00007ffd6a2fa2c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000048 [ 508.473176][ T1930] RAX: ffffffffffffffda RBX: 0000000000000048 RCX: 0000000000463519 [ 508.474574][ T1930] RDX: 0000000000000066 RSI: 0000000000000407 RDI: 000000000000013e [ 508.476026][ T1930] RBP: 00007f36cc433000 R08: fffffffffffffff8 R09: 000000000000000f [ 508.477459][ T1930] R10: 0000000010000000 R11: 0000000000000246 R12: 0000000000000002 [ 508.478870][ T1930] R13: 00007f36cc433058 R14: 0000000002241850 R15: 00007f36cc433000 [ 508.482573][ T1930] </TASK> [ 510.837665][ T2080] Unable to find swap-space signature [ 518.245623][ T2080] futex_wake_op: trinity-c1 tries to shift op by -1; fix this program [ 639.655156][ T298] hwclock: can't open '/dev/misc/rtc': No such file or directory LKP: ttyS0: 273: LKP: rebooting forcely [ 651.988559][ T273] sysrq: Emergency Sync [ 651.989495][ T29] Emergency Sync complete [ 651.990747][ T273] sysrq: Resetting To reproduce: # build kernel cd linux cp config-6.2.0-rc1-00085-g2afced4b77a3 .config make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 olddefconfig prepare modules_prepare bzImage modules make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 INSTALL_MOD_PATH=<mod-install-dir> modules_install cd <mod-install-dir> find lib/ | cpio -o -H newc --quiet | gzip > modules.cgz git clone https://github.com/intel/lkp-tests.git cd lkp-tests bin/lkp qemu -k <bzImage> -m modules.cgz job-script # job-script is attached in this email # if come across any failure that blocks the test, # please remove ~/.lkp and /lkp dir to run from a clean state.
Hi Andrew, On 2023/1/6 am 10:59, Andrew Morton wrote: > On Tue, 3 Jan 2023 14:33:03 +0800 Hongchen Zhang <zhanghongchen@loongson.cn> wrote: > >> Use spinlock in pipe_read/write cost too much time,IMO >> pipe->{head,tail} can be protected by __pipe_{lock,unlock}. >> On the other hand, we can use __pipe_lock/unlock to protect the >> pipe->head/tail in pipe_resize_ring and post_one_notification. > > Can you please test this with the test code in Linus's 0ddad21d3e99 and > check that performance is good? > I tested with the test code in Linus's 0ddad21d3e99,and get the following result: 1) before this patch 13,136.54 msec task-clock # 3.870 CPUs utilized 1,186,779 context-switches # 90.342 K/sec 668,867 cpu-migrations # 50.917 K/sec 895 page-faults # 68.131 /sec 29,875,711,543 cycles # 2.274 GHz 12,372,397,462 instructions # 0.41 insn per cycle 2,480,235,723 branches # 188.804 M/sec 47,191,943 branch-misses # 1.90% of all branches 3.394806886 seconds time elapsed 0.037869000 seconds user 0.189346000 seconds sys 2) after this patch 12,395.63 msec task-clock # 4.138 CPUs utilized 1,193,381 context-switches # 96.274 K/sec 585,543 cpu-migrations # 47.238 K/sec 1,063 page-faults # 85.756 /sec 27,691,587,226 cycles # 2.234 GHz 11,738,307,999 instructions # 0.42 insn per cycle 2,351,299,522 branches # 189.688 M/sec 45,404,526 branch-misses # 1.93% of all branches 2.995280878 seconds time elapsed 0.010615000 seconds user 0.206999000 seconds sys After adding this patch, the time used on this test program becomes less. Another thing, because of my carelessness, trinity tool tested a bug, I will remove the unnecessary __pipe_{lock,unlock} in pipe_resize_ring in v3, because the lock is owned in pipe_fcntl/pipe_ioctl. Thanks.
diff --git a/fs/pipe.c b/fs/pipe.c index 42c7ff41c2db..cf449779bf71 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -98,16 +98,6 @@ void pipe_unlock(struct pipe_inode_info *pipe) } EXPORT_SYMBOL(pipe_unlock); -static inline void __pipe_lock(struct pipe_inode_info *pipe) -{ - mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT); -} - -static inline void __pipe_unlock(struct pipe_inode_info *pipe) -{ - mutex_unlock(&pipe->mutex); -} - void pipe_double_lock(struct pipe_inode_info *pipe1, struct pipe_inode_info *pipe2) { @@ -253,8 +243,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) */ was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage); for (;;) { - /* Read ->head with a barrier vs post_one_notification() */ - unsigned int head = smp_load_acquire(&pipe->head); + unsigned int head = pipe->head; unsigned int tail = pipe->tail; unsigned int mask = pipe->ring_size - 1; @@ -322,14 +311,12 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) if (!buf->len) { pipe_buf_release(pipe, buf); - spin_lock_irq(&pipe->rd_wait.lock); #ifdef CONFIG_WATCH_QUEUE if (buf->flags & PIPE_BUF_FLAG_LOSS) pipe->note_loss = true; #endif tail++; pipe->tail = tail; - spin_unlock_irq(&pipe->rd_wait.lock); } total_len -= chars; if (!total_len) @@ -506,16 +493,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) * it, either the reader will consume it or it'll still * be there for the next write. */ - spin_lock_irq(&pipe->rd_wait.lock); head = pipe->head; if (pipe_full(head, pipe->tail, pipe->max_usage)) { - spin_unlock_irq(&pipe->rd_wait.lock); continue; } pipe->head = head + 1; - spin_unlock_irq(&pipe->rd_wait.lock); /* Insert it into the buffer array */ buf = &pipe->bufs[head & mask]; @@ -1260,14 +1244,14 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) if (unlikely(!bufs)) return -ENOMEM; - spin_lock_irq(&pipe->rd_wait.lock); + __pipe_lock(pipe); mask = pipe->ring_size - 1; head = pipe->head; tail = pipe->tail; n = pipe_occupancy(head, tail); if (nr_slots < n) { - spin_unlock_irq(&pipe->rd_wait.lock); + __pipe_unlock(pipe); kfree(bufs); return -EBUSY; } @@ -1303,7 +1287,7 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) pipe->tail = tail; pipe->head = head; - spin_unlock_irq(&pipe->rd_wait.lock); + __pipe_unlock(pipe); /* This might have made more room for writers */ wake_up_interruptible(&pipe->wr_wait); diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 6cb65df3e3ba..f5084daf6eaf 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -2,6 +2,8 @@ #ifndef _LINUX_PIPE_FS_I_H #define _LINUX_PIPE_FS_I_H +#include <linux/fs.h> + #define PIPE_DEF_BUFFERS 16 #define PIPE_BUF_FLAG_LRU 0x01 /* page is on the LRU */ @@ -223,6 +225,16 @@ static inline void pipe_discard_from(struct pipe_inode_info *pipe, #define PIPE_SIZE PAGE_SIZE /* Pipe lock and unlock operations */ +static inline void __pipe_lock(struct pipe_inode_info *pipe) +{ + mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT); +} + +static inline void __pipe_unlock(struct pipe_inode_info *pipe) +{ + mutex_unlock(&pipe->mutex); +} + void pipe_lock(struct pipe_inode_info *); void pipe_unlock(struct pipe_inode_info *); void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index a6f9bdd956c3..92e46cfe9419 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -108,7 +108,7 @@ static bool post_one_notification(struct watch_queue *wqueue, if (!pipe) return false; - spin_lock_irq(&pipe->rd_wait.lock); + __pipe_lock(pipe); mask = pipe->ring_size - 1; head = pipe->head; @@ -135,17 +135,17 @@ static bool post_one_notification(struct watch_queue *wqueue, buf->offset = offset; buf->len = len; buf->flags = PIPE_BUF_FLAG_WHOLE; - smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */ + pipe->head = head + 1; if (!test_and_clear_bit(note, wqueue->notes_bitmap)) { - spin_unlock_irq(&pipe->rd_wait.lock); + __pipe_unlock(pipe); BUG(); } wake_up_interruptible_sync_poll_locked(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); done = true; out: - spin_unlock_irq(&pipe->rd_wait.lock); + __pipe_unlock(pipe); if (done) kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); return done;
Use spinlock in pipe_read/write cost too much time,IMO pipe->{head,tail} can be protected by __pipe_{lock,unlock}. On the other hand, we can use __pipe_lock/unlock to protect the pipe->head/tail in pipe_resize_ring and post_one_notification. Signed-off-by: Hongchen Zhang <zhanghongchen@loongson.cn> --- fs/pipe.c | 24 ++++-------------------- include/linux/pipe_fs_i.h | 12 ++++++++++++ kernel/watch_queue.c | 8 ++++---- 3 files changed, 20 insertions(+), 24 deletions(-) base-commit: c8451c141e07a8d05693f6c8d0e418fbb4b68bb7