diff mbox series

eventfd: convert global percpu eventfd_wake_count to ctx percpu eventfd_wake_count

Message ID 20210604074212.17808-1-qiang.zhang@windriver.com (mailing list archive)
State New
Headers show
Series eventfd: convert global percpu eventfd_wake_count to ctx percpu eventfd_wake_count | expand

Commit Message

Zhang, Qiang June 4, 2021, 7:42 a.m. UTC
From: Zqiang <qiang.zhang@windriver.com>

On an RT system, the spinlock_irq is replaced by an rt_mutex. When
eventfd_signal() is called and the current task is preempted after
incrementing the current CPU's eventfd_wake_count, another task that
runs on this CPU and calls eventfd_signal() finds that this CPU's
eventfd_wake_count is not zero, so it triggers the warning and returns
immediately, and the wakeup is missed.
On a non-RT system, even if eventfd_signal() calls are nested, no
deadlock can happen as long as they act on different eventfd_ctx objects.

Fixes: b5e683d5cab8 ("eventfd: track eventfd_signal() recursion depth")
Signed-off-by: Zqiang <qiang.zhang@windriver.com>
---
 fs/aio.c                |  2 +-
 fs/eventfd.c            | 21 +++++++++++++++++----
 include/linux/eventfd.h |  9 ++-------
 3 files changed, 20 insertions(+), 12 deletions(-)

Comments

kernel test robot June 4, 2021, 1:21 p.m. UTC | #1
Hi,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v5.13-rc4 next-20210604]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/qiang-zhang-windriver-com/eventfd-convert-global-percpu-eventfd_wake_count-to-ctx-percpu-eventfd_wake_count/20210604-154249
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git f88cd3fb9df228e5ce4e13ec3dbad671ddb2146e
config: arm64-randconfig-r013-20210604 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 5c0d1b2f902aa6a9cf47cc7e42c5b83bb2217cf9)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install arm64 cross compiling tool for clang build
        # apt-get install binutils-aarch64-linux-gnu
        # https://github.com/0day-ci/linux/commit/3cff73140b2b518eec3e30712d9c66bbde8ad375
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review qiang-zhang-windriver-com/eventfd-convert-global-percpu-eventfd_wake_count-to-ctx-percpu-eventfd_wake_count/20210604-154249
        git checkout 3cff73140b2b518eec3e30712d9c66bbde8ad375
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=arm64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> fs/eventfd.c:434:6: warning: variable 'fd' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
           if (!ctx->eventfd_wake_count)
               ^~~~~~~~~~~~~~~~~~~~~~~~
   fs/eventfd.c:461:9: note: uninitialized use occurs here
           return fd;
                  ^~
   fs/eventfd.c:434:2: note: remove the 'if' if its condition is always false
           if (!ctx->eventfd_wake_count)
           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   fs/eventfd.c:420:8: note: initialize the variable 'fd' to silence this warning
           int fd;
                 ^
                  = 0
   1 warning generated.


vim +434 fs/eventfd.c

   415	
   416	static int do_eventfd(unsigned int count, int flags)
   417	{
   418		struct eventfd_ctx *ctx;
   419		struct file *file;
   420		int fd;
   421	
   422		/* Check the EFD_* constants for consistency.  */
   423		BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
   424		BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
   425	
   426		if (flags & ~EFD_FLAGS_SET)
   427			return -EINVAL;
   428	
   429		ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
   430		if (!ctx)
   431			return -ENOMEM;
   432	
   433		ctx->eventfd_wake_count = alloc_percpu(int);
 > 434		if (!ctx->eventfd_wake_count)
   435			goto err;
   436	
   437		kref_init(&ctx->kref);
   438		init_waitqueue_head(&ctx->wqh);
   439		ctx->count = count;
   440		ctx->flags = flags;
   441		ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL);
   442	
   443		flags &= EFD_SHARED_FCNTL_FLAGS;
   444		flags |= O_RDWR;
   445		fd = get_unused_fd_flags(flags);
   446		if (fd < 0)
   447			goto err;
   448	
   449		file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags);
   450		if (IS_ERR(file)) {
   451			put_unused_fd(fd);
   452			fd = PTR_ERR(file);
   453			goto err;
   454		}
   455	
   456		file->f_mode |= FMODE_NOWAIT;
   457		fd_install(fd, file);
   458		return fd;
   459	err:
   460		eventfd_free_ctx(ctx);
   461		return fd;
   462	}
   463	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot June 4, 2021, 3:03 p.m. UTC | #2
Hi,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v5.13-rc4 next-20210604]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/qiang-zhang-windriver-com/eventfd-convert-global-percpu-eventfd_wake_count-to-ctx-percpu-eventfd_wake_count/20210604-154249
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git f88cd3fb9df228e5ce4e13ec3dbad671ddb2146e
config: arm-randconfig-s031-20210604 (attached as .config)
compiler: arm-linux-gnueabi-gcc (GCC) 9.3.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # apt-get install sparse
        # sparse version: v0.6.3-341-g8af24329-dirty
        # https://github.com/0day-ci/linux/commit/3cff73140b2b518eec3e30712d9c66bbde8ad375
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review qiang-zhang-windriver-com/eventfd-convert-global-percpu-eventfd_wake_count-to-ctx-percpu-eventfd_wake_count/20210604-154249
        git checkout 3cff73140b2b518eec3e30712d9c66bbde8ad375
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' W=1 ARCH=arm 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)
   fs/aio.c:588:24: sparse: sparse: incorrect type in argument 1 (different address spaces) @@     expected void [noderef] __percpu *__pdata @@     got struct kioctx_cpu *cpu @@
   fs/aio.c:588:24: sparse:     expected void [noderef] __percpu *__pdata
   fs/aio.c:588:24: sparse:     got struct kioctx_cpu *cpu
   fs/aio.c:755:18: sparse: sparse: incorrect type in assignment (different address spaces) @@     expected struct kioctx_cpu *cpu @@     got struct kioctx_cpu [noderef] __percpu * @@
   fs/aio.c:755:18: sparse:     expected struct kioctx_cpu *cpu
   fs/aio.c:755:18: sparse:     got struct kioctx_cpu [noderef] __percpu *
   fs/aio.c:802:24: sparse: sparse: incorrect type in argument 1 (different address spaces) @@     expected void [noderef] __percpu *__pdata @@     got struct kioctx_cpu *cpu @@
   fs/aio.c:802:24: sparse:     expected void [noderef] __percpu *__pdata
   fs/aio.c:802:24: sparse:     got struct kioctx_cpu *cpu
   fs/aio.c:907:16: sparse: sparse: incorrect type in initializer (different address spaces) @@     expected void const [noderef] __percpu *__vpp_verify @@     got struct kioctx_cpu * @@
   fs/aio.c:907:16: sparse:     expected void const [noderef] __percpu *__vpp_verify
   fs/aio.c:907:16: sparse:     got struct kioctx_cpu *
   fs/aio.c:925:16: sparse: sparse: incorrect type in initializer (different address spaces) @@     expected void const [noderef] __percpu *__vpp_verify @@     got struct kioctx_cpu * @@
   fs/aio.c:925:16: sparse:     expected void const [noderef] __percpu *__vpp_verify
   fs/aio.c:925:16: sparse:     got struct kioctx_cpu *
   fs/aio.c: note: in included file:
>> include/linux/eventfd.h:46:33: sparse: sparse: marked inline, but without a definition

vim +46 include/linux/eventfd.h

    45	
  > 46	inline bool eventfd_signal_count(struct eventfd_ctx *ctx);
    47	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Zhang, Qiang June 7, 2021, 3:40 a.m. UTC | #3
Hello Axboe

Can you help with the review?
On an RT system, I tested this and found the following call trace:


BUG: using smp_processor_id() in preemptible [00000000] code: vhost-5257/5262
caller is debug_smp_processor_id+0x17/0x20
CPU: 1 PID: 5262 Comm: vhost-5257 Not tainted 5.10.41-rt34-yocto-preempt-rt #1
Hardware name: Intel(R) Client Systems NUC7i5DNKE/NUC7i5DNB, BIOS DNKBLi5v.86A.0064.2019.0523.1933 05/23/2019
Call Trace:
dump_stack+0x60/0x76
check_preemption_disabled+0xce/0xd0
debug_smp_processor_id+0x17/0x20
print_stop_info+0x20/0x40
dump_stack_print_info+0xac/0xc0
show_regs_print_info+0x9/0x10
show_regs+0x1a/0x50
__warn+0x84/0xc0
? eventfd_signal+0x85/0xa0
report_bug+0xa1/0xc0
handle_bug+0x45/0x90
exc_invalid_op+0x19/0x70
asm_exc_invalid_op+0x12/0x20
RIP: 0010:eventfd_signal+0x85/0xa0
Code: 00 00 be 03 00 00 00 4c 89 f7 e8 26 0e e2 ff 65 ff 0d cf 4c 17 60 4c 89 f7 e8 d7 f1 be 00 4c 89 e0 5b 41 5c 41 5d 41 5e 5d c3 <0f> 0b 45 31 e4 5b 4c 89 e0 41 5c 41 5d 41 5e 5d c3 66 2e 0f 1f 84
RSP: 0018:ffffb12902617d00 EFLAGS: 00010202
RAX: 0000000000000001 RBX: ffff8fe2f3a60120 RCX: 0000000000000000
RDX: 00000000000092e2 RSI: 0000000000000001 RDI: ffff8fe1c36d2d20
RBP: ffffb12902617d20 R08: 0000044e00000061 R09: 0000000000000000
R10: 00000000fffffe4e R11: ffff8fe2f38ff800 R12: 0000000000000000
R13: ffff8fe2f3a60270 R14: ffff8fe2f3a60000 R15: ffff8fe2f3a60120
vhost_add_used_and_signal_n+0x41/0x50 [vhost]
handle_rx+0xb9/0x9e0 [vhost_net]
handle_rx_net+0x15/0x20 [vhost_net]
vhost_worker+0x95/0xe0 [vhost]
kthread+0x19c/0x1c0
? vhost_dev_reset_owner+0x50/0x50 [vhost]
? __kthread_parkme+0xa0/0xa0
ret_from_fork+0x22/0x30
diff mbox series

Patch

diff --git a/fs/aio.c b/fs/aio.c
index 76ce0cc3ee4e..b45983d5d35a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1695,7 +1695,7 @@  static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 		list_del(&iocb->ki_list);
 		iocb->ki_res.res = mangle_poll(mask);
 		req->done = true;
-		if (iocb->ki_eventfd && eventfd_signal_count()) {
+		if (iocb->ki_eventfd && eventfd_signal_count(iocb->ki_eventfd)) {
 			iocb = NULL;
 			INIT_WORK(&req->work, aio_poll_put_work);
 			schedule_work(&req->work);
diff --git a/fs/eventfd.c b/fs/eventfd.c
index e265b6dd4f34..ef92d3dedde8 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -25,7 +25,6 @@ 
 #include <linux/idr.h>
 #include <linux/uio.h>
 
-DEFINE_PER_CPU(int, eventfd_wake_count);
 
 static DEFINE_IDA(eventfd_ida);
 
@@ -43,8 +42,15 @@  struct eventfd_ctx {
 	__u64 count;
 	unsigned int flags;
 	int id;
+	int __percpu *eventfd_wake_count;
 };
 
+inline bool eventfd_signal_count(struct eventfd_ctx *ctx)
+{
+	return this_cpu_read(*ctx->eventfd_wake_count);
+}
+EXPORT_SYMBOL_GPL(eventfd_signal_count);
+
 /**
  * eventfd_signal - Adds @n to the eventfd counter.
  * @ctx: [in] Pointer to the eventfd context.
@@ -71,17 +77,17 @@  __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
 	 * it returns true, the eventfd_signal() call should be deferred to a
 	 * safe context.
 	 */
-	if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count)))
+	if (WARN_ON_ONCE(this_cpu_read(*ctx->eventfd_wake_count)))
 		return 0;
 
 	spin_lock_irqsave(&ctx->wqh.lock, flags);
-	this_cpu_inc(eventfd_wake_count);
+	this_cpu_inc(*ctx->eventfd_wake_count);
 	if (ULLONG_MAX - ctx->count < n)
 		n = ULLONG_MAX - ctx->count;
 	ctx->count += n;
 	if (waitqueue_active(&ctx->wqh))
 		wake_up_locked_poll(&ctx->wqh, EPOLLIN);
-	this_cpu_dec(eventfd_wake_count);
+	this_cpu_dec(*ctx->eventfd_wake_count);
 	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
 
 	return n;
@@ -92,6 +98,9 @@  static void eventfd_free_ctx(struct eventfd_ctx *ctx)
 {
 	if (ctx->id >= 0)
 		ida_simple_remove(&eventfd_ida, ctx->id);
+
+	if (ctx->eventfd_wake_count)
+		free_percpu(ctx->eventfd_wake_count);
 	kfree(ctx);
 }
 
@@ -421,6 +430,10 @@  static int do_eventfd(unsigned int count, int flags)
 	if (!ctx)
 		return -ENOMEM;
 
+	ctx->eventfd_wake_count = alloc_percpu(int);
+	if (!ctx->eventfd_wake_count)
+		goto err;
+
 	kref_init(&ctx->kref);
 	init_waitqueue_head(&ctx->wqh);
 	ctx->count = count;
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index fa0a524baed0..1deda815ef1b 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -43,12 +43,7 @@  int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *w
 				  __u64 *cnt);
 void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt);
 
-DECLARE_PER_CPU(int, eventfd_wake_count);
-
-static inline bool eventfd_signal_count(void)
-{
-	return this_cpu_read(eventfd_wake_count);
-}
+inline bool eventfd_signal_count(struct eventfd_ctx *ctx);
 
 #else /* CONFIG_EVENTFD */
 
@@ -78,7 +73,7 @@  static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx,
 	return -ENOSYS;
 }
 
-static inline bool eventfd_signal_count(void)
+static inline bool eventfd_signal_count(struct eventfd_ctx *ctx)
 {
 	return false;
 }