Message ID | 20250309055003.32194-1-wen.yang@linux.dev (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [v3] eventfd: introduce configurable maximum value for eventfd | expand |
Hi Wen, kernel test robot noticed the following build errors: [auto build test ERROR on brauner-vfs/vfs.all] [also build test ERROR on linus/master v6.14-rc5 next-20250307] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Wen-Yang/eventfd-introduce-configurable-maximum-value-for-eventfd/20250309-135152 base: https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git vfs.all patch link: https://lore.kernel.org/r/20250309055003.32194-1-wen.yang%40linux.dev patch subject: [PATCH v3] eventfd: introduce configurable maximum value for eventfd config: i386-buildonly-randconfig-002-20250309 (https://download.01.org/0day-ci/archive/20250309/202503092210.z9CcRYoe-lkp@intel.com/config) compiler: clang version 19.1.7 (https://github.com/llvm/llvm-project cd708029e0b2869e80abe31ddb175f7c35361f90) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250309/202503092210.z9CcRYoe-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202503092210.z9CcRYoe-lkp@intel.com/ All errors (new ones prefixed by >>): >> fs/eventfd.c:251:69: error: use of undeclared identifier 'ucnt' 251 | return (ctx->maximum > ctx->count) && (ctx->maximum - ctx->count > ucnt); | ^ 1 error generated. vim +/ucnt +251 fs/eventfd.c 248 249 static inline bool eventfd_is_writable(const struct eventfd_ctx *ctx) 250 { > 251 return (ctx->maximum > ctx->count) && (ctx->maximum - ctx->count > ucnt); 252 } 253
Hi Wen, kernel test robot noticed the following build errors: [auto build test ERROR on brauner-vfs/vfs.all] [also build test ERROR on linus/master v6.14-rc5 next-20250307] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Wen-Yang/eventfd-introduce-configurable-maximum-value-for-eventfd/20250309-135152 base: https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git vfs.all patch link: https://lore.kernel.org/r/20250309055003.32194-1-wen.yang%40linux.dev patch subject: [PATCH v3] eventfd: introduce configurable maximum value for eventfd config: i386-buildonly-randconfig-003-20250309 (https://download.01.org/0day-ci/archive/20250309/202503092223.G4FJDVsM-lkp@intel.com/config) compiler: gcc-11 (Debian 11.3.0-12) 11.3.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250309/202503092223.G4FJDVsM-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202503092223.G4FJDVsM-lkp@intel.com/ All error/warnings (new ones prefixed by >>): fs/eventfd.c: In function 'eventfd_is_writable': >> fs/eventfd.c:251:76: error: 'ucnt' undeclared (first use in this function); did you mean 'uint'? 251 | return (ctx->maximum > ctx->count) && (ctx->maximum - ctx->count > ucnt); | ^~~~ | uint fs/eventfd.c:251:76: note: each undeclared identifier is reported only once for each function it appears in >> fs/eventfd.c:252:1: warning: control reaches end of non-void function [-Wreturn-type] 252 | } | ^ vim +251 fs/eventfd.c 248 249 static inline bool eventfd_is_writable(const struct eventfd_ctx *ctx) 250 { > 251 return (ctx->maximum > ctx->count) && (ctx->maximum - ctx->count > ucnt); > 252 } 253
diff --git a/fs/eventfd.c b/fs/eventfd.c index af42b2c7d235..cb004aded4df 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -39,6 +39,7 @@ struct eventfd_ctx { * also, adds to the "count" counter and issue a wakeup. */ __u64 count; + __u64 maximum; unsigned int flags; int id; }; @@ -49,7 +50,7 @@ struct eventfd_ctx { * @mask: [in] poll mask * * This function is supposed to be called by the kernel in paths that do not - * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX + * allow sleeping. In this function we allow the counter to reach the maximum * value, and we signal this as overflow condition by returning a EPOLLERR * to poll(2). */ @@ -70,7 +71,7 @@ void eventfd_signal_mask(struct eventfd_ctx *ctx, __poll_t mask) spin_lock_irqsave(&ctx->wqh.lock, flags); current->in_eventfd = 1; - if (ctx->count < ULLONG_MAX) + if (ctx->count < ctx->maximum) ctx->count++; if (waitqueue_active(&ctx->wqh)) wake_up_locked_poll(&ctx->wqh, EPOLLIN | mask); @@ -119,7 +120,7 @@ static __poll_t eventfd_poll(struct file *file, poll_table *wait) { struct eventfd_ctx *ctx = file->private_data; __poll_t events = 0; - u64 count; + u64 count, max; poll_wait(file, &ctx->wqh, wait); @@ -162,12 +163,13 @@ static __poll_t eventfd_poll(struct file *file, poll_table *wait) * eventfd_poll returns 0 */ count = READ_ONCE(ctx->count); + max = READ_ONCE(ctx->maximum); if (count > 0) events |= EPOLLIN; - if (count == ULLONG_MAX) + if (count == max) events |= EPOLLERR; - if (ULLONG_MAX - 1 > count) + if (max - 1 > count) events |= EPOLLOUT; return events; @@ -244,6 +246,11 @@ static ssize_t eventfd_read(struct kiocb *iocb, struct iov_iter *to) return sizeof(ucnt); } +static inline bool eventfd_is_writable(const struct eventfd_ctx *ctx) +{ + return (ctx->maximum > ctx->count) && (ctx->maximum - ctx->count > ucnt); +} + static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -259,11 +266,11 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c return -EINVAL; spin_lock_irq(&ctx->wqh.lock); res = -EAGAIN; - if (ULLONG_MAX - ctx->count > ucnt) + if (eventfd_is_writable(ctx)) res = sizeof(ucnt); else if (!(file->f_flags & O_NONBLOCK)) { res = wait_event_interruptible_locked_irq(ctx->wqh, - ULLONG_MAX - ctx->count > ucnt); + eventfd_is_writable(ctx)); if (!res) res = sizeof(ucnt); } @@ -299,6 +306,46 @@ static void eventfd_show_fdinfo(struct seq_file *m, struct file *f) } #endif +static long eventfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct eventfd_ctx *ctx = file->private_data; + void __user *argp = (void __user *)arg; + int ret = 0; + __u64 max; + + if (!argp) + return -EINVAL; + + switch (cmd) { + case EFD_IOC_SET_MAXIMUM: { + if (copy_from_user(&max, argp, sizeof(max))) + return -EFAULT; + + spin_lock_irq(&ctx->wqh.lock); + if (ctx->count >= max) + ret = -EINVAL; + else + ctx->maximum = max; + spin_unlock_irq(&ctx->wqh.lock); + break; + } + case EFD_IOC_GET_MAXIMUM: { + spin_lock_irq(&ctx->wqh.lock); + max = ctx->maximum; + spin_unlock_irq(&ctx->wqh.lock); + + if (copy_to_user(argp, &max, sizeof(max))) + ret = -EFAULT; + break; + } + default: + ret = -ENOENT; + break; + } + + return ret; +} + static const struct file_operations eventfd_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = eventfd_show_fdinfo, @@ -308,6 +355,7 @@ static const struct file_operations eventfd_fops = { .read_iter = eventfd_read, .write = eventfd_write, .llseek = noop_llseek, + .unlocked_ioctl = eventfd_ioctl, }; /** @@ -398,6 +446,7 @@ static int do_eventfd(unsigned int count, int flags) init_waitqueue_head(&ctx->wqh); ctx->count = count; ctx->flags = flags; + ctx->maximum = ULLONG_MAX; ctx->id = ida_alloc(&eventfd_ida, GFP_KERNEL); flags &= EFD_SHARED_FCNTL_FLAGS; diff --git a/include/uapi/linux/eventfd.h b/include/uapi/linux/eventfd.h index 2eb9ab6c32f3..96e6430a3d12 100644 --- a/include/uapi/linux/eventfd.h +++ b/include/uapi/linux/eventfd.h @@ -8,4 +8,7 @@ #define EFD_CLOEXEC O_CLOEXEC #define EFD_NONBLOCK O_NONBLOCK +#define EFD_IOC_SET_MAXIMUM _IOW('E', 0, __u64) +#define EFD_IOC_GET_MAXIMUM _IOR('E', 0, __u64) + #endif /* _UAPI_LINUX_EVENTFD_H */
For the NON-SEMAPHORE eventfd, a write (2) call adds the 8-byte integer value provided in its buffer to the counter, while a read (2) returns the 8-byte value containing the value and resetting the counter value to 0. Therefore, the accumulated value of multiple writes can be retrieved by a single read. Currently, the reading thread is waked up immediately after the writing thread writes eventfd, and the maximum value of the counter is ULLONG_MAX, therefore, in the ping pong scene with frequent reading and writing, the CPU will be exhausted. By introducing the configurable maximum counter, we could achieve flow control and reduce unnecessary CPU overhead. We may use the following test code: #define _GNU_SOURCE #include <assert.h> #include <errno.h> #include <getopt.h> #include <pthread.h> #include <poll.h> #include <stdlib.h> #include <stdio.h> #include <unistd.h> #include <string.h> #include <sys/eventfd.h> #include <sys/prctl.h> #include <sys/ioctl.h> #define EFD_IOC_SET_MAXIMUM _IOW('E', 0, __u64) #define EFD_IOC_GET_MAXIMUM _IOR('E', 0, __u64) struct param { int fd; int cpu; }; static void publish(void *data) { struct param * param = (struct param *)data; unsigned long long value = 1; cpu_set_t cpuset; prctl(PR_SET_NAME, "publish"); CPU_ZERO(&cpuset); CPU_SET(param->cpu, &cpuset); sched_setaffinity(0, sizeof(cpuset), &cpuset); while (1) eventfd_write(param->fd, value); } static void subscribe(void *data) { struct param *param = (struct param *)data; unsigned long long value = 0; struct pollfd pfds[1]; cpu_set_t cpuset; prctl(PR_SET_NAME, "subscribe"); CPU_ZERO(&cpuset); CPU_SET(param->cpu, &cpuset); sched_setaffinity(0, sizeof(cpuset), &cpuset); pfds[0].fd = param->fd; pfds[0].events = POLLIN; while(1) { poll(pfds, 1, -1); if(pfds[0].revents & POLLIN) { read(param->fd, &value, sizeof(value)); } } } static void usage(void) { printf("Usage: \n"); printf("\t"); printf("<-p cpuid> <-s cpuid> <-m maximum> \n"); } int main(int argc, char *argv[]) { struct param sub_param = {0}; struct param pub_param = {0}; char *optstr = "p:s:m:"; int opt, ret, fd; __u64 maximum; pid_t pid; if (argc < 2) { usage(); return 1; } while((opt = getopt(argc, argv, optstr)) != -1){ switch(opt) { case 'p': pub_param.cpu = atoi(optarg); break; case 's': sub_param.cpu = atoi(optarg); break; case 'm': maximum = atoi(optarg); break; case '?': usage(); return -1; } } fd = eventfd(0, EFD_CLOEXEC); assert(fd); ret = ioctl(fd, EFD_IOC_SET_MAXIMUM, &maximum); if (ret) { printf("error=%s\n", strerror(errno)); return -1; } sub_param.fd = fd; pub_param.fd = fd; pid = fork(); if (pid == 0) subscribe(&sub_param); else if (pid > 0) publish(&pub_param); else { printf("XXX: fork error!\n"); return -1; } return 0; } $ ./a.out -p 2 -s 3 -m 6553500 -----cpu2-usage----------cpu3-usage---- usr sys idl wai stl:usr sys idl wai stl 47 53 0 0 0: 46 54 0 0 0 53 47 0 0 0: 45 54 1 0 0 56 44 0 0 0: 48 52 0 0 0 53 47 0 0 0: 45 55 0 0 0 $ ./a.out -p 2 -s 3 -m 100 -----cpu2-usage----------cpu3-usage---- usr sys idl wai stl:usr sys idl wai stl 41 59 0 0 0: 33 65 2 0 0 46 54 0 0 0: 30 67 2 0 0 38 62 0 0 0: 33 65 2 0 0 37 63 0 0 0: 31 66 3 0 0 $ ./a.out -p 2 -s 3 -m 10 -----cpu2-usage----------cpu3-usage---- usr sys idl wai stl:usr sys idl wai stl 37 43 20 0 0: 21 42 37 0 0 30 47 23 0 0: 20 42 38 0 0 39 39 23 0 0: 24 37 39 0 0 39 40 22 0 0: 23 41 36 0 0 Signed-off-by: Wen Yang <wen.yang@linux.dev> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Jens Axboe <axboe@kernel.dk> Cc: Christian Brauner <brauner@kernel.org> Cc: Jan Kara <jack@suse.cz> Cc: Dylan Yudaken <dylany@fb.com> Cc: David Woodhouse <dwmw@amazon.co.uk> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Dave Young <dyoung@redhat.com> Cc: linux-fsdevel@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- v3: simply achieve flow control by configuring the maximum value of counter v2: fix compilation errors https://lore.kernel.org/all/20240811085954.17162-1-wen.yang@linux.dev/ v1: https://lore.kernel.org/all/20240519144124.4429-1-wen.yang@linux.dev/ fs/eventfd.c | 63 ++++++++++++++++++++++++++++++++---- include/uapi/linux/eventfd.h | 3 ++ 2 files changed, 59 insertions(+), 7 deletions(-)