diff mbox

[v4,4/9] epoll: Add implementation for epoll_ctl_batch

Message ID 1425952155-27603-5-git-send-email-famz@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Fam Zheng March 10, 2015, 1:49 a.m. UTC
This new syscall is a batched version of epoll_ctl. It will execute each
command as specified in cmds in given order, and stop at first failure
or upon completion of all commands.

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 fs/eventpoll.c                 | 50 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/syscalls.h       |  4 ++++
 include/uapi/linux/eventpoll.h | 11 ++++++++++
 3 files changed, 65 insertions(+)

Comments

Dan Rosenberg March 10, 2015, 1:59 p.m. UTC | #1
On 03/09/2015 09:49 PM, Fam Zheng wrote:
> +	if (!cmds || ncmds <= 0 || ncmds > EP_MAX_BATCH)
> +		return -EINVAL;
> +	cmd_size = sizeof(struct epoll_ctl_cmd) * ncmds;
> +	/* TODO: optimize for small arguments like select/poll with a stack
> +	 * allocated buffer */
> +
> +	kcmds = kmalloc(cmd_size, GFP_KERNEL);
> +	if (!kcmds)
> +		return -ENOMEM;
You probably want to define EP_MAX_BATCH as some sane value much less
than INT_MAX/(sizeof(struct epoll_ctl_cmd)). While this avoids the
integer overflow from before, any user can cause the kernel to kmalloc
up to INT_MAX bytes. Probably not a huge deal because it's freed at the
end of the syscall, but generally not a great idea.

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 7909c88..54dc63f 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -99,6 +99,8 @@ 
 
 #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
 
+#define EP_MAX_BATCH (INT_MAX / sizeof(struct epoll_ctl_cmd))
+
 #define EP_UNACTIVE_PTR ((void *) -1L)
 
 #define EP_ITEM_COST (sizeof(struct epitem) + sizeof(struct eppoll_entry))
@@ -2069,6 +2071,54 @@  SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
 			      sigmask ? &ksigmask : NULL);
 }
 
+SYSCALL_DEFINE4(epoll_ctl_batch, int, epfd, int, flags,
+		int, ncmds, struct epoll_ctl_cmd __user *, cmds)
+{
+	struct epoll_ctl_cmd *kcmds = NULL;
+	int i, ret = 0;
+	size_t cmd_size;
+
+	if (flags)
+		return -EINVAL;
+	if (!cmds || ncmds <= 0 || ncmds > EP_MAX_BATCH)
+		return -EINVAL;
+	cmd_size = sizeof(struct epoll_ctl_cmd) * ncmds;
+	/* TODO: optimize for small arguments like select/poll with a stack
+	 * allocated buffer */
+
+	kcmds = kmalloc(cmd_size, GFP_KERNEL);
+	if (!kcmds)
+		return -ENOMEM;
+	if (copy_from_user(kcmds, cmds, cmd_size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+	for (i = 0; i < ncmds; i++) {
+		struct epoll_event ev = (struct epoll_event) {
+			.events = kcmds[i].events,
+			.data = kcmds[i].data,
+		};
+		if (kcmds[i].flags) {
+			kcmds[i].result = -EINVAL;
+			goto copy;
+		}
+		kcmds[i].result = ep_ctl_do(epfd, kcmds[i].op,
+					    kcmds[i].fd, ev);
+		if (kcmds[i].result)
+			goto copy;
+		ret++;
+	}
+copy:
+	/* We lose the number of succeeded commands in favor of returning
+	 * -EFAULT, but in this case the application will want to fix the
+	 *  memory bug first. */
+	if (copy_to_user(cmds, kcmds, cmd_size))
+		ret = -EFAULT;
+out:
+	kfree(kcmds);
+	return ret;
+}
+
 #ifdef CONFIG_COMPAT
 COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
 			struct epoll_event __user *, events,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 76d1e38..7d784e3 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -12,6 +12,7 @@ 
 #define _LINUX_SYSCALLS_H
 
 struct epoll_event;
+struct epoll_ctl_cmd;
 struct iattr;
 struct inode;
 struct iocb;
@@ -634,6 +635,9 @@  asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
 				int maxevents, int timeout,
 				const sigset_t __user *sigmask,
 				size_t sigsetsize);
+asmlinkage long sys_epoll_ctl_batch(int epfd, int flags,
+				    int ncmds,
+				    struct epoll_ctl_cmd __user *cmds);
 asmlinkage long sys_gethostname(char __user *name, int len);
 asmlinkage long sys_sethostname(char __user *name, int len);
 asmlinkage long sys_setdomainname(char __user *name, int len);
diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
index bc81fb2..4e18b17 100644
--- a/include/uapi/linux/eventpoll.h
+++ b/include/uapi/linux/eventpoll.h
@@ -18,6 +18,8 @@ 
 #include <linux/fcntl.h>
 #include <linux/types.h>
 
+#include <linux/signal.h>
+
 /* Flags for epoll_create1.  */
 #define EPOLL_CLOEXEC O_CLOEXEC
 
@@ -61,6 +63,15 @@  struct epoll_event {
 	__u64 data;
 } EPOLL_PACKED;
 
+struct epoll_ctl_cmd {
+	int flags;
+	int op;
+	int fd;
+	__u32 events;
+	__u64 data;
+	int result;
+} EPOLL_PACKED;
+
 #ifdef CONFIG_PM_SLEEP
 static inline void ep_take_care_of_epollwakeup(struct epoll_event *epev)
 {