diff mbox series

[2/3] eventpoll: support non-blocking do_epoll_ctl() calls

Message ID 20200122160231.11876-3-axboe@kernel.dk (mailing list archive)
State New, archived
Headers show
Series Add io_uring support for epoll_ctl | expand

Commit Message

Jens Axboe Jan. 22, 2020, 4:02 p.m. UTC
Also make it available outside of epoll, along with the helper that
decides if we need to copy the passed in epoll_event.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/eventpoll.c            | 42 ++++++++++++++++++++++++++++-----------
 include/linux/eventpoll.h |  9 +++++++++
 2 files changed, 39 insertions(+), 12 deletions(-)

Comments

Jann Horn Jan. 22, 2020, 4:20 p.m. UTC | #1
On Wed, Jan 22, 2020 at 5:02 PM Jens Axboe <axboe@kernel.dk> wrote:
> Also make it available outside of epoll, along with the helper that
> decides if we need to copy the passed in epoll_event.
[...]
> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
> index cd848e8d08e2..162af749ea50 100644
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
[...]
> -static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
> +static inline int epoll_mutex_lock(struct mutex *mutex, int depth,
> +                                  bool nonblock)
> +{
> +       if (!nonblock) {
> +               mutex_lock_nested(mutex, depth);
> +               return 0;
> +       }
> +       if (!mutex_trylock(mutex))
> +               return 0;
> +       return -EAGAIN;

The documentation for mutex_trylock() says:

 * Try to acquire the mutex atomically. Returns 1 if the mutex
 * has been acquired successfully, and 0 on contention.

So in the success case, this evaluates to:

    if (!1)
      return 0;
    return -EAGAIN;

which is

    if (0)
      return 0;
    return -EAGAIN;

which is

    return -EAGAIN;

I think you'll have to get rid of the negation.

> +}
> +
> +int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
> +                bool nonblock)
>  {
>         int error;
>         int full_check = 0;
> @@ -2145,13 +2152,17 @@ static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
>          * deep wakeup paths from forming in parallel through multiple
>          * EPOLL_CTL_ADD operations.
>          */
> -       mutex_lock_nested(&ep->mtx, 0);
> +       error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
> +       if (error)
> +               goto error_tgt_fput;
>         if (op == EPOLL_CTL_ADD) {
>                 if (!list_empty(&f.file->f_ep_links) ||
>                                                 is_file_epoll(tf.file)) {
>                         full_check = 1;
>                         mutex_unlock(&ep->mtx);
> -                       mutex_lock(&epmutex);
> +                       error = epoll_mutex_lock(&epmutex, 0, nonblock);
> +                       if (error)
> +                               goto error_tgt_fput;

When we reach the "goto", full_check==1 and epmutex is not held. But
at the jump target, this code runs:

error_tgt_fput:
  if (full_check) // true
    mutex_unlock(&epmutex);

So I think we're releasing a lock that we don't hold.

>                         if (is_file_epoll(tf.file)) {
>                                 error = -ELOOP;
>                                 if (ep_loop_check(ep, tf.file) != 0) {
> @@ -2161,10 +2172,17 @@ static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
>                         } else
>                                 list_add(&tf.file->f_tfile_llink,
>                                                         &tfile_check_list);
> -                       mutex_lock_nested(&ep->mtx, 0);
> +                       error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
> +                       if (error) {
> +out_del:
> +                               list_del(&tf.file->f_tfile_llink);
> +                               goto error_tgt_fput;
> +                       }
>                         if (is_file_epoll(tf.file)) {
>                                 tep = tf.file->private_data;
> -                               mutex_lock_nested(&tep->mtx, 1);
> +                               error = epoll_mutex_lock(&tep->mtx, 1, nonblock);
> +                               if (error)
> +                                       goto out_del;

When we reach this "goto", ep->mtx is held and never dropped.

>                         }
>                 }
>         }
> @@ -2233,7 +2251,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
>             copy_from_user(&epds, event, sizeof(struct epoll_event)))
>                 return -EFAULT;
>
> -       return do_epoll_ctl(epfd, op, fd, &epds);
> +       return do_epoll_ctl(epfd, op, fd, &epds, false);
>  }
Jens Axboe Jan. 22, 2020, 4:23 p.m. UTC | #2
On 1/22/20 9:20 AM, Jann Horn wrote:
> On Wed, Jan 22, 2020 at 5:02 PM Jens Axboe <axboe@kernel.dk> wrote:
>> Also make it available outside of epoll, along with the helper that
>> decides if we need to copy the passed in epoll_event.
> [...]
>> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
>> index cd848e8d08e2..162af749ea50 100644
>> --- a/fs/eventpoll.c
>> +++ b/fs/eventpoll.c
> [...]
>> -static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
>> +static inline int epoll_mutex_lock(struct mutex *mutex, int depth,
>> +                                  bool nonblock)
>> +{
>> +       if (!nonblock) {
>> +               mutex_lock_nested(mutex, depth);
>> +               return 0;
>> +       }
>> +       if (!mutex_trylock(mutex))
>> +               return 0;
>> +       return -EAGAIN;
> 
> The documentation for mutex_trylock() says:
> 
>  * Try to acquire the mutex atomically. Returns 1 if the mutex
>  * has been acquired successfully, and 0 on contention.
> 
> So in the success case, this evaluates to:
> 
>     if (!1)
>       return 0;
>     return -EAGAIN;
> 
> which is
> 
>     if (0)
>       return 0;
>     return -EAGAIN;
> 
> which is
> 
>     return -EAGAIN;
> 
> I think you'll have to get rid of the negation.

Doh indeed. I'll rework and run the test case, just rebased this and I
think I inadvertently used an older version. Ditto for the below.
diff mbox series

Patch

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index cd848e8d08e2..162af749ea50 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -354,12 +354,6 @@  static inline struct epitem *ep_item_from_epqueue(poll_table *p)
 	return container_of(p, struct ep_pqueue, pt)->epi;
 }
 
-/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
-static inline int ep_op_has_event(int op)
-{
-	return op != EPOLL_CTL_DEL;
-}
-
 /* Initialize the poll safe wake up structure */
 static void ep_nested_calls_init(struct nested_calls *ncalls)
 {
@@ -2074,7 +2068,20 @@  SYSCALL_DEFINE1(epoll_create, int, size)
 	return do_epoll_create(0);
 }
 
-static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
+/* Take @mutex; if @nonblock, only try it and return -EAGAIN on contention */
+static inline int epoll_mutex_lock(struct mutex *mutex, int depth,
+				   bool nonblock)
+{
+	if (!nonblock) {
+		mutex_lock_nested(mutex, depth);
+		return 0;
+	}
+	/* mutex_trylock() returns 1 if acquired, 0 on contention */
+	return mutex_trylock(mutex) ? 0 : -EAGAIN;
+}
+
+int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
+		 bool nonblock)
 {
 	int error;
 	int full_check = 0;
@@ -2145,13 +2152,17 @@  static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
 	 * deep wakeup paths from forming in parallel through multiple
 	 * EPOLL_CTL_ADD operations.
 	 */
-	mutex_lock_nested(&ep->mtx, 0);
+	error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
+	if (error)
+		goto error_tgt_fput;
 	if (op == EPOLL_CTL_ADD) {
 		if (!list_empty(&f.file->f_ep_links) ||
 						is_file_epoll(tf.file)) {
 			full_check = 1;
 			mutex_unlock(&ep->mtx);
-			mutex_lock(&epmutex);
+			error = epoll_mutex_lock(&epmutex, 0, nonblock);
+			if (error)
+				goto error_tgt_fput;
 			if (is_file_epoll(tf.file)) {
 				error = -ELOOP;
 				if (ep_loop_check(ep, tf.file) != 0) {
@@ -2161,10 +2172,17 @@  static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
 			} else
 				list_add(&tf.file->f_tfile_llink,
 							&tfile_check_list);
-			mutex_lock_nested(&ep->mtx, 0);
+			error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
+			if (error) {
+out_del:
+				list_del(&tf.file->f_tfile_llink);
+				goto error_tgt_fput;
+			}
 			if (is_file_epoll(tf.file)) {
 				tep = tf.file->private_data;
-				mutex_lock_nested(&tep->mtx, 1);
+				error = epoll_mutex_lock(&tep->mtx, 1, nonblock);
+				if (error)
+					goto out_del;
 			}
 		}
 	}
@@ -2233,7 +2251,7 @@  SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	    copy_from_user(&epds, event, sizeof(struct epoll_event)))
 		return -EFAULT;
 
-	return do_epoll_ctl(epfd, op, fd, &epds);
+	return do_epoll_ctl(epfd, op, fd, &epds, false);
 }
 
 /*
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index bc6d79b00c4e..8f000fada5a4 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -61,6 +61,15 @@  static inline void eventpoll_release(struct file *file)
 	eventpoll_release_file(file);
 }
 
+int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
+		 bool nonblock);
+
+/* Nonzero for every epoll_ctl(2) op except EPOLL_CTL_DEL (needs event copy) */
+static inline int ep_op_has_event(int op)
+{
+	return op != EPOLL_CTL_DEL;
+}
+
 #else
 
 static inline void eventpoll_init_file(struct file *file) {}