From patchwork Sun Oct 4 02:39:03 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815473 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 755B592C for ; Sun, 4 Oct 2020 02:41:18 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 668B8206B6 for ; Sun, 4 Oct 2020 02:41:18 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726278AbgJDCjb (ORCPT ); Sat, 3 Oct 2020 22:39:31 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48866 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726194AbgJDCja (ORCPT ); Sat, 3 Oct 2020 22:39:30 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id ACB78C0613D0; Sat, 3 Oct 2020 19:39:30 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvt-00BUpP-Bv; Sun, 04 Oct 2020 02:39:29 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 01/27] epoll: switch epitem->pwqlist to single-linked list Date: Sun, 4 Oct 2020 03:39:03 +0100 Message-Id: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023608.GM3421308@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro We only traverse it once to destroy all associated eppoll_entry at epitem destruction time. The order of traversal is irrelevant there. Signed-off-by: Al Viro --- fs/eventpoll.c | 51 +++++++++++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 4df61129566d..ae41868d9b35 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -128,6 +128,24 @@ struct nested_calls { spinlock_t lock; }; +/* Wait structure used by the poll hooks */ +struct eppoll_entry { + /* List header used to link this structure to the "struct epitem" */ + struct eppoll_entry *next; + + /* The "base" pointer is set to the container "struct epitem" */ + struct epitem *base; + + /* + * Wait queue item that will be linked to the target file wait + * queue head. + */ + wait_queue_entry_t wait; + + /* The wait queue head that linked the "wait" wait queue item */ + wait_queue_head_t *whead; +}; + /* * Each file descriptor added to the eventpoll interface will * have an entry of this type linked to the "rbr" RB tree. @@ -158,7 +176,7 @@ struct epitem { int nwait; /* List containing poll wait queues */ - struct list_head pwqlist; + struct eppoll_entry *pwqlist; /* The "container" of this item */ struct eventpoll *ep; @@ -231,24 +249,6 @@ struct eventpoll { #endif }; -/* Wait structure used by the poll hooks */ -struct eppoll_entry { - /* List header used to link this structure to the "struct epitem" */ - struct list_head llink; - - /* The "base" pointer is set to the container "struct epitem" */ - struct epitem *base; - - /* - * Wait queue item that will be linked to the target file wait - * queue head. - */ - wait_queue_entry_t wait; - - /* The wait queue head that linked the "wait" wait queue item */ - wait_queue_head_t *whead; -}; - /* Wrapper struct used by poll queueing */ struct ep_pqueue { poll_table pt; @@ -617,13 +617,11 @@ static void ep_remove_wait_queue(struct eppoll_entry *pwq) */ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) { - struct list_head *lsthead = &epi->pwqlist; + struct eppoll_entry **p = &epi->pwqlist; struct eppoll_entry *pwq; - while (!list_empty(lsthead)) { - pwq = list_first_entry(lsthead, struct eppoll_entry, llink); - - list_del(&pwq->llink); + while ((pwq = *p) != NULL) { + *p = pwq->next; ep_remove_wait_queue(pwq); kmem_cache_free(pwq_cache, pwq); } @@ -1320,7 +1318,8 @@ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, add_wait_queue_exclusive(whead, &pwq->wait); else add_wait_queue(whead, &pwq->wait); - list_add_tail(&pwq->llink, &epi->pwqlist); + pwq->next = epi->pwqlist; + epi->pwqlist = pwq; epi->nwait++; } else { /* We have to signal that an error occurred */ @@ -1507,7 +1506,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, /* Item initialization follow here ... */ INIT_LIST_HEAD(&epi->rdllink); INIT_LIST_HEAD(&epi->fllink); - INIT_LIST_HEAD(&epi->pwqlist); + epi->pwqlist = NULL; epi->ep = ep; ep_set_ffd(&epi->ffd, tfile, fd); epi->event = *event; From patchwork Sun Oct 4 02:39:04 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815471 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 028056CA for ; Sun, 4 Oct 2020 02:41:18 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id E81FA206B6 for ; Sun, 4 Oct 2020 02:41:17 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726899AbgJDClO (ORCPT ); Sat, 3 Oct 2020 22:41:14 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48868 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726294AbgJDCjb (ORCPT ); Sat, 3 Oct 2020 22:39:31 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id D673CC0613E7; Sat, 3 Oct 2020 19:39:30 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvt-00BUpS-HI; Sun, 04 Oct 2020 02:39:29 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 02/27] epoll: get rid of epitem->nwait Date: Sun, 4 Oct 2020 03:39:04 +0100 Message-Id: <20201004023929.2740074-2-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro we use it only to indicate allocation failures within queueing callback back to ep_insert(). Might as well use epq.epi for that reporting... Signed-off-by: Al Viro --- fs/eventpoll.c | 46 ++++++++++++++++++++-------------------------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index ae41868d9b35..44aca681d897 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -172,9 +172,6 @@ struct epitem { /* The file descriptor information this item refers to */ struct epoll_filefd ffd; - /* Number of active wait queue attached to poll operations */ - int nwait; - /* List containing poll wait queues */ struct eppoll_entry *pwqlist; @@ -351,12 +348,6 @@ static inline struct epitem *ep_item_from_wait(wait_queue_entry_t *p) return container_of(p, struct eppoll_entry, wait)->base; } -/* Get the "struct epitem" from an epoll queue wrapper */ -static inline struct epitem *ep_item_from_epqueue(poll_table *p) -{ - return container_of(p, struct ep_pqueue, pt)->epi; -} - /* Initialize the poll safe wake up structure */ static void ep_nested_calls_init(struct nested_calls *ncalls) { @@ -1307,24 +1298,28 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, poll_table *pt) { - struct epitem *epi = ep_item_from_epqueue(pt); + struct ep_pqueue *epq = container_of(pt, struct ep_pqueue, pt); + struct epitem *epi = epq->epi; struct eppoll_entry *pwq; - if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL))) { - init_waitqueue_func_entry(&pwq->wait, ep_poll_callback); - pwq->whead = whead; - pwq->base = epi; - if (epi->event.events & EPOLLEXCLUSIVE) - add_wait_queue_exclusive(whead, &pwq->wait); - else - add_wait_queue(whead, &pwq->wait); - pwq->next = epi->pwqlist; - epi->pwqlist = pwq; - epi->nwait++; - } else { - /* We have to signal that an error occurred */ - epi->nwait = -1; + if (unlikely(!epi)) // an earlier allocation has failed + return; + + pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL); + if (unlikely(!pwq)) { + epq->epi = NULL; + return; } + + init_waitqueue_func_entry(&pwq->wait, ep_poll_callback); + pwq->whead = whead; + pwq->base = epi; + if (epi->event.events & EPOLLEXCLUSIVE) + add_wait_queue_exclusive(whead, &pwq->wait); + else + add_wait_queue(whead, &pwq->wait); + pwq->next = epi->pwqlist; + epi->pwqlist = pwq; } static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) @@ -1510,7 +1505,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, epi->ep = ep; ep_set_ffd(&epi->ffd, tfile, fd); epi->event = *event; - epi->nwait = 0; epi->next = EP_UNACTIVE_PTR; if (epi->event.events & EPOLLWAKEUP) { error = ep_create_wakeup_source(epi); @@ -1555,7 +1549,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, * high memory pressure. */ error = -ENOMEM; - if (epi->nwait < 0) + if (!epq.epi) goto error_unregister; /* We have to drop the new item inside our item list to keep track of it */ From patchwork Sun Oct 4 02:39:05 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815423 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id D47BF92C for ; Sun, 4 Oct 2020 02:39:45 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id C692F20679 for ; Sun, 4 Oct 2020 02:39:45 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726418AbgJDCjc (ORCPT ); Sat, 3 Oct 2020 22:39:32 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48870 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726327AbgJDCjb (ORCPT ); Sat, 3 Oct 2020 22:39:31 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 073F9C0613D0; Sat, 3 Oct 2020 19:39:30 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvt-00BUpZ-Lz; Sun, 04 Oct 2020 02:39:29 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 03/27] untangling ep_call_nested(): get rid of useless arguments Date: Sun, 4 Oct 2020 03:39:05 +0100 Message-Id: <20201004023929.2740074-3-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro ctx is always equal to current, ncalls - to &poll_loop_ncalls. Signed-off-by: Al Viro --- fs/eventpoll.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 44aca681d897..ef73d71a5dc8 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -455,21 +455,19 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi) * the same nested call (by the meaning of same cookie) is * no re-entered. * - * @ncalls: Pointer to the nested_calls structure to be used for this call. * @nproc: Nested call core function pointer. * @priv: Opaque data to be passed to the @nproc callback. * @cookie: Cookie to be used to identify this nested call. - * @ctx: This instance context. * * Returns: Returns the code returned by the @nproc callback, or -1 if * the maximum recursion limit has been exceeded. */ -static int ep_call_nested(struct nested_calls *ncalls, - int (*nproc)(void *, void *, int), void *priv, - void *cookie, void *ctx) +static int ep_call_nested(int (*nproc)(void *, void *, int), void *priv, + void *cookie) { int error, call_nests = 0; unsigned long flags; + struct nested_calls *ncalls = &poll_loop_ncalls; struct list_head *lsthead = &ncalls->tasks_call_list; struct nested_call_node *tncur; struct nested_call_node tnode; @@ -482,7 +480,7 @@ static int ep_call_nested(struct nested_calls *ncalls, * very much limited. */ list_for_each_entry(tncur, lsthead, llink) { - if (tncur->ctx == ctx && + if (tncur->ctx == current && (tncur->cookie == cookie || ++call_nests > EP_MAX_NESTS)) { /* * Ops ... loop detected or maximum nest level reached. @@ -494,7 +492,7 @@ static int ep_call_nested(struct nested_calls *ncalls, } /* Add the current task and cookie to the list */ - tnode.ctx = ctx; + tnode.ctx = current; tnode.cookie = cookie; list_add(&tnode.llink, lsthead); @@ -1397,10 +1395,8 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests) break; } } else { - error = ep_call_nested(&poll_loop_ncalls, - reverse_path_check_proc, - child_file, child_file, - current); + error = ep_call_nested(reverse_path_check_proc, + child_file, child_file); } if (error != 0) break; @@ -1431,9 +1427,8 @@ static int reverse_path_check(void) /* let's call this for all tfiles */ list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) { path_count_init(); - error = ep_call_nested(&poll_loop_ncalls, - reverse_path_check_proc, current_file, - current_file, current); + error = ep_call_nested(reverse_path_check_proc, current_file, + current_file); if (error) break; } @@ -1970,9 +1965,8 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) ep_tovisit = epi->ffd.file->private_data; if (ep_tovisit->gen == loop_check_gen) continue; - error = ep_call_nested(&poll_loop_ncalls, - ep_loop_check_proc, epi->ffd.file, - ep_tovisit, current); + error = ep_call_nested(ep_loop_check_proc, epi->ffd.file, + ep_tovisit); if (error != 0) break; } else { @@ -2009,8 +2003,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) */ static int ep_loop_check(struct eventpoll *ep, struct file *file) { - return ep_call_nested(&poll_loop_ncalls, - ep_loop_check_proc, file, ep, current); + return ep_call_nested(ep_loop_check_proc, file, ep); } static void clear_tfile_check_list(void) From patchwork Sun Oct 4 02:39:07 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815465 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 82E1D112E for ; Sun, 4 Oct 2020 02:40:56 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 6B0C5206B6 for ; Sun, 4 Oct 2020 02:40:56 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726851AbgJDCkz (ORCPT ); Sat, 3 Oct 2020 22:40:55 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48878 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726195AbgJDCjb (ORCPT ); Sat, 3 Oct 2020 22:39:31 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 7E1D4C0613D0; Sat, 3 Oct 2020 19:39:31 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvu-00BUpm-0z; Sun, 04 Oct 2020 02:39:30 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 05/27] untangling ep_call_nested(): take pushing cookie into a helper Date: Sun, 4 Oct 2020 03:39:07 +0100 Message-Id: <20201004023929.2740074-5-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro Signed-off-by: Al Viro --- fs/eventpoll.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 43aecae0935c..bd2cc78c47c8 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -424,6 +424,21 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi) #endif /* CONFIG_NET_RX_BUSY_POLL */ +static bool ep_push_nested(void *cookie) +{ + int i; + + if (nesting > EP_MAX_NESTS) /* too deep nesting */ + return false; + + for (i = 0; i < nesting; i++) { + if (cookies[i] == cookie) /* loop detected */ + return false; + } + cookies[nesting++] = cookie; + return true; +} + /** * ep_call_nested - Perform a bound (possibly) nested call, by checking * that the recursion limit is not exceeded, and that @@ -440,17 +455,10 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi) static int ep_call_nested(int (*nproc)(void *, void *, int), void *priv, void *cookie) { - int error, i; + int error; - if (nesting > EP_MAX_NESTS) /* too deep nesting */ + if (!ep_push_nested(cookie)) return -1; - - for (i = 0; i < nesting; i++) { - if (cookies[i] == cookie) /* loop detected */ - return -1; - } - cookies[nesting++] = cookie; - /* Call the nested function */ error = (*nproc)(priv, cookie, nesting - 1); nesting--; From patchwork Sun Oct 4 02:39:08 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815463 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 18C3792C for ; Sun, 4 Oct 2020 02:40:56 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 01317206B6 for ; Sun, 4 Oct 2020 02:40:55 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726847AbgJDCkz (ORCPT ); Sat, 3 Oct 2020 22:40:55 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48880 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726348AbgJDCjc (ORCPT ); Sat, 3 Oct 2020 22:39:32 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 8F5E0C0613E7; Sat, 3 Oct 2020 19:39:31 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvu-00BUpt-AD; Sun, 04 Oct 2020 02:39:30 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 06/27] untangling ep_call_nested(): move push/pop of cookie into the callbacks Date: Sun, 4 Oct 2020 03:39:08 +0100 Message-Id: <20201004023929.2740074-6-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro Signed-off-by: Al Viro --- fs/eventpoll.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index bd2cc78c47c8..9a6ee5991f3d 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -455,15 +455,7 @@ static bool ep_push_nested(void *cookie) static int ep_call_nested(int (*nproc)(void *, void *, int), void *priv, void *cookie) { - int error; - - if (!ep_push_nested(cookie)) - return -1; - /* Call the nested function */ - error = (*nproc)(priv, cookie, nesting - 1); - nesting--; - - return error; + return (*nproc)(priv, cookie, nesting); } /* @@ -1340,6 +1332,9 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests) struct file *child_file; struct epitem *epi; + if (!ep_push_nested(cookie)) /* limits recursion */ + return -1; + /* CTL_DEL can remove links here, but that can't increase our count */ rcu_read_lock(); list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) { @@ -1362,6 +1357,7 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests) } } rcu_read_unlock(); + nesting--; /* pop */ return error; } @@ -1913,6 +1909,9 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) struct rb_node *rbp; struct epitem *epi; + if (!ep_push_nested(cookie)) /* limits recursion */ + return -1; + mutex_lock_nested(&ep->mtx, call_nests + 1); ep->gen = loop_check_gen; for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { @@ -1942,6 +1941,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) } } mutex_unlock(&ep->mtx); + nesting--; /* pop */ return error; } From patchwork Sun Oct 4 02:39:10 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815467 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id C776692C for ; Sun, 4 Oct 2020 02:41:09 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id BA8C4206B6 for ; Sun, 4 Oct 2020 02:41:09 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726841AbgJDCky (ORCPT ); Sat, 3 Oct 2020 22:40:54 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48884 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726363AbgJDCjc (ORCPT ); Sat, 3 Oct 2020 22:39:32 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 02299C0613E9; Sat, 3 Oct 2020 19:39:31 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvu-00BUq6-GG; Sun, 04 Oct 2020 02:39:30 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 08/27] reverse_path_check_proc(): sane arguments Date: Sun, 4 Oct 2020 03:39:10 +0100 Message-Id: <20201004023929.2740074-8-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro no need to force its calling conventions to match the callback for late unlamented ep_call_nested()... Signed-off-by: Al Viro --- fs/eventpoll.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 8c3b02755a50..3e6f1f97f246 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1306,20 +1306,18 @@ static void path_count_init(void) path_count[i] = 0; } -static int reverse_path_check_proc(void *priv, void *cookie, int depth) +static int reverse_path_check_proc(struct file *file, int depth) { int error = 0; - struct file *file = priv; - struct file *child_file; struct epitem *epi; - if (!ep_push_nested(cookie)) /* limits recursion */ + if (!ep_push_nested(file)) /* limits recursion */ return -1; /* CTL_DEL can remove links here, but that can't increase our count */ rcu_read_lock(); list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) { - child_file = epi->ep->file; + struct file *child_file = epi->ep->file; if (is_file_epoll(child_file)) { if (list_empty(&child_file->f_ep_links)) { if (path_count_inc(depth)) { @@ -1327,7 +1325,7 @@ static int reverse_path_check_proc(void *priv, void *cookie, int depth) break; } } else { - error = reverse_path_check_proc(child_file, child_file, + error = reverse_path_check_proc(child_file, depth + 1); } if (error != 0) @@ -1360,7 +1358,7 @@ static int reverse_path_check(void) /* let's call this for all tfiles */ list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) { path_count_init(); - error = reverse_path_check_proc(current_file, current_file, 0); + error = reverse_path_check_proc(current_file, 0); if (error) break; } From patchwork Sun Oct 4 02:39:11 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815469 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 23F5A112E for ; Sun, 4 Oct 2020 02:41:12 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 159BC20679 for ; Sun, 4 Oct 2020 02:41:12 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726830AbgJDCky (ORCPT ); Sat, 3 Oct 2020 22:40:54 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48886 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726389AbgJDCjc (ORCPT ); Sat, 3 Oct 2020 22:39:32 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 2A0AAC061787; Sat, 3 Oct 2020 19:39:32 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvu-00BUqE-QM; Sun, 04 Oct 2020 02:39:30 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 09/27] reverse_path_check_proc(): don't bother with cookies Date: Sun, 4 Oct 2020 03:39:11 +0100 Message-Id: <20201004023929.2740074-9-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro We know there's no loops by the time we call it; the only thing we care about is too deep reverse paths. Signed-off-by: Al Viro --- fs/eventpoll.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 3e6f1f97f246..0f540e91aa92 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1311,7 +1311,7 @@ static int reverse_path_check_proc(struct file *file, int depth) int error = 0; struct epitem *epi; - if (!ep_push_nested(file)) /* limits recursion */ + if (depth > EP_MAX_NESTS) /* too deep nesting */ return -1; /* CTL_DEL can remove links here, but that can't increase our count */ @@ -1336,7 +1336,6 @@ static int reverse_path_check_proc(struct file *file, int depth) } } rcu_read_unlock(); - nesting--; /* pop */ return error; } From patchwork Sun Oct 4 02:39:12 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815461 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 4918A6CA for ; Sun, 4 Oct 2020 02:40:55 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 30C6120679 for ; Sun, 4 Oct 2020 02:40:55 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726834AbgJDCky (ORCPT ); Sat, 3 Oct 2020 22:40:54 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48888 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726407AbgJDCjc (ORCPT ); Sat, 3 Oct 2020 22:39:32 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 4B74DC0613A5; Sat, 3 Oct 2020 19:39:32 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvv-00BUqM-0O; Sun, 04 Oct 2020 02:39:31 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 10/27] clean reverse_path_check_proc() a bit Date: Sun, 4 Oct 2020 03:39:12 +0100 Message-Id: <20201004023929.2740074-10-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro Signed-off-by: Al Viro --- fs/eventpoll.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 0f540e91aa92..33af838046ea 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1317,23 +1317,15 @@ static int reverse_path_check_proc(struct file *file, int depth) /* CTL_DEL can remove links here, but that can't increase our count */ rcu_read_lock(); list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) { - struct file *child_file = epi->ep->file; - if (is_file_epoll(child_file)) { - if (list_empty(&child_file->f_ep_links)) { - if (path_count_inc(depth)) { - error = -1; - break; - } - } else { - error = reverse_path_check_proc(child_file, - depth + 1); - } - if (error != 0) - break; - } else { - printk(KERN_ERR "reverse_path_check_proc: " - "file is not an ep!\n"); - } + struct file *recepient = epi->ep->file; + if (WARN_ON(!is_file_epoll(recepient))) + continue; + if (list_empty(&recepient->f_ep_links)) + error = path_count_inc(depth); + else + error = reverse_path_check_proc(recepient, depth + 1); + if (error != 0) + break; } rcu_read_unlock(); return error; From patchwork Sun Oct 4 02:39:13 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815455 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 8568A6CA for ; Sun, 4 Oct 2020 02:40:43 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 6B867206B6 for ; Sun, 4 Oct 2020 02:40:43 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726801AbgJDCkm (ORCPT ); Sat, 3 Oct 2020 22:40:42 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48890 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726443AbgJDCjd (ORCPT ); Sat, 3 Oct 2020 22:39:33 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 8EDE8C0613D0; Sat, 3 Oct 2020 19:39:32 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvv-00BUqV-3n; Sun, 04 Oct 2020 02:39:31 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 11/27] ep_loop_check_proc(): lift pushing the cookie into callers Date: Sun, 4 Oct 2020 03:39:13 +0100 Message-Id: <20201004023929.2740074-11-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro Signed-off-by: Al Viro --- fs/eventpoll.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 33af838046ea..9edea3933790 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1877,9 +1877,6 @@ static int ep_loop_check_proc(void *priv, void *cookie, int depth) struct rb_node *rbp; struct epitem *epi; - if (!ep_push_nested(cookie)) /* limits recursion */ - return -1; - mutex_lock_nested(&ep->mtx, depth + 1); ep->gen = loop_check_gen; for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { @@ -1888,8 +1885,13 @@ static int ep_loop_check_proc(void *priv, void *cookie, int depth) ep_tovisit = epi->ffd.file->private_data; if (ep_tovisit->gen == loop_check_gen) continue; - error = ep_loop_check_proc(epi->ffd.file, ep_tovisit, + if (!ep_push_nested(ep_tovisit)) { + error = -1; + } else { + error = ep_loop_check_proc(epi->ffd.file, ep_tovisit, depth + 1); + nesting--; + } if (error != 0) break; } else { @@ -1909,7 +1911,6 @@ static int ep_loop_check_proc(void *priv, void *cookie, int depth) } } mutex_unlock(&ep->mtx); - nesting--; /* pop */ return error; } @@ -1927,7 +1928,12 @@ static int ep_loop_check_proc(void *priv, void *cookie, int depth) */ static int ep_loop_check(struct eventpoll *ep, struct file *file) { - return ep_loop_check_proc(file, ep, 0); + int err; + + ep_push_nested(ep); // can't fail + err = ep_loop_check_proc(file, ep, 0); + nesting--; + return err; } static void clear_tfile_check_list(void) From patchwork Sun Oct 4 02:39:14 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815459 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 2D2C992C for ; Sun, 4 Oct 2020 02:40:53 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 1D9BD20679 for ; Sun, 4 Oct 2020 02:40:53 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726795AbgJDCkm (ORCPT ); Sat, 3 Oct 2020 22:40:42 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48892 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726458AbgJDCjd (ORCPT ); Sat, 3 Oct 2020 22:39:33 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id BA953C0613E7; Sat, 3 Oct 2020 19:39:32 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvv-00BUqZ-DY; Sun, 04 Oct 2020 02:39:31 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 12/27] get rid of ep_push_nested() Date: Sun, 4 Oct 2020 03:39:14 +0100 Message-Id: <20201004023929.2740074-12-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro The only remaining user is loop checking. But there we only need to check that we have not walked into the epoll we are inserting into - we are adding an edge to acyclic graph, so any loop being created will have to pass through the source of that edge. So we don't need that array of cookies - we have only one eventpoll to watch out for. RIP ep_push_nested(), along with the cookies array. Signed-off-by: Al Viro --- fs/eventpoll.c | 29 ++++------------------------- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 9edea3933790..6b1990b8b9a0 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -254,8 +254,7 @@ static DEFINE_MUTEX(epmutex); static u64 loop_check_gen = 0; /* Used to check for epoll file descriptor inclusion loops */ -static void *cookies[EP_MAX_NESTS + 1]; -static int nesting; +static struct eventpoll *inserting_into; /* Slab cache used to allocate "struct epitem" */ static struct kmem_cache *epi_cache __read_mostly; @@ -424,21 +423,6 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi) #endif /* CONFIG_NET_RX_BUSY_POLL */ -static bool ep_push_nested(void *cookie) -{ - int i; - - if (nesting > EP_MAX_NESTS) /* too deep nesting */ - return false; - - for (i = 0; i < nesting; i++) { - if (cookies[i] == cookie) /* loop detected */ - return false; - } - cookies[nesting++] = cookie; - return true; -} - /* * As described in commit 0ccf831cb lockdep: annotate epoll * the use of wait queues used by epoll is done in a very controlled @@ -1885,12 +1869,11 @@ static int ep_loop_check_proc(void *priv, void *cookie, int depth) ep_tovisit = epi->ffd.file->private_data; if (ep_tovisit->gen == loop_check_gen) continue; - if (!ep_push_nested(ep_tovisit)) { + if (ep_tovisit == inserting_into || depth > EP_MAX_NESTS) { error = -1; } else { error = ep_loop_check_proc(epi->ffd.file, ep_tovisit, depth + 1); - nesting--; } if (error != 0) break; @@ -1928,12 +1911,8 @@ static int ep_loop_check_proc(void *priv, void *cookie, int depth) */ static int ep_loop_check(struct eventpoll *ep, struct file *file) { - int err; - - ep_push_nested(ep); // can't fail - err = ep_loop_check_proc(file, ep, 0); - nesting--; - return err; + inserting_into = ep; + return ep_loop_check_proc(file, ep, 0); } static void clear_tfile_check_list(void) From patchwork Sun Oct 4 02:39:15 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815433 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 8F56292C for ; Sun, 4 Oct 2020 02:40:01 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 80F3D206CC for ; Sun, 4 Oct 2020 02:40:01 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726645AbgJDCjx (ORCPT ); Sat, 3 Oct 2020 22:39:53 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48894 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726442AbgJDCjd (ORCPT ); Sat, 3 Oct 2020 22:39:33 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id D9B12C0613E8; Sat, 3 Oct 2020 19:39:32 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvv-00BUqh-Jn; Sun, 04 Oct 2020 02:39:31 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 13/27] ep_loop_check_proc(): saner calling conventions Date: Sun, 4 Oct 2020 03:39:15 +0100 Message-Id: <20201004023929.2740074-13-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro 1) 'cookie' argument is unused; kill it. 2) 'priv' one is always an epoll struct file, and we only care about its associated struct eventpoll; pass that instead. Signed-off-by: Al Viro --- fs/eventpoll.c | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 6b1990b8b9a0..e971e3ace557 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1845,19 +1845,14 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, * result in excessive stack usage). * * @priv: Pointer to the epoll file to be currently checked. - * @cookie: Original cookie for this call. This is the top-of-the-chain epoll - * data structure pointer. - * @call_nests: Current dept of the @ep_call_nested() call stack. + * @depth: Current depth of the path being checked. * * Returns: Returns zero if adding the epoll @file inside current epoll * structure @ep does not violate the constraints, or -1 otherwise. */ -static int ep_loop_check_proc(void *priv, void *cookie, int depth) +static int ep_loop_check_proc(struct eventpoll *ep, int depth) { int error = 0; - struct file *file = priv; - struct eventpoll *ep = file->private_data; - struct eventpoll *ep_tovisit; struct rb_node *rbp; struct epitem *epi; @@ -1866,15 +1861,14 @@ static int ep_loop_check_proc(void *priv, void *cookie, int depth) for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); if (unlikely(is_file_epoll(epi->ffd.file))) { + struct eventpoll *ep_tovisit; ep_tovisit = epi->ffd.file->private_data; if (ep_tovisit->gen == loop_check_gen) continue; - if (ep_tovisit == inserting_into || depth > EP_MAX_NESTS) { + if (ep_tovisit == inserting_into || depth > EP_MAX_NESTS) error = -1; - } else { - error = ep_loop_check_proc(epi->ffd.file, ep_tovisit, - depth + 1); - } + else + error = ep_loop_check_proc(ep_tovisit, depth + 1); if (error != 0) break; } else { @@ -1899,20 +1893,20 @@ static int ep_loop_check_proc(void *priv, void *cookie, int depth) } /** - * ep_loop_check - Performs a check to verify that adding an epoll file (@file) - * another epoll file (represented by @ep) does not create + * ep_loop_check - Performs a check to verify that adding an epoll file (@to) + * into another epoll file (represented by @from) does not create * closed loops or too deep chains. * - * @ep: Pointer to the epoll private data structure. - * @file: Pointer to the epoll file to be checked. + * @from: Pointer to the epoll we are inserting into. + * @to: Pointer to the epoll to be inserted. * - * Returns: Returns zero if adding the epoll @file inside current epoll - * structure @ep does not violate the constraints, or -1 otherwise. + * Returns: Returns zero if adding the epoll @to inside the epoll @from + * does not violate the constraints, or -1 otherwise. */ -static int ep_loop_check(struct eventpoll *ep, struct file *file) +static int ep_loop_check(struct eventpoll *ep, struct eventpoll *to) { inserting_into = ep; - return ep_loop_check_proc(file, ep, 0); + return ep_loop_check_proc(to, 0); } static void clear_tfile_check_list(void) @@ -2086,8 +2080,9 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, loop_check_gen++; full_check = 1; if (is_file_epoll(tf.file)) { + tep = tf.file->private_data; error = -ELOOP; - if (ep_loop_check(ep, tf.file) != 0) + if (ep_loop_check(ep, tep) != 0) goto error_tgt_fput; } else { get_file(tf.file); @@ -2098,7 +2093,6 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, if (error) goto error_tgt_fput; if (is_file_epoll(tf.file)) { - tep = tf.file->private_data; error = epoll_mutex_lock(&tep->mtx, 1, nonblock); if (error) { mutex_unlock(&ep->mtx); From patchwork Sun Oct 4 02:39:16 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815431 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A35486CA for ; Sun, 4 Oct 2020 02:39:53 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 93FD420679 for ; Sun, 4 Oct 2020 02:39:53 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726636AbgJDCjw (ORCPT ); Sat, 3 Oct 2020 22:39:52 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48896 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726464AbgJDCjd (ORCPT ); Sat, 3 Oct 2020 22:39:33 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 10D70C0613E9; Sat, 3 Oct 2020 19:39:33 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvv-00BUqn-Nk; Sun, 04 Oct 2020 02:39:31 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 14/27] ep_scan_ready_list(): prepare to splitup Date: Sun, 4 Oct 2020 03:39:16 +0100 Message-Id: <20201004023929.2740074-14-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro take the stuff done before and after the callback into separate helpers Signed-off-by: Al Viro --- fs/eventpoll.c | 63 +++++++++++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index e971e3ace557..eb012fdc152e 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -561,28 +561,10 @@ static inline void ep_pm_stay_awake_rcu(struct epitem *epi) rcu_read_unlock(); } -/** - * ep_scan_ready_list - Scans the ready list in a way that makes possible for - * the scan code, to call f_op->poll(). Also allows for - * O(NumReady) performance. - * - * @ep: Pointer to the epoll private data structure. - * @sproc: Pointer to the scan callback. - * @priv: Private opaque data passed to the @sproc callback. - * @depth: The current depth of recursive f_op->poll calls. - * @ep_locked: caller already holds ep->mtx - * - * Returns: The same integer error code returned by the @sproc callback. - */ -static __poll_t ep_scan_ready_list(struct eventpoll *ep, - __poll_t (*sproc)(struct eventpoll *, - struct list_head *, void *), - void *priv, int depth, bool ep_locked) +static void ep_start_scan(struct eventpoll *ep, + int depth, bool ep_locked, + struct list_head *txlist) { - __poll_t res; - struct epitem *epi, *nepi; - LIST_HEAD(txlist); - lockdep_assert_irqs_enabled(); /* @@ -602,14 +584,16 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep, * in a lockless way. */ write_lock_irq(&ep->lock); - list_splice_init(&ep->rdllist, &txlist); + list_splice_init(&ep->rdllist, txlist); WRITE_ONCE(ep->ovflist, NULL); write_unlock_irq(&ep->lock); +} - /* - * Now call the callback function. - */ - res = (*sproc)(ep, &txlist, priv); +static void ep_done_scan(struct eventpoll *ep, + int depth, bool ep_locked, + struct list_head *txlist) +{ + struct epitem *epi, *nepi; write_lock_irq(&ep->lock); /* @@ -644,13 +628,38 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep, /* * Quickly re-inject items left on "txlist". */ - list_splice(&txlist, &ep->rdllist); + list_splice(txlist, &ep->rdllist); __pm_relax(ep->ws); write_unlock_irq(&ep->lock); if (!ep_locked) mutex_unlock(&ep->mtx); +} +/** + * ep_scan_ready_list - Scans the ready list in a way that makes possible for + * the scan code, to call f_op->poll(). Also allows for + * O(NumReady) performance. + * + * @ep: Pointer to the epoll private data structure. + * @sproc: Pointer to the scan callback. + * @priv: Private opaque data passed to the @sproc callback. + * @depth: The current depth of recursive f_op->poll calls. + * @ep_locked: caller already holds ep->mtx + * + * Returns: The same integer error code returned by the @sproc callback. + */ +static __poll_t ep_scan_ready_list(struct eventpoll *ep, + __poll_t (*sproc)(struct eventpoll *, + struct list_head *, void *), + void *priv, int depth, bool ep_locked) +{ + __poll_t res; + LIST_HEAD(txlist); + + ep_start_scan(ep, depth, ep_locked, &txlist); + res = (*sproc)(ep, &txlist, priv); + ep_done_scan(ep, depth, ep_locked, &txlist); return res; } From patchwork Sun Oct 4 02:39:17 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815435 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 9CF996CA for ; Sun, 4 Oct 2020 02:40:03 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 8E7B2206B6 for ; Sun, 4 Oct 2020 02:40:03 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726627AbgJDCjw (ORCPT ); Sat, 3 Oct 2020 22:39:52 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48898 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726477AbgJDCjd (ORCPT ); Sat, 3 Oct 2020 22:39:33 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 658B5C0613A6; Sat, 3 Oct 2020 19:39:33 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvv-00BUqu-TM; Sun, 04 Oct 2020 02:39:31 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 15/27] lift the calls of ep_read_events_proc() into the callers Date: Sun, 4 Oct 2020 03:39:17 +0100 Message-Id: <20201004023929.2740074-15-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro Expand the calls of ep_scan_ready_list() that get ep_read_events_proc(). As a side benefit we can pass depth to ep_read_events_proc() by value and not by address - the latter used to be forced by the signature expected from ep_scan_ready_list() callback. Signed-off-by: Al Viro --- fs/eventpoll.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index eb012fdc152e..9b9e29e0c85f 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -774,7 +774,7 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file) } static __poll_t ep_read_events_proc(struct eventpoll *ep, struct list_head *head, - void *priv); + int depth); static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, poll_table *pt); @@ -787,6 +787,8 @@ static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, int depth) { struct eventpoll *ep; + LIST_HEAD(txlist); + __poll_t res; bool locked; pt->_key = epi->event.events; @@ -797,20 +799,19 @@ static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, poll_wait(epi->ffd.file, &ep->poll_wait, pt); locked = pt && (pt->_qproc == ep_ptable_queue_proc); - return ep_scan_ready_list(epi->ffd.file->private_data, - ep_read_events_proc, &depth, depth, - locked) & epi->event.events; + ep_start_scan(ep, depth, locked, &txlist); + res = ep_read_events_proc(ep, &txlist, depth + 1); + ep_done_scan(ep, depth, locked, &txlist); + return res & epi->event.events; } static __poll_t ep_read_events_proc(struct eventpoll *ep, struct list_head *head, - void *priv) + int depth) { struct epitem *epi, *tmp; poll_table pt; - int depth = *(int *)priv; init_poll_funcptr(&pt, NULL); - depth++; list_for_each_entry_safe(epi, tmp, head, rdllink) { if (ep_item_poll(epi, &pt, depth)) { @@ -832,7 +833,8 @@ static __poll_t ep_read_events_proc(struct eventpoll *ep, struct list_head *head static __poll_t ep_eventpoll_poll(struct file *file, poll_table *wait) { struct eventpoll *ep = file->private_data; - int depth = 0; + LIST_HEAD(txlist); + __poll_t res; /* Insert inside our poll wait queue */ poll_wait(file, &ep->poll_wait, wait); @@ -841,8 +843,10 @@ static __poll_t ep_eventpoll_poll(struct file *file, poll_table *wait) * Proceed to find out if wanted events are really available inside * the ready list. */ - return ep_scan_ready_list(ep, ep_read_events_proc, - &depth, depth, false); + ep_start_scan(ep, 0, false, &txlist); + res = ep_read_events_proc(ep, &txlist, 1); + ep_done_scan(ep, 0, false, &txlist); + return res; } #ifdef CONFIG_PROC_FS From patchwork Sun Oct 4 02:39:18 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815457 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 0A3C1112E for ; Sun, 4 Oct 2020 02:40:44 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id E6997206B6 for ; Sun, 4 Oct 2020 02:40:43 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726808AbgJDCkn (ORCPT ); Sat, 3 Oct 2020 22:40:43 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48904 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726487AbgJDCjd (ORCPT ); Sat, 3 Oct 2020 22:39:33 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 86BF5C0613A7; Sat, 3 Oct 2020 19:39:33 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvw-00BUr2-80; Sun, 04 Oct 2020 02:39:32 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 16/27] lift the calls of ep_send_events_proc() into the callers Date: Sun, 4 Oct 2020 03:39:18 +0100 Message-Id: <20201004023929.2740074-16-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro ... and kill ep_scan_ready_list() Signed-off-by: Al Viro --- fs/eventpoll.c | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 9b9e29e0c85f..3b3a862f8014 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -636,33 +636,6 @@ static void ep_done_scan(struct eventpoll *ep, mutex_unlock(&ep->mtx); } -/** - * ep_scan_ready_list - Scans the ready list in a way that makes possible for - * the scan code, to call f_op->poll(). Also allows for - * O(NumReady) performance. - * - * @ep: Pointer to the epoll private data structure. - * @sproc: Pointer to the scan callback. - * @priv: Private opaque data passed to the @sproc callback. - * @depth: The current depth of recursive f_op->poll calls. - * @ep_locked: caller already holds ep->mtx - * - * Returns: The same integer error code returned by the @sproc callback. - */ -static __poll_t ep_scan_ready_list(struct eventpoll *ep, - __poll_t (*sproc)(struct eventpoll *, - struct list_head *, void *), - void *priv, int depth, bool ep_locked) -{ - __poll_t res; - LIST_HEAD(txlist); - - ep_start_scan(ep, depth, ep_locked, &txlist); - res = (*sproc)(ep, &txlist, priv); - ep_done_scan(ep, depth, ep_locked, &txlist); - return res; -} - static void epi_rcu_free(struct rcu_head *head) { struct epitem *epi = container_of(head, struct epitem, rcu); @@ -1685,11 +1658,15 @@ static int ep_send_events(struct eventpoll *ep, struct epoll_event __user *events, int maxevents) { struct ep_send_events_data esed; + LIST_HEAD(txlist); esed.maxevents = maxevents; esed.events = events; - ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0, false); + ep_start_scan(ep, 0, false, &txlist); + ep_send_events_proc(ep, &txlist, &esed); + ep_done_scan(ep, 0, false, &txlist); + return esed.res; } From patchwork Sun Oct 4 02:39:19 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815427 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 1E3C2112E for ; Sun, 4 Oct 2020 02:39:46 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 101C320679 for ; Sun, 4 Oct 2020 02:39:46 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726597AbgJDCjn (ORCPT ); Sat, 3 Oct 2020 22:39:43 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48888 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726521AbgJDCji (ORCPT ); Sat, 3 Oct 2020 22:39:38 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id AD634C0613A8; Sat, 3 Oct 2020 19:39:33 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvw-00BUrC-Ct; Sun, 04 Oct 2020 02:39:32 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 17/27] ep_send_events_proc(): fold into the caller Date: Sun, 4 Oct 2020 03:39:19 +0100 Message-Id: <20201004023929.2740074-17-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro ... and get rid of struct ep_send_events_data - not needed anymore. The weird way of passing the arguments in (and real return value out - nominal return value of ep_send_events_proc() is ignored) was due to the signature forced on ep_scan_ready_list() callbacks. Signed-off-by: Al Viro --- fs/eventpoll.c | 60 ++++++++++++++++++++-------------------------------------- 1 file changed, 20 insertions(+), 40 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 3b3a862f8014..ac996b959e94 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -233,13 +233,6 @@ struct ep_pqueue { struct epitem *epi; }; -/* Used by the ep_send_events() function as callback private data */ -struct ep_send_events_data { - int maxevents; - struct epoll_event __user *events; - int res; -}; - /* * Configuration options available inside /proc/sys/fs/epoll/ */ @@ -1570,18 +1563,17 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, return 0; } -static __poll_t ep_send_events_proc(struct eventpoll *ep, struct list_head *head, - void *priv) +static int ep_send_events(struct eventpoll *ep, + struct epoll_event __user *events, int maxevents) { - struct ep_send_events_data *esed = priv; - __poll_t revents; struct epitem *epi, *tmp; - struct epoll_event __user *uevent = esed->events; - struct wakeup_source *ws; + LIST_HEAD(txlist); poll_table pt; + int res = 0; init_poll_funcptr(&pt, NULL); - esed->res = 0; + + ep_start_scan(ep, 0, false, &txlist); /* * We can loop without lock because we are passed a task private list. @@ -1590,8 +1582,11 @@ static __poll_t ep_send_events_proc(struct eventpoll *ep, struct list_head *head */ lockdep_assert_held(&ep->mtx); - list_for_each_entry_safe(epi, tmp, head, rdllink) { - if (esed->res >= esed->maxevents) + list_for_each_entry_safe(epi, tmp, &txlist, rdllink) { + struct wakeup_source *ws; + __poll_t revents; + + if (res >= maxevents) break; /* @@ -1622,16 +1617,16 @@ static __poll_t ep_send_events_proc(struct eventpoll *ep, struct list_head *head if (!revents) continue; - if (__put_user(revents, &uevent->events) || - __put_user(epi->event.data, &uevent->data)) { - list_add(&epi->rdllink, head); + if (__put_user(revents, &events->events) || + __put_user(epi->event.data, &events->data)) { + list_add(&epi->rdllink, &txlist); ep_pm_stay_awake(epi); - if (!esed->res) - esed->res = -EFAULT; - return 0; + if (!res) + res = -EFAULT; + break; } - esed->res++; - uevent++; + res++; + events++; if (epi->event.events & EPOLLONESHOT) epi->event.events &= EP_PRIVATE_BITS; else if (!(epi->event.events & EPOLLET)) { @@ -1650,24 +1645,9 @@ static __poll_t ep_send_events_proc(struct eventpoll *ep, struct list_head *head ep_pm_stay_awake(epi); } } - - return 0; -} - -static int ep_send_events(struct eventpoll *ep, - struct epoll_event __user *events, int maxevents) -{ - struct ep_send_events_data esed; - LIST_HEAD(txlist); - - esed.maxevents = maxevents; - esed.events = events; - - ep_start_scan(ep, 0, false, &txlist); - ep_send_events_proc(ep, &txlist, &esed); ep_done_scan(ep, 0, false, &txlist); - return esed.res; + return res; } static inline struct timespec64 ep_set_mstimeout(long ms) From patchwork Sun Oct 4 02:39:20 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815429 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 3354892C for ; Sun, 4 Oct 2020 02:39:51 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 25AC4206CC for ; Sun, 4 Oct 2020 02:39:51 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726590AbgJDCjn (ORCPT ); Sat, 3 Oct 2020 22:39:43 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48916 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726526AbgJDCji (ORCPT ); Sat, 3 Oct 2020 22:39:38 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id EF31CC0613A9; Sat, 3 Oct 2020 19:39:33 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvw-00BUrI-Hq; Sun, 04 Oct 2020 02:39:32 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 18/27] lift locking/unlocking ep->mtx out of ep_{start,done}_scan() Date: Sun, 4 Oct 2020 03:39:20 +0100 Message-Id: <20201004023929.2740074-18-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro get rid of depth/ep_locked arguments there and document the kludge in ep_item_poll() that has lead to ep_locked existence in the first place Signed-off-by: Al Viro --- fs/eventpoll.c | 57 ++++++++++++++++++++++++++------------------------------- 1 file changed, 26 insertions(+), 31 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index ac996b959e94..f9c567af1f5f 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -554,20 +554,13 @@ static inline void ep_pm_stay_awake_rcu(struct epitem *epi) rcu_read_unlock(); } -static void ep_start_scan(struct eventpoll *ep, - int depth, bool ep_locked, - struct list_head *txlist) -{ - lockdep_assert_irqs_enabled(); - - /* - * We need to lock this because we could be hit by - * eventpoll_release_file() and epoll_ctl(). - */ - - if (!ep_locked) - mutex_lock_nested(&ep->mtx, depth); +/* + * ep->mutex needs to be held because we could be hit by + * eventpoll_release_file() and epoll_ctl(). + */ +static void ep_start_scan(struct eventpoll *ep, struct list_head *txlist) +{ /* * Steal the ready list, and re-init the original one to the * empty list. Also, set ep->ovflist to NULL so that events @@ -576,6 +569,7 @@ static void ep_start_scan(struct eventpoll *ep, * because we want the "sproc" callback to be able to do it * in a lockless way. */ + lockdep_assert_irqs_enabled(); write_lock_irq(&ep->lock); list_splice_init(&ep->rdllist, txlist); WRITE_ONCE(ep->ovflist, NULL); @@ -583,7 +577,6 @@ static void ep_start_scan(struct eventpoll *ep, } static void ep_done_scan(struct eventpoll *ep, - int depth, bool ep_locked, struct list_head *txlist) { struct epitem *epi, *nepi; @@ -624,9 +617,6 @@ static void ep_done_scan(struct eventpoll *ep, list_splice(txlist, &ep->rdllist); __pm_relax(ep->ws); write_unlock_irq(&ep->lock); - - if (!ep_locked) - mutex_unlock(&ep->mtx); } static void epi_rcu_free(struct rcu_head *head) @@ -763,11 +753,16 @@ static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, ep = epi->ffd.file->private_data; poll_wait(epi->ffd.file, &ep->poll_wait, pt); - locked = pt && (pt->_qproc == ep_ptable_queue_proc); - ep_start_scan(ep, depth, locked, &txlist); + // kludge: ep_insert() calls us with ep->mtx already locked + locked = pt && (pt->_qproc == ep_ptable_queue_proc); + if (!locked) + mutex_lock_nested(&ep->mtx, depth); + ep_start_scan(ep, &txlist); res = ep_read_events_proc(ep, &txlist, depth + 1); - ep_done_scan(ep, depth, locked, &txlist); + ep_done_scan(ep, &txlist); + if (!locked) + mutex_unlock(&ep->mtx); return res & epi->event.events; } @@ -809,9 +804,11 @@ static __poll_t ep_eventpoll_poll(struct file *file, poll_table *wait) * Proceed to find out if wanted events are really available inside * the ready list. */ - ep_start_scan(ep, 0, false, &txlist); + mutex_lock(&ep->mtx); + ep_start_scan(ep, &txlist); res = ep_read_events_proc(ep, &txlist, 1); - ep_done_scan(ep, 0, false, &txlist); + ep_done_scan(ep, &txlist); + mutex_unlock(&ep->mtx); return res; } @@ -1573,15 +1570,13 @@ static int ep_send_events(struct eventpoll *ep, init_poll_funcptr(&pt, NULL); - ep_start_scan(ep, 0, false, &txlist); + mutex_lock(&ep->mtx); + ep_start_scan(ep, &txlist); /* * We can loop without lock because we are passed a task private list. - * Items cannot vanish during the loop because ep_scan_ready_list() is - * holding "mtx" during this call. + * Items cannot vanish during the loop we are holding ep->mtx. */ - lockdep_assert_held(&ep->mtx); - list_for_each_entry_safe(epi, tmp, &txlist, rdllink) { struct wakeup_source *ws; __poll_t revents; @@ -1609,9 +1604,8 @@ static int ep_send_events(struct eventpoll *ep, /* * If the event mask intersect the caller-requested one, - * deliver the event to userspace. Again, ep_scan_ready_list() - * is holding ep->mtx, so no operations coming from userspace - * can change the item. + * deliver the event to userspace. Again, we are holding ep->mtx, + * so no operations coming from userspace can change the item. */ revents = ep_item_poll(epi, &pt, 1); if (!revents) @@ -1645,7 +1639,8 @@ static int ep_send_events(struct eventpoll *ep, ep_pm_stay_awake(epi); } } - ep_done_scan(ep, 0, false, &txlist); + ep_done_scan(ep, &txlist); + mutex_unlock(&ep->mtx); return res; } From patchwork Sun Oct 4 02:39:21 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815441 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id C90F492C for ; Sun, 4 Oct 2020 02:40:23 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id B9C69206B6 for ; Sun, 4 Oct 2020 02:40:23 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726698AbgJDCkH (ORCPT ); Sat, 3 Oct 2020 22:40:07 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48904 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726551AbgJDCji (ORCPT ); Sat, 3 Oct 2020 22:39:38 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 1753FC0613AA; Sat, 3 Oct 2020 19:39:34 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvw-00BUrP-QS; Sun, 04 Oct 2020 02:39:32 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 19/27] ep_insert(): don't open-code ep_remove() on failure exits Date: Sun, 4 Oct 2020 03:39:21 +0100 Message-Id: <20201004023929.2740074-19-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro Signed-off-by: Al Viro --- fs/eventpoll.c | 51 ++++++++++++++------------------------------------- 1 file changed, 14 insertions(+), 37 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index f9c567af1f5f..c987b61701e4 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1384,12 +1384,16 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, epi->next = EP_UNACTIVE_PTR; if (epi->event.events & EPOLLWAKEUP) { error = ep_create_wakeup_source(epi); - if (error) - goto error_create_wakeup_source; + if (error) { + kmem_cache_free(epi_cache, epi); + return error; + } } else { RCU_INIT_POINTER(epi->ws, NULL); } + atomic_long_inc(&ep->user->epoll_watches); + /* Add the current item to the list of active epoll hook for this file */ spin_lock(&tfile->f_lock); list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links); @@ -1402,9 +1406,10 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, ep_rbtree_insert(ep, epi); /* now check if we've created too many backpaths */ - error = -EINVAL; - if (full_check && reverse_path_check()) - goto error_remove_epi; + if (unlikely(full_check && reverse_path_check())) { + ep_remove(ep, epi); + return -EINVAL; + } /* Initialize the poll table using the queue callback */ epq.epi = epi; @@ -1424,9 +1429,10 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, * install process. Namely an allocation for a wait queue failed due * high memory pressure. */ - error = -ENOMEM; - if (!epq.epi) - goto error_unregister; + if (unlikely(!epq.epi)) { + ep_remove(ep, epi); + return -ENOMEM; + } /* We have to drop the new item inside our item list to keep track of it */ write_lock_irq(&ep->lock); @@ -1448,40 +1454,11 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, write_unlock_irq(&ep->lock); - atomic_long_inc(&ep->user->epoll_watches); - /* We have to call this outside the lock */ if (pwake) ep_poll_safewake(ep, NULL); return 0; - -error_unregister: - ep_unregister_pollwait(ep, epi); -error_remove_epi: - spin_lock(&tfile->f_lock); - list_del_rcu(&epi->fllink); - spin_unlock(&tfile->f_lock); - - rb_erase_cached(&epi->rbn, &ep->rbr); - - /* - * We need to do this because an event could have been arrived on some - * allocated wait queue. Note that we don't care about the ep->ovflist - * list, since that is used/cleaned only inside a section bound by "mtx". - * And ep_insert() is called with "mtx" held. - */ - write_lock_irq(&ep->lock); - if (ep_is_linked(epi)) - list_del_init(&epi->rdllink); - write_unlock_irq(&ep->lock); - - wakeup_source_unregister(ep_wakeup_source(epi)); - -error_create_wakeup_source: - kmem_cache_free(epi_cache, epi); - - return error; } /* From patchwork Sun Oct 4 02:39:22 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815453 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id F192A92C for ; Sun, 4 Oct 2020 02:40:39 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id E1B3D206B6 for ; Sun, 4 Oct 2020 02:40:39 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726760AbgJDCk1 (ORCPT ); Sat, 3 Oct 2020 22:40:27 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48888 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726547AbgJDCji (ORCPT ); Sat, 3 Oct 2020 22:39:38 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 451ADC0613AB; Sat, 3 Oct 2020 19:39:34 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvw-00BUrW-UO; Sun, 04 Oct 2020 02:39:32 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 20/27] ep_insert(): we only need tep->mtx around the insertion itself Date: Sun, 4 Oct 2020 03:39:22 +0100 Message-Id: <20201004023929.2740074-20-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro We do need ep->mtx (and we are holding it all along), but that's the lock on the epoll we are inserting into; locking of the epoll being inserted is not needed for most of that work - as the matter of fact, we only need it to provide barriers for the fastpath check (for now). Move taking and releasing it into ep_insert(). The caller (do_epoll_ctl()) doesn't need to bother with that at all. Moreover, that way we kill the kludge in ep_item_poll() - now it's always called with tep unlocked. Signed-off-by: Al Viro Reported-by: kernel test robot --- fs/eventpoll.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index c987b61701e4..39947b71f7af 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -731,8 +731,6 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file) static __poll_t ep_read_events_proc(struct eventpoll *ep, struct list_head *head, int depth); -static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, - poll_table *pt); /* * Differs from ep_eventpoll_poll() in that internal callers already have @@ -745,7 +743,6 @@ static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, struct eventpoll *ep; LIST_HEAD(txlist); __poll_t res; - bool locked; pt->_key = epi->event.events; if (!is_file_epoll(epi->ffd.file)) @@ -754,15 +751,11 @@ static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, ep = epi->ffd.file->private_data; poll_wait(epi->ffd.file, &ep->poll_wait, pt); - // kludge: ep_insert() calls us with ep->mtx already locked - locked = pt && (pt->_qproc == ep_ptable_queue_proc); - if (!locked) - mutex_lock_nested(&ep->mtx, depth); + mutex_lock_nested(&ep->mtx, depth); ep_start_scan(ep, &txlist); res = ep_read_events_proc(ep, &txlist, depth + 1); ep_done_scan(ep, &txlist); - if (!locked) - mutex_unlock(&ep->mtx); + mutex_unlock(&ep->mtx); return res & epi->event.events; } @@ -1365,6 +1358,10 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, long user_watches; struct epitem *epi; struct ep_pqueue epq; + struct eventpoll *tep = NULL; + + if (is_file_epoll(tfile)) + tep = tfile->private_data; lockdep_assert_irqs_enabled(); @@ -1394,6 +1391,8 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, atomic_long_inc(&ep->user->epoll_watches); + if (tep) + mutex_lock(&tep->mtx); /* Add the current item to the list of active epoll hook for this file */ spin_lock(&tfile->f_lock); list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links); @@ -1404,6 +1403,8 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, * protected by "mtx", and ep_insert() is called with "mtx" held. */ ep_rbtree_insert(ep, epi); + if (tep) + mutex_unlock(&tep->mtx); /* now check if we've created too many backpaths */ if (unlikely(full_check && reverse_path_check())) { @@ -2034,13 +2035,6 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, error = epoll_mutex_lock(&ep->mtx, 0, nonblock); if (error) goto error_tgt_fput; - if (is_file_epoll(tf.file)) { - error = epoll_mutex_lock(&tep->mtx, 1, nonblock); - if (error) { - mutex_unlock(&ep->mtx); - goto error_tgt_fput; - } - } } } @@ -2076,8 +2070,6 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, error = -ENOENT; break; } - if (tep != NULL) - mutex_unlock(&tep->mtx); mutex_unlock(&ep->mtx); error_tgt_fput: From patchwork Sun Oct 4 02:39:23 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815449 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 4BCA6112E for ; Sun, 4 Oct 2020 02:40:28 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 3D409206B8 for ; Sun, 4 Oct 2020 02:40:28 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726752AbgJDCk1 (ORCPT ); Sat, 3 Oct 2020 22:40:27 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48894 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726559AbgJDCji (ORCPT ); Sat, 3 Oct 2020 22:39:38 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 762A0C0613AC; Sat, 3 Oct 2020 19:39:34 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvx-00BUrd-39; Sun, 04 Oct 2020 02:39:33 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 21/27] take the common part of ep_eventpoll_poll() and ep_item_poll() into helper Date: Sun, 4 Oct 2020 03:39:23 +0100 Message-Id: <20201004023929.2740074-21-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro The only reason why ep_item_poll() can't simply call ep_eventpoll_poll() (or, better yet, call vfs_poll() in all cases) is that we need to tell lockdep how deep into the hierarchy of ->mtx we are. So let's add a variant of ep_eventpoll_poll() that would take depth explicitly and turn ep_eventpoll_poll() into wrapper for that. Signed-off-by: Al Viro --- fs/eventpoll.c | 57 +++++++++++++++++++++++++++------------------------------ 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 39947b71f7af..a50b48d26c55 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -732,6 +732,27 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file) static __poll_t ep_read_events_proc(struct eventpoll *ep, struct list_head *head, int depth); +static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int depth) +{ + struct eventpoll *ep = file->private_data; + LIST_HEAD(txlist); + __poll_t res; + + /* Insert inside our poll wait queue */ + poll_wait(file, &ep->poll_wait, wait); + + /* + * Proceed to find out if wanted events are really available inside + * the ready list. + */ + mutex_lock_nested(&ep->mtx, depth); + ep_start_scan(ep, &txlist); + res = ep_read_events_proc(ep, &txlist, depth + 1); + ep_done_scan(ep, &txlist); + mutex_unlock(&ep->mtx); + return res; +} + /* * Differs from ep_eventpoll_poll() in that internal callers already have * the ep->mtx so we need to start from depth=1, such that mutex_lock_nested() @@ -740,22 +761,14 @@ static __poll_t ep_read_events_proc(struct eventpoll *ep, struct list_head *head static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, int depth) { - struct eventpoll *ep; - LIST_HEAD(txlist); + struct file *file = epi->ffd.file; __poll_t res; pt->_key = epi->event.events; - if (!is_file_epoll(epi->ffd.file)) - return vfs_poll(epi->ffd.file, pt) & epi->event.events; - - ep = epi->ffd.file->private_data; - poll_wait(epi->ffd.file, &ep->poll_wait, pt); - - mutex_lock_nested(&ep->mtx, depth); - ep_start_scan(ep, &txlist); - res = ep_read_events_proc(ep, &txlist, depth + 1); - ep_done_scan(ep, &txlist); - mutex_unlock(&ep->mtx); + if (!is_file_epoll(file)) + res = vfs_poll(file, pt); + else + res = __ep_eventpoll_poll(file, pt, depth); return res & epi->event.events; } @@ -786,23 +799,7 @@ static __poll_t ep_read_events_proc(struct eventpoll *ep, struct list_head *head static __poll_t ep_eventpoll_poll(struct file *file, poll_table *wait) { - struct eventpoll *ep = file->private_data; - LIST_HEAD(txlist); - __poll_t res; - - /* Insert inside our poll wait queue */ - poll_wait(file, &ep->poll_wait, wait); - - /* - * Proceed to find out if wanted events are really available inside - * the ready list. - */ - mutex_lock(&ep->mtx); - ep_start_scan(ep, &txlist); - res = ep_read_events_proc(ep, &txlist, 1); - ep_done_scan(ep, &txlist); - mutex_unlock(&ep->mtx); - return res; + return __ep_eventpoll_poll(file, wait, 0); } #ifdef CONFIG_PROC_FS From patchwork Sun Oct 4 02:39:24 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815447 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id BBB7F92C for ; Sun, 4 Oct 2020 02:40:27 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id AD35D206B6 for ; Sun, 4 Oct 2020 02:40:27 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726747AbgJDCk0 (ORCPT ); Sat, 3 Oct 2020 22:40:26 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48890 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726552AbgJDCji (ORCPT ); Sat, 3 Oct 2020 22:39:38 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 8DDC7C0613AD; Sat, 3 Oct 2020 19:39:34 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvx-00BUrk-Ad; Sun, 04 Oct 2020 02:39:33 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 22/27] fold ep_read_events_proc() into the only caller Date: Sun, 4 Oct 2020 03:39:24 +0100 Message-Id: <20201004023929.2740074-22-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro Signed-off-by: Al Viro --- fs/eventpoll.c | 49 ++++++++++++++++++++----------------------------- 1 file changed, 20 insertions(+), 29 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index a50b48d26c55..1efe8a1a022a 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -729,14 +729,17 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file) return 0; } -static __poll_t ep_read_events_proc(struct eventpoll *ep, struct list_head *head, - int depth); +static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, int depth); static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int depth) { struct eventpoll *ep = file->private_data; LIST_HEAD(txlist); - __poll_t res; + struct epitem *epi, *tmp; + poll_table pt; + __poll_t res = 0; + + init_poll_funcptr(&pt, NULL); /* Insert inside our poll wait queue */ poll_wait(file, &ep->poll_wait, wait); @@ -747,7 +750,20 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep */ mutex_lock_nested(&ep->mtx, depth); ep_start_scan(ep, &txlist); - res = ep_read_events_proc(ep, &txlist, depth + 1); + list_for_each_entry_safe(epi, tmp, &txlist, rdllink) { + if (ep_item_poll(epi, &pt, depth + 1)) { + res = EPOLLIN | EPOLLRDNORM; + break; + } else { + /* + * Item has been dropped into the ready list by the poll + * callback, but it's not actually ready, as far as + * caller requested events goes. We can remove it here. + */ + __pm_relax(ep_wakeup_source(epi)); + list_del_init(&epi->rdllink); + } + } ep_done_scan(ep, &txlist); mutex_unlock(&ep->mtx); return res; @@ -772,31 +788,6 @@ static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, return res & epi->event.events; } -static __poll_t ep_read_events_proc(struct eventpoll *ep, struct list_head *head, - int depth) -{ - struct epitem *epi, *tmp; - poll_table pt; - - init_poll_funcptr(&pt, NULL); - - list_for_each_entry_safe(epi, tmp, head, rdllink) { - if (ep_item_poll(epi, &pt, depth)) { - return EPOLLIN | EPOLLRDNORM; - } else { - /* - * Item has been dropped into the ready list by the poll - * callback, but it's not actually ready, as far as - * caller requested events goes. We can remove it here. - */ - __pm_relax(ep_wakeup_source(epi)); - list_del_init(&epi->rdllink); - } - } - - return 0; -} - static __poll_t ep_eventpoll_poll(struct file *file, poll_table *wait) { return __ep_eventpoll_poll(file, wait, 0); From patchwork Sun Oct 4 02:39:25 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815443 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id B66E66CA for ; Sun, 4 Oct 2020 02:40:24 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id A0852206B6 for ; Sun, 4 Oct 2020 02:40:24 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726694AbgJDCkG (ORCPT ); Sat, 3 Oct 2020 22:40:06 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48886 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726548AbgJDCji (ORCPT ); Sat, 3 Oct 2020 22:39:38 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id B22B2C0613AE; Sat, 3 Oct 2020 19:39:34 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvx-00BUrr-Dk; Sun, 04 Oct 2020 02:39:33 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 23/27] ep_insert(): move creation of wakeup source past the fl_ep_links insertion Date: Sun, 4 Oct 2020 03:39:25 +0100 Message-Id: <20201004023929.2740074-23-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro That's the beginning of preparations for taking f_ep_links out of struct file. If insertion might fail, we will need a new failure exit. Having wakeup source creation done after that point will simplify life there; ep_remove() can (and commonly does) live with NULL epi->ws, so it can be used for cleanup after ep_create_wakeup_source() failure. It can't be used before the rbtree insertion, though, so if we are to unify all old failure exits, we need to move that thing down. Then we would be free to do simple kmem_cache_free() on the failure to insert into f_ep_links - no wakeup source to leak on that failure exit. Signed-off-by: Al Viro --- fs/eventpoll.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 1efe8a1a022a..66da645d5eb4 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1356,26 +1356,16 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, user_watches = atomic_long_read(&ep->user->epoll_watches); if (unlikely(user_watches >= max_user_watches)) return -ENOSPC; - if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL))) + if (!(epi = kmem_cache_zalloc(epi_cache, GFP_KERNEL))) return -ENOMEM; /* Item initialization follow here ... */ INIT_LIST_HEAD(&epi->rdllink); INIT_LIST_HEAD(&epi->fllink); - epi->pwqlist = NULL; epi->ep = ep; ep_set_ffd(&epi->ffd, tfile, fd); epi->event = *event; epi->next = EP_UNACTIVE_PTR; - if (epi->event.events & EPOLLWAKEUP) { - error = ep_create_wakeup_source(epi); - if (error) { - kmem_cache_free(epi_cache, epi); - return error; - } - } else { - RCU_INIT_POINTER(epi->ws, NULL); - } atomic_long_inc(&ep->user->epoll_watches); @@ -1400,6 +1390,14 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, return -EINVAL; } + if (epi->event.events & EPOLLWAKEUP) { + error = ep_create_wakeup_source(epi); + if (error) { + ep_remove(ep, epi); + return error; + } + } + /* Initialize the poll table using the queue callback */ epq.epi = epi; init_poll_funcptr(&epq.pt, ep_ptable_queue_proc); From patchwork Sun Oct 4 02:39:26 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815451 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 3A41C92C for ; Sun, 4 Oct 2020 02:40:39 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 21CA920657 for ; Sun, 4 Oct 2020 02:40:39 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726765AbgJDCk2 (ORCPT ); Sat, 3 Oct 2020 22:40:28 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48916 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726564AbgJDCji (ORCPT ); Sat, 3 Oct 2020 22:39:38 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id D9D82C0613AF; Sat, 3 Oct 2020 19:39:34 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvx-00BUry-IH; Sun, 04 Oct 2020 02:39:33 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 24/27] convert ->f_ep_links/->fllink to hlist Date: Sun, 4 Oct 2020 03:39:26 +0100 Message-Id: <20201004023929.2740074-24-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro we don't care about the order of elements there Signed-off-by: Al Viro --- fs/eventpoll.c | 18 +++++++++--------- include/linux/eventpoll.h | 4 ++-- include/linux/fs.h | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 66da645d5eb4..78b8769b72dc 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -160,7 +160,7 @@ struct epitem { struct eventpoll *ep; /* List header used to link this item to the "struct file" items list */ - struct list_head fllink; + struct hlist_node fllink; /* wakeup_source used when EPOLLWAKEUP is set */ struct wakeup_source __rcu *ws; @@ -642,7 +642,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) /* Remove the current item from the list of epoll hooks */ spin_lock(&file->f_lock); - list_del_rcu(&epi->fllink); + hlist_del_rcu(&epi->fllink); spin_unlock(&file->f_lock); rb_erase_cached(&epi->rbn, &ep->rbr); @@ -835,7 +835,8 @@ static const struct file_operations eventpoll_fops = { void eventpoll_release_file(struct file *file) { struct eventpoll *ep; - struct epitem *epi, *next; + struct epitem *epi; + struct hlist_node *next; /* * We don't want to get "file->f_lock" because it is not @@ -851,7 +852,7 @@ void eventpoll_release_file(struct file *file) * Besides, ep_remove() acquires the lock, so we can't hold it here. */ mutex_lock(&epmutex); - list_for_each_entry_safe(epi, next, &file->f_ep_links, fllink) { + hlist_for_each_entry_safe(epi, next, &file->f_ep_links, fllink) { ep = epi->ep; mutex_lock_nested(&ep->mtx, 0); ep_remove(ep, epi); @@ -1257,11 +1258,11 @@ static int reverse_path_check_proc(struct file *file, int depth) /* CTL_DEL can remove links here, but that can't increase our count */ rcu_read_lock(); - list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) { + hlist_for_each_entry_rcu(epi, &file->f_ep_links, fllink) { struct file *recepient = epi->ep->file; if (WARN_ON(!is_file_epoll(recepient))) continue; - if (list_empty(&recepient->f_ep_links)) + if (hlist_empty(&recepient->f_ep_links)) error = path_count_inc(depth); else error = reverse_path_check_proc(recepient, depth + 1); @@ -1361,7 +1362,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, /* Item initialization follow here ... */ INIT_LIST_HEAD(&epi->rdllink); - INIT_LIST_HEAD(&epi->fllink); epi->ep = ep; ep_set_ffd(&epi->ffd, tfile, fd); epi->event = *event; @@ -1373,7 +1373,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, mutex_lock(&tep->mtx); /* Add the current item to the list of active epoll hook for this file */ spin_lock(&tfile->f_lock); - list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links); + hlist_add_head_rcu(&epi->fllink, &tfile->f_ep_links); spin_unlock(&tfile->f_lock); /* @@ -1999,7 +1999,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, if (error) goto error_tgt_fput; if (op == EPOLL_CTL_ADD) { - if (!list_empty(&f.file->f_ep_links) || + if (!hlist_empty(&f.file->f_ep_links) || ep->gen == loop_check_gen || is_file_epoll(tf.file)) { mutex_unlock(&ep->mtx); diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 8f000fada5a4..4e215ccfa792 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -25,7 +25,7 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long t /* Used to initialize the epoll bits inside the "struct file" */ static inline void eventpoll_init_file(struct file *file) { - INIT_LIST_HEAD(&file->f_ep_links); + INIT_HLIST_HEAD(&file->f_ep_links); INIT_LIST_HEAD(&file->f_tfile_llink); } @@ -50,7 +50,7 @@ static inline void eventpoll_release(struct file *file) * because the file in on the way to be removed and nobody ( but * eventpoll ) has still a reference to this file. */ - if (likely(list_empty(&file->f_ep_links))) + if (likely(hlist_empty(&file->f_ep_links))) return; /* diff --git a/include/linux/fs.h b/include/linux/fs.h index e019ea2f1347..9dc4c09f1d13 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -951,7 +951,7 @@ struct file { #ifdef CONFIG_EPOLL /* Used by fs/eventpoll.c to link all the hooks to this file */ - struct list_head f_ep_links; + struct hlist_head f_ep_links; struct list_head f_tfile_llink; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; From patchwork Sun Oct 4 02:39:27 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815445 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 46B55112E for ; Sun, 4 Oct 2020 02:40:25 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 37CA5206B6 for ; Sun, 4 Oct 2020 02:40:25 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726688AbgJDCkG (ORCPT ); Sat, 3 Oct 2020 22:40:06 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48892 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726569AbgJDCji (ORCPT ); Sat, 3 Oct 2020 22:39:38 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 0C2D9C0613B0; Sat, 3 Oct 2020 19:39:35 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvx-00BUs4-NS; Sun, 04 Oct 2020 02:39:33 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 25/27] lift rcu_read_lock() into reverse_path_check() Date: Sun, 4 Oct 2020 03:39:27 +0100 Message-Id: <20201004023929.2740074-25-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro Signed-off-by: Al Viro --- fs/eventpoll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 78b8769b72dc..8a7ad752befd 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1257,7 +1257,6 @@ static int reverse_path_check_proc(struct file *file, int depth) return -1; /* CTL_DEL can remove links here, but that can't increase our count */ - rcu_read_lock(); hlist_for_each_entry_rcu(epi, &file->f_ep_links, fllink) { struct file *recepient = epi->ep->file; if (WARN_ON(!is_file_epoll(recepient))) @@ -1269,7 +1268,6 @@ static int reverse_path_check_proc(struct file *file, int depth) if (error != 0) break; } - rcu_read_unlock(); return error; } @@ -1291,7 +1289,9 @@ static int reverse_path_check(void) /* let's call this for all tfiles */ list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) { path_count_init(); + rcu_read_lock(); error = reverse_path_check_proc(current_file, 0); + rcu_read_unlock(); if (error) break; } From patchwork Sun Oct 4 02:39:28 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815439 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 493D61746 for ; Sun, 4 Oct 2020 02:40:06 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 3152F206B6 for ; Sun, 4 Oct 2020 02:40:06 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726682AbgJDCkF (ORCPT ); Sat, 3 Oct 2020 22:40:05 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48898 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726561AbgJDCji (ORCPT ); Sat, 3 Oct 2020 22:39:38 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 3472BC0613B1; Sat, 3 Oct 2020 19:39:35 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvx-00BUsC-T1; Sun, 04 Oct 2020 02:39:33 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 26/27] epoll: massage the check list insertion Date: Sun, 4 Oct 2020 03:39:28 +0100 Message-Id: <20201004023929.2740074-26-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro in the "non-epoll target" cases do it in ep_insert() rather than in do_epoll_ctl(), so that we do it only with some epitem is already guaranteed to exist. Signed-off-by: Al Viro --- fs/eventpoll.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 8a7ad752befd..eea269670168 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1375,6 +1375,10 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, spin_lock(&tfile->f_lock); hlist_add_head_rcu(&epi->fllink, &tfile->f_ep_links); spin_unlock(&tfile->f_lock); + if (full_check && !tep) { + get_file(tfile); + list_add(&tfile->f_tfile_llink, &tfile_check_list); + } /* * Add the current item to the RB tree. All RB tree operations are @@ -2013,10 +2017,6 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, error = -ELOOP; if (ep_loop_check(ep, tep) != 0) goto error_tgt_fput; - } else { - get_file(tf.file); - list_add(&tf.file->f_tfile_llink, - &tfile_check_list); } error = epoll_mutex_lock(&ep->mtx, 0, nonblock); if (error) From patchwork Sun Oct 4 02:39:29 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Al Viro X-Patchwork-Id: 11815437 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id DB3F86CA for ; Sun, 4 Oct 2020 02:40:05 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id B6443206B6 for ; Sun, 4 Oct 2020 02:40:05 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726674AbgJDCkD (ORCPT ); Sat, 3 Oct 2020 22:40:03 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48896 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726562AbgJDCji (ORCPT ); Sat, 3 Oct 2020 22:39:38 -0400 Received: from ZenIV.linux.org.uk (zeniv.linux.org.uk [IPv6:2002:c35c:fd02::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 49BE2C0613B2; Sat, 3 Oct 2020 19:39:35 -0700 (PDT) Received: from viro by ZenIV.linux.org.uk with local (Exim 4.92.3 #3 (Red Hat Linux)) id 1kOtvy-00BUsJ-1u; Sun, 04 Oct 2020 02:39:34 +0000 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Marc Zyngier Subject: [RFC PATCH 27/27] epoll: take epitem list out of struct file Date: Sun, 4 Oct 2020 03:39:29 +0100 Message-Id: <20201004023929.2740074-27-viro@ZenIV.linux.org.uk> X-Mailer: git-send-email 2.25.4 In-Reply-To: <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> References: <20201004023608.GM3421308@ZenIV.linux.org.uk> <20201004023929.2740074-1-viro@ZenIV.linux.org.uk> MIME-Version: 1.0 Sender: Al Viro Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org From: Al Viro Move the head of epitem list out of struct file; for epoll ones it's moved into struct eventpoll (->refs there), for non-epoll - into the new object (struct epitem_head). In place of ->f_ep_links we leave a pointer to the list head (->f_ep). ->f_ep is protected by ->f_lock and it's zeroed as soon as the list of epitems becomes empty (that can happen only in ep_remove() by now). The list of files for reverse path check is *not* going through struct file now - it's a single-linked list going through epitem_head instances. It's terminated by ERR_PTR(-1) (== EP_UNACTIVE_POINTER), so the elements of list can be distinguished by head->next != NULL. epitem_head instances are allocated at ep_insert() time (by attach_epitem()) and freed either by ep_remove() (if it empties the set of epitems *and* epitem_head does not belong to the reverse path check list) or by clear_tfile_check_list() when the list is emptied (if the set of epitems is empty by that point). Allocations are done from a separate slab - minimal kmalloc() size is too large on some architectures. As the result, we trim struct file _and_ get rid of the games with temporary file references. Locking and barriers are interesting (aren't they always); see unlist_file() and ep_remove() for details. The non-obvious part is that ep_remove() needs to decide if it will be the one to free the damn thing *before* actually storing NULL to head->epitems.first - that's what smp_load_acquire is for in there. unlist_file() lockless path is safe, since we hit it only if we observe NULL in head->epitems.first and whoever had done that store is guaranteed to have observed non-NULL in head->next. IOW, their last access had been the store of NULL into ->epitems.first and we can safely free the sucker. OTOH, we are under rcu_read_lock() and both epitem and epitem->file have their freeing RCU-delayed. So if we see non-NULL ->epitems.first, we can grab ->f_lock (all epitems in there share the same struct file) and safely recheck the emptiness of ->epitems; again, ->next is still non-NULL, so ep_remove() couldn't have freed head yet. ->f_lock serializes us wrt ep_remove(); the rest is trivial. Note that once head->epitems becomes NULL, nothing can get inserted into it - the only remaining reference to head after that point is from the reverse path check list. Signed-off-by: Al Viro --- fs/eventpoll.c | 168 ++++++++++++++++++++++++++++++++++------------ fs/file_table.c | 1 - include/linux/eventpoll.h | 11 +-- include/linux/fs.h | 5 +- 4 files changed, 129 insertions(+), 56 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index eea269670168..d3fdabf6fd34 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -215,6 +215,7 @@ struct eventpoll { /* used to optimize loop detection check */ u64 gen; + struct hlist_head refs; #ifdef CONFIG_NET_RX_BUSY_POLL /* used to track busy poll napi_id */ @@ -259,7 +260,45 @@ static struct kmem_cache *pwq_cache __read_mostly; * List of files with newly added links, where we may need to limit the number * of emanating paths. Protected by the epmutex. */ -static LIST_HEAD(tfile_check_list); +struct epitems_head { + struct hlist_head epitems; + struct epitems_head *next; +}; +static struct epitems_head *tfile_check_list = EP_UNACTIVE_PTR; + +static struct kmem_cache *ephead_cache __read_mostly; + +static inline void free_ephead(struct epitems_head *head) +{ + if (head) + kmem_cache_free(ephead_cache, head); +} + +static void list_file(struct file *file) +{ + struct epitems_head *head; + + head = container_of(file->f_ep, struct epitems_head, epitems); + if (!head->next) { + head->next = tfile_check_list; + tfile_check_list = head; + } +} + +static void unlist_file(struct epitems_head *head) +{ + struct epitems_head *to_free = head; + struct hlist_node *p = rcu_dereference(hlist_first_rcu(&head->epitems)); + if (p) { + struct epitem *epi= container_of(p, struct epitem, fllink); + spin_lock(&epi->ffd.file->f_lock); + if (!hlist_empty(&head->epitems)) + to_free = NULL; + head->next = NULL; + spin_unlock(&epi->ffd.file->f_lock); + } + free_ephead(to_free); +} #ifdef CONFIG_SYSCTL @@ -632,6 +671,8 @@ static void epi_rcu_free(struct rcu_head *head) static int ep_remove(struct eventpoll *ep, struct epitem *epi) { struct file *file = epi->ffd.file; + struct epitems_head *to_free; + struct hlist_head *head; lockdep_assert_irqs_enabled(); @@ -642,8 +683,20 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) /* Remove the current item from the list of epoll hooks */ spin_lock(&file->f_lock); + to_free = NULL; + head = file->f_ep; + if (head->first == &epi->fllink && !epi->fllink.next) { + file->f_ep = NULL; + if (!is_file_epoll(file)) { + struct epitems_head *v; + v = container_of(head, struct epitems_head, epitems); + if (!smp_load_acquire(&v->next)) + to_free = v; + } + } hlist_del_rcu(&epi->fllink); spin_unlock(&file->f_lock); + free_ephead(to_free); rb_erase_cached(&epi->rbn, &ep->rbr); @@ -852,7 +905,11 @@ void eventpoll_release_file(struct file *file) * Besides, ep_remove() acquires the lock, so we can't hold it here. */ mutex_lock(&epmutex); - hlist_for_each_entry_safe(epi, next, &file->f_ep_links, fllink) { + if (unlikely(!file->f_ep)) { + mutex_unlock(&epmutex); + return; + } + hlist_for_each_entry_safe(epi, next, file->f_ep, fllink) { ep = epi->ep; mutex_lock_nested(&ep->mtx, 0); ep_remove(ep, epi); @@ -1248,7 +1305,7 @@ static void path_count_init(void) path_count[i] = 0; } -static int reverse_path_check_proc(struct file *file, int depth) +static int reverse_path_check_proc(struct hlist_head *refs, int depth) { int error = 0; struct epitem *epi; @@ -1257,14 +1314,12 @@ static int reverse_path_check_proc(struct file *file, int depth) return -1; /* CTL_DEL can remove links here, but that can't increase our count */ - hlist_for_each_entry_rcu(epi, &file->f_ep_links, fllink) { - struct file *recepient = epi->ep->file; - if (WARN_ON(!is_file_epoll(recepient))) - continue; - if (hlist_empty(&recepient->f_ep_links)) + hlist_for_each_entry_rcu(epi, refs, fllink) { + struct hlist_head *refs = &epi->ep->refs; + if (hlist_empty(refs)) error = path_count_inc(depth); else - error = reverse_path_check_proc(recepient, depth + 1); + error = reverse_path_check_proc(refs, depth + 1); if (error != 0) break; } @@ -1272,7 +1327,7 @@ static int reverse_path_check_proc(struct file *file, int depth) } /** - * reverse_path_check - The tfile_check_list is list of file *, which have + * reverse_path_check - The tfile_check_list is list of epitem_head, which have * links that are proposed to be newly added. We need to * make sure that those added links don't add too many * paths such that we will spend all our time waking up @@ -1283,19 +1338,18 @@ static int reverse_path_check_proc(struct file *file, int depth) */ static int reverse_path_check(void) { - int error = 0; - struct file *current_file; + struct epitems_head *p; - /* let's call this for all tfiles */ - list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) { + for (p = tfile_check_list; p != EP_UNACTIVE_PTR; p = p->next) { + int error; path_count_init(); rcu_read_lock(); - error = reverse_path_check_proc(current_file, 0); + error = reverse_path_check_proc(&p->epitems, 0); rcu_read_unlock(); if (error) - break; + return error; } - return error; + return 0; } static int ep_create_wakeup_source(struct epitem *epi) @@ -1336,6 +1390,39 @@ static noinline void ep_destroy_wakeup_source(struct epitem *epi) wakeup_source_unregister(ws); } +static int attach_epitem(struct file *file, struct epitem *epi) +{ + struct epitems_head *to_free = NULL; + struct hlist_head *head = NULL; + struct eventpoll *ep = NULL; + + if (is_file_epoll(file)) + ep = file->private_data; + + if (ep) { + head = &ep->refs; + } else if (!READ_ONCE(file->f_ep)) { +allocate: + to_free = kmem_cache_zalloc(ephead_cache, GFP_KERNEL); + if (!to_free) + return -ENOMEM; + head = &to_free->epitems; + } + spin_lock(&file->f_lock); + if (!file->f_ep) { + if (unlikely(!head)) { + spin_unlock(&file->f_lock); + goto allocate; + } + file->f_ep = head; + to_free = NULL; + } + hlist_add_head_rcu(&epi->fllink, file->f_ep); + spin_unlock(&file->f_lock); + free_ephead(to_free); + return 0; +} + /* * Must be called with "mtx" held. */ @@ -1367,19 +1454,21 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, epi->event = *event; epi->next = EP_UNACTIVE_PTR; - atomic_long_inc(&ep->user->epoll_watches); - if (tep) mutex_lock(&tep->mtx); /* Add the current item to the list of active epoll hook for this file */ - spin_lock(&tfile->f_lock); - hlist_add_head_rcu(&epi->fllink, &tfile->f_ep_links); - spin_unlock(&tfile->f_lock); - if (full_check && !tep) { - get_file(tfile); - list_add(&tfile->f_tfile_llink, &tfile_check_list); + if (unlikely(attach_epitem(tfile, epi) < 0)) { + kmem_cache_free(epi_cache, epi); + if (tep) + mutex_lock(&tep->mtx); + return -ENOMEM; } + if (full_check && !tep) + list_file(tfile); + + atomic_long_inc(&ep->user->epoll_watches); + /* * Add the current item to the RB tree. All RB tree operations are * protected by "mtx", and ep_insert() is called with "mtx" held. @@ -1813,11 +1902,7 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth) * not already there, and calling reverse_path_check() * during ep_insert(). */ - if (list_empty(&epi->ffd.file->f_tfile_llink)) { - if (get_file_rcu(epi->ffd.file)) - list_add(&epi->ffd.file->f_tfile_llink, - &tfile_check_list); - } + list_file(epi->ffd.file); } } mutex_unlock(&ep->mtx); @@ -1844,16 +1929,13 @@ static int ep_loop_check(struct eventpoll *ep, struct eventpoll *to) static void clear_tfile_check_list(void) { - struct file *file; - - /* first clear the tfile_check_list */ - while (!list_empty(&tfile_check_list)) { - file = list_first_entry(&tfile_check_list, struct file, - f_tfile_llink); - list_del_init(&file->f_tfile_llink); - fput(file); + rcu_read_lock(); + while (tfile_check_list != EP_UNACTIVE_PTR) { + struct epitems_head *head = tfile_check_list; + tfile_check_list = head->next; + unlist_file(head); } - INIT_LIST_HEAD(&tfile_check_list); + rcu_read_unlock(); } /* @@ -2003,9 +2085,8 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, if (error) goto error_tgt_fput; if (op == EPOLL_CTL_ADD) { - if (!hlist_empty(&f.file->f_ep_links) || - ep->gen == loop_check_gen || - is_file_epoll(tf.file)) { + if (READ_ONCE(f.file->f_ep) || ep->gen == loop_check_gen || + is_file_epoll(tf.file)) { mutex_unlock(&ep->mtx); error = epoll_mutex_lock(&epmutex, 0, nonblock); if (error) @@ -2216,6 +2297,9 @@ static int __init eventpoll_init(void) pwq_cache = kmem_cache_create("eventpoll_pwq", sizeof(struct eppoll_entry), 0, SLAB_PANIC|SLAB_ACCOUNT, NULL); + ephead_cache = kmem_cache_create("ep_head", + sizeof(struct epitems_head), 0, SLAB_PANIC|SLAB_ACCOUNT, NULL); + return 0; } fs_initcall(eventpoll_init); diff --git a/fs/file_table.c b/fs/file_table.c index 656647f9575a..f2bb37fd0905 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -113,7 +113,6 @@ static struct file *__alloc_file(int flags, const struct cred *cred) rwlock_init(&f->f_owner.lock); spin_lock_init(&f->f_lock); mutex_init(&f->f_pos_lock); - eventpoll_init_file(f); f->f_flags = flags; f->f_mode = OPEN_FMODE(flags); /* f->f_version: 0 */ diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 4e215ccfa792..0350393465d4 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -22,14 +22,6 @@ struct file; struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff); #endif -/* Used to initialize the epoll bits inside the "struct file" */ -static inline void eventpoll_init_file(struct file *file) -{ - INIT_HLIST_HEAD(&file->f_ep_links); - INIT_LIST_HEAD(&file->f_tfile_llink); -} - - /* Used to release the epoll bits inside the "struct file" */ void eventpoll_release_file(struct file *file); @@ -50,7 +42,7 @@ static inline void eventpoll_release(struct file *file) * because the file in on the way to be removed and nobody ( but * eventpoll ) has still a reference to this file. */ - if (likely(hlist_empty(&file->f_ep_links))) + if (likely(!file->f_ep)) return; /* @@ -72,7 +64,6 @@ static inline int ep_op_has_event(int op) #else -static inline void eventpoll_init_file(struct file *file) {} static inline void eventpoll_release(struct file *file) {} #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 9dc4c09f1d13..a9242e1464d1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -928,7 +928,7 @@ struct file { const struct file_operations *f_op; /* - * Protects f_ep_links, f_flags. + * Protects f_ep, f_flags. * Must not be taken from IRQ context. */ spinlock_t f_lock; @@ -951,8 +951,7 @@ struct file { #ifdef CONFIG_EPOLL /* Used by fs/eventpoll.c to link all the hooks to this file */ - struct hlist_head f_ep_links; - struct list_head f_tfile_llink; + struct hlist_head *f_ep; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; errseq_t f_wb_err;