
[net-next] l2tp: avoid using drain_workqueue in l2tp_pre_exit_net

Message ID 20240819145208.3209296-1-jchapman@katalix.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series [net-next] l2tp: avoid using drain_workqueue in l2tp_pre_exit_net

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 16 this patch: 16
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 6 of 6 maintainers
netdev/build_clang success Errors and warnings before: 16 this patch: 16
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 16 this patch: 16
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 101 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-08-20--09-00 (tests: 712)

Commit Message

James Chapman Aug. 19, 2024, 2:52 p.m. UTC
Recent commit c1b2e36b8776 ("l2tp: flush workqueue before draining
it") incorrectly uses drain_workqueue. The use of drain_workqueue in
l2tp_pre_exit_net is flawed because the workqueue is shared by all
nets and it is therefore possible for new work items to be queued
while drain_workqueue runs.

Instead of using drain_workqueue, use a loop to delete all tunnels and
__flush_workqueue until all tunnel/session lists of the net are
empty. Add a per-net flag to ensure that no new tunnel can be created
in the net once l2tp_pre_exit_net starts.

Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: Tom Parkin <tparkin@katalix.com>
---
 net/l2tp/l2tp_core.c    | 38 +++++++++++++++++++++++++++++---------
 net/l2tp/l2tp_core.h    |  2 +-
 net/l2tp/l2tp_netlink.c |  2 +-
 net/l2tp/l2tp_ppp.c     |  3 ++-
 4 files changed, 33 insertions(+), 12 deletions(-)

Comments

Paolo Abeni Aug. 22, 2024, 10:22 a.m. UTC | #1
On 8/19/24 16:52, James Chapman wrote:
> Recent commit c1b2e36b8776 ("l2tp: flush workqueue before draining
> it") incorrectly uses drain_workqueue. 

isn't the relevant commit fc7ec7f554d7d0a27ba339fcf48df11d14413329?

> The use of drain_workqueue in
> l2tp_pre_exit_net is flawed because the workqueue is shared by all
> nets and it is therefore possible for new work items to be queued
> while drain_workqueue runs.
> 
> Instead of using drain_workqueue, use a loop to delete all tunnels and
> __flush_workqueue until all tunnel/session lists of the net are
> empty. Add a per-net flag to ensure that no new tunnel can be created
> in the net once l2tp_pre_exit_net starts.

We need a fixes tag even for net-next fixes :)

> Signed-off-by: James Chapman <jchapman@katalix.com>
> Signed-off-by: Tom Parkin <tparkin@katalix.com>
> ---
>   net/l2tp/l2tp_core.c    | 38 +++++++++++++++++++++++++++++---------
>   net/l2tp/l2tp_core.h    |  2 +-
>   net/l2tp/l2tp_netlink.c |  2 +-
>   net/l2tp/l2tp_ppp.c     |  3 ++-
>   4 files changed, 33 insertions(+), 12 deletions(-)
> 
> diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
> index af87c781d6a6..246b07342b86 100644
> --- a/net/l2tp/l2tp_core.c
> +++ b/net/l2tp/l2tp_core.c
> @@ -107,6 +107,7 @@ static struct workqueue_struct *l2tp_wq;
>   /* per-net private data for this module */
>   static unsigned int l2tp_net_id;
>   struct l2tp_net {
> +	bool net_closing;
>   	/* Lock for write access to l2tp_tunnel_idr */
>   	spinlock_t l2tp_tunnel_idr_lock;
>   	struct idr l2tp_tunnel_idr;
> @@ -1560,13 +1561,19 @@ static int l2tp_tunnel_sock_create(struct net *net,
>   	return err;
>   }
>   
> -int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
> +int l2tp_tunnel_create(struct net *net, int fd, int version,
> +		       u32 tunnel_id, u32 peer_tunnel_id,
>   		       struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
>   {
> +	struct l2tp_net *pn = l2tp_pernet(net);
>   	struct l2tp_tunnel *tunnel = NULL;
>   	int err;
>   	enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;
>   
> +	/* This pairs with WRITE_ONCE() in l2tp_pre_exit_net(). */
> +	if (READ_ONCE(pn->net_closing))
> +		return -ENETDOWN;

Is this necessary? The netns is going away, so no userspace process 
should be able to touch it.

> +
>   	if (cfg)
>   		encap = cfg->encap;
>   
> @@ -1855,16 +1870,21 @@ static __net_exit void l2tp_pre_exit_net(struct net *net)
>   	}
>   	rcu_read_unlock_bh();
>   
> -	if (l2tp_wq) {
> -		/* ensure that all TUNNEL_DELETE work items are run before
> -		 * draining the work queue since TUNNEL_DELETE requests may
> -		 * queue SESSION_DELETE work items for each session in the
> -		 * tunnel. drain_workqueue may otherwise warn if SESSION_DELETE
> -		 * requests are queued while the work queue is being drained.
> -		 */
> +	if (l2tp_wq)
>   		__flush_workqueue(l2tp_wq);
> -		drain_workqueue(l2tp_wq);
> +
> +	/* repeat until all of the net's IDR lists are empty, in case tunnels
> +	 * or sessions were being created just before l2tp_pre_exit_net was
> +	 * called.
> +	 */
> +	rcu_read_lock_bh();
> +	if (!idr_is_empty(&pn->l2tp_tunnel_idr) ||
> +	    !idr_is_empty(&pn->l2tp_v2_session_idr) ||
> +	    !idr_is_empty(&pn->l2tp_v3_session_idr)) {
> +		rcu_read_unlock_bh();
> +		goto again;

This doesn't look nice; it could keep the kernel spinning for a while.

What about, e.g., queueing a 'dummy' work item on l2tp_wq after 
__flush_workqueue() and explicitly waiting for that work to complete?

When such work completes, any other l2tp-related work queued for the 
same netns should also have completed.
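
A minimal sketch of that idea, using the stock workqueue API from <linux/workqueue.h>; the helper and its name are illustrative only and not part of the patch:

/* No-op work item used purely as a completion barrier. */
static void l2tp_barrier_work_fn(struct work_struct *work)
{
}

static void l2tp_wait_for_delete_work(void)
{
	struct work_struct barrier;

	if (!l2tp_wq)
		return;

	INIT_WORK_ONSTACK(&barrier, l2tp_barrier_work_fn);
	queue_work(l2tp_wq, &barrier);
	/* flush_work() waits for this barrier item to finish running;
	 * the idea is that by then the TUNNEL_DELETE/SESSION_DELETE
	 * work queued earlier for this netns has run too.
	 */
	flush_work(&barrier);
	destroy_work_on_stack(&barrier);
}

As the reply below notes, this alone would not cover a tunnel or session registered after the barrier is queued.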

Cheers,

Paolo
James Chapman Aug. 22, 2024, 2:41 p.m. UTC | #2
On 22/08/2024 11:22, Paolo Abeni wrote:
> 
> 
> On 8/19/24 16:52, James Chapman wrote:
>> Recent commit c1b2e36b8776 ("l2tp: flush workqueue before draining
>> it") incorrectly uses drain_workqueue. 
> 
> isn't the relevant commit fc7ec7f554d7d0a27ba339fcf48df11d14413329?

Good spot. Thanks.

>> The use of drain_workqueue in
>> l2tp_pre_exit_net is flawed because the workqueue is shared by all
>> nets and it is therefore possible for new work items to be queued
>> while drain_workqueue runs.
>>
>> Instead of using drain_workqueue, use a loop to delete all tunnels and
>> __flush_workqueue until all tunnel/session lists of the net are
>> empty. Add a per-net flag to ensure that no new tunnel can be created
>> in the net once l2tp_pre_exit_net starts.
> 
> We need a fixes tag even for net-next fixes :)

Oh ok. My mistake.

>> Signed-off-by: James Chapman <jchapman@katalix.com>
>> Signed-off-by: Tom Parkin <tparkin@katalix.com>
>> ---
>>   net/l2tp/l2tp_core.c    | 38 +++++++++++++++++++++++++++++---------
>>   net/l2tp/l2tp_core.h    |  2 +-
>>   net/l2tp/l2tp_netlink.c |  2 +-
>>   net/l2tp/l2tp_ppp.c     |  3 ++-
>>   4 files changed, 33 insertions(+), 12 deletions(-)
>>
>> diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
>> index af87c781d6a6..246b07342b86 100644
>> --- a/net/l2tp/l2tp_core.c
>> +++ b/net/l2tp/l2tp_core.c
>> @@ -107,6 +107,7 @@ static struct workqueue_struct *l2tp_wq;
>>   /* per-net private data for this module */
>>   static unsigned int l2tp_net_id;
>>   struct l2tp_net {
>> +    bool net_closing;
>>       /* Lock for write access to l2tp_tunnel_idr */
>>       spinlock_t l2tp_tunnel_idr_lock;
>>       struct idr l2tp_tunnel_idr;
>> @@ -1560,13 +1561,19 @@ static int l2tp_tunnel_sock_create(struct net 
>> *net,
>>       return err;
>>   }
>> -int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 
>> peer_tunnel_id,
>> +int l2tp_tunnel_create(struct net *net, int fd, int version,
>> +               u32 tunnel_id, u32 peer_tunnel_id,
>>                  struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel 
>> **tunnelp)
>>   {
>> +    struct l2tp_net *pn = l2tp_pernet(net);
>>       struct l2tp_tunnel *tunnel = NULL;
>>       int err;
>>       enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;
>> +    /* This pairs with WRITE_ONCE() in l2tp_pre_exit_net(). */
>> +    if (READ_ONCE(pn->net_closing))
>> +        return -ENETDOWN;
> 
> Is this necessary? The netns is going away, so no userspace process 
> should be able to touch it.

I considered this too. I was thinking that a bad process could cause 
l2tp_pre_exit_net to loop forever if it keeps creating tunnels. But if 
the net isn't usable by userspace when the pre_exit handler starts then 
I think we're ok to remove the flag.

> 
>> +
>>       if (cfg)
>>           encap = cfg->encap;
>> @@ -1855,16 +1870,21 @@ static __net_exit void 
>> l2tp_pre_exit_net(struct net *net)
>>       }
>>       rcu_read_unlock_bh();
>> -    if (l2tp_wq) {
>> -        /* ensure that all TUNNEL_DELETE work items are run before
>> -         * draining the work queue since TUNNEL_DELETE requests may
>> -         * queue SESSION_DELETE work items for each session in the
>> -         * tunnel. drain_workqueue may otherwise warn if SESSION_DELETE
>> -         * requests are queued while the work queue is being drained.
>> -         */
>> +    if (l2tp_wq)
>>           __flush_workqueue(l2tp_wq);
>> -        drain_workqueue(l2tp_wq);
>> +
>> +    /* repeat until all of the net's IDR lists are empty, in case 
>> tunnels
>> +     * or sessions were being created just before l2tp_pre_exit_net was
>> +     * called.
>> +     */
>> +    rcu_read_lock_bh();
>> +    if (!idr_is_empty(&pn->l2tp_tunnel_idr) ||
>> +        !idr_is_empty(&pn->l2tp_v2_session_idr) ||
>> +        !idr_is_empty(&pn->l2tp_v3_session_idr)) {
>> +        rcu_read_unlock_bh();
>> +        goto again;
> 
> This doesn't look nice; it could keep the kernel spinning for a while.
> 
> What about, e.g., queueing a 'dummy' work item on l2tp_wq after 
> __flush_workqueue() and explicitly waiting for that work to complete?
> 
> When such work completes, any other l2tp-related work queued for the 
> same netns should also have completed.

The loop is there in case one or more threads were in 
l2tp_tunnel_register or l2tp_session_register at the point where 
l2tp_pre_exit_net starts. If a tunnel or session is registered after 
l2tp_pre_exit_net loops over all tunnels calling l2tp_tunnel_delete, 
then it would be left behind.
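
Schematically, the interleaving of concern is something like this 
(illustrative only):

/*
 *   netns teardown                      tunnel-create path
 *   --------------                      ------------------
 *   l2tp_pre_exit_net()                 l2tp_tunnel_register() entered;
 *     delete all tunnels in the IDR       tunnel not yet in the IDR
 *     __flush_workqueue(l2tp_wq)
 *                                         tunnel inserted into the IDR
 *     without re-checking the IDRs at
 *     this point, the late tunnel is
 *     never deleted
 */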

I'll think more on this.

Thanks

Patch

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index af87c781d6a6..246b07342b86 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -107,6 +107,7 @@  static struct workqueue_struct *l2tp_wq;
 /* per-net private data for this module */
 static unsigned int l2tp_net_id;
 struct l2tp_net {
+	bool net_closing;
 	/* Lock for write access to l2tp_tunnel_idr */
 	spinlock_t l2tp_tunnel_idr_lock;
 	struct idr l2tp_tunnel_idr;
@@ -1560,13 +1561,19 @@  static int l2tp_tunnel_sock_create(struct net *net,
 	return err;
 }
 
-int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
+int l2tp_tunnel_create(struct net *net, int fd, int version,
+		       u32 tunnel_id, u32 peer_tunnel_id,
 		       struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
 {
+	struct l2tp_net *pn = l2tp_pernet(net);
 	struct l2tp_tunnel *tunnel = NULL;
 	int err;
 	enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;
 
+	/* This pairs with WRITE_ONCE() in l2tp_pre_exit_net(). */
+	if (READ_ONCE(pn->net_closing))
+		return -ENETDOWN;
+
 	if (cfg)
 		encap = cfg->encap;
 
@@ -1832,6 +1839,8 @@  static __net_init int l2tp_init_net(struct net *net)
 {
 	struct l2tp_net *pn = net_generic(net, l2tp_net_id);
 
+	pn->net_closing = false;
+
 	idr_init(&pn->l2tp_tunnel_idr);
 	spin_lock_init(&pn->l2tp_tunnel_idr_lock);
 
@@ -1848,6 +1857,12 @@  static __net_exit void l2tp_pre_exit_net(struct net *net)
 	struct l2tp_tunnel *tunnel = NULL;
 	unsigned long tunnel_id, tmp;
 
+	/* Prevent new tunnel create API requests in the net.
+	 * Pairs with READ_ONCE in l2tp_tunnel_create.
+	 */
+	WRITE_ONCE(pn->net_closing, true);
+
+again:
 	rcu_read_lock_bh();
 	idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
 		if (tunnel)
@@ -1855,16 +1870,21 @@  static __net_exit void l2tp_pre_exit_net(struct net *net)
 	}
 	rcu_read_unlock_bh();
 
-	if (l2tp_wq) {
-		/* ensure that all TUNNEL_DELETE work items are run before
-		 * draining the work queue since TUNNEL_DELETE requests may
-		 * queue SESSION_DELETE work items for each session in the
-		 * tunnel. drain_workqueue may otherwise warn if SESSION_DELETE
-		 * requests are queued while the work queue is being drained.
-		 */
+	if (l2tp_wq)
 		__flush_workqueue(l2tp_wq);
-		drain_workqueue(l2tp_wq);
+
+	/* repeat until all of the net's IDR lists are empty, in case tunnels
+	 * or sessions were being created just before l2tp_pre_exit_net was
+	 * called.
+	 */
+	rcu_read_lock_bh();
+	if (!idr_is_empty(&pn->l2tp_tunnel_idr) ||
+	    !idr_is_empty(&pn->l2tp_v2_session_idr) ||
+	    !idr_is_empty(&pn->l2tp_v3_session_idr)) {
+		rcu_read_unlock_bh();
+		goto again;
 	}
+	rcu_read_unlock_bh();
 }
 
 static __net_exit void l2tp_exit_net(struct net *net)
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index ffd8ced3a51f..a765123e213d 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -232,7 +232,7 @@  struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
  * Creation of a new instance is a two-step process: create, then register.
  * Destruction is triggered using the *_delete functions, and completes asynchronously.
  */
-int l2tp_tunnel_create(int fd, int version, u32 tunnel_id,
+int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id,
 		       u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg,
 		       struct l2tp_tunnel **tunnelp);
 int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 284f1dec1b56..cd410144b42e 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -234,7 +234,7 @@  static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
 	switch (cfg.encap) {
 	case L2TP_ENCAPTYPE_UDP:
 	case L2TP_ENCAPTYPE_IP:
-		ret = l2tp_tunnel_create(fd, proto_version, tunnel_id,
+		ret = l2tp_tunnel_create(net, fd, proto_version, tunnel_id,
 					 peer_tunnel_id, &cfg, &tunnel);
 		break;
 	}
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 53baf2dd5d5d..2c083ef2e4ee 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -660,7 +660,8 @@  static struct l2tp_tunnel *pppol2tp_tunnel_get(struct net *net,
 			if (info->fd < 0)
 				return ERR_PTR(-EBADF);
 
-			error = l2tp_tunnel_create(info->fd,
+			error = l2tp_tunnel_create(net,
+						   info->fd,
 						   info->version,
 						   info->tunnel_id,
 						   info->peer_tunnel_id, &tcfg,