Message ID | 20240819145208.3209296-1-jchapman@katalix.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [net-next] l2tp: avoid using drain_workqueue in l2tp_pre_exit_net | expand |
On 8/19/24 16:52, James Chapman wrote: > Recent commit c1b2e36b8776 ("l2tp: flush workqueue before draining > it") incorrectly uses drain_workqueue. isn't the relevant commit fc7ec7f554d7d0a27ba339fcf48df11d14413329? > The use of drain_workqueue in > l2tp_pre_exit_net is flawed because the workqueue is shared by all > nets and it is therefore possible for new work items to be queued > while drain_workqueue runs. > > Instead of using drain_workqueue, use a loop to delete all tunnels and > __flush_workqueue until all tunnel/session lists of the net are > empty. Add a per-net flag to ensure that no new tunnel can be created > in the net once l2tp_pre_exit_net starts. We need a fixes tag even for net-next fixes :) > Signed-off-by: James Chapman <jchapman@katalix.com> > Signed-off-by: Tom Parkin <tparkin@katalix.com> > --- > net/l2tp/l2tp_core.c | 38 +++++++++++++++++++++++++++++--------- > net/l2tp/l2tp_core.h | 2 +- > net/l2tp/l2tp_netlink.c | 2 +- > net/l2tp/l2tp_ppp.c | 3 ++- > 4 files changed, 33 insertions(+), 12 deletions(-) > > diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c > index af87c781d6a6..246b07342b86 100644 > --- a/net/l2tp/l2tp_core.c > +++ b/net/l2tp/l2tp_core.c > @@ -107,6 +107,7 @@ static struct workqueue_struct *l2tp_wq; > /* per-net private data for this module */ > static unsigned int l2tp_net_id; > struct l2tp_net { > + bool net_closing; > /* Lock for write access to l2tp_tunnel_idr */ > spinlock_t l2tp_tunnel_idr_lock; > struct idr l2tp_tunnel_idr; > @@ -1560,13 +1561,19 @@ static int l2tp_tunnel_sock_create(struct net *net, > return err; > } > > -int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, > +int l2tp_tunnel_create(struct net *net, int fd, int version, > + u32 tunnel_id, u32 peer_tunnel_id, > struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp) > { > + struct l2tp_net *pn = l2tp_pernet(net); > struct l2tp_tunnel *tunnel = NULL; > int err; > enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP; > > + /* This pairs with WRITE_ONCE() in l2tp_pre_exit_net(). */ > + if (READ_ONCE(pn->net_closing)) > + return -ENETDOWN; Is this necessary? the netns is going away, no user space process should be able to touch it. > + > if (cfg) > encap = cfg->encap; > > @@ -1855,16 +1870,21 @@ static __net_exit void l2tp_pre_exit_net(struct net *net) > } > rcu_read_unlock_bh(); > > - if (l2tp_wq) { > - /* ensure that all TUNNEL_DELETE work items are run before > - * draining the work queue since TUNNEL_DELETE requests may > - * queue SESSION_DELETE work items for each session in the > - * tunnel. drain_workqueue may otherwise warn if SESSION_DELETE > - * requests are queued while the work queue is being drained. > - */ > + if (l2tp_wq) > __flush_workqueue(l2tp_wq); > - drain_workqueue(l2tp_wq); > + > + /* repeat until all of the net's IDR lists are empty, in case tunnels > + * or sessions were being created just before l2tp_pre_exit_net was > + * called. > + */ > + rcu_read_lock_bh(); > + if (!idr_is_empty(&pn->l2tp_tunnel_idr) || > + !idr_is_empty(&pn->l2tp_v2_session_idr) || > + !idr_is_empty(&pn->l2tp_v3_session_idr)) { > + rcu_read_unlock_bh(); > + goto again; This looks not nice, it could keep the kernel spinning for a while. What about i.e. queue a 'dummy' work on l2tp_wq after __flush_workqueue() and explicitly wait for such work to complete? when such work completes are other l2tp related one in the same netns should also be completed. Cheers, Paolo
On 22/08/2024 11:22, Paolo Abeni wrote: > > > On 8/19/24 16:52, James Chapman wrote: >> Recent commit c1b2e36b8776 ("l2tp: flush workqueue before draining >> it") incorrectly uses drain_workqueue. > > isn't the relevant commit fc7ec7f554d7d0a27ba339fcf48df11d14413329? Good spot. Thanks. >> The use of drain_workqueue in >> l2tp_pre_exit_net is flawed because the workqueue is shared by all >> nets and it is therefore possible for new work items to be queued >> while drain_workqueue runs. >> >> Instead of using drain_workqueue, use a loop to delete all tunnels and >> __flush_workqueue until all tunnel/session lists of the net are >> empty. Add a per-net flag to ensure that no new tunnel can be created >> in the net once l2tp_pre_exit_net starts. > > We need a fixes tag even for net-next fixes :) Oh ok. My mistake. >> Signed-off-by: James Chapman <jchapman@katalix.com> >> Signed-off-by: Tom Parkin <tparkin@katalix.com> >> --- >> net/l2tp/l2tp_core.c | 38 +++++++++++++++++++++++++++++--------- >> net/l2tp/l2tp_core.h | 2 +- >> net/l2tp/l2tp_netlink.c | 2 +- >> net/l2tp/l2tp_ppp.c | 3 ++- >> 4 files changed, 33 insertions(+), 12 deletions(-) >> >> diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c >> index af87c781d6a6..246b07342b86 100644 >> --- a/net/l2tp/l2tp_core.c >> +++ b/net/l2tp/l2tp_core.c >> @@ -107,6 +107,7 @@ static struct workqueue_struct *l2tp_wq; >> /* per-net private data for this module */ >> static unsigned int l2tp_net_id; >> struct l2tp_net { >> + bool net_closing; >> /* Lock for write access to l2tp_tunnel_idr */ >> spinlock_t l2tp_tunnel_idr_lock; >> struct idr l2tp_tunnel_idr; >> @@ -1560,13 +1561,19 @@ static int l2tp_tunnel_sock_create(struct net >> *net, >> return err; >> } >> -int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 >> peer_tunnel_id, >> +int l2tp_tunnel_create(struct net *net, int fd, int version, >> + u32 tunnel_id, u32 peer_tunnel_id, >> struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel >> **tunnelp) >> { >> + struct l2tp_net *pn = l2tp_pernet(net); >> struct l2tp_tunnel *tunnel = NULL; >> int err; >> enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP; >> + /* This pairs with WRITE_ONCE() in l2tp_pre_exit_net(). */ >> + if (READ_ONCE(pn->net_closing)) >> + return -ENETDOWN; > > Is this necessary? the netns is going away, no user space process should > be able to touch it. I considered this too. I was thinking that a bad process could cause l2tp_pre_exit_net to loop forever if it keeps creating tunnels. But if the net isn't usable by userspace when the pre_exit handler starts then I think we're ok to remove the flag. > >> + >> if (cfg) >> encap = cfg->encap; >> @@ -1855,16 +1870,21 @@ static __net_exit void >> l2tp_pre_exit_net(struct net *net) >> } >> rcu_read_unlock_bh(); >> - if (l2tp_wq) { >> - /* ensure that all TUNNEL_DELETE work items are run before >> - * draining the work queue since TUNNEL_DELETE requests may >> - * queue SESSION_DELETE work items for each session in the >> - * tunnel. drain_workqueue may otherwise warn if SESSION_DELETE >> - * requests are queued while the work queue is being drained. >> - */ >> + if (l2tp_wq) >> __flush_workqueue(l2tp_wq); >> - drain_workqueue(l2tp_wq); >> + >> + /* repeat until all of the net's IDR lists are empty, in case >> tunnels >> + * or sessions were being created just before l2tp_pre_exit_net was >> + * called. >> + */ >> + rcu_read_lock_bh(); >> + if (!idr_is_empty(&pn->l2tp_tunnel_idr) || >> + !idr_is_empty(&pn->l2tp_v2_session_idr) || >> + !idr_is_empty(&pn->l2tp_v3_session_idr)) { >> + rcu_read_unlock_bh(); >> + goto again; > > This looks not nice, it could keep the kernel spinning for a while. > > What about i.e. queue a 'dummy' work on l2tp_wq after > __flush_workqueue() and explicitly wait for such work to complete? > > when such work completes are other l2tp related one in the same netns > should also be completed. The loop is there in case one or more threads were in l2tp_tunnel_register or l2tp_session_register at the point where l2tp_pre_exit_net starts. If a tunnel or session is registered after l2tp_pre_exit_net loops over all tunnels calling l2tp_tunnel_delete, then it would be left behind. I'll think more on this. Thanks
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index af87c781d6a6..246b07342b86 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -107,6 +107,7 @@ static struct workqueue_struct *l2tp_wq; /* per-net private data for this module */ static unsigned int l2tp_net_id; struct l2tp_net { + bool net_closing; /* Lock for write access to l2tp_tunnel_idr */ spinlock_t l2tp_tunnel_idr_lock; struct idr l2tp_tunnel_idr; @@ -1560,13 +1561,19 @@ static int l2tp_tunnel_sock_create(struct net *net, return err; } -int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, +int l2tp_tunnel_create(struct net *net, int fd, int version, + u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp) { + struct l2tp_net *pn = l2tp_pernet(net); struct l2tp_tunnel *tunnel = NULL; int err; enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP; + /* This pairs with WRITE_ONCE() in l2tp_pre_exit_net(). */ + if (READ_ONCE(pn->net_closing)) + return -ENETDOWN; + if (cfg) encap = cfg->encap; @@ -1832,6 +1839,8 @@ static __net_init int l2tp_init_net(struct net *net) { struct l2tp_net *pn = net_generic(net, l2tp_net_id); + pn->net_closing = false; + idr_init(&pn->l2tp_tunnel_idr); spin_lock_init(&pn->l2tp_tunnel_idr_lock); @@ -1848,6 +1857,12 @@ static __net_exit void l2tp_pre_exit_net(struct net *net) struct l2tp_tunnel *tunnel = NULL; unsigned long tunnel_id, tmp; + /* Prevent new tunnel create API requests in the net. + * Pairs with READ_ONCE in l2tp_tunnel_create. + */ + WRITE_ONCE(pn->net_closing, true); + +again: rcu_read_lock_bh(); idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) { if (tunnel) @@ -1855,16 +1870,21 @@ static __net_exit void l2tp_pre_exit_net(struct net *net) } rcu_read_unlock_bh(); - if (l2tp_wq) { - /* ensure that all TUNNEL_DELETE work items are run before - * draining the work queue since TUNNEL_DELETE requests may - * queue SESSION_DELETE work items for each session in the - * tunnel. drain_workqueue may otherwise warn if SESSION_DELETE - * requests are queued while the work queue is being drained. - */ + if (l2tp_wq) __flush_workqueue(l2tp_wq); - drain_workqueue(l2tp_wq); + + /* repeat until all of the net's IDR lists are empty, in case tunnels + * or sessions were being created just before l2tp_pre_exit_net was + * called. + */ + rcu_read_lock_bh(); + if (!idr_is_empty(&pn->l2tp_tunnel_idr) || + !idr_is_empty(&pn->l2tp_v2_session_idr) || + !idr_is_empty(&pn->l2tp_v3_session_idr)) { + rcu_read_unlock_bh(); + goto again; } + rcu_read_unlock_bh(); } static __net_exit void l2tp_exit_net(struct net *net) diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index ffd8ced3a51f..a765123e213d 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -232,7 +232,7 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, * Creation of a new instance is a two-step process: create, then register. * Destruction is triggered using the *_delete functions, and completes asynchronously. */ -int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, +int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp); int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 284f1dec1b56..cd410144b42e 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -234,7 +234,7 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info switch (cfg.encap) { case L2TP_ENCAPTYPE_UDP: case L2TP_ENCAPTYPE_IP: - ret = l2tp_tunnel_create(fd, proto_version, tunnel_id, + ret = l2tp_tunnel_create(net, fd, proto_version, tunnel_id, peer_tunnel_id, &cfg, &tunnel); break; } diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 53baf2dd5d5d..2c083ef2e4ee 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -660,7 +660,8 @@ static struct l2tp_tunnel *pppol2tp_tunnel_get(struct net *net, if (info->fd < 0) return ERR_PTR(-EBADF); - error = l2tp_tunnel_create(info->fd, + error = l2tp_tunnel_create(net, + info->fd, info->version, info->tunnel_id, info->peer_tunnel_id, &tcfg,