diff mbox series

[net-next,v6,15/25] ovpn: implement multi-peer support

Message ID 20240827120805.13681-16-antonio@openvpn.net (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series Introducing OpenVPN Data Channel Offload | expand

Checks

Context Check Description
netdev/series_format fail Series longer than 15 patches
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; GEN HAS DIFF 2 files changed, 2619 insertions(+);
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 16 this patch: 16
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 1 maintainers not CCed: openvpn-devel@lists.sourceforge.net
netdev/build_clang success Errors and warnings before: 16 this patch: 16
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 16 this patch: 16
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 336 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-08-30--06-00 (tests: 714)

Commit Message

Antonio Quartulli Aug. 27, 2024, 12:07 p.m. UTC
With this change an ovpn instance will be able to stay connected to
multiple remote endpoints.

This functionality is strictly required when running ovpn on an
OpenVPN server.

Signed-off-by: Antonio Quartulli <antonio@openvpn.net>
---
 drivers/net/ovpn/main.c       |  49 +++++++++-
 drivers/net/ovpn/ovpnstruct.h |  22 +++++
 drivers/net/ovpn/peer.c       | 167 +++++++++++++++++++++++++++++++++-
 drivers/net/ovpn/peer.h       |   9 ++
 4 files changed, 244 insertions(+), 3 deletions(-)

Comments

Sabrina Dubroca Sept. 3, 2024, 2:40 p.m. UTC | #1
2024-08-27, 14:07:55 +0200, Antonio Quartulli wrote:
>  static int ovpn_net_init(struct net_device *dev)
>  {
>  	struct ovpn_struct *ovpn = netdev_priv(dev);
> +	int i, err = gro_cells_init(&ovpn->gro_cells, dev);

I'm not a fan of "hiding" the gro_cells_init call up here. I'd prefer
if this was done just before the corresponding "if (err)".

> +	struct in_device *dev_v4;
>  
> -	return gro_cells_init(&ovpn->gro_cells, dev);
> +	if (err)
> +		return err;
> +
> +	if (ovpn->mode == OVPN_MODE_MP) {
> +		dev_v4 = __in_dev_get_rtnl(dev);
> +		if (dev_v4) {
> +			/* disable redirects as Linux gets confused by ovpn
> +			 * handling same-LAN routing.
> +			 * This happens because a multipeer interface is used as
> +			 * relay point between hosts in the same subnet, while
> +			 * in a classic LAN this would not be needed because the
> +			 * two hosts would be able to talk directly.
> +			 */
> +			IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false);
> +			IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false;
> +		}
> +
> +		/* the peer container is fairly large, therefore we dynamically
> +		 * allocate it only when needed
> +		 */
> +		ovpn->peers = kzalloc(sizeof(*ovpn->peers), GFP_KERNEL);
> +		if (!ovpn->peers)

missing gro_cells_destroy

> +			return -ENOMEM;
> +
> +		spin_lock_init(&ovpn->peers->lock_by_id);
> +		spin_lock_init(&ovpn->peers->lock_by_vpn_addr);
> +		spin_lock_init(&ovpn->peers->lock_by_transp_addr);

What's the benefit of having 3 separate locks instead of a single lock
protecting all the hashtables?

> +
> +		for (i = 0; i < ARRAY_SIZE(ovpn->peers->by_id); i++) {
> +			INIT_HLIST_HEAD(&ovpn->peers->by_id[i]);
> +			INIT_HLIST_HEAD(&ovpn->peers->by_vpn_addr[i]);
> +			INIT_HLIST_NULLS_HEAD(&ovpn->peers->by_transp_addr[i],
> +					      i);
> +		}
> +	}
> +
> +	return 0;
>  }

> +static int ovpn_peer_add_mp(struct ovpn_struct *ovpn, struct ovpn_peer *peer)
> +{
> +	struct sockaddr_storage sa = { 0 };
> +	struct hlist_nulls_head *nhead;
> +	struct sockaddr_in6 *sa6;
> +	struct sockaddr_in *sa4;
> +	struct hlist_head *head;
> +	struct ovpn_bind *bind;
> +	struct ovpn_peer *tmp;
> +	size_t salen;
> +
> +	spin_lock_bh(&ovpn->peers->lock_by_id);
> +	/* do not add duplicates */
> +	tmp = ovpn_peer_get_by_id(ovpn, peer->id);
> +	if (tmp) {
> +		ovpn_peer_put(tmp);
> +		spin_unlock_bh(&ovpn->peers->lock_by_id);
> +		return -EEXIST;
> +	}
> +
> +	hlist_add_head_rcu(&peer->hash_entry_id,
> +			   ovpn_get_hash_head(ovpn->peers->by_id, &peer->id,
> +					      sizeof(peer->id)));
> +	spin_unlock_bh(&ovpn->peers->lock_by_id);
> +
> +	bind = rcu_dereference_protected(peer->bind, true);
> +	/* peers connected via TCP have bind == NULL */
> +	if (bind) {
> +		switch (bind->remote.in4.sin_family) {
> +		case AF_INET:
> +			sa4 = (struct sockaddr_in *)&sa;
> +
> +			sa4->sin_family = AF_INET;
> +			sa4->sin_addr.s_addr = bind->remote.in4.sin_addr.s_addr;
> +			sa4->sin_port = bind->remote.in4.sin_port;
> +			salen = sizeof(*sa4);
> +			break;
> +		case AF_INET6:
> +			sa6 = (struct sockaddr_in6 *)&sa;
> +
> +			sa6->sin6_family = AF_INET6;
> +			sa6->sin6_addr = bind->remote.in6.sin6_addr;
> +			sa6->sin6_port = bind->remote.in6.sin6_port;
> +			salen = sizeof(*sa6);
> +			break;
> +		default:

And remove from the by_id hashtable? Or is that handled somewhere that
I missed (I don't think ovpn_peer_unhash gets called in that case)?

> +			return -EPROTONOSUPPORT;
> +		}
> +
Sabrina Dubroca Sept. 4, 2024, 10:10 a.m. UTC | #2
2024-09-03, 16:40:51 +0200, Sabrina Dubroca wrote:
> 2024-08-27, 14:07:55 +0200, Antonio Quartulli wrote:
> > +static int ovpn_peer_add_mp(struct ovpn_struct *ovpn, struct ovpn_peer *peer)
> > +{
> > +	struct sockaddr_storage sa = { 0 };
> > +	struct hlist_nulls_head *nhead;
> > +	struct sockaddr_in6 *sa6;
> > +	struct sockaddr_in *sa4;
> > +	struct hlist_head *head;
> > +	struct ovpn_bind *bind;
> > +	struct ovpn_peer *tmp;
> > +	size_t salen;
> > +
> > +	spin_lock_bh(&ovpn->peers->lock_by_id);
> > +	/* do not add duplicates */
> > +	tmp = ovpn_peer_get_by_id(ovpn, peer->id);
> > +	if (tmp) {
> > +		ovpn_peer_put(tmp);
> > +		spin_unlock_bh(&ovpn->peers->lock_by_id);
> > +		return -EEXIST;
> > +	}
> > +
> > +	hlist_add_head_rcu(&peer->hash_entry_id,
> > +			   ovpn_get_hash_head(ovpn->peers->by_id, &peer->id,
> > +					      sizeof(peer->id)));
> > +	spin_unlock_bh(&ovpn->peers->lock_by_id);
> > +
> > +	bind = rcu_dereference_protected(peer->bind, true);

What protects us here? We just released lock_by_id and we're not
holding peer->lock.

> > +	/* peers connected via TCP have bind == NULL */
> > +	if (bind) {
> > +		switch (bind->remote.in4.sin_family) {
> > +		case AF_INET:
> > +			sa4 = (struct sockaddr_in *)&sa;
> > +
> > +			sa4->sin_family = AF_INET;
> > +			sa4->sin_addr.s_addr = bind->remote.in4.sin_addr.s_addr;
> > +			sa4->sin_port = bind->remote.in4.sin_port;
> > +			salen = sizeof(*sa4);
> > +			break;
> > +		case AF_INET6:
> > +			sa6 = (struct sockaddr_in6 *)&sa;
> > +
> > +			sa6->sin6_family = AF_INET6;
> > +			sa6->sin6_addr = bind->remote.in6.sin6_addr;
> > +			sa6->sin6_port = bind->remote.in6.sin6_port;
> > +			salen = sizeof(*sa6);
> > +			break;
> > +		default:
> 
> And remove from the by_id hashtable? Or is that handled somewhere that
> I missed (I don't think ovpn_peer_unhash gets called in that case)?

ovpn_nl_set_peer_doit does:

		ret = ovpn_peer_add(ovpn, peer);
		if (ret < 0) {
[...]
		/* release right away because peer is not really used in any
		 * context
		 */
		ovpn_peer_release(peer);
		kfree(peer);


But if we fail at this stage, the peer was published in the by_id
hashtable and could be used.

Although AFAICT, ovpn can never create a bind with family !=
AF_INET{,6}, so this is not a real issue -- in that case I guess a
DEBUG_NET_WARN_ON_ONCE with a comment that this should never happen
would be acceptable (but I'd still remove the peer from by_id and go
through the proper release path instead of direct kfree in
ovpn_nl_set_peer_doit). Otherwise, you'd have to reorder things in
this function so that all failures are handled before the peer is
added to any hashtable.

> > +			return -EPROTONOSUPPORT;
> > +		}
> > +
Antonio Quartulli Sept. 5, 2024, 8:02 a.m. UTC | #3
On 03/09/2024 16:40, Sabrina Dubroca wrote:
> 2024-08-27, 14:07:55 +0200, Antonio Quartulli wrote:
>>   static int ovpn_net_init(struct net_device *dev)
>>   {
>>   	struct ovpn_struct *ovpn = netdev_priv(dev);
>> +	int i, err = gro_cells_init(&ovpn->gro_cells, dev);
> 
> I'm not a fan of "hiding" the gro_cells_init call up here. I'd prefer
> if this was done just before the corresponding "if (err)".

I am all with you, but I remember in the past something complaining 
about "variable declared and then re-assigned right after".

But maybe this is not the case anymore.

Will move the initialization down.

> 
>> +	struct in_device *dev_v4;
>>   
>> -	return gro_cells_init(&ovpn->gro_cells, dev);
>> +	if (err)
>> +		return err;
>> +
>> +	if (ovpn->mode == OVPN_MODE_MP) {
>> +		dev_v4 = __in_dev_get_rtnl(dev);
>> +		if (dev_v4) {
>> +			/* disable redirects as Linux gets confused by ovpn
>> +			 * handling same-LAN routing.
>> +			 * This happens because a multipeer interface is used as
>> +			 * relay point between hosts in the same subnet, while
>> +			 * in a classic LAN this would not be needed because the
>> +			 * two hosts would be able to talk directly.
>> +			 */
>> +			IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false);
>> +			IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false;
>> +		}
>> +
>> +		/* the peer container is fairly large, therefore we dynamically
>> +		 * allocate it only when needed
>> +		 */
>> +		ovpn->peers = kzalloc(sizeof(*ovpn->peers), GFP_KERNEL);
>> +		if (!ovpn->peers)
> 
> missing gro_cells_destroy

ACK

> 
>> +			return -ENOMEM;
>> +
>> +		spin_lock_init(&ovpn->peers->lock_by_id);
>> +		spin_lock_init(&ovpn->peers->lock_by_vpn_addr);
>> +		spin_lock_init(&ovpn->peers->lock_by_transp_addr);
> 
> What's the benefit of having 3 separate locks instead of a single lock
> protecting all the hashtables?

The main reason was to avoid a deadlock - I thought I had added a 
comment about it...

The problem was a deadlock between acquiring peer->lock and 
ovpn->peers->lock in float() and then in the opposite sequence in peers_free().
(IIRC this happens due to ovpn_peer_reset_sockaddr() acquiring peer->lock)

Splitting the larger peers->lock allowed me to avoid this scenario, 
because I don't need to jump through any hoop to coordinate access to 
different hashtables.

> 
>> +
>> +		for (i = 0; i < ARRAY_SIZE(ovpn->peers->by_id); i++) {
>> +			INIT_HLIST_HEAD(&ovpn->peers->by_id[i]);
>> +			INIT_HLIST_HEAD(&ovpn->peers->by_vpn_addr[i]);
>> +			INIT_HLIST_NULLS_HEAD(&ovpn->peers->by_transp_addr[i],
>> +					      i);
>> +		}
>> +	}
>> +
>> +	return 0;
>>   }
> 
>> +static int ovpn_peer_add_mp(struct ovpn_struct *ovpn, struct ovpn_peer *peer)
>> +{
>> +	struct sockaddr_storage sa = { 0 };
>> +	struct hlist_nulls_head *nhead;
>> +	struct sockaddr_in6 *sa6;
>> +	struct sockaddr_in *sa4;
>> +	struct hlist_head *head;
>> +	struct ovpn_bind *bind;
>> +	struct ovpn_peer *tmp;
>> +	size_t salen;
>> +
>> +	spin_lock_bh(&ovpn->peers->lock_by_id);
>> +	/* do not add duplicates */
>> +	tmp = ovpn_peer_get_by_id(ovpn, peer->id);
>> +	if (tmp) {
>> +		ovpn_peer_put(tmp);
>> +		spin_unlock_bh(&ovpn->peers->lock_by_id);
>> +		return -EEXIST;
>> +	}
>> +
>> +	hlist_add_head_rcu(&peer->hash_entry_id,
>> +			   ovpn_get_hash_head(ovpn->peers->by_id, &peer->id,
>> +					      sizeof(peer->id)));
>> +	spin_unlock_bh(&ovpn->peers->lock_by_id);
>> +
>> +	bind = rcu_dereference_protected(peer->bind, true);
>> +	/* peers connected via TCP have bind == NULL */
>> +	if (bind) {
>> +		switch (bind->remote.in4.sin_family) {
>> +		case AF_INET:
>> +			sa4 = (struct sockaddr_in *)&sa;
>> +
>> +			sa4->sin_family = AF_INET;
>> +			sa4->sin_addr.s_addr = bind->remote.in4.sin_addr.s_addr;
>> +			sa4->sin_port = bind->remote.in4.sin_port;
>> +			salen = sizeof(*sa4);
>> +			break;
>> +		case AF_INET6:
>> +			sa6 = (struct sockaddr_in6 *)&sa;
>> +
>> +			sa6->sin6_family = AF_INET6;
>> +			sa6->sin6_addr = bind->remote.in6.sin6_addr;
>> +			sa6->sin6_port = bind->remote.in6.sin6_port;
>> +			salen = sizeof(*sa6);
>> +			break;
>> +		default:
> 
> And remove from the by_id hashtable? Or is that handled somewhere that
> I missed (I don't think ovpn_peer_unhash gets called in that case)?

No we don't call unhash in this case as we assume the adding just failed 
entirely.

I will add the removal before returning the error (moving the add below 
the switch would extend the locked area too much.)

> 
>> +			return -EPROTONOSUPPORT;
>> +		}
>> +
> 

Thanks a lot
Sabrina Dubroca Sept. 5, 2024, 10:47 a.m. UTC | #4
2024-09-05, 10:02:58 +0200, Antonio Quartulli wrote:
> On 03/09/2024 16:40, Sabrina Dubroca wrote:
> > 2024-08-27, 14:07:55 +0200, Antonio Quartulli wrote:
> > >   static int ovpn_net_init(struct net_device *dev)
> > >   {
> > >   	struct ovpn_struct *ovpn = netdev_priv(dev);
> > > +	int i, err = gro_cells_init(&ovpn->gro_cells, dev);
> > 
> > I'm not a fan of "hiding" the gro_cells_init call up here. I'd prefer
> > if this was done just before the corresponding "if (err)".
> 
> I am all with you, but I remember in the past something complaining about
> "variable declared and then re-assigned right after".
> 
> But maybe this is not the case anymore.

If you had something like:

	int err;

	err = -EINVAL;

sure, it would make sense to combine them.

> 
> Will move the initialization down.

Thanks.


> > > +
> > > +		spin_lock_init(&ovpn->peers->lock_by_id);
> > > +		spin_lock_init(&ovpn->peers->lock_by_vpn_addr);
> > > +		spin_lock_init(&ovpn->peers->lock_by_transp_addr);
> > 
> > What's the benefit of having 3 separate locks instead of a single lock
> > protecting all the hashtables?
> 
> The main reason was to avoid a deadlock - I thought I had added a comment
> about it...

Ok.

I could have missed it, I'm not looking at the comments much now that
I'm familiar with the code.

> The problem was a deadlock between acquiring peer->lock and
> ovpn->peers->lock in float() and then in the opposite sequence in peers_free().
> (IIRC this happens due to ovpn_peer_reset_sockaddr() acquiring peer->lock)

I don't see a problem with ovpn_peer_reset_sockaddr, but ovpn_peer_put
can be called with lock_by_id held and then take peer->lock (in
ovpn_peer_release), which would be the opposite order to
ovpn_peer_float if the locks were merged (peer->lock then
lock_by_transp_addr).

This should be solvable with a single lock by delaying the bind
cleanup via call_rcu instead of doing it immediately with
ovpn_peer_release (after that delay, nothing should be using
peer->bind anymore, since we have no reference and no more
rcu_read_lock sections that could have found peer, so we can free
immediately and no need to take peer->lock). And it's I think a bit
more "correct" wrt RCU rules, since at ovpn_peer_put time, even with
refcount=0, we could have a reader still using the peer and deciding
to update its bind (not the case with how ovpn_peer_float is called,
since we have a reference on the peer).

(This could be completely wrong and/or make no sense at all :))

But I'm not going to insist on this, you can keep the separate locks.


> Splitting the larger peers->lock allowed me to avoid this scenario, because
> I don't need to jump through any hoop to coordinate access to different
> hashtables.
> 
> > 
> > > +
> > > +		for (i = 0; i < ARRAY_SIZE(ovpn->peers->by_id); i++) {
> > > +			INIT_HLIST_HEAD(&ovpn->peers->by_id[i]);
> > > +			INIT_HLIST_HEAD(&ovpn->peers->by_vpn_addr[i]);
> > > +			INIT_HLIST_NULLS_HEAD(&ovpn->peers->by_transp_addr[i],
> > > +					      i);
> > > +		}
> > > +	}
> > > +
> > > +	return 0;
> > >   }
> > 
> > > +static int ovpn_peer_add_mp(struct ovpn_struct *ovpn, struct ovpn_peer *peer)
> > > +{
> > > +	struct sockaddr_storage sa = { 0 };
> > > +	struct hlist_nulls_head *nhead;
> > > +	struct sockaddr_in6 *sa6;
> > > +	struct sockaddr_in *sa4;
> > > +	struct hlist_head *head;
> > > +	struct ovpn_bind *bind;
> > > +	struct ovpn_peer *tmp;
> > > +	size_t salen;
> > > +
> > > +	spin_lock_bh(&ovpn->peers->lock_by_id);
> > > +	/* do not add duplicates */
> > > +	tmp = ovpn_peer_get_by_id(ovpn, peer->id);
> > > +	if (tmp) {
> > > +		ovpn_peer_put(tmp);
> > > +		spin_unlock_bh(&ovpn->peers->lock_by_id);
> > > +		return -EEXIST;
> > > +	}
> > > +
> > > +	hlist_add_head_rcu(&peer->hash_entry_id,
> > > +			   ovpn_get_hash_head(ovpn->peers->by_id, &peer->id,
> > > +					      sizeof(peer->id)));
> > > +	spin_unlock_bh(&ovpn->peers->lock_by_id);
> > > +
> > > +	bind = rcu_dereference_protected(peer->bind, true);
> > > +	/* peers connected via TCP have bind == NULL */
> > > +	if (bind) {
> > > +		switch (bind->remote.in4.sin_family) {
> > > +		case AF_INET:
> > > +			sa4 = (struct sockaddr_in *)&sa;
> > > +
> > > +			sa4->sin_family = AF_INET;
> > > +			sa4->sin_addr.s_addr = bind->remote.in4.sin_addr.s_addr;
> > > +			sa4->sin_port = bind->remote.in4.sin_port;
> > > +			salen = sizeof(*sa4);
> > > +			break;
> > > +		case AF_INET6:
> > > +			sa6 = (struct sockaddr_in6 *)&sa;
> > > +
> > > +			sa6->sin6_family = AF_INET6;
> > > +			sa6->sin6_addr = bind->remote.in6.sin6_addr;
> > > +			sa6->sin6_port = bind->remote.in6.sin6_port;
> > > +			salen = sizeof(*sa6);
> > > +			break;
> > > +		default:
> > 
> > And remove from the by_id hashtable? Or is that handled somewhere that
> > I missed (I don't think ovpn_peer_unhash gets called in that case)?
> 
> No we don't call unhash in this case as we assume the adding just failed
> entirely.
> 
> I will add the removal before returning the error (moving the add below the
> switch would extend the locked area too much.)

I don't think setting a few variables would be too much to do under
the lock (and it would address the issues in my 2nd reply to this
patch).
Antonio Quartulli Sept. 6, 2024, 1:26 p.m. UTC | #5
On 04/09/2024 12:10, Sabrina Dubroca wrote:
> 2024-09-03, 16:40:51 +0200, Sabrina Dubroca wrote:
>> 2024-08-27, 14:07:55 +0200, Antonio Quartulli wrote:
>>> +static int ovpn_peer_add_mp(struct ovpn_struct *ovpn, struct ovpn_peer *peer)
>>> +{
>>> +	struct sockaddr_storage sa = { 0 };
>>> +	struct hlist_nulls_head *nhead;
>>> +	struct sockaddr_in6 *sa6;
>>> +	struct sockaddr_in *sa4;
>>> +	struct hlist_head *head;
>>> +	struct ovpn_bind *bind;
>>> +	struct ovpn_peer *tmp;
>>> +	size_t salen;
>>> +
>>> +	spin_lock_bh(&ovpn->peers->lock_by_id);
>>> +	/* do not add duplicates */
>>> +	tmp = ovpn_peer_get_by_id(ovpn, peer->id);
>>> +	if (tmp) {
>>> +		ovpn_peer_put(tmp);
>>> +		spin_unlock_bh(&ovpn->peers->lock_by_id);
>>> +		return -EEXIST;
>>> +	}
>>> +
>>> +	hlist_add_head_rcu(&peer->hash_entry_id,
>>> +			   ovpn_get_hash_head(ovpn->peers->by_id, &peer->id,
>>> +					      sizeof(peer->id)));
>>> +	spin_unlock_bh(&ovpn->peers->lock_by_id);
>>> +
>>> +	bind = rcu_dereference_protected(peer->bind, true);
> 
> What protects us here? We just released lock_by_id and we're not
> holding peer->lock.

hmm. I think originally it was not possible to hold this peer in any 
other context since the peer was still being added.
But now we have added it to the by_id table already, so we cannot assume 
that anymore.

Maybe I should simply move this assignment before the 
hlist_add_head_rcu() to regain that assumption..

> 
>>> +	/* peers connected via TCP have bind == NULL */
>>> +	if (bind) {
>>> +		switch (bind->remote.in4.sin_family) {
>>> +		case AF_INET:
>>> +			sa4 = (struct sockaddr_in *)&sa;
>>> +
>>> +			sa4->sin_family = AF_INET;
>>> +			sa4->sin_addr.s_addr = bind->remote.in4.sin_addr.s_addr;
>>> +			sa4->sin_port = bind->remote.in4.sin_port;
>>> +			salen = sizeof(*sa4);
>>> +			break;
>>> +		case AF_INET6:
>>> +			sa6 = (struct sockaddr_in6 *)&sa;
>>> +
>>> +			sa6->sin6_family = AF_INET6;
>>> +			sa6->sin6_addr = bind->remote.in6.sin6_addr;
>>> +			sa6->sin6_port = bind->remote.in6.sin6_port;
>>> +			salen = sizeof(*sa6);
>>> +			break;
>>> +		default:
>>
>> And remove from the by_id hashtable? Or is that handled somewhere that
>> I missed (I don't think ovpn_peer_unhash gets called in that case)?
> 
> ovpn_nl_set_peer_doit does:
> 
> 		ret = ovpn_peer_add(ovpn, peer);
> 		if (ret < 0) {
> [...]
> 		/* release right away because peer is not really used in any
> 		 * context
> 		 */
> 		ovpn_peer_release(peer);
> 		kfree(peer);
> 
> 
> But if we fail at this stage, the peer was published in the by_id
> hashtable and could be used.
> 
> Although AFAICT, ovpn can never create a bind with family !=
> AF_INET{,6}, so this is not a real issue -- in that case I guess a
> DEBUG_NET_WARN_ON_ONCE with a comment that this should never happen
> would be acceptable (but I'd still remove the peer from by_id and go
> through the proper release path instead of direct kfree in
> ovpn_nl_set_peer_doit). Otherwise, you'd have to reorder things in
> this function so that all failures are handled before the peer is
> added to any hashtable.

To be honest I don't mind adding a pre-check and erroring out immediately.
I don't like adding a peer to the table that is actually failing basic 
sanity checks.

Thanks!

> 
>>> +			return -EPROTONOSUPPORT;
>>> +		}
>>> +
>
Antonio Quartulli Sept. 9, 2024, 9:12 a.m. UTC | #6
On 05/09/2024 12:47, Sabrina Dubroca wrote:
>>>> +
>>>> +		spin_lock_init(&ovpn->peers->lock_by_id);
>>>> +		spin_lock_init(&ovpn->peers->lock_by_vpn_addr);
>>>> +		spin_lock_init(&ovpn->peers->lock_by_transp_addr);
>>>
>>> What's the benefit of having 3 separate locks instead of a single lock
>>> protecting all the hashtables?
>>
>> The main reason was to avoid a deadlock - I thought I had added a comment
>> about it...
> 
> Ok.
> 
> I could have missed it, I'm not looking at the comments much now that
> I'm familiar with the code.
> 
>> The problem was a deadlock between acquiring peer->lock and
>> ovpn->peers->lock in float() and then in the opposite sequence in peers_free().
>> (IIRC this happens due to ovpn_peer_reset_sockaddr() acquiring peer->lock)
> 
> I don't see a problem with ovpn_peer_reset_sockaddr, but ovpn_peer_put
> can be called with lock_by_id held and then take peer->lock (in
> ovpn_peer_release), which would be the opposite order to
> ovpn_peer_float if the locks were merged (peer->lock then
> lock_by_transp_addr).
> 
> This should be solvable with a single lock by delaying the bind
> cleanup via call_rcu instead of doing it immediately with
> ovpn_peer_release (after that delay, nothing should be using
> peer->bind anymore, since we have no reference and no more
> rcu_read_lock sections that could have found peer, so we can free
> immediately and no need to take peer->lock). And it's I think a bit
> more "correct" wrt RCU rules, since at ovpn_peer_put time, even with
> refcount=0, we could have a reader still using the peer and deciding
> to update its bind (not the case with how ovpn_peer_float is called,
> since we have a reference on the peer).

Yap, I totally agree with your analysis.

In a previous version we simplified the code to the point that call_rcu 
was not needed anymore and we could just rely on kfree_rcu for peer.

Now this "going back to call_rcu" felt a bit wrong, so I tried hard to 
avoid that.
But I agree that actually this is a clear case where a two-steps release 
is the right thing to do.

Will try to get it fixed as per your suggestion. Thanks!

> 
> (This could be completely wrong and/or make no sense at all :))
> 
> But I'm not going to insist on this, you can keep the separate locks.
> 
> 
>> Splitting the larger peers->lock allowed me to avoid this scenario, because
>> I don't need to jump through any hoop to coordinate access to different
>> hashtables.
>>
>>>
>>>> +
>>>> +		for (i = 0; i < ARRAY_SIZE(ovpn->peers->by_id); i++) {
>>>> +			INIT_HLIST_HEAD(&ovpn->peers->by_id[i]);
>>>> +			INIT_HLIST_HEAD(&ovpn->peers->by_vpn_addr[i]);
>>>> +			INIT_HLIST_NULLS_HEAD(&ovpn->peers->by_transp_addr[i],
>>>> +					      i);
>>>> +		}
>>>> +	}
>>>> +
>>>> +	return 0;
>>>>    }
>>>
>>>> +static int ovpn_peer_add_mp(struct ovpn_struct *ovpn, struct ovpn_peer *peer)
>>>> +{
>>>> +	struct sockaddr_storage sa = { 0 };
>>>> +	struct hlist_nulls_head *nhead;
>>>> +	struct sockaddr_in6 *sa6;
>>>> +	struct sockaddr_in *sa4;
>>>> +	struct hlist_head *head;
>>>> +	struct ovpn_bind *bind;
>>>> +	struct ovpn_peer *tmp;
>>>> +	size_t salen;
>>>> +
>>>> +	spin_lock_bh(&ovpn->peers->lock_by_id);
>>>> +	/* do not add duplicates */
>>>> +	tmp = ovpn_peer_get_by_id(ovpn, peer->id);
>>>> +	if (tmp) {
>>>> +		ovpn_peer_put(tmp);
>>>> +		spin_unlock_bh(&ovpn->peers->lock_by_id);
>>>> +		return -EEXIST;
>>>> +	}
>>>> +
>>>> +	hlist_add_head_rcu(&peer->hash_entry_id,
>>>> +			   ovpn_get_hash_head(ovpn->peers->by_id, &peer->id,
>>>> +					      sizeof(peer->id)));
>>>> +	spin_unlock_bh(&ovpn->peers->lock_by_id);
>>>> +
>>>> +	bind = rcu_dereference_protected(peer->bind, true);
>>>> +	/* peers connected via TCP have bind == NULL */
>>>> +	if (bind) {
>>>> +		switch (bind->remote.in4.sin_family) {
>>>> +		case AF_INET:
>>>> +			sa4 = (struct sockaddr_in *)&sa;
>>>> +
>>>> +			sa4->sin_family = AF_INET;
>>>> +			sa4->sin_addr.s_addr = bind->remote.in4.sin_addr.s_addr;
>>>> +			sa4->sin_port = bind->remote.in4.sin_port;
>>>> +			salen = sizeof(*sa4);
>>>> +			break;
>>>> +		case AF_INET6:
>>>> +			sa6 = (struct sockaddr_in6 *)&sa;
>>>> +
>>>> +			sa6->sin6_family = AF_INET6;
>>>> +			sa6->sin6_addr = bind->remote.in6.sin6_addr;
>>>> +			sa6->sin6_port = bind->remote.in6.sin6_port;
>>>> +			salen = sizeof(*sa6);
>>>> +			break;
>>>> +		default:
>>>
>>> And remove from the by_id hashtable? Or is that handled somewhere that
>>> I missed (I don't think ovpn_peer_unhash gets called in that case)?
>>
>> No we don't call unhash in this case as we assume the adding just failed
>> entirely.
>>
>> I will add the removal before returning the error (moving the add below the
>> switch would extend the locked area too much.)
> 
> I don't think setting a few variables would be too much to do under
> the lock (and it would address the issues in my 2nd reply to this
> patch).

Right - pretty much what I replied to that comment (will move the add 
after the bind dereference)

Cheers,
diff mbox series

Patch

diff --git a/drivers/net/ovpn/main.c b/drivers/net/ovpn/main.c
index 18fefda74fd1..94d2ee87807f 100644
--- a/drivers/net/ovpn/main.c
+++ b/drivers/net/ovpn/main.c
@@ -54,14 +54,53 @@  static void ovpn_struct_free(struct net_device *net)
 	struct ovpn_struct *ovpn = netdev_priv(net);
 
 	gro_cells_destroy(&ovpn->gro_cells);
+	kfree(ovpn->peers);
 	rcu_barrier();
 }
 
 static int ovpn_net_init(struct net_device *dev)
 {
 	struct ovpn_struct *ovpn = netdev_priv(dev);
+	int i, err = gro_cells_init(&ovpn->gro_cells, dev);
+	struct in_device *dev_v4;
 
-	return gro_cells_init(&ovpn->gro_cells, dev);
+	if (err)
+		return err;
+
+	if (ovpn->mode == OVPN_MODE_MP) {
+		dev_v4 = __in_dev_get_rtnl(dev);
+		if (dev_v4) {
+			/* disable redirects as Linux gets confused by ovpn
+			 * handling same-LAN routing.
+			 * This happens because a multipeer interface is used as
+			 * relay point between hosts in the same subnet, while
+			 * in a classic LAN this would not be needed because the
+			 * two hosts would be able to talk directly.
+			 */
+			IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false);
+			IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false;
+		}
+
+		/* the peer container is fairly large, therefore we dynamically
+		 * allocate it only when needed
+		 */
+		ovpn->peers = kzalloc(sizeof(*ovpn->peers), GFP_KERNEL);
+		if (!ovpn->peers)
+			return -ENOMEM;
+
+		spin_lock_init(&ovpn->peers->lock_by_id);
+		spin_lock_init(&ovpn->peers->lock_by_vpn_addr);
+		spin_lock_init(&ovpn->peers->lock_by_transp_addr);
+
+		for (i = 0; i < ARRAY_SIZE(ovpn->peers->by_id); i++) {
+			INIT_HLIST_HEAD(&ovpn->peers->by_id[i]);
+			INIT_HLIST_HEAD(&ovpn->peers->by_vpn_addr[i]);
+			INIT_HLIST_NULLS_HEAD(&ovpn->peers->by_transp_addr[i],
+					      i);
+		}
+	}
+
+	return 0;
 }
 
 static int ovpn_net_open(struct net_device *dev)
@@ -210,8 +249,14 @@  void ovpn_iface_destruct(struct ovpn_struct *ovpn)
 
 	ovpn->registered = false;
 
-	if (ovpn->mode == OVPN_MODE_P2P)
+	switch (ovpn->mode) {
+	case OVPN_MODE_P2P:
 		ovpn_peer_release_p2p(ovpn);
+		break;
+	default:
+		ovpn_peers_free(ovpn);
+		break;
+	}
 }
 
 static int ovpn_netdev_notifier_call(struct notifier_block *nb,
diff --git a/drivers/net/ovpn/ovpnstruct.h b/drivers/net/ovpn/ovpnstruct.h
index 65497ce115aa..3d74e79fcdf3 100644
--- a/drivers/net/ovpn/ovpnstruct.h
+++ b/drivers/net/ovpn/ovpnstruct.h
@@ -14,6 +14,26 @@ 
 #include <net/net_trackers.h>
 #include <uapi/linux/ovpn.h>
 
+/**
+ * struct ovpn_peer_collection - container of peers for MultiPeer mode
+ * @by_id: table of peers index by ID
+ * @by_vpn_addr: table of peers indexed by VPN IP address
+ * @by_transp_addr: table of peers indexed by transport address (items can be
+ *		    rehashed on the fly due to peer IP change)
+ * @lock_by_id: protects writes to peers by_id table
+ * @lock_by_vpn_addr: protects writes to peers by_vpn_addr table
+ * @lock_by_transp_addr: protects writes to peers by_transp_addr table
+ */
+struct ovpn_peer_collection {
+	DECLARE_HASHTABLE(by_id, 12);
+	DECLARE_HASHTABLE(by_vpn_addr, 12);
+	struct hlist_nulls_head by_transp_addr[1 << 12];
+
+	spinlock_t lock_by_id; /* protects writes to by_id */
+	spinlock_t lock_by_vpn_addr; /* protects by_vpn_addr */
+	spinlock_t lock_by_transp_addr; /* protects by_transp_addr */
+};
+
 /**
  * struct ovpn_struct - per ovpn interface state
  * @dev: the actual netdev representing the tunnel
@@ -21,6 +41,7 @@ 
  * @registered: whether dev is still registered with netdev or not
  * @mode: device operation mode (i.e. p2p, mp, ..)
  * @lock: protect this object
+ * @peers: data structures holding multi-peer references
  * @peer: in P2P mode, this is the only remote peer
  * @dev_list: entry for the module wide device list
  * @gro_cells: pointer to the Generic Receive Offload cell
@@ -31,6 +52,7 @@  struct ovpn_struct {
 	bool registered;
 	enum ovpn_mode mode;
 	spinlock_t lock; /* protect writing to the ovpn_struct object */
+	struct ovpn_peer_collection *peers;
 	struct ovpn_peer __rcu *peer;
 	struct list_head dev_list;
 	struct gro_cells gro_cells;
diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c
index 6d34c56a4a51..58037c2cbcca 100644
--- a/drivers/net/ovpn/peer.c
+++ b/drivers/net/ovpn/peer.c
@@ -9,6 +9,7 @@ 
 
 #include <linux/skbuff.h>
 #include <linux/list.h>
+#include <linux/hashtable.h>
 
 #include "ovpnstruct.h"
 #include "bind.h"
@@ -309,6 +310,92 @@  bool ovpn_peer_check_by_src(struct ovpn_struct *ovpn, struct sk_buff *skb,
 	return match;
 }
 
+/* Return the bucket head for @_key (of @_key_len bytes) in hashtable @_tbl.
+ * Implemented as a statement expression so that @_tbl is evaluated only once.
+ */
+#define ovpn_get_hash_head(_tbl, _key, _key_len) ({		\
+	typeof(_tbl) *__tbl = &(_tbl);				\
+	(&(*__tbl)[jhash(_key, _key_len, 0) % HASH_SIZE(*__tbl)]); })
+
+/**
+ * ovpn_peer_add_mp - add peer to related tables in a MP instance
+ * @ovpn: the instance to add the peer to
+ * @peer: the peer to add
+ *
+ * The peer is hashed by ID, by transport address (UDP peers only, as TCP
+ * peers have no bind) and by each configured VPN address.
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+static int ovpn_peer_add_mp(struct ovpn_struct *ovpn, struct ovpn_peer *peer)
+{
+	struct sockaddr_storage sa = { 0 };
+	struct hlist_nulls_head *nhead;
+	struct sockaddr_in6 *sa6;
+	struct sockaddr_in *sa4;
+	struct hlist_head *head;
+	struct ovpn_bind *bind;
+	struct ovpn_peer *tmp;
+	size_t salen;
+
+	spin_lock_bh(&ovpn->peers->lock_by_id);
+	/* do not add duplicates */
+	tmp = ovpn_peer_get_by_id(ovpn, peer->id);
+	if (tmp) {
+		ovpn_peer_put(tmp);
+		spin_unlock_bh(&ovpn->peers->lock_by_id);
+		return -EEXIST;
+	}
+
+	hlist_add_head_rcu(&peer->hash_entry_id,
+			   ovpn_get_hash_head(ovpn->peers->by_id, &peer->id,
+					      sizeof(peer->id)));
+	spin_unlock_bh(&ovpn->peers->lock_by_id);
+
+	bind = rcu_dereference_protected(peer->bind, true);
+	/* peers connected via TCP have bind == NULL */
+	if (bind) {
+		switch (bind->remote.in4.sin_family) {
+		case AF_INET:
+			sa4 = (struct sockaddr_in *)&sa;
+
+			sa4->sin_family = AF_INET;
+			sa4->sin_addr.s_addr = bind->remote.in4.sin_addr.s_addr;
+			sa4->sin_port = bind->remote.in4.sin_port;
+			salen = sizeof(*sa4);
+			break;
+		case AF_INET6:
+			sa6 = (struct sockaddr_in6 *)&sa;
+
+			sa6->sin6_family = AF_INET6;
+			sa6->sin6_addr = bind->remote.in6.sin6_addr;
+			sa6->sin6_port = bind->remote.in6.sin6_port;
+			salen = sizeof(*sa6);
+			break;
+		default:
+			/* unsupported family: undo the by_id insertion
+			 * performed above, otherwise we leave a half-added
+			 * peer in the table while reporting failure
+			 */
+			spin_lock_bh(&ovpn->peers->lock_by_id);
+			hlist_del_init_rcu(&peer->hash_entry_id);
+			spin_unlock_bh(&ovpn->peers->lock_by_id);
+			return -EPROTONOSUPPORT;
+		}
+
+		nhead = ovpn_get_hash_head(ovpn->peers->by_transp_addr, &sa,
+					   salen);
+		spin_lock_bh(&ovpn->peers->lock_by_transp_addr);
+		hlist_nulls_add_head_rcu(&peer->hash_entry_transp_addr, nhead);
+		spin_unlock_bh(&ovpn->peers->lock_by_transp_addr);
+	}
+
+	spin_lock_bh(&ovpn->peers->lock_by_vpn_addr);
+	if (peer->vpn_addrs.ipv4.s_addr != htonl(INADDR_ANY)) {
+		head = ovpn_get_hash_head(ovpn->peers->by_vpn_addr,
+					  &peer->vpn_addrs.ipv4,
+					  sizeof(peer->vpn_addrs.ipv4));
+		hlist_add_head_rcu(&peer->hash_entry_addr4, head);
+	}
+
+	if (!ipv6_addr_any(&peer->vpn_addrs.ipv6)) {
+		head = ovpn_get_hash_head(ovpn->peers->by_vpn_addr,
+					  &peer->vpn_addrs.ipv6,
+					  sizeof(peer->vpn_addrs.ipv6));
+		hlist_add_head_rcu(&peer->hash_entry_addr6, head);
+	}
+	spin_unlock_bh(&ovpn->peers->lock_by_vpn_addr);
+
+	return 0;
+}
+
 /**
  * ovpn_peer_add_p2p - add peer to related tables in a P2P instance
  * @ovpn: the instance to add the peer to
@@ -349,6 +436,8 @@  static int ovpn_peer_add_p2p(struct ovpn_struct *ovpn, struct ovpn_peer *peer)
 int ovpn_peer_add(struct ovpn_struct *ovpn, struct ovpn_peer *peer)
 {
 	switch (ovpn->mode) {
+	case OVPN_MODE_MP:
+		return ovpn_peer_add_mp(ovpn, peer);
 	case OVPN_MODE_P2P:
 		return ovpn_peer_add_p2p(ovpn, peer);
 	default:
@@ -356,6 +445,56 @@  int ovpn_peer_add(struct ovpn_struct *ovpn, struct ovpn_peer *peer)
 	}
 }
 
+/**
+ * ovpn_peer_unhash - remove peer reference from all hashtables
+ * @peer: the peer to remove
+ * @reason: the delete reason to attach to the peer
+ */
+static void ovpn_peer_unhash(struct ovpn_peer *peer,
+			     enum ovpn_del_peer_reason reason)
+	__must_hold(&ovpn->peers->lock_by_id)
+{
+	hlist_del_init_rcu(&peer->hash_entry_id);
+
+	spin_lock_bh(&peer->ovpn->peers->lock_by_vpn_addr);
+	hlist_del_init_rcu(&peer->hash_entry_addr4);
+	hlist_del_init_rcu(&peer->hash_entry_addr6);
+	spin_unlock_bh(&peer->ovpn->peers->lock_by_vpn_addr);
+
+	spin_lock_bh(&peer->ovpn->peers->lock_by_transp_addr);
+	hlist_nulls_del_init_rcu(&peer->hash_entry_transp_addr);
+	spin_unlock_bh(&peer->ovpn->peers->lock_by_transp_addr);
+
+	/* set the reason before dropping our reference: ovpn_peer_put() may
+	 * release the last reference and free the peer, after which writing
+	 * delete_reason would be a use-after-free
+	 */
+	peer->delete_reason = reason;
+	ovpn_peer_put(peer);
+}
+
+/**
+ * ovpn_peer_del_mp - delete peer from related tables in a MP instance
+ * @peer: the peer to delete
+ * @reason: reason why the peer was deleted (sent to userspace)
+ *
+ * The peer is removed only if it is still the one registered under its ID,
+ * otherwise -ENOENT is returned.
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+static int ovpn_peer_del_mp(struct ovpn_peer *peer,
+			    enum ovpn_del_peer_reason reason)
+	__must_hold(&peer->ovpn->peers->lock_by_id)
+{
+	struct ovpn_peer *found;
+	int ret = -ENOENT;
+
+	found = ovpn_peer_get_by_id(peer->ovpn, peer->id);
+	if (!found)
+		return ret;
+
+	if (found == peer) {
+		ovpn_peer_unhash(peer, reason);
+		ret = 0;
+	}
+
+	/* drop the reference taken by ovpn_peer_get_by_id() */
+	ovpn_peer_put(found);
+
+	return ret;
+}
+
 /**
  * ovpn_peer_del_p2p - delete peer from related tables in a P2P instance
  * @peer: the peer to delete
@@ -411,10 +550,36 @@  void ovpn_peer_release_p2p(struct ovpn_struct *ovpn)
  */
 int ovpn_peer_del(struct ovpn_peer *peer, enum ovpn_del_peer_reason reason)
 {
+	int ret;
+
 	switch (peer->ovpn->mode) {
+	case OVPN_MODE_MP:
+		/* ovpn_peer_del_mp() must be called with lock_by_id held */
+		spin_lock_bh(&peer->ovpn->peers->lock_by_id);
+		ret = ovpn_peer_del_mp(peer, reason);
+		spin_unlock_bh(&peer->ovpn->peers->lock_by_id);
+		return ret;
 	case OVPN_MODE_P2P:
-		return ovpn_peer_del_p2p(peer, reason);
+		/* ovpn_peer_del_p2p() must be called with the instance lock
+		 * held
+		 */
+		spin_lock_bh(&peer->ovpn->lock);
+		ret = ovpn_peer_del_p2p(peer, reason);
+		spin_unlock_bh(&peer->ovpn->lock);
+		return ret;
 	default:
 		return -EOPNOTSUPP;
 	}
 }
+
+/**
+ * ovpn_peers_free - release all peers of an ovpn instance
+ * @ovpn: the instance whose peers should be released
+ */
+void ovpn_peers_free(struct ovpn_struct *ovpn)
+{
+	struct ovpn_peer *peer;
+	struct hlist_node *tmp;
+	int bkt;
+
+	/* the by_id table holds every peer: walking it with the safe
+	 * iterator allows ovpn_peer_unhash() to delete entries on the fly
+	 */
+	spin_lock_bh(&ovpn->peers->lock_by_id);
+	hash_for_each_safe(ovpn->peers->by_id, bkt, tmp, peer, hash_entry_id)
+		ovpn_peer_unhash(peer, OVPN_DEL_PEER_REASON_TEARDOWN);
+	spin_unlock_bh(&ovpn->peers->lock_by_id);
+}
diff --git a/drivers/net/ovpn/peer.h b/drivers/net/ovpn/peer.h
index 86d4696b1529..dc51cc93ef20 100644
--- a/drivers/net/ovpn/peer.h
+++ b/drivers/net/ovpn/peer.h
@@ -30,6 +30,10 @@ 
  * @vpn_addrs: IP addresses assigned over the tunnel
  * @vpn_addrs.ipv4: IPv4 assigned to peer on the tunnel
  * @vpn_addrs.ipv6: IPv6 assigned to peer on the tunnel
+ * @hash_entry_id: entry in the peer ID hashtable
+ * @hash_entry_addr4: entry in the peer IPv4 hashtable
+ * @hash_entry_addr6: entry in the peer IPv6 hashtable
+ * @hash_entry_transp_addr: entry in the peer transport address hashtable
  * @sock: the socket being used to talk to this peer
  * @tcp: keeps track of TCP specific state
  * @tcp.strp: stream parser context (TCP only)
@@ -62,6 +66,10 @@  struct ovpn_peer {
 		struct in_addr ipv4;
 		struct in6_addr ipv6;
 	} vpn_addrs;
+	struct hlist_node hash_entry_id;
+	struct hlist_node hash_entry_addr4;
+	struct hlist_node hash_entry_addr6;
+	struct hlist_nulls_node hash_entry_transp_addr;
 	struct ovpn_socket *sock;
 
 	/* state of the TCP reading. Needed to keep track of how much of a
@@ -126,6 +134,7 @@  struct ovpn_peer *ovpn_peer_new(struct ovpn_struct *ovpn, u32 id);
 int ovpn_peer_add(struct ovpn_struct *ovpn, struct ovpn_peer *peer);
 int ovpn_peer_del(struct ovpn_peer *peer, enum ovpn_del_peer_reason reason);
 void ovpn_peer_release_p2p(struct ovpn_struct *ovpn);
+void ovpn_peers_free(struct ovpn_struct *ovpn);
 
 struct ovpn_peer *ovpn_peer_get_by_transp_addr(struct ovpn_struct *ovpn,
 					       struct sk_buff *skb);