Message ID | 20240827120805.13681-16-antonio@openvpn.net
---|---
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Series | Introducing OpenVPN Data Channel Offload
2024-08-27, 14:07:55 +0200, Antonio Quartulli wrote:
>  static int ovpn_net_init(struct net_device *dev)
>  {
>  	struct ovpn_struct *ovpn = netdev_priv(dev);
> +	int i, err = gro_cells_init(&ovpn->gro_cells, dev);

I'm not a fan of "hiding" the gro_cells_init call up here. I'd prefer
if this was done just before the corresponding "if (err)".

> +	struct in_device *dev_v4;
>
> -	return gro_cells_init(&ovpn->gro_cells, dev);
> +	if (err)
> +		return err;
> +
> +	if (ovpn->mode == OVPN_MODE_MP) {
> +		dev_v4 = __in_dev_get_rtnl(dev);
> +		if (dev_v4) {
> +			/* disable redirects as Linux gets confused by ovpn
> +			 * handling same-LAN routing.
> +			 * This happens because a multipeer interface is used as
> +			 * relay point between hosts in the same subnet, while
> +			 * in a classic LAN this would not be needed because the
> +			 * two hosts would be able to talk directly.
> +			 */
> +			IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false);
> +			IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false;
> +		}
> +
> +		/* the peer container is fairly large, therefore we dynamically
> +		 * allocate it only when needed
> +		 */
> +		ovpn->peers = kzalloc(sizeof(*ovpn->peers), GFP_KERNEL);
> +		if (!ovpn->peers)

missing gro_cells_destroy

> +			return -ENOMEM;
> +
> +		spin_lock_init(&ovpn->peers->lock_by_id);
> +		spin_lock_init(&ovpn->peers->lock_by_vpn_addr);
> +		spin_lock_init(&ovpn->peers->lock_by_transp_addr);

What's the benefit of having 3 separate locks instead of a single lock
protecting all the hashtables?

> +
> +		for (i = 0; i < ARRAY_SIZE(ovpn->peers->by_id); i++) {
> +			INIT_HLIST_HEAD(&ovpn->peers->by_id[i]);
> +			INIT_HLIST_HEAD(&ovpn->peers->by_vpn_addr[i]);
> +			INIT_HLIST_NULLS_HEAD(&ovpn->peers->by_transp_addr[i],
> +					      i);
> +		}
> +	}
> +
> +	return 0;
>  }

> +static int ovpn_peer_add_mp(struct ovpn_struct *ovpn, struct ovpn_peer *peer)
> +{
> +	struct sockaddr_storage sa = { 0 };
> +	struct hlist_nulls_head *nhead;
> +	struct sockaddr_in6 *sa6;
> +	struct sockaddr_in *sa4;
> +	struct hlist_head *head;
> +	struct ovpn_bind *bind;
> +	struct ovpn_peer *tmp;
> +	size_t salen;
> +
> +	spin_lock_bh(&ovpn->peers->lock_by_id);
> +	/* do not add duplicates */
> +	tmp = ovpn_peer_get_by_id(ovpn, peer->id);
> +	if (tmp) {
> +		ovpn_peer_put(tmp);
> +		spin_unlock_bh(&ovpn->peers->lock_by_id);
> +		return -EEXIST;
> +	}
> +
> +	hlist_add_head_rcu(&peer->hash_entry_id,
> +			   ovpn_get_hash_head(ovpn->peers->by_id, &peer->id,
> +					      sizeof(peer->id)));
> +	spin_unlock_bh(&ovpn->peers->lock_by_id);
> +
> +	bind = rcu_dereference_protected(peer->bind, true);
> +	/* peers connected via TCP have bind == NULL */
> +	if (bind) {
> +		switch (bind->remote.in4.sin_family) {
> +		case AF_INET:
> +			sa4 = (struct sockaddr_in *)&sa;
> +
> +			sa4->sin_family = AF_INET;
> +			sa4->sin_addr.s_addr = bind->remote.in4.sin_addr.s_addr;
> +			sa4->sin_port = bind->remote.in4.sin_port;
> +			salen = sizeof(*sa4);
> +			break;
> +		case AF_INET6:
> +			sa6 = (struct sockaddr_in6 *)&sa;
> +
> +			sa6->sin6_family = AF_INET6;
> +			sa6->sin6_addr = bind->remote.in6.sin6_addr;
> +			sa6->sin6_port = bind->remote.in6.sin6_port;
> +			salen = sizeof(*sa6);
> +			break;
> +		default:

And remove from the by_id hashtable? Or is that handled somewhere that
I missed (I don't think ovpn_peer_unhash gets called in that case)?

> +			return -EPROTONOSUPPORT;
> +		}
> +
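To make the two requested fixes concrete, here is a minimal sketch of how ovpn_net_init() could look with gro_cells_init() moved next to its error check and the missing cleanup added; this is an illustration based on the review comments above, not code from the series:

	static int ovpn_net_init(struct net_device *dev)
	{
		struct ovpn_struct *ovpn = netdev_priv(dev);
		int err;

		/* init placed right next to its error check, as requested */
		err = gro_cells_init(&ovpn->gro_cells, dev);
		if (err)
			return err;

		if (ovpn->mode == OVPN_MODE_MP) {
			/* redirect tweaks and hashtable setup elided */
			ovpn->peers = kzalloc(sizeof(*ovpn->peers), GFP_KERNEL);
			if (!ovpn->peers) {
				/* the missing cleanup flagged above */
				gro_cells_destroy(&ovpn->gro_cells);
				return -ENOMEM;
			}
		}

		return 0;
	}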
2024-09-03, 16:40:51 +0200, Sabrina Dubroca wrote:
> 2024-08-27, 14:07:55 +0200, Antonio Quartulli wrote:
> > +static int ovpn_peer_add_mp(struct ovpn_struct *ovpn, struct ovpn_peer *peer)
> > +{
> > +	struct sockaddr_storage sa = { 0 };
> > +	struct hlist_nulls_head *nhead;
> > +	struct sockaddr_in6 *sa6;
> > +	struct sockaddr_in *sa4;
> > +	struct hlist_head *head;
> > +	struct ovpn_bind *bind;
> > +	struct ovpn_peer *tmp;
> > +	size_t salen;
> > +
> > +	spin_lock_bh(&ovpn->peers->lock_by_id);
> > +	/* do not add duplicates */
> > +	tmp = ovpn_peer_get_by_id(ovpn, peer->id);
> > +	if (tmp) {
> > +		ovpn_peer_put(tmp);
> > +		spin_unlock_bh(&ovpn->peers->lock_by_id);
> > +		return -EEXIST;
> > +	}
> > +
> > +	hlist_add_head_rcu(&peer->hash_entry_id,
> > +			   ovpn_get_hash_head(ovpn->peers->by_id, &peer->id,
> > +					      sizeof(peer->id)));
> > +	spin_unlock_bh(&ovpn->peers->lock_by_id);
> > +
> > +	bind = rcu_dereference_protected(peer->bind, true);

What protects us here? We just released lock_by_id and we're not
holding peer->lock.

> > +	/* peers connected via TCP have bind == NULL */
> > +	if (bind) {
> > +		switch (bind->remote.in4.sin_family) {
> > +		case AF_INET:
> > +			sa4 = (struct sockaddr_in *)&sa;
> > +
> > +			sa4->sin_family = AF_INET;
> > +			sa4->sin_addr.s_addr = bind->remote.in4.sin_addr.s_addr;
> > +			sa4->sin_port = bind->remote.in4.sin_port;
> > +			salen = sizeof(*sa4);
> > +			break;
> > +		case AF_INET6:
> > +			sa6 = (struct sockaddr_in6 *)&sa;
> > +
> > +			sa6->sin6_family = AF_INET6;
> > +			sa6->sin6_addr = bind->remote.in6.sin6_addr;
> > +			sa6->sin6_port = bind->remote.in6.sin6_port;
> > +			salen = sizeof(*sa6);
> > +			break;
> > +		default:
>
> And remove from the by_id hashtable? Or is that handled somewhere that
> I missed (I don't think ovpn_peer_unhash gets called in that case)?

ovpn_nl_set_peer_doit does:

	ret = ovpn_peer_add(ovpn, peer);
	if (ret < 0) {
		[...]
		/* release right away because peer is not really used in any
		 * context
		 */
		ovpn_peer_release(peer);
		kfree(peer);

But if we fail at this stage, the peer was published in the by_id
hashtable and could be used.

Although AFAICT, ovpn can never create a bind with family !=
AF_INET{,6}, so this is not a real issue -- in that case I guess a
DEBUG_NET_WARN_ON_ONCE with a comment that this should never happen
would be acceptable (but I'd still remove the peer from by_id and go
through the proper release path instead of direct kfree in
ovpn_nl_set_peer_doit). Otherwise, you'd have to reorder things in
this function so that all failures are handled before the peer is
added to any hashtable.

> > +			return -EPROTONOSUPPORT;
> > +		}
> > +
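For reference, the "warn and unpublish" option described above could look roughly like this in the default branch; a sketch only, reusing the patch's own structures (DEBUG_NET_WARN_ON_ONCE is the existing net debug helper Sabrina names):

		default:
			/* ovpn never creates binds with another family, so
			 * this should be unreachable; warn once and unpublish
			 * the peer so the caller can release it normally
			 */
			DEBUG_NET_WARN_ON_ONCE(1);
			spin_lock_bh(&ovpn->peers->lock_by_id);
			hlist_del_init_rcu(&peer->hash_entry_id);
			spin_unlock_bh(&ovpn->peers->lock_by_id);
			return -EPROTONOSUPPORT;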
On 03/09/2024 16:40, Sabrina Dubroca wrote:
> 2024-08-27, 14:07:55 +0200, Antonio Quartulli wrote:
>>  static int ovpn_net_init(struct net_device *dev)
>>  {
>>  	struct ovpn_struct *ovpn = netdev_priv(dev);
>> +	int i, err = gro_cells_init(&ovpn->gro_cells, dev);
>
> I'm not a fan of "hiding" the gro_cells_init call up here. I'd prefer
> if this was done just before the corresponding "if (err)".

I am all with you, but I remember in the past something complaining
about "variable declared and then re-assigned right after".

But maybe this is not the case anymore.

Will move the initialization down.

>
>> +	struct in_device *dev_v4;
>>
>> -	return gro_cells_init(&ovpn->gro_cells, dev);
>> +	if (err)
>> +		return err;
>> +
>> +	if (ovpn->mode == OVPN_MODE_MP) {
>> +		dev_v4 = __in_dev_get_rtnl(dev);
>> +		if (dev_v4) {
>> +			/* disable redirects as Linux gets confused by ovpn
>> +			 * handling same-LAN routing.
>> +			 * This happens because a multipeer interface is used as
>> +			 * relay point between hosts in the same subnet, while
>> +			 * in a classic LAN this would not be needed because the
>> +			 * two hosts would be able to talk directly.
>> +			 */
>> +			IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false);
>> +			IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false;
>> +		}
>> +
>> +		/* the peer container is fairly large, therefore we dynamically
>> +		 * allocate it only when needed
>> +		 */
>> +		ovpn->peers = kzalloc(sizeof(*ovpn->peers), GFP_KERNEL);
>> +		if (!ovpn->peers)
>
> missing gro_cells_destroy

ACK

>
>> +			return -ENOMEM;
>> +
>> +		spin_lock_init(&ovpn->peers->lock_by_id);
>> +		spin_lock_init(&ovpn->peers->lock_by_vpn_addr);
>> +		spin_lock_init(&ovpn->peers->lock_by_transp_addr);
>
> What's the benefit of having 3 separate locks instead of a single lock
> protecting all the hashtables?

The main reason was to avoid a deadlock - I thought I had added a
comment about it...

The problem was a deadlock between acquiring peer->lock and
ovpn->peers->lock in float() and then in the opposite sequence in
peers_free(). (IIRC this happens due to ovpn_peer_reset_sockaddr()
acquiring peer->lock)

Splitting the larger peers->lock allowed me to avoid this scenario,
because I don't need to jump through any hoop to coordinate access to
different hashtables.

>
>> +
>> +		for (i = 0; i < ARRAY_SIZE(ovpn->peers->by_id); i++) {
>> +			INIT_HLIST_HEAD(&ovpn->peers->by_id[i]);
>> +			INIT_HLIST_HEAD(&ovpn->peers->by_vpn_addr[i]);
>> +			INIT_HLIST_NULLS_HEAD(&ovpn->peers->by_transp_addr[i],
>> +					      i);
>> +		}
>> +	}
>> +
>> +	return 0;
>>  }
>
>> +static int ovpn_peer_add_mp(struct ovpn_struct *ovpn, struct ovpn_peer *peer)
>> +{
>> +	struct sockaddr_storage sa = { 0 };
>> +	struct hlist_nulls_head *nhead;
>> +	struct sockaddr_in6 *sa6;
>> +	struct sockaddr_in *sa4;
>> +	struct hlist_head *head;
>> +	struct ovpn_bind *bind;
>> +	struct ovpn_peer *tmp;
>> +	size_t salen;
>> +
>> +	spin_lock_bh(&ovpn->peers->lock_by_id);
>> +	/* do not add duplicates */
>> +	tmp = ovpn_peer_get_by_id(ovpn, peer->id);
>> +	if (tmp) {
>> +		ovpn_peer_put(tmp);
>> +		spin_unlock_bh(&ovpn->peers->lock_by_id);
>> +		return -EEXIST;
>> +	}
>> +
>> +	hlist_add_head_rcu(&peer->hash_entry_id,
>> +			   ovpn_get_hash_head(ovpn->peers->by_id, &peer->id,
>> +					      sizeof(peer->id)));
>> +	spin_unlock_bh(&ovpn->peers->lock_by_id);
>> +
>> +	bind = rcu_dereference_protected(peer->bind, true);
>> +	/* peers connected via TCP have bind == NULL */
>> +	if (bind) {
>> +		switch (bind->remote.in4.sin_family) {
>> +		case AF_INET:
>> +			sa4 = (struct sockaddr_in *)&sa;
>> +
>> +			sa4->sin_family = AF_INET;
>> +			sa4->sin_addr.s_addr = bind->remote.in4.sin_addr.s_addr;
>> +			sa4->sin_port = bind->remote.in4.sin_port;
>> +			salen = sizeof(*sa4);
>> +			break;
>> +		case AF_INET6:
>> +			sa6 = (struct sockaddr_in6 *)&sa;
>> +
>> +			sa6->sin6_family = AF_INET6;
>> +			sa6->sin6_addr = bind->remote.in6.sin6_addr;
>> +			sa6->sin6_port = bind->remote.in6.sin6_port;
>> +			salen = sizeof(*sa6);
>> +			break;
>> +		default:
>
> And remove from the by_id hashtable? Or is that handled somewhere that
> I missed (I don't think ovpn_peer_unhash gets called in that case)?

No we don't call unhash in this case as we assume the adding just
failed entirely.

I will add the removal before returning the error (moving the add below
the switch would extend the locked area too much).

>
>> +			return -EPROTONOSUPPORT;
>> +		}
>> +

Thanks a lot
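For readers following along, the deadlock Antonio describes is a classic AB-BA inversion; schematically (an illustration assuming a hypothetical merged peers->lock, not code from the series):

	/* float path:                        release path:
	 *   spin_lock(&peer->lock);            spin_lock(&ovpn->peers->lock);
	 *   spin_lock(&ovpn->peers->lock);     spin_lock(&peer->lock);
	 *
	 * each CPU holds the lock the other one wants => deadlock.
	 * With per-hashtable locks the release path holds lock_by_id while
	 * the float path nests peer->lock with lock_by_transp_addr, so the
	 * two paths never contend on the same pair of locks in opposite
	 * order.
	 */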
2024-09-05, 10:02:58 +0200, Antonio Quartulli wrote:
> On 03/09/2024 16:40, Sabrina Dubroca wrote:
> > 2024-08-27, 14:07:55 +0200, Antonio Quartulli wrote:
> > >  static int ovpn_net_init(struct net_device *dev)
> > >  {
> > >  	struct ovpn_struct *ovpn = netdev_priv(dev);
> > > +	int i, err = gro_cells_init(&ovpn->gro_cells, dev);
> >
> > I'm not a fan of "hiding" the gro_cells_init call up here. I'd prefer
> > if this was done just before the corresponding "if (err)".
>
> I am all with you, but I remember in the past something complaining
> about "variable declared and then re-assigned right after".
>
> But maybe this is not the case anymore.

If you had something like:

	int err;

	err = -EINVAL;

sure, it would make sense to combine them.

>
> Will move the initialization down.

Thanks.

> > > +
> > > +		spin_lock_init(&ovpn->peers->lock_by_id);
> > > +		spin_lock_init(&ovpn->peers->lock_by_vpn_addr);
> > > +		spin_lock_init(&ovpn->peers->lock_by_transp_addr);
> >
> > What's the benefit of having 3 separate locks instead of a single lock
> > protecting all the hashtables?
>
> The main reason was to avoid a deadlock - I thought I had added a
> comment about it...

Ok.

I could have missed it, I'm not looking at the comments much now that
I'm familiar with the code.

> The problem was a deadlock between acquiring peer->lock and
> ovpn->peers->lock in float() and then in the opposite sequence in
> peers_free(). (IIRC this happens due to ovpn_peer_reset_sockaddr()
> acquiring peer->lock)

I don't see a problem with ovpn_peer_reset_sockaddr, but ovpn_peer_put
can be called with lock_by_id held and then take peer->lock (in
ovpn_peer_release), which would be the opposite order to
ovpn_peer_float if the locks were merged (peer->lock then
lock_by_transp_addr).

This should be solvable with a single lock by delaying the bind
cleanup via call_rcu instead of doing it immediately with
ovpn_peer_release (after that delay, nothing should be using
peer->bind anymore, since we have no reference and no more
rcu_read_lock sections that could have found peer, so we can free
immediately and no need to take peer->lock). And it's I think a bit
more "correct" wrt RCU rules, since at ovpn_peer_put time, even with
refcount=0, we could have a reader still using the peer and deciding
to update its bind (not the case with how ovpn_peer_float is called,
since we have a reference on the peer).

(This could be completely wrong and/or make no sense at all :))

But I'm not going to insist on this, you can keep the separate locks.

> Splitting the larger peers->lock allowed me to avoid this scenario,
> because I don't need to jump through any hoop to coordinate access to
> different hashtables.
> >
> > > +
> > > +		for (i = 0; i < ARRAY_SIZE(ovpn->peers->by_id); i++) {
> > > +			INIT_HLIST_HEAD(&ovpn->peers->by_id[i]);
> > > +			INIT_HLIST_HEAD(&ovpn->peers->by_vpn_addr[i]);
> > > +			INIT_HLIST_NULLS_HEAD(&ovpn->peers->by_transp_addr[i],
> > > +					      i);
> > > +		}
> > > +	}
> > > +
> > > +	return 0;
> > >  }
> >
> > > +static int ovpn_peer_add_mp(struct ovpn_struct *ovpn, struct ovpn_peer *peer)
> > > +{
> > > +	struct sockaddr_storage sa = { 0 };
> > > +	struct hlist_nulls_head *nhead;
> > > +	struct sockaddr_in6 *sa6;
> > > +	struct sockaddr_in *sa4;
> > > +	struct hlist_head *head;
> > > +	struct ovpn_bind *bind;
> > > +	struct ovpn_peer *tmp;
> > > +	size_t salen;
> > > +
> > > +	spin_lock_bh(&ovpn->peers->lock_by_id);
> > > +	/* do not add duplicates */
> > > +	tmp = ovpn_peer_get_by_id(ovpn, peer->id);
> > > +	if (tmp) {
> > > +		ovpn_peer_put(tmp);
> > > +		spin_unlock_bh(&ovpn->peers->lock_by_id);
> > > +		return -EEXIST;
> > > +	}
> > > +
> > > +	hlist_add_head_rcu(&peer->hash_entry_id,
> > > +			   ovpn_get_hash_head(ovpn->peers->by_id, &peer->id,
> > > +					      sizeof(peer->id)));
> > > +	spin_unlock_bh(&ovpn->peers->lock_by_id);
> > > +
> > > +	bind = rcu_dereference_protected(peer->bind, true);
> > > +	/* peers connected via TCP have bind == NULL */
> > > +	if (bind) {
> > > +		switch (bind->remote.in4.sin_family) {
> > > +		case AF_INET:
> > > +			sa4 = (struct sockaddr_in *)&sa;
> > > +
> > > +			sa4->sin_family = AF_INET;
> > > +			sa4->sin_addr.s_addr = bind->remote.in4.sin_addr.s_addr;
> > > +			sa4->sin_port = bind->remote.in4.sin_port;
> > > +			salen = sizeof(*sa4);
> > > +			break;
> > > +		case AF_INET6:
> > > +			sa6 = (struct sockaddr_in6 *)&sa;
> > > +
> > > +			sa6->sin6_family = AF_INET6;
> > > +			sa6->sin6_addr = bind->remote.in6.sin6_addr;
> > > +			sa6->sin6_port = bind->remote.in6.sin6_port;
> > > +			salen = sizeof(*sa6);
> > > +			break;
> > > +		default:
> >
> > And remove from the by_id hashtable? Or is that handled somewhere that
> > I missed (I don't think ovpn_peer_unhash gets called in that case)?
>
> No we don't call unhash in this case as we assume the adding just
> failed entirely.
>
> I will add the removal before returning the error (moving the add below
> the switch would extend the locked area too much).

I don't think setting a few variables would be too much to do under
the lock (and it would address the issues in my 2nd reply to this
patch).
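A sketch of the two-step release outlined above, assuming struct ovpn_bind embeds a struct rcu_head; ovpn_bind_release_rcu and ovpn_peer_release_bind are hypothetical names, not functions from the series:

	static void ovpn_bind_release_rcu(struct rcu_head *head)
	{
		struct ovpn_bind *bind = container_of(head, struct ovpn_bind,
						      rcu);

		kfree(bind);
	}

	static void ovpn_peer_release_bind(struct ovpn_peer *peer)
	{
		struct ovpn_bind *bind;

		/* refcount is 0, so no new reference can appear, but a
		 * reader that found the peer under rcu_read_lock() may still
		 * touch peer->bind: defer the free by one grace period
		 * instead of taking peer->lock
		 */
		bind = rcu_access_pointer(peer->bind);
		RCU_INIT_POINTER(peer->bind, NULL);
		if (bind)
			call_rcu(&bind->rcu, ovpn_bind_release_rcu);
	}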
On 04/09/2024 12:10, Sabrina Dubroca wrote:
> 2024-09-03, 16:40:51 +0200, Sabrina Dubroca wrote:
>> 2024-08-27, 14:07:55 +0200, Antonio Quartulli wrote:
>>> +static int ovpn_peer_add_mp(struct ovpn_struct *ovpn, struct ovpn_peer *peer)
>>> +{
>>> +	struct sockaddr_storage sa = { 0 };
>>> +	struct hlist_nulls_head *nhead;
>>> +	struct sockaddr_in6 *sa6;
>>> +	struct sockaddr_in *sa4;
>>> +	struct hlist_head *head;
>>> +	struct ovpn_bind *bind;
>>> +	struct ovpn_peer *tmp;
>>> +	size_t salen;
>>> +
>>> +	spin_lock_bh(&ovpn->peers->lock_by_id);
>>> +	/* do not add duplicates */
>>> +	tmp = ovpn_peer_get_by_id(ovpn, peer->id);
>>> +	if (tmp) {
>>> +		ovpn_peer_put(tmp);
>>> +		spin_unlock_bh(&ovpn->peers->lock_by_id);
>>> +		return -EEXIST;
>>> +	}
>>> +
>>> +	hlist_add_head_rcu(&peer->hash_entry_id,
>>> +			   ovpn_get_hash_head(ovpn->peers->by_id, &peer->id,
>>> +					      sizeof(peer->id)));
>>> +	spin_unlock_bh(&ovpn->peers->lock_by_id);
>>> +
>>> +	bind = rcu_dereference_protected(peer->bind, true);
>
> What protects us here? We just released lock_by_id and we're not
> holding peer->lock.

Hmm. I think originally it was not possible to hold this peer in any
other context since the peer was still being added. But now we have
added it to the by_id table already, so we cannot assume that anymore.

Maybe I should simply move this assignment before the
hlist_add_head_rcu() to regain that assumption.

>
>>> +	/* peers connected via TCP have bind == NULL */
>>> +	if (bind) {
>>> +		switch (bind->remote.in4.sin_family) {
>>> +		case AF_INET:
>>> +			sa4 = (struct sockaddr_in *)&sa;
>>> +
>>> +			sa4->sin_family = AF_INET;
>>> +			sa4->sin_addr.s_addr = bind->remote.in4.sin_addr.s_addr;
>>> +			sa4->sin_port = bind->remote.in4.sin_port;
>>> +			salen = sizeof(*sa4);
>>> +			break;
>>> +		case AF_INET6:
>>> +			sa6 = (struct sockaddr_in6 *)&sa;
>>> +
>>> +			sa6->sin6_family = AF_INET6;
>>> +			sa6->sin6_addr = bind->remote.in6.sin6_addr;
>>> +			sa6->sin6_port = bind->remote.in6.sin6_port;
>>> +			salen = sizeof(*sa6);
>>> +			break;
>>> +		default:
>>
>> And remove from the by_id hashtable? Or is that handled somewhere that
>> I missed (I don't think ovpn_peer_unhash gets called in that case)?
>
> ovpn_nl_set_peer_doit does:
>
> 	ret = ovpn_peer_add(ovpn, peer);
> 	if (ret < 0) {
> 		[...]
> 		/* release right away because peer is not really used in any
> 		 * context
> 		 */
> 		ovpn_peer_release(peer);
> 		kfree(peer);
>
> But if we fail at this stage, the peer was published in the by_id
> hashtable and could be used.
>
> Although AFAICT, ovpn can never create a bind with family !=
> AF_INET{,6}, so this is not a real issue -- in that case I guess a
> DEBUG_NET_WARN_ON_ONCE with a comment that this should never happen
> would be acceptable (but I'd still remove the peer from by_id and go
> through the proper release path instead of direct kfree in
> ovpn_nl_set_peer_doit). Otherwise, you'd have to reorder things in
> this function so that all failures are handled before the peer is
> added to any hashtable.

To be honest I don't mind adding a pre-check and erroring out
immediately. I don't like adding a peer to the table when it is failing
basic sanity checks.

Thanks!

>
>>> +			return -EPROTONOSUPPORT;
>>> +		}
>>> +
>
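The pre-check Antonio mentions could sit at the very top of ovpn_peer_add_mp(), before the peer becomes visible anywhere; a sketch reusing the patch's own fields, not the actual follow-up code:

	bind = rcu_dereference_protected(peer->bind, true);
	if (bind && bind->remote.in4.sin_family != AF_INET &&
	    bind->remote.in4.sin_family != AF_INET6)
		return -EPROTONOSUPPORT;	/* nothing published yet */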
On 05/09/2024 12:47, Sabrina Dubroca wrote:
>>>> +
>>>> +		spin_lock_init(&ovpn->peers->lock_by_id);
>>>> +		spin_lock_init(&ovpn->peers->lock_by_vpn_addr);
>>>> +		spin_lock_init(&ovpn->peers->lock_by_transp_addr);
>>>
>>> What's the benefit of having 3 separate locks instead of a single lock
>>> protecting all the hashtables?
>>
>> The main reason was to avoid a deadlock - I thought I had added a
>> comment about it...
>
> Ok.
>
> I could have missed it, I'm not looking at the comments much now that
> I'm familiar with the code.
>
>> The problem was a deadlock between acquiring peer->lock and
>> ovpn->peers->lock in float() and then in the opposite sequence in
>> peers_free(). (IIRC this happens due to ovpn_peer_reset_sockaddr()
>> acquiring peer->lock)
>
> I don't see a problem with ovpn_peer_reset_sockaddr, but ovpn_peer_put
> can be called with lock_by_id held and then take peer->lock (in
> ovpn_peer_release), which would be the opposite order to
> ovpn_peer_float if the locks were merged (peer->lock then
> lock_by_transp_addr).
>
> This should be solvable with a single lock by delaying the bind
> cleanup via call_rcu instead of doing it immediately with
> ovpn_peer_release (after that delay, nothing should be using
> peer->bind anymore, since we have no reference and no more
> rcu_read_lock sections that could have found peer, so we can free
> immediately and no need to take peer->lock). And it's I think a bit
> more "correct" wrt RCU rules, since at ovpn_peer_put time, even with
> refcount=0, we could have a reader still using the peer and deciding
> to update its bind (not the case with how ovpn_peer_float is called,
> since we have a reference on the peer).

Yep, I totally agree with your analysis.

In a previous version we simplified the code to the point that call_rcu
was not needed anymore and we could just rely on kfree_rcu for peer.
Now this "going back to call_rcu" felt a bit wrong, so I tried hard to
avoid that.

But I agree that this is a clear case where a two-step release is the
right thing to do.

Will try to get it fixed as per your suggestion. Thanks!

>
> (This could be completely wrong and/or make no sense at all :))
>
> But I'm not going to insist on this, you can keep the separate locks.
>
>
>> Splitting the larger peers->lock allowed me to avoid this scenario,
>> because I don't need to jump through any hoop to coordinate access to
>> different hashtables.
>>
>>>
>>>> +
>>>> +		for (i = 0; i < ARRAY_SIZE(ovpn->peers->by_id); i++) {
>>>> +			INIT_HLIST_HEAD(&ovpn->peers->by_id[i]);
>>>> +			INIT_HLIST_HEAD(&ovpn->peers->by_vpn_addr[i]);
>>>> +			INIT_HLIST_NULLS_HEAD(&ovpn->peers->by_transp_addr[i],
>>>> +					      i);
>>>> +		}
>>>> +	}
>>>> +
>>>> +	return 0;
>>>>  }
>>>
>>>> +static int ovpn_peer_add_mp(struct ovpn_struct *ovpn, struct ovpn_peer *peer)
>>>> +{
>>>> +	struct sockaddr_storage sa = { 0 };
>>>> +	struct hlist_nulls_head *nhead;
>>>> +	struct sockaddr_in6 *sa6;
>>>> +	struct sockaddr_in *sa4;
>>>> +	struct hlist_head *head;
>>>> +	struct ovpn_bind *bind;
>>>> +	struct ovpn_peer *tmp;
>>>> +	size_t salen;
>>>> +
>>>> +	spin_lock_bh(&ovpn->peers->lock_by_id);
>>>> +	/* do not add duplicates */
>>>> +	tmp = ovpn_peer_get_by_id(ovpn, peer->id);
>>>> +	if (tmp) {
>>>> +		ovpn_peer_put(tmp);
>>>> +		spin_unlock_bh(&ovpn->peers->lock_by_id);
>>>> +		return -EEXIST;
>>>> +	}
>>>> +
>>>> +	hlist_add_head_rcu(&peer->hash_entry_id,
>>>> +			   ovpn_get_hash_head(ovpn->peers->by_id, &peer->id,
>>>> +					      sizeof(peer->id)));
>>>> +	spin_unlock_bh(&ovpn->peers->lock_by_id);
>>>> +
>>>> +	bind = rcu_dereference_protected(peer->bind, true);
>>>> +	/* peers connected via TCP have bind == NULL */
>>>> +	if (bind) {
>>>> +		switch (bind->remote.in4.sin_family) {
>>>> +		case AF_INET:
>>>> +			sa4 = (struct sockaddr_in *)&sa;
>>>> +
>>>> +			sa4->sin_family = AF_INET;
>>>> +			sa4->sin_addr.s_addr = bind->remote.in4.sin_addr.s_addr;
>>>> +			sa4->sin_port = bind->remote.in4.sin_port;
>>>> +			salen = sizeof(*sa4);
>>>> +			break;
>>>> +		case AF_INET6:
>>>> +			sa6 = (struct sockaddr_in6 *)&sa;
>>>> +
>>>> +			sa6->sin6_family = AF_INET6;
>>>> +			sa6->sin6_addr = bind->remote.in6.sin6_addr;
>>>> +			sa6->sin6_port = bind->remote.in6.sin6_port;
>>>> +			salen = sizeof(*sa6);
>>>> +			break;
>>>> +		default:
>>>
>>> And remove from the by_id hashtable? Or is that handled somewhere that
>>> I missed (I don't think ovpn_peer_unhash gets called in that case)?
>>
>> No we don't call unhash in this case as we assume the adding just
>> failed entirely.
>>
>> I will add the removal before returning the error (moving the add
>> below the switch would extend the locked area too much).
>
> I don't think setting a few variables would be too much to do under
> the lock (and it would address the issues in my 2nd reply to this
> patch).

Right - pretty much what I replied to that comment (I will move the add
after the bind dereference).

Cheers,
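Putting the two agreed changes together — dereferencing the bind before publishing the peer, and keeping the whole sequence under lock_by_id — the reworked start of ovpn_peer_add_mp() could look like this (a sketch; ovpn_bind_family_valid() is a hypothetical helper for the family check):

	spin_lock_bh(&ovpn->peers->lock_by_id);
	/* do not add duplicates */
	tmp = ovpn_peer_get_by_id(ovpn, peer->id);
	if (tmp) {
		ovpn_peer_put(tmp);
		spin_unlock_bh(&ovpn->peers->lock_by_id);
		return -EEXIST;
	}

	/* validate the bind first: the peer is not reachable by others yet */
	bind = rcu_dereference_protected(peer->bind, true);
	if (bind && !ovpn_bind_family_valid(bind)) {
		spin_unlock_bh(&ovpn->peers->lock_by_id);
		return -EPROTONOSUPPORT;
	}

	/* only now publish the peer in the by_id table */
	hlist_add_head_rcu(&peer->hash_entry_id,
			   ovpn_get_hash_head(ovpn->peers->by_id, &peer->id,
					      sizeof(peer->id)));
	spin_unlock_bh(&ovpn->peers->lock_by_id);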
diff --git a/drivers/net/ovpn/main.c b/drivers/net/ovpn/main.c
index 18fefda74fd1..94d2ee87807f 100644
--- a/drivers/net/ovpn/main.c
+++ b/drivers/net/ovpn/main.c
@@ -54,14 +54,53 @@ static void ovpn_struct_free(struct net_device *net)
 	struct ovpn_struct *ovpn = netdev_priv(net);
 
 	gro_cells_destroy(&ovpn->gro_cells);
+	kfree(ovpn->peers);
 	rcu_barrier();
 }
 
 static int ovpn_net_init(struct net_device *dev)
 {
 	struct ovpn_struct *ovpn = netdev_priv(dev);
+	int i, err = gro_cells_init(&ovpn->gro_cells, dev);
+	struct in_device *dev_v4;
 
-	return gro_cells_init(&ovpn->gro_cells, dev);
+	if (err)
+		return err;
+
+	if (ovpn->mode == OVPN_MODE_MP) {
+		dev_v4 = __in_dev_get_rtnl(dev);
+		if (dev_v4) {
+			/* disable redirects as Linux gets confused by ovpn
+			 * handling same-LAN routing.
+			 * This happens because a multipeer interface is used as
+			 * relay point between hosts in the same subnet, while
+			 * in a classic LAN this would not be needed because the
+			 * two hosts would be able to talk directly.
+			 */
+			IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false);
+			IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false;
+		}
+
+		/* the peer container is fairly large, therefore we dynamically
+		 * allocate it only when needed
+		 */
+		ovpn->peers = kzalloc(sizeof(*ovpn->peers), GFP_KERNEL);
+		if (!ovpn->peers)
+			return -ENOMEM;
+
+		spin_lock_init(&ovpn->peers->lock_by_id);
+		spin_lock_init(&ovpn->peers->lock_by_vpn_addr);
+		spin_lock_init(&ovpn->peers->lock_by_transp_addr);
+
+		for (i = 0; i < ARRAY_SIZE(ovpn->peers->by_id); i++) {
+			INIT_HLIST_HEAD(&ovpn->peers->by_id[i]);
+			INIT_HLIST_HEAD(&ovpn->peers->by_vpn_addr[i]);
+			INIT_HLIST_NULLS_HEAD(&ovpn->peers->by_transp_addr[i],
+					      i);
+		}
+	}
+
+	return 0;
 }
 
 static int ovpn_net_open(struct net_device *dev)
@@ -210,8 +249,14 @@ void ovpn_iface_destruct(struct ovpn_struct *ovpn)
 
 	ovpn->registered = false;
 
-	if (ovpn->mode == OVPN_MODE_P2P)
+	switch (ovpn->mode) {
+	case OVPN_MODE_P2P:
 		ovpn_peer_release_p2p(ovpn);
+		break;
+	default:
+		ovpn_peers_free(ovpn);
+		break;
+	}
 }
 
 static int ovpn_netdev_notifier_call(struct notifier_block *nb,
diff --git a/drivers/net/ovpn/ovpnstruct.h b/drivers/net/ovpn/ovpnstruct.h
index 65497ce115aa..3d74e79fcdf3 100644
--- a/drivers/net/ovpn/ovpnstruct.h
+++ b/drivers/net/ovpn/ovpnstruct.h
@@ -14,6 +14,26 @@
 #include <net/net_trackers.h>
 #include <uapi/linux/ovpn.h>
 
+/**
+ * struct ovpn_peer_collection - container of peers for MultiPeer mode
+ * @by_id: table of peers indexed by ID
+ * @by_vpn_addr: table of peers indexed by VPN IP address
+ * @by_transp_addr: table of peers indexed by transport address (items can be
+ *		    rehashed on the fly due to peer IP change)
+ * @lock_by_id: protects writes to peers by_id table
+ * @lock_by_vpn_addr: protects writes to peers by_vpn_addr table
+ * @lock_by_transp_addr: protects writes to peers by_transp_addr table
+ */
+struct ovpn_peer_collection {
+	DECLARE_HASHTABLE(by_id, 12);
+	DECLARE_HASHTABLE(by_vpn_addr, 12);
+	struct hlist_nulls_head by_transp_addr[1 << 12];
+
+	spinlock_t lock_by_id;		/* protects writes to by_id */
+	spinlock_t lock_by_vpn_addr;	/* protects by_vpn_addr */
+	spinlock_t lock_by_transp_addr;	/* protects by_transp_addr */
+};
+
 /**
  * struct ovpn_struct - per ovpn interface state
  * @dev: the actual netdev representing the tunnel
@@ -21,6 +41,7 @@
  * @registered: whether dev is still registered with netdev or not
  * @mode: device operation mode (i.e. p2p, mp, ..)
  * @lock: protect this object
+ * @peers: data structures holding multi-peer references
  * @peer: in P2P mode, this is the only remote peer
  * @dev_list: entry for the module wide device list
  * @gro_cells: pointer to the Generic Receive Offload cell
@@ -31,6 +52,7 @@ struct ovpn_struct {
 	bool registered;
 	enum ovpn_mode mode;
 	spinlock_t lock; /* protect writing to the ovpn_struct object */
+	struct ovpn_peer_collection *peers;
 	struct ovpn_peer __rcu *peer;
 	struct list_head dev_list;
 	struct gro_cells gro_cells;
diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c
index 6d34c56a4a51..58037c2cbcca 100644
--- a/drivers/net/ovpn/peer.c
+++ b/drivers/net/ovpn/peer.c
@@ -9,6 +9,7 @@
 
 #include <linux/skbuff.h>
 #include <linux/list.h>
+#include <linux/hashtable.h>
 
 #include "ovpnstruct.h"
 #include "bind.h"
@@ -309,6 +310,92 @@ bool ovpn_peer_check_by_src(struct ovpn_struct *ovpn, struct sk_buff *skb,
 	return match;
 }
 
+#define ovpn_get_hash_head(_tbl, _key, _key_len) ({			\
+	typeof(_tbl) *__tbl = &(_tbl);					\
+	(&(*__tbl)[jhash(_key, _key_len, 0) % HASH_SIZE(*__tbl)]); })	\
+
+/**
+ * ovpn_peer_add_mp - add peer to related tables in a MP instance
+ * @ovpn: the instance to add the peer to
+ * @peer: the peer to add
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+static int ovpn_peer_add_mp(struct ovpn_struct *ovpn, struct ovpn_peer *peer)
+{
+	struct sockaddr_storage sa = { 0 };
+	struct hlist_nulls_head *nhead;
+	struct sockaddr_in6 *sa6;
+	struct sockaddr_in *sa4;
+	struct hlist_head *head;
+	struct ovpn_bind *bind;
+	struct ovpn_peer *tmp;
+	size_t salen;
+
+	spin_lock_bh(&ovpn->peers->lock_by_id);
+	/* do not add duplicates */
+	tmp = ovpn_peer_get_by_id(ovpn, peer->id);
+	if (tmp) {
+		ovpn_peer_put(tmp);
+		spin_unlock_bh(&ovpn->peers->lock_by_id);
+		return -EEXIST;
+	}
+
+	hlist_add_head_rcu(&peer->hash_entry_id,
+			   ovpn_get_hash_head(ovpn->peers->by_id, &peer->id,
+					      sizeof(peer->id)));
+	spin_unlock_bh(&ovpn->peers->lock_by_id);
+
+	bind = rcu_dereference_protected(peer->bind, true);
+	/* peers connected via TCP have bind == NULL */
+	if (bind) {
+		switch (bind->remote.in4.sin_family) {
+		case AF_INET:
+			sa4 = (struct sockaddr_in *)&sa;
+
+			sa4->sin_family = AF_INET;
+			sa4->sin_addr.s_addr = bind->remote.in4.sin_addr.s_addr;
+			sa4->sin_port = bind->remote.in4.sin_port;
+			salen = sizeof(*sa4);
+			break;
+		case AF_INET6:
+			sa6 = (struct sockaddr_in6 *)&sa;
+
+			sa6->sin6_family = AF_INET6;
+			sa6->sin6_addr = bind->remote.in6.sin6_addr;
+			sa6->sin6_port = bind->remote.in6.sin6_port;
+			salen = sizeof(*sa6);
+			break;
+		default:
+			return -EPROTONOSUPPORT;
+		}
+
+		nhead = ovpn_get_hash_head(ovpn->peers->by_transp_addr, &sa,
+					   salen);
+		spin_lock_bh(&ovpn->peers->lock_by_transp_addr);
+		hlist_nulls_add_head_rcu(&peer->hash_entry_transp_addr, nhead);
+		spin_unlock_bh(&ovpn->peers->lock_by_transp_addr);
+	}
+
+	spin_lock_bh(&ovpn->peers->lock_by_vpn_addr);
+	if (peer->vpn_addrs.ipv4.s_addr != htonl(INADDR_ANY)) {
+		head = ovpn_get_hash_head(ovpn->peers->by_vpn_addr,
+					  &peer->vpn_addrs.ipv4,
+					  sizeof(peer->vpn_addrs.ipv4));
+		hlist_add_head_rcu(&peer->hash_entry_addr4, head);
+	}
+
+	if (!ipv6_addr_any(&peer->vpn_addrs.ipv6)) {
+		head = ovpn_get_hash_head(ovpn->peers->by_vpn_addr,
+					  &peer->vpn_addrs.ipv6,
+					  sizeof(peer->vpn_addrs.ipv6));
+		hlist_add_head_rcu(&peer->hash_entry_addr6, head);
+	}
+	spin_unlock_bh(&ovpn->peers->lock_by_vpn_addr);
+
+	return 0;
+}
+
 /**
  * ovpn_peer_add_p2p - add peer to related tables in a P2P instance
  * @ovpn: the instance to add the peer to
@@ -349,6 +436,8 @@ static int ovpn_peer_add_p2p(struct ovpn_struct *ovpn, struct ovpn_peer *peer)
 int ovpn_peer_add(struct ovpn_struct *ovpn, struct ovpn_peer *peer)
 {
 	switch (ovpn->mode) {
+	case OVPN_MODE_MP:
+		return ovpn_peer_add_mp(ovpn, peer);
 	case OVPN_MODE_P2P:
 		return ovpn_peer_add_p2p(ovpn, peer);
 	default:
@@ -356,6 +445,56 @@ int ovpn_peer_add(struct ovpn_struct *ovpn, struct ovpn_peer *peer)
 	}
 }
 
+/**
+ * ovpn_peer_unhash - remove peer reference from all hashtables
+ * @peer: the peer to remove
+ * @reason: the delete reason to attach to the peer
+ */
+static void ovpn_peer_unhash(struct ovpn_peer *peer,
+			     enum ovpn_del_peer_reason reason)
+	__must_hold(&ovpn->peers->lock_by_id)
+{
+	hlist_del_init_rcu(&peer->hash_entry_id);
+
+	spin_lock_bh(&peer->ovpn->peers->lock_by_vpn_addr);
+	hlist_del_init_rcu(&peer->hash_entry_addr4);
+	hlist_del_init_rcu(&peer->hash_entry_addr6);
+	spin_unlock_bh(&peer->ovpn->peers->lock_by_vpn_addr);
+
+	spin_lock_bh(&peer->ovpn->peers->lock_by_transp_addr);
+	hlist_nulls_del_init_rcu(&peer->hash_entry_transp_addr);
+	spin_unlock_bh(&peer->ovpn->peers->lock_by_transp_addr);
+
+	ovpn_peer_put(peer);
+	peer->delete_reason = reason;
+}
+
+/**
+ * ovpn_peer_del_mp - delete peer from related tables in a MP instance
+ * @peer: the peer to delete
+ * @reason: reason why the peer was deleted (sent to userspace)
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+static int ovpn_peer_del_mp(struct ovpn_peer *peer,
+			    enum ovpn_del_peer_reason reason)
+	__must_hold(&peer->ovpn->peers->lock_by_id)
+{
+	struct ovpn_peer *tmp;
+	int ret = -ENOENT;
+
+	tmp = ovpn_peer_get_by_id(peer->ovpn, peer->id);
+	if (tmp == peer) {
+		ovpn_peer_unhash(peer, reason);
+		ret = 0;
+	}
+
+	if (tmp)
+		ovpn_peer_put(tmp);
+
+	return ret;
+}
+
 /**
  * ovpn_peer_del_p2p - delete peer from related tables in a P2P instance
  * @peer: the peer to delete
@@ -411,10 +550,36 @@ void ovpn_peer_release_p2p(struct ovpn_struct *ovpn)
  */
 int ovpn_peer_del(struct ovpn_peer *peer, enum ovpn_del_peer_reason reason)
 {
+	int ret;
+
 	switch (peer->ovpn->mode) {
+	case OVPN_MODE_MP:
+		spin_lock_bh(&peer->ovpn->peers->lock_by_id);
+		ret = ovpn_peer_del_mp(peer, reason);
+		spin_unlock_bh(&peer->ovpn->peers->lock_by_id);
+		return ret;
 	case OVPN_MODE_P2P:
-		return ovpn_peer_del_p2p(peer, reason);
+		spin_lock_bh(&peer->ovpn->lock);
+		ret = ovpn_peer_del_p2p(peer, reason);
+		spin_unlock_bh(&peer->ovpn->lock);
+		return ret;
 	default:
 		return -EOPNOTSUPP;
 	}
 }
+
+/**
+ * ovpn_peers_free - free all peers in the instance
+ * @ovpn: the instance whose peers should be released
+ */
+void ovpn_peers_free(struct ovpn_struct *ovpn)
+{
+	struct hlist_node *tmp;
+	struct ovpn_peer *peer;
+	int bkt;
+
+	spin_lock_bh(&ovpn->peers->lock_by_id);
+	hash_for_each_safe(ovpn->peers->by_id, bkt, tmp, peer, hash_entry_id)
+		ovpn_peer_unhash(peer, OVPN_DEL_PEER_REASON_TEARDOWN);
+	spin_unlock_bh(&ovpn->peers->lock_by_id);
+}
diff --git a/drivers/net/ovpn/peer.h b/drivers/net/ovpn/peer.h
index 86d4696b1529..dc51cc93ef20 100644
--- a/drivers/net/ovpn/peer.h
+++ b/drivers/net/ovpn/peer.h
@@ -30,6 +30,10 @@
  * @vpn_addrs: IP addresses assigned over the tunnel
  * @vpn_addrs.ipv4: IPv4 assigned to peer on the tunnel
  * @vpn_addrs.ipv6: IPv6 assigned to peer on the tunnel
+ * @hash_entry_id: entry in the peer ID hashtable
+ * @hash_entry_addr4: entry in the peer IPv4 hashtable
+ * @hash_entry_addr6: entry in the peer IPv6 hashtable
+ * @hash_entry_transp_addr: entry in the peer transport address hashtable
 * @sock: the socket being used to talk to this peer
 * @tcp: keeps track of TCP specific state
 * @tcp.strp: stream parser context (TCP only)
@@ -62,6 +66,10 @@ struct ovpn_peer {
 		struct in_addr ipv4;
 		struct in6_addr ipv6;
 	} vpn_addrs;
+	struct hlist_node hash_entry_id;
+	struct hlist_node hash_entry_addr4;
+	struct hlist_node hash_entry_addr6;
+	struct hlist_nulls_node hash_entry_transp_addr;
 	struct ovpn_socket *sock;
 
 	/* state of the TCP reading. Needed to keep track of how much of a
@@ -126,6 +134,7 @@ struct ovpn_peer *ovpn_peer_new(struct ovpn_struct *ovpn, u32 id);
 int ovpn_peer_add(struct ovpn_struct *ovpn, struct ovpn_peer *peer);
 int ovpn_peer_del(struct ovpn_peer *peer, enum ovpn_del_peer_reason reason);
 void ovpn_peer_release_p2p(struct ovpn_struct *ovpn);
+void ovpn_peers_free(struct ovpn_struct *ovpn);
 struct ovpn_peer *ovpn_peer_get_by_transp_addr(struct ovpn_struct *ovpn,
 					       struct sk_buff *skb);
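A note on sizing, which explains the "fairly large" comment in the patch: each DECLARE_HASHTABLE(..., 12) creates 2^12 = 4096 buckets, and the nulls table is another 4096 entries, so on a 64-bit kernel the three tables alone take roughly 3 * 4096 * 8 bytes ≈ 96 KiB — hence the on-demand kzalloc() instead of embedding the collection in struct ovpn_struct. For illustration, a lookup through ovpn_get_hash_head() would go roughly like this (a sketch mirroring how ovpn_peer_get_by_id presumably works; the refcount field name is assumed):

	/* find the bucket for a given peer ID and walk it under RCU */
	head = ovpn_get_hash_head(ovpn->peers->by_id, &peer_id,
				  sizeof(peer_id));

	rcu_read_lock();
	hlist_for_each_entry_rcu(peer, head, hash_entry_id) {
		if (peer->id == peer_id &&
		    kref_get_unless_zero(&peer->refcount)) {
			found = peer;	/* reference held, safe after unlock */
			break;
		}
	}
	rcu_read_unlock();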
With this change an ovpn instance will be able to stay connected to
multiple remote endpoints. This functionality is strictly required when
running ovpn on an OpenVPN server.

Signed-off-by: Antonio Quartulli <antonio@openvpn.net>
---
 drivers/net/ovpn/main.c       |  49 +++++++++-
 drivers/net/ovpn/ovpnstruct.h |  22 +++++
 drivers/net/ovpn/peer.c       | 167 +++++++++++++++++++++++++++++++++-
 drivers/net/ovpn/peer.h       |   9 ++
 4 files changed, 244 insertions(+), 3 deletions(-)